summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorThomas Zimmermann <tzimmermann@suse.de>2025-10-13 09:19:19 +0200
committerThomas Zimmermann <tzimmermann@suse.de>2025-10-13 09:19:19 +0200
commit9b966ae42235a88eaea714be09ff3d698535bdfe (patch)
tree7470df78fb74fdfda1f773feb3822c1c9ab38616 /drivers/gpu
parent5385871282e5c2831c226d32cf2ce26b45a7b164 (diff)
parent3a8660878839faadb4f1a6dd72c3179c1df56787 (diff)
Merge drm/drm-next into drm-misc-next
Updating drm-misc-next to the state of v6.18-rc1. Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/Kconfig4
-rw-r--r--drivers/gpu/drm/Makefile1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c83
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c66
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c110
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c355
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c143
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c3817
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c95
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c56
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c127
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c4
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c203
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h10
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c88
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c24
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c12
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c6
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c144
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c123
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c28
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c130
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_state.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_helper.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_spl_translate.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_transform.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_transform.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services_types.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_service.h (renamed from drivers/gpu/drm/amd/display/dc/inc/link.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_resource.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c64
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c60
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c22
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c10
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h6
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c53
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h8
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h98
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h7
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/dm_pp_interface.h1
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h4
-rw-r--r--drivers/gpu/drm/amd/include/mes_v11_api_def.h3
-rw-r--r--drivers/gpu/drm/amd/include/mes_v12_api_def.h3
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c87
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c234
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h4
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h6
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c4
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c8
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c65
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c11
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c55
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h12
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c19
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c19
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c19
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c15
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c19
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c50
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c78
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h6
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c19
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h2
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/Makefile0
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_core_status.h (renamed from drivers/gpu/drm/amd/amdgpu/dce_v11_0.h)21
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c2
-rw-r--r--drivers/gpu/drm/bridge/analogix/anx7625.c6
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c6
-rw-r--r--drivers/gpu/drm/bridge/samsung-dsim.c353
-rw-r--r--drivers/gpu/drm/bridge/waveshare-dsi.c4
-rw-r--r--drivers/gpu/drm/display/drm_dp_cec.c2
-rw-r--r--drivers/gpu/drm/drm_drv.c2
-rw-r--r--drivers/gpu/drm/drm_gem.c4
-rw-r--r--drivers/gpu/drm/drm_gpusvm.c317
-rw-r--r--drivers/gpu/drm/drm_gpuvm.c2
-rw-r--r--drivers/gpu/drm/drm_panic_qr.rs2
-rw-r--r--drivers/gpu/drm/exynos/exynos7_drm_decon.c36
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dsi.c9
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_hdmi.c2
-rw-r--r--drivers/gpu/drm/i915/Makefile2
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_bo.c15
-rw-r--r--drivers/gpu/drm/i915/display/intel_bo.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c74
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h2
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb.c23
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c14
-rw-r--r--drivers/gpu/drm/i915/display/intel_panic.c27
-rw-r--r--drivers/gpu/drm/i915/display/intel_panic.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane.c6
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c81
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.h2
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h11
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c42
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_wait.c8
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gemfs.c9
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset_types.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h1
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c6
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c12
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c10
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c6
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c3
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c18
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h6
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c3
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c6
-rw-r--r--drivers/gpu/drm/i915/i915_list_util.h23
-rw-r--r--drivers/gpu/drm/i915/i915_ptr_util.h66
-rw-r--r--drivers/gpu/drm/i915/i915_request.h5
-rw-r--r--drivers/gpu/drm/i915/i915_switcheroo.c7
-rw-r--r--drivers/gpu/drm/i915/i915_timer_util.c36
-rw-r--r--drivers/gpu/drm/i915/i915_timer_util.h23
-rw-r--r--drivers/gpu/drm/i915/i915_utils.c30
-rw-r--r--drivers/gpu/drm/i915/i915_utils.h215
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h6
-rw-r--r--drivers/gpu/drm/i915/i915_wait_util.h119
-rw-r--r--drivers/gpu/drm/i915/intel_pcode.c1
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c7
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.c4
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c5
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_selftest.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_spinner.c5
-rw-r--r--drivers/gpu/drm/i915/soc/intel_dram.c2
-rw-r--r--drivers/gpu/drm/i915/vlv_suspend.c5
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_catalog.c92
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.c108
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.h14
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c242
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.h3
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c10
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_hfi.c34
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_preempt.c44
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c13
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c21
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.h1
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c35
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h3
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c17
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c5
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h1
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c4
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c23
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c10
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c29
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h2
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c2
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c47
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c2
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.h1
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c16
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c34
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c21
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c32
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c95
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c16
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c16
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c12
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c1
-rw-r--r--drivers/gpu/drm/msm/msm_drv.h2
-rw-r--r--drivers/gpu/drm/msm/msm_gem.c21
-rw-r--r--drivers/gpu/drm/msm/msm_gem.h6
-rw-r--r--drivers/gpu/drm/msm/msm_gem_prime.c2
-rw-r--r--drivers/gpu/drm/msm/msm_gem_vma.c31
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h9
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_trace.h12
-rw-r--r--drivers/gpu/drm/msm/msm_iommu.c8
-rw-r--r--drivers/gpu/drm/msm/msm_kms.c14
-rw-r--r--drivers/gpu/drm/msm/msm_mdss.c3
-rw-r--r--drivers/gpu/drm/msm/msm_submitqueue.c4
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx.xml718
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml40
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml50
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml11
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml179
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml11
-rw-r--r--drivers/gpu/drm/msm/registers/gen_header.py201
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c2
-rw-r--r--drivers/gpu/drm/nova/driver.rs4
-rw-r--r--drivers/gpu/drm/nova/gem.rs10
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.c8
-rw-r--r--drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c142
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c80
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c68
-rw-r--r--drivers/gpu/drm/rockchip/inno_hdmi.c11
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop2.h1
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_lvds.h21
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop2_reg.c15
-rw-r--r--drivers/gpu/drm/tiny/Kconfig2
-rw-r--r--drivers/gpu/drm/tiny/pixpaper.c4
-rw-r--r--drivers/gpu/drm/tyr/Kconfig19
-rw-r--r--drivers/gpu/drm/tyr/Makefile3
-rw-r--r--drivers/gpu/drm/tyr/driver.rs205
-rw-r--r--drivers/gpu/drm/tyr/file.rs56
-rw-r--r--drivers/gpu/drm/tyr/gem.rs18
-rw-r--r--drivers/gpu/drm/tyr/gpu.rs219
-rw-r--r--drivers/gpu/drm/tyr/regs.rs108
-rw-r--r--drivers/gpu/drm/tyr/tyr.rs22
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h2
-rw-r--r--drivers/gpu/drm/v3d/v3d_fence.c2
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c1
-rw-r--r--drivers/gpu/drm/v3d/v3d_gemfs.c9
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c17
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.c6
-rw-r--r--drivers/gpu/drm/xe/Kconfig2
-rw-r--r--drivers/gpu/drm/xe/Kconfig.debug1
-rw-r--r--drivers/gpu/drm/xe/Makefile6
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h3
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h5
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h25
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h15
-rw-r--r--drivers/gpu/drm/xe/display/intel_bo.c91
-rw-r--r--drivers/gpu/drm/xe/display/intel_fbdev_fb.c18
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c33
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c10
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c73
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c8
-rw-r--r--drivers/gpu/drm/xe/display/xe_panic.c80
-rw-r--r--drivers/gpu/drm/xe/display/xe_plane_initial.c4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h1
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h3
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c36
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf.c16
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c776
-rw-r--r--drivers/gpu/drm/xe/tests/xe_live_test_mod.c2
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c66
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci.c239
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.h15
-rw-r--r--drivers/gpu/drm/xe/tests/xe_wa_test.c91
-rw-r--r--drivers/gpu/drm/xe/xe_bb.c4
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c865
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h78
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.c4
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h15
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.c481
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.h12
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c66
-rw-r--r--drivers/gpu/drm/xe/xe_device.c30
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.c98
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h19
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c72
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c5
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c31
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c22
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h8
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c25
-rw-r--r--drivers/gpu/drm/xe/xe_execlist_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c27
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.h5
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c8
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c73
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.c28
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.h3
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c28
-rw-r--r--drivers/gpu/drm/xe/xe_gt_printk.h32
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c22
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c24
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.c57
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats_types.h33
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.c48
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.h4
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c52
-rw-r--r--drivers/gpu/drm/xe/xe_guc.h4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c55
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.c13
-rw-r--r--drivers/gpu/drm/xe/xe_guc_exec_queue_types.h4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_fwif.h30
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c67
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c140
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.h4
-rw-r--r--drivers/gpu/drm/xe/xe_hmm.c325
-rw-r--r--drivers/gpu/drm/xe/xe_hmm.h18
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group.c6
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c45
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.c2
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.c464
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.h17
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c12
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c90
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c47
-rw-r--r--drivers/gpu/drm/xe/xe_nvm.c5
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c6
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c40
-rw-r--r--drivers/gpu/drm/xe/xe_pci_types.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c14
-rw-r--r--drivers/gpu/drm/xe/xe_printk.h129
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.c24
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c157
-rw-r--r--drivers/gpu/drm/xe/xe_pt.h3
-rw-r--r--drivers/gpu/drm/xe/xe_pt_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.c1
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_submit.c34
-rw-r--r--drivers/gpu/drm/xe/xe_query.c20
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c6
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.h3
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.c14
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.h2
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.c115
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.h6
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.c75
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.h17
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h44
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_types.h12
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c11
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c371
-rw-r--r--drivers/gpu/drm/xe/xe_svm.h75
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.c135
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.h13
-rw-r--r--drivers/gpu/drm/xe/xe_tile_printk.h127
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs.c12
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.c3
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c29
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_abi.h130
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.c320
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.h107
-rw-r--r--drivers/gpu/drm/xe/xe_validation.c278
-rw-r--r--drivers/gpu/drm/xe/xe_validation.h192
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c608
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h55
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.c40
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h100
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c63
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules1
-rw-r--r--drivers/gpu/nova-core/driver.rs46
-rw-r--r--drivers/gpu/nova-core/falcon.rs113
-rw-r--r--drivers/gpu/nova-core/falcon/gsp.rs16
-rw-r--r--drivers/gpu/nova-core/falcon/hal.rs2
-rw-r--r--drivers/gpu/nova-core/falcon/hal/ga102.rs47
-rw-r--r--drivers/gpu/nova-core/falcon/sec2.rs13
-rw-r--r--drivers/gpu/nova-core/fb.rs8
-rw-r--r--drivers/gpu/nova-core/firmware.rs113
-rw-r--r--drivers/gpu/nova-core/firmware/booter.rs375
-rw-r--r--drivers/gpu/nova-core/firmware/fwsec.rs17
-rw-r--r--drivers/gpu/nova-core/firmware/gsp.rs243
-rw-r--r--drivers/gpu/nova-core/firmware/riscv.rs91
-rw-r--r--drivers/gpu/nova-core/gpu.rs209
-rw-r--r--drivers/gpu/nova-core/gsp.rs22
-rw-r--r--drivers/gpu/nova-core/gsp/boot.rs137
-rw-r--r--drivers/gpu/nova-core/gsp/fw.rs7
-rw-r--r--drivers/gpu/nova-core/gsp/fw/r570_144.rs29
-rw-r--r--drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs1
-rw-r--r--drivers/gpu/nova-core/nova_core.rs1
-rw-r--r--drivers/gpu/nova-core/regs.rs84
-rw-r--r--drivers/gpu/nova-core/regs/macros.rs751
-rw-r--r--drivers/gpu/nova-core/util.rs20
-rw-r--r--drivers/gpu/nova-core/vbios.rs180
578 files changed, 14926 insertions, 10249 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index f7ea8e895c0c..7e6bc0b3a589 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -396,9 +396,11 @@ source "drivers/gpu/drm/sprd/Kconfig"
source "drivers/gpu/drm/imagination/Kconfig"
+source "drivers/gpu/drm/tyr/Kconfig"
+
config DRM_HYPERV
tristate "DRM Support for Hyper-V synthetic video device"
- depends on DRM && PCI && HYPERV
+ depends on DRM && PCI && HYPERV_VMBUS
select DRM_CLIENT_SELECTION
select DRM_KMS_HELPER
select DRM_GEM_SHMEM_HELPER
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 5ba4ffdb8055..c2672f369aed 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -221,6 +221,7 @@ obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
obj-$(CONFIG_DRM_LIMA) += lima/
obj-$(CONFIG_DRM_PANFROST) += panfrost/
obj-$(CONFIG_DRM_PANTHOR) += panthor/
+obj-$(CONFIG_DRM_TYR) += tyr/
obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
obj-$(CONFIG_DRM_MCDE) += mcde/
obj-$(CONFIG_DRM_TIDSS) += tidss/
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2d0fea87af79..64e7acff8f18 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -138,7 +138,6 @@ amdgpu-y += \
# add DCE block
amdgpu-y += \
dce_v10_0.o \
- dce_v11_0.o \
amdgpu_vkms.o
# add GFX block
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 17848ce65d1f..2a0df4cabb99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -63,6 +63,7 @@
#include "kgd_pp_interface.h"
#include "amd_shared.h"
+#include "amdgpu_utils.h"
#include "amdgpu_mode.h"
#include "amdgpu_ih.h"
#include "amdgpu_irq.h"
@@ -434,7 +435,6 @@ struct amdgpu_clock {
uint32_t default_mclk;
uint32_t default_sclk;
uint32_t default_dispclk;
- uint32_t current_dispclk;
uint32_t dp_extclk;
uint32_t max_pixel_clock;
};
@@ -545,7 +545,7 @@ struct amdgpu_wb {
* this value can be accessed directly by using the offset as an index.
* For the GPU address, it is necessary to use gpu_addr and the offset.
*/
- volatile uint32_t *wb;
+ uint32_t *wb;
/**
* @gpu_addr:
@@ -721,7 +721,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
/* VRAM scratch page for HDP bug, default vram page */
struct amdgpu_mem_scratch {
struct amdgpu_bo *robj;
- volatile uint32_t *ptr;
+ uint32_t *ptr;
u64 gpu_addr;
};
@@ -752,6 +752,7 @@ typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, u
struct amdgpu_mmio_remap {
u32 reg_offset;
resource_size_t bus_addr;
+ struct amdgpu_bo *bo;
};
/* Define the HW IP blocks will be used in driver , add more if necessary */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index fbe7616555c8..a2879d2b7c8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -250,16 +250,24 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
{
- if (adev->kfd.dev)
- kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ kgd2kfd_stop_sched_all_nodes(adev->kfd.dev);
+ else
+ kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
+ }
}
int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc)
{
int r = 0;
- if (adev->kfd.dev)
- r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev);
+ else
+ r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
+ }
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 127927b16ee2..9e120c934cc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -428,7 +428,9 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd);
void kgd2kfd_unlock_kfd(struct kfd_dev *kfd);
int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd);
int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd);
bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
bool retry_fault);
@@ -518,11 +520,21 @@ static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
return 0;
}
+static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
{
return 0;
}
+static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
{
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 04ef0ca10541..0239114fb6c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -352,7 +352,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -449,7 +449,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index 6d08bc2781a3..f2278a0937ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -338,7 +338,7 @@ static int hqd_dump_v10_3(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -435,7 +435,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+12)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
index e0e6a6a49d90..aaccf0b9947d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -323,7 +323,7 @@ static int hqd_dump_v11(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -420,7 +420,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (7+11+1+12+12)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
index 6f0dc23c901b..e0ceab400b2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
@@ -115,7 +115,7 @@ static int hqd_dump_v12(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -146,7 +146,7 @@ static int hqd_sdma_dump_v12(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (last_reg - first_reg + 1)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index c3b34a410375..83020963dfde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1089,7 +1089,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
return 0;
}
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, &range);
if (ret) {
if (ret == -EAGAIN)
pr_debug("Failed to get user pages, try again\n");
@@ -1103,6 +1103,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
pr_err("%s: Failed to reserve BO\n", __func__);
goto release_out;
}
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
@@ -2565,8 +2568,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
}
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
- &mem->range);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range);
if (ret) {
pr_debug("Failed %d to get user pages\n", ret);
@@ -2584,17 +2586,24 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
* from the KFD, trigger a segmentation fault in VM debug mode.
*/
if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
+ struct kfd_process *p;
+
pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
pid_nr(process_info->pid), mem->va);
// Send GPU VM fault to user space
- kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
- mem->va);
+ p = kfd_lookup_process_by_pid(process_info->pid);
+ if (p) {
+ kfd_signal_vm_fault_event_with_userptr(p, mem->va);
+ kfd_unref_process(p);
+ }
}
ret = 0;
}
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+
mutex_lock(&process_info->notifier_lock);
/* Mark the BO as valid unless it was invalidated
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 9dfdc08cc887..763f2b8dcf13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -706,7 +706,6 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
}
adev->clock.dp_extclk =
le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
- adev->clock.current_dispclk = adev->clock.default_dispclk;
adev->clock.max_pixel_clock = le16_to_cpu(firmware_info->info.usMaxPixelClock);
if (adev->clock.max_pixel_clock == 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 702f6610d024..66fb37b64388 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -184,43 +184,36 @@ void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
struct drm_amdgpu_bo_list_entry **info_param)
{
- const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
+ const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
+ const uint32_t bo_info_size = in->bo_info_size;
+ const uint32_t bo_number = in->bo_number;
struct drm_amdgpu_bo_list_entry *info;
- int r;
-
- info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);
- if (!info)
- return -ENOMEM;
/* copy the handle array from userspace to a kernel buffer */
- r = -EFAULT;
- if (likely(info_size == in->bo_info_size)) {
- unsigned long bytes = in->bo_number *
- in->bo_info_size;
-
- if (copy_from_user(info, uptr, bytes))
- goto error_free;
-
+ if (likely(info_size == bo_info_size)) {
+ info = vmemdup_array_user(uptr, bo_number, info_size);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
} else {
- unsigned long bytes = min(in->bo_info_size, info_size);
+ const uint32_t bytes = min(bo_info_size, info_size);
unsigned i;
- memset(info, 0, in->bo_number * info_size);
- for (i = 0; i < in->bo_number; ++i) {
- if (copy_from_user(&info[i], uptr, bytes))
- goto error_free;
+ info = kvmalloc_array(bo_number, info_size, GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
- uptr += in->bo_info_size;
+ memset(info, 0, bo_number * info_size);
+ for (i = 0; i < bo_number; ++i, uptr += bo_info_size) {
+ if (copy_from_user(&info[i], uptr, bytes)) {
+ kvfree(info);
+ return -EFAULT;
+ }
}
}
*info_param = info;
return 0;
-
-error_free:
- kvfree(info);
- return r;
}
int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 555cd6d877c3..a716c9886c74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -38,7 +38,6 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo *bo;
struct amdgpu_bo_va *bo_va;
uint32_t priority;
- struct page **user_pages;
struct hmm_range *range;
bool user_invalidated;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index bf38fc69c1cf..47e9bfba0642 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -398,30 +398,28 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
struct drm_display_mode *mode = NULL;
struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
int i;
- static const struct mode_size {
+ int n;
+ struct mode_size {
+ char name[DRM_DISPLAY_MODE_LEN];
int w;
int h;
- } common_modes[17] = {
- { 640, 480},
- { 720, 480},
- { 800, 600},
- { 848, 480},
- {1024, 768},
- {1152, 768},
- {1280, 720},
- {1280, 800},
- {1280, 854},
- {1280, 960},
- {1280, 1024},
- {1440, 900},
- {1400, 1050},
- {1680, 1050},
- {1600, 1200},
- {1920, 1080},
- {1920, 1200}
+ } common_modes[] = {
+ { "640x480", 640, 480},
+ { "800x600", 800, 600},
+ { "1024x768", 1024, 768},
+ { "1280x720", 1280, 720},
+ { "1280x800", 1280, 800},
+ {"1280x1024", 1280, 1024},
+ { "1440x900", 1440, 900},
+ {"1680x1050", 1680, 1050},
+ {"1600x1200", 1600, 1200},
+ {"1920x1080", 1920, 1080},
+ {"1920x1200", 1920, 1200}
};
- for (i = 0; i < 17; i++) {
+ n = ARRAY_SIZE(common_modes);
+
+ for (i = 0; i < n; i++) {
if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
if (common_modes[i].w > 1024 ||
common_modes[i].h > 768)
@@ -434,12 +432,11 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
common_modes[i].h == native_mode->vdisplay))
continue;
}
- if (common_modes[i].w < 320 || common_modes[i].h < 200)
- continue;
mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
if (!mode)
return;
+ strscpy(mode->name, common_modes[i].name, DRM_DISPLAY_MODE_LEN);
drm_mode_probed_add(connector, mode);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2ac9729e4c86..9cd7741d2254 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -29,6 +29,7 @@
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>
+#include <linux/hmm.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
@@ -178,25 +179,17 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
struct amdgpu_vm *vm = &fpriv->vm;
- uint64_t *chunk_array_user;
uint64_t *chunk_array;
uint32_t uf_offset = 0;
size_t size;
int ret;
int i;
- chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
- GFP_KERNEL);
- if (!chunk_array)
- return -ENOMEM;
-
- /* get chunks */
- chunk_array_user = u64_to_user_ptr(cs->in.chunks);
- if (copy_from_user(chunk_array, chunk_array_user,
- sizeof(uint64_t)*cs->in.num_chunks)) {
- ret = -EFAULT;
- goto free_chunk;
- }
+ chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks),
+ cs->in.num_chunks,
+ sizeof(uint64_t));
+ if (IS_ERR(chunk_array))
+ return PTR_ERR(chunk_array);
p->nchunks = cs->in.num_chunks;
p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
@@ -209,7 +202,6 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
for (i = 0; i < p->nchunks; i++) {
struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
struct drm_amdgpu_cs_chunk user_chunk;
- uint32_t __user *cdata;
chunk_ptr = u64_to_user_ptr(chunk_array[i]);
if (copy_from_user(&user_chunk, chunk_ptr,
@@ -222,20 +214,16 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
p->chunks[i].length_dw = user_chunk.length_dw;
size = p->chunks[i].length_dw;
- cdata = u64_to_user_ptr(user_chunk.chunk_data);
- p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
- GFP_KERNEL);
- if (p->chunks[i].kdata == NULL) {
- ret = -ENOMEM;
+ p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data),
+ size,
+ sizeof(uint32_t));
+ if (IS_ERR(p->chunks[i].kdata)) {
+ ret = PTR_ERR(p->chunks[i].kdata);
i--;
goto free_partial_kdata;
}
size *= sizeof(uint32_t);
- if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
- ret = -EFAULT;
- goto free_partial_kdata;
- }
/* Assume the worst on the following checks */
ret = -EINVAL;
@@ -286,7 +274,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
}
}
- if (!p->gang_size) {
+ if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) {
ret = -EINVAL;
goto free_all_kdata;
}
@@ -896,26 +884,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
bool userpage_invalidated = false;
struct amdgpu_bo *bo = e->bo;
- int i;
-
- e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL);
- if (!e->user_pages) {
- drm_err(adev_to_drm(p->adev), "kvmalloc_array failure\n");
- r = -ENOMEM;
- goto out_free_user_pages;
- }
- r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
- if (r) {
- kvfree(e->user_pages);
- e->user_pages = NULL;
+ r = amdgpu_ttm_tt_get_user_pages(bo, &e->range);
+ if (r)
goto out_free_user_pages;
- }
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
- if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+ if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) {
userpage_invalidated = true;
break;
}
@@ -959,7 +934,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
- e->user_invalidated && e->user_pages) {
+ e->user_invalidated) {
amdgpu_bo_placement_from_domain(e->bo,
AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
@@ -968,11 +943,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto out_free_user_pages;
amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
- e->user_pages);
+ e->range);
}
-
- kvfree(e->user_pages);
- e->user_pages = NULL;
}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -1014,11 +986,7 @@ out_free_user_pages:
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->bo;
- if (!e->user_pages)
- continue;
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
- kvfree(e->user_pages);
- e->user_pages = NULL;
e->range = NULL;
}
mutex_unlock(&p->bo_list->bo_list_mutex);
@@ -1767,30 +1735,21 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
{
struct amdgpu_device *adev = drm_to_adev(dev);
union drm_amdgpu_wait_fences *wait = data;
- uint32_t fence_count = wait->in.fence_count;
- struct drm_amdgpu_fence *fences_user;
struct drm_amdgpu_fence *fences;
int r;
/* Get the fences from userspace */
- fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
- GFP_KERNEL);
- if (fences == NULL)
- return -ENOMEM;
-
- fences_user = u64_to_user_ptr(wait->in.fences);
- if (copy_from_user(fences, fences_user,
- sizeof(struct drm_amdgpu_fence) * fence_count)) {
- r = -EFAULT;
- goto err_free_fences;
- }
+ fences = memdup_array_user(u64_to_user_ptr(wait->in.fences),
+ wait->in.fence_count,
+ sizeof(struct drm_amdgpu_fence));
+ if (IS_ERR(fences))
+ return PTR_ERR(fences);
if (wait->in.wait_all)
r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
else
r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
-err_free_fences:
kfree(fences);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bdfb80377e6a..7a899fb4de29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5072,6 +5072,10 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
return 0;
+ /* No need to evict when going to S5 through S4 callbacks */
+ if (system_state == SYSTEM_POWER_OFF)
+ return 0;
+
ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
if (ret) {
dev_warn(adev->dev, "evicting device resources failed\n");
@@ -5196,7 +5200,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
adev->in_suspend = true;
if (amdgpu_sriov_vf(adev)) {
- if (!adev->in_s0ix && !adev->in_runpm)
+ if (!adev->in_runpm)
amdgpu_amdkfd_suspend_process(adev);
amdgpu_virt_fini_data_exchange(adev);
r = amdgpu_virt_request_full_gpu(adev, false);
@@ -5216,10 +5220,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
amdgpu_device_ip_suspend_phase1(adev);
- if (!adev->in_s0ix) {
- amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
- amdgpu_userq_suspend(adev);
- }
+ amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ amdgpu_userq_suspend(adev);
r = amdgpu_device_evict_resources(adev);
if (r)
@@ -5314,15 +5316,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
goto exit;
}
- if (!adev->in_s0ix) {
- r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
- if (r)
- goto exit;
+ r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ if (r)
+ goto exit;
- r = amdgpu_userq_resume(adev);
- if (r)
- goto exit;
- }
+ r = amdgpu_userq_resume(adev);
+ if (r)
+ goto exit;
r = amdgpu_device_ip_late_init(adev);
if (r)
@@ -5335,7 +5335,7 @@ exit:
amdgpu_virt_init_data_exchange(adev);
amdgpu_virt_release_full_gpu(adev, true);
- if (!adev->in_s0ix && !r && !adev->in_runpm)
+ if (!r && !adev->in_runpm)
r = amdgpu_amdkfd_resume_process(adev);
}
@@ -6389,23 +6389,28 @@ static int amdgpu_device_sched_resume(struct list_head *device_list,
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
- if (tmp_adev->asic_reset_res)
- r = tmp_adev->asic_reset_res;
-
- tmp_adev->asic_reset_res = 0;
-
- if (r) {
+ if (tmp_adev->asic_reset_res) {
/* bad news, how to tell it to userspace ?
* for ras error, we should report GPU bad status instead of
* reset failure
*/
if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
!amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
- dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
- atomic_read(&tmp_adev->gpu_reset_counter));
- amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+ dev_info(
+ tmp_adev->dev,
+ "GPU reset(%d) failed with error %d \n",
+ atomic_read(
+ &tmp_adev->gpu_reset_counter),
+ tmp_adev->asic_reset_res);
+ amdgpu_vf_error_put(tmp_adev,
+ AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
+ tmp_adev->asic_reset_res);
+ if (!r)
+ r = tmp_adev->asic_reset_res;
+ tmp_adev->asic_reset_res = 0;
} else {
- dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
+ dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
+ atomic_read(&tmp_adev->gpu_reset_counter));
if (amdgpu_acpi_smart_shift_update(tmp_adev,
AMDGPU_SS_DEV_D0))
dev_warn(tmp_adev->dev,
@@ -6937,7 +6942,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
+ amdgpu_get_xgmi_hive(adev);
struct amdgpu_reset_context reset_context;
struct list_head device_list;
@@ -6976,10 +6982,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
amdgpu_device_recovery_get_reset_lock(adev, &device_list);
amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
hive, false);
- if (hive) {
+ if (hive)
mutex_unlock(&hive->hive_lock);
- amdgpu_put_xgmi_hive(hive);
- }
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
/* Permanent error, prepare for device removal */
@@ -7158,28 +7162,35 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
{
- struct pci_dev *parent = pci_upstream_bridge(adev->pdev);
+ struct pci_dev *swus, *swds;
int r;
- if (parent->vendor != PCI_VENDOR_ID_ATI)
+ swds = pci_upstream_bridge(adev->pdev);
+ if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
+ pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
+ return;
+ swus = pci_upstream_bridge(swds);
+ if (!swus ||
+ (swus->vendor != PCI_VENDOR_ID_ATI &&
+ swus->vendor != PCI_VENDOR_ID_AMD) ||
+ pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
return;
/* If already saved, return */
if (adev->pcie_reset_ctx.swus)
return;
/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
- r = pci_save_state(parent);
+ r = pci_save_state(swds);
if (r)
return;
- adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(parent);
+ adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
- parent = pci_upstream_bridge(parent);
- r = pci_save_state(parent);
+ r = pci_save_state(swus);
if (r)
return;
- adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(parent);
+ adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
- adev->pcie_reset_ctx.swus = parent;
+ adev->pcie_reset_ctx.swus = swus;
}
static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ece251cbe8c3..bff25ef3e2d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -960,7 +960,7 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
*/
MODULE_PARM_DESC(
freesync_video,
- "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)");
+ "Adds additional modes via VRR for refresh changes without a full modeset (0 = off (default), 1 = on)");
module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
/**
@@ -2674,7 +2674,7 @@ static int amdgpu_pmops_thaw(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
/* do not resume device if it's normal hibernation */
- if (!pm_hibernate_is_recovering())
+ if (!pm_hibernate_is_recovering() && !pm_hibernation_mode_is_suspend())
return 0;
return amdgpu_device_resume(drm_dev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index 91d638098889..b349bb3676d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -70,6 +70,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
[AMDGPU_PL_GWS] = "gws",
[AMDGPU_PL_OA] = "oa",
[AMDGPU_PL_DOORBELL] = "doorbell",
+ [AMDGPU_PL_MMIO_REMAP] = "mmioremap",
};
unsigned int hw_ip, i;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 1c9b5009304e..094c508d3d44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -458,6 +458,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
/* always clear VRAM */
flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+ if (args->in.domains & AMDGPU_GEM_DOMAIN_MMIO_REMAP)
+ return -EINVAL;
+
/* create a gem object to contain this object in */
if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
@@ -569,8 +572,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
goto release_object;
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
- r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
- &range);
+ r = amdgpu_ttm_tt_get_user_pages(bo, &range);
if (r)
goto release_object;
@@ -578,6 +580,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (r)
goto user_pages_done;
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 98aa99b314c9..ebe2b4c68b0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_read;
+
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
@@ -1171,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_write;
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@@ -2280,7 +2285,7 @@ void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
* Return:
* return the latest index.
*/
-u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer)
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer)
{
u32 count = 0;
@@ -2304,7 +2309,7 @@ u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer)
* Return:
* return the latest index.
*/
-u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count)
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count)
{
const struct cs_section_def *sect = NULL;
const struct cs_extent_def *ext = NULL;
@@ -2331,7 +2336,7 @@ u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer,
* @buffer: This is an output variable that gets the PACKET3 preamble end.
* @count: Index to start set the preemble end.
*/
-void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count)
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count)
{
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 08f268dab8f5..fb5f7a0ee029 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -642,9 +642,9 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring);
void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work);
void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring);
-u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer);
-u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count);
-void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count);
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index e36fede7f74c..2c6a6b858112 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -167,13 +167,12 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
uint64_t start, uint64_t npages, bool readonly,
- void *owner, struct page **pages,
+ void *owner,
struct hmm_range **phmm_range)
{
struct hmm_range *hmm_range;
unsigned long end;
unsigned long timeout;
- unsigned long i;
unsigned long *pfns;
int r = 0;
@@ -222,14 +221,6 @@ retry:
hmm_range->start = start;
hmm_range->hmm_pfns = pfns;
- /*
- * Due to default_flags, all pages are HMM_PFN_VALID or
- * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
- * the notifier_lock, and mmu_interval_read_retry() must be done first.
- */
- for (i = 0; pages && i < npages; i++)
- pages[i] = hmm_pfn_to_page(pfns[i]);
-
*phmm_range = hmm_range;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
index e2edcd010ccc..953e1d06de20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -33,7 +33,7 @@
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
uint64_t start, uint64_t npages, bool readonly,
- void *owner, struct page **pages,
+ void *owner,
struct hmm_range **phmm_range);
bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index 57101d24422f..9cb72f0c5277 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -184,7 +184,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
"AMDGPU i2c hw bus %s", name);
i2c->adapter.algo = &amdgpu_atombios_i2c_algo;
- ret = i2c_add_adapter(&i2c->adapter);
+ ret = devm_i2c_add_adapter(dev->dev, &i2c->adapter);
if (ret)
goto out_free;
} else {
@@ -215,15 +215,6 @@ out_free:
}
-void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c)
-{
- if (!i2c)
- return;
- WARN_ON(i2c->has_aux);
- i2c_del_adapter(&i2c->adapter);
- kfree(i2c);
-}
-
void amdgpu_i2c_init(struct amdgpu_device *adev)
{
if (!adev->is_atom_fw) {
@@ -248,12 +239,9 @@ void amdgpu_i2c_fini(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
- if (adev->i2c_bus[i]) {
- amdgpu_i2c_destroy(adev->i2c_bus[i]);
+ for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++)
+ if (adev->i2c_bus[i])
adev->i2c_bus[i] = NULL;
- }
- }
}
/* looks up bus based on id */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 5dd78a9cb12d..3ef5bc95642c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -275,13 +275,12 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->vm_hub;
- struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
uint64_t fence_context = adev->fence_context + ring->idx;
bool needs_flush = vm->use_cpu_for_update;
uint64_t updates = amdgpu_vm_tlb_seq(vm);
int r;
- *id = id_mgr->reserved;
+ *id = vm->reserved_vmid[vmhub];
if ((*id)->owner != vm->immediate.fence_context ||
!amdgpu_vmid_compatible(*id, job) ||
(*id)->flushed_updates < updates ||
@@ -474,40 +473,61 @@ bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
return vm->reserved_vmid[vmhub];
}
-int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
+/*
+ * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm
+ * @adev: amdgpu device structure
+ * @vm: the VM to reserve an ID for
+ * @vmhub: the VMHUB which should be used
+ *
+ * Mostly used to have a reserved VMID for debugging and SPM.
+ *
+ * Returns: 0 for success, -ENOENT if an ID is already reserved.
+ */
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *id;
+ int r = 0;
mutex_lock(&id_mgr->lock);
-
- ++id_mgr->reserved_use_count;
- if (!id_mgr->reserved) {
- struct amdgpu_vmid *id;
-
- id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid,
- list);
- /* Remove from normal round robin handling */
- list_del_init(&id->list);
- id_mgr->reserved = id;
+ if (vm->reserved_vmid[vmhub])
+ goto unlock;
+ if (id_mgr->reserved_vmid) {
+ r = -ENOENT;
+ goto unlock;
}
-
+ /* Remove from normal round robin handling */
+ id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
+ list_del_init(&id->list);
+ vm->reserved_vmid[vmhub] = id;
+ id_mgr->reserved_vmid = true;
mutex_unlock(&id_mgr->lock);
+
return 0;
+unlock:
+ mutex_unlock(&id_mgr->lock);
+ return r;
}
-void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
+/*
+ * amdgpu_vmid_free_reserved - free up a reserved VMID again
+ * @adev: amdgpu device structure
+ * @vm: the VM with the reserved ID
+ * @vmhub: the VMHUB which should be used
+ */
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
- if (!--id_mgr->reserved_use_count) {
- /* give the reserved ID back to normal round robin */
- list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
- id_mgr->reserved = NULL;
+ if (vm->reserved_vmid[vmhub]) {
+ list_add(&vm->reserved_vmid[vmhub]->list,
+ &id_mgr->ids_lru);
+ vm->reserved_vmid[vmhub] = NULL;
+ id_mgr->reserved_vmid = false;
}
-
mutex_unlock(&id_mgr->lock);
}
@@ -574,7 +594,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
mutex_init(&id_mgr->lock);
INIT_LIST_HEAD(&id_mgr->ids_lru);
- id_mgr->reserved_use_count = 0;
/* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */
if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0))
@@ -594,11 +613,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
- /* alloc a default reserved vmid to enforce isolation */
- for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
- if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE)
- amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
- }
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 240fa6751260..b3649cd3af56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -67,8 +67,7 @@ struct amdgpu_vmid_mgr {
unsigned num_ids;
struct list_head ids_lru;
struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
- struct amdgpu_vmid *reserved;
- unsigned int reserved_use_count;
+ bool reserved_vmid;
};
int amdgpu_pasid_alloc(unsigned int bits);
@@ -79,10 +78,10 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
-int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
- unsigned vmhub);
-void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
- unsigned vmhub);
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned vmhub);
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned vmhub);
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_job *job, struct dma_fence **fence);
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 7f7ea046e209..f58b6be7fccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -56,14 +56,14 @@ struct amdgpu_ih_ring {
bool use_bus_addr;
struct amdgpu_bo *ring_obj;
- volatile uint32_t *ring;
+ uint32_t *ring;
uint64_t gpu_addr;
uint64_t wptr_addr;
- volatile uint32_t *wptr_cpu;
+ uint32_t *wptr_cpu;
uint64_t rptr_addr;
- volatile uint32_t *rptr_cpu;
+ uint32_t *rptr_cpu;
bool enabled;
unsigned rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 22da65f45226..6b7d66b6d4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -540,3 +540,68 @@ void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_pri
drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
}
}
+
+static inline bool amdgpu_jpeg_reg_valid(u32 reg)
+{
+ if (reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END ||
+ (reg >= JPEG_ATOMIC_RANGE_START && reg <= JPEG_ATOMIC_RANGE_END))
+ return false;
+ else
+ return true;
+}
+
+/**
+ * amdgpu_jpeg_dec_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ u32 i, reg, res, cond, type;
+ struct amdgpu_device *adev = parser->adev;
+
+ for (i = 0; i < ib->length_dw ; i += 2) {
+ reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
+ res = CP_PACKETJ_GET_RES(ib->ptr[i]);
+ cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
+ type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
+
+ if (res) /* only support 0 at the moment */
+ return -EINVAL;
+
+ switch (type) {
+ case PACKETJ_TYPE0:
+ if (cond != PACKETJ_CONDITION_CHECK0 ||
+ !amdgpu_jpeg_reg_valid(reg)) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE3:
+ if (cond != PACKETJ_CONDITION_CHECK3 ||
+ !amdgpu_jpeg_reg_valid(reg)) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE6:
+ if (ib->ptr[i] == CP_PACKETJ_NOP)
+ continue;
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ default:
+ dev_err(adev->dev, "Unknown packet type %d !\n", type);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 4f0775e39b54..346ae0ab09d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -25,11 +25,18 @@
#define __AMDGPU_JPEG_H__
#include "amdgpu_ras.h"
+#include "amdgpu_cs.h"
#define AMDGPU_MAX_JPEG_INSTANCES 4
#define AMDGPU_MAX_JPEG_RINGS 10
#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8
+#define JPEG_REG_RANGE_START 0x4000
+#define JPEG_REG_RANGE_END 0x41c2
+#define JPEG_ATOMIC_RANGE_START 0x4120
+#define JPEG_ATOMIC_RANGE_END 0x412A
+
+
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
@@ -170,5 +177,8 @@ int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
const struct amdgpu_hwip_reg_entry *reg, u32 count);
void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block);
void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 8a76960803c6..a9327472c651 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -939,6 +939,10 @@ out:
if (adev->gfx.config.ta_cntl2_truncate_coord_mode)
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
+ /* Gang submit is not supported under SRIOV currently */
+ if (!amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_GANG_SUBMIT;
+
if (amdgpu_passthrough(adev))
dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT <<
AMDGPU_IDS_FLAGS_MODE_SHIFT) &
@@ -1417,14 +1421,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
amdgpu_debugfs_vm_init(file_priv);
- r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
+ r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
if (r)
goto error_pasid;
- r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
- if (r)
- goto error_vm;
-
fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
if (!fpriv->prt_va) {
r = -ENOMEM;
@@ -1464,10 +1464,8 @@ error_vm:
amdgpu_vm_fini(adev, &fpriv->vm);
error_pasid:
- if (pasid) {
+ if (pasid)
amdgpu_pasid_free(pasid);
- amdgpu_vm_set_pasid(adev, &fpriv->vm, 0);
- }
kfree(fpriv);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d18bade9c98f..e08f58de4b17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -153,6 +153,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
c++;
}
+ if (domain & AMDGPU_GEM_DOMAIN_MMIO_REMAP) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_MMIO_REMAP;
+ places[c].flags = 0;
+ c++;
+ }
+
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
places[c].lpfn = 0;
@@ -1546,6 +1554,8 @@ uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
return AMDGPU_PL_OA;
case AMDGPU_GEM_DOMAIN_DOORBELL:
return AMDGPU_PL_DOORBELL;
+ case AMDGPU_GEM_DOMAIN_MMIO_REMAP:
+ return AMDGPU_PL_MMIO_REMAP;
default:
return TTM_PL_SYSTEM;
}
@@ -1629,6 +1639,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
case AMDGPU_PL_DOORBELL:
placement = "DOORBELL";
break;
+ case AMDGPU_PL_MMIO_REMAP:
+ placement = "MMIO REMAP";
+ break;
case TTM_PL_SYSTEM:
default:
placement = "CPU";
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 87523fcd4386..656b8a931dae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -167,6 +167,8 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
return AMDGPU_GEM_DOMAIN_OA;
case AMDGPU_PL_DOORBELL:
return AMDGPU_GEM_DOMAIN_DOORBELL;
+ case AMDGPU_PL_MMIO_REMAP:
+ return AMDGPU_GEM_DOMAIN_MMIO_REMAP;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 3696f48c233b..8c0e5d03de50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -506,7 +506,8 @@ static int psp_sw_init(struct amdgpu_ip_block *ip_block)
}
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- AMDGPU_GEM_DOMAIN_VRAM,
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&psp->fw_pri_bo,
&psp->fw_pri_mc_addr,
&psp->fw_pri_buf);
@@ -2351,7 +2352,7 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
}
ret = psp_ta_load(psp, &psp->securedisplay_context.context);
- if (!ret) {
+ if (!ret && !psp->securedisplay_context.context.resp_status) {
psp->securedisplay_context.context.initialized = true;
mutex_init(&psp->securedisplay_context.mutex);
} else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
index 38face981c3e..6e8aad91bcd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -171,13 +171,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t
copy_pos += sizeof(uint32_t);
- ta_bin = kzalloc(ta_bin_len, GFP_KERNEL);
- if (!ta_bin)
- return -ENOMEM;
- if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) {
- ret = -EFAULT;
- goto err_free_bin;
- }
+ ta_bin = memdup_user(&buf[copy_pos], ta_bin_len);
+ if (IS_ERR(ta_bin))
+ return PTR_ERR(ta_bin);
/* Set TA context and functions */
set_ta_context_funcs(psp, ta_type, &context);
@@ -327,13 +323,9 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
return -EFAULT;
copy_pos += sizeof(uint32_t);
- shared_buf = kzalloc(shared_buf_len, GFP_KERNEL);
- if (!shared_buf)
- return -ENOMEM;
- if (copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len)) {
- ret = -EFAULT;
- goto err_free_shared_buf;
- }
+ shared_buf = memdup_user(&buf[copy_pos], shared_buf_len);
+ if (IS_ERR(shared_buf))
+ return PTR_ERR(shared_buf);
set_ta_context_funcs(psp, ta_type, &context);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7fe5b1940df8..e0ee21150860 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -219,10 +219,17 @@ static int amdgpu_check_address_validity(struct amdgpu_device *adev,
struct amdgpu_vram_block_info blk_info;
uint64_t page_pfns[32] = {0};
int i, ret, count;
+ bool hit = false;
if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
return 0;
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_check_vf_critical_region(adev, address, &hit))
+ return -EPERM;
+ return hit ? -EACCES : 0;
+ }
+
if ((address >= adev->gmc.mc_vram_size) ||
(address >= RAS_UMC_INJECT_ADDR_LIMIT))
return -EFAULT;
@@ -2702,6 +2709,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle = NULL;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ unsigned int error_query_mode;
enum ras_event_type type;
if (hive) {
@@ -2730,6 +2738,13 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
device_list_handle = &device_list;
}
+ if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) {
+ if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) {
+ /* wait 500ms to ensure pmfw polling mca bank info done */
+ msleep(500);
+ }
+ }
+
type = amdgpu_ras_get_fatal_error_event(adev);
list_for_each_entry(remote_adev,
device_list_handle, gmc.xgmi.head) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 50fcd86e1033..be2e56ce1355 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -91,6 +91,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
break;
case TTM_PL_TT:
case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
node = to_ttm_range_mgr_node(res)->mm_nodes;
while (start >= node->size << PAGE_SHIFT)
start -= node++->size << PAGE_SHIFT;
@@ -153,6 +154,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
break;
case TTM_PL_TT:
case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
node = cur->node;
cur->node = ++node;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 486c3646710c..8f6ce948c684 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -364,7 +364,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
- r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_bytes,
+ PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
&ring->gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7670f5d82b9e..b6b649179776 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -114,7 +114,7 @@ struct amdgpu_sched {
*/
struct amdgpu_fence_driver {
uint64_t gpu_addr;
- volatile uint32_t *cpu_addr;
+ uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
uint32_t sync_seq;
atomic_t last_seq;
@@ -211,7 +211,18 @@ struct amdgpu_ring_funcs {
bool support_64bit_ptrs;
bool no_user_fence;
bool secure_submission_supported;
- unsigned extra_dw;
+
+ /**
+ * @extra_bytes:
+ *
+ * Optional extra space in bytes that is added to the ring size
+ * when allocating the BO that holds the contents of the ring.
+ * This space isn't used for command submission to the ring,
+ * but is just there to satisfy some hardware requirements or
+ * implement workarounds. It's up to the implementation of each
+ * specific ring to initialize this space.
+ */
+ unsigned extra_bytes;
/* ring read/write ptr handling */
u64 (*get_rptr)(struct amdgpu_ring *ring);
@@ -298,7 +309,7 @@ struct amdgpu_ring {
unsigned int ring_backup_entries_to_copy;
unsigned rptr_offs;
u64 rptr_gpu_addr;
- volatile u32 *rptr_cpu_addr;
+ u32 *rptr_cpu_addr;
/**
* @wptr:
@@ -378,19 +389,19 @@ struct amdgpu_ring {
* This is the CPU address pointer in the writeback slot. This is used
* to commit changes to the GPU.
*/
- volatile u32 *wptr_cpu_addr;
+ u32 *wptr_cpu_addr;
unsigned fence_offs;
u64 fence_gpu_addr;
- volatile u32 *fence_cpu_addr;
+ u32 *fence_cpu_addr;
uint64_t current_ctx;
char name[16];
u32 trail_seq;
unsigned trail_fence_offs;
u64 trail_fence_gpu_addr;
- volatile u32 *trail_fence_cpu_addr;
+ u32 *trail_fence_cpu_addr;
unsigned cond_exe_offs;
u64 cond_exe_gpu_addr;
- volatile u32 *cond_exe_cpu_addr;
+ u32 *cond_exe_cpu_addr;
unsigned int set_q_mode_offs;
u32 *set_q_mode_ptr;
u64 set_q_mode_token;
@@ -470,10 +481,7 @@ static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
- int i = 0;
- while (i <= ring->buf_mask)
- ring->ring[i++] = ring->funcs->nop;
-
+ memset32(ring->ring, ring->funcs->nop, ring->buf_mask + 1);
}
static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index db5791e1a7ce..5aa830a02d80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -89,7 +89,7 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id)
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
{
const u32 *src_ptr;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
u32 i;
int r;
@@ -189,7 +189,7 @@ int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
{
const __le32 *fw_data;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
int me, i, max_me;
u32 bo_offset = 0;
u32 table_offset, table_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index c210625be220..2ce310b31942 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -251,7 +251,7 @@ struct amdgpu_rlc_funcs {
* and it also provides a pointer to it which is used by the firmware
* to load the clear state in some cases.
*/
- void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
+ void (*get_csb_buffer)(struct amdgpu_device *adev, u32 *buffer);
int (*get_cp_table_num)(struct amdgpu_device *adev);
int (*resume)(struct amdgpu_device *adev);
void (*stop)(struct amdgpu_device *adev);
@@ -275,19 +275,19 @@ struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
- volatile uint32_t *sr_ptr;
+ uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
- volatile uint32_t *cs_ptr;
+ uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
- volatile uint32_t *cp_table_ptr;
+ uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 428265046815..aa9ee5dffa45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -123,6 +123,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
placement->num_placement = 0;
return;
@@ -448,7 +449,8 @@ bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
return false;
if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
- res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL)
+ res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL ||
+ res->mem_type == AMDGPU_PL_MMIO_REMAP)
return true;
if (res->mem_type != TTM_PL_VRAM)
@@ -539,10 +541,12 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
old_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ old_mem->mem_type == AMDGPU_PL_MMIO_REMAP ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
new_mem->mem_type == AMDGPU_PL_OA ||
- new_mem->mem_type == AMDGPU_PL_DOORBELL) {
+ new_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) {
/* Nothing to save here */
amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
@@ -630,6 +634,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.is_iomem = true;
mem->bus.caching = ttm_uncached;
break;
+ case AMDGPU_PL_MMIO_REMAP:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->rmmio_remap.bus_addr;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
default:
return -EINVAL;
}
@@ -647,6 +657,8 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
+ else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP)
+ return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT;
return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
}
@@ -696,7 +708,7 @@ struct amdgpu_ttm_tt {
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
* once afterwards to stop HMM tracking
*/
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
struct hmm_range **range)
{
struct ttm_tt *ttm = bo->tbo.ttm;
@@ -733,7 +745,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
readonly = amdgpu_ttm_tt_is_readonly(ttm);
r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
- readonly, NULL, pages, range);
+ readonly, NULL, range);
out_unlock:
mmap_read_unlock(mm);
if (r)
@@ -785,12 +797,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
* that backs user memory and will ultimately be mapped into the device
* address space.
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range)
{
unsigned long i;
for (i = 0; i < ttm->num_pages; ++i)
- ttm->pages[i] = pages ? pages[i] : NULL;
+ ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_pfns[i]) : NULL;
}
/*
@@ -1356,7 +1368,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
if (mem && (mem->mem_type == TTM_PL_TT ||
mem->mem_type == AMDGPU_PL_DOORBELL ||
- mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ mem->mem_type == AMDGPU_PL_PREEMPT ||
+ mem->mem_type == AMDGPU_PL_MMIO_REMAP)) {
flags |= AMDGPU_PTE_SYSTEM;
if (ttm->caching == ttm_cached)
@@ -1843,6 +1856,59 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
adev->mman.ttm_pools = NULL;
}
+/**
+ * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the
+ * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host
+ * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular
+ * GEM object (amdgpu_bo_create).
+ *
+ * Return:
+ * * 0 on success or intentional skip (feature not present/unsupported)
+ * * negative errno on allocation failure
+ */
+static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo_param bp;
+ int r;
+
+ /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */
+ if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE)
+ return 0;
+
+ memset(&bp, 0, sizeof(bp));
+
+ /* Create exactly one GEM BO in the MMIO_REMAP domain. */
+ bp.type = ttm_bo_type_device; /* userspace-mappable GEM */
+ bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP;
+ bp.flags = 0;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Frees the kernel-owned MMIO_REMAP BO if it was allocated by
+ * amdgpu_ttm_mmio_remap_bo_init().
+ */
+static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_unref(&adev->rmmio_remap.bo);
+ adev->rmmio_remap.bo = NULL;
+}
+
/*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -1879,11 +1945,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
adev->mman.initialized = true;
- /* Initialize VRAM pool with all of VRAM divided into pages */
- r = amdgpu_vram_mgr_init(adev);
- if (r) {
- dev_err(adev->dev, "Failed initializing VRAM heap.\n");
- return r;
+ if (!adev->gmc.is_app_apu) {
+ /* Initialize VRAM pool with all of VRAM divided into pages */
+ r = amdgpu_vram_mgr_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing VRAM heap.\n");
+ return r;
+ }
}
/* Change the size here instead of the init above so only lpfn is affected */
@@ -2010,6 +2078,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
+ /* Initialize MMIO-remap pool (single page 4K) */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n");
+ return r;
+ }
+
+ /* Allocate the singleton MMIO_REMAP BO (4K) if supported */
+ r = amdgpu_ttm_mmio_remap_bo_init(adev);
+ if (r)
+ return r;
+
/* Initialize preemptible memory pool */
r = amdgpu_preempt_mgr_init(adev);
if (r) {
@@ -2072,6 +2152,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
}
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr);
+
+ amdgpu_ttm_mmio_remap_bo_fini(adev);
amdgpu_ttm_fw_reserve_vram_fini(adev);
amdgpu_ttm_drv_reserve_vram_fini(adev);
@@ -2084,7 +2166,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
drm_dev_exit(idx);
}
- amdgpu_vram_mgr_fini(adev);
+ if (!adev->gmc.is_app_apu)
+ amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
amdgpu_doorbell_fini(adev);
@@ -2093,6 +2176,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
dev_info(adev->dev, "amdgpu: ttm finalized\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index d82d107fdcc6..0be2728aa872 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -34,7 +34,8 @@
#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
-#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 5)
+#define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5)
+#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6)
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
@@ -190,7 +191,7 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
struct hmm_range **range);
void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
struct hmm_range *range);
@@ -198,7 +199,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
struct hmm_range *range);
#else
static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct page **pages,
struct hmm_range **range)
{
return -EPERM;
@@ -214,7 +214,7 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
}
#endif
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range);
int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
uint64_t *user_addr);
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 467e8fa6cb8b..1add21160d21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -44,8 +44,41 @@ u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
return userq_ip_mask;
}
+int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
+ u64 expected_size)
+{
+ struct amdgpu_bo_va_mapping *va_map;
+ u64 user_addr;
+ u64 size;
+ int r = 0;
+
+ user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+ size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!va_map) {
+ r = -EINVAL;
+ goto out_err;
+ }
+ /* Only validate the userq whether resident in the VM mapping range */
+ if (user_addr >= va_map->start &&
+ va_map->last - user_addr + 1 >= size) {
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+ }
+
+ r = -EINVAL;
+out_err:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return r;
+}
+
static int
-amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
+amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
{
struct amdgpu_device *adev = uq_mgr->adev;
@@ -54,6 +87,49 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
int r = 0;
if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ r = userq_funcs->preempt(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
+ }
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
+ r = userq_funcs->restore(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
+ (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
r = userq_funcs->unmap(uq_mgr, queue);
if (r)
queue->state = AMDGPU_USERQ_STATE_HUNG;
@@ -112,22 +188,6 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
kfree(queue);
}
-int
-amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr)
-{
- struct amdgpu_usermode_queue *queue;
- int queue_id;
- int ret = 0;
-
- mutex_lock(&uq_mgr->userq_mutex);
- /* Resume all the queues for this process */
- idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
- ret += queue->state == AMDGPU_USERQ_STATE_MAPPED;
-
- mutex_unlock(&uq_mgr->userq_mutex);
- return ret;
-}
-
static struct amdgpu_usermode_queue *
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
{
@@ -323,6 +383,11 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
debugfs_remove_recursive(queue->debugfs_queue);
#endif
r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ /*TODO: It requires a reset for userq hw unmap error*/
+ if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) {
+ drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ }
amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
mutex_unlock(&uq_mgr->userq_mutex);
@@ -404,27 +469,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
- /* Usermode queues are only supported for GFX IP as of now */
- if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
- args->in.ip_type != AMDGPU_HW_IP_DMA &&
- args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
- drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n",
- args->in.ip_type);
- return -EINVAL;
- }
-
r = amdgpu_userq_priority_permit(filp, priority);
if (r)
return r;
- if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
- (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
- (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
- !amdgpu_is_tmz(adev)) {
- drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n");
- return -EINVAL;
- }
-
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
@@ -456,6 +504,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
r = -ENOMEM;
goto unlock;
}
+
+ /* Validate the userq virtual address.*/
+ if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) ||
+ amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
+ amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+ r = -EINVAL;
+ kfree(queue);
+ goto unlock;
+ }
queue->doorbell_handle = args->in.doorbell_handle;
queue->queue_type = args->in.ip_type;
queue->vm = &fpriv->vm;
@@ -543,22 +600,45 @@ unlock:
return r;
}
-int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp)
+static int amdgpu_userq_input_args_validate(struct drm_device *dev,
+ union drm_amdgpu_userq *args,
+ struct drm_file *filp)
{
- union drm_amdgpu_userq *args = data;
- int r;
+ struct amdgpu_device *adev = drm_to_adev(dev);
switch (args->in.op) {
case AMDGPU_USERQ_OP_CREATE:
if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
return -EINVAL;
- r = amdgpu_userq_create(filp, args);
- if (r)
- drm_file_err(filp, "Failed to create usermode queue\n");
- break;
+ /* Usermode queues are only supported for GFX IP as of now */
+ if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
+ args->in.ip_type != AMDGPU_HW_IP_DMA &&
+ args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
+ drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
+ args->in.ip_type);
+ return -EINVAL;
+ }
+
+ if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
+ (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
+ (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
+ !amdgpu_is_tmz(adev)) {
+ drm_file_err(filp, "Secure only supported on GFX/Compute queues\n");
+ return -EINVAL;
+ }
+ if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET ||
+ args->in.queue_va == 0 ||
+ args->in.queue_size == 0) {
+ drm_file_err(filp, "invalidate userq queue va or size\n");
+ return -EINVAL;
+ }
+ if (!args->in.wptr_va || !args->in.rptr_va) {
+ drm_file_err(filp, "invalidate userq queue rptr or wptr\n");
+ return -EINVAL;
+ }
+ break;
case AMDGPU_USERQ_OP_FREE:
if (args->in.ip_type ||
args->in.doorbell_handle ||
@@ -571,6 +651,31 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
args->in.mqd ||
args->in.mqd_size)
return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_userq *args = data;
+ int r;
+
+ if (amdgpu_userq_input_args_validate(dev, args, filp) < 0)
+ return -EINVAL;
+
+ switch (args->in.op) {
+ case AMDGPU_USERQ_OP_CREATE:
+ r = amdgpu_userq_create(filp, args);
+ if (r)
+ drm_file_err(filp, "Failed to create usermode queue\n");
+ break;
+
+ case AMDGPU_USERQ_OP_FREE:
r = amdgpu_userq_destroy(filp, args->in.queue_id);
if (r)
drm_file_err(filp, "Failed to destroy usermode queue\n");
@@ -593,7 +698,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
/* Resume all the queues for this process */
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
- r = amdgpu_userq_map_helper(uq_mgr, queue);
+ r = amdgpu_userq_restore_helper(uq_mgr, queue);
if (r)
ret = r;
}
@@ -603,108 +708,106 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
return ret;
}
+static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+/* Handle all BOs on the invalidated list, validate them and update the PTs */
static int
-amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
+amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
+ struct amdgpu_vm *vm)
{
struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
int ret;
- amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated,
+ struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
- ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (ret)
- DRM_ERROR("Fail to validate\n");
+ bo = bo_va->base.bo;
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
+ if (unlikely(ret))
+ return ret;
- return ret;
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ return ret;
+
+ /* This moves the bo_va to the done list */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret)
+ return ret;
+
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
}
+/* Make sure the whole VM is ready to be used */
static int
-amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr)
+amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
- struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
- struct ww_acquire_ctx *ticket;
struct drm_exec exec;
- struct amdgpu_bo *bo;
- struct dma_resv *resv;
- bool clear, unlock;
- int ret = 0;
+ int ret;
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
- ret = amdgpu_vm_lock_pd(vm, &exec, 2);
+ ret = amdgpu_vm_lock_pd(vm, &exec, 1);
drm_exec_retry_on_contention(&exec);
- if (unlikely(ret)) {
- drm_file_err(uq_mgr->file, "Failed to lock PD\n");
+ if (unlikely(ret))
goto unlock_all;
- }
- /* Lock the done list */
- list_for_each_entry(bo_va, &vm->done, base.vm_status) {
- bo = bo_va->base.bo;
- if (!bo)
- continue;
-
- ret = drm_exec_lock_obj(&exec, &bo->tbo.base);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(ret))
- goto unlock_all;
- }
- }
-
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->moved)) {
- bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
- base.vm_status);
- spin_unlock(&vm->status_lock);
-
- /* Per VM BOs never need to bo cleared in the page tables */
- ret = amdgpu_vm_bo_update(adev, bo_va, false);
- if (ret)
+ ret = amdgpu_vm_lock_done_list(vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
goto unlock_all;
- spin_lock(&vm->status_lock);
- }
-
- ticket = &exec.ticket;
- while (!list_empty(&vm->invalidated)) {
- bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
- base.vm_status);
- resv = bo_va->base.bo->tbo.base.resv;
- spin_unlock(&vm->status_lock);
- bo = bo_va->base.bo;
- ret = amdgpu_userq_validate_vm_bo(NULL, bo);
- if (ret) {
- drm_file_err(uq_mgr->file, "Failed to validate BO\n");
+ /* This validates PDs, PTs and per VM BOs */
+ ret = amdgpu_vm_validate(adev, vm, NULL,
+ amdgpu_userq_validate_vm,
+ NULL);
+ if (unlikely(ret))
goto unlock_all;
- }
- /* Try to reserve the BO to avoid clearing its ptes */
- if (!adev->debug_vm && dma_resv_trylock(resv)) {
- clear = false;
- unlock = true;
- /* The caller is already holding the reservation lock */
- } else if (dma_resv_locking_ctx(resv) == ticket) {
- clear = false;
- unlock = false;
- /* Somebody else is using the BO right now */
- } else {
- clear = true;
- unlock = false;
- }
+ /* This locks and validates the remaining evicted BOs */
+ ret = amdgpu_userq_bo_validate(adev, &exec, vm);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+ }
- ret = amdgpu_vm_bo_update(adev, bo_va, clear);
+ ret = amdgpu_vm_handle_moved(adev, vm, NULL);
+ if (ret)
+ goto unlock_all;
- if (unlock)
- dma_resv_unlock(resv);
- if (ret)
- goto unlock_all;
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
+ if (ret)
+ goto unlock_all;
- spin_lock(&vm->status_lock);
- }
- spin_unlock(&vm->status_lock);
+ /*
+ * We need to wait for all VM updates to finish before restarting the
+ * queues. Using the done list like that is now ok since everything is
+ * locked in place.
+ */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status)
+ dma_fence_wait(bo_va->last_pt_update, false);
+ dma_fence_wait(vm->last_update, false);
ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
if (ret)
@@ -725,7 +828,7 @@ static void amdgpu_userq_restore_worker(struct work_struct *work)
mutex_lock(&uq_mgr->userq_mutex);
- ret = amdgpu_userq_validate_bos(uq_mgr);
+ ret = amdgpu_userq_vm_validate(uq_mgr);
if (ret) {
drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
goto unlock;
@@ -750,7 +853,7 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
/* Try to unmap all the queues in this process ctx */
idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
- r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ r = amdgpu_userq_preempt_helper(uq_mgr, queue);
if (r)
ret = r;
}
@@ -876,7 +979,10 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
cancel_delayed_work_sync(&uqm->resume_work);
mutex_lock(&uqm->userq_mutex);
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- r = amdgpu_userq_unmap_helper(uqm, queue);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ else
+ r = amdgpu_userq_unmap_helper(uqm, queue);
if (r)
ret = r;
}
@@ -901,7 +1007,10 @@ int amdgpu_userq_resume(struct amdgpu_device *adev)
list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
mutex_lock(&uqm->userq_mutex);
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- r = amdgpu_userq_map_helper(uqm, queue);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ else
+ r = amdgpu_userq_map_helper(uqm, queue);
if (r)
ret = r;
}
@@ -935,7 +1044,7 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
- r = amdgpu_userq_unmap_helper(uqm, queue);
+ r = amdgpu_userq_preempt_helper(uqm, queue);
if (r)
ret = r;
}
@@ -969,7 +1078,7 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
- r = amdgpu_userq_map_helper(uqm, queue);
+ r = amdgpu_userq_restore_helper(uqm, queue);
if (r)
ret = r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 1bd84f4cce78..c027dd916672 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -120,8 +120,6 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_eviction_fence *ev_fence);
-int amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr);
-
void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
struct amdgpu_eviction_fence_mgr *evf_mgr);
@@ -139,4 +137,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
u32 idx);
+int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
+ u64 expected_size);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 95e91d1dc58a..761bad98da3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -284,7 +284,7 @@ static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
/* Check if hardware has already processed the job */
spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
- if (!dma_fence_is_signaled_locked(fence))
+ if (!dma_fence_is_signaled(fence))
list_add_tail(&userq_fence->link, &fence_drv->fences);
else
dma_fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
new file mode 100644
index 000000000000..1e40ca3b1584
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_UTILS_H_
+#define AMDGPU_UTILS_H_
+
+/* ---------- Generic 2‑bit capability attribute encoding ----------
+ * 00 INVALID, 01 RO, 10 WO, 11 RW
+ */
+enum amdgpu_cap_attr {
+ AMDGPU_CAP_ATTR_INVALID = 0,
+ AMDGPU_CAP_ATTR_RO = 1 << 0,
+ AMDGPU_CAP_ATTR_WO = 1 << 1,
+ AMDGPU_CAP_ATTR_RW = (AMDGPU_CAP_ATTR_RO | AMDGPU_CAP_ATTR_WO),
+};
+
+#define AMDGPU_CAP_ATTR_BITS 2
+#define AMDGPU_CAP_ATTR_MAX ((1U << AMDGPU_CAP_ATTR_BITS) - 1)
+
+/* Internal helper to build helpers for a given enum NAME */
+#define DECLARE_ATTR_CAP_CLASS_HELPERS(NAME) \
+enum { NAME##_BITMAP_BITS = NAME##_COUNT * AMDGPU_CAP_ATTR_BITS }; \
+struct NAME##_caps { \
+ DECLARE_BITMAP(bmap, NAME##_BITMAP_BITS); \
+}; \
+static inline unsigned int NAME##_ATTR_START(enum NAME##_cap_id cap) \
+{ return (unsigned int)cap * AMDGPU_CAP_ATTR_BITS; } \
+static inline void NAME##_attr_init(struct NAME##_caps *c) \
+{ if (c) bitmap_zero(c->bmap, NAME##_BITMAP_BITS); } \
+static inline int NAME##_attr_set(struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr attr) \
+{ \
+ if (!c) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ if ((unsigned int)attr > AMDGPU_CAP_ATTR_MAX) \
+ return -EINVAL; \
+ bitmap_write(c->bmap, (unsigned long)attr, \
+ NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ return 0; \
+} \
+static inline int NAME##_attr_get(const struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr *out) \
+{ \
+ unsigned long v; \
+ if (!c || !out) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ v = bitmap_read(c->bmap, NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ *out = (enum amdgpu_cap_attr)v; \
+ return 0; \
+} \
+static inline bool NAME##_cap_is_ro(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RO; } \
+static inline bool NAME##_cap_is_wo(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_WO; } \
+static inline bool NAME##_cap_is_rw(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RW; }
+
+/* Element expander for enum creation */
+#define _CAP_ENUM_ELEM(x) x,
+
+/* Public macro: declare enum + helpers from an X‑macro list */
+#define DECLARE_ATTR_CAP_CLASS(NAME, LIST_MACRO) \
+ enum NAME##_cap_id { LIST_MACRO(_CAP_ENUM_ELEM) NAME##_COUNT }; \
+ DECLARE_ATTR_CAP_CLASS_HELPERS(NAME)
+
+#endif /* AMDGPU_UTILS_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 595f0df17bcc..5e0786ea911b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -257,12 +257,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
return 0;
}
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
{
int j;
if (adev->vcn.harvest_config & (1 << i))
- return 0;
+ return;
amdgpu_bo_free_kernel(
&adev->vcn.inst[i].dpg_sram_bo,
@@ -292,8 +292,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock);
mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
-
- return 0;
}
bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
@@ -1159,7 +1157,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
{
struct amdgpu_vcn_inst *vcn;
void *log_buf;
- volatile struct amdgpu_vcn_fwlog *plog;
+ struct amdgpu_vcn_fwlog *plog;
unsigned int read_pos, write_pos, available, i, read_bytes = 0;
unsigned int read_num[2] = {0};
@@ -1172,7 +1170,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
- plog = (volatile struct amdgpu_vcn_fwlog *)log_buf;
+ plog = (struct amdgpu_vcn_fwlog *)log_buf;
read_pos = plog->rptr;
write_pos = plog->wptr;
@@ -1239,11 +1237,11 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
- volatile uint32_t *flag = vcn->fw_shared.cpu_addr;
+ uint32_t *flag = vcn->fw_shared.cpu_addr;
void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
- volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
- volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
+ struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
+ struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
+ vcn->fw_shared.log_offset;
*flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
fw_log->is_enabled = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 6d9acd36041d..dc8a17bcc3c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -501,7 +501,7 @@ struct amdgpu_vcn5_fw_shared {
struct amdgpu_fw_shared_rb_setup rb_setup;
struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
- uint8_t pad3[9];
+ uint8_t pad3[404];
};
#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
@@ -516,7 +516,7 @@ enum vcn_ring_type {
int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i);
int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i);
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i);
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i);
int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i);
int amdgpu_vcn_resume(struct amdgpu_device *adev, int i);
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 13f0cdeb59c4..3328ab63376b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -828,11 +828,14 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
{
ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
+ ratelimit_state_init(&adev->virt.ras.ras_chk_criti_rs, 5 * HZ, 1);
ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
RATELIMIT_MSG_ON_RELEASE);
ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_set_flags(&adev->virt.ras.ras_chk_criti_rs,
+ RATELIMIT_MSG_ON_RELEASE);
mutex_init(&adev->virt.ras.ras_telemetry_mutex);
@@ -1501,3 +1504,55 @@ void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
if (virt->ops && virt->ops->req_bad_pages)
virt->ops->req_bad_pages(adev);
}
+
+static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry,
+ bool *hit)
+{
+ struct amd_sriov_ras_chk_criti *tmp = NULL;
+ uint32_t checksum, used_size;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ return 0;
+
+ tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
+ goto out;
+
+ if (hit)
+ *hit = tmp->hit ? true : false;
+
+out:
+ kfree(tmp);
+
+ return 0;
+}
+
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r = -EPERM;
+
+ if (!virt->ops || !virt->ops->req_ras_chk_criti)
+ return -EOPNOTSUPP;
+
+ /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host
+ * will ignore incoming guest messages. Ratelimit the guest messages to
+ * prevent guest self DOS.
+ */
+ if (__ratelimit(&virt->ras.ras_chk_criti_rs)) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ if (!virt->ops->req_ras_chk_criti(adev, addr))
+ r = amdgpu_virt_cache_chk_criti_hit(
+ adev, virt->fw_reserve.ras_telemetry, hit);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 58accf2259b3..d1172c8e58c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -98,6 +98,7 @@ struct amdgpu_virt_ops {
int (*req_ras_err_count)(struct amdgpu_device *adev);
int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
int (*req_bad_pages)(struct amdgpu_device *adev);
+ int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr);
};
/*
@@ -252,10 +253,15 @@ struct amdgpu_virt_ras_err_handler_data {
struct amdgpu_virt_ras {
struct ratelimit_state ras_error_cnt_rs;
struct ratelimit_state ras_cper_dump_rs;
+ struct ratelimit_state ras_chk_criti_rs;
struct mutex ras_telemetry_mutex;
uint64_t cper_rptr;
};
+#define AMDGPU_VIRT_CAPS_LIST(X) X(AMDGPU_VIRT_CAP_POWER_LIMIT)
+
+DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
+
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@@ -274,6 +280,7 @@ struct amdgpu_virt {
const struct amdgpu_virt_ops *ops;
struct amdgpu_vf_error_buffer vf_errors;
struct amdgpu_virt_fw_reserve fw_reserve;
+ struct amdgpu_virt_caps virt_caps;
uint32_t gim_feature;
uint32_t reg_access_mode;
int req_init_data_ver;
@@ -448,4 +455,5 @@ int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
enum amdgpu_ras_block block);
void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 155bb9891a17..79bad9cbe2ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -14,7 +14,6 @@
#include "dce_v8_0.h"
#endif
#include "dce_v10_0.h"
-#include "dce_v11_0.h"
#include "ivsrcid/ivsrcid_vislands30.h"
#include "amdgpu_vkms.h"
#include "amdgpu_display.h"
@@ -581,13 +580,6 @@ static int amdgpu_vkms_hw_init(struct amdgpu_ip_block *ip_block)
case CHIP_TONGA:
dce_v10_0_disable_dce(adev);
break;
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_VEGAM:
- dce_v11_0_disable_dce(adev);
- break;
case CHIP_TOPAZ:
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_HAINAN:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dbda3a38a2b0..c1a801203949 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -128,43 +128,14 @@ struct amdgpu_vm_tlb_seq_struct {
};
/**
- * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm pointer
- * @pasid: the pasid the VM is using on this GPU
- *
- * Set the pasid this VM is using on this GPU, can also be used to remove the
- * pasid by passing in zero.
+ * amdgpu_vm_assert_locked - check if VM is correctly locked
+ * @vm: the VM which schould be tested
*
+ * Asserts that the VM root PD is locked.
*/
-int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u32 pasid)
+static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
{
- int r;
-
- if (vm->pasid == pasid)
- return 0;
-
- if (vm->pasid) {
- r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
- if (r < 0)
- return r;
-
- vm->pasid = 0;
- }
-
- if (pasid) {
- r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
- GFP_KERNEL));
- if (r < 0)
- return r;
-
- vm->pasid = pasid;
- }
-
-
- return 0;
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
}
/**
@@ -181,6 +152,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
struct amdgpu_bo *bo = vm_bo->bo;
vm_bo->moved = true;
+ amdgpu_vm_assert_locked(vm);
spin_lock(&vm_bo->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&vm_bo->vm_status, &vm->evicted);
@@ -198,6 +170,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
spin_unlock(&vm_bo->vm->status_lock);
@@ -213,6 +186,7 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
spin_unlock(&vm_bo->vm->status_lock);
@@ -260,6 +234,7 @@ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
if (vm_bo->bo->parent) {
spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
@@ -279,6 +254,7 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->done);
spin_unlock(&vm_bo->vm->status_lock);
@@ -295,10 +271,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
{
struct amdgpu_vm_bo_base *vm_bo, *tmp;
+ amdgpu_vm_assert_locked(vm);
+
spin_lock(&vm->status_lock);
list_splice_init(&vm->done, &vm->invalidated);
list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
vm_bo->moved = true;
+
list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
struct amdgpu_bo *bo = vm_bo->bo;
@@ -327,6 +306,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
bool shared;
+ dma_resv_assert_held(bo->tbo.base.resv);
spin_lock(&vm->status_lock);
shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
if (base->shared != shared) {
@@ -485,6 +465,42 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
}
/**
+ * amdgpu_vm_lock_done_list - lock all BOs on the done list
+ * @vm: vm providing the BOs
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
+ *
+ * Lock the BOs on the done list in the DRM execution context.
+ */
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct list_head *prev = &vm->done;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ /* We can only trust prev->next while holding the lock */
+ spin_lock(&vm->status_lock);
+ while (!list_is_head(prev->next, &vm->done)) {
+ bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_va->base.bo;
+ if (bo) {
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
+ if (unlikely(ret))
+ return ret;
+ }
+ spin_lock(&vm->status_lock);
+ prev = prev->next;
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
+}
+
+/**
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
*
* @adev: amdgpu device pointer
@@ -616,18 +632,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_unlock(&vm->status_lock);
bo = bo_base->bo;
-
- if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) {
- struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm);
-
- pr_warn_ratelimited("Evicted user BO is not reserved\n");
- if (ti) {
- pr_warn_ratelimited("pid %d\n", ti->task.pid);
- amdgpu_vm_put_task_info(ti);
- }
-
- return -EINVAL;
- }
+ dma_resv_assert_held(bo->tbo.base.resv);
r = validate(param, bo);
if (r)
@@ -660,6 +665,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
bool ret;
+ amdgpu_vm_assert_locked(vm);
+
amdgpu_vm_eviction_lock(vm);
ret = !vm->evicting;
amdgpu_vm_eviction_unlock(vm);
@@ -962,6 +969,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
LIST_HEAD(relocated);
int r, idx;
+ amdgpu_vm_assert_locked(vm);
+
spin_lock(&vm->status_lock);
list_splice_init(&vm->relocated, &relocated);
spin_unlock(&vm->status_lock);
@@ -2540,6 +2549,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* @adev: amdgpu_device pointer
* @vm: requested vm
* @xcp_id: GPU partition selection id
+ * @pasid: the pasid the VM is using on this GPU
*
* Init @vm fields.
*
@@ -2547,7 +2557,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* 0 for success, error for failure.
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int32_t xcp_id)
+ int32_t xcp_id, uint32_t pasid)
{
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
@@ -2623,12 +2633,26 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
dev_dbg(adev->dev, "Failed to create task info for VM\n");
+ /* Store new PASID in XArray (if non-zero) */
+ if (pasid != 0) {
+ r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL));
+ if (r < 0)
+ goto error_free_root;
+
+ vm->pasid = pasid;
+ }
+
amdgpu_bo_unreserve(vm->root.bo);
amdgpu_bo_unref(&root_bo);
return 0;
error_free_root:
+ /* If PASID was partially set, erase it from XArray before failing */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
amdgpu_vm_pt_free_root(adev, vm);
amdgpu_bo_unreserve(vm->root.bo);
amdgpu_bo_unref(&root_bo);
@@ -2734,7 +2758,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
- amdgpu_vm_set_pasid(adev, vm, 0);
+ /* Remove PASID mapping before destroying VM */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
dma_fence_wait(vm->last_unlocked, false);
dma_fence_put(vm->last_unlocked);
dma_fence_wait(vm->last_tlb_flush, false);
@@ -2775,10 +2803,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
dma_fence_put(vm->last_update);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
- if (vm->reserved_vmid[i]) {
- amdgpu_vmid_free_reserved(adev, i);
- vm->reserved_vmid[i] = false;
- }
+ amdgpu_vmid_free_reserved(adev, vm, i);
}
ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
@@ -2874,6 +2899,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
/* No valid flags defined yet */
if (args->in.flags)
@@ -2882,17 +2908,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* We only have requirement to reserve vmid from gfxhub */
- if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
- amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
- fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
- }
-
+ amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
case AMDGPU_VM_OP_UNRESERVE_VMID:
- if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
- amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
- fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
- }
+ amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
default:
return -EINVAL;
@@ -3030,6 +3049,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
unsigned int total_done_objs = 0;
unsigned int id = 0;
+ amdgpu_vm_assert_locked(vm);
+
spin_lock(&vm->status_lock);
seq_puts(m, "\tIdle BOs:\n");
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 67eaf5402e7e..cf0ec94e8a07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -349,12 +349,16 @@ struct amdgpu_vm {
/* Memory statistics for this vm, protected by status_lock */
struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for either
+ * PDs, PTs or per VM BOs. The state transits are:
+ *
+ * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle
+ */
+
/* Per-VM and PT BOs who needs a validation */
struct list_head evicted;
- /* BOs for user mode queues that need a validation */
- struct list_head evicted_user;
-
/* PT BOs which relocated and their parent need an update */
struct list_head relocated;
@@ -364,15 +368,29 @@ struct amdgpu_vm {
/* All BOs of this VM not currently in the state machine */
struct list_head idle;
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for BOs which
+ * have their own dma_resv object and not depend on the root PD. Their
+ * state transits are:
+ *
+ * evicted_user or invalidated -> done
+ */
+
+ /* BOs for user mode queues that need a validation */
+ struct list_head evicted_user;
+
/* regular invalidated BOs, but not yet updated in the PT */
struct list_head invalidated;
- /* BO mappings freed, but not yet updated in the PT */
- struct list_head freed;
-
/* BOs which are invalidated, has been updated in the PTs */
struct list_head done;
+ /*
+ * This list contains amdgpu_bo_va_mapping objects which have been freed
+ * but not updated in the PTs
+ */
+ struct list_head freed;
+
/* contains the page directory */
struct amdgpu_vm_bo_base root;
struct dma_fence *last_update;
@@ -394,7 +412,7 @@ struct amdgpu_vm {
struct dma_fence *last_unlocked;
unsigned int pasid;
- bool reserved_vmid[AMDGPU_MAX_VMHUBS];
+ struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS];
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
bool use_cpu_for_update;
@@ -482,15 +500,14 @@ extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs;
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
-int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u32 pasid);
-
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id, uint32_t pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
unsigned int num_fences);
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
bool amdgpu_vm_ready(struct amdgpu_vm *vm);
uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index e69db0a93378..a5adb2ed9b3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -425,45 +425,6 @@ out:
return ret;
}
-static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man,
- struct drm_printer *printer)
-{
- DRM_DEBUG_DRIVER("Dummy vram mgr debug\n");
-}
-
-static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man,
- struct ttm_resource *res,
- const struct ttm_place *place,
- size_t size)
-{
- DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n");
- return false;
-}
-
-static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man,
- struct ttm_resource *res,
- const struct ttm_place *place,
- size_t size)
-{
- DRM_DEBUG_DRIVER("Dummy vram mgr intersects\n");
- return true;
-}
-
-static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man,
- struct ttm_resource *res)
-{
- DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n");
-}
-
-static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man,
- struct ttm_buffer_object *tbo,
- const struct ttm_place *place,
- struct ttm_resource **res)
-{
- DRM_DEBUG_DRIVER("Dummy vram mgr new\n");
- return -ENOSPC;
-}
-
/**
* amdgpu_vram_mgr_new - allocate new ranges
*
@@ -932,14 +893,6 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
mutex_unlock(&mgr->lock);
}
-static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = {
- .alloc = amdgpu_dummy_vram_mgr_new,
- .free = amdgpu_dummy_vram_mgr_del,
- .intersects = amdgpu_dummy_vram_mgr_intersects,
- .compatible = amdgpu_dummy_vram_mgr_compatible,
- .debug = amdgpu_dummy_vram_mgr_debug
-};
-
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.alloc = amdgpu_vram_mgr_new,
.free = amdgpu_vram_mgr_del,
@@ -973,16 +926,10 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
INIT_LIST_HEAD(&mgr->allocated_vres_list);
mgr->default_page_size = PAGE_SIZE;
- if (!adev->gmc.is_app_apu) {
- man->func = &amdgpu_vram_mgr_func;
-
- err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
- if (err)
- return err;
- } else {
- man->func = &amdgpu_dummy_vram_mgr_func;
- DRM_INFO("Setup dummy vram mgr\n");
- }
+ man->func = &amdgpu_vram_mgr_func;
+ err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ if (err)
+ return err;
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index bba0b26fee8f..5f36aff17e79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -126,4 +126,8 @@ uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev);
void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev,
uint16_t max_speed, uint8_t max_width);
+
+/* Cleanup macro for use with __free(xgmi_put_hive) */
+DEFINE_FREE(xgmi_put_hive, struct amdgpu_hive_info *, if (_T) amdgpu_put_xgmi_hive(_T))
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 33edad1f9dcd..3a79ed7d8031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -405,12 +405,17 @@ struct amd_sriov_ras_cper_dump {
uint32_t buf[];
};
+struct amd_sriov_ras_chk_criti {
+ uint32_t hit;
+};
+
struct amdsriov_ras_telemetry {
struct amd_sriov_ras_telemetry_header header;
union {
struct amd_sriov_ras_telemetry_error_count error_count;
struct amd_sriov_ras_cper_dump cper_dump;
+ struct amd_sriov_ras_chk_criti chk_criti;
} body;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 1c994d0cc50b..7a063e44d429 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1246,6 +1246,10 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index,
ectx.last_jump_jiffies = 0;
if (ws) {
ectx.ws = kcalloc(4, ws, GFP_KERNEL);
+ if (!ectx.ws) {
+ ret = -ENOMEM;
+ goto free;
+ }
ectx.ws_size = ws;
} else {
ectx.ws = NULL;
@@ -1498,7 +1502,7 @@ static void atom_get_vbios_build(struct atom_context *ctx)
{
unsigned char *atom_rom_hdr;
unsigned char *str;
- uint16_t base;
+ uint16_t base, len;
base = CU16(ATOM_ROM_TABLE_PTR);
atom_rom_hdr = CSTR(base);
@@ -1511,8 +1515,9 @@ static void atom_get_vbios_build(struct atom_context *ctx)
while (str < atom_rom_hdr && *str++)
;
- if ((str + STRLEN_NORMAL) < atom_rom_hdr)
- strscpy(ctx->build_num, str, STRLEN_NORMAL);
+ len = min(atom_rom_hdr - str, STRLEN_NORMAL);
+ if (len)
+ strscpy(ctx->build_num, str, len);
}
struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
deleted file mode 100644
index e84608891300..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ /dev/null
@@ -1,3817 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drm_edid.h>
-#include <drm/drm_fourcc.h>
-#include <drm/drm_modeset_helper.h>
-#include <drm/drm_modeset_helper_vtables.h>
-#include <drm/drm_vblank.h>
-
-#include "amdgpu.h"
-#include "amdgpu_pm.h"
-#include "amdgpu_i2c.h"
-#include "vid.h"
-#include "atom.h"
-#include "amdgpu_atombios.h"
-#include "atombios_crtc.h"
-#include "atombios_encoders.h"
-#include "amdgpu_pll.h"
-#include "amdgpu_connectors.h"
-#include "amdgpu_display.h"
-#include "dce_v11_0.h"
-
-#include "dce/dce_11_0_d.h"
-#include "dce/dce_11_0_sh_mask.h"
-#include "dce/dce_11_0_enum.h"
-#include "oss/oss_3_0_d.h"
-#include "oss/oss_3_0_sh_mask.h"
-#include "gmc/gmc_8_1_d.h"
-#include "gmc/gmc_8_1_sh_mask.h"
-
-#include "ivsrcid/ivsrcid_vislands30.h"
-
-static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev);
-static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev);
-static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, int hpd);
-
-static const u32 crtc_offsets[] =
-{
- CRTC0_REGISTER_OFFSET,
- CRTC1_REGISTER_OFFSET,
- CRTC2_REGISTER_OFFSET,
- CRTC3_REGISTER_OFFSET,
- CRTC4_REGISTER_OFFSET,
- CRTC5_REGISTER_OFFSET,
- CRTC6_REGISTER_OFFSET
-};
-
-static const u32 hpd_offsets[] =
-{
- HPD0_REGISTER_OFFSET,
- HPD1_REGISTER_OFFSET,
- HPD2_REGISTER_OFFSET,
- HPD3_REGISTER_OFFSET,
- HPD4_REGISTER_OFFSET,
- HPD5_REGISTER_OFFSET
-};
-
-static const uint32_t dig_offsets[] = {
- DIG0_REGISTER_OFFSET,
- DIG1_REGISTER_OFFSET,
- DIG2_REGISTER_OFFSET,
- DIG3_REGISTER_OFFSET,
- DIG4_REGISTER_OFFSET,
- DIG5_REGISTER_OFFSET,
- DIG6_REGISTER_OFFSET,
- DIG7_REGISTER_OFFSET,
- DIG8_REGISTER_OFFSET
-};
-
-static const struct {
- uint32_t reg;
- uint32_t vblank;
- uint32_t vline;
- uint32_t hpd;
-
-} interrupt_status_offsets[] = { {
- .reg = mmDISP_INTERRUPT_STATUS,
- .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
-} };
-
-static const u32 cz_golden_settings_a11[] =
-{
- mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
- mmFBC_MISC, 0x1f311fff, 0x14300000,
-};
-
-static const u32 cz_mgcg_cgcg_init[] =
-{
- mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
- mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
-};
-
-static const u32 stoney_golden_settings_a11[] =
-{
- mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
- mmFBC_MISC, 0x1f311fff, 0x14302000,
-};
-
-static const u32 polaris11_golden_settings_a11[] =
-{
- mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
- mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
- mmFBC_DEBUG1, 0xffffffff, 0x00000008,
- mmFBC_MISC, 0x9f313fff, 0x14302008,
- mmHDMI_CONTROL, 0x313f031f, 0x00000011,
-};
-
-static const u32 polaris10_golden_settings_a11[] =
-{
- mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
- mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
- mmFBC_MISC, 0x9f313fff, 0x14302008,
- mmHDMI_CONTROL, 0x313f031f, 0x00000011,
-};
-
-static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
-{
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- amdgpu_device_program_register_sequence(adev,
- cz_mgcg_cgcg_init,
- ARRAY_SIZE(cz_mgcg_cgcg_init));
- amdgpu_device_program_register_sequence(adev,
- cz_golden_settings_a11,
- ARRAY_SIZE(cz_golden_settings_a11));
- break;
- case CHIP_STONEY:
- amdgpu_device_program_register_sequence(adev,
- stoney_golden_settings_a11,
- ARRAY_SIZE(stoney_golden_settings_a11));
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- amdgpu_device_program_register_sequence(adev,
- polaris11_golden_settings_a11,
- ARRAY_SIZE(polaris11_golden_settings_a11));
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- amdgpu_device_program_register_sequence(adev,
- polaris10_golden_settings_a11,
- ARRAY_SIZE(polaris10_golden_settings_a11));
- break;
- default:
- break;
- }
-}
-
-static u32 dce_v11_0_audio_endpt_rreg(struct amdgpu_device *adev,
- u32 block_offset, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
- r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
-
- return r;
-}
-
-static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev,
- u32 block_offset, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
-}
-
-static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
-{
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
- return 0;
- else
- return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
-}
-
-static void dce_v11_0_pageflip_interrupt_init(struct amdgpu_device *adev)
-{
- unsigned i;
-
- /* Enable pflip interrupts */
- for (i = 0; i < adev->mode_info.num_crtc; i++)
- amdgpu_irq_get(adev, &adev->pageflip_irq, i);
-}
-
-static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
-{
- unsigned i;
-
- /* Disable pflip interrupts */
- for (i = 0; i < adev->mode_info.num_crtc; i++)
- amdgpu_irq_put(adev, &adev->pageflip_irq, i);
-}
-
-/**
- * dce_v11_0_page_flip - pageflip callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc_id: crtc to cleanup pageflip on
- * @crtc_base: new address of the crtc (GPU MC address)
- * @async: asynchronous flip
- *
- * Triggers the actual pageflip by updating the primary
- * surface base address.
- */
-static void dce_v11_0_page_flip(struct amdgpu_device *adev,
- int crtc_id, u64 crtc_base, bool async)
-{
- struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
- struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
- u32 tmp;
-
- /* flip immediate for async, default is vsync */
- tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
- GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
- WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* update pitch */
- WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
- fb->pitches[0] / fb->format->cpp[0]);
- /* update the scanout addresses */
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(crtc_base));
- /* writing to the low address triggers the update */
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(crtc_base));
- /* post the write */
- RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
-}
-
-static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
- u32 *vbl, u32 *position)
-{
- if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
- return -EINVAL;
-
- *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
- *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
- return 0;
-}
-
-/**
- * dce_v11_0_hpd_sense - hpd sense callback.
- *
- * @adev: amdgpu_device pointer
- * @hpd: hpd (hotplug detect) pin
- *
- * Checks if a digital monitor is connected (evergreen+).
- * Returns true if connected, false if not connected.
- */
-static bool dce_v11_0_hpd_sense(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- bool connected = false;
-
- if (hpd >= adev->mode_info.num_hpd)
- return connected;
-
- if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) &
- DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK)
- connected = true;
-
- return connected;
-}
-
-/**
- * dce_v11_0_hpd_set_polarity - hpd set polarity callback.
- *
- * @adev: amdgpu_device pointer
- * @hpd: hpd (hotplug detect) pin
- *
- * Set the polarity of the hpd pin (evergreen+).
- */
-static void dce_v11_0_hpd_set_polarity(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- u32 tmp;
- bool connected = dce_v11_0_hpd_sense(adev, hpd);
-
- if (hpd >= adev->mode_info.num_hpd)
- return;
-
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- if (connected)
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0);
- else
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
-}
-
-/**
- * dce_v11_0_hpd_init - hpd setup callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Setup the hpd pins used by the card (evergreen+).
- * Enable the pin, set the polarity, and enable the hpd interrupts.
- */
-static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- u32 tmp;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
- continue;
-
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
- connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
- /* don't try to enable hpd on eDP or LVDS avoid breaking the
- * aux dp channel on imac and help (but not completely fix)
- * https://bugzilla.redhat.com/show_bug.cgi?id=726143
- * also avoid interrupt storms during dpms.
- */
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
- continue;
- }
-
- tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1);
- WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
- DC_HPD_CONNECT_INT_DELAY,
- AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS);
- tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
- DC_HPD_DISCONNECT_INT_DELAY,
- AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
- WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- dce_v11_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
- dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
- amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
- }
- drm_connector_list_iter_end(&iter);
-}
-
-/**
- * dce_v11_0_hpd_fini - hpd tear down callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down the hpd pins used by the card (evergreen+).
- * Disable the hpd interrupts.
- */
-static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- u32 tmp;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
- continue;
-
- tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0);
- WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
- }
- drm_connector_list_iter_end(&iter);
-}
-
-static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
-{
- return mmDC_GPIO_HPD_A;
-}
-
-static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev)
-{
- u32 crtc_hung = 0;
- u32 crtc_status[6];
- u32 i, j, tmp;
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
- if (REG_GET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN)) {
- crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
- crtc_hung |= (1 << i);
- }
- }
-
- for (j = 0; j < 10; j++) {
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (crtc_hung & (1 << i)) {
- tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
- if (tmp != crtc_status[i])
- crtc_hung &= ~(1 << i);
- }
- }
- if (crtc_hung == 0)
- return false;
- udelay(100);
- }
-
- return true;
-}
-
-static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev,
- bool render)
-{
- u32 tmp;
-
- /* Lockout access through VGA aperture*/
- tmp = RREG32(mmVGA_HDP_CONTROL);
- if (render)
- tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0);
- else
- tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
- WREG32(mmVGA_HDP_CONTROL, tmp);
-
- /* disable VGA render */
- tmp = RREG32(mmVGA_RENDER_CONTROL);
- if (render)
- tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1);
- else
- tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
- WREG32(mmVGA_RENDER_CONTROL, tmp);
-}
-
-static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
-{
- int num_crtc = 0;
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- num_crtc = 3;
- break;
- case CHIP_STONEY:
- num_crtc = 2;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- num_crtc = 6;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- num_crtc = 5;
- break;
- default:
- num_crtc = 0;
- }
- return num_crtc;
-}
-
-void dce_v11_0_disable_dce(struct amdgpu_device *adev)
-{
- /*Disable VGA render and enabled crtc, if has DCE engine*/
- if (amdgpu_atombios_has_dce_engine_info(adev)) {
- u32 tmp;
- int crtc_enabled, i;
-
- dce_v11_0_set_vga_render_state(adev, false);
-
- /*Disable crtc*/
- for (i = 0; i < dce_v11_0_get_num_crtc(adev); i++) {
- crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
- CRTC_CONTROL, CRTC_MASTER_EN);
- if (crtc_enabled) {
- WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
- tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
- tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
- WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
- WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
- }
- }
- }
-}
-
-static void dce_v11_0_program_fmt(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
- int bpc = 0;
- u32 tmp = 0;
- enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
-
- if (connector) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- bpc = amdgpu_connector_get_monitor_bpc(connector);
- dither = amdgpu_connector->dither;
- }
-
- /* LVDS/eDP FMT is set up by atom */
- if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
- return;
-
- /* not needed for analog */
- if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
- (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
- return;
-
- if (bpc == 0)
- return;
-
- switch (bpc) {
- case 6:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 0);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 0);
- }
- break;
- case 8:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 1);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 1);
- }
- break;
- case 10:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 2);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 2);
- }
- break;
- default:
- /* not needed */
- break;
- }
-
- WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-
-/* display watermark setup */
-/**
- * dce_v11_0_line_buffer_adjust - Set up the line buffer
- *
- * @adev: amdgpu_device pointer
- * @amdgpu_crtc: the selected display controller
- * @mode: the current display mode on the selected display
- * controller
- *
- * Setup up the line buffer allocation for
- * the selected display controller (CIK).
- * Returns the line buffer size in pixels.
- */
-static u32 dce_v11_0_line_buffer_adjust(struct amdgpu_device *adev,
- struct amdgpu_crtc *amdgpu_crtc,
- struct drm_display_mode *mode)
-{
- u32 tmp, buffer_alloc, i, mem_cfg;
- u32 pipe_offset = amdgpu_crtc->crtc_id;
- /*
- * Line Buffer Setup
- * There are 6 line buffers, one for each display controllers.
- * There are 3 partitions per LB. Select the number of partitions
- * to enable based on the display width. For display widths larger
- * than 4096, you need use to use 2 display controllers and combine
- * them using the stereo blender.
- */
- if (amdgpu_crtc->base.enabled && mode) {
- if (mode->crtc_hdisplay < 1920) {
- mem_cfg = 1;
- buffer_alloc = 2;
- } else if (mode->crtc_hdisplay < 2560) {
- mem_cfg = 2;
- buffer_alloc = 2;
- } else if (mode->crtc_hdisplay < 4096) {
- mem_cfg = 0;
- buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
- } else {
- DRM_DEBUG_KMS("Mode too big for LB!\n");
- mem_cfg = 0;
- buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
- }
- } else {
- mem_cfg = 1;
- buffer_alloc = 0;
- }
-
- tmp = RREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, LB_MEMORY_CTRL, LB_MEMORY_CONFIG, mem_cfg);
- WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
- tmp = REG_SET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATED, buffer_alloc);
- WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, tmp);
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
- if (REG_GET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATION_COMPLETED))
- break;
- udelay(1);
- }
-
- if (amdgpu_crtc->base.enabled && mode) {
- switch (mem_cfg) {
- case 0:
- default:
- return 4096 * 2;
- case 1:
- return 1920 * 2;
- case 2:
- return 2560 * 2;
- }
- }
-
- /* controller not enabled, so no lb used */
- return 0;
-}
-
-/**
- * cik_get_number_of_dram_channels - get the number of dram channels
- *
- * @adev: amdgpu_device pointer
- *
- * Look up the number of video ram channels (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the number of dram channels
- */
-static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev)
-{
- u32 tmp = RREG32(mmMC_SHARED_CHMAP);
-
- switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
- case 0:
- default:
- return 1;
- case 1:
- return 2;
- case 2:
- return 4;
- case 3:
- return 8;
- case 4:
- return 3;
- case 5:
- return 6;
- case 6:
- return 10;
- case 7:
- return 12;
- case 8:
- return 16;
- }
-}
-
-struct dce10_wm_params {
- u32 dram_channels; /* number of dram channels */
- u32 yclk; /* bandwidth per dram data pin in kHz */
- u32 sclk; /* engine clock in kHz */
- u32 disp_clk; /* display clock in kHz */
- u32 src_width; /* viewport width */
- u32 active_time; /* active display time in ns */
- u32 blank_time; /* blank time in ns */
- bool interlaced; /* mode is interlaced */
- fixed20_12 vsc; /* vertical scale ratio */
- u32 num_heads; /* number of active crtcs */
- u32 bytes_per_pixel; /* bytes per pixel display + overlay */
- u32 lb_size; /* line buffer allocated to pipe */
- u32 vtaps; /* vertical scaler taps */
-};
-
-/**
- * dce_v11_0_dram_bandwidth - get the dram bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the raw dram bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dram bandwidth in MBytes/s
- */
-static u32 dce_v11_0_dram_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate raw DRAM Bandwidth */
- fixed20_12 dram_efficiency; /* 0.7 */
- fixed20_12 yclk, dram_channels, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- yclk.full = dfixed_const(wm->yclk);
- yclk.full = dfixed_div(yclk, a);
- dram_channels.full = dfixed_const(wm->dram_channels * 4);
- a.full = dfixed_const(10);
- dram_efficiency.full = dfixed_const(7);
- dram_efficiency.full = dfixed_div(dram_efficiency, a);
- bandwidth.full = dfixed_mul(dram_channels, yclk);
- bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_dram_bandwidth_for_display - get the dram bandwidth for display
- *
- * @wm: watermark calculation data
- *
- * Calculate the dram bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dram bandwidth for display in MBytes/s
- */
-static u32 dce_v11_0_dram_bandwidth_for_display(struct dce10_wm_params *wm)
-{
- /* Calculate DRAM Bandwidth and the part allocated to display. */
- fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
- fixed20_12 yclk, dram_channels, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- yclk.full = dfixed_const(wm->yclk);
- yclk.full = dfixed_div(yclk, a);
- dram_channels.full = dfixed_const(wm->dram_channels * 4);
- a.full = dfixed_const(10);
- disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
- disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
- bandwidth.full = dfixed_mul(dram_channels, yclk);
- bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_data_return_bandwidth - get the data return bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the data return bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the data return bandwidth in MBytes/s
- */
-static u32 dce_v11_0_data_return_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the display Data return Bandwidth */
- fixed20_12 return_efficiency; /* 0.8 */
- fixed20_12 sclk, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- sclk.full = dfixed_const(wm->sclk);
- sclk.full = dfixed_div(sclk, a);
- a.full = dfixed_const(10);
- return_efficiency.full = dfixed_const(8);
- return_efficiency.full = dfixed_div(return_efficiency, a);
- a.full = dfixed_const(32);
- bandwidth.full = dfixed_mul(a, sclk);
- bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_dmif_request_bandwidth - get the dmif bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the dmif bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dmif bandwidth in MBytes/s
- */
-static u32 dce_v11_0_dmif_request_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the DMIF Request Bandwidth */
- fixed20_12 disp_clk_request_efficiency; /* 0.8 */
- fixed20_12 disp_clk, bandwidth;
- fixed20_12 a, b;
-
- a.full = dfixed_const(1000);
- disp_clk.full = dfixed_const(wm->disp_clk);
- disp_clk.full = dfixed_div(disp_clk, a);
- a.full = dfixed_const(32);
- b.full = dfixed_mul(a, disp_clk);
-
- a.full = dfixed_const(10);
- disp_clk_request_efficiency.full = dfixed_const(8);
- disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
-
- bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_available_bandwidth - get the min available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the min available bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the min available bandwidth in MBytes/s
- */
-static u32 dce_v11_0_available_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
- u32 dram_bandwidth = dce_v11_0_dram_bandwidth(wm);
- u32 data_return_bandwidth = dce_v11_0_data_return_bandwidth(wm);
- u32 dmif_req_bandwidth = dce_v11_0_dmif_request_bandwidth(wm);
-
- return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
-}
-
-/**
- * dce_v11_0_average_bandwidth - get the average available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the average available bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the average available bandwidth in MBytes/s
- */
-static u32 dce_v11_0_average_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the display mode Average Bandwidth
- * DisplayMode should contain the source and destination dimensions,
- * timing, etc.
- */
- fixed20_12 bpp;
- fixed20_12 line_time;
- fixed20_12 src_width;
- fixed20_12 bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- line_time.full = dfixed_const(wm->active_time + wm->blank_time);
- line_time.full = dfixed_div(line_time, a);
- bpp.full = dfixed_const(wm->bytes_per_pixel);
- src_width.full = dfixed_const(wm->src_width);
- bandwidth.full = dfixed_mul(src_width, bpp);
- bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
- bandwidth.full = dfixed_div(bandwidth, line_time);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_latency_watermark - get the latency watermark
- *
- * @wm: watermark calculation data
- *
- * Calculate the latency watermark (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the latency watermark in ns
- */
-static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm)
-{
- /* First calculate the latency in ns */
- u32 mc_latency = 2000; /* 2000 ns. */
- u32 available_bandwidth = dce_v11_0_available_bandwidth(wm);
- u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
- u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
- u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
- u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
- (wm->num_heads * cursor_line_pair_return_time);
- u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
- u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
- u32 tmp, dmif_size = 12288;
- fixed20_12 a, b, c;
-
- if (wm->num_heads == 0)
- return 0;
-
- a.full = dfixed_const(2);
- b.full = dfixed_const(1);
- if ((wm->vsc.full > a.full) ||
- ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
- (wm->vtaps >= 5) ||
- ((wm->vsc.full >= a.full) && wm->interlaced))
- max_src_lines_per_dst_line = 4;
- else
- max_src_lines_per_dst_line = 2;
-
- a.full = dfixed_const(available_bandwidth);
- b.full = dfixed_const(wm->num_heads);
- a.full = dfixed_div(a, b);
- tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
- tmp = min(dfixed_trunc(a), tmp);
-
- lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
-
- a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
- b.full = dfixed_const(1000);
- c.full = dfixed_const(lb_fill_bw);
- b.full = dfixed_div(c, b);
- a.full = dfixed_div(a, b);
- line_fill_time = dfixed_trunc(a);
-
- if (line_fill_time < wm->active_time)
- return latency;
- else
- return latency + (line_fill_time - wm->active_time);
-
-}
-
-/**
- * dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display - check
- * average and available dram bandwidth
- *
- * @wm: watermark calculation data
- *
- * Check if the display average bandwidth fits in the display
- * dram bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce10_wm_params *wm)
-{
- if (dce_v11_0_average_bandwidth(wm) <=
- (dce_v11_0_dram_bandwidth_for_display(wm) / wm->num_heads))
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_average_bandwidth_vs_available_bandwidth - check
- * average and available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Check if the display average bandwidth fits in the display
- * available bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_average_bandwidth_vs_available_bandwidth(struct dce10_wm_params *wm)
-{
- if (dce_v11_0_average_bandwidth(wm) <=
- (dce_v11_0_available_bandwidth(wm) / wm->num_heads))
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_check_latency_hiding - check latency hiding
- *
- * @wm: watermark calculation data
- *
- * Check latency hiding (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_check_latency_hiding(struct dce10_wm_params *wm)
-{
- u32 lb_partitions = wm->lb_size / wm->src_width;
- u32 line_time = wm->active_time + wm->blank_time;
- u32 latency_tolerant_lines;
- u32 latency_hiding;
- fixed20_12 a;
-
- a.full = dfixed_const(1);
- if (wm->vsc.full > a.full)
- latency_tolerant_lines = 1;
- else {
- if (lb_partitions <= (wm->vtaps + 1))
- latency_tolerant_lines = 1;
- else
- latency_tolerant_lines = 2;
- }
-
- latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
-
- if (dce_v11_0_latency_watermark(wm) <= latency_hiding)
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_program_watermarks - program display watermarks
- *
- * @adev: amdgpu_device pointer
- * @amdgpu_crtc: the selected display controller
- * @lb_size: line buffer size
- * @num_heads: number of display controllers in use
- *
- * Calculate and program the display watermarks for the
- * selected display controller (CIK).
- */
-static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
- struct amdgpu_crtc *amdgpu_crtc,
- u32 lb_size, u32 num_heads)
-{
- struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
- struct dce10_wm_params wm_low, wm_high;
- u32 active_time;
- u32 line_time = 0;
- u32 latency_watermark_a = 0, latency_watermark_b = 0;
- u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
-
- if (amdgpu_crtc->base.enabled && num_heads && mode) {
- active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
- (u32)mode->clock);
- line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
- (u32)mode->clock);
- line_time = min_t(u32, line_time, 65535);
-
- /* watermark for high clocks */
- if (adev->pm.dpm_enabled) {
- wm_high.yclk =
- amdgpu_dpm_get_mclk(adev, false) * 10;
- wm_high.sclk =
- amdgpu_dpm_get_sclk(adev, false) * 10;
- } else {
- wm_high.yclk = adev->pm.current_mclk * 10;
- wm_high.sclk = adev->pm.current_sclk * 10;
- }
-
- wm_high.disp_clk = mode->clock;
- wm_high.src_width = mode->crtc_hdisplay;
- wm_high.active_time = active_time;
- wm_high.blank_time = line_time - wm_high.active_time;
- wm_high.interlaced = false;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- wm_high.interlaced = true;
- wm_high.vsc = amdgpu_crtc->vsc;
- wm_high.vtaps = 1;
- if (amdgpu_crtc->rmx_type != RMX_OFF)
- wm_high.vtaps = 2;
- wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
- wm_high.lb_size = lb_size;
- wm_high.dram_channels = cik_get_number_of_dram_channels(adev);
- wm_high.num_heads = num_heads;
-
- /* set for high clocks */
- latency_watermark_a = min_t(u32, dce_v11_0_latency_watermark(&wm_high), 65535);
-
- /* possibly force display priority to high */
- /* should really do this at mode validation time... */
- if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
- !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
- !dce_v11_0_check_latency_hiding(&wm_high) ||
- (adev->mode_info.disp_priority == 2)) {
- DRM_DEBUG_KMS("force priority to high\n");
- }
-
- /* watermark for low clocks */
- if (adev->pm.dpm_enabled) {
- wm_low.yclk =
- amdgpu_dpm_get_mclk(adev, true) * 10;
- wm_low.sclk =
- amdgpu_dpm_get_sclk(adev, true) * 10;
- } else {
- wm_low.yclk = adev->pm.current_mclk * 10;
- wm_low.sclk = adev->pm.current_sclk * 10;
- }
-
- wm_low.disp_clk = mode->clock;
- wm_low.src_width = mode->crtc_hdisplay;
- wm_low.active_time = active_time;
- wm_low.blank_time = line_time - wm_low.active_time;
- wm_low.interlaced = false;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- wm_low.interlaced = true;
- wm_low.vsc = amdgpu_crtc->vsc;
- wm_low.vtaps = 1;
- if (amdgpu_crtc->rmx_type != RMX_OFF)
- wm_low.vtaps = 2;
- wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
- wm_low.lb_size = lb_size;
- wm_low.dram_channels = cik_get_number_of_dram_channels(adev);
- wm_low.num_heads = num_heads;
-
- /* set for low clocks */
- latency_watermark_b = min_t(u32, dce_v11_0_latency_watermark(&wm_low), 65535);
-
- /* possibly force display priority to high */
- /* should really do this at mode validation time... */
- if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
- !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
- !dce_v11_0_check_latency_hiding(&wm_low) ||
- (adev->mode_info.disp_priority == 2)) {
- DRM_DEBUG_KMS("force priority to high\n");
- }
- lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
- }
-
- /* select wm A */
- wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 1);
- WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
- WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* select wm B */
- tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2);
- WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
- WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* restore original selection */
- WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask);
-
- /* save values for DPM */
- amdgpu_crtc->line_time = line_time;
-
- /* Save number of lines the linebuffer leads before the scanout */
- amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
-}
-
-/**
- * dce_v11_0_bandwidth_update - program display watermarks
- *
- * @adev: amdgpu_device pointer
- *
- * Calculate and program the display watermarks and line
- * buffer allocation (CIK).
- */
-static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev)
-{
- struct drm_display_mode *mode = NULL;
- u32 num_heads = 0, lb_size;
- int i;
-
- amdgpu_display_update_priority(adev);
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->mode_info.crtcs[i]->base.enabled)
- num_heads++;
- }
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- mode = &adev->mode_info.crtcs[i]->base.mode;
- lb_size = dce_v11_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode);
- dce_v11_0_program_watermarks(adev, adev->mode_info.crtcs[i],
- lb_size, num_heads);
- }
-}
-
-static void dce_v11_0_audio_get_connected_pins(struct amdgpu_device *adev)
-{
- int i;
- u32 offset, tmp;
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- offset = adev->mode_info.audio.pin[i].offset;
- tmp = RREG32_AUDIO_ENDPT(offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
- if (((tmp &
- AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >>
- AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1)
- adev->mode_info.audio.pin[i].connected = false;
- else
- adev->mode_info.audio.pin[i].connected = true;
- }
-}
-
-static struct amdgpu_audio_pin *dce_v11_0_audio_get_pin(struct amdgpu_device *adev)
-{
- int i;
-
- dce_v11_0_audio_get_connected_pins(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- if (adev->mode_info.audio.pin[i].connected)
- return &adev->mode_info.audio.pin[i];
- }
- DRM_ERROR("No connected audio pins found!\n");
- return NULL;
-}
-
-static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder)
-{
- struct amdgpu_device *adev = drm_to_adev(encoder->dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- u32 tmp;
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- tmp = RREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, dig->afmt->pin->id);
- WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, tmp);
-}
-
-static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *amdgpu_connector = NULL;
- u32 tmp;
- int interlace = 0;
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- if (connector->encoder == encoder) {
- amdgpu_connector = to_amdgpu_connector(connector);
- break;
- }
- }
- drm_connector_list_iter_end(&iter);
-
- if (!amdgpu_connector) {
- DRM_ERROR("Couldn't find encoder's connector\n");
- return;
- }
-
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- interlace = 1;
- if (connector->latency_present[interlace]) {
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- VIDEO_LIPSYNC, connector->video_latency[interlace]);
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- AUDIO_LIPSYNC, connector->audio_latency[interlace]);
- } else {
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- VIDEO_LIPSYNC, 0);
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- AUDIO_LIPSYNC, 0);
- }
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
-}
-
-static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *amdgpu_connector = NULL;
- u32 tmp;
- u8 *sadb = NULL;
- int sad_count;
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- if (connector->encoder == encoder) {
- amdgpu_connector = to_amdgpu_connector(connector);
- break;
- }
- }
- drm_connector_list_iter_end(&iter);
-
- if (!amdgpu_connector) {
- DRM_ERROR("Couldn't find encoder's connector\n");
- return;
- }
-
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
- if (sad_count < 0) {
- DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
- sad_count = 0;
- }
-
- /* program the speaker allocation */
- tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- DP_CONNECTION, 0);
- /* set HDMI mode */
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- HDMI_CONNECTION, 1);
- if (sad_count)
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- SPEAKER_ALLOCATION, sadb[0]);
- else
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- SPEAKER_ALLOCATION, 5); /* stereo */
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
-
- kfree(sadb);
-}
-
-static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *amdgpu_connector = NULL;
- struct cea_sad *sads;
- int i, sad_count;
-
- static const u16 eld_reg_to_type[][2] = {
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
- };
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- if (connector->encoder == encoder) {
- amdgpu_connector = to_amdgpu_connector(connector);
- break;
- }
- }
- drm_connector_list_iter_end(&iter);
-
- if (!amdgpu_connector) {
- DRM_ERROR("Couldn't find encoder's connector\n");
- return;
- }
-
- sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
- if (sad_count < 0)
- DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
- if (sad_count <= 0)
- return;
- BUG_ON(!sads);
-
- for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
- u32 tmp = 0;
- u8 stereo_freqs = 0;
- int max_channels = -1;
- int j;
-
- for (j = 0; j < sad_count; j++) {
- struct cea_sad *sad = &sads[j];
-
- if (sad->format == eld_reg_to_type[i][1]) {
- if (sad->channels > max_channels) {
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- MAX_CHANNELS, sad->channels);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- DESCRIPTOR_BYTE_2, sad->byte2);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES, sad->freq);
- max_channels = sad->channels;
- }
-
- if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
- stereo_freqs |= sad->freq;
- else
- break;
- }
- }
-
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
- }
-
- kfree(sads);
-}
-
-static void dce_v11_0_audio_enable(struct amdgpu_device *adev,
- struct amdgpu_audio_pin *pin,
- bool enable)
-{
- if (!pin)
- return;
-
- WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
- enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
-}
-
-static const u32 pin_offsets[] =
-{
- AUD0_REGISTER_OFFSET,
- AUD1_REGISTER_OFFSET,
- AUD2_REGISTER_OFFSET,
- AUD3_REGISTER_OFFSET,
- AUD4_REGISTER_OFFSET,
- AUD5_REGISTER_OFFSET,
- AUD6_REGISTER_OFFSET,
- AUD7_REGISTER_OFFSET,
-};
-
-static int dce_v11_0_audio_init(struct amdgpu_device *adev)
-{
- int i;
-
- if (!amdgpu_audio)
- return 0;
-
- adev->mode_info.audio.enabled = true;
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- adev->mode_info.audio.num_pins = 7;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- adev->mode_info.audio.num_pins = 8;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- adev->mode_info.audio.num_pins = 6;
- break;
- default:
- return -EINVAL;
- }
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- adev->mode_info.audio.pin[i].channels = -1;
- adev->mode_info.audio.pin[i].rate = -1;
- adev->mode_info.audio.pin[i].bits_per_sample = -1;
- adev->mode_info.audio.pin[i].status_bits = 0;
- adev->mode_info.audio.pin[i].category_code = 0;
- adev->mode_info.audio.pin[i].connected = false;
- adev->mode_info.audio.pin[i].offset = pin_offsets[i];
- adev->mode_info.audio.pin[i].id = i;
- /* disable audio. it will be set up later */
- /* XXX remove once we switch to ip funcs */
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- return 0;
-}
-
-static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
-{
- if (!amdgpu_audio)
- return;
-
- if (!adev->mode_info.audio.enabled)
- return;
-
- adev->mode_info.audio.enabled = false;
-}
-
-/*
- * update the N and CTS parameters for a given pixel clock rate
- */
-static void dce_v11_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- u32 tmp;
-
- tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
- WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
- tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
- WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
- WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
- tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
- WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
- WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
- tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
- WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
-
-}
-
-/*
- * build a HDMI Video Info Frame
- */
-static void dce_v11_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
- void *buffer, size_t size)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- uint8_t *frame = buffer + 3;
- uint8_t *header = buffer;
-
- WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
- frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24));
- WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
- frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24));
- WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
- frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24));
- WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
- frame[0xC] | (frame[0xD] << 8) | (header[1] << 24));
-}
-
-static void dce_v11_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- u32 dto_phase = 24 * 1000;
- u32 dto_modulo = clock;
- u32 tmp;
-
- if (!dig || !dig->afmt)
- return;
-
- /* XXX two dtos; generally use dto0 for hdmi */
- /* Express [24MHz / target pixel clock] as an exact rational
- * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
- * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
- */
- tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
- tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL,
- amdgpu_crtc->crtc_id);
- WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
- WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase);
- WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo);
-}
-
-/*
- * update the info frames with the data from the current display mode
- */
-static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
- u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
- struct hdmi_avi_infoframe frame;
- ssize_t err;
- u32 tmp;
- int bpc = 8;
-
- if (!dig || !dig->afmt)
- return;
-
- /* Silent, r600_hdmi_enable will raise WARN for us */
- if (!dig->afmt->enabled)
- return;
-
- /* hdmi deep color mode general control packets setup, if bpc > 8 */
- if (encoder->crtc) {
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- bpc = amdgpu_crtc->bpc;
- }
-
- /* disable audio prior to setting up hw */
- dig->afmt->pin = dce_v11_0_audio_get_pin(adev);
- dce_v11_0_audio_enable(adev, dig->afmt->pin, false);
-
- dce_v11_0_audio_set_dto(encoder, mode->clock);
-
- tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
- WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); /* send null packets when required */
-
- WREG32(mmAFMT_AUDIO_CRC_CONTROL + dig->afmt->offset, 0x1000);
-
- tmp = RREG32(mmHDMI_CONTROL + dig->afmt->offset);
- switch (bpc) {
- case 0:
- case 6:
- case 8:
- case 16:
- default:
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 0);
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
- DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
- connector->name, bpc);
- break;
- case 10:
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1);
- DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
- connector->name);
- break;
- case 12:
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2);
- DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
- connector->name);
- break;
- }
- WREG32(mmHDMI_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); /* send null packets when required */
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); /* send general control packets */
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); /* send general control packets every frame */
- WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
- /* enable audio info frames (frames won't be set until audio is enabled) */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
- /* required for audio info values to be updated */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
- WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
- /* required for audio info values to be updated */
- tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
- WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
- /* anything other than 0 */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, 2);
- WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
-
- WREG32(mmHDMI_GC + dig->afmt->offset, 0); /* unset HDMI_GC_AVMUTE */
-
- tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
- /* set the default audio delay */
- tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
- /* should be suffient for all audio modes and small enough for all hblanks */
- tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
- WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
- /* allow 60958 channel status fields to be updated */
- tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
- WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
- if (bpc > 8)
- /* clear SW CTS value */
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 0);
- else
- /* select SW CTS value */
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 1);
- /* allow hw to sent ACR packets when required */
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
- WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- dce_v11_0_afmt_update_ACR(encoder, mode->clock);
-
- tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
- WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
- WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
- WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
-
- dce_v11_0_audio_write_speaker_allocation(encoder);
-
- WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset,
- (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT));
-
- dce_v11_0_afmt_audio_select_pin(encoder);
- dce_v11_0_audio_write_sad_regs(encoder);
- dce_v11_0_audio_write_latency_fields(encoder, mode);
-
- err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
- if (err < 0) {
- DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
- return;
- }
-
- err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
- if (err < 0) {
- DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
- return;
- }
-
- dce_v11_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer));
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
- /* enable AVI info frames */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
- /* required for audio info values to be updated */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
- WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
- WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
- /* send audio packets */
- tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
- WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- WREG32(mmAFMT_RAMP_CONTROL0 + dig->afmt->offset, 0x00FFFFFF);
- WREG32(mmAFMT_RAMP_CONTROL1 + dig->afmt->offset, 0x007FFFFF);
- WREG32(mmAFMT_RAMP_CONTROL2 + dig->afmt->offset, 0x00000001);
- WREG32(mmAFMT_RAMP_CONTROL3 + dig->afmt->offset, 0x00000001);
-
- /* enable audio after to setting up hw */
- dce_v11_0_audio_enable(adev, dig->afmt->pin, true);
-}
-
-static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- if (!dig || !dig->afmt)
- return;
-
- /* Silent, r600_hdmi_enable will raise WARN for us */
- if (enable && dig->afmt->enabled)
- return;
- if (!enable && !dig->afmt->enabled)
- return;
-
- if (!enable && dig->afmt->pin) {
- dce_v11_0_audio_enable(adev, dig->afmt->pin, false);
- dig->afmt->pin = NULL;
- }
-
- dig->afmt->enabled = enable;
-
- DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
- enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
-}
-
-static int dce_v11_0_afmt_init(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->mode_info.num_dig; i++)
- adev->mode_info.afmt[i] = NULL;
-
- /* DCE11 has audio blocks tied to DIG encoders */
- for (i = 0; i < adev->mode_info.num_dig; i++) {
- adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
- if (adev->mode_info.afmt[i]) {
- adev->mode_info.afmt[i]->offset = dig_offsets[i];
- adev->mode_info.afmt[i]->id = i;
- } else {
- int j;
- for (j = 0; j < i; j++) {
- kfree(adev->mode_info.afmt[j]);
- adev->mode_info.afmt[j] = NULL;
- }
- return -ENOMEM;
- }
- }
- return 0;
-}
-
-static void dce_v11_0_afmt_fini(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->mode_info.num_dig; i++) {
- kfree(adev->mode_info.afmt[i]);
- adev->mode_info.afmt[i] = NULL;
- }
-}
-
-static const u32 vga_control_regs[6] =
-{
- mmD1VGA_CONTROL,
- mmD2VGA_CONTROL,
- mmD3VGA_CONTROL,
- mmD4VGA_CONTROL,
- mmD5VGA_CONTROL,
- mmD6VGA_CONTROL,
-};
-
-static void dce_v11_0_vga_enable(struct drm_crtc *crtc, bool enable)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u32 vga_control;
-
- vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
- if (enable)
- WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1);
- else
- WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control);
-}
-
-static void dce_v11_0_grph_enable(struct drm_crtc *crtc, bool enable)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- if (enable)
- WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1);
- else
- WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0);
-}
-
-static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int x, int y, int atomic)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct drm_framebuffer *target_fb;
- struct drm_gem_object *obj;
- struct amdgpu_bo *abo;
- uint64_t fb_location, tiling_flags;
- uint32_t fb_format, fb_pitch_pixels;
- u32 fb_swap = REG_SET_FIELD(0, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, ENDIAN_NONE);
- u32 pipe_config;
- u32 tmp, viewport_w, viewport_h;
- int r;
- bool bypass_lut = false;
-
- /* no fb bound */
- if (!atomic && !crtc->primary->fb) {
- DRM_DEBUG_KMS("No FB bound\n");
- return 0;
- }
-
- if (atomic)
- target_fb = fb;
- else
- target_fb = crtc->primary->fb;
-
- /* If atomic, assume fb object is pinned & idle & fenced and
- * just update base pointers
- */
- obj = target_fb->obj[0];
- abo = gem_to_amdgpu_bo(obj);
- r = amdgpu_bo_reserve(abo, false);
- if (unlikely(r != 0))
- return r;
-
- if (!atomic) {
- abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(abo);
- return -EINVAL;
- }
- }
- fb_location = amdgpu_bo_gpu_offset(abo);
-
- amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
- amdgpu_bo_unreserve(abo);
-
- pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
-
- switch (target_fb->format->format) {
- case DRM_FORMAT_C8:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 0);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
- break;
- case DRM_FORMAT_XRGB4444:
- case DRM_FORMAT_ARGB4444:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 2);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_XRGB1555:
- case DRM_FORMAT_ARGB1555:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_BGRX5551:
- case DRM_FORMAT_BGRA5551:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 5);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_RGB565:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_XRGB8888:
- case DRM_FORMAT_ARGB8888:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- break;
- case DRM_FORMAT_XRGB2101010:
- case DRM_FORMAT_ARGB2101010:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
- bypass_lut = true;
- break;
- case DRM_FORMAT_BGRX1010102:
- case DRM_FORMAT_BGRA1010102:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 4);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
- bypass_lut = true;
- break;
- case DRM_FORMAT_XBGR8888:
- case DRM_FORMAT_ABGR8888:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- break;
- default:
- DRM_ERROR("Unsupported screen format %p4cc\n",
- &target_fb->format->format);
- return -EINVAL;
- }
-
- if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
- unsigned bankw, bankh, mtaspect, tile_split, num_banks;
-
- bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
- bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
- mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
- tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
- num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
-
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_NUM_BANKS, num_banks);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
- ARRAY_2D_TILED_THIN1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_TILE_SPLIT,
- tile_split);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_WIDTH, bankw);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_HEIGHT, bankh);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MACRO_TILE_ASPECT,
- mtaspect);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MICRO_TILE_MODE,
- ADDR_SURF_MICRO_TILING_DISPLAY);
- } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
- ARRAY_1D_TILED_THIN1);
- }
-
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_PIPE_CONFIG,
- pipe_config);
-
- dce_v11_0_vga_enable(crtc, false);
-
- /* Make sure surface address is updated at vertical blank rather than
- * horizontal blank
- */
- tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
- GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0);
- WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(fb_location));
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(fb_location));
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
- WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
- WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
-
- /*
- * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT
- * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
- * retain the full precision throughout the pipeline.
- */
- tmp = RREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset);
- if (bypass_lut)
- tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 1);
- else
- tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 0);
- WREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset, tmp);
-
- if (bypass_lut)
- DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
-
- WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
- WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
-
- fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
- WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
-
- dce_v11_0_grph_enable(crtc, true);
-
- WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
- target_fb->height);
-
- x &= ~3;
- y &= ~1;
- WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
- (x << 16) | y);
- viewport_w = crtc->mode.hdisplay;
- viewport_h = (crtc->mode.vdisplay + 1) & ~1;
- WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
- (viewport_w << 16) | viewport_h);
-
- /* set pageflip to happen anywhere in vblank interval */
- WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
-
- if (!atomic && fb && fb != crtc->primary->fb) {
- abo = gem_to_amdgpu_bo(fb->obj[0]);
- r = amdgpu_bo_reserve(abo, true);
- if (unlikely(r != 0))
- return r;
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
-
- /* Bytes per pixel may have changed */
- dce_v11_0_bandwidth_update(adev);
-
- return 0;
-}
-
-static void dce_v11_0_set_interleave(struct drm_crtc *crtc,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- u32 tmp;
-
- tmp = RREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset);
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 1);
- else
- tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 0);
- WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u16 *r, *g, *b;
- int i;
- u32 tmp;
-
- DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
-
- tmp = RREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_GRPH_MODE, 0);
- WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, PRESCALE_GRPH_CONTROL, GRPH_PRESCALE_BYPASS, 1);
- WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, GRPH_INPUT_GAMMA_MODE, 0);
- WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
-
- WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
-
- WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
- WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
- WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
-
- WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
-
- WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
- r = crtc->gamma_store;
- g = r + crtc->gamma_size;
- b = g + crtc->gamma_size;
- for (i = 0; i < 256; i++) {
- WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
- ((*r++ & 0xffc0) << 14) |
- ((*g++ & 0xffc0) << 4) |
- (*b++ >> 6));
- }
-
- tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, GRPH_DEGAMMA_MODE, 0);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR_DEGAMMA_MODE, 0);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR2_DEGAMMA_MODE, 0);
- WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, GRPH_GAMUT_REMAP_MODE, 0);
- WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, GRPH_REGAMMA_MODE, 0);
- WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_GRPH_MODE, 0);
- WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- /* XXX match this to the depth of the crtc fmt block, move to modeset? */
- WREG32(mmDENORM_CONTROL + amdgpu_crtc->crtc_offset, 0);
- /* XXX this only needs to be programmed once per crtc at startup,
- * not sure where the best place for it is
- */
- tmp = RREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, ALPHA_CONTROL, CURSOR_ALPHA_BLND_ENA, 1);
- WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static int dce_v11_0_pick_dig_encoder(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- if (dig->linkb)
- return 1;
- else
- return 0;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- if (dig->linkb)
- return 3;
- else
- return 2;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- if (dig->linkb)
- return 5;
- else
- return 4;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
- return 6;
- default:
- DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
- return 0;
- }
-}
-
-/**
- * dce_v11_0_pick_pll - Allocate a PPLL for use by the crtc.
- *
- * @crtc: drm crtc
- *
- * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
- * a single PPLL can be used for all DP crtcs/encoders. For non-DP
- * monitors a dedicated PPLL must be used. If a particular board has
- * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
- * as there is no need to program the PLL itself. If we are not able to
- * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
- * avoid messing up an existing monitor.
- *
- * Asic specific PLL information
- *
- * DCE 10.x
- * Tonga
- * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP)
- * CI
- * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC
- *
- */
-static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u32 pll_in_use;
- int pll;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- struct amdgpu_encoder *amdgpu_encoder =
- to_amdgpu_encoder(amdgpu_crtc->encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
- return ATOM_DP_DTO;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL1;
- else
- return ATOM_COMBOPHY_PLL0;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL3;
- else
- return ATOM_COMBOPHY_PLL2;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL5;
- else
- return ATOM_COMBOPHY_PLL4;
- default:
- DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
- return ATOM_PPLL_INVALID;
- }
- }
-
- if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
- if (adev->clock.dp_extclk)
- /* skip PPLL programming if using ext clock */
- return ATOM_PPLL_INVALID;
- else {
- /* use the same PPLL for all DP monitors */
- pll = amdgpu_pll_get_shared_dp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
- } else {
- /* use the same PPLL for all monitors with the same clock */
- pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
-
- /* XXX need to determine what plls are available on each DCE11 part */
- pll_in_use = amdgpu_pll_get_use_mask(crtc);
- if (adev->flags & AMD_IS_APU) {
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL0)))
- return ATOM_PPLL0;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- } else {
- if (!(pll_in_use & (1 << ATOM_PPLL2)))
- return ATOM_PPLL2;
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL0)))
- return ATOM_PPLL0;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- }
- return ATOM_PPLL_INVALID;
-}
-
-static void dce_v11_0_lock_cursor(struct drm_crtc *crtc, bool lock)
-{
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- uint32_t cur_lock;
-
- cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
- if (lock)
- cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 1);
- else
- cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 0);
- WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
-}
-
-static void dce_v11_0_hide_cursor(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- u32 tmp;
-
- tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0);
- WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static void dce_v11_0_show_cursor(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- u32 tmp;
-
- WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(amdgpu_crtc->cursor_addr));
- WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(amdgpu_crtc->cursor_addr));
-
- tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
- WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc,
- int x, int y)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- int xorigin = 0, yorigin = 0;
-
- amdgpu_crtc->cursor_x = x;
- amdgpu_crtc->cursor_y = y;
-
- /* avivo cursor are offset into the total surface */
- x += crtc->x;
- y += crtc->y;
- DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
-
- if (x < 0) {
- xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
- x = 0;
- }
- if (y < 0) {
- yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
- y = 0;
- }
-
- WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
- WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
- WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
- ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
-
- return 0;
-}
-
-static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc,
- int x, int y)
-{
- int ret;
-
- dce_v11_0_lock_cursor(crtc, true);
- ret = dce_v11_0_cursor_move_locked(crtc, x, y);
- dce_v11_0_lock_cursor(crtc, false);
-
- return ret;
-}
-
-static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
- struct drm_file *file_priv,
- uint32_t handle,
- uint32_t width,
- uint32_t height,
- int32_t hot_x,
- int32_t hot_y)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_gem_object *obj;
- struct amdgpu_bo *aobj;
- int ret;
-
- if (!handle) {
- /* turn off cursor */
- dce_v11_0_hide_cursor(crtc);
- obj = NULL;
- goto unpin;
- }
-
- if ((width > amdgpu_crtc->max_cursor_width) ||
- (height > amdgpu_crtc->max_cursor_height)) {
- DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
- return -EINVAL;
- }
-
- obj = drm_gem_object_lookup(file_priv, handle);
- if (!obj) {
- DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
- return -ENOENT;
- }
-
- aobj = gem_to_amdgpu_bo(obj);
- ret = amdgpu_bo_reserve(aobj, false);
- if (ret != 0) {
- drm_gem_object_put(obj);
- return ret;
- }
-
- aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_bo_unreserve(aobj);
- if (ret) {
- DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
- drm_gem_object_put(obj);
- return ret;
- }
- amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
-
- dce_v11_0_lock_cursor(crtc, true);
-
- if (width != amdgpu_crtc->cursor_width ||
- height != amdgpu_crtc->cursor_height ||
- hot_x != amdgpu_crtc->cursor_hot_x ||
- hot_y != amdgpu_crtc->cursor_hot_y) {
- int x, y;
-
- x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
- y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
-
- dce_v11_0_cursor_move_locked(crtc, x, y);
-
- amdgpu_crtc->cursor_width = width;
- amdgpu_crtc->cursor_height = height;
- amdgpu_crtc->cursor_hot_x = hot_x;
- amdgpu_crtc->cursor_hot_y = hot_y;
- }
-
- dce_v11_0_show_cursor(crtc);
- dce_v11_0_lock_cursor(crtc, false);
-
-unpin:
- if (amdgpu_crtc->cursor_bo) {
- struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
- ret = amdgpu_bo_reserve(aobj, true);
- if (likely(ret == 0)) {
- amdgpu_bo_unpin(aobj);
- amdgpu_bo_unreserve(aobj);
- }
- drm_gem_object_put(amdgpu_crtc->cursor_bo);
- }
-
- amdgpu_crtc->cursor_bo = obj;
- return 0;
-}
-
-static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- if (amdgpu_crtc->cursor_bo) {
- dce_v11_0_lock_cursor(crtc, true);
-
- dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
- amdgpu_crtc->cursor_y);
-
- dce_v11_0_show_cursor(crtc);
-
- dce_v11_0_lock_cursor(crtc, false);
- }
-}
-
-static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
- u16 *blue, uint32_t size,
- struct drm_modeset_acquire_ctx *ctx)
-{
- dce_v11_0_crtc_load_lut(crtc);
-
- return 0;
-}
-
-static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- drm_crtc_cleanup(crtc);
- kfree(amdgpu_crtc);
-}
-
-static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = {
- .cursor_set2 = dce_v11_0_crtc_cursor_set2,
- .cursor_move = dce_v11_0_crtc_cursor_move,
- .gamma_set = dce_v11_0_crtc_gamma_set,
- .set_config = amdgpu_display_crtc_set_config,
- .destroy = dce_v11_0_crtc_destroy,
- .page_flip_target = amdgpu_display_crtc_page_flip_target,
- .get_vblank_counter = amdgpu_get_vblank_counter_kms,
- .enable_vblank = amdgpu_enable_vblank_kms,
- .disable_vblank = amdgpu_disable_vblank_kms,
- .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
-};
-
-static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
-{
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- unsigned type;
-
- switch (mode) {
- case DRM_MODE_DPMS_ON:
- amdgpu_crtc->enabled = true;
- amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
- dce_v11_0_vga_enable(crtc, true);
- amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
- dce_v11_0_vga_enable(crtc, false);
- /* Make sure VBLANK and PFLIP interrupts are still enabled */
- type = amdgpu_display_crtc_idx_to_irq_type(adev,
- amdgpu_crtc->crtc_id);
- amdgpu_irq_update(adev, &adev->crtc_irq, type);
- amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_crtc_vblank_on(crtc);
- dce_v11_0_crtc_load_lut(crtc);
- break;
- case DRM_MODE_DPMS_STANDBY:
- case DRM_MODE_DPMS_SUSPEND:
- case DRM_MODE_DPMS_OFF:
- drm_crtc_vblank_off(crtc);
- if (amdgpu_crtc->enabled) {
- dce_v11_0_vga_enable(crtc, true);
- amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
- dce_v11_0_vga_enable(crtc, false);
- }
- amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
- amdgpu_crtc->enabled = false;
- break;
- }
- /* adjust pm to dpms */
- amdgpu_dpm_compute_clocks(adev);
-}
-
-static void dce_v11_0_crtc_prepare(struct drm_crtc *crtc)
-{
- /* disable crtc pair power gating before programming */
- amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
- amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
-}
-
-static void dce_v11_0_crtc_commit(struct drm_crtc *crtc)
-{
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
- amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
-}
-
-static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_atom_ss ss;
- int i;
-
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
- if (crtc->primary->fb) {
- int r;
- struct amdgpu_bo *abo;
-
- abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
- r = amdgpu_bo_reserve(abo, true);
- if (unlikely(r))
- DRM_ERROR("failed to reserve abo before unpin\n");
- else {
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
- }
- /* disable the GRPH */
- dce_v11_0_grph_enable(crtc, false);
-
- amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->mode_info.crtcs[i] &&
- adev->mode_info.crtcs[i]->enabled &&
- i != amdgpu_crtc->crtc_id &&
- amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
- /* one other crtc is using this pll don't turn
- * off the pll
- */
- goto done;
- }
- }
-
- switch (amdgpu_crtc->pll_id) {
- case ATOM_PPLL0:
- case ATOM_PPLL1:
- case ATOM_PPLL2:
- /* disable the ppll */
- amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
- 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
- break;
- case ATOM_COMBOPHY_PLL0:
- case ATOM_COMBOPHY_PLL1:
- case ATOM_COMBOPHY_PLL2:
- case ATOM_COMBOPHY_PLL3:
- case ATOM_COMBOPHY_PLL4:
- case ATOM_COMBOPHY_PLL5:
- /* disable the ppll */
- amdgpu_atombios_crtc_program_pll(crtc, ATOM_CRTC_INVALID, amdgpu_crtc->pll_id,
- 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
- break;
- default:
- break;
- }
-done:
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->adjusted_clock = 0;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
-}
-
-static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode,
- int x, int y, struct drm_framebuffer *old_fb)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- if (!amdgpu_crtc->adjusted_clock)
- return -EINVAL;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- struct amdgpu_encoder *amdgpu_encoder =
- to_amdgpu_encoder(amdgpu_crtc->encoder);
- int encoder_mode =
- amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder);
-
- /* SetPixelClock calculates the plls and ss values now */
- amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id,
- amdgpu_crtc->pll_id,
- encoder_mode, amdgpu_encoder->encoder_id,
- adjusted_mode->clock, 0, 0, 0, 0,
- amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss);
- } else {
- amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
- }
- amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
- dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
- amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
- amdgpu_atombios_crtc_scaler_setup(crtc);
- dce_v11_0_cursor_reset(crtc);
- /* update the hw version fpr dpm */
- amdgpu_crtc->hw_mode = *adjusted_mode;
-
- return 0;
-}
-
-static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct drm_encoder *encoder;
-
- /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
- list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
- if (encoder->crtc == crtc) {
- amdgpu_crtc->encoder = encoder;
- amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
- break;
- }
- }
- if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
- return false;
- }
- if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
- return false;
- if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
- return false;
- /* pick pll */
- amdgpu_crtc->pll_id = dce_v11_0_pick_pll(crtc);
- /* if we can't get a PPLL for a non-DP encoder, fail */
- if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
- !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
- return false;
-
- return true;
-}
-
-static int dce_v11_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
- struct drm_framebuffer *old_fb)
-{
- return dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
-}
-
-static int dce_v11_0_crtc_set_base_atomic(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int x, int y, enum mode_set_atomic state)
-{
- return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1);
-}
-
-static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = {
- .dpms = dce_v11_0_crtc_dpms,
- .mode_fixup = dce_v11_0_crtc_mode_fixup,
- .mode_set = dce_v11_0_crtc_mode_set,
- .mode_set_base = dce_v11_0_crtc_set_base,
- .mode_set_base_atomic = dce_v11_0_crtc_set_base_atomic,
- .prepare = dce_v11_0_crtc_prepare,
- .commit = dce_v11_0_crtc_commit,
- .disable = dce_v11_0_crtc_disable,
- .get_scanout_position = amdgpu_crtc_get_scanout_position,
-};
-
-static void dce_v11_0_panic_flush(struct drm_plane *plane)
-{
- struct drm_framebuffer *fb;
- struct amdgpu_crtc *amdgpu_crtc;
- struct amdgpu_device *adev;
- uint32_t fb_format;
-
- if (!plane->fb)
- return;
-
- fb = plane->fb;
- amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
- adev = drm_to_adev(fb->dev);
-
- /* Disable DC tiling */
- fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
- fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
- WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
-
-}
-
-static const struct drm_plane_helper_funcs dce_v11_0_drm_primary_plane_helper_funcs = {
- .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
- .panic_flush = dce_v11_0_panic_flush,
-};
-
-static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
-{
- struct amdgpu_crtc *amdgpu_crtc;
-
- amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
- (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
- if (amdgpu_crtc == NULL)
- return -ENOMEM;
-
- drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v11_0_crtc_funcs);
-
- drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
- amdgpu_crtc->crtc_id = index;
- adev->mode_info.crtcs[index] = amdgpu_crtc;
-
- amdgpu_crtc->max_cursor_width = 128;
- amdgpu_crtc->max_cursor_height = 128;
- adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
- adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
-
- switch (amdgpu_crtc->crtc_id) {
- case 0:
- default:
- amdgpu_crtc->crtc_offset = CRTC0_REGISTER_OFFSET;
- break;
- case 1:
- amdgpu_crtc->crtc_offset = CRTC1_REGISTER_OFFSET;
- break;
- case 2:
- amdgpu_crtc->crtc_offset = CRTC2_REGISTER_OFFSET;
- break;
- case 3:
- amdgpu_crtc->crtc_offset = CRTC3_REGISTER_OFFSET;
- break;
- case 4:
- amdgpu_crtc->crtc_offset = CRTC4_REGISTER_OFFSET;
- break;
- case 5:
- amdgpu_crtc->crtc_offset = CRTC5_REGISTER_OFFSET;
- break;
- }
-
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->adjusted_clock = 0;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
- drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs);
- drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v11_0_drm_primary_plane_helper_funcs);
-
- return 0;
-}
-
-static int dce_v11_0_early_init(struct amdgpu_ip_block *ip_block)
-{
- struct amdgpu_device *adev = ip_block->adev;
-
- adev->audio_endpt_rreg = &dce_v11_0_audio_endpt_rreg;
- adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg;
-
- dce_v11_0_set_display_funcs(adev);
-
- adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev);
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 9;
- break;
- case CHIP_STONEY:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 9;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 6;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- adev->mode_info.num_hpd = 5;
- adev->mode_info.num_dig = 5;
- break;
- default:
- /* FIXME: not supported yet */
- return -EINVAL;
- }
-
- dce_v11_0_set_irq_funcs(adev);
-
- return 0;
-}
-
-static int dce_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
-{
- int r, i;
- struct amdgpu_device *adev = ip_block->adev;
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
- if (r)
- return r;
- }
-
- for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
- if (r)
- return r;
- }
-
- /* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
- if (r)
- return r;
-
- adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
-
- adev_to_drm(adev)->mode_config.async_page_flip = true;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
- adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
-
- adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
-
- r = amdgpu_display_modeset_create_props(adev);
- if (r)
- return r;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
-
- /* allocate crtcs */
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = dce_v11_0_crtc_init(adev, i);
- if (r)
- return r;
- }
-
- if (amdgpu_atombios_get_connector_info_from_object_table(adev))
- amdgpu_display_print_display_setup(adev_to_drm(adev));
- else
- return -EINVAL;
-
- /* setup afmt */
- r = dce_v11_0_afmt_init(adev);
- if (r)
- return r;
-
- r = dce_v11_0_audio_init(adev);
- if (r)
- return r;
-
- /* Disable vblank IRQs aggressively for power-saving */
- /* XXX: can this be enabled for DC? */
- adev_to_drm(adev)->vblank_disable_immediate = true;
-
- r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
- if (r)
- return r;
-
- INIT_DELAYED_WORK(&adev->hotplug_work,
- amdgpu_display_hotplug_work_func);
-
- drm_kms_helper_poll_init(adev_to_drm(adev));
-
- adev->mode_info.mode_config_initialized = true;
- return 0;
-}
-
-static int dce_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
-{
- struct amdgpu_device *adev = ip_block->adev;
-
- drm_edid_free(adev->mode_info.bios_hardcoded_edid);
-
- drm_kms_helper_poll_fini(adev_to_drm(adev));
-
- dce_v11_0_audio_fini(adev);
-
- dce_v11_0_afmt_fini(adev);
-
- drm_mode_config_cleanup(adev_to_drm(adev));
- adev->mode_info.mode_config_initialized = false;
-
- return 0;
-}
-
-static int dce_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
-{
- int i;
- struct amdgpu_device *adev = ip_block->adev;
-
- dce_v11_0_init_golden_registers(adev);
-
- /* disable vga render */
- dce_v11_0_set_vga_render_state(adev, false);
- /* init dig PHYs, disp eng pll */
- amdgpu_atombios_crtc_powergate_init(adev);
- amdgpu_atombios_encoder_init_dig(adev);
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
- DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
- amdgpu_atombios_crtc_set_dce_clock(adev, 0,
- DCE_CLOCK_TYPE_DPREFCLK, ATOM_GCK_DFS);
- } else {
- amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
- }
-
- /* initialize hpd */
- dce_v11_0_hpd_init(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- dce_v11_0_pageflip_interrupt_init(adev);
-
- return 0;
-}
-
-static int dce_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
-{
- int i;
- struct amdgpu_device *adev = ip_block->adev;
-
- dce_v11_0_hpd_fini(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- dce_v11_0_pageflip_interrupt_fini(adev);
-
- flush_delayed_work(&adev->hotplug_work);
-
- return 0;
-}
-
-static int dce_v11_0_suspend(struct amdgpu_ip_block *ip_block)
-{
- struct amdgpu_device *adev = ip_block->adev;
- int r;
-
- r = amdgpu_display_suspend_helper(adev);
- if (r)
- return r;
-
- adev->mode_info.bl_level =
- amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
-
- return dce_v11_0_hw_fini(ip_block);
-}
-
-static int dce_v11_0_resume(struct amdgpu_ip_block *ip_block)
-{
- struct amdgpu_device *adev = ip_block->adev;
- int ret;
-
- amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
- adev->mode_info.bl_level);
-
- ret = dce_v11_0_hw_init(ip_block);
-
- /* turn on the BL */
- if (adev->mode_info.bl_encoder) {
- u8 bl_level = amdgpu_display_backlight_get_level(adev,
- adev->mode_info.bl_encoder);
- amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
- bl_level);
- }
- if (ret)
- return ret;
-
- return amdgpu_display_resume_helper(adev);
-}
-
-static bool dce_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
-{
- return true;
-}
-
-static int dce_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
-{
- u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = ip_block->adev;
-
- if (dce_v11_0_is_display_hung(adev))
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
-
- if (srbm_soft_reset) {
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
- return 0;
-}
-
-static void dce_v11_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state)
-{
- u32 lb_interrupt_mask;
-
- if (crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VBLANK_INTERRUPT_MASK, 0);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VBLANK_INTERRUPT_MASK, 1);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- default:
- break;
- }
-}
-
-static void dce_v11_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state)
-{
- u32 lb_interrupt_mask;
-
- if (crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VLINE_INTERRUPT_MASK, 0);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VLINE_INTERRUPT_MASK, 1);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- default:
- break;
- }
-}
-
-static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned hpd,
- enum amdgpu_interrupt_state state)
-{
- u32 tmp;
-
- if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hpd %d\n", hpd);
- return 0;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
- break;
- default:
- break;
- }
-
- return 0;
-}
-
-static int dce_v11_0_set_crtc_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- switch (type) {
- case AMDGPU_CRTC_IRQ_VBLANK1:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 0, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK2:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 1, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK3:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 2, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK4:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 3, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK5:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 4, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK6:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 5, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE1:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 0, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE2:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 1, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE3:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 2, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE4:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 3, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE5:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 4, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE6:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 5, state);
- break;
- default:
- break;
- }
- return 0;
-}
-
-static int dce_v11_0_set_pageflip_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- u32 reg;
-
- if (type >= adev->mode_info.num_crtc) {
- DRM_ERROR("invalid pageflip crtc %d\n", type);
- return -EINVAL;
- }
-
- reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
- if (state == AMDGPU_IRQ_STATE_DISABLE)
- WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
- reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
- else
- WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
- reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
-
- return 0;
-}
-
-static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- unsigned long flags;
- unsigned crtc_id;
- struct amdgpu_crtc *amdgpu_crtc;
- struct amdgpu_flip_work *works;
-
- crtc_id = (entry->src_id - 8) >> 1;
- amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
-
- if (crtc_id >= adev->mode_info.num_crtc) {
- DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
- return -EINVAL;
- }
-
- if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
- GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
- WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
- GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
-
- /* IRQ could occur when in initial stage */
- if(amdgpu_crtc == NULL)
- return 0;
-
- spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
- DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
- "AMDGPU_FLIP_SUBMITTED(%d)\n",
- amdgpu_crtc->pflip_status,
- AMDGPU_FLIP_SUBMITTED);
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
- return 0;
- }
-
- /* page flip completed. clean up */
- amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
- amdgpu_crtc->pflip_works = NULL;
-
- /* wakeup usersapce */
- if(works->event)
- drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
-
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
-
- drm_crtc_vblank_put(&amdgpu_crtc->base);
- schedule_work(&works->unpin_work);
-
- return 0;
-}
-
-static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev,
- int hpd)
-{
- u32 tmp;
-
- if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hpd %d\n", hpd);
- return;
- }
-
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_ACK, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
-}
-
-static void dce_v11_0_crtc_vblank_int_ack(struct amdgpu_device *adev,
- int crtc)
-{
- u32 tmp;
-
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- tmp = RREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc]);
- tmp = REG_SET_FIELD(tmp, LB_VBLANK_STATUS, VBLANK_ACK, 1);
- WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], tmp);
-}
-
-static void dce_v11_0_crtc_vline_int_ack(struct amdgpu_device *adev,
- int crtc)
-{
- u32 tmp;
-
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- tmp = RREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc]);
- tmp = REG_SET_FIELD(tmp, LB_VLINE_STATUS, VLINE_ACK, 1);
- WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], tmp);
-}
-
-static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- unsigned crtc = entry->src_id - 1;
- uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
- unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
- crtc);
-
- switch (entry->src_data[0]) {
- case 0: /* vblank */
- if (disp_int & interrupt_status_offsets[crtc].vblank)
- dce_v11_0_crtc_vblank_int_ack(adev, crtc);
- else
- DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
-
- if (amdgpu_irq_enabled(adev, source, irq_type)) {
- drm_handle_vblank(adev_to_drm(adev), crtc);
- }
- DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
-
- break;
- case 1: /* vline */
- if (disp_int & interrupt_status_offsets[crtc].vline)
- dce_v11_0_crtc_vline_int_ack(adev, crtc);
- else
- DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
-
- DRM_DEBUG("IH: D%d vline\n", crtc + 1);
-
- break;
- default:
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
- break;
- }
-
- return 0;
-}
-
-static int dce_v11_0_hpd_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- uint32_t disp_int, mask;
- unsigned hpd;
-
- if (entry->src_data[0] >= adev->mode_info.num_hpd) {
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
- return 0;
- }
-
- hpd = entry->src_data[0];
- disp_int = RREG32(interrupt_status_offsets[hpd].reg);
- mask = interrupt_status_offsets[hpd].hpd;
-
- if (disp_int & mask) {
- dce_v11_0_hpd_int_ack(adev, hpd);
- schedule_delayed_work(&adev->hotplug_work, 0);
- DRM_DEBUG("IH: HPD%d\n", hpd + 1);
- }
-
- return 0;
-}
-
-static int dce_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
- enum amd_clockgating_state state)
-{
- return 0;
-}
-
-static int dce_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
- enum amd_powergating_state state)
-{
- return 0;
-}
-
-static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
- .name = "dce_v11_0",
- .early_init = dce_v11_0_early_init,
- .sw_init = dce_v11_0_sw_init,
- .sw_fini = dce_v11_0_sw_fini,
- .hw_init = dce_v11_0_hw_init,
- .hw_fini = dce_v11_0_hw_fini,
- .suspend = dce_v11_0_suspend,
- .resume = dce_v11_0_resume,
- .is_idle = dce_v11_0_is_idle,
- .soft_reset = dce_v11_0_soft_reset,
- .set_clockgating_state = dce_v11_0_set_clockgating_state,
- .set_powergating_state = dce_v11_0_set_powergating_state,
-};
-
-static void dce_v11_0_encoder_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-
- amdgpu_encoder->pixel_clock = adjusted_mode->clock;
-
- /* need to call this here rather than in prepare() since we need some crtc info */
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
-
- /* set scaler clears this on some chips */
- dce_v11_0_set_interleave(encoder->crtc, mode);
-
- if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
- dce_v11_0_afmt_enable(encoder, true);
- dce_v11_0_afmt_setmode(encoder, adjusted_mode);
- }
-}
-
-static void dce_v11_0_encoder_prepare(struct drm_encoder *encoder)
-{
- struct amdgpu_device *adev = drm_to_adev(encoder->dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
-
- if ((amdgpu_encoder->active_device &
- (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
- (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
- ENCODER_OBJECT_ID_NONE)) {
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- if (dig) {
- dig->dig_encoder = dce_v11_0_pick_dig_encoder(encoder);
- if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
- dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
- }
- }
-
- amdgpu_atombios_scratch_regs_lock(adev, true);
-
- if (connector) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- /* select the clock/data port if it uses a router */
- if (amdgpu_connector->router.cd_valid)
- amdgpu_i2c_router_select_cd_port(amdgpu_connector);
-
- /* turn eDP panel on for mode set */
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
- amdgpu_atombios_encoder_set_edp_panel_power(connector,
- ATOM_TRANSMITTER_ACTION_POWER_ON);
- }
-
- /* this is needed for the pll/ss setup to work correctly in some cases */
- amdgpu_atombios_encoder_set_crtc_source(encoder);
- /* set up the FMT blocks */
- dce_v11_0_program_fmt(encoder);
-}
-
-static void dce_v11_0_encoder_commit(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- /* need to call this here as we need the crtc set up */
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
- amdgpu_atombios_scratch_regs_lock(adev, false);
-}
-
-static void dce_v11_0_encoder_disable(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig;
-
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
-
- if (amdgpu_atombios_encoder_is_digital(encoder)) {
- if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
- dce_v11_0_afmt_enable(encoder, false);
- dig = amdgpu_encoder->enc_priv;
- dig->dig_encoder = -1;
- }
- amdgpu_encoder->active_device = 0;
-}
-
-/* these are handled by the primary encoders */
-static void dce_v11_0_ext_prepare(struct drm_encoder *encoder)
-{
-
-}
-
-static void dce_v11_0_ext_commit(struct drm_encoder *encoder)
-{
-
-}
-
-static void
-dce_v11_0_ext_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
-
-}
-
-static void dce_v11_0_ext_disable(struct drm_encoder *encoder)
-{
-
-}
-
-static void
-dce_v11_0_ext_dpms(struct drm_encoder *encoder, int mode)
-{
-
-}
-
-static const struct drm_encoder_helper_funcs dce_v11_0_ext_helper_funcs = {
- .dpms = dce_v11_0_ext_dpms,
- .prepare = dce_v11_0_ext_prepare,
- .mode_set = dce_v11_0_ext_mode_set,
- .commit = dce_v11_0_ext_commit,
- .disable = dce_v11_0_ext_disable,
- /* no detect for TMDS/LVDS yet */
-};
-
-static const struct drm_encoder_helper_funcs dce_v11_0_dig_helper_funcs = {
- .dpms = amdgpu_atombios_encoder_dpms,
- .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
- .prepare = dce_v11_0_encoder_prepare,
- .mode_set = dce_v11_0_encoder_mode_set,
- .commit = dce_v11_0_encoder_commit,
- .disable = dce_v11_0_encoder_disable,
- .detect = amdgpu_atombios_encoder_dig_detect,
-};
-
-static const struct drm_encoder_helper_funcs dce_v11_0_dac_helper_funcs = {
- .dpms = amdgpu_atombios_encoder_dpms,
- .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
- .prepare = dce_v11_0_encoder_prepare,
- .mode_set = dce_v11_0_encoder_mode_set,
- .commit = dce_v11_0_encoder_commit,
- .detect = amdgpu_atombios_encoder_dac_detect,
-};
-
-static void dce_v11_0_encoder_destroy(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
- amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
- kfree(amdgpu_encoder->enc_priv);
- drm_encoder_cleanup(encoder);
- kfree(amdgpu_encoder);
-}
-
-static const struct drm_encoder_funcs dce_v11_0_encoder_funcs = {
- .destroy = dce_v11_0_encoder_destroy,
-};
-
-static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
- uint32_t encoder_enum,
- uint32_t supported_device,
- u16 caps)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_encoder *encoder;
- struct amdgpu_encoder *amdgpu_encoder;
-
- /* see if we already added it */
- list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
- amdgpu_encoder = to_amdgpu_encoder(encoder);
- if (amdgpu_encoder->encoder_enum == encoder_enum) {
- amdgpu_encoder->devices |= supported_device;
- return;
- }
-
- }
-
- /* add a new one */
- amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
- if (!amdgpu_encoder)
- return;
-
- encoder = &amdgpu_encoder->base;
- switch (adev->mode_info.num_crtc) {
- case 1:
- encoder->possible_crtcs = 0x1;
- break;
- case 2:
- default:
- encoder->possible_crtcs = 0x3;
- break;
- case 3:
- encoder->possible_crtcs = 0x7;
- break;
- case 4:
- encoder->possible_crtcs = 0xf;
- break;
- case 5:
- encoder->possible_crtcs = 0x1f;
- break;
- case 6:
- encoder->possible_crtcs = 0x3f;
- break;
- }
-
- amdgpu_encoder->enc_priv = NULL;
-
- amdgpu_encoder->encoder_enum = encoder_enum;
- amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
- amdgpu_encoder->devices = supported_device;
- amdgpu_encoder->rmx_type = RMX_OFF;
- amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
- amdgpu_encoder->is_ext_encoder = false;
- amdgpu_encoder->caps = caps;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- drm_encoder_helper_add(encoder, &dce_v11_0_dac_helper_funcs);
- break;
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
- amdgpu_encoder->rmx_type = RMX_FULL;
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
- } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
- } else {
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
- }
- drm_encoder_helper_add(encoder, &dce_v11_0_dig_helper_funcs);
- break;
- case ENCODER_OBJECT_ID_SI170B:
- case ENCODER_OBJECT_ID_CH7303:
- case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
- case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
- case ENCODER_OBJECT_ID_TITFP513:
- case ENCODER_OBJECT_ID_VT1623:
- case ENCODER_OBJECT_ID_HDMI_SI1930:
- case ENCODER_OBJECT_ID_TRAVIS:
- case ENCODER_OBJECT_ID_NUTMEG:
- /* these are handled by the primary encoders */
- amdgpu_encoder->is_ext_encoder = true;
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
- else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- else
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
- drm_encoder_helper_add(encoder, &dce_v11_0_ext_helper_funcs);
- break;
- }
-}
-
-static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
- .bandwidth_update = &dce_v11_0_bandwidth_update,
- .vblank_get_counter = &dce_v11_0_vblank_get_counter,
- .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
- .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
- .hpd_sense = &dce_v11_0_hpd_sense,
- .hpd_set_polarity = &dce_v11_0_hpd_set_polarity,
- .hpd_get_gpio_reg = &dce_v11_0_hpd_get_gpio_reg,
- .page_flip = &dce_v11_0_page_flip,
- .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos,
- .add_encoder = &dce_v11_0_encoder_add,
- .add_connector = &amdgpu_connector_add,
-};
-
-static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev)
-{
- adev->mode_info.funcs = &dce_v11_0_display_funcs;
-}
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = {
- .set = dce_v11_0_set_crtc_irq_state,
- .process = dce_v11_0_crtc_irq,
-};
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_pageflip_irq_funcs = {
- .set = dce_v11_0_set_pageflip_irq_state,
- .process = dce_v11_0_pageflip_irq,
-};
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_hpd_irq_funcs = {
- .set = dce_v11_0_set_hpd_irq_state,
- .process = dce_v11_0_hpd_irq,
-};
-
-static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev)
-{
- if (adev->mode_info.num_crtc > 0)
- adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
- else
- adev->crtc_irq.num_types = 0;
- adev->crtc_irq.funcs = &dce_v11_0_crtc_irq_funcs;
-
- adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
- adev->pageflip_irq.funcs = &dce_v11_0_pageflip_irq_funcs;
-
- adev->hpd_irq.num_types = adev->mode_info.num_hpd;
- adev->hpd_irq.funcs = &dce_v11_0_hpd_irq_funcs;
-}
-
-const struct amdgpu_ip_block_version dce_v11_0_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_DCE,
- .major = 11,
- .minor = 0,
- .rev = 0,
- .funcs = &dce_v11_0_ip_funcs,
-};
-
-const struct amdgpu_ip_block_version dce_v11_2_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_DCE,
- .major = 11,
- .minor = 2,
- .rev = 0,
- .funcs = &dce_v11_0_ip_funcs,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 264183ab24ec..8841d7213de4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4075,7 +4075,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *f = NULL;
unsigned int index;
uint64_t gpu_addr;
- volatile uint32_t *cpu_ptr;
+ uint32_t *cpu_ptr;
long r;
memset(&ib, 0, sizeof(ib));
@@ -4322,8 +4322,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
u32 count = 0;
int ctx_reg_offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 3d9c045a8a64..66c47c466532 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -603,7 +603,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *f = NULL;
unsigned index;
uint64_t gpu_addr;
- volatile uint32_t *cpu_ptr;
+ uint32_t *cpu_ptr;
long r;
/* MES KIQ fw hasn't indirect buffer support for now */
@@ -850,8 +850,7 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
u32 count = 0;
int ctx_reg_offset;
@@ -1654,6 +1653,21 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
}
}
break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.pfp_fw_version >= 102 &&
+ adev->gfx.mec_fw_version >= 66 &&
+ adev->mes.fw_version[0] >= 128) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 5dbc5dbc694a..710ec9c34e43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -497,7 +497,7 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *f = NULL;
unsigned index;
uint64_t gpu_addr;
- volatile uint32_t *cpu_ptr;
+ uint32_t *cpu_ptr;
long r;
/* MES KIQ fw hasn't indirect buffer support for now */
@@ -685,8 +685,7 @@ static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
u32 count = 0, clustercount = 0, i;
const struct cs_section_def *sect = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 70d7a1f434c4..7693b7953426 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -86,7 +86,7 @@ MODULE_FIRMWARE("amdgpu/hainan_ce.bin");
MODULE_FIRMWARE("amdgpu/hainan_rlc.bin");
static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);
-static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer);
//static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev);
static void gfx_v6_0_init_pg(struct amdgpu_device *adev);
@@ -2354,7 +2354,7 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
{
const u32 *src_ptr;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
u32 dws;
u64 reg_list_mc_addr;
const struct cs_section_def *cs_data;
@@ -2855,8 +2855,7 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
u32 count = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 2aa323dab34e..5976ed55d9db 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -883,7 +883,7 @@ static const u32 kalindi_rlc_save_restore_register_list[] = {
};
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
-static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer);
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
@@ -3882,8 +3882,7 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
u32 count = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 367449d8061b..0856ff65288c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1220,8 +1220,7 @@ out:
return err;
}
-static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
u32 count = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a6ff9a137a83..dd19a97436db 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1648,8 +1648,7 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
u32 count = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 8ba66d4dfe86..77f9d5b9a556 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3560,6 +3560,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE;
unsigned long flags;
int r;
@@ -3597,6 +3598,7 @@ pipe_reset:
if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE))
return -EOPNOTSUPP;
r = gfx_v9_4_3_reset_hw_pipe(ring);
+ reset_mode = AMDGPU_RESET_TYPE_PER_PIPE;
dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name,
r ? "failed" : "successfully");
if (r)
@@ -3619,10 +3621,20 @@ pipe_reset:
r = amdgpu_ring_test_ring(kiq_ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
if (r) {
+ if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE)
+ goto pipe_reset;
+
dev_err(adev->dev, "fail to remap queue\n");
return r;
}
+ if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) {
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ goto pipe_reset;
+ }
+
+
return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index 76d3c40735b0..f4a19357ccbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -337,7 +337,7 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
int vmid, i;
if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
- (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) {
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x84) {
struct mes_inv_tlbs_pasid_input input = {0};
input.pasid = pasid;
input.flush_type = flush_type;
@@ -521,6 +521,7 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
*flags &= ~AMDGPU_PTE_NOALLOC;
if (vm_flags & AMDGPU_VM_PAGE_PRT) {
+ *flags |= AMDGPU_PTE_PRT_GFX12;
*flags |= AMDGPU_PTE_SNOOPED;
*flags |= AMDGPU_PTE_SYSTEM;
*flags |= AMDGPU_PTE_IS_PTE;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 8404695eb13f..0d1dd587db5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1834,11 +1834,19 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
{
+ static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
+ u32 vram_info;
+
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
adev->gmc.vram_width = 128 * 64;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+ if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
+ vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
+ adev->gmc.vram_vendor = vram_info & 0xF;
+ }
}
static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index 9e428e669ada..b5bb7f4d607c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -557,7 +557,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
.nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.no_user_fence = true,
- .extra_dw = 64,
+ .extra_bytes = 256,
.get_rptr = jpeg_v1_0_decode_ring_get_rptr,
.get_wptr = jpeg_v1_0_decode_ring_get_wptr,
.set_wptr = jpeg_v1_0_decode_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 58239c405fda..27c76bd424cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -23,7 +23,6 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
-#include "amdgpu_cs.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
@@ -806,7 +805,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_0_dec_ring_get_rptr,
.get_wptr = jpeg_v2_0_dec_ring_get_wptr,
.set_wptr = jpeg_v2_0_dec_ring_set_wptr,
- .parse_cs = jpeg_v2_dec_ring_parse_cs,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -854,58 +853,3 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = {
.rev = 0,
.funcs = &jpeg_v2_0_ip_funcs,
};
-
-/**
- * jpeg_v2_dec_ring_parse_cs - command submission parser
- *
- * @parser: Command submission parser context
- * @job: the job to parse
- * @ib: the IB to parse
- *
- * Parse the command stream, return -EINVAL for invalid packet,
- * 0 otherwise
- */
-int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib)
-{
- u32 i, reg, res, cond, type;
- struct amdgpu_device *adev = parser->adev;
-
- for (i = 0; i < ib->length_dw ; i += 2) {
- reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
- res = CP_PACKETJ_GET_RES(ib->ptr[i]);
- cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
- type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
-
- if (res) /* only support 0 at the moment */
- return -EINVAL;
-
- switch (type) {
- case PACKETJ_TYPE0:
- if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START ||
- reg > JPEG_REG_RANGE_END) {
- dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
- return -EINVAL;
- }
- break;
- case PACKETJ_TYPE3:
- if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START ||
- reg > JPEG_REG_RANGE_END) {
- dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
- return -EINVAL;
- }
- break;
- case PACKETJ_TYPE6:
- if (ib->ptr[i] == CP_PACKETJ_NOP)
- continue;
- dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
- return -EINVAL;
- default:
- dev_err(adev->dev, "Unknown packet type %d !\n", type);
- return -EINVAL;
- }
- }
-
- return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
index 63fadda7a673..654e43e83e2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -45,9 +45,6 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
-#define JPEG_REG_RANGE_START 0x4000
-#define JPEG_REG_RANGE_END 0x41c2
-
void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
@@ -60,9 +57,6 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr);
void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
-int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib);
extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 3e2c389242db..20983f126b49 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -696,7 +696,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
- .parse_cs = jpeg_v2_dec_ring_parse_cs,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -727,7 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
- .parse_cs = jpeg_v2_dec_ring_parse_cs,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index a44eb2667664..d1a011c40ba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -597,7 +597,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v3_0_dec_ring_set_wptr,
- .parse_cs = jpeg_v2_dec_ring_parse_cs,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index da3ee69f1a3b..33db2c1ae6cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -762,7 +762,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_dec_ring_set_wptr,
- .parse_cs = jpeg_v2_dec_ring_parse_cs,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index a78144773fab..aae7328973d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -1177,7 +1177,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
- .parse_cs = jpeg_v2_dec_ring_parse_cs,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index 5d86e1d846eb..54fd9c800c40 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -807,7 +807,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_5_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_5_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_5_dec_ring_set_wptr,
- .parse_cs = jpeg_v2_dec_ring_parse_cs,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
index 34c70270ea1d..46bf15dce2bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -683,7 +683,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
.get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
.set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
- .parse_cs = jpeg_v2_dec_ring_parse_cs,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index aee26f80bd53..2db9b2c63693 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -254,6 +254,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
struct drm_amdgpu_userq_in *mqd_user = args_in;
struct amdgpu_mqd_prop *userq_props;
+ struct amdgpu_gfx_shadow_info shadow_info;
int r;
/* Structure to initialize MQD for userqueue using generic MQD init function */
@@ -263,13 +264,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
return -ENOMEM;
}
- if (!mqd_user->wptr_va || !mqd_user->rptr_va ||
- !mqd_user->queue_va || mqd_user->queue_size == 0) {
- DRM_ERROR("Invalid MQD parameters for userqueue\n");
- r = -EINVAL;
- goto free_props;
- }
-
r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size);
if (r) {
DRM_ERROR("Failed to create MQD object for userqueue\n");
@@ -286,6 +280,8 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->doorbell_index = queue->doorbell_index;
userq_props->fence_address = queue->fence_drv->gpu_addr;
+ if (adev->gfx.funcs->get_gfx_shadow_info)
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
@@ -302,6 +298,10 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
goto free_mqd;
}
+ if (amdgpu_userq_input_va_validate(queue->vm, compute_mqd->eop_va,
+ max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE)))
+ goto free_mqd;
+
userq_props->eop_gpu_addr = compute_mqd->eop_va;
userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
@@ -329,6 +329,11 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->csa_addr = mqd_gfx_v11->csa_va;
userq_props->tmz_queue =
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+
+ if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va,
+ shadow_info.shadow_size))
+ goto free_mqd;
+
kfree(mqd_gfx_v11);
} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;
@@ -346,6 +351,10 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
goto free_mqd;
}
+ if (amdgpu_userq_input_va_validate(queue->vm, mqd_sdma_v11->csa_va,
+ shadow_info.csa_size))
+ goto free_mqd;
+
userq_props->csa_addr = mqd_sdma_v11->csa_va;
kfree(mqd_sdma_v11);
}
@@ -395,10 +404,82 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
}
+static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_suspend_gang_input queue_input;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ signed long timeout = 2100000; /* 2100 ms */
+ u64 fence_gpu_addr;
+ u32 fence_offset;
+ u64 *fence_ptr;
+ int i, r;
+
+ if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+ return 0;
+ r = amdgpu_device_wb_get(adev, &fence_offset);
+ if (r)
+ return r;
+
+ fence_gpu_addr = adev->wb.gpu_addr + (fence_offset * 4);
+ fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
+ *fence_ptr = 0;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.suspend_fence_addr = fence_gpu_addr;
+ queue_input.suspend_fence_value = 1;
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r) {
+ DRM_ERROR("Failed to suspend gang: %d\n", r);
+ goto out;
+ }
+
+ for (i = 0; i < timeout; i++) {
+ if (*fence_ptr == 1)
+ goto out;
+ udelay(1);
+ }
+ r = -ETIMEDOUT;
+
+out:
+ amdgpu_device_wb_free(adev, fence_offset);
+ return r;
+}
+
+static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_resume_gang_input queue_input;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ int r;
+
+ if (queue->state == AMDGPU_USERQ_STATE_HUNG)
+ return -EINVAL;
+ if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
+ return 0;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "Failed to resume queue, err (%d)\n", r);
+ return r;
+}
+
const struct amdgpu_userq_funcs userq_mes_funcs = {
.mqd_create = mes_userq_mqd_create,
.mqd_destroy = mes_userq_mqd_destroy,
.unmap = mes_userq_unmap,
.map = mes_userq_map,
.detect_and_reset = mes_userq_detect_and_reset,
+ .preempt = mes_userq_preempt,
+ .restore = mes_userq_restore,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 3b91ea601add..e82188431f79 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -713,6 +713,12 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f)
+ mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
+ else
+ dev_info_once(mes->adev->dev,
+ "MES FW version must be >= 0x7f to enable LR compute workaround.\n");
+
if (amdgpu_mes_log_enable) {
mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 998893dff08e..aff06f06aeee 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -769,6 +769,11 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82)
+ mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
+ else
+ dev_info_once(adev->dev,
+ "MES FW version must be >= 0x82 to enable LR compute workaround.\n");
/*
* Keep oversubscribe timer for sdma . When we have unmapped doorbell
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 457972aa5632..e5282a5d05d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -202,6 +202,9 @@ send_request:
case IDH_REQ_RAS_CPER_DUMP:
event = IDH_RAS_CPER_DUMP_READY;
break;
+ case IDH_REQ_RAS_CHK_CRITI:
+ event = IDH_REQ_RAS_CHK_CRITI_READY;
+ break;
default:
break;
}
@@ -556,6 +559,16 @@ static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev)
return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES);
}
+static int xgpu_nv_check_vf_critical_region(struct amdgpu_device *adev, u64 addr)
+{
+ uint32_t addr_hi, addr_lo;
+
+ addr_hi = (uint32_t)(addr >> 32);
+ addr_lo = (uint32_t)(addr & 0xFFFFFFFF);
+ return xgpu_nv_send_access_requests_with_param(
+ adev, IDH_REQ_RAS_CHK_CRITI, addr_hi, addr_lo, 0);
+}
+
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_full_gpu = xgpu_nv_request_full_gpu_access,
.rel_full_gpu = xgpu_nv_release_full_gpu_access,
@@ -569,4 +582,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_ras_err_count = xgpu_nv_req_ras_err_count,
.req_ras_cper_dump = xgpu_nv_req_ras_cper_dump,
.req_bad_pages = xgpu_nv_req_ras_bad_pages,
+ .req_ras_chk_criti = xgpu_nv_check_vf_critical_region
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 5808689562cc..c1083e5e41e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -43,6 +43,7 @@ enum idh_request {
IDH_REQ_RAS_ERROR_COUNT = 203,
IDH_REQ_RAS_CPER_DUMP = 204,
IDH_REQ_RAS_BAD_PAGES = 205,
+ IDH_REQ_RAS_CHK_CRITI = 206
};
enum idh_event {
@@ -62,6 +63,7 @@ enum idh_event {
IDH_RAS_BAD_PAGES_READY = 15,
IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
IDH_UNRECOV_ERR_NOTIFICATION = 17,
+ IDH_REQ_RAS_CHK_CRITI_READY = 18,
IDH_TEXT_MESSAGE = 255,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
index dd2d66090d23..68aef47254a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -743,7 +743,7 @@ int smu_v11_0_i2c_control_init(struct amdgpu_device *adev)
adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res)
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
@@ -752,9 +752,6 @@ int smu_v11_0_i2c_control_init(struct amdgpu_device *adev)
void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev)
{
- struct i2c_adapter *control = adev->pm.ras_eeprom_i2c_bus;
-
- i2c_del_adapter(control);
adev->pm.ras_eeprom_i2c_bus = NULL;
adev->pm.fru_eeprom_i2c_bus = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 1e89ba153d9d..a316797875a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -193,7 +193,7 @@ static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->vcn.inst[0].pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
if (amdgpu_vcnfw_log) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
amdgpu_vcn_fwlog_init(adev->vcn.inst);
@@ -230,11 +230,11 @@ static int vcn_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
jpeg_v1_0_sw_fini(ip_block);
- r = amdgpu_vcn_sw_fini(adev, 0);
+ amdgpu_vcn_sw_fini(adev, 0);
kfree(adev->vcn.ip_dump);
- return r;
+ return 0;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index b115137ab2d6..8897dcc9c1a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -137,7 +137,7 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_ring *ring;
int i, r;
struct amdgpu_device *adev = ip_block->adev;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
@@ -252,7 +252,7 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r, idx;
struct amdgpu_device *adev = ip_block->adev;
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
fw_shared->present_flag_0 = 0;
@@ -267,9 +267,9 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_vcn_sysfs_reset_mask_fini(adev);
- r = amdgpu_vcn_sw_fini(adev, 0);
+ amdgpu_vcn_sw_fini(adev, 0);
- return r;
+ return 0;
}
/**
@@ -853,7 +853,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
struct amdgpu_device *adev = vinst->adev;
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
int ret;
@@ -1001,7 +1001,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
static int vcn_v2_0_start(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -1308,7 +1308,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 904b94bc8693..cebee453871c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -277,7 +277,7 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << j))
continue;
@@ -420,7 +420,7 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, r, idx;
struct amdgpu_device *adev = ip_block->adev;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
@@ -442,9 +442,7 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block)
r = amdgpu_vcn_suspend(adev, i);
if (r)
return r;
- r = amdgpu_vcn_sw_fini(adev, i);
- if (r)
- return r;
+ amdgpu_vcn_sw_fini(adev, i);
}
return 0;
@@ -1000,7 +998,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
struct amdgpu_device *adev = vinst->adev;
int inst_idx = vinst->inst;
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
int ret;
@@ -1157,7 +1155,7 @@ static int vcn_v2_5_start(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_fw_shared *fw_shared =
+ struct amdgpu_fw_shared *fw_shared =
adev->vcn.inst[i].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
@@ -1669,7 +1667,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index f3085137ba08..d9cf8f0feeb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -191,7 +191,7 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
}
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -327,7 +327,7 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -349,9 +349,7 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
if (r)
return r;
- r = amdgpu_vcn_sw_fini(adev, i);
- if (r)
- return r;
+ amdgpu_vcn_sw_fini(adev, i);
}
return 0;
@@ -1031,7 +1029,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
struct amdgpu_device *adev = vinst->adev;
int inst_idx = vinst->inst;
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
int ret;
@@ -1196,7 +1194,7 @@ static int vcn_v3_0_start(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
int j, k, r;
@@ -1717,7 +1715,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
{
struct amdgpu_device *adev = vinst->adev;
int inst_idx = vinst->inst;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code;
@@ -1836,7 +1834,7 @@ static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
/*whenever update RBC_RB_WPTR, we save the wptr in shared rb.wptr and scratch2 */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index bc9dfe5ffea7..3ae666522d57 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -148,7 +148,7 @@ static int vcn_v4_0_early_init(struct amdgpu_ip_block *ip_block)
static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
@@ -278,7 +278,7 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -302,11 +302,8 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_vcn_sysfs_reset_mask_fini(adev);
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- r = amdgpu_vcn_sw_fini(adev, i);
- if (r)
- return r;
- }
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
return 0;
}
@@ -1000,7 +997,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
struct amdgpu_device *adev = vinst->adev;
int inst_idx = vinst->inst;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
int ret;
@@ -1140,7 +1137,7 @@ static int vcn_v4_0_start(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
int j, k, r;
@@ -1357,8 +1354,8 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
struct mmsch_v4_0_cmd_end end = { {0} };
struct mmsch_v4_0_init_header header;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
- volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_fw_shared_rb_setup *rb_setup;
direct_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_WRITE;
@@ -1609,7 +1606,7 @@ static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
uint32_t tmp;
int r = 0;
@@ -1980,7 +1977,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
- .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata),
+ .extra_bytes = sizeof(struct amdgpu_vcn_rb_metadata),
.get_rptr = vcn_v4_0_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_unified_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 7b93a275ec4f..eacf4e93ba2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -212,7 +212,11 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+
+ /* There are no per-instance irq source IDs on 4.0.3, the IH
+ * packets use a separate field to differentiate instances.
+ */
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0,
AMDGPU_RING_PRIO_DEFAULT,
&adev->vcn.inst[i].sched_score);
if (r)
@@ -259,7 +263,7 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
if (drm_dev_enter(&adev->ddev, &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
@@ -279,11 +283,8 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_vcn_sysfs_reset_mask_fini(adev);
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- r = amdgpu_vcn_sw_fini(adev, i);
- if (r)
- return r;
- }
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
return 0;
}
@@ -844,7 +845,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
{
struct amdgpu_device *adev = vinst->adev;
int inst_idx = vinst->inst;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared =
+ struct amdgpu_vcn4_fw_shared *fw_shared =
adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
int vcn_inst, ret;
@@ -1011,8 +1012,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
struct mmsch_v4_0_cmd_end end = { {0} };
struct mmsch_v4_0_3_init_header header;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
- volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_fw_shared_rb_setup *rb_setup;
direct_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_WRITE;
@@ -1186,7 +1187,7 @@ static int vcn_v4_0_3_start(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
int j, k, r, vcn_inst;
uint32_t tmp;
@@ -1396,7 +1397,7 @@ static int vcn_v4_0_3_stop(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
int r = 0, vcn_inst;
uint32_t tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 6dbf33b26ee2..b107ee80e472 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -149,7 +149,7 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block)
int i, r;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -249,7 +249,7 @@ static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -270,9 +270,7 @@ static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
if (r)
return r;
- r = amdgpu_vcn_sw_fini(adev, i);
- if (r)
- return r;
+ amdgpu_vcn_sw_fini(adev, i);
}
return 0;
@@ -912,7 +910,7 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
{
struct amdgpu_device *adev = vinst->adev;
int inst_idx = vinst->inst;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
int ret;
@@ -1049,7 +1047,7 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
int j, k, r;
@@ -1268,7 +1266,7 @@ static int vcn_v4_0_5_stop(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
uint32_t tmp;
int r = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index 536f06b81706..0202df5db1e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -129,7 +129,7 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
int i, r;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -211,7 +211,7 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block)
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -232,11 +232,8 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_vcn_sysfs_reset_mask_fini(adev);
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- r = amdgpu_vcn_sw_fini(adev, i);
- if (r)
- return r;
- }
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
return 0;
}
@@ -695,7 +692,7 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
{
struct amdgpu_device *adev = vinst->adev;
int inst_idx = vinst->inst;
- volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
int ret;
@@ -805,7 +802,7 @@ static int vcn_v5_0_0_start(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
int j, k, r;
@@ -998,7 +995,7 @@ static int vcn_v5_0_0_stop(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
uint32_t tmp;
int r = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
index 4b01e35ad7ef..714350cabf2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -113,6 +113,25 @@ static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
return 0;
}
+static int vcn_v5_0_1_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 12):
+ if ((adev->psp.sos.fw_version >= 0x00450025) && amdgpu_dpm_reset_vcn_is_supported(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
{
struct amdgpu_vcn5_fw_shared *fw_shared;
@@ -187,10 +206,6 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
vcn_v5_0_1_fw_shared_init(adev, i);
}
- /* TODO: Add queue reset mask when FW fully supports it */
- adev->vcn.supported_reset =
- amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
-
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
@@ -226,7 +241,7 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
@@ -245,14 +260,28 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
return r;
}
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- r = amdgpu_vcn_sw_fini(adev, i);
- if (r)
- return r;
- }
-
amdgpu_vcn_sysfs_reset_mask_fini(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
+
+ return 0;
+}
+
+static int vcn_v5_0_1_hw_init_inst(struct amdgpu_device *adev, int i)
+{
+ struct amdgpu_ring *ring;
+ int vcn_inst;
+
+ vcn_inst = GET_INST(VCN, i);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 11 * vcn_inst),
+ adev->vcn.inst[i].aid_id);
+
return 0;
}
@@ -267,7 +296,7 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
- int i, r, vcn_inst;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
r = vcn_v5_0_1_start_sriov(adev);
@@ -285,14 +314,8 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- vcn_inst = GET_INST(VCN, i);
ring = &adev->vcn.inst[i].ring_enc[0];
-
- if (ring->use_doorbell)
- adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
- ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
- 11 * vcn_inst),
- adev->vcn.inst[i].aid_id);
+ vcn_v5_0_1_hw_init_inst(adev, i);
/* Re-init fw_shared, if required */
vcn_v5_0_1_fw_shared_init(adev, i);
@@ -643,7 +666,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
{
struct amdgpu_device *adev = vinst->adev;
int inst_idx = vinst->inst;
- volatile struct amdgpu_vcn5_fw_shared *fw_shared =
+ struct amdgpu_vcn5_fw_shared *fw_shared =
adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE};
@@ -779,8 +802,8 @@ static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev)
struct mmsch_v5_0_cmd_end end = { {0} };
struct mmsch_v5_0_init_header header;
- volatile struct amdgpu_vcn5_fw_shared *fw_shared;
- volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_fw_shared_rb_setup *rb_setup;
direct_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_WRITE;
@@ -954,7 +977,7 @@ static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
int j, k, r, vcn_inst;
@@ -1146,7 +1169,7 @@ static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst)
{
struct amdgpu_device *adev = vinst->adev;
int i = vinst->inst;
- volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
uint32_t tmp;
int r = 0, vcn_inst;
@@ -1276,6 +1299,31 @@ static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
+static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r = 0;
+ int vcn_inst;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ vcn_inst = GET_INST(VCN, ring->me);
+ r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r);
+ return r;
+ }
+
+ vcn_v5_0_1_hw_init_inst(adev, ring->me);
+ vcn_v5_0_1_start_dpg_mode(vinst, vinst->indirect_sram);
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@@ -1304,6 +1352,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
.emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v5_0_1_ring_reset,
};
/**
@@ -1507,7 +1556,7 @@ static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = {
.name = "vcn_v5_0_1",
.early_init = vcn_v5_0_1_early_init,
- .late_init = NULL,
+ .late_init = vcn_v5_0_1_late_init,
.sw_init = vcn_v5_0_1_sw_init,
.sw_fini = vcn_v5_0_1_sw_fini,
.hw_init = vcn_v5_0_1_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 9b3510e53112..a611a7345125 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -67,7 +67,6 @@
#include "sdma_v2_4.h"
#include "sdma_v3_0.h"
#include "dce_v10_0.h"
-#include "dce_v11_0.h"
#include "iceland_ih.h"
#include "tonga_ih.h"
#include "cz_ih.h"
@@ -2124,8 +2123,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
- else
- amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
break;
@@ -2142,8 +2139,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
- else
- amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block);
#if defined(CONFIG_DRM_AMD_ACP)
@@ -2163,8 +2158,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
- else
- amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
#if defined(CONFIG_DRM_AMD_ACP)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 8535a52a62ca..0f0719528bcc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -521,15 +521,10 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
}
- minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
- if (!minfo.cu_mask.ptr)
- return -ENOMEM;
-
- retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
- if (retval) {
+ minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size);
+ if (IS_ERR(minfo.cu_mask.ptr)) {
pr_debug("Could not copy CU mask from userspace");
- retval = -EFAULT;
- goto out;
+ return PTR_ERR(minfo.cu_mask.ptr);
}
mutex_lock(&p->mutex);
@@ -538,7 +533,6 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
mutex_unlock(&p->mutex);
-out:
kfree(minfo.cu_mask.ptr);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 7e749f9b6d69..e9cfb80bd436 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -495,6 +495,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
mutex_init(&kfd->doorbell_mutex);
ida_init(&kfd->doorbell_ida);
+ atomic_set(&kfd->kfd_processes_count, 0);
return kfd;
}
@@ -1133,7 +1134,15 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
}
for (i = 0; i < kfd->num_nodes; i++) {
- node = kfd->nodes[i];
+ /* Race if another thread in b/w
+ * kfd_cleanup_nodes and kfree(kfd),
+ * when kfd->nodes[i] = NULL
+ */
+ if (kfd->nodes[i])
+ node = kfd->nodes[i];
+ else
+ return;
+
spin_lock_irqsave(&node->interrupt_lock, flags);
if (node->interrupts_active
@@ -1485,6 +1494,15 @@ int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
mutex_lock(&kfd_processes_mutex);
+ /* kfd_processes_count is per kfd_dev, return -EBUSY without
+ * further check
+ */
+ if (!!atomic_read(&kfd->kfd_processes_count)) {
+ pr_debug("process_wq_release not finished\n");
+ r = -EBUSY;
+ goto out;
+ }
+
if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd))
goto out;
@@ -1550,6 +1568,25 @@ int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
return ret;
}
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+ struct kfd_node *node;
+ int i, r;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ r = node->dqm->ops.unhalt(node->dqm);
+ if (r) {
+ dev_err(kfd_device, "Error in starting scheduler\n");
+ return r;
+ }
+ }
+ return 0;
+}
+
int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
{
struct kfd_node *node;
@@ -1567,6 +1604,23 @@ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
return node->dqm->ops.halt(node->dqm);
}
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+ struct kfd_node *node;
+ int i, r;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ r = node->dqm->ops.halt(node->dqm);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
{
struct kfd_node *node;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 86315ecb6f1d..59a5a3fea65d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -39,22 +39,22 @@
#endif
#define dev_fmt(fmt) "kfd_migrate: " fmt
-static uint64_t
-svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+static u64
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, u64 addr)
{
return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}
static int
-svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
- dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+svm_migrate_gart_map(struct amdgpu_ring *ring, u64 npages,
+ dma_addr_t *addr, u64 *gart_addr, u64 flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
unsigned int num_dw, num_bytes;
struct dma_fence *fence;
- uint64_t src_addr, dst_addr;
- uint64_t pte_flags;
+ u64 src_addr, dst_addr;
+ u64 pte_flags;
void *cpu_addr;
int r;
@@ -123,15 +123,15 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
- uint64_t *vram, uint64_t npages,
+ u64 *vram, u64 npages,
enum MIGRATION_COPY_DIR direction,
struct dma_fence **mfence)
{
- const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+ const u64 GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- uint64_t gart_s, gart_d;
+ u64 gart_s, gart_d;
struct dma_fence *next;
- uint64_t size;
+ u64 size;
int r;
mutex_lock(&adev->mman.gtt_window_lock);
@@ -261,30 +261,42 @@ static void svm_migrate_put_sys_page(unsigned long addr)
put_page(page);
}
-static long
+static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
+{
+ unsigned long mpages = 0;
+ unsigned long i;
+
+ for (i = 0; i < migrate->npages; i++) {
+ if (migrate->dst[i] & MIGRATE_PFN_VALID &&
+ migrate->src[i] & MIGRATE_PFN_MIGRATE)
+ mpages++;
+ }
+ return mpages;
+}
+
+static int
svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch, uint64_t ttm_res_offset)
+ dma_addr_t *scratch, u64 ttm_res_offset)
{
- uint64_t npages = migrate->npages;
+ u64 npages = migrate->npages;
struct amdgpu_device *adev = node->adev;
struct device *dev = adev->dev;
struct amdgpu_res_cursor cursor;
- long mpages;
+ u64 mpages = 0;
dma_addr_t *src;
- uint64_t *dst;
- uint64_t i, j;
+ u64 *dst;
+ u64 i, j;
int r;
pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start,
prange->last, ttm_res_offset);
src = scratch;
- dst = (uint64_t *)(scratch + npages);
+ dst = (u64 *)(scratch + npages);
amdgpu_res_first(prange->ttm_res, ttm_res_offset,
npages << PAGE_SHIFT, &cursor);
- mpages = 0;
for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) {
struct page *spage;
@@ -345,14 +357,13 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
out_free_vram_pages:
if (r) {
pr_debug("failed %d to copy memory to vram\n", r);
- while (i-- && mpages) {
+ for (i = 0; i < npages && mpages; i++) {
if (!dst[i])
continue;
svm_migrate_put_vram_page(adev, dst[i]);
migrate->dst[i] = 0;
mpages--;
}
- mpages = r;
}
#ifdef DEBUG_FORCE_MIXED_DOMAINS
@@ -370,22 +381,22 @@ out_free_vram_pages:
}
#endif
- return mpages;
+ return r;
}
static long
svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start,
- uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
+ struct vm_area_struct *vma, u64 start,
+ u64 end, uint32_t trigger, u64 ttm_res_offset)
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
- uint64_t npages = (end - start) >> PAGE_SHIFT;
+ u64 npages = (end - start) >> PAGE_SHIFT;
struct amdgpu_device *adev = node->adev;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate = { 0 };
unsigned long cpages = 0;
- long mpages = 0;
+ unsigned long mpages = 0;
dma_addr_t *scratch;
void *buf;
int r = -ENOMEM;
@@ -398,7 +409,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
buf = kvcalloc(npages,
- 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+ 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t),
GFP_KERNEL);
if (!buf)
goto out;
@@ -431,17 +442,15 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
else
pr_debug("0x%lx pages collected\n", cpages);
- mpages = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
+ r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
migrate_vma_pages(&migrate);
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
- if (mpages >= 0)
- pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
- mpages, cpages, migrate.npages);
- else
- r = mpages;
+ mpages = svm_migrate_successful_pages(&migrate);
+ pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
+ mpages, cpages, migrate.npages);
svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
@@ -451,13 +460,14 @@ out_free:
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, node->id, trigger, r);
out:
- if (!r && mpages > 0) {
+ if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
- }
- return r ? r : mpages;
+ return mpages;
+ }
+ return r;
}
/**
@@ -481,7 +491,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
{
unsigned long addr, start, end;
struct vm_area_struct *vma;
- uint64_t ttm_res_offset;
+ u64 ttm_res_offset;
struct kfd_node *node;
unsigned long mpages = 0;
long r = 0;
@@ -568,18 +578,17 @@ static void svm_migrate_page_free(struct page *page)
}
}
-static long
+static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch, uint64_t npages)
+ dma_addr_t *scratch, u64 npages)
{
struct device *dev = adev->dev;
- uint64_t *src;
+ u64 *src;
dma_addr_t *dst;
struct page *dpage;
- long mpages;
- uint64_t i = 0, j;
- uint64_t addr;
+ u64 i = 0, j;
+ u64 addr;
int r = 0;
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
@@ -587,10 +596,9 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
addr = migrate->start;
- src = (uint64_t *)(scratch + npages);
+ src = (u64 *)(scratch + npages);
dst = scratch;
- mpages = 0;
for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
struct page *spage;
@@ -639,7 +647,6 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
- mpages++;
j++;
}
@@ -649,17 +656,13 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
out_oom:
if (r) {
pr_debug("failed %d copy to ram\n", r);
- while (i-- && mpages) {
- if (!migrate->dst[i])
- continue;
+ while (i--) {
svm_migrate_put_sys_page(dst[i]);
migrate->dst[i] = 0;
- mpages--;
}
- mpages = r;
}
- return mpages;
+ return r;
}
/**
@@ -681,13 +684,13 @@ out_oom:
*/
static long
svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start, uint64_t end,
+ struct vm_area_struct *vma, u64 start, u64 end,
uint32_t trigger, struct page *fault_page)
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
- uint64_t npages = (end - start) >> PAGE_SHIFT;
+ u64 npages = (end - start) >> PAGE_SHIFT;
unsigned long cpages = 0;
- long mpages = 0;
+ unsigned long mpages = 0;
struct amdgpu_device *adev = node->adev;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
@@ -707,7 +710,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
buf = kvcalloc(npages,
- 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+ 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t),
GFP_KERNEL);
if (!buf)
goto out;
@@ -741,15 +744,13 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
else
pr_debug("0x%lx pages collected\n", cpages);
- mpages = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
+ r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
scratch, npages);
migrate_vma_pages(&migrate);
- if (mpages >= 0)
- pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
+ mpages = svm_migrate_successful_pages(&migrate);
+ pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
mpages, cpages, migrate.npages);
- else
- r = mpages;
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
@@ -762,7 +763,7 @@ out_free:
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
node->id, 0, trigger, r);
out:
- if (!r && mpages > 0) {
+ if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
@@ -846,8 +847,8 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
if (r >= 0) {
WARN_ONCE(prange->vram_pages < mpages,
- "Recorded vram pages(0x%llx) should not be less than migration pages(0x%lx).",
- prange->vram_pages, mpages);
+ "Recorded vram pages(0x%llx) should not be less than migration pages(0x%lx).",
+ prange->vram_pages, mpages);
prange->vram_pages -= mpages;
/* prange does not have vram page set its actual_loc to system
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d01ef5ac0766..70ef051511bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -382,6 +382,8 @@ struct kfd_dev {
/* for dynamic partitioning */
int kfd_dev_lock;
+
+ atomic_t kfd_processes_count;
};
enum kfd_mempool {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 5be28c6c4f6a..ddfe30c13e9d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1088,6 +1088,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pdd->runtime_inuse = false;
}
+ atomic_dec(&pdd->dev->kfd->kfd_processes_count);
+
kfree(pdd);
p->pdds[i] = NULL;
}
@@ -1649,6 +1651,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
/* Init idr used for memory handle translation */
idr_init(&pdd->alloc_idr);
+ atomic_inc(&dev->kfd->kfd_processes_count);
+
return pdd;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 68ba239b2e5d..9d72411c3379 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1738,7 +1738,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
WRITE_ONCE(p->svms.faulting_task, current);
r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
- readonly, owner, NULL,
+ readonly, owner,
&hmm_range);
WRITE_ONCE(p->svms.faulting_task, NULL);
if (r)
@@ -3045,6 +3045,8 @@ retry_write_locked:
if (svms->checkpoint_ts[gpuidx] != 0) {
if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) {
pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ if (write_locked)
+ mmap_write_downgrade(mm);
r = -EAGAIN;
goto out_unlock_svms;
} else {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 62defeccbb5c..0d03e324d5b9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -233,6 +233,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
static int amdgpu_dm_connector_get_modes(struct drm_connector *connector);
+static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state);
static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
static int amdgpu_dm_atomic_check(struct drm_device *dev,
@@ -417,8 +418,7 @@ static inline bool update_planes_and_stream_adapter(struct dc *dc,
/*
* Previous frame finished and HW is ready for optimization.
*/
- if (update_type == UPDATE_TYPE_FAST)
- dc_post_update_surfaces_to_stream(dc);
+ dc_post_update_surfaces_to_stream(dc);
return dc_update_planes_and_stream(dc,
array_of_surface_update,
@@ -2000,6 +2000,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
init_data.flags.disable_ips_in_vpb = 0;
+ /* DCN35 and above supports dynamic DTBCLK switch */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0))
+ init_data.flags.allow_0_dtb_clk = true;
+
/* Enable DWB for tested platforms only */
if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0))
init_data.num_virtual_links = 1;
@@ -2081,6 +2085,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
dc_hardware_init(adev->dm.dc);
+ adev->dm.restore_backlight = true;
+
adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev);
if (!adev->dm.hpd_rx_offload_wq) {
drm_err(adev_to_drm(adev), "failed to create hpd rx offload workqueue.\n");
@@ -2945,7 +2951,7 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev)
return -ENOMEM;
}
- r = i2c_add_adapter(&oem_i2c->base);
+ r = devm_i2c_add_adapter(adev->dev, &oem_i2c->base);
if (r) {
drm_info(adev_to_drm(adev), "Failed to register oem i2c\n");
kfree(oem_i2c);
@@ -2957,17 +2963,6 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev)
return 0;
}
-static void dm_oem_i2c_hw_fini(struct amdgpu_device *adev)
-{
- struct amdgpu_display_manager *dm = &adev->dm;
-
- if (dm->oem_i2c) {
- i2c_del_adapter(&dm->oem_i2c->base);
- kfree(dm->oem_i2c);
- dm->oem_i2c = NULL;
- }
-}
-
/**
* dm_hw_init() - Initialize DC device
* @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
@@ -3018,8 +3013,6 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- dm_oem_i2c_hw_fini(adev);
-
amdgpu_dm_hpd_fini(adev);
amdgpu_dm_irq_fini(adev);
@@ -3047,14 +3040,20 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev,
drm_warn(adev_to_drm(adev), "Failed to %s pflip interrupts\n",
enable ? "enable" : "disable");
- if (enable) {
- if (amdgpu_dm_crtc_vrr_active(to_dm_crtc_state(acrtc->base.state)))
- rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, true);
- } else
- rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, false);
-
- if (rc)
- drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n", enable ? "en" : "dis");
+ if (dc_supports_vrr(adev->dm.dc->ctx->dce_version)) {
+ if (enable) {
+ if (amdgpu_dm_crtc_vrr_active(
+ to_dm_crtc_state(acrtc->base.state)))
+ rc = amdgpu_dm_crtc_set_vupdate_irq(
+ &acrtc->base, true);
+ } else
+ rc = amdgpu_dm_crtc_set_vupdate_irq(
+ &acrtc->base, false);
+
+ if (rc)
+ drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n",
+ enable ? "en" : "dis");
+ }
irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
/* During gpu-reset we disable and then enable vblank irq, so
@@ -3443,6 +3442,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
dc_resume(dm->dc);
+ adev->dm.restore_backlight = true;
amdgpu_dm_irq_resume_early(adev);
@@ -3641,7 +3641,7 @@ static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = {
static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
.atomic_commit_tail = amdgpu_dm_atomic_commit_tail,
- .atomic_commit_setup = drm_dp_mst_atomic_setup_commit,
+ .atomic_commit_setup = amdgpu_dm_atomic_setup_commit,
};
static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
@@ -4833,6 +4833,16 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap
if (!caps->data_points)
return;
+ /*
+ * Handle the case where brightness is below the first data point
+ * Interpolate between (0,0) and (first_signal, first_lum)
+ */
+ if (brightness < caps->luminance_data[0].input_signal) {
+ lum = DIV_ROUND_CLOSEST(caps->luminance_data[0].luminance * brightness,
+ caps->luminance_data[0].input_signal);
+ goto scale;
+ }
+
left = 0;
right = caps->data_points - 1;
while (left <= right) {
@@ -6427,6 +6437,10 @@ static void fill_stream_properties_from_drm_display_mode(
&& aconnector
&& aconnector->force_yuv420_output)
timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
+ else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR422)
+ && aconnector
+ && aconnector->force_yuv422_output)
+ timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR422;
else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444)
&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444;
@@ -7384,10 +7398,6 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
drm_dp_cec_unregister_connector(&aconnector->dm_dp_aux.aux);
drm_connector_unregister(connector);
drm_connector_cleanup(connector);
- if (aconnector->i2c) {
- i2c_del_adapter(&aconnector->i2c->base);
- kfree(aconnector->i2c);
- }
kfree(aconnector->dm_dp_aux.aux.name);
kfree(connector);
@@ -7687,6 +7697,7 @@ create_validate_stream_for_sink(struct drm_connector *connector,
bpc_limit = 8;
do {
+ drm_dbg_kms(connector->dev, "Trying with %d bpc\n", requested_bpc);
stream = create_stream_for_sink(connector, drm_mode,
dm_state, old_stream,
requested_bpc);
@@ -7722,16 +7733,41 @@ create_validate_stream_for_sink(struct drm_connector *connector,
} while (stream == NULL && requested_bpc >= bpc_limit);
- if ((dc_result == DC_FAIL_ENC_VALIDATE ||
- dc_result == DC_EXCEED_DONGLE_CAP) &&
- !aconnector->force_yuv420_output) {
- DRM_DEBUG_KMS("%s:%d Retry forcing yuv420 encoding\n",
- __func__, __LINE__);
-
- aconnector->force_yuv420_output = true;
+ switch (dc_result) {
+ /*
+ * If we failed to validate DP bandwidth stream with the requested RGB color depth,
+ * we try to fallback and configure in order:
+ * YUV422 (8bpc, 6bpc)
+ * YUV420 (8bpc, 6bpc)
+ */
+ case DC_FAIL_ENC_VALIDATE:
+ case DC_EXCEED_DONGLE_CAP:
+ case DC_NO_DP_LINK_BANDWIDTH:
+ /* recursively entered twice and already tried both YUV422 and YUV420 */
+ if (aconnector->force_yuv422_output && aconnector->force_yuv420_output)
+ break;
+ /* first failure; try YUV422 */
+ if (!aconnector->force_yuv422_output) {
+ drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV422\n",
+ __func__, __LINE__, dc_result);
+ aconnector->force_yuv422_output = true;
+ /* recursively entered and YUV422 failed, try YUV420 */
+ } else if (!aconnector->force_yuv420_output) {
+ drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV420\n",
+ __func__, __LINE__, dc_result);
+ aconnector->force_yuv420_output = true;
+ }
stream = create_validate_stream_for_sink(connector, drm_mode,
- dm_state, old_stream);
+ dm_state, old_stream);
+ aconnector->force_yuv422_output = false;
aconnector->force_yuv420_output = false;
+ break;
+ case DC_OK:
+ break;
+ default:
+ drm_dbg_kms(connector->dev, "%s:%d Unhandled validation failure %d\n",
+ __func__, __LINE__, dc_result);
+ break;
}
return stream;
@@ -8239,6 +8275,10 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder,
{"1920x1200", 1920, 1200}
};
+ if ((connector->connector_type != DRM_MODE_CONNECTOR_eDP) &&
+ (connector->connector_type != DRM_MODE_CONNECTOR_LVDS))
+ return;
+
n = ARRAY_SIZE(common_modes);
for (i = 0; i < n; i++) {
@@ -8719,7 +8759,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
}
aconnector->i2c = i2c;
- res = i2c_add_adapter(&i2c->base);
+ res = devm_i2c_add_adapter(dm->adev->dev, &i2c->base);
if (res) {
drm_err(adev_to_drm(dm->adev), "Failed to register hw i2c %d\n", link->link_index);
@@ -8817,7 +8857,16 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
static void manage_dm_interrupts(struct amdgpu_device *adev,
struct amdgpu_crtc *acrtc,
struct dm_crtc_state *acrtc_state)
-{
+{ /*
+ * We cannot be sure that the frontend index maps to the same
+ * backend index - some even map to more than one.
+ * So we have to go through the CRTC to find the right IRQ.
+ */
+ int irq_type = amdgpu_display_crtc_idx_to_irq_type(
+ adev,
+ acrtc->crtc_id);
+ struct drm_device *dev = adev_to_drm(adev);
+
struct drm_vblank_crtc_config config = {0};
struct dc_crtc_timing *timing;
int offdelay;
@@ -8870,7 +8919,35 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
drm_crtc_vblank_on_config(&acrtc->base,
&config);
+ /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_get.*/
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 0, 3):
+ case IP_VERSION(3, 2, 0):
+ if (amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot get pageflip irq!\n");
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ if (amdgpu_irq_get(adev, &adev->vline0_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot get vline0 irq!\n");
+#endif
+ }
+
} else {
+ /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_put.*/
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 0, 3):
+ case IP_VERSION(3, 2, 0):
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ if (amdgpu_irq_put(adev, &adev->vline0_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot put vline0 irq!\n");
+#endif
+ if (amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot put pageflip irq!\n");
+ }
+
drm_crtc_vblank_off(&acrtc->base);
}
}
@@ -9892,7 +9969,6 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
bool mode_set_reset_required = false;
u32 i;
struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count};
- bool set_backlight_level = false;
/* Disable writeback */
for_each_old_connector_in_state(state, connector, old_con_state, i) {
@@ -10012,7 +10088,6 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
acrtc->hw_mode = new_crtc_state->mode;
crtc->hwmode = new_crtc_state->mode;
mode_set_reset_required = true;
- set_backlight_level = true;
} else if (modereset_required(new_crtc_state)) {
drm_dbg_atomic(dev,
"Atomic commit: RESET. crtc id %d:[%p]\n",
@@ -10069,13 +10144,16 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
* to fix a flicker issue.
* It will cause the dm->actual_brightness is not the current panel brightness
* level. (the dm->brightness is the correct panel level)
- * So we set the backlight level with dm->brightness value after set mode
+ * So we set the backlight level with dm->brightness value after initial
+ * set mode. Use restore_backlight flag to avoid setting backlight level
+ * for every subsequent mode set.
*/
- if (set_backlight_level) {
+ if (dm->restore_backlight) {
for (i = 0; i < dm->num_of_edps; i++) {
if (dm->backlight_dev[i])
amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
}
+ dm->restore_backlight = false;
}
}
@@ -10293,6 +10371,39 @@ static void amdgpu_dm_update_hdcp(struct drm_atomic_state *state)
}
}
+static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state)
+{
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+ int i, ret;
+
+ ret = drm_dp_mst_atomic_setup_commit(state);
+ if (ret)
+ return ret;
+
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ /*
+ * Color management settings. We also update color properties
+ * when a modeset is needed, to ensure it gets reprogrammed.
+ */
+ if (dm_new_crtc_state->base.active && dm_new_crtc_state->stream &&
+ (dm_new_crtc_state->base.color_mgmt_changed ||
+ dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
+ drm_atomic_crtc_needs_modeset(new_crtc_state))) {
+ ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
+ if (ret) {
+ drm_dbg_atomic(state->dev, "Failed to update color state\n");
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
/**
* amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
* @state: The atomic state to commit
@@ -10788,6 +10899,8 @@ static void get_freesync_config_for_crtc(
} else {
config.state = VRR_STATE_INACTIVE;
}
+ } else {
+ config.state = VRR_STATE_UNSUPPORTED;
}
out:
new_crtc_state->freesync_config = config;
@@ -11105,7 +11218,7 @@ skip_modeset:
if (dm_new_crtc_state->base.color_mgmt_changed ||
dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
drm_atomic_crtc_needs_modeset(new_crtc_state)) {
- ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
+ ret = amdgpu_dm_check_crtc_color_mgmt(dm_new_crtc_state, true);
if (ret)
goto fail;
}
@@ -12689,7 +12802,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
dm_con_state = to_dm_connector_state(connector->state);
- if (!adev->dm.freesync_module)
+ if (!adev->dm.freesync_module || !dc_supports_vrr(sink->ctx->dce_version))
goto update;
edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw()
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 159f8ded0439..009f206226f0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -631,6 +631,13 @@ struct amdgpu_display_manager {
u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP];
/**
+ * @restore_backlight:
+ *
+ * Flag to indicate whether to restore backlight after modeset.
+ */
+ bool restore_backlight;
+
+ /**
* @aux_hpd_discon_quirk:
*
* quirk for hpd discon while aux is on-going.
@@ -799,6 +806,7 @@ struct amdgpu_dm_connector {
bool fake_enable;
bool force_yuv420_output;
+ bool force_yuv422_output;
struct dsc_preferred_settings dsc_settings;
union dp_downstream_port_present mst_downstream_port_present;
/* Cached display modes */
@@ -1046,6 +1054,8 @@ void amdgpu_dm_init_color_mod(void);
int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
+int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc,
+ bool check_only);
int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
struct drm_plane_state *plane_state,
struct dc_plane_state *dc_plane_state);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index c7387af725d6..a4ac6d442278 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -566,12 +566,11 @@ static int __set_output_tf(struct dc_transfer_func *func,
return res ? 0 : -ENOMEM;
}
-static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
+static int amdgpu_dm_set_atomic_regamma(struct dc_transfer_func *out_tf,
const struct drm_color_lut *regamma_lut,
uint32_t regamma_size, bool has_rom,
enum dc_transfer_func_predefined tf)
{
- struct dc_transfer_func *out_tf = &stream->out_transfer_func;
int ret = 0;
if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
@@ -821,7 +820,7 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
const struct drm_color_lut *shaper = NULL, *lut3d = NULL;
uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE;
- bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut;
+ bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend;
/* shaper LUT is only available if 3D LUT color caps */
exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0;
@@ -885,33 +884,33 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state)
}
/**
- * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream.
+ * amdgpu_dm_check_crtc_color_mgmt: Check if DRM color props are programmable by DC.
* @crtc: amdgpu_dm crtc state
+ * @check_only: only check color state without update dc stream
*
- * With no plane level color management properties we're free to use any
- * of the HW blocks as long as the CRTC CTM always comes before the
- * CRTC RGM and after the CRTC DGM.
- *
- * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear.
- * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear.
- * - The CRTC CTM will be placed in the gamut remap block if it is non-linear.
+ * This function just verifies CRTC LUT sizes, if there is enough space for
+ * output transfer function and if its parameters can be calculated by AMD
+ * color module. It also adjusts some settings for programming CRTC degamma at
+ * plane stage, using plane DGM block.
*
* The RGM block is typically more fully featured and accurate across
* all ASICs - DCE can't support a custom non-linear CRTC DGM.
*
* For supporting both plane level color management and CRTC level color
- * management at once we have to either restrict the usage of CRTC properties
- * or blend adjustments together.
+ * management at once we have to either restrict the usage of some CRTC
+ * properties or blend adjustments together.
*
* Returns:
- * 0 on success. Error code if setup fails.
+ * 0 on success. Error code if validation fails.
*/
-int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
+
+int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc,
+ bool check_only)
{
struct dc_stream_state *stream = crtc->stream;
struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
bool has_rom = adev->asic_type <= CHIP_RAVEN;
- struct drm_color_ctm *ctm = NULL;
+ struct dc_transfer_func *out_tf;
const struct drm_color_lut *degamma_lut, *regamma_lut;
uint32_t degamma_size, regamma_size;
bool has_regamma, has_degamma;
@@ -940,6 +939,14 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
crtc->cm_has_degamma = false;
crtc->cm_is_degamma_srgb = false;
+ if (check_only) {
+ out_tf = kvzalloc(sizeof(*out_tf), GFP_KERNEL);
+ if (!out_tf)
+ return -ENOMEM;
+ } else {
+ out_tf = &stream->out_transfer_func;
+ }
+
/* Setup regamma and degamma. */
if (is_legacy) {
/*
@@ -954,8 +961,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
* inverse color ramp in legacy userspace.
*/
crtc->cm_is_degamma_srgb = true;
- stream->out_transfer_func.type = TF_TYPE_DISTRIBUTED_POINTS;
- stream->out_transfer_func.tf = TRANSFER_FUNCTION_SRGB;
+ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+ out_tf->tf = TRANSFER_FUNCTION_SRGB;
/*
* Note: although we pass has_rom as parameter here, we never
* actually use ROM because the color module only takes the ROM
@@ -963,16 +970,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
*
* See more in mod_color_calculate_regamma_params()
*/
- r = __set_legacy_tf(&stream->out_transfer_func, regamma_lut,
+ r = __set_legacy_tf(out_tf, regamma_lut,
regamma_size, has_rom);
- if (r)
- return r;
} else {
regamma_size = has_regamma ? regamma_size : 0;
- r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut,
+ r = amdgpu_dm_set_atomic_regamma(out_tf, regamma_lut,
regamma_size, has_rom, tf);
- if (r)
- return r;
}
/*
@@ -981,6 +984,43 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
* have to place the CTM in the OCSC in that case.
*/
crtc->cm_has_degamma = has_degamma;
+ if (check_only)
+ kvfree(out_tf);
+
+ return r;
+}
+
+/**
+ * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream.
+ * @crtc: amdgpu_dm crtc state
+ *
+ * With no plane level color management properties we're free to use any
+ * of the HW blocks as long as the CRTC CTM always comes before the
+ * CRTC RGM and after the CRTC DGM.
+ *
+ * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear.
+ * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear.
+ * - The CRTC CTM will be placed in the gamut remap block if it is non-linear.
+ *
+ * The RGM block is typically more fully featured and accurate across
+ * all ASICs - DCE can't support a custom non-linear CRTC DGM.
+ *
+ * For supporting both plane level color management and CRTC level color
+ * management at once we have to either restrict the usage of CRTC properties
+ * or blend adjustments together.
+ *
+ * Returns:
+ * 0 on success. Error code if setup fails.
+ */
+int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
+{
+ struct dc_stream_state *stream = crtc->stream;
+ struct drm_color_ctm *ctm = NULL;
+ int ret;
+
+ ret = amdgpu_dm_check_crtc_color_mgmt(crtc, false);
+ if (ret)
+ return ret;
/* Setup CRTC CTM. */
if (crtc->base.ctm) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 45feb404b097..1ec9d03ad747 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -218,8 +218,10 @@ static void amdgpu_dm_idle_worker(struct work_struct *work)
break;
}
- if (idle_work->enable)
+ if (idle_work->enable) {
+ dc_post_update_surfaces_to_stream(idle_work->dm->dc);
dc_allow_idle_optimizations(idle_work->dm->dc, true);
+ }
mutex_unlock(&idle_work->dm->dc_lock);
}
idle_work->dm->idle_workqueue->running = false;
@@ -273,8 +275,10 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
vblank_work->acrtc->dm_irq_params.allow_sr_entry);
}
- if (dm->active_vblank_irq_count == 0)
+ if (dm->active_vblank_irq_count == 0) {
+ dc_post_update_surfaces_to_stream(dm->dc);
dc_allow_idle_optimizations(dm->dc, true);
+ }
mutex_unlock(&dm->dc_lock);
@@ -317,13 +321,17 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable)
dc->config.disable_ips != DMUB_IPS_DISABLE_ALL &&
sr_supported && vblank->config.disable_immediate)
drm_crtc_vblank_restore(crtc);
+ }
- /* vblank irq on -> Only need vupdate irq in vrr mode */
- if (amdgpu_dm_crtc_vrr_active(acrtc_state))
- rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true);
- } else {
- /* vblank irq off -> vupdate irq off */
- rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, false);
+ if (dc_supports_vrr(dm->dc->ctx->dce_version)) {
+ if (enable) {
+ /* vblank irq on -> Only need vupdate irq in vrr mode */
+ if (amdgpu_dm_crtc_vrr_active(acrtc_state))
+ rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true);
+ } else {
+ /* vblank irq off -> vupdate irq off */
+ rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, false);
+ }
}
if (rc)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 58e084f52526..19038f336155 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -768,14 +768,18 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev,
struct mod_hdcp_ddc_funcs *ddc_funcs = &config->ddc.funcs;
config->psp.handle = &adev->psp;
- if (dc->ctx->dce_version == DCN_VERSION_3_1 ||
+ if (dc->ctx->dce_version == DCN_VERSION_3_1 ||
dc->ctx->dce_version == DCN_VERSION_3_14 ||
dc->ctx->dce_version == DCN_VERSION_3_15 ||
- dc->ctx->dce_version == DCN_VERSION_3_5 ||
+ dc->ctx->dce_version == DCN_VERSION_3_16 ||
+ dc->ctx->dce_version == DCN_VERSION_3_2 ||
+ dc->ctx->dce_version == DCN_VERSION_3_21 ||
+ dc->ctx->dce_version == DCN_VERSION_3_5 ||
dc->ctx->dce_version == DCN_VERSION_3_51 ||
- dc->ctx->dce_version == DCN_VERSION_3_6 ||
- dc->ctx->dce_version == DCN_VERSION_3_16)
+ dc->ctx->dce_version == DCN_VERSION_3_6 ||
+ dc->ctx->dce_version == DCN_VERSION_4_01)
config->psp.caps.dtm_v3_supported = 1;
+
config->ddc.handle = dc_get_link_at_index(dc, i);
ddc_funcs->write_i2c = lp_write_i2c;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index eef51652ca35..e027798ece03 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -146,7 +146,7 @@ static void amdgpu_dm_plane_add_modifier(uint64_t **mods, uint64_t *size, uint64
if (*cap - *size < 1) {
uint64_t new_cap = *cap * 2;
- uint64_t *new_mods = kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL);
+ uint64_t *new_mods = kmalloc_array(new_cap, sizeof(uint64_t), GFP_KERNEL);
if (!new_mods) {
kfree(*mods);
@@ -732,7 +732,7 @@ static int amdgpu_dm_plane_get_plane_modifiers(struct amdgpu_device *adev, unsig
if (adev->family < AMDGPU_FAMILY_AI)
return 0;
- *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL);
+ *mods = kmalloc_array(capacity, sizeof(uint64_t), GFP_KERNEL);
if (plane_type == DRM_PLANE_TYPE_CURSOR) {
amdgpu_dm_plane_add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
@@ -1633,7 +1633,7 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
drm_object_attach_property(&plane->base,
dm->adev->mode_info.plane_ctm_property, 0);
- if (dpp_color_caps.hw_3d_lut) {
+ if (dpp_color_caps.hw_3d_lut || dm->dc->caps.color.mpc.preblend) {
drm_object_attach_property(&plane->base,
mode_info.plane_shaper_lut_property, 0);
drm_object_attach_property(&plane->base,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index e5771f490f2e..11b2ea6edf95 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -98,6 +98,7 @@ bool dm_pp_apply_display_requirements(
const struct dm_pp_single_disp_config *dc_cfg =
&pp_display_cfg->disp_configs[i];
adev->pm.pm_display_cfg.displays[i].controller_id = dc_cfg->pipe_idx + 1;
+ adev->pm.pm_display_cfg.displays[i].pixel_clock = dc_cfg->pixel_clock;
}
amdgpu_dpm_display_configuration_change(adev, &adev->pm.pm_display_cfg);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c
index 82ea3fe5e764..80704d709e44 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c
@@ -31,7 +31,7 @@
#include "amdgpu_dm.h"
#include "modules/power/power_helpers.h"
#include "dmub/inc/dmub_cmd.h"
-#include "dc/inc/link.h"
+#include "dc/inc/link_service.h"
/*
* amdgpu_dm_link_supports_replay() - check if the link supports replay
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
index 132de4071efd..8550d5e8b753 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
@@ -53,11 +53,11 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc
func_name, line);
}
-void dm_trace_smu_msg(uint32_t msg_id, uint32_t param_in, struct dc_context *ctx)
+void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx)
{
}
-void dm_trace_smu_delay(uint32_t delay, struct dc_context *ctx)
+void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx)
{
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 4071851f9e86..15cf13ec5302 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -28,7 +28,7 @@
#include "dccg.h"
#include "clk_mgr_internal.h"
#include "dc_state_priv.h"
-#include "link.h"
+#include "link_service.h"
#include "dce100/dce_clk_mgr.h"
#include "dce110/dce110_clk_mgr.h"
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
index dbd6ef1b60a0..6131ede2db7a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
@@ -463,6 +463,9 @@ void dce_clk_mgr_construct(
clk_mgr->max_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_INVALID;
+ base->clks.max_supported_dispclk_khz =
+ clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
+
dce_clock_read_integrated_info(clk_mgr);
dce_clock_read_ss_info(clk_mgr);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
index 13cf415e38e5..d50b9440210e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
@@ -164,7 +164,7 @@ void dce110_fill_display_configs(
stream->link->cur_link_settings.link_rate;
cfg->link_settings.link_spread =
stream->link->cur_link_settings.link_spread;
- cfg->sym_clock = stream->phy_pix_clk;
+ cfg->pixel_clock = stream->phy_pix_clk;
/* Round v_refresh*/
cfg->v_refresh = stream->timing.pix_clk_100hz * 100;
cfg->v_refresh /= stream->timing.h_total;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c
index a39641a0ff09..69dd80d9f738 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c
@@ -147,6 +147,8 @@ void dce60_clk_mgr_construct(
struct dc_context *ctx,
struct clk_mgr_internal *clk_mgr)
{
+ struct clk_mgr *base = &clk_mgr->base;
+
dce_clk_mgr_construct(ctx, clk_mgr);
memcpy(clk_mgr->max_clks_by_state,
@@ -157,5 +159,8 @@ void dce60_clk_mgr_construct(
clk_mgr->clk_mgr_shift = &disp_clk_shift;
clk_mgr->clk_mgr_mask = &disp_clk_mask;
clk_mgr->base.funcs = &dce60_funcs;
+
+ base->clks.max_supported_dispclk_khz =
+ clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
index 3253115a153d..827bc2431d5d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
@@ -69,7 +69,7 @@ static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
/* handle DALSMC_Result_CmdRejectedBusy? */
- TRACE_SMU_DELAY(delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx);
+ TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx);
return reg;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index bc123f1884da..051052bd10c9 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -47,7 +47,7 @@
#include "dcn30/dcn30_clk_mgr.h"
#include "dc_dmub_srv.h"
-#include "link.h"
+#include "link_service.h"
#include "logger_types.h"
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index 91d872d6d392..9e63fa72101c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -48,7 +48,7 @@
#include "dcn31/dcn31_clk_mgr.h"
#include "dc_dmub_srv.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn314_smu.h"
@@ -77,6 +77,7 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0,
#undef DC_LOGGER
#define DC_LOGGER \
clk_mgr->base.base.ctx->logger
+
#define regCLK1_CLK_PLL_REQ 0x0237
#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
@@ -87,8 +88,70 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0,
#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+#define regCLK1_CLK0_DFS_CNTL 0x0269
+#define regCLK1_CLK0_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK1_DFS_CNTL 0x026c
+#define regCLK1_CLK1_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK2_DFS_CNTL 0x026f
+#define regCLK1_CLK2_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK3_DFS_CNTL 0x0272
+#define regCLK1_CLK3_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK4_DFS_CNTL 0x0275
+#define regCLK1_CLK4_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK5_DFS_CNTL 0x0278
+#define regCLK1_CLK5_DFS_CNTL_BASE_IDX 0
+
+#define regCLK1_CLK0_CURRENT_CNT 0x02fb
+#define regCLK1_CLK0_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK1_CURRENT_CNT 0x02fc
+#define regCLK1_CLK1_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK2_CURRENT_CNT 0x02fd
+#define regCLK1_CLK2_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK3_CURRENT_CNT 0x02fe
+#define regCLK1_CLK3_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK4_CURRENT_CNT 0x02ff
+#define regCLK1_CLK4_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK5_CURRENT_CNT 0x0300
+#define regCLK1_CLK5_CURRENT_CNT_BASE_IDX 0
+
+#define regCLK1_CLK0_BYPASS_CNTL 0x028a
+#define regCLK1_CLK0_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK1_BYPASS_CNTL 0x0293
+#define regCLK1_CLK1_BYPASS_CNTL_BASE_IDX 0
#define regCLK1_CLK2_BYPASS_CNTL 0x029c
#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK3_BYPASS_CNTL 0x02a5
+#define regCLK1_CLK3_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK4_BYPASS_CNTL 0x02ae
+#define regCLK1_CLK4_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK5_BYPASS_CNTL 0x02b7
+#define regCLK1_CLK5_BYPASS_CNTL_BASE_IDX 0
+
+#define regCLK1_CLK0_DS_CNTL 0x0283
+#define regCLK1_CLK0_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK1_DS_CNTL 0x028c
+#define regCLK1_CLK1_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK2_DS_CNTL 0x0295
+#define regCLK1_CLK2_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK3_DS_CNTL 0x029e
+#define regCLK1_CLK3_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK4_DS_CNTL 0x02a7
+#define regCLK1_CLK4_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK5_DS_CNTL 0x02b0
+#define regCLK1_CLK5_DS_CNTL_BASE_IDX 0
+
+#define regCLK1_CLK0_ALLOW_DS 0x0284
+#define regCLK1_CLK0_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK1_ALLOW_DS 0x028d
+#define regCLK1_CLK1_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK2_ALLOW_DS 0x0296
+#define regCLK1_CLK2_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK3_ALLOW_DS 0x029f
+#define regCLK1_CLK3_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK4_ALLOW_DS 0x02a8
+#define regCLK1_CLK4_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK5_ALLOW_DS 0x02b1
+#define regCLK1_CLK5_ALLOW_DS_BASE_IDX 0
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10
@@ -185,6 +248,8 @@ void dcn314_init_clocks(struct clk_mgr *clk_mgr)
{
struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
+ struct clk_mgr_dcn314 *clk_mgr_dcn314 = TO_CLK_MGR_DCN314(clk_mgr_int);
+ struct clk_log_info log_info = {0};
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
// Assumption is that boot state always supports pstate
@@ -200,6 +265,9 @@ void dcn314_init_clocks(struct clk_mgr *clk_mgr)
dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
else
clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
+ dcn314_dump_clk_registers(&clk_mgr->boot_snapshot, &clk_mgr_dcn314->base.base, &log_info);
+ clk_mgr->clks.dispclk_khz = clk_mgr->boot_snapshot.dispclk * 1000;
}
void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
@@ -218,6 +286,8 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
if (dc->work_arounds.skip_clock_update)
return;
+ display_count = dcn314_get_active_display_cnt_wa(dc, context);
+
/*
* if it is safe to lower, but we are already in the lower state, we don't have to do anything
* also if safe to lower is false, we just go in the higher state
@@ -236,7 +306,6 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
}
/* check that we're not already in lower */
if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
- display_count = dcn314_get_active_display_cnt_wa(dc, context);
/* if we can go lower, go lower */
if (display_count == 0) {
union display_idle_optimization_u idle_info = { 0 };
@@ -293,11 +362,19 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
update_dppclk = true;
}
- if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) &&
+ (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) {
+ int requested_dispclk_khz = new_clocks->dispclk_khz;
+
dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
+ /* Clamp the requested clock to PMFW based on their limit. */
+ if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz)
+ requested_dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ dcn314_smu_set_dispclk(clk_mgr, requested_dispclk_khz);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
- dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
+
dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
update_dispclk = true;
@@ -385,10 +462,65 @@ bool dcn314_are_clock_states_equal(struct dc_clocks *a,
return true;
}
-static void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+
+static void dcn314_dump_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ // read dtbclk
+ internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT);
+ internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL);
+
+ // read dcfclk
+ internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT);
+ internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL);
+
+ // read dcf deep sleep divider
+ internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL);
+ internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS);
+
+ // read dppclk
+ internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT);
+ internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL);
+
+ // read dprefclk
+ internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT);
+ internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL);
+
+ // read dispclk
+ internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT);
+ internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL);
+}
+
+void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
{
- return;
+
+ struct dcn35_clk_internal internal = {0};
+
+ dcn314_dump_clk_registers_internal(&internal, clk_mgr_base);
+
+ regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10;
+ regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10;
+ regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS;
+ regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10;
+ regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10;
+ regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
+ regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10;
+
+ regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4)
+ regs_and_bypass->dppclk_bypass = 0;
+ regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4)
+ regs_and_bypass->dcfclk_bypass = 0;
+ regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4)
+ regs_and_bypass->dispclk_bypass = 0;
+ regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4)
+ regs_and_bypass->dprefclk_bypass = 0;
+
}
static struct clk_bw_params dcn314_bw_params = {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
index 002c28e80720..0577eb527bc3 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
@@ -65,4 +65,9 @@ void dcn314_clk_mgr_construct(struct dc_context *ctx,
void dcn314_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
+
+void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+ struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info);
+
+
#endif //__DCN314_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
index e4d22f74f986..b315ed91e010 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -46,7 +46,7 @@
#define DC_LOGGER \
clk_mgr->base.base.ctx->logger
-#include "link.h"
+#include "link_service.h"
#define TO_CLK_MGR_DCN315(clk_mgr)\
container_of(clk_mgr, struct clk_mgr_dcn315, base)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 49efea0c8fcf..1769b1f26e75 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -39,7 +39,7 @@
#include "dcn316_smu.h"
#include "dm_helpers.h"
#include "dc_dmub_srv.h"
-#include "link.h"
+#include "link_service.h"
// DCN316 this is CLK1 instance
#define MAX_INSTANCE 7
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 8376e2b0e73d..7da7b41bd092 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -33,7 +33,7 @@
#include "reg_helper.h"
#include "core_types.h"
#include "dm_helpers.h"
-#include "link.h"
+#include "link_service.h"
#include "dc_state_priv.h"
#include "atomfirmware.h"
#include "dcn32_smu13_driver_if.h"
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c
index cf2d35363e8b..5d80fdf63ffc 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c
@@ -63,7 +63,8 @@ static uint32_t dcn32_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
udelay(delay_us);
} while (max_retries--);
- TRACE_SMU_DELAY(delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx);
+ TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx);
+
return reg;
}
@@ -120,7 +121,7 @@ static uint32_t dcn32_smu_wait_for_response_delay(struct clk_mgr_internal *clk_m
*total_delay_us += delay_us;
} while (max_retries--);
- TRACE_SMU_DELAY(*total_delay_us, clk_mgr->base.ctx);
+ TRACE_SMU_MSG_DELAY(0, 0, *total_delay_us, clk_mgr->base.ctx);
return reg;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index bb1ac12a2b09..b11383fba35f 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -44,7 +44,7 @@
#include "dcn31/dcn31_clk_mgr.h"
#include "dc_dmub_srv.h"
-#include "link.h"
+#include "link_service.h"
#include "logger_types.h"
#undef DC_LOGGER
@@ -587,9 +587,118 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a,
return true;
}
-static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+static void dcn35_save_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ // read dtbclk
+ internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT);
+ internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL);
+
+ // read dcfclk
+ internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT);
+ internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL);
+
+ // read dcf deep sleep divider
+ internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL);
+ internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS);
+
+ // read dppclk
+ internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT);
+ internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL);
+
+ // read dprefclk
+ internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT);
+ internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL);
+
+ // read dispclk
+ internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT);
+ internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL);
+}
+
+static void dcn35_save_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
struct clk_mgr_dcn35 *clk_mgr)
{
+ struct dcn35_clk_internal internal = {0};
+ char *bypass_clks[5] = {"0x0 DFS", "0x1 REFCLK", "0x2 ERROR", "0x3 400 FCH", "0x4 600 FCH"};
+
+ dcn35_save_clk_registers_internal(&internal, &clk_mgr->base.base);
+
+ regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10;
+ regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10;
+ regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS;
+ regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10;
+ regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10;
+ regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
+ regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10;
+
+ regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4)
+ regs_and_bypass->dppclk_bypass = 0;
+ regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4)
+ regs_and_bypass->dcfclk_bypass = 0;
+ regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4)
+ regs_and_bypass->dispclk_bypass = 0;
+ regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4)
+ regs_and_bypass->dprefclk_bypass = 0;
+
+ if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
+ DC_LOG_SMU("clk_type,clk_value,deepsleep_cntl,deepsleep_allow,bypass\n");
+
+ DC_LOG_SMU("dcfclk,%d,%d,%d,%s\n",
+ regs_and_bypass->dcfclk,
+ regs_and_bypass->dcf_deep_sleep_divider,
+ regs_and_bypass->dcf_deep_sleep_allow,
+ bypass_clks[(int) regs_and_bypass->dcfclk_bypass]);
+
+ DC_LOG_SMU("dprefclk,%d,N/A,N/A,%s\n",
+ regs_and_bypass->dprefclk,
+ bypass_clks[(int) regs_and_bypass->dprefclk_bypass]);
+
+ DC_LOG_SMU("dispclk,%d,N/A,N/A,%s\n",
+ regs_and_bypass->dispclk,
+ bypass_clks[(int) regs_and_bypass->dispclk_bypass]);
+
+ // REGISTER VALUES
+ DC_LOG_SMU("reg_name,value,clk_type");
+
+ DC_LOG_SMU("CLK1_CLK3_CURRENT_CNT,%d,dcfclk",
+ internal.CLK1_CLK3_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK4_CURRENT_CNT,%d,dtbclk",
+ internal.CLK1_CLK4_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK3_DS_CNTL,%d,dcf_deep_sleep_divider",
+ internal.CLK1_CLK3_DS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK3_ALLOW_DS,%d,dcf_deep_sleep_allow",
+ internal.CLK1_CLK3_ALLOW_DS);
+
+ DC_LOG_SMU("CLK1_CLK2_CURRENT_CNT,%d,dprefclk",
+ internal.CLK1_CLK2_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK0_CURRENT_CNT,%d,dispclk",
+ internal.CLK1_CLK0_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK1_CURRENT_CNT,%d,dppclk",
+ internal.CLK1_CLK1_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK3_BYPASS_CNTL,%d,dcfclk_bypass",
+ internal.CLK1_CLK3_BYPASS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK2_BYPASS_CNTL,%d,dprefclk_bypass",
+ internal.CLK1_CLK2_BYPASS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK0_BYPASS_CNTL,%d,dispclk_bypass",
+ internal.CLK1_CLK0_BYPASS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK1_BYPASS_CNTL,%d,dppclk_bypass",
+ internal.CLK1_CLK1_BYPASS_CNTL);
+
+ }
}
static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
@@ -623,6 +732,7 @@ static void init_clk_states(struct clk_mgr *clk_mgr)
void dcn35_init_clocks(struct clk_mgr *clk_mgr)
{
struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+ struct clk_mgr_dcn35 *clk_mgr_dcn35 = TO_CLK_MGR_DCN35(clk_mgr_int);
init_clk_states(clk_mgr);
@@ -633,6 +743,13 @@ void dcn35_init_clocks(struct clk_mgr *clk_mgr)
else
clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+ dcn35_save_clk_registers(&clk_mgr->boot_snapshot, clk_mgr_dcn35);
+
+ clk_mgr->clks.ref_dtbclk_khz = clk_mgr->boot_snapshot.dtbclk * 10;
+ if (clk_mgr->boot_snapshot.dtbclk > 59000) {
+ /*dtbclk enabled based on */
+ clk_mgr->clks.dtbclk_en = true;
+ }
}
static struct clk_bw_params dcn35_bw_params = {
.vram_type = Ddr4MemType,
@@ -1323,7 +1440,7 @@ void dcn35_clk_mgr_construct(
dcn35_bw_params.wm_table = ddr5_wm_table;
}
/* Saved clocks configured at boot for debug purposes */
- dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr);
+ dcn35_save_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr);
clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base);
clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
index 47ff4c965d76..306016c1f109 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
@@ -13,7 +13,7 @@
#include "reg_helper.h"
#include "core_types.h"
#include "dm_helpers.h"
-#include "link.h"
+#include "link_service.h"
#include "dc_state_priv.h"
#include "atomfirmware.h"
@@ -162,7 +162,7 @@ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e c
unsigned int i;
char *entry_i = (char *)entry_0;
- uint32_t ret = dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF);
+ uint32_t ret = dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF);
if (ret & (1 << 31))
/* fine-grained, only min and max */
@@ -174,7 +174,7 @@ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e c
/* if the initial message failed, num_levels will be 0 */
for (i = 0; i < *num_levels && i < ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries); i++) {
- *((unsigned int *)entry_i) = (dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF);
+ *((unsigned int *)entry_i) = (dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF);
entry_i += sizeof(clk_mgr->base.bw_params->clk_table.entries[0]);
}
}
@@ -231,20 +231,20 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base)
clk_mgr->smu_present = false;
clk_mgr->dpm_present = false;
- if (!clk_mgr_base->force_smu_not_present && dcn30_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver))
+ if (!clk_mgr_base->force_smu_not_present && dcn401_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver))
clk_mgr->smu_present = true;
if (!clk_mgr->smu_present)
return;
- dcn30_smu_check_driver_if_version(clk_mgr);
- dcn30_smu_check_msg_header_version(clk_mgr);
+ dcn401_smu_check_driver_if_version(clk_mgr);
+ dcn401_smu_check_msg_header_version(clk_mgr);
/* DCFCLK */
dcn401_init_single_clock(clk_mgr, PPCLK_DCFCLK,
&clk_mgr_base->bw_params->clk_table.entries[0].dcfclk_mhz,
&num_entries_per_clk->num_dcfclk_levels);
- clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK);
+ clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK);
if (num_entries_per_clk->num_dcfclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz ==
clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dcfclk_levels - 1].dcfclk_mhz)
clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = 0;
@@ -253,7 +253,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base)
dcn401_init_single_clock(clk_mgr, PPCLK_SOCCLK,
&clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz,
&num_entries_per_clk->num_socclk_levels);
- clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK);
+ clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK);
if (num_entries_per_clk->num_socclk_levels && clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz ==
clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_socclk_levels - 1].socclk_mhz)
clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = 0;
@@ -263,7 +263,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base)
dcn401_init_single_clock(clk_mgr, PPCLK_DTBCLK,
&clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz,
&num_entries_per_clk->num_dtbclk_levels);
- clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK);
+ clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK);
if (num_entries_per_clk->num_dtbclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz ==
clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dtbclk_levels - 1].dtbclk_mhz)
clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = 0;
@@ -273,7 +273,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base)
dcn401_init_single_clock(clk_mgr, PPCLK_DISPCLK,
&clk_mgr_base->bw_params->clk_table.entries[0].dispclk_mhz,
&num_entries_per_clk->num_dispclk_levels);
- clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK);
+ clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK);
if (num_entries_per_clk->num_dispclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz ==
clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dispclk_levels - 1].dispclk_mhz)
clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = 0;
@@ -1318,8 +1318,8 @@ static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
table->Watermarks.WatermarkRow[i].WmSetting = i;
table->Watermarks.WatermarkRow[i].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[i].pmfw_breakdown.wm_type;
}
- dcn30_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32);
- dcn30_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF);
+ dcn401_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32);
+ dcn401_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF);
dcn401_smu_transfer_wm_table_dram_2_smu(clk_mgr);
}
@@ -1390,7 +1390,7 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz;
}
- clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK);
+ clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK);
if (num_entries_per_clk->num_memclk_levels && clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz ==
clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz)
clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = 0;
@@ -1399,7 +1399,7 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
dcn401_init_single_clock(clk_mgr, PPCLK_FCLK,
&clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz,
&num_entries_per_clk->num_fclk_levels);
- clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK);
+ clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK);
if (num_entries_per_clk->num_fclk_levels && clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz ==
clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_fclk_levels - 1].fclk_mhz)
clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
index 21c35528f61f..3a263840893e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
@@ -57,6 +57,8 @@ static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uin
/* Wait for response register to be ready */
dcn401_smu_wait_for_response(clk_mgr, 10, 200000);
+ TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx);
+
/* Clear response register */
REG_WRITE(DAL_RESP_REG, 0);
@@ -71,9 +73,11 @@ static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uin
if (param_out)
*param_out = REG_READ(DAL_ARG_REG);
+ TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx);
return true;
}
+ TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx);
return false;
}
@@ -102,8 +106,6 @@ static uint32_t dcn401_smu_wait_for_response_delay(struct clk_mgr_internal *clk_
*total_delay_us += delay_us;
} while (max_retries--);
- TRACE_SMU_DELAY(*total_delay_us, clk_mgr->base.ctx);
-
return reg;
}
@@ -115,6 +117,8 @@ static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mg
/* Wait for response register to be ready */
dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay1_us);
+ TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx);
+
/* Clear response register */
REG_WRITE(DAL_RESP_REG, 0);
@@ -124,18 +128,71 @@ static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mg
/* Trigger the message transaction by writing the message ID */
REG_WRITE(DAL_MSG_REG, msg_id);
- TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx);
-
/* Wait for response */
if (dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay2_us) == DALSMC_Result_OK) {
if (param_out)
*param_out = REG_READ(DAL_ARG_REG);
*total_delay_us = delay1_us + delay2_us;
+ TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx);
return true;
}
*total_delay_us = delay1_us + 2000000;
+ TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx);
+ return false;
+}
+
+bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version)
+{
+ smu_print("SMU Get SMU version\n");
+
+ if (dcn401_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_GetSmuVersion, 0, version)) {
+
+ smu_print("SMU version: %d\n", *version);
+
+ return true;
+ }
+
+ return false;
+}
+
+/* Message output should match SMU11_DRIVER_IF_VERSION in smu11_driver_if.h */
+bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr)
+{
+ uint32_t response = 0;
+
+ smu_print("SMU Check driver if version\n");
+
+ if (dcn401_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_GetDriverIfVersion, 0, &response)) {
+
+ smu_print("SMU driver if version: %d\n", response);
+
+ if (response == SMU14_DRIVER_IF_VERSION)
+ return true;
+ }
+
+ return false;
+}
+
+/* Message output should match DALSMC_VERSION in dalsmc.h */
+bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr)
+{
+ uint32_t response = 0;
+
+ smu_print("SMU Check msg header version\n");
+
+ if (dcn401_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_GetMsgHeaderVersion, 0, &response)) {
+
+ smu_print("SMU msg header version: %d\n", response);
+
+ if (response == DALSMC_VERSION)
+ return true;
+ }
+
return false;
}
@@ -163,6 +220,22 @@ void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsi
smu_print("Numways for SubVP : %d\n", num_ways);
}
+void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
+{
+ smu_print("SMU Set DRAM addr high: %d\n", addr_high);
+
+ dcn401_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_SetDalDramAddrHigh, addr_high, NULL);
+}
+
+void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low)
+{
+ smu_print("SMU Set DRAM addr low: %d\n", addr_low);
+
+ dcn401_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_SetDalDramAddrLow, addr_low, NULL);
+}
+
void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
{
smu_print("SMU Transfer WM table DRAM 2 SMU\n");
@@ -348,3 +421,52 @@ unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr
return response;
}
+
+/*
+ * Frequency in MHz returned in lower 16 bits for valid DPM level
+ *
+ * Call with dpm_level = 0xFF to query features, return value will be:
+ * Bits 7:0 - number of DPM levels
+ * Bit 28 - 1 = auto DPM on
+ * Bit 29 - 1 = sweep DPM on
+ * Bit 30 - 1 = forced DPM on
+ * Bit 31 - 0 = discrete, 1 = fine-grained
+ *
+ * With fine-grained DPM, only min and max frequencies will be reported
+ *
+ * Returns 0 on failure
+ */
+unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level)
+{
+ uint32_t response = 0;
+
+ /* bits 23:16 for clock type, lower 8 bits for DPM level */
+ uint32_t param = (clk << 16) | dpm_level;
+
+ smu_print("SMU Get dpm freq by index: clk = %d, dpm_level = %d\n", clk, dpm_level);
+
+ dcn401_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_GetDpmFreqByIndex, param, &response);
+
+ smu_print("SMU dpm freq: %d MHz\n", response);
+
+ return response;
+}
+
+/* Returns the max DPM frequency in DC mode in MHz, 0 on failure */
+unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk)
+{
+ uint32_t response = 0;
+
+ /* bits 23:16 for clock type */
+ uint32_t param = clk << 16;
+
+ smu_print("SMU Get DC mode max DPM freq: clk = %d\n", clk);
+
+ dcn401_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_GetDcModeMaxDpmFreq, param, &response);
+
+ smu_print("SMU DC mode max DMP freq: %d MHz\n", response);
+
+ return response;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
index e02eb1294b37..4f5ac603e822 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
@@ -7,11 +7,17 @@
#include "os_types.h"
#include "core_types.h"
-#include "dcn32/dcn32_clk_mgr_smu_msg.h"
+struct clk_mgr_internal;
+
+bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version);
+bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr);
+bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr);
void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support);
void dcn401_smu_send_uclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support);
void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways);
+void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high);
+void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);
unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz);
@@ -29,5 +35,7 @@ bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr);
+unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk);
+unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level);
#endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 5963019d1e74..5f2d5638c819 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -60,7 +60,7 @@
#include "link_encoder.h"
#include "link_enc_cfg.h"
-#include "link.h"
+#include "link_service.h"
#include "dm_helpers.h"
#include "mem_input.h"
@@ -460,7 +460,7 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
* avoid conflicting with firmware updates.
*/
if (dc->ctx->dce_version > DCE_VERSION_MAX) {
- if ((dc->optimized_required || dc->wm_optimized_required) &&
+ if (dc->optimized_required &&
(stream->adjust.v_total_max != adjust->v_total_max ||
stream->adjust.v_total_min != adjust->v_total_min)) {
stream->adjust.timing_adjust_pending = true;
@@ -2577,7 +2577,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
}
dc->optimized_required = false;
- dc->wm_optimized_required = false;
}
bool dc_set_generic_gpio_for_stereo(bool enable,
@@ -3056,8 +3055,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
} else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) {
dc->optimized_required = true;
}
-
- dc->optimized_required |= dc->wm_optimized_required;
}
return type;
@@ -3313,6 +3310,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->adaptive_sync_infopacket)
stream->adaptive_sync_infopacket = *update->adaptive_sync_infopacket;
+ if (update->avi_infopacket)
+ stream->avi_infopacket = *update->avi_infopacket;
+
if (update->dither_option)
stream->dither_option = *update->dither_option;
@@ -3607,7 +3607,8 @@ static void commit_planes_do_stream_update(struct dc *dc,
stream_update->vsp_infopacket ||
stream_update->hfvsif_infopacket ||
stream_update->adaptive_sync_infopacket ||
- stream_update->vtem_infopacket) {
+ stream_update->vtem_infopacket ||
+ stream_update->avi_infopacket) {
resource_build_info_frame(pipe_ctx);
dc->hwss.update_info_frame(pipe_ctx);
@@ -5079,6 +5080,7 @@ static bool full_update_required(struct dc *dc,
stream_update->hfvsif_infopacket ||
stream_update->vtem_infopacket ||
stream_update->adaptive_sync_infopacket ||
+ stream_update->avi_infopacket ||
stream_update->dpms_off ||
stream_update->allow_freesync ||
stream_update->vrr_active_variable ||
@@ -5622,8 +5624,8 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const
subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe);
}
}
-
- DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n",
+ if (!dc->caps.is_apu)
+ DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n",
__func__, allow, idle_fclk_khz, idle_dramclk_khz, subvp_pipe_type[0], subvp_pipe_type[1], subvp_pipe_type[2],
subvp_pipe_type[3], subvp_pipe_type[4], subvp_pipe_type[5], caller_name);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
index 814f68d76257..a180f68f711c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
@@ -24,7 +24,7 @@
#include "link_enc_cfg.h"
#include "resource.h"
-#include "link.h"
+#include "link_service.h"
#define DC_LOGGER dc->ctx->logger
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
index b7a5de4ecb61..9acd30019717 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -33,8 +33,9 @@
* dc.h with detail interface documentation, then add function implementation
* in this file which calls link functions.
*/
-#include "link.h"
+#include "link_service.h"
#include "dce/dce_i2c.h"
+
struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_index)
{
if (link_index >= MAX_LINKS)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index d712548b1927..bc5dedf5f60c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -40,7 +40,7 @@
#include "virtual/virtual_stream_encoder.h"
#include "dpcd_defs.h"
#include "link_enc_cfg.h"
-#include "link.h"
+#include "link_service.h"
#include "clk_mgr.h"
#include "dc_state_priv.h"
#include "dc_stream_priv.h"
@@ -95,7 +95,6 @@
#define DC_LOGGER \
dc->ctx->logger
#define DC_LOGGER_INIT(logger)
-
#include "dml2/dml2_wrapper.h"
#define UNABLE_TO_SPLIT -1
@@ -2149,7 +2148,7 @@ int resource_get_odm_slice_dst_width(struct pipe_ctx *otg_master,
h_active = timing->h_addressable +
timing->h_border_left +
timing->h_border_right +
- otg_master->hblank_borrow;
+ otg_master->dsc_padding_params.dsc_hactive_padding;
width = h_active / count;
if (otg_master->stream_res.tg)
@@ -4267,39 +4266,33 @@ fail:
return res;
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
+#endif /* CONFIG_DRM_AMD_DC_FP */
+
/**
- * decide_hblank_borrow - Decides the horizontal blanking borrow value for a given pipe context.
+ * calculate_timing_params_for_dsc_with_padding - Calculates timing parameters for DSC with padding.
* @pipe_ctx: Pointer to the pipe context structure.
*
- * This function calculates the horizontal blanking borrow value for a given pipe context based on the
+ * This function calculates the timing parameters for a given pipe context based on the
* display stream compression (DSC) configuration. If the horizontal active pixels (hactive) are less
- * than the total width of the DSC slices, it sets the hblank_borrow value to the difference. If the
- * total horizontal timing minus the hblank_borrow value is less than 32, it resets the hblank_borrow
+ * than the total width of the DSC slices, it sets the dsc_hactive_padding value to the difference. If the
+ * total horizontal timing minus the dsc_hactive_padding value is less than 32, it resets the dsc_hactive_padding
* value to 0.
*/
-static void decide_hblank_borrow(struct pipe_ctx *pipe_ctx)
+static void calculate_timing_params_for_dsc_with_padding(struct pipe_ctx *pipe_ctx)
{
- uint32_t hactive;
- uint32_t ceil_slice_width;
struct dc_stream_state *stream = NULL;
if (!pipe_ctx)
return;
stream = pipe_ctx->stream;
+ pipe_ctx->dsc_padding_params.dsc_hactive_padding = 0;
+ pipe_ctx->dsc_padding_params.dsc_htotal_padding = 0;
- if (stream->timing.flags.DSC) {
- hactive = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
-
- /* Assume if determined slices does not divide Hactive evenly, Hborrow is needed for padding*/
- if (hactive % stream->timing.dsc_cfg.num_slices_h != 0) {
- ceil_slice_width = (hactive / stream->timing.dsc_cfg.num_slices_h) + 1;
- pipe_ctx->hblank_borrow = ceil_slice_width * stream->timing.dsc_cfg.num_slices_h - hactive;
+ if (stream)
+ pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz = stream->timing.pix_clk_100hz;
- if (stream->timing.h_total - hactive - pipe_ctx->hblank_borrow < 32)
- pipe_ctx->hblank_borrow = 0;
- }
- }
}
/**
@@ -4342,7 +4335,7 @@ enum dc_status dc_validate_global_state(
/* Decide whether hblank borrow is needed and save it in pipe_ctx */
if (dc->debug.enable_hblank_borrow)
- decide_hblank_borrow(pipe_ctx);
+ calculate_timing_params_for_dsc_with_padding(pipe_ctx);
if (dc->res_pool->funcs->patch_unknown_plane_state &&
pipe_ctx->plane_state &&
@@ -4417,8 +4410,14 @@ static void set_avi_info_frame(
unsigned int fr_ind = pipe_ctx->stream->timing.fr_index;
enum dc_timing_3d_format format;
+ if (stream->avi_infopacket.valid) {
+ *info_packet = stream->avi_infopacket;
+ return;
+ }
+
memset(&hdmi_info, 0, sizeof(union hdmi_info_packet));
+
color_space = pipe_ctx->stream->output_color_space;
if (color_space == COLOR_SPACE_UNKNOWN)
color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ?
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index 883054bb18e7..c61300a7cb1c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -211,7 +211,7 @@ struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *p
return NULL;
}
- if (!dml2_create(dc, &dc->dml2_dc_power_options, &state->bw_ctx.dml2_dc_power_source)) {
+ if (dc->caps.dcmode_power_limits_present && !dml2_create(dc, &dc->dml2_dc_power_options, &state->bw_ctx.dml2_dc_power_source)) {
dc_state_release(state);
return NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 954a2786fbe2..98f0b6b3c213 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -55,7 +55,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
-#define DC_VER "3.2.349"
+#define DC_VER "3.2.351"
/**
* MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC
@@ -1163,6 +1163,7 @@ struct dc_debug_options {
unsigned int auxless_alpm_lfps_silence_ns;
unsigned int auxless_alpm_lfps_t1t2_us;
short auxless_alpm_lfps_t1t2_offset_us;
+ bool disable_stutter_for_wm_program;
};
@@ -1391,7 +1392,6 @@ union surface_update_flags {
uint32_t in_transfer_func_change:1;
uint32_t input_csc_change:1;
uint32_t coeff_reduction_change:1;
- uint32_t output_tf_change:1;
uint32_t pixel_format_change:1;
uint32_t plane_size_change:1;
uint32_t gamut_remap_change:1;
@@ -1735,7 +1735,6 @@ struct dc {
/* Require to optimize clocks and bandwidth for added/removed planes */
bool optimized_required;
- bool wm_optimized_required;
bool idle_optimizations_allowed;
bool enable_c20_dtm_b0;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 51e41aed7316..5a365bd19933 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -755,3 +755,8 @@ char *dce_version_to_string(const int version)
return "Unknown";
}
}
+
+bool dc_supports_vrr(const enum dce_version v)
+{
+ return v >= DCE_VERSION_8_0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
index 7f57661433eb..55704d4457ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
@@ -128,7 +128,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl
spl_in->odm_slice_index = resource_get_odm_slice_index(pipe_ctx);
// Make spl input basic out info output_size width point to stream h active
spl_in->basic_out.output_size.width =
- stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->hblank_borrow;
+ stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding;
// Make spl input basic out info output_size height point to v active
spl_in->basic_out.output_size.height =
stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 5fc6fea211de..76cf9fdedab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -203,6 +203,7 @@ struct dc_stream_state {
struct dc_info_packet hfvsif_infopacket;
struct dc_info_packet vtem_infopacket;
struct dc_info_packet adaptive_sync_infopacket;
+ struct dc_info_packet avi_infopacket;
uint8_t dsc_packed_pps[128];
struct rect src; /* composition area */
struct rect dst; /* stream addressable area */
@@ -335,6 +336,8 @@ struct dc_stream_update {
struct dc_info_packet *hfvsif_infopacket;
struct dc_info_packet *vtem_infopacket;
struct dc_info_packet *adaptive_sync_infopacket;
+ struct dc_info_packet *avi_infopacket;
+
bool *dpms_off;
bool integer_scaling_update;
bool *allow_freesync;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 619834a328a3..b5aa03a3e39c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1217,6 +1217,7 @@ struct dc_panel_config {
bool rc_disable;
bool rc_allow_static_screen;
bool rc_allow_fullscreen_VPB;
+ bool read_psrcap_again;
unsigned int replay_enable_option;
} psr;
/* ABM */
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
index 0ce9489ac6b7..de6d62401362 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -39,6 +39,7 @@
#define CTX \
dccg_dcn->base.ctx
+#include "logger_types.h"
#define DC_LOGGER \
dccg->ctx->logger
@@ -1136,7 +1137,7 @@ static void dcn35_set_dppclk_enable(struct dccg *dccg,
default:
break;
}
- //DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable);
+ DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable);
}
@@ -1406,6 +1407,10 @@ static void dccg35_set_dtbclk_dto(
* PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the
* programming is handled in program_pix_clk() regardless, so it can be removed from here.
*/
+ DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO enabled: pixclk_khz=%d, ref_dtbclk_khz=%d, req_dtbclk_khz=%d, phase=%d, modulo=%d\n",
+ __func__, params->otg_inst, params->pixclk_khz,
+ params->ref_dtbclk_khz, req_dtbclk_khz, phase, modulo);
+
} else {
switch (params->otg_inst) {
case 0:
@@ -1431,6 +1436,8 @@ static void dccg35_set_dtbclk_dto(
REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
+
+ DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO disabled\n", __func__, params->otg_inst);
}
}
@@ -1475,6 +1482,8 @@ static void dccg35_set_dpstreamclk(
BREAK_TO_DEBUGGER();
return;
}
+ DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_EN = %d, DPSTREAMCLK_SRC_SEL = %d\n",
+ __func__, dp_hpo_inst, (src == REFCLK) ? 0 : 1, otg_inst);
}
@@ -1514,6 +1523,8 @@ static void dccg35_set_dpstreamclk_root_clock_gating(
BREAK_TO_DEBUGGER();
return;
}
+ DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_ROOT_GATE_DISABLE = %d\n",
+ __func__, dp_hpo_inst, enable ? 1 : 0);
}
@@ -1553,7 +1564,7 @@ static void dccg35_set_physymclk_root_clock_gating(
BREAK_TO_DEBUGGER();
return;
}
- //DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE:\n", __func__, phy_inst, enable ? 0 : 1);
+ DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE: %d\n", __func__, phy_inst, enable ? 0 : 1);
}
@@ -1626,6 +1637,8 @@ static void dccg35_set_physymclk(
BREAK_TO_DEBUGGER();
return;
}
+ DC_LOG_DEBUG("%s: phy_inst(%d) PHYxSYMCLK_EN = %d, PHYxSYMCLK_SRC_SEL = %d\n",
+ __func__, phy_inst, force_enable ? 1 : 0, clk_src);
}
static void dccg35_set_valid_pixel_rate(
@@ -1673,6 +1686,7 @@ static void dccg35_dpp_root_clock_control(
}
dccg->dpp_clock_gated[dpp_inst] = !clock_on;
+ DC_LOG_DEBUG("%s: dpp_inst(%d) clock_on = %d\n", __func__, dpp_inst, clock_on);
}
static void dccg35_disable_symclk32_se(
@@ -1731,6 +1745,7 @@ static void dccg35_disable_symclk32_se(
BREAK_TO_DEBUGGER();
return;
}
+
}
static void dccg35_init_cb(struct dccg *dccg)
@@ -1738,7 +1753,6 @@ static void dccg35_init_cb(struct dccg *dccg)
(void)dccg;
/* Any RCG should be done when driver enter low power mode*/
}
-
void dccg35_init(struct dccg *dccg)
{
int otg_inst;
@@ -1753,6 +1767,8 @@ void dccg35_init(struct dccg *dccg)
for (otg_inst = 0; otg_inst < 2; otg_inst++) {
dccg31_disable_symclk32_le(dccg, otg_inst);
dccg31_set_symclk32_le_root_clock_gating(dccg, otg_inst, false);
+ DC_LOG_DEBUG("%s: OTG%d SYMCLK32_LE disabled and root clock gating disabled\n",
+ __func__, otg_inst);
}
// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
@@ -1765,6 +1781,8 @@ void dccg35_init(struct dccg *dccg)
dccg35_set_dpstreamclk(dccg, REFCLK, otg_inst,
otg_inst);
dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false);
+ DC_LOG_DEBUG("%s: OTG%d DPSTREAMCLK disabled and root clock gating disabled\n",
+ __func__, otg_inst);
}
/*
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
index 2b1673d69ea8..1ab5ae9b5ea5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
@@ -154,10 +154,13 @@ static bool dce60_setup_scaling_configuration(
REG_SET(SCL_BYPASS_CONTROL, 0, SCL_BYPASS_MODE, 0);
if (data->taps.h_taps + data->taps.v_taps <= 2) {
- /* Set bypass */
-
- /* DCE6 has no SCL_MODE register, skip scale mode programming */
+ /* Disable scaler functionality */
+ REG_WRITE(SCL_SCALER_ENABLE, 0);
+ /* Clear registers that can cause glitches even when the scaler is off */
+ REG_WRITE(SCL_TAP_CONTROL, 0);
+ REG_WRITE(SCL_AUTOMATIC_MODE_CONTROL, 0);
+ REG_WRITE(SCL_F_SHARP_CONTROL, 0);
return false;
}
@@ -165,7 +168,7 @@ static bool dce60_setup_scaling_configuration(
SCL_H_NUM_OF_TAPS, data->taps.h_taps - 1,
SCL_V_NUM_OF_TAPS, data->taps.v_taps - 1);
- /* DCE6 has no SCL_MODE register, skip scale mode programming */
+ REG_WRITE(SCL_SCALER_ENABLE, 1);
/* DCE6 has no SCL_BOUNDARY_MODE bit, skip replace out of bound pixels */
@@ -502,6 +505,8 @@ static void dce60_transform_set_scaler(
REG_SET(DC_LB_MEM_SIZE, 0,
DC_LB_MEM_SIZE, xfm_dce->lb_memory_size);
+ REG_WRITE(SCL_UPDATE, 0x00010000);
+
/* Clear SCL_F_SHARP_CONTROL value to 0 */
REG_WRITE(SCL_F_SHARP_CONTROL, 0);
@@ -527,8 +532,7 @@ static void dce60_transform_set_scaler(
if (coeffs_v != xfm_dce->filter_v || coeffs_h != xfm_dce->filter_h) {
/* 4. Program vertical filters */
if (xfm_dce->filter_v == NULL)
- REG_SET(SCL_VERT_FILTER_CONTROL, 0,
- SCL_V_2TAP_HARDCODE_COEF_EN, 0);
+ REG_WRITE(SCL_VERT_FILTER_CONTROL, 0);
program_multi_taps_filter(
xfm_dce,
data->taps.v_taps,
@@ -542,8 +546,7 @@ static void dce60_transform_set_scaler(
/* 5. Program horizontal filters */
if (xfm_dce->filter_h == NULL)
- REG_SET(SCL_HORZ_FILTER_CONTROL, 0,
- SCL_H_2TAP_HARDCODE_COEF_EN, 0);
+ REG_WRITE(SCL_HORZ_FILTER_CONTROL, 0);
program_multi_taps_filter(
xfm_dce,
data->taps.h_taps,
@@ -566,6 +569,8 @@ static void dce60_transform_set_scaler(
/* DCE6 has no SCL_COEF_UPDATE_COMPLETE bit to flip to new coefficient memory */
/* DCE6 DATA_FORMAT register does not support ALPHA_EN */
+
+ REG_WRITE(SCL_UPDATE, 0);
}
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
index cbce194ec7b8..eb716e8337e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
@@ -155,6 +155,9 @@
SRI(SCL_COEF_RAM_TAP_DATA, SCL, id), \
SRI(VIEWPORT_START, SCL, id), \
SRI(VIEWPORT_SIZE, SCL, id), \
+ SRI(SCL_SCALER_ENABLE, SCL, id), \
+ SRI(SCL_HORZ_FILTER_INIT_RGB_LUMA, SCL, id), \
+ SRI(SCL_HORZ_FILTER_INIT_CHROMA, SCL, id), \
SRI(SCL_HORZ_FILTER_SCALE_RATIO, SCL, id), \
SRI(SCL_VERT_FILTER_SCALE_RATIO, SCL, id), \
SRI(SCL_VERT_FILTER_INIT, SCL, id), \
@@ -590,6 +593,7 @@ struct dce_transform_registers {
uint32_t SCL_VERT_FILTER_SCALE_RATIO;
uint32_t SCL_HORZ_FILTER_INIT;
#if defined(CONFIG_DRM_AMD_DC_SI)
+ uint32_t SCL_SCALER_ENABLE;
uint32_t SCL_HORZ_FILTER_INIT_RGB_LUMA;
uint32_t SCL_HORZ_FILTER_INIT_CHROMA;
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
index 65b979617b0c..f9542edff14b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
@@ -3,7 +3,7 @@
// Copyright 2024 Advanced Micro Devices, Inc.
#include "dc.h"
-#include "link.h"
+#include "link_service.h"
#include "dc_dmub_srv.h"
#include "dmub/dmub_srv.h"
#include "core_types.h"
@@ -169,6 +169,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub,
copy_settings_data->max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line;
copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable;
copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported;
+ copy_settings_data->replay_support_fast_resync_in_ultra_sleep_mode = link->replay_settings.config.replay_support_fast_resync_in_ultra_sleep_mode;
copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm;
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c
index 22e66b375a7f..d928b4dcf6b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c
@@ -28,7 +28,7 @@
#include "dcn10_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#include "dcn30/dcn30_afmt.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c
index 0b47aeb60e79..bec0b4aaeb2b 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c
@@ -29,7 +29,7 @@
#include "dcn20_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#define DC_LOGGER \
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
index 9a92f73d5b7f..84cc2ddc52fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
@@ -37,7 +37,7 @@
#include "link_enc_cfg.h"
#include "dc_dmub_srv.h"
#include "dal_asic_id.h"
-#include "link.h"
+#include "link_service.h"
#define CTX \
enc10->base.ctx
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c
index ae81451a3a72..3e85e9c3d2cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c
@@ -30,7 +30,7 @@
#include "dcn314_dio_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#define DC_LOGGER \
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c
index 1a9bb614c41e..3523d1cdc1a3 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c
@@ -29,7 +29,7 @@
#include "dcn32_dio_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#define DC_LOGGER \
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c
index 6f30b6cc3c76..fd5d1dbf9dc6 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c
@@ -29,7 +29,7 @@
#include "dcn35_dio_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#define DC_LOGGER \
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c
index d5fa551dd3c9..99aab70ef3e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c
@@ -32,7 +32,7 @@
#include "dcn401_dio_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#define DC_LOGGER \
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h
index 7b9c22c45453..fbbf9c757b3c 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services.h
@@ -277,12 +277,13 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc
/*
* SMU message tracing
*/
-void dm_trace_smu_msg(uint32_t msg_id, uint32_t param_in, struct dc_context *ctx);
-void dm_trace_smu_delay(uint32_t delay, struct dc_context *ctx);
-
-#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_msg(msg_id, param_in, ctx)
-#define TRACE_SMU_DELAY(response_delay, ctx) dm_trace_smu_delay(response_delay, ctx)
+void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx);
+void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx);
+#define TRACE_SMU_MSG_DELAY(msg_id, param_in, delay, ctx) dm_trace_smu_enter(msg_id, param_in, delay, ctx)
+#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx)
+#define TRACE_SMU_MSG_ENTER(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx)
+#define TRACE_SMU_MSG_EXIT(success, response, ctx) dm_trace_smu_exit(success, response, ctx)
/*
* DMUB Interfaces
@@ -311,4 +312,6 @@ void dm_dtn_log_end(struct dc_context *ctx,
char *dce_version_to_string(const int version);
+bool dc_supports_vrr(const enum dce_version v);
+
#endif /* __DM_SERVICES_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h
index bf63da266a18..3b093b8699ab 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h
@@ -127,7 +127,7 @@ struct dm_pp_single_disp_config {
uint32_t src_height;
uint32_t src_width;
uint32_t v_refresh;
- uint32_t sym_clock; /* HDMI only */
+ uint32_t pixel_clock; /* Pixel clock in KHz (for HDMI only: normalized) */
struct dc_link_settings link_settings; /* DP only */
};
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 2a2eaf6adf26..7aaf13bbd4e4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -30,8 +30,7 @@
#include "dcn20/dcn20_resource.h"
#include "dcn21/dcn21_resource.h"
#include "clk_mgr/dcn21/rn_clk_mgr.h"
-
-#include "link.h"
+#include "link_service.h"
#include "dcn20_fpu.h"
#include "dc_state_priv.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index 17a21bcbde17..1a28061bb9ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -808,6 +808,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc)
{
+ dc_assert_fp_enabled();
+
return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0);
}
@@ -815,6 +817,8 @@ int dcn_get_approx_det_segs_required_for_pstate(
struct _vcs_dpi_soc_bounding_box_st *soc,
int pix_clk_100hz, int bpp, int seg_size_kb)
{
+ dc_assert_fp_enabled();
+
/* Roughly calculate required crb to hide latency. In practice there is slightly
* more buffer available for latency hiding
*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 18388fb00be8..8a0f128722b0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -31,7 +31,7 @@
// We need this includes for WATERMARKS_* defines
#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
#include "dcn30/dcn30_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dc_state_priv.h"
#define DC_LOGGER_INIT(logger)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 5d73efa2f0c9..817a370e80a7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -31,7 +31,7 @@
#include "dml/dcn31/dcn31_fpu.h"
#include "dml/dml_inline_defs.h"
-#include "link.h"
+#include "link_service.h"
#define DC_LOGGER_INIT(logger)
@@ -445,6 +445,8 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
bool upscaled = false;
const unsigned int max_allowed_vblank_nom = 1023;
+ dc_assert_fp_enabled();
+
dcn31_populate_dml_pipes_from_context(dc, context, pipes,
validate_mode);
@@ -498,9 +500,7 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
- DC_FP_START();
dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
- DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.dcc_rate = 3;
@@ -581,6 +581,8 @@ void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context)
unsigned int i, plane_count = 0;
DC_LOGGER_INIT(dc->ctx->logger);
+ dc_assert_fp_enabled();
+
for (i = 0; i < dc->res_pool->pipe_count; i++) {
if (context->res_ctx.pipe_ctx[i].plane_state)
plane_count++;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
index 6f516af82956..77023b619f1e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
@@ -10,7 +10,7 @@
#include "dml/dcn35/dcn35_fpu.h"
#include "dml/dml_inline_defs.h"
-#include "link.h"
+#include "link_service.h"
#define DC_LOGGER_INIT(logger)
@@ -478,6 +478,8 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc,
bool upscaled = false;
const unsigned int max_allowed_vblank_nom = 1023;
+ dc_assert_fp_enabled();
+
dcn31_populate_dml_pipes_from_context(dc, context, pipes,
validate_mode);
@@ -531,9 +533,7 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc,
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
- DC_FP_START();
dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
- DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.dcc_rate = 3;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index 715f9019a33e..4b9b2e84d381 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -6529,7 +6529,7 @@ static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mo
mode_lib->ms.TotImmediateFlipBytes = 0;
for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) {
- mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k];
+ mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]);
if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) {
mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]);
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index f6879e622271..bf5e7f4e0416 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -84,25 +84,29 @@ static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stre
static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing,
struct dc_stream_state *stream,
+ struct pipe_ctx *pipe_ctx,
struct dml2_context *dml_ctx)
{
unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz;
+ uint32_t pix_clk_100hz;
- timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
+ timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding;
timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
timing->h_front_porch = stream->timing.h_front_porch;
timing->v_front_porch = stream->timing.v_front_porch;
timing->pixel_clock_khz = stream->timing.pix_clk_100hz / 10;
+ if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0)
+ timing->pixel_clock_khz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz / 10;
if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
timing->pixel_clock_khz *= 2;
- timing->h_total = stream->timing.h_total;
+ timing->h_total = stream->timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding;
timing->v_total = stream->timing.v_total;
timing->h_sync_width = stream->timing.h_sync_width;
timing->interlaced = stream->timing.flags.INTERLACE;
hblank_start = stream->timing.h_total - stream->timing.h_front_porch;
- timing->h_blank_end = hblank_start - stream->timing.h_addressable
+ timing->h_blank_end = hblank_start - stream->timing.h_addressable - pipe_ctx->dsc_padding_params.dsc_hactive_padding
- stream->timing.h_border_left - stream->timing.h_border_right;
if (hblank_start < stream->timing.h_addressable)
@@ -121,8 +125,13 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf
/* limit min refresh rate to DC cap */
min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz;
if (stream->ctx->dc->caps.max_v_total != 0) {
- min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
- (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream)));
+ if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) {
+ pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz;
+ } else {
+ pix_clk_100hz = stream->timing.pix_clk_100hz;
+ }
+ min_hardware_refresh_in_uhz = div64_u64((pix_clk_100hz * 100000000ULL),
+ (timing->h_total * (long long)calc_max_hardware_v_total(stream)));
}
timing->drr_config.min_refresh_uhz = max(stream->timing.min_refresh_in_uhz, min_hardware_refresh_in_uhz);
@@ -173,21 +182,6 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf
timing->vblank_nom = timing->v_total - timing->v_active;
}
-/**
- * adjust_dml21_hblank_timing_config_from_pipe_ctx - Adjusts the horizontal blanking timing configuration
- * based on the pipe context.
- * @timing: Pointer to the dml2_timing_cfg structure to be adjusted.
- * @pipe: Pointer to the pipe_ctx structure containing the horizontal blanking borrow value.
- *
- * This function modifies the horizontal active and blank end timings by adding and subtracting
- * the horizontal blanking borrow value from the pipe context, respectively.
- */
-static void adjust_dml21_hblank_timing_config_from_pipe_ctx(struct dml2_timing_cfg *timing, struct pipe_ctx *pipe)
-{
- timing->h_active += pipe->hblank_borrow;
- timing->h_blank_end -= pipe->hblank_borrow;
-}
-
static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output,
struct dc_stream_state *stream, const struct pipe_ctx *pipe)
{
@@ -487,7 +481,9 @@ static const struct scaler_data *get_scaler_data_for_plane(
temp_pipe->plane_state = pipe->plane_state;
temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps;
temp_pipe->stream_res = pipe->stream_res;
- temp_pipe->hblank_borrow = pipe->hblank_borrow;
+ temp_pipe->dsc_padding_params.dsc_hactive_padding = pipe->dsc_padding_params.dsc_hactive_padding;
+ temp_pipe->dsc_padding_params.dsc_htotal_padding = pipe->dsc_padding_params.dsc_htotal_padding;
+ temp_pipe->dsc_padding_params.dsc_pix_clk_100hz = pipe->dsc_padding_params.dsc_pix_clk_100hz;
dml_ctx->config.callbacks.build_scaling_params(temp_pipe);
break;
}
@@ -755,8 +751,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s
disp_cfg_stream_location = dml_dispcfg->num_streams++;
ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
- populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx);
- adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]);
+ populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index], dml_ctx);
populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]);
populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index], &context->stream_status[stream_index]);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
index 798abb2b2e67..08f7f03b1023 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
@@ -224,7 +224,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s
dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
/* Populate stream, plane mappings and other fields in display config. */
+ DC_FP_START();
result = dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx);
+ DC_FP_END();
if (!result)
return false;
@@ -279,7 +281,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co
dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
mode_support->dml2_instance = dml_init->dml2_instance;
+ DC_FP_START();
dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx);
+ DC_FP_END();
dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming;
DC_FP_START();
is_supported = dml2_check_mode_supported(mode_support);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
index 7de10a95cfdb..41adb1104d0f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
@@ -16,9 +16,9 @@ struct dml2_instance;
enum dml2_project_id {
dml2_project_invalid = 0,
- dml2_project_dcn4x_stage1 = 1,
- dml2_project_dcn4x_stage2 = 2,
- dml2_project_dcn4x_stage2_auto_drr_svp = 3,
+ dml2_project_dcn4x_stage1,
+ dml2_project_dcn4x_stage2,
+ dml2_project_dcn4x_stage2_auto_drr_svp,
};
enum dml2_pstate_change_support {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
index e763c8e45da8..1b9579a32ff2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
@@ -48,18 +48,19 @@ static void set_reserved_time_on_all_planes_with_stream_index(struct display_con
static void remove_duplicates(double *list_a, int *list_a_size)
{
- int cur_element = 0;
- // For all elements b[i] in list_b[]
- while (cur_element < *list_a_size - 1) {
- if (list_a[cur_element] == list_a[cur_element + 1]) {
- for (int j = cur_element + 1; j < *list_a_size - 1; j++) {
- list_a[j] = list_a[j + 1];
- }
- *list_a_size = *list_a_size - 1;
- } else {
- cur_element++;
+ int j = 0;
+
+ if (*list_a_size == 0)
+ return;
+
+ for (int i = 1; i < *list_a_size; i++) {
+ if (list_a[j] != list_a[i]) {
+ j++;
+ list_a[j] = list_a[i];
}
}
+
+ *list_a_size = j + 1;
}
static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit)
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
index bd1b9aef6d5c..89f0d999bf35 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
@@ -406,9 +406,10 @@ bool dsc_prepare_config(const struct dsc_config *dsc_cfg, struct dsc_reg_values
dsc_reg_vals->alternate_ich_encoding_en = dsc_reg_vals->pps.dsc_version_minor == 1 ? 0 : 1;
dsc_reg_vals->ich_reset_at_eol = (dsc_cfg->is_odm || dsc_reg_vals->num_slices_h > 1) ? 0xF : 0;
+ // Need to find the ceiling value for the slice width
+ dsc_reg_vals->pps.slice_width = (dsc_cfg->pic_width + dsc_cfg->dc_dsc_cfg.num_slices_h - 1) / dsc_cfg->dc_dsc_cfg.num_slices_h;
// TODO: in addition to validating slice height (pic height must be divisible by slice height),
// see what happens when the same condition doesn't apply for slice_width/pic_width.
- dsc_reg_vals->pps.slice_width = dsc_cfg->pic_width / dsc_cfg->dc_dsc_cfg.num_slices_h;
dsc_reg_vals->pps.slice_height = dsc_cfg->pic_height / dsc_cfg->dc_dsc_cfg.num_slices_v;
ASSERT(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height);
diff --git a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
index 1313a7c5d87b..73a1e6a03719 100644
--- a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
@@ -28,7 +28,7 @@
#include "include/hdcp_msg_types.h"
#include "include/signal_types.h"
#include "core_types.h"
-#include "link.h"
+#include "link_service.h"
#include "link_hwss.h"
#include "link/protocols/link_dpcd.h"
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
index 92957398ac0a..4d4ca6d77bbd 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
@@ -28,6 +28,7 @@
#include "dcn32_hubbub.h"
#include "dm_services.h"
#include "reg_helper.h"
+#include "dal_asic_id.h"
#define CTX \
@@ -72,6 +73,14 @@ static void dcn32_init_crb(struct hubbub *hubbub)
REG_UPDATE(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, 0x47F);
}
+static void hubbub32_set_sdp_control(struct hubbub *hubbub, bool dc_control)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+ REG_UPDATE(DCHUBBUB_SDPIF_CFG0,
+ SDPIF_PORT_CONTROL, dc_control);
+}
+
void hubbub32_set_request_limit(struct hubbub *hubbub, int memory_channel_count, int words_per_channel)
{
struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
@@ -754,8 +763,18 @@ static bool hubbub32_program_watermarks(
unsigned int refclk_mhz,
bool safe_to_lower)
{
+ struct dc *dc = hubbub->ctx->dc;
bool wm_pending = false;
+ if (!safe_to_lower && dc->debug.disable_stutter_for_wm_program &&
+ (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) ||
+ ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) {
+ /* before raising watermarks, SDP control give to DF, stutter must be disabled */
+ wm_pending = true;
+ hubbub32_set_sdp_control(hubbub, false);
+ hubbub1_allow_self_refresh_control(hubbub, false);
+ }
+
if (hubbub32_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
wm_pending = true;
@@ -786,10 +805,20 @@ static bool hubbub32_program_watermarks(
REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/
- if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter)
- hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+ if (safe_to_lower) {
+ /* after lowering watermarks, stutter setting is restored, SDP control given to DC */
+ hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter);
+
+ if (dc->debug.disable_stutter_for_wm_program &&
+ (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) ||
+ ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) {
+ hubbub32_set_sdp_control(hubbub, true);
+ }
+ } else if (dc->debug.disable_stutter) {
+ hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter);
+ }
- hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow);
+ hubbub32_force_usr_retraining_allow(hubbub, dc->debug.force_usr_allow);
return wm_pending;
}
@@ -974,8 +1003,7 @@ void hubbub32_init(struct hubbub *hubbub)
ignore the "df_pre_cstate_req" from the SDP port control.
only the DCN will determine when to connect the SDP port
*/
- REG_UPDATE(DCHUBBUB_SDPIF_CFG0,
- SDPIF_PORT_CONTROL, 1);
+ hubbub32_set_sdp_control(hubbub, true);
/*Set SDP's max outstanding request to 512
must set the register back to 0 (max outstanding = 256) in zero frame buffer mode*/
REG_UPDATE(DCHUBBUB_SDPIF_CFG1,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 153d68375fa3..24184b4eb352 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -48,7 +48,7 @@
#include "link_encoder.h"
#include "link_enc_cfg.h"
#include "link_hwss.h"
-#include "link.h"
+#include "link_service.h"
#include "dccg.h"
#include "clock_source.h"
#include "clk_mgr.h"
@@ -1601,17 +1601,19 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw(
}
if (pipe_ctx->stream_res.audio != NULL) {
- build_audio_output(context, pipe_ctx, &pipe_ctx->stream_res.audio_output);
+ struct audio_output audio_output = {0};
- link_hwss->setup_audio_output(pipe_ctx, &pipe_ctx->stream_res.audio_output,
+ build_audio_output(context, pipe_ctx, &audio_output);
+
+ link_hwss->setup_audio_output(pipe_ctx, &audio_output,
pipe_ctx->stream_res.audio->inst);
pipe_ctx->stream_res.audio->funcs->az_configure(
pipe_ctx->stream_res.audio,
pipe_ctx->stream->signal,
- &pipe_ctx->stream_res.audio_output.crtc_info,
+ &audio_output.crtc_info,
&pipe_ctx->stream->audio_info,
- &pipe_ctx->stream_res.audio_output.dp_link_info);
+ &audio_output.dp_link_info);
if (dc->config.disable_hbr_audio_dp2)
if (pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio &&
@@ -1923,10 +1925,8 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
get_edp_streams(context, edp_streams, &edp_stream_num);
- // Check fastboot support, disable on DCE8 because of blank screens
- if (edp_num && edp_stream_num && dc->ctx->dce_version != DCE_VERSION_8_0 &&
- dc->ctx->dce_version != DCE_VERSION_8_1 &&
- dc->ctx->dce_version != DCE_VERSION_8_3) {
+ /* Check fastboot support, disable on DCE 6-8 because of blank screens */
+ if (edp_num && edp_stream_num && dc->ctx->dce_version < DCE_VERSION_10_0) {
for (i = 0; i < edp_num; i++) {
edp_link = edp_links[i];
if (edp_link != edp_streams[0]->link)
@@ -2385,7 +2385,9 @@ static void dce110_setup_audio_dto(
if (pipe_ctx->stream->signal != SIGNAL_TYPE_HDMI_TYPE_A)
continue;
if (pipe_ctx->stream_res.audio != NULL) {
- build_audio_output(context, pipe_ctx, &pipe_ctx->stream_res.audio_output);
+ struct audio_output audio_output;
+
+ build_audio_output(context, pipe_ctx, &audio_output);
if (dc->res_pool->dccg && dc->res_pool->dccg->funcs->set_audio_dtbclk_dto) {
struct dtbclk_dto_params dto_params = {0};
@@ -2396,14 +2398,14 @@ static void dce110_setup_audio_dto(
pipe_ctx->stream_res.audio->funcs->wall_dto_setup(
pipe_ctx->stream_res.audio,
pipe_ctx->stream->signal,
- &pipe_ctx->stream_res.audio_output.crtc_info,
- &pipe_ctx->stream_res.audio_output.pll_info);
+ &audio_output.crtc_info,
+ &audio_output.pll_info);
} else
pipe_ctx->stream_res.audio->funcs->wall_dto_setup(
pipe_ctx->stream_res.audio,
pipe_ctx->stream->signal,
- &pipe_ctx->stream_res.audio_output.crtc_info,
- &pipe_ctx->stream_res.audio_output.pll_info);
+ &audio_output.crtc_info,
+ &audio_output.pll_info);
break;
}
}
@@ -2423,15 +2425,15 @@ static void dce110_setup_audio_dto(
continue;
if (pipe_ctx->stream_res.audio != NULL) {
- build_audio_output(context,
- pipe_ctx,
- &pipe_ctx->stream_res.audio_output);
+ struct audio_output audio_output = {0};
+
+ build_audio_output(context, pipe_ctx, &audio_output);
pipe_ctx->stream_res.audio->funcs->wall_dto_setup(
pipe_ctx->stream_res.audio,
pipe_ctx->stream->signal,
- &pipe_ctx->stream_res.audio_output.crtc_info,
- &pipe_ctx->stream_res.audio_output.pll_info);
+ &audio_output.crtc_info,
+ &audio_output.pll_info);
break;
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 506c3bbbf221..e9fe97f0c4ea 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -55,7 +55,7 @@
#include "dce/dmub_hw_lock_mgr.h"
#include "dc_trace.h"
#include "dce/dmub_outbox.h"
-#include "link.h"
+#include "link_service.h"
#include "dc_state_priv.h"
#define DC_LOGGER \
@@ -3347,7 +3347,7 @@ void dcn10_prepare_bandwidth(
context,
false);
- dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub,
+ dc->optimized_required = hubbub->funcs->program_watermarks(hubbub,
&context->bw_ctx.bw.dcn.watermarks,
dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
true);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index cc377fcda6ff..9477c9f9e196 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -54,7 +54,7 @@
#include "dpcd_defs.h"
#include "inc/link_enc_cfg.h"
#include "link_hwss.h"
-#include "link.h"
+#include "link_service.h"
#include "dc_state_priv.h"
#define DC_LOGGER \
@@ -1982,10 +1982,8 @@ static void dcn20_program_pipe(
* updating on slave planes
*/
if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->update_flags.bits.plane_changed ||
- pipe_ctx->stream->update_flags.bits.out_tf ||
- (pipe_ctx->plane_state &&
- pipe_ctx->plane_state->update_flags.bits.output_tf_change))
+ pipe_ctx->update_flags.bits.plane_changed ||
+ pipe_ctx->stream->update_flags.bits.out_tf)
hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
/* If the pipe has been enabled or has a different opp, we
@@ -2390,10 +2388,10 @@ void dcn20_prepare_bandwidth(
}
/* program dchubbub watermarks:
- * For assigning wm_optimized_required, use |= operator since we don't want
+ * For assigning optimized_required, use |= operator since we don't want
* to clear the value if the optimize has not happened yet
*/
- dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+ dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
&context->bw_ctx.bw.dcn.watermarks,
dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
false);
@@ -2406,10 +2404,10 @@ void dcn20_prepare_bandwidth(
if (hubbub->funcs->program_compbuf_size) {
if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) {
compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes;
- dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
+ dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
} else {
compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb;
- dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
+ dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
}
hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false);
@@ -3131,7 +3129,8 @@ void dcn20_fpga_init_hw(struct dc *dc)
res_pool->dccg->funcs->dccg_init(res_pool->dccg);
//Enable ability to power gate / don't force power on permanently
- hws->funcs.enable_power_gating_plane(hws, true);
+ if (hws->funcs.enable_power_gating_plane)
+ hws->funcs.enable_power_gating_plane(hws, true);
// Specific to FPGA dccg and registers
REG_WRITE(RBBMIF_TIMEOUT_DIS, 0xFFFFFFFF);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
index 61efb15572ff..e2269211553c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
@@ -35,7 +35,7 @@
#include "hw/clk_mgr.h"
#include "dc_dmub_srv.h"
#include "abm.h"
-#include "link.h"
+#include "link_service.h"
#define DC_LOGGER_INIT(logger)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index 139a63101488..e47ed5571dfd 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -50,7 +50,7 @@
#include "dpcd_defs.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dc_state_priv.h"
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
index 8ba934b83957..b822f2dffff0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
@@ -45,7 +45,7 @@
#include "link_hwss.h"
#include "dpcd_defs.h"
#include "dce/dmub_outbox.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn10/dcn10_hwseq.h"
#include "dcn21/dcn21_hwseq.h"
#include "inc/link_enc_cfg.h"
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
index 560984533950..f925f669f2a4 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
@@ -46,7 +46,7 @@
#include "link_hwss.h"
#include "dpcd_defs.h"
#include "dce/dmub_outbox.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn10/dcn10_hwseq.h"
#include "inc/link_enc_cfg.h"
#include "dcn30/dcn30_vpg.h"
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 416b1dca3dac..f39292952702 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -49,7 +49,7 @@
#include "dcn20/dcn20_optc.h"
#include "dce/dmub_hw_lock_mgr.h"
#include "dcn32/dcn32_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "../dcn20/dcn20_hwseq.h"
#include "dc_state_priv.h"
@@ -1052,7 +1052,7 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
}
/* Enable DSC hw block */
- dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow +
+ dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding +
stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 764eff6a4ec6..05011061822c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -46,7 +46,7 @@
#include "link_hwss.h"
#include "dpcd_defs.h"
#include "dce/dmub_outbox.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn10/dcn10_hwseq.h"
#include "inc/link_enc_cfg.h"
#include "dcn30/dcn30_vpg.h"
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
index d5b5e2ce6ff6..7c276c319086 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
@@ -25,7 +25,7 @@
#include "dpcd_defs.h"
#include "clk_mgr.h"
#include "dsc.h"
-#include "link.h"
+#include "link_service.h"
#include "dce/dmub_hw_lock_mgr.h"
#include "dcn10/dcn10_cm_common.h"
@@ -810,9 +810,12 @@ enum dc_status dcn401_enable_stream_timing(
if (dc->hwseq->funcs.PLAT_58856_wa && (!dc_is_dp_signal(stream->signal)))
dc->hwseq->funcs.PLAT_58856_wa(context, pipe_ctx);
- /* if we are borrowing from hblank, h_addressable needs to be adjusted */
- if (dc->debug.enable_hblank_borrow)
- patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->hblank_borrow;
+ /* if we are padding, h_addressable needs to be adjusted */
+ if (dc->debug.enable_hblank_borrow) {
+ patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding;
+ patched_crtc_timing.h_total = patched_crtc_timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding;
+ patched_crtc_timing.pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz;
+ }
pipe_ctx->stream_res.tg->funcs->program_timing(
pipe_ctx->stream_res.tg,
@@ -1380,22 +1383,22 @@ void dcn401_prepare_bandwidth(struct dc *dc,
false);
/* program dchubbub watermarks:
- * For assigning wm_optimized_required, use |= operator since we don't want
+ * For assigning optimized_required, use |= operator since we don't want
* to clear the value if the optimize has not happened yet
*/
- dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+ dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
&context->bw_ctx.bw.dcn.watermarks,
dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
false);
/* update timeout thresholds */
if (hubbub->funcs->program_arbiter) {
- dc->wm_optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false);
+ dc->optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false);
}
/* decrease compbuf size */
if (hubbub->funcs->program_compbuf_segments) {
compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size;
- dc->wm_optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size);
+ dc->optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size);
hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size, false);
}
@@ -2029,10 +2032,8 @@ void dcn401_program_pipe(
* updating on slave planes
*/
if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->update_flags.bits.plane_changed ||
- pipe_ctx->stream->update_flags.bits.out_tf ||
- (pipe_ctx->plane_state &&
- pipe_ctx->plane_state->update_flags.bits.output_tf_change))
+ pipe_ctx->update_flags.bits.plane_changed ||
+ pipe_ctx->stream->update_flags.bits.out_tf)
hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
/* If the pipe has been enabled or has a different opp, we
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index d30f94c35f11..d11893f8c916 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -228,7 +228,8 @@ struct resource_funcs {
enum dc_status (*update_dc_state_for_encoder_switch)(struct dc_link *link,
struct dc_link_settings *link_setting,
uint8_t pipe_count,
- struct pipe_ctx *pipes);
+ struct pipe_ctx *pipes,
+ struct audio_output *audio_output);
};
struct audio_support{
@@ -360,8 +361,6 @@ struct stream_resource {
uint8_t gsl_group;
struct test_pattern_params test_pattern_params;
-
- struct audio_output audio_output;
};
struct plane_resource {
@@ -437,6 +436,13 @@ enum p_state_switch_method {
P_STATE_V_BLANK_SUB_VP,
};
+struct dsc_padding_params {
+ /* pixels borrowed from hblank to hactive */
+ uint8_t dsc_hactive_padding;
+ uint32_t dsc_htotal_padding;
+ uint32_t dsc_pix_clk_100hz;
+};
+
struct pipe_ctx {
struct dc_plane_state *plane_state;
struct dc_stream_state *stream;
@@ -494,8 +500,7 @@ struct pipe_ctx {
/* subvp_index: only valid if the pipe is a SUBVP_MAIN*/
uint8_t subvp_index;
struct pixel_rate_divider pixel_rate_divider;
- /* pixels borrowed from hblank to hactive */
- uint8_t hblank_borrow;
+ struct dsc_padding_params dsc_padding_params;
/* next vupdate */
uint32_t next_vupdate;
uint32_t wait_frame_count;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link_service.h
index 0cce49d95e26..1e34e84160aa 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link_service.h
@@ -42,8 +42,8 @@
* dc_link_exports.c or other dc files implement dc.h
*
* DC to Link:
- * dc_link_exports.c or other dc files include link.h
- * link_factory.c implements link.h
+ * dc_link_exports.c or other dc files include link_service.h
+ * link_factory.c implements link_service.h
*
* Link sub-component to Link sub-component:
* link_factory.c includes --> link_xxx.h
@@ -73,7 +73,7 @@
* 2. Implement your function in the suitable link_xxx.c file.
* 3. Assign the function to link_service in link_factory.c
* 4. NEVER include link_xxx.h headers outside link component.
- * 5. NEVER include link.h on DM side.
+ * 5. NEVER include link_service.h on DM side.
*/
#include "core_types.h"
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index a890f581f4e8..4e26a16a8743 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -45,6 +45,7 @@ enum dce_version resource_parse_asic_id(
struct resource_caps {
int num_timing_generator;
int num_opp;
+ int num_dpp;
int num_video_plane;
int num_audio;
int num_stream_encoder;
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c
index 23f41c99fa38..9e33bf937a69 100644
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c
+++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c
@@ -75,7 +75,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link,
bool is_hpo_acquired;
uint8_t count;
int i;
-
+ struct audio_output audio_output[MAX_PIPES];
struct dc_stream_state *streams_on_link[MAX_PIPES];
int num_streams_on_link = 0;
@@ -101,7 +101,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link,
if (needs_divider_update && link->dc->res_pool->funcs->update_dc_state_for_encoder_switch) {
link->dc->res_pool->funcs->update_dc_state_for_encoder_switch(link,
link_setting, count,
- *pipes);
+ *pipes, &audio_output[0]);
for (i = 0; i < count; i++) {
pipes[i]->clock_source->funcs->program_pix_clk(
pipes[i]->clock_source,
@@ -113,16 +113,15 @@ static void dp_retrain_link_dp_test(struct dc_link *link,
const struct link_hwss *link_hwss = get_link_hwss(
link, &pipes[i]->link_res);
- link_hwss->setup_audio_output(pipes[i],
- &pipes[i]->stream_res.audio_output,
- pipes[i]->stream_res.audio->inst);
+ link_hwss->setup_audio_output(pipes[i], &audio_output[i],
+ pipes[i]->stream_res.audio->inst);
pipes[i]->stream_res.audio->funcs->az_configure(
pipes[i]->stream_res.audio,
pipes[i]->stream->signal,
- &pipes[i]->stream_res.audio_output.crtc_info,
+ &audio_output[i].crtc_info,
&pipes[i]->stream->audio_info,
- &pipes[i]->stream_res.audio_output.dp_link_info);
+ &audio_output[i].dp_link_info);
if (link->dc->config.disable_hbr_audio_dp2 &&
pipes[i]->stream_res.audio->funcs->az_disable_hbr_audio &&
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h
index eae23ea7f6ec..033650cdb811 100644
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h
+++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h
@@ -24,7 +24,7 @@
*/
#ifndef __LINK_DP_CTS_H__
#define __LINK_DP_CTS_H__
-#include "link.h"
+#include "link_service.h"
void dp_handle_automated_test(struct dc_link *link);
bool dp_set_test_pattern(
struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h
index ab437a0c9101..9ff4a6c46a2b 100644
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h
+++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h
@@ -24,7 +24,7 @@
*/
#ifndef __LINK_DP_TRACE_H__
#define __LINK_DP_TRACE_H__
-#include "link.h"
+#include "link_service.h"
void dp_trace_init(struct dc_link *link);
void dp_trace_reset(struct dc_link *link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
index 45f0e091fcb0..4a25210a344f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
@@ -27,7 +27,7 @@
#define __LINK_HWSS_DIO_H__
#include "link_hwss.h"
-#include "link.h"
+#include "link_service.h"
const struct link_hwss *get_dio_link_hwss(void);
bool can_use_dio_link_hwss(const struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h
index 9ac08a332540..cf578a8662a4 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h
@@ -25,7 +25,7 @@
#ifndef __LINK_HWSS_DIO_FIXED_VS_PE_RETIMER_H__
#define __LINK_HWSS_DIO_FIXED_VS_PE_RETIMER_H__
-#include "link.h"
+#include "link_service.h"
uint32_t dp_dio_fixed_vs_pe_retimer_get_lttpr_write_address(struct dc_link *link);
uint8_t dp_dio_fixed_vs_pe_retimer_lane_cfg_to_hw_cfg(struct dc_link *link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h
index 1d3ed8ca83b5..7c9005bc2587 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h
@@ -26,7 +26,7 @@
#define __LINK_HWSS_HPO_DP_H__
#include "link_hwss.h"
-#include "link.h"
+#include "link_service.h"
void set_hpo_dp_throttled_vcp_size(struct pipe_ctx *pipe_ctx,
struct fixed31_32 throttled_vcp_size);
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h
index 82301187bc7c..8bf36827ecfb 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h
@@ -25,7 +25,7 @@
#ifndef __LINK_HWSS_HPO_FIXED_VS_PE_RETIMER_DP_H__
#define __LINK_HWSS_HPO_FIXED_VS_PE_RETIMER_DP_H__
-#include "link.h"
+#include "link_service.h"
bool requires_fixed_vs_pe_retimer_hpo_link_hwss(const struct dc_link *link);
const struct link_hwss *get_hpo_fixed_vs_pe_retimer_dp_link_hwss(void);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index b717e430051a..85303167a553 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -1140,6 +1140,10 @@ static bool detect_link_and_local_sink(struct dc_link *link,
if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A &&
!sink->edid_caps.edid_hdmi)
sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
+ else if (dc_is_dvi_signal(sink->sink_signal) &&
+ aud_support->hdmi_audio_native &&
+ sink->edid_caps.edid_hdmi)
+ sink->sink_signal = SIGNAL_TYPE_HDMI_TYPE_A;
if (link->local_sink && dc_is_dp_signal(sink_caps.signal))
dp_trace_init(link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.h b/drivers/gpu/drm/amd/display/dc/link/link_detection.h
index 7da05078721e..1ab29476060b 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.h
@@ -25,7 +25,7 @@
#ifndef __DC_LINK_DETECTION_H__
#define __DC_LINK_DETECTION_H__
-#include "link.h"
+#include "link_service.h"
bool link_detect(struct dc_link *link, enum dc_detect_reason reason);
bool link_detect_connection_type(struct dc_link *link,
enum dc_connection_type *type);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 08ee8d2f777b..83419e1a9036 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -832,7 +832,7 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
enum optc_dsc_mode optc_dsc_mode;
/* Enable DSC hw block */
- dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow +
+ dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding +
stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.h b/drivers/gpu/drm/amd/display/dc/link/link_dpms.h
index 9398f9c1666a..bd6fc63064a3 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DPMS_H__
#define __DC_LINK_DPMS_H__
-#include "link.h"
+#include "link_service.h"
void link_set_dpms_on(
struct dc_state *state,
struct pipe_ctx *pipe_ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.h b/drivers/gpu/drm/amd/display/dc/link/link_factory.h
index e96220d48d03..aad36ca1a31c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.h
@@ -24,7 +24,7 @@
*/
#ifndef __LINK_FACTORY_H__
#define __LINK_FACTORY_H__
-#include "link.h"
+#include "link_service.h"
struct dc_link *link_create(const struct link_init_data *init_params);
void link_destroy(struct dc_link **link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_resource.h b/drivers/gpu/drm/amd/display/dc/link/link_resource.h
index 1907bda3cb6e..f7aa3bc3a93a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_resource.h
@@ -24,7 +24,7 @@
*/
#ifndef __LINK_RESOURCE_H__
#define __LINK_RESOURCE_H__
-#include "link.h"
+#include "link_service.h"
void link_get_cur_res_map(const struct dc *dc, uint32_t *map);
void link_restore_res_map(const struct dc *dc, uint32_t *map);
void link_get_cur_link_res(const struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
index 9553c81053fe..595774e76453 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
@@ -24,7 +24,7 @@
*/
#ifndef __LINK_VALIDATION_H__
#define __LINK_VALIDATION_H__
-#include "link.h"
+#include "link_service.h"
enum dc_status link_validate_mode_timing(
const struct dc_stream_state *stream,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h
index a3e25e55bed6..d3e6f01a6a90 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h
@@ -26,7 +26,7 @@
#ifndef __DAL_DDC_SERVICE_H__
#define __DAL_DDC_SERVICE_H__
-#include "link.h"
+#include "link_service.h"
#define AUX_POWER_UP_WA_DELAY 500
#define I2C_OVER_AUX_DEFER_WA_DELAY 70
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h
index 7170db5a1c13..6e17f72a752f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DP_CAPABILITY_H__
#define __DC_LINK_DP_CAPABILITY_H__
-#include "link.h"
+#include "link_service.h"
bool detect_dp_sink_caps(struct dc_link *link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h
index a61edfc9ca7a..7cd03fa4892b 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h
@@ -27,7 +27,7 @@
#ifndef __DC_LINK_DPIA_H__
#define __DC_LINK_DPIA_H__
-#include "link.h"
+#include "link_service.h"
/* Read tunneling device capability from DPCD and update link capability
* accordingly.
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
index 41efcb3e44e2..30cd8e2b9d35 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
@@ -26,7 +26,7 @@
#ifndef DC_INC_LINK_DP_DPIA_BW_H_
#define DC_INC_LINK_DP_DPIA_BW_H_
-#include "link.h"
+#include "link_service.h"
/*
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h
index ac33730fedd4..87516fb3b45a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DP_IRQ_HANDLER_H__
#define __DC_LINK_DP_IRQ_HANDLER_H__
-#include "link.h"
+#include "link_service.h"
bool dp_parse_link_loss_status(
struct dc_link *link,
union hpd_irq_data *hpd_irq_dpcd_data);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h
index ab1c1f8f1f8b..58e154494582 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DP_PHY_H__
#define __DC_LINK_DP_PHY_H__
-#include "link.h"
+#include "link_service.h"
void dp_enable_link_phy(
struct dc_link *link,
const struct link_resource *link_res,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index 134093ce5a8e..08e2b572e0ff 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -1729,6 +1729,15 @@ bool perform_link_training_with_retries(
break;
}
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST &&
+ !link->dc->config.enable_dpia_pre_training) {
+ if (j == (attempts - 1))
+ do_fallback = true;
+ else
+ do_fallback = false;
+ }
+
if (j == (attempts - 1)) {
DC_LOG_WARNING(
"%s: Link(%d) training attempt %u of %d failed @ rate(%d) x lane(%d) @ spread = %x : fail reason:(%d)\n",
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
index 574b083e0936..ce52de22ab7a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DP_TRAINING_H__
#define __DC_LINK_DP_TRAINING_H__
-#include "link.h"
+#include "link_service.h"
bool perform_link_training_with_retries(
const struct dc_link_settings *link_setting,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h
index 08d787a1e451..c2717c678c72 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h
@@ -25,7 +25,7 @@
#ifndef __LINK_DPCD_H__
#define __LINK_DPCD_H__
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
enum dc_status core_link_read_dpcd(
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index 8b7b87b21c2e..5e806edbb9f6 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -703,6 +703,20 @@ bool edp_setup_psr(struct dc_link *link,
if (!link)
return false;
+ /* This is a workaround: some vendors require the source to
+ * read the PSR cap; otherwise, the vendor's PSR feature will
+ * fall back to its default behavior, causing a misconfiguration
+ * of this feature.
+ */
+ if (link->panel_config.psr.read_psrcap_again) {
+ dm_helpers_dp_read_dpcd(
+ link->ctx,
+ link,
+ DP_PSR_SUPPORT,
+ &link->dpcd_caps.psr_info.psr_version,
+ sizeof(link->dpcd_caps.psr_info.psr_version));
+ }
+
//Clear PSR cfg
memset(&psr_configuration, 0, sizeof(psr_configuration));
dm_helpers_dp_write_dpcd(
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
index 4a475d5b9dde..62a6344e613e 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
@@ -25,7 +25,7 @@
#ifndef __DC_LINK_EDP_PANEL_CONTROL_H__
#define __DC_LINK_EDP_PANEL_CONTROL_H__
-#include "link.h"
+#include "link_service.h"
enum dp_panel_mode dp_get_panel_mode(struct dc_link *link);
void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h
index 4fb526b264f9..af529328ba17 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_HPD_H__
#define __DC_LINK_HPD_H__
-#include "link.h"
+#include "link_service.h"
enum hpd_source_id get_hpd_line(struct dc_link *link);
/*
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
index 3a51be63f020..c4b4dc3ad8c9 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
@@ -29,6 +29,7 @@
#include "stream_encoder.h"
#include "resource.h"
+#include "clk_mgr.h"
#include "include/irq_service_interface.h"
#include "virtual/virtual_stream_encoder.h"
#include "dce110/dce110_resource.h"
@@ -836,17 +837,24 @@ static enum dc_status build_mapped_resource(
return DC_OK;
}
-static enum dc_status dce100_validate_bandwidth(
+enum dc_status dce100_validate_bandwidth(
struct dc *dc,
struct dc_state *context,
enum dc_validate_mode validate_mode)
{
int i;
bool at_least_one_pipe = false;
+ struct dc_stream_state *stream = NULL;
+ const uint32_t max_pix_clk_khz = max(dc->clk_mgr->clks.max_supported_dispclk_khz, 400000);
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (context->res_ctx.pipe_ctx[i].stream)
+ stream = context->res_ctx.pipe_ctx[i].stream;
+ if (stream) {
at_least_one_pipe = true;
+
+ if (stream->timing.pix_clk_100hz >= max_pix_clk_khz * 10)
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
}
if (at_least_one_pipe) {
@@ -854,7 +862,16 @@ static enum dc_status dce100_validate_bandwidth(
context->bw_ctx.bw.dce.dispclk_khz = 681000;
context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ;
} else {
- context->bw_ctx.bw.dce.dispclk_khz = 0;
+ /* On DCE 6.0 and 6.4 the PLL0 is both the display engine clock and
+ * the DP clock, and shouldn't be turned off. Just select the display
+ * clock value from its low power mode.
+ */
+ if (dc->ctx->dce_version == DCE_VERSION_6_0 ||
+ dc->ctx->dce_version == DCE_VERSION_6_4)
+ context->bw_ctx.bw.dce.dispclk_khz = 352000;
+ else
+ context->bw_ctx.bw.dce.dispclk_khz = 0;
+
context->bw_ctx.bw.dce.yclk_khz = 0;
}
@@ -881,7 +898,7 @@ static bool dce100_validate_surface_sets(
return true;
}
-static enum dc_status dce100_validate_global(
+enum dc_status dce100_validate_global(
struct dc *dc,
struct dc_state *context)
{
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
index fecab7c560f5..dd150a4b4610 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
@@ -41,6 +41,15 @@ struct resource_pool *dce100_create_resource_pool(
enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps);
+enum dc_status dce100_validate_global(
+ struct dc *dc,
+ struct dc_state *context);
+
+enum dc_status dce100_validate_bandwidth(
+ struct dc *dc,
+ struct dc_state *context,
+ enum dc_validate_mode validate_mode);
+
enum dc_status dce100_add_stream_to_ctx(
struct dc *dc,
struct dc_state *new_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
index 2f23cc6df571..540e04ec1e2d 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
@@ -67,7 +67,7 @@
#include "reg_helper.h"
#include "dce100/dce100_resource.h"
-#include "link.h"
+#include "link_service.h"
#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL
#define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
index 53b60044653f..b75be6ad64f6 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
@@ -34,6 +34,7 @@
#include "stream_encoder.h"
#include "resource.h"
+#include "clk_mgr.h"
#include "include/irq_service_interface.h"
#include "irq/dce60/irq_service_dce60.h"
#include "dce110/dce110_timing_generator.h"
@@ -403,13 +404,13 @@ static const struct dc_plane_cap plane_cap = {
},
.max_upscale_factor = {
- .argb8888 = 16000,
+ .argb8888 = 1,
.nv12 = 1,
.fp16 = 1
},
.max_downscale_factor = {
- .argb8888 = 250,
+ .argb8888 = 1,
.nv12 = 1,
.fp16 = 1
}
@@ -863,61 +864,6 @@ static void dce60_resource_destruct(struct dce110_resource_pool *pool)
}
}
-static enum dc_status dce60_validate_bandwidth(
- struct dc *dc,
- struct dc_state *context,
- enum dc_validate_mode validate_mode)
-{
- int i;
- bool at_least_one_pipe = false;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (context->res_ctx.pipe_ctx[i].stream)
- at_least_one_pipe = true;
- }
-
- if (at_least_one_pipe) {
- /* TODO implement when needed but for now hardcode max value*/
- context->bw_ctx.bw.dce.dispclk_khz = 681000;
- context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ;
- } else {
- context->bw_ctx.bw.dce.dispclk_khz = 0;
- context->bw_ctx.bw.dce.yclk_khz = 0;
- }
-
- return DC_OK;
-}
-
-static bool dce60_validate_surface_sets(
- struct dc_state *context)
-{
- int i;
-
- for (i = 0; i < context->stream_count; i++) {
- if (context->stream_status[i].plane_count == 0)
- continue;
-
- if (context->stream_status[i].plane_count > 1)
- return false;
-
- if (context->stream_status[i].plane_states[0]->format
- >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
- return false;
- }
-
- return true;
-}
-
-static enum dc_status dce60_validate_global(
- struct dc *dc,
- struct dc_state *context)
-{
- if (!dce60_validate_surface_sets(context))
- return DC_FAIL_SURFACE_VALIDATE;
-
- return DC_OK;
-}
-
static void dce60_destroy_resource_pool(struct resource_pool **pool)
{
struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
@@ -931,10 +877,10 @@ static const struct resource_funcs dce60_res_pool_funcs = {
.destroy = dce60_destroy_resource_pool,
.link_enc_create = dce60_link_encoder_create,
.panel_cntl_create = dce60_panel_cntl_create,
- .validate_bandwidth = dce60_validate_bandwidth,
+ .validate_bandwidth = dce100_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
- .validate_global = dce60_validate_global,
+ .validate_global = dce100_validate_global,
.find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
};
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
index 3e8b0ac11d90..5b7769745202 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
@@ -32,6 +32,7 @@
#include "stream_encoder.h"
#include "resource.h"
+#include "clk_mgr.h"
#include "include/irq_service_interface.h"
#include "irq/dce80/irq_service_dce80.h"
#include "dce110/dce110_timing_generator.h"
@@ -869,61 +870,6 @@ static void dce80_resource_destruct(struct dce110_resource_pool *pool)
}
}
-static enum dc_status dce80_validate_bandwidth(
- struct dc *dc,
- struct dc_state *context,
- enum dc_validate_mode validate_mode)
-{
- int i;
- bool at_least_one_pipe = false;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (context->res_ctx.pipe_ctx[i].stream)
- at_least_one_pipe = true;
- }
-
- if (at_least_one_pipe) {
- /* TODO implement when needed but for now hardcode max value*/
- context->bw_ctx.bw.dce.dispclk_khz = 681000;
- context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ;
- } else {
- context->bw_ctx.bw.dce.dispclk_khz = 0;
- context->bw_ctx.bw.dce.yclk_khz = 0;
- }
-
- return DC_OK;
-}
-
-static bool dce80_validate_surface_sets(
- struct dc_state *context)
-{
- int i;
-
- for (i = 0; i < context->stream_count; i++) {
- if (context->stream_status[i].plane_count == 0)
- continue;
-
- if (context->stream_status[i].plane_count > 1)
- return false;
-
- if (context->stream_status[i].plane_states[0]->format
- >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
- return false;
- }
-
- return true;
-}
-
-static enum dc_status dce80_validate_global(
- struct dc *dc,
- struct dc_state *context)
-{
- if (!dce80_validate_surface_sets(context))
- return DC_FAIL_SURFACE_VALIDATE;
-
- return DC_OK;
-}
-
static void dce80_destroy_resource_pool(struct resource_pool **pool)
{
struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
@@ -937,10 +883,10 @@ static const struct resource_funcs dce80_res_pool_funcs = {
.destroy = dce80_destroy_resource_pool,
.link_enc_create = dce80_link_encoder_create,
.panel_cntl_create = dce80_panel_cntl_create,
- .validate_bandwidth = dce80_validate_bandwidth,
+ .validate_bandwidth = dce100_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
- .validate_global = dce80_validate_global,
+ .validate_global = dce100_validate_global,
.find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
};
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
index f9cbdad3ef37..84b38d2d6967 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
@@ -85,7 +85,7 @@
#include "vm_helper.h"
#include "link_enc_cfg.h"
-#include "link.h"
+#include "link_service.h"
#define DC_LOGGER_INIT(logger)
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
index 201ed863b69e..ff63f59ff928 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -60,7 +60,7 @@
#include "dml/display_mode_vba.h"
#include "dcn30/dcn30_dccg.h"
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dce/dce_panel_cntl.h"
#include "dcn30/dcn30_dwb.h"
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
index 3345068a878c..61623cb518d9 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
@@ -47,7 +47,8 @@
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
+
#include "dce/dce_abm.h"
#include "dce/dce_audio.h"
#include "dce/dce_aux.h"
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
index 3479e1eab4cd..02b9a84f2db3 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
@@ -47,7 +47,7 @@
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dce/dce_abm.h"
#include "dce/dce_audio.h"
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
index ca17e5d8fdc2..3ed7f50554e2 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
@@ -2239,7 +2239,8 @@ struct resource_pool *dcn31_create_resource_pool(
enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link,
struct dc_link_settings *link_setting,
uint8_t pipe_count,
- struct pipe_ctx *pipes)
+ struct pipe_ctx *pipes,
+ struct audio_output *audio_output)
{
struct dc_state *state = link->dc->current_state;
int i;
@@ -2254,7 +2255,7 @@ enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link,
// Setup audio
if (pipes[i].stream_res.audio != NULL)
- build_audio_output(state, &pipes[i], &pipes[i].stream_res.audio_output);
+ build_audio_output(state, &pipes[i], &audio_output[i]);
}
#else
/* This DCN requires rate divider updates and audio reprogramming to allow DP1<-->DP2 link rate switching,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
index 7e8fde65528f..c32c85ef0ba4 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
@@ -69,7 +69,8 @@ unsigned int dcn31_get_det_buffer_size(
enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link,
struct dc_link_settings *link_setting,
uint8_t pipe_count,
- struct pipe_ctx *pipes);
+ struct pipe_ctx *pipes,
+ struct audio_output *audio_output);
/*temp: B0 specific before switch to dcn313 headers*/
#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
index 663c49cce4aa..d4917a35b991 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
@@ -927,6 +927,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.enable_legacy_fast_update = true,
.using_dml2 = false,
.disable_dsc_power_gate = true,
+ .min_disp_clk_khz = 100000,
};
static const struct dc_panel_config panel_config_defaults = {
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index 9917b366f00c..3965a7f1b64b 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -69,7 +69,7 @@
#include "dml/display_mode_vba.h"
#include "dcn32/dcn32_dccg.h"
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn31/dcn31_panel_cntl.h"
#include "dcn30/dcn30_dwb.h"
@@ -739,6 +739,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.fpo_vactive_min_active_margin_us = 200,
.fpo_vactive_max_blank_us = 1000,
.enable_legacy_fast_update = false,
+ .disable_stutter_for_wm_program = true
};
static struct dce_aux *dcn32_aux_engine_create(
@@ -2852,7 +2853,7 @@ struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head(
free_pipe->plane_res.xfm = pool->transforms[free_pipe_idx];
free_pipe->plane_res.dpp = pool->dpps[free_pipe_idx];
free_pipe->plane_res.mpcc_inst = pool->dpps[free_pipe_idx]->inst;
- free_pipe->hblank_borrow = otg_master->hblank_borrow;
+ free_pipe->dsc_padding_params = otg_master->dsc_padding_params;
if (free_pipe->stream->timing.flags.DSC == 1) {
dcn20_acquire_dsc(free_pipe->stream->ctx->dc,
&new_ctx->res_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index 20d714596021..99f0432288b4 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -1230,7 +1230,8 @@ unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc,
SR(DCHUBBUB_ARB_MALL_CNTL), \
SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \
SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS), \
- SR(SDPIF_REQUEST_RATE_LIMIT)
+ SR(SDPIF_REQUEST_RATE_LIMIT), \
+ SR(DCHUBBUB_SDPIF_CFG0)
/* DCCG */
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index 061c0907d802..ad214986f7ac 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -72,7 +72,7 @@
#include "dml/display_mode_vba.h"
#include "dcn32/dcn32_dccg.h"
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn31/dcn31_panel_cntl.h"
#include "dcn30/dcn30_dwb.h"
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 8475c6eec547..fff57f23f4f7 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -61,7 +61,7 @@
#include "dcn31/dcn31_hpo_dp_stream_encoder.h"
#include "dcn31/dcn31_hpo_dp_link_encoder.h"
#include "dcn32/dcn32_hpo_dp_link_encoder.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn31/dcn31_apg.h"
#include "dcn32/dcn32_dio_link_encoder.h"
#include "dcn31/dcn31_vpg.h"
@@ -1760,6 +1760,20 @@ enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_stat
}
+static int populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode)
+{
+ int ret;
+
+ DC_FP_START();
+ ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode);
+ DC_FP_END();
+
+ return ret;
+}
+
static struct resource_funcs dcn35_res_pool_funcs = {
.destroy = dcn35_destroy_resource_pool,
.link_enc_create = dcn35_link_encoder_create,
@@ -1770,7 +1784,7 @@ static struct resource_funcs dcn35_res_pool_funcs = {
.validate_bandwidth = dcn35_validate_bandwidth,
.calculate_wm_and_dlg = NULL,
.update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
- .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu,
+ .populate_dml_pipes = populate_dml_pipes_from_context_fpu,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
.release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
@@ -1900,9 +1914,6 @@ static bool dcn35_resource_construct(
dc->caps.num_of_host_routers = 2;
dc->caps.num_of_dpias_per_host_router = 2;
- dc->caps.num_of_host_routers = 2;
- dc->caps.num_of_dpias_per_host_router = 2;
-
/* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order
* to provide some margin.
* It's expected for furture ASIC to have equal or higher value, in order to
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
index 0971c0f74186..0abd163b425e 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -40,7 +40,7 @@
#include "dcn31/dcn31_hpo_dp_stream_encoder.h"
#include "dcn31/dcn31_hpo_dp_link_encoder.h"
#include "dcn32/dcn32_hpo_dp_link_encoder.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn31/dcn31_apg.h"
#include "dcn32/dcn32_dio_link_encoder.h"
#include "dcn31/dcn31_vpg.h"
@@ -1732,6 +1732,21 @@ static enum dc_status dcn351_validate_bandwidth(struct dc *dc,
return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
}
+static int populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode)
+{
+ int ret;
+
+ DC_FP_START();
+ ret = dcn351_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode);
+ DC_FP_END();
+
+ return ret;
+
+}
+
static struct resource_funcs dcn351_res_pool_funcs = {
.destroy = dcn351_destroy_resource_pool,
.link_enc_create = dcn35_link_encoder_create,
@@ -1742,7 +1757,7 @@ static struct resource_funcs dcn351_res_pool_funcs = {
.validate_bandwidth = dcn351_validate_bandwidth,
.calculate_wm_and_dlg = NULL,
.update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
- .populate_dml_pipes = dcn351_populate_dml_pipes_from_context_fpu,
+ .populate_dml_pipes = populate_dml_pipes_from_context_fpu,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
.release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
@@ -1872,9 +1887,6 @@ static bool dcn351_resource_construct(
dc->caps.num_of_host_routers = 2;
dc->caps.num_of_dpias_per_host_router = 2;
- dc->caps.num_of_host_routers = 2;
- dc->caps.num_of_dpias_per_host_router = 2;
-
/* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order
* to provide some margin.
* It's expected for furture ASIC to have equal or higher value, in order to
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
index 8bae7fcedc22..ca125ee6c2fb 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
@@ -40,7 +40,7 @@
#include "dcn31/dcn31_hpo_dp_stream_encoder.h"
#include "dcn31/dcn31_hpo_dp_link_encoder.h"
#include "dcn32/dcn32_hpo_dp_link_encoder.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn31/dcn31_apg.h"
#include "dcn32/dcn32_dio_link_encoder.h"
#include "dcn31/dcn31_vpg.h"
@@ -1734,6 +1734,20 @@ static enum dc_status dcn35_validate_bandwidth(struct dc *dc,
}
+static int populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode)
+{
+ int ret;
+
+ DC_FP_START();
+ ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode);
+ DC_FP_END();
+
+ return ret;
+}
+
static struct resource_funcs dcn36_res_pool_funcs = {
.destroy = dcn36_destroy_resource_pool,
.link_enc_create = dcn35_link_encoder_create,
@@ -1744,7 +1758,7 @@ static struct resource_funcs dcn36_res_pool_funcs = {
.validate_bandwidth = dcn35_validate_bandwidth,
.calculate_wm_and_dlg = NULL,
.update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
- .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu,
+ .populate_dml_pipes = populate_dml_pipes_from_context_fpu,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
.release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
@@ -1873,9 +1887,6 @@ static bool dcn36_resource_construct(
dc->caps.num_of_host_routers = 2;
dc->caps.num_of_dpias_per_host_router = 2;
- dc->caps.num_of_host_routers = 2;
- dc->caps.num_of_dpias_per_host_router = 2;
-
/* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order
* to provide some margin.
* It's expected for furture ASIC to have equal or higher value, in order to
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
index 068c123ea8a8..1d18807e4749 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
@@ -50,7 +50,7 @@
#include "dml/display_mode_vba.h"
#include "dcn401/dcn401_dccg.h"
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "link_enc_cfg.h"
#include "dcn31/dcn31_panel_cntl.h"
@@ -1699,6 +1699,9 @@ static void dcn401_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx)
pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz;
+ if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0)
+ pixel_clk_params->requested_pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz;
+
if (!pipe_ctx->stream->ctx->dc->config.unify_link_enc_assignment)
link_enc = link_enc_cfg_get_link_enc(link);
if (link_enc)
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c
index 55b929ca7982..b1fb0f8a253a 100644
--- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c
@@ -641,16 +641,16 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
/* this gives the direction of the cositing (negative will move
* left, right otherwise)
*/
- int sign = 1;
+ int h_sign = flip_horz_scan_dir ? -1 : 1;
+ int v_sign = flip_vert_scan_dir ? -1 : 1;
switch (spl_in->basic_in.cositing) {
-
case CHROMA_COSITING_TOPLEFT:
- init_adj_h = spl_fixpt_from_fraction(sign, 4);
- init_adj_v = spl_fixpt_from_fraction(sign, 4);
+ init_adj_h = spl_fixpt_from_fraction(h_sign, 4);
+ init_adj_v = spl_fixpt_from_fraction(v_sign, 4);
break;
case CHROMA_COSITING_LEFT:
- init_adj_h = spl_fixpt_from_fraction(sign, 4);
+ init_adj_h = spl_fixpt_from_fraction(h_sign, 4);
init_adj_v = spl_fixpt_zero;
break;
case CHROMA_COSITING_NONE:
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index e65747f7f12f..92248224b713 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -4143,9 +4143,13 @@ struct dmub_cmd_replay_copy_settings_data {
*/
uint8_t hpo_link_enc_inst;
/**
+ * Determines if fast resync in ultra sleep mode is enabled/disabled.
+ */
+ uint8_t replay_support_fast_resync_in_ultra_sleep_mode;
+ /**
* @pad: Align structure to 4 byte boundary.
*/
- uint8_t pad[2];
+ uint8_t pad[1];
};
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c
index e7056205b050..ce041f6239dc 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c
@@ -89,44 +89,50 @@ static inline void dmub_dcn32_translate_addr(const union dmub_addr *addr_in,
void dmub_dcn32_reset(struct dmub_srv *dmub)
{
union dmub_gpint_data_register cmd;
- const uint32_t timeout = 30;
- uint32_t in_reset, scratch, i;
+ const uint32_t timeout = 100000;
+ uint32_t in_reset, is_enabled, scratch, i, pwait_mode;
REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset);
+ REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled);
- if (in_reset == 0) {
+ if (in_reset == 0 && is_enabled != 0) {
cmd.bits.status = 1;
cmd.bits.command_code = DMUB_GPINT__STOP_FW;
cmd.bits.param = 0;
dmub->hw_funcs.set_gpint(dmub, cmd);
- /**
- * Timeout covers both the ACK and the wait
- * for remaining work to finish.
- *
- * This is mostly bound by the PHY disable sequence.
- * Each register check will be greater than 1us, so
- * don't bother using udelay.
- */
-
for (i = 0; i < timeout; ++i) {
if (dmub->hw_funcs.is_gpint_acked(dmub, cmd))
break;
+
+ udelay(1);
}
for (i = 0; i < timeout; ++i) {
- scratch = dmub->hw_funcs.get_gpint_response(dmub);
+ scratch = REG_READ(DMCUB_SCRATCH7);
if (scratch == DMUB_GPINT__STOP_FW_RESPONSE)
break;
+
+ udelay(1);
}
+ for (i = 0; i < timeout; ++i) {
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode);
+ if (pwait_mode & (1 << 0))
+ break;
+
+ udelay(1);
+ }
/* Force reset in case we timed out, DMCUB is likely hung. */
}
- REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1);
- REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
- REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1);
+ if (is_enabled) {
+ REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1);
+ udelay(1);
+ REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
+ }
+
REG_WRITE(DMCUB_INBOX1_RPTR, 0);
REG_WRITE(DMCUB_INBOX1_WPTR, 0);
REG_WRITE(DMCUB_OUTBOX1_RPTR, 0);
@@ -135,7 +141,7 @@ void dmub_dcn32_reset(struct dmub_srv *dmub)
REG_WRITE(DMCUB_OUTBOX0_WPTR, 0);
REG_WRITE(DMCUB_SCRATCH0, 0);
- /* Clear the GPINT command manually so we don't reset again. */
+ /* Clear the GPINT command manually so we don't send anything during boot. */
cmd.all = 0;
dmub->hw_funcs.set_gpint(dmub, cmd);
}
@@ -419,8 +425,8 @@ uint32_t dmub_dcn32_get_current_time(struct dmub_srv *dmub)
void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub)
{
- uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset;
- uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled;
+ uint32_t is_dmub_enabled, is_soft_reset, is_pwait;
+ uint32_t is_traceport_enabled, is_cw6_enabled;
struct dmub_timeout_info timeout = {0};
if (!dmub)
@@ -470,18 +476,15 @@ void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub)
REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled);
dmub->debug.is_dmcub_enabled = is_dmub_enabled;
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait);
+ dmub->debug.is_pwait = is_pwait;
+
REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset);
dmub->debug.is_dmcub_soft_reset = is_soft_reset;
- REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset);
- dmub->debug.is_dmcub_secure_reset = is_sec_reset;
-
REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled);
dmub->debug.is_traceport_en = is_traceport_enabled;
- REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled);
- dmub->debug.is_cw0_enabled = is_cw0_enabled;
-
REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
dmub->debug.is_cw6_enabled = is_cw6_enabled;
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h
index 1a229450c53d..daf81027d663 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h
@@ -89,6 +89,9 @@ struct dmub_srv;
DMUB_SR(DMCUB_REGION5_OFFSET) \
DMUB_SR(DMCUB_REGION5_OFFSET_HIGH) \
DMUB_SR(DMCUB_REGION5_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION6_OFFSET) \
+ DMUB_SR(DMCUB_REGION6_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION6_TOP_ADDRESS) \
DMUB_SR(DMCUB_SCRATCH0) \
DMUB_SR(DMCUB_SCRATCH1) \
DMUB_SR(DMCUB_SCRATCH2) \
@@ -155,6 +158,8 @@ struct dmub_srv;
DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \
DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_TOP_ADDRESS) \
DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_ENABLE) \
+ DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_ENABLE) \
DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \
DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \
DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \
@@ -162,7 +167,8 @@ struct dmub_srv;
DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \
DMUB_SF(DMCUB_REGION3_TMR_AXI_SPACE, DMCUB_REGION3_TMR_AXI_SPACE) \
DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \
- DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK)
+ DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) \
+ DMUB_SF(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS)
struct dmub_srv_dcn32_reg_offset {
#define DMUB_SR(reg) uint32_t reg;
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index bfb446736ca8..75efda2969cf 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -239,18 +239,51 @@ enum amd_harvest_ip_mask {
AMD_HARVEST_IP_DMU_MASK = 0x4,
};
+/**
+ * enum DC_FEATURE_MASK - Bits that control DC feature defaults
+ */
enum DC_FEATURE_MASK {
//Default value can be found at "uint amdgpu_dc_feature_mask"
- DC_FBC_MASK = (1 << 0), //0x1, disabled by default
- DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default
- DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default
- DC_PSR_MASK = (1 << 3), //0x8, disabled by default for dcn < 3.1
- DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default
- DC_DISABLE_LTTPR_DP1_4A = (1 << 5), //0x20, disabled by default
- DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default
- DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default
- DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default
- DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4
+ /**
+ * @DC_FBC_MASK: (0x1) disabled by default
+ */
+ DC_FBC_MASK = (1 << 0),
+ /**
+ * @DC_MULTI_MON_PP_MCLK_SWITCH_MASK: (0x2) enabled by default
+ */
+ DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1),
+ /**
+ * @DC_DISABLE_FRACTIONAL_PWM_MASK: (0x4) disabled by default
+ */
+ DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2),
+ /**
+ * @DC_PSR_MASK: (0x8) disabled by default for DCN < 3.1
+ */
+ DC_PSR_MASK = (1 << 3),
+ /**
+ * @DC_EDP_NO_POWER_SEQUENCING: (0x10) disabled by default
+ */
+ DC_EDP_NO_POWER_SEQUENCING = (1 << 4),
+ /**
+ * @DC_DISABLE_LTTPR_DP1_4A: (0x20) disabled by default
+ */
+ DC_DISABLE_LTTPR_DP1_4A = (1 << 5),
+ /**
+ * @DC_DISABLE_LTTPR_DP2_0: (0x40) disabled by default
+ */
+ DC_DISABLE_LTTPR_DP2_0 = (1 << 6),
+ /**
+ * @DC_PSR_ALLOW_SMU_OPT: (0x80) disabled by default
+ */
+ DC_PSR_ALLOW_SMU_OPT = (1 << 7),
+ /**
+ * @DC_PSR_ALLOW_MULTI_DISP_OPT: (0x100) disabled by default
+ */
+ DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8),
+ /**
+ * @DC_REPLAY_MASK: (0x200) disabled by default for DCN < 3.1.4
+ */
+ DC_REPLAY_MASK = (1 << 9),
};
/**
@@ -258,64 +291,64 @@ enum DC_FEATURE_MASK {
*/
enum DC_DEBUG_MASK {
/**
- * @DC_DISABLE_PIPE_SPLIT: If set, disable pipe-splitting
+ * @DC_DISABLE_PIPE_SPLIT: (0x1) If set, disable pipe-splitting
*/
DC_DISABLE_PIPE_SPLIT = 0x1,
/**
- * @DC_DISABLE_STUTTER: If set, disable memory stutter mode
+ * @DC_DISABLE_STUTTER: (0x2) If set, disable memory stutter mode
*/
DC_DISABLE_STUTTER = 0x2,
/**
- * @DC_DISABLE_DSC: If set, disable display stream compression
+ * @DC_DISABLE_DSC: (0x4) If set, disable display stream compression
*/
DC_DISABLE_DSC = 0x4,
/**
- * @DC_DISABLE_CLOCK_GATING: If set, disable clock gating optimizations
+ * @DC_DISABLE_CLOCK_GATING: (0x8) If set, disable clock gating optimizations
*/
DC_DISABLE_CLOCK_GATING = 0x8,
/**
- * @DC_DISABLE_PSR: If set, disable Panel self refresh v1 and PSR-SU
+ * @DC_DISABLE_PSR: (0x10) If set, disable Panel self refresh v1 and PSR-SU
*/
DC_DISABLE_PSR = 0x10,
/**
- * @DC_FORCE_SUBVP_MCLK_SWITCH: If set, force mclk switch in subvp, even
+ * @DC_FORCE_SUBVP_MCLK_SWITCH: (0x20) If set, force mclk switch in subvp, even
* if mclk switch in vblank is possible
*/
DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
/**
- * @DC_DISABLE_MPO: If set, disable multi-plane offloading
+ * @DC_DISABLE_MPO: (0x40) If set, disable multi-plane offloading
*/
DC_DISABLE_MPO = 0x40,
/**
- * @DC_ENABLE_DPIA_TRACE: If set, enable trace logging for DPIA
+ * @DC_ENABLE_DPIA_TRACE: (0x80) If set, enable trace logging for DPIA
*/
DC_ENABLE_DPIA_TRACE = 0x80,
/**
- * @DC_ENABLE_DML2: If set, force usage of DML2, even if the DCN version
+ * @DC_ENABLE_DML2: (0x100) If set, force usage of DML2, even if the DCN version
* does not default to it.
*/
DC_ENABLE_DML2 = 0x100,
/**
- * @DC_DISABLE_PSR_SU: If set, disable PSR SU
+ * @DC_DISABLE_PSR_SU: (0x200) If set, disable PSR SU
*/
DC_DISABLE_PSR_SU = 0x200,
/**
- * @DC_DISABLE_REPLAY: If set, disable Panel Replay
+ * @DC_DISABLE_REPLAY: (0x400) If set, disable Panel Replay
*/
DC_DISABLE_REPLAY = 0x400,
/**
- * @DC_DISABLE_IPS: If set, disable all Idle Power States, all the time.
+ * @DC_DISABLE_IPS: (0x800) If set, disable all Idle Power States, all the time.
* If more than one IPS debug bit is set, the lowest bit takes
* precedence. For example, if DC_FORCE_IPS_ENABLE and
* DC_DISABLE_IPS_DYNAMIC are set, then DC_DISABLE_IPS_DYNAMIC takes
@@ -324,56 +357,57 @@ enum DC_DEBUG_MASK {
DC_DISABLE_IPS = 0x800,
/**
- * @DC_DISABLE_IPS_DYNAMIC: If set, disable all IPS, all the time,
+ * @DC_DISABLE_IPS_DYNAMIC: (0x1000) If set, disable all IPS, all the time,
* *except* when driver goes into suspend.
*/
DC_DISABLE_IPS_DYNAMIC = 0x1000,
/**
- * @DC_DISABLE_IPS2_DYNAMIC: If set, disable IPS2 (IPS1 allowed) if
+ * @DC_DISABLE_IPS2_DYNAMIC: (0x2000) If set, disable IPS2 (IPS1 allowed) if
* there is an enabled display. Otherwise, enable all IPS.
*/
DC_DISABLE_IPS2_DYNAMIC = 0x2000,
/**
- * @DC_FORCE_IPS_ENABLE: If set, force enable all IPS, all the time.
+ * @DC_FORCE_IPS_ENABLE: (0x4000) If set, force enable all IPS, all the time.
*/
DC_FORCE_IPS_ENABLE = 0x4000,
/**
- * @DC_DISABLE_ACPI_EDID: If set, don't attempt to fetch EDID for
+ * @DC_DISABLE_ACPI_EDID: (0x8000) If set, don't attempt to fetch EDID for
* eDP display from ACPI _DDC method.
*/
DC_DISABLE_ACPI_EDID = 0x8000,
/**
- * @DC_DISABLE_HDMI_CEC: If set, disable HDMI-CEC feature in amdgpu driver.
+ * @DC_DISABLE_HDMI_CEC: (0x10000) If set, disable HDMI-CEC feature in amdgpu driver.
*/
DC_DISABLE_HDMI_CEC = 0x10000,
/**
- * @DC_DISABLE_SUBVP_FAMS: If set, disable DCN Sub-Viewport & Firmware Assisted
+ * @DC_DISABLE_SUBVP_FAMS: (0x20000) If set, disable DCN Sub-Viewport & Firmware Assisted
* Memory Clock Switching (FAMS) feature in amdgpu driver.
*/
DC_DISABLE_SUBVP_FAMS = 0x20000,
/**
- * @DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE: If set, disable support for custom brightness curves
+ * @DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE: (0x40000) If set, disable support for custom
+ * brightness curves
*/
DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE = 0x40000,
/**
- * @DC_HDCP_LC_FORCE_FW_ENABLE: If set, use HDCP Locality Check FW
+ * @DC_HDCP_LC_FORCE_FW_ENABLE: (0x80000) If set, use HDCP Locality Check FW
* path regardless of reported HW capabilities.
*/
DC_HDCP_LC_FORCE_FW_ENABLE = 0x80000,
/**
- * @DC_HDCP_LC_ENABLE_SW_FALLBACK: If set, upon HDCP Locality Check FW
+ * @DC_HDCP_LC_ENABLE_SW_FALLBACK: (0x100000) If set, upon HDCP Locality Check FW
* path failure, retry using legacy SW path.
*/
DC_HDCP_LC_ENABLE_SW_FALLBACK = 0x100000,
/**
- * @DC_SKIP_DETECTION_LT: If set, skip detection link training
+ * @DC_SKIP_DETECTION_LT: (0x200000) If set, skip detection link training
*/
DC_SKIP_DETECTION_LT = 0x200000,
};
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h
index 9de01ae574c0..067eddd9c62d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h
@@ -4115,6 +4115,7 @@
#define mmSCL0_SCL_COEF_RAM_CONFLICT_STATUS 0x1B55
#define mmSCL0_SCL_COEF_RAM_SELECT 0x1B40
#define mmSCL0_SCL_COEF_RAM_TAP_DATA 0x1B41
+#define mmSCL0_SCL_SCALER_ENABLE 0x1B42
#define mmSCL0_SCL_CONTROL 0x1B44
#define mmSCL0_SCL_DEBUG 0x1B6A
#define mmSCL0_SCL_DEBUG2 0x1B69
@@ -4144,6 +4145,7 @@
#define mmSCL1_SCL_COEF_RAM_CONFLICT_STATUS 0x1E55
#define mmSCL1_SCL_COEF_RAM_SELECT 0x1E40
#define mmSCL1_SCL_COEF_RAM_TAP_DATA 0x1E41
+#define mmSCL1_SCL_SCALER_ENABLE 0x1E42
#define mmSCL1_SCL_CONTROL 0x1E44
#define mmSCL1_SCL_DEBUG 0x1E6A
#define mmSCL1_SCL_DEBUG2 0x1E69
@@ -4173,6 +4175,7 @@
#define mmSCL2_SCL_COEF_RAM_CONFLICT_STATUS 0x4155
#define mmSCL2_SCL_COEF_RAM_SELECT 0x4140
#define mmSCL2_SCL_COEF_RAM_TAP_DATA 0x4141
+#define mmSCL2_SCL_SCALER_ENABLE 0x4142
#define mmSCL2_SCL_CONTROL 0x4144
#define mmSCL2_SCL_DEBUG 0x416A
#define mmSCL2_SCL_DEBUG2 0x4169
@@ -4202,6 +4205,7 @@
#define mmSCL3_SCL_COEF_RAM_CONFLICT_STATUS 0x4455
#define mmSCL3_SCL_COEF_RAM_SELECT 0x4440
#define mmSCL3_SCL_COEF_RAM_TAP_DATA 0x4441
+#define mmSCL3_SCL_SCALER_ENABLE 0x4442
#define mmSCL3_SCL_CONTROL 0x4444
#define mmSCL3_SCL_DEBUG 0x446A
#define mmSCL3_SCL_DEBUG2 0x4469
@@ -4231,6 +4235,7 @@
#define mmSCL4_SCL_COEF_RAM_CONFLICT_STATUS 0x4755
#define mmSCL4_SCL_COEF_RAM_SELECT 0x4740
#define mmSCL4_SCL_COEF_RAM_TAP_DATA 0x4741
+#define mmSCL4_SCL_SCALER_ENABLE 0x4742
#define mmSCL4_SCL_CONTROL 0x4744
#define mmSCL4_SCL_DEBUG 0x476A
#define mmSCL4_SCL_DEBUG2 0x4769
@@ -4260,6 +4265,7 @@
#define mmSCL5_SCL_COEF_RAM_CONFLICT_STATUS 0x4A55
#define mmSCL5_SCL_COEF_RAM_SELECT 0x4A40
#define mmSCL5_SCL_COEF_RAM_TAP_DATA 0x4A41
+#define mmSCL5_SCL_SCALER_ENABLE 0x4A42
#define mmSCL5_SCL_CONTROL 0x4A44
#define mmSCL5_SCL_DEBUG 0x4A6A
#define mmSCL5_SCL_DEBUG2 0x4A69
@@ -4287,6 +4293,7 @@
#define mmSCL_COEF_RAM_CONFLICT_STATUS 0x1B55
#define mmSCL_COEF_RAM_SELECT 0x1B40
#define mmSCL_COEF_RAM_TAP_DATA 0x1B41
+#define mmSCL_SCALER_ENABLE 0x1B42
#define mmSCL_CONTROL 0x1B44
#define mmSCL_DEBUG 0x1B6A
#define mmSCL_DEBUG2 0x1B69
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h
index 2d6a598a6c25..9317a7afa621 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h
@@ -8650,6 +8650,8 @@
#define REGAMMA_LUT_INDEX__REGAMMA_LUT_INDEX__SHIFT 0x00000000
#define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK_MASK 0x00000007L
#define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK__SHIFT 0x00000000
+#define SCL_SCALER_ENABLE__SCL_SCALE_EN_MASK 0x00000001L
+#define SCL_SCALER_ENABLE__SCL_SCALE_EN__SHIFT 0x00000000
#define SCL_ALU_CONTROL__SCL_ALU_DISABLE_MASK 0x00000001L
#define SCL_ALU_CONTROL__SCL_ALU_DISABLE__SHIFT 0x00000000
#define SCL_BYPASS_CONTROL__SCL_BYPASS_MODE_MASK 0x00000003L
diff --git a/drivers/gpu/drm/amd/include/dm_pp_interface.h b/drivers/gpu/drm/amd/include/dm_pp_interface.h
index acd1cef61b7c..349544504c93 100644
--- a/drivers/gpu/drm/amd/include/dm_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/dm_pp_interface.h
@@ -65,6 +65,7 @@ struct single_display_configuration {
uint32_t view_resolution_cy;
enum amd_pp_display_config_type displayconfigtype;
uint32_t vertical_refresh; /* for active display */
+ uint32_t pixel_clock; /* Pixel clock in KHz (for HDMI only: normalized) */
};
#define MAX_NUM_DISPLAY 32
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 2f7e4b5bebf3..2b0cdb2a2775 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -162,6 +162,10 @@ enum amd_pp_sensors {
AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK,
AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK,
AMDGPU_PP_SENSOR_VCN_LOAD,
+ AMDGPU_PP_SENSOR_NODEPOWERLIMIT,
+ AMDGPU_PP_SENSOR_NODEPOWER,
+ AMDGPU_PP_SENSOR_GPPTRESIDENCY,
+ AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT,
};
enum amd_pp_task {
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index 15680c3f4970..ab1cfc92dbeb 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -238,7 +238,8 @@ union MESAPI_SET_HW_RESOURCES {
uint32_t enable_mes_sch_stb_log : 1;
uint32_t limit_single_process : 1;
uint32_t is_strix_tmz_wa_enabled :1;
- uint32_t reserved : 13;
+ uint32_t enable_lr_compute_wa : 1;
+ uint32_t reserved : 12;
};
uint32_t uint32_t_all;
};
diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
index c04bd351b250..69611c7e30e3 100644
--- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
@@ -287,7 +287,8 @@ union MESAPI_SET_HW_RESOURCES {
uint32_t limit_single_process : 1;
uint32_t unmapped_doorbell_handling: 2;
uint32_t enable_mes_fence_int: 1;
- uint32_t reserved : 10;
+ uint32_t enable_lr_compute_wa : 1;
+ uint32_t reserved : 9;
};
uint32_t uint32_all;
};
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c
index 2d2d2d5e6763..b5e9c3ecf703 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c
@@ -27,76 +27,69 @@
#include "amdgpu_smu.h"
#include "amdgpu_dpm_internal.h"
-void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
+void amdgpu_dpm_get_display_cfg(struct amdgpu_device *adev)
{
struct drm_device *ddev = adev_to_drm(adev);
+ struct amd_pp_display_configuration *cfg = &adev->pm.pm_display_cfg;
+ struct single_display_configuration *display_cfg;
struct drm_crtc *crtc;
struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_connector *conn;
+ int num_crtcs = 0;
+ int vrefresh;
+ u32 vblank_in_pixels, vblank_time_us;
+
+ cfg->min_vblank_time = 0xffffffff; /* if the displays are off, vblank time is max */
- adev->pm.dpm.new_active_crtcs = 0;
- adev->pm.dpm.new_active_crtc_count = 0;
if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
- list_for_each_entry(crtc,
- &ddev->mode_config.crtc_list, head) {
+ list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
amdgpu_crtc = to_amdgpu_crtc(crtc);
- if (amdgpu_crtc->enabled) {
- adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
- adev->pm.dpm.new_active_crtc_count++;
- }
- }
- }
-}
-u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_crtc *crtc;
- struct amdgpu_crtc *amdgpu_crtc;
- u32 vblank_in_pixels;
- u32 vblank_time_us = 0xffffffff; /* if the displays are off, vblank time is max */
+ /* The array should only contain active displays. */
+ if (!amdgpu_crtc->enabled)
+ continue;
+
+ conn = to_amdgpu_connector(amdgpu_crtc->connector);
+ display_cfg = &adev->pm.pm_display_cfg.displays[num_crtcs++];
+
+ if (amdgpu_crtc->hw_mode.clock) {
+ vrefresh = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
- if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
- list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
- amdgpu_crtc = to_amdgpu_crtc(crtc);
- if (crtc->enabled && amdgpu_crtc->enabled && amdgpu_crtc->hw_mode.clock) {
vblank_in_pixels =
amdgpu_crtc->hw_mode.crtc_htotal *
(amdgpu_crtc->hw_mode.crtc_vblank_end -
amdgpu_crtc->hw_mode.crtc_vdisplay +
(amdgpu_crtc->v_border * 2));
- vblank_time_us = vblank_in_pixels * 1000 / amdgpu_crtc->hw_mode.clock;
+ vblank_time_us =
+ vblank_in_pixels * 1000 / amdgpu_crtc->hw_mode.clock;
- /* we have issues with mclk switching with
- * refresh rates over 120 hz on the non-DC code.
+ /* The legacy (non-DC) code has issues with mclk switching
+ * with refresh rates over 120 Hz. Disable mclk switching.
*/
- if (drm_mode_vrefresh(&amdgpu_crtc->hw_mode) > 120)
+ if (vrefresh > 120)
vblank_time_us = 0;
- break;
- }
- }
- }
+ /* Find minimum vblank time. */
+ if (vblank_time_us < cfg->min_vblank_time)
+ cfg->min_vblank_time = vblank_time_us;
- return vblank_time_us;
-}
-
-u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_crtc *crtc;
- struct amdgpu_crtc *amdgpu_crtc;
- u32 vrefresh = 0;
+ /* Find vertical refresh rate of first active display. */
+ if (!cfg->vrefresh)
+ cfg->vrefresh = vrefresh;
+ }
- if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
- list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
- amdgpu_crtc = to_amdgpu_crtc(crtc);
- if (crtc->enabled && amdgpu_crtc->enabled && amdgpu_crtc->hw_mode.clock) {
- vrefresh = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
- break;
+ if (amdgpu_crtc->crtc_id < cfg->crtc_index) {
+ /* Find first active CRTC and its line time. */
+ cfg->crtc_index = amdgpu_crtc->crtc_id;
+ cfg->line_time_in_us = amdgpu_crtc->line_time;
}
+
+ display_cfg->controller_id = amdgpu_crtc->crtc_id;
+ display_cfg->pixel_clock = conn->pixelclock_for_modeset;
}
}
- return vrefresh;
+ cfg->display_clk = adev->clock.default_dispclk;
+ cfg->num_display = num_crtcs;
}
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 96590c1da553..b5fbb0fd1dc0 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1421,9 +1421,9 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
return -EINVAL;
}
-static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,
- enum amd_pp_sensors sensor,
- void *query)
+static int amdgpu_pm_get_sensor_generic(struct amdgpu_device *adev,
+ enum amd_pp_sensors sensor,
+ void *query)
{
int r, size = sizeof(uint32_t);
@@ -1456,7 +1456,7 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
unsigned int value;
int r;
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_LOAD, &value);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_LOAD, &value);
if (r)
return r;
@@ -1480,7 +1480,7 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
unsigned int value;
int r;
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_LOAD, &value);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_LOAD, &value);
if (r)
return r;
@@ -1504,7 +1504,7 @@ static ssize_t amdgpu_get_vcn_busy_percent(struct device *dev,
unsigned int value;
int r;
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VCN_LOAD, &value);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VCN_LOAD, &value);
if (r)
return r;
@@ -1783,7 +1783,7 @@ static int amdgpu_show_powershift_percent(struct device *dev,
uint32_t ss_power;
int r = 0, i;
- r = amdgpu_hwmon_get_sensor_generic(adev, sensor, (void *)&ss_power);
+ r = amdgpu_pm_get_sensor_generic(adev, sensor, (void *)&ss_power);
if (r == -EOPNOTSUPP) {
/* sensor not available on dGPU, try to read from APU */
adev = NULL;
@@ -1796,7 +1796,7 @@ static int amdgpu_show_powershift_percent(struct device *dev,
}
mutex_unlock(&mgpu_info.mutex);
if (adev)
- r = amdgpu_hwmon_get_sensor_generic(adev, sensor, (void *)&ss_power);
+ r = amdgpu_pm_get_sensor_generic(adev, sensor, (void *)&ss_power);
}
if (r)
@@ -1906,11 +1906,11 @@ static int ss_bias_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
if (!amdgpu_device_supports_smart_shift(adev))
*states = ATTR_STATE_UNSUPPORTED;
- else if (amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_APU_SHARE,
- (void *)&ss_power))
+ else if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_APU_SHARE,
+ (void *)&ss_power))
*states = ATTR_STATE_UNSUPPORTED;
- else if (amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_DGPU_SHARE,
- (void *)&ss_power))
+ else if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_DGPU_SHARE,
+ (void *)&ss_power))
*states = ATTR_STATE_UNSUPPORTED;
return 0;
@@ -2081,8 +2081,9 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd
* for user application to monitor various board reated attributes.
*
* The amdgpu driver provides a sysfs API for reporting board attributes. Presently,
- * only two types of attributes are reported, baseboard temperature and
- * gpu board temperature. Both of them are reported as binary files.
+ * seven types of attributes are reported. Baseboard temperature and
+ * gpu board temperature are reported as binary files. Npm status, current node power limit,
+ * max node power limit, node power and global ppt residency is reported as ASCII text file.
*
* * .. code-block:: console
*
@@ -2090,6 +2091,15 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd
*
* hexdump /sys/bus/pci/devices/.../board/gpuboard_temp
*
+ * hexdump /sys/bus/pci/devices/.../board/npm_status
+ *
+ * hexdump /sys/bus/pci/devices/.../board/cur_node_power_limit
+ *
+ * hexdump /sys/bus/pci/devices/.../board/max_node_power_limit
+ *
+ * hexdump /sys/bus/pci/devices/.../board/node_power
+ *
+ * hexdump /sys/bus/pci/devices/.../board/global_ppt_resid
*/
/**
@@ -2168,8 +2178,129 @@ out:
return size;
}
+/**
+ * DOC: cur_node_power_limit
+ *
+ * The amdgpu driver provides a sysfs API for retrieving current node power limit.
+ * The file cur_node_power_limit is used for this.
+ */
+static ssize_t amdgpu_show_cur_node_power_limit(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ u32 nplimit;
+ int r;
+
+ /* get the current node power limit */
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_NODEPOWERLIMIT,
+ (void *)&nplimit);
+ if (r)
+ return r;
+
+ return sysfs_emit(buf, "%u\n", nplimit);
+}
+
+/**
+ * DOC: node_power
+ *
+ * The amdgpu driver provides a sysfs API for retrieving current node power.
+ * The file node_power is used for this.
+ */
+static ssize_t amdgpu_show_node_power(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ u32 npower;
+ int r;
+
+ /* get the node power */
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_NODEPOWER,
+ (void *)&npower);
+ if (r)
+ return r;
+
+ return sysfs_emit(buf, "%u\n", npower);
+}
+
+/**
+ * DOC: npm_status
+ *
+ * The amdgpu driver provides a sysfs API for retrieving current node power management status.
+ * The file npm_status is used for this. It shows the status as enabled or disabled based on
+ * current node power value. If node power is zero, status is disabled else enabled.
+ */
+static ssize_t amdgpu_show_npm_status(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ u32 npower;
+ int r;
+
+ /* get the node power */
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_NODEPOWER,
+ (void *)&npower);
+ if (r)
+ return r;
+
+ return sysfs_emit(buf, "%s\n", npower ? "enabled" : "disabled");
+}
+
+/**
+ * DOC: global_ppt_resid
+ *
+ * The amdgpu driver provides a sysfs API for retrieving global ppt residency.
+ * The file global_ppt_resid is used for this.
+ */
+static ssize_t amdgpu_show_global_ppt_resid(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ u32 gpptresid;
+ int r;
+
+ /* get the global ppt residency */
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPPTRESIDENCY,
+ (void *)&gpptresid);
+ if (r)
+ return r;
+
+ return sysfs_emit(buf, "%u\n", gpptresid);
+}
+
+/**
+ * DOC: max_node_power_limit
+ *
+ * The amdgpu driver provides a sysfs API for retrieving maximum node power limit.
+ * The file max_node_power_limit is used for this.
+ */
+static ssize_t amdgpu_show_max_node_power_limit(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ u32 max_nplimit;
+ int r;
+
+ /* get the max node power limit */
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT,
+ (void *)&max_nplimit);
+ if (r)
+ return r;
+
+ return sysfs_emit(buf, "%u\n", max_nplimit);
+}
+
static DEVICE_ATTR(baseboard_temp, 0444, amdgpu_get_baseboard_temp_metrics, NULL);
static DEVICE_ATTR(gpuboard_temp, 0444, amdgpu_get_gpuboard_temp_metrics, NULL);
+static DEVICE_ATTR(cur_node_power_limit, 0444, amdgpu_show_cur_node_power_limit, NULL);
+static DEVICE_ATTR(node_power, 0444, amdgpu_show_node_power, NULL);
+static DEVICE_ATTR(global_ppt_resid, 0444, amdgpu_show_global_ppt_resid, NULL);
+static DEVICE_ATTR(max_node_power_limit, 0444, amdgpu_show_max_node_power_limit, NULL);
+static DEVICE_ATTR(npm_status, 0444, amdgpu_show_npm_status, NULL);
static struct attribute *board_attrs[] = {
&dev_attr_baseboard_temp.attr,
@@ -2636,18 +2767,18 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
switch (channel) {
case PP_TEMP_JUNCTION:
/* get current junction temperature */
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
- (void *)&temp);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
+ (void *)&temp);
break;
case PP_TEMP_EDGE:
/* get current edge temperature */
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
- (void *)&temp);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
+ (void *)&temp);
break;
case PP_TEMP_MEM:
/* get current memory temperature */
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
- (void *)&temp);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
+ (void *)&temp);
break;
default:
r = -EINVAL;
@@ -2909,8 +3040,8 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
u32 min_rpm = 0;
int r;
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
- (void *)&min_rpm);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
+ (void *)&min_rpm);
if (r)
return r;
@@ -2926,8 +3057,8 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
u32 max_rpm = 0;
int r;
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
- (void *)&max_rpm);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
+ (void *)&max_rpm);
if (r)
return r;
@@ -3060,8 +3191,8 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
int r;
/* get the voltage */
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDGFX,
- (void *)&vddgfx);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDGFX,
+ (void *)&vddgfx);
if (r)
return r;
@@ -3077,8 +3208,8 @@ static ssize_t amdgpu_hwmon_show_vddboard(struct device *dev,
int r;
/* get the voltage */
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD,
- (void *)&vddboard);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD,
+ (void *)&vddboard);
if (r)
return r;
@@ -3111,8 +3242,8 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
return -EINVAL;
/* get the voltage */
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDNB,
- (void *)&vddnb);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDNB,
+ (void *)&vddnb);
if (r)
return r;
@@ -3134,7 +3265,7 @@ static int amdgpu_hwmon_get_power(struct device *dev,
u32 query = 0;
int r;
- r = amdgpu_hwmon_get_sensor_generic(adev, sensor, (void *)&query);
+ r = amdgpu_pm_get_sensor_generic(adev, sensor, (void *)&query);
if (r)
return r;
@@ -3254,9 +3385,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
int err;
u32 value;
- if (amdgpu_sriov_vf(adev))
- return -EINVAL;
-
err = kstrtou32(buf, 10, &value);
if (err)
return err;
@@ -3287,8 +3415,8 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
int r;
/* get the sclk */
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
- (void *)&sclk);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
+ (void *)&sclk);
if (r)
return r;
@@ -3311,8 +3439,8 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
int r;
/* get the sclk */
- r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
- (void *)&mclk);
+ r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
+ (void *)&mclk);
if (r)
return r;
@@ -3598,6 +3726,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
return 0;
}
+ if (attr == &sensor_dev_attr_power1_cap.dev_attr.attr &&
+ amdgpu_virt_cap_is_rw(&adev->virt.virt_caps, AMDGPU_VIRT_CAP_POWER_LIMIT))
+ effective_mode |= S_IWUSR;
+
/* not implemented yet for APUs having < GC 9.3.0 (Renoir) */
if (((adev->family == AMDGPU_FAMILY_SI) ||
((adev->flags & AMD_IS_APU) && (gc_ver < IP_VERSION(9, 3, 0)))) &&
@@ -3606,10 +3738,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
/* not all products support both average and instantaneous */
if (attr == &sensor_dev_attr_power1_average.dev_attr.attr &&
- amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&tmp) == -EOPNOTSUPP)
+ amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER,
+ (void *)&tmp) == -EOPNOTSUPP)
return 0;
if (attr == &sensor_dev_attr_power1_input.dev_attr.attr &&
- amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&tmp) == -EOPNOTSUPP)
+ amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+ (void *)&tmp) == -EOPNOTSUPP)
return 0;
/* hide max/min values if we can't both query and manage the fan */
@@ -3648,8 +3782,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
/* only few boards support vddboard */
if ((attr == &sensor_dev_attr_in2_input.dev_attr.attr ||
attr == &sensor_dev_attr_in2_label.dev_attr.attr) &&
- amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD,
- (void *)&tmp) == -EOPNOTSUPP)
+ amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD,
+ (void *)&tmp) == -EOPNOTSUPP)
return 0;
/* no mclk on APUs other than gc 9,4,3*/
@@ -4531,6 +4665,7 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
{
enum amdgpu_sriov_vf_mode mode;
uint32_t mask = 0;
+ uint32_t tmp;
int ret;
if (adev->pm.sysfs_initialized)
@@ -4597,6 +4732,21 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
&amdgpu_board_attr_group);
if (ret)
goto err_out0;
+ if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT,
+ (void *)&tmp) != -EOPNOTSUPP) {
+ sysfs_add_file_to_group(&adev->dev->kobj,
+ &dev_attr_cur_node_power_limit.attr,
+ amdgpu_board_attr_group.name);
+ sysfs_add_file_to_group(&adev->dev->kobj, &dev_attr_node_power.attr,
+ amdgpu_board_attr_group.name);
+ sysfs_add_file_to_group(&adev->dev->kobj, &dev_attr_global_ppt_resid.attr,
+ amdgpu_board_attr_group.name);
+ sysfs_add_file_to_group(&adev->dev->kobj,
+ &dev_attr_max_node_power_limit.attr,
+ amdgpu_board_attr_group.name);
+ sysfs_add_file_to_group(&adev->dev->kobj, &dev_attr_npm_status.attr,
+ amdgpu_board_attr_group.name);
+ }
}
adev->pm.sysfs_initialized = true;
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 9748744133d9..65c1d98af26c 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -263,10 +263,6 @@ struct amdgpu_dpm {
u32 voltage_response_time;
u32 backbias_response_time;
void *priv;
- u32 new_active_crtcs;
- int new_active_crtc_count;
- u32 current_active_crtcs;
- int current_active_crtc_count;
struct amdgpu_dpm_dynamic_state dyn_state;
struct amdgpu_dpm_fan fan;
u32 tdp_limit;
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h
index 5c2a89f0d5d5..cc6d7ba040e9 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h
@@ -23,10 +23,6 @@
#ifndef __AMDGPU_DPM_INTERNAL_H__
#define __AMDGPU_DPM_INTERNAL_H__
-void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev);
-
-u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
-
-u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev);
+void amdgpu_dpm_get_display_cfg(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
index 307ebf7e3226..33eb85dd68e9 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
@@ -2299,7 +2299,7 @@ static void kv_apply_state_adjust_rules(struct amdgpu_device *adev,
if (pi->sys_info.nb_dpm_enable) {
force_high = (mclk >= pi->sys_info.nbp_memory_clock[3]) ||
- pi->video_start || (adev->pm.dpm.new_active_crtc_count >= 3) ||
+ pi->video_start || (adev->pm.pm_display_cfg.num_display >= 3) ||
pi->disable_nb_ps3_in_battery;
ps->dpm0_pg_nb_ps_lo = force_high ? 0x2 : 0x3;
ps->dpm0_pg_nb_ps_hi = 0x2;
@@ -2358,7 +2358,7 @@ static int kv_calculate_nbps_level_settings(struct amdgpu_device *adev)
return 0;
force_high = ((mclk >= pi->sys_info.nbp_memory_clock[3]) ||
- (adev->pm.dpm.new_active_crtc_count >= 3) || pi->video_start);
+ (adev->pm.pm_display_cfg.num_display >= 3) || pi->video_start);
if (force_high) {
for (i = pi->lowest_valid; i <= pi->highest_valid; i++)
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
index 52dbf6d0469d..c7ed0b457129 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
@@ -771,7 +771,7 @@ static struct amdgpu_ps *amdgpu_dpm_pick_power_state(struct amdgpu_device *adev,
int i;
struct amdgpu_ps *ps;
u32 ui_class;
- bool single_display = adev->pm.dpm.new_active_crtc_count < 2;
+ bool single_display = adev->pm.pm_display_cfg.num_display < 2;
/* check if the vblank period is too short to adjust the mclk */
if (single_display && adev->powerplay.pp_funcs->vblank_too_short) {
@@ -944,9 +944,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
amdgpu_dpm_post_set_power_state(adev);
- adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
- adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
-
if (pp_funcs->force_performance_level) {
if (adev->pm.dpm.thermal_active) {
enum amd_dpm_forced_level level = adev->pm.dpm.forced_level;
@@ -967,7 +964,8 @@ void amdgpu_legacy_dpm_compute_clocks(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_dpm_get_active_displays(adev);
+ if (!adev->dc_enabled)
+ amdgpu_dpm_get_display_cfg(adev);
amdgpu_dpm_change_power_state_locked(adev);
}
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index 6595a611ce6e..cf9932e68055 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -3081,7 +3081,7 @@ static int si_get_vce_clock_voltage(struct amdgpu_device *adev,
static bool si_dpm_vblank_too_short(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
+ u32 vblank_time = adev->pm.pm_display_cfg.min_vblank_time;
/* we never hit the non-gddr5 limit so disable it */
u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0;
@@ -3447,9 +3447,10 @@ static void rv770_get_engine_memory_ss(struct amdgpu_device *adev)
static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
struct amdgpu_ps *rps)
{
+ const struct amd_pp_display_configuration *display_cfg =
+ &adev->pm.pm_display_cfg;
struct si_ps *ps = si_get_ps(rps);
struct amdgpu_clock_and_voltage_limits *max_limits;
- struct amdgpu_connector *conn;
bool disable_mclk_switching = false;
bool disable_sclk_switching = false;
u32 mclk, sclk;
@@ -3488,14 +3489,9 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
* For example, 4K 60Hz and 1080p 144Hz fall into this category.
* Find number of such displays connected.
*/
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (!(adev->pm.dpm.new_active_crtcs & (1 << i)) ||
- !adev->mode_info.crtcs[i]->enabled)
- continue;
-
- conn = to_amdgpu_connector(adev->mode_info.crtcs[i]->connector);
-
- if (conn->pixelclock_for_modeset > 297000)
+ for (i = 0; i < display_cfg->num_display; i++) {
+ /* The array only contains active displays. */
+ if (display_cfg->displays[i].pixel_clock > 297000)
high_pixelclock_count++;
}
@@ -3523,7 +3519,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
rps->ecclk = 0;
}
- if ((adev->pm.dpm.new_active_crtc_count > 1) ||
+ if ((adev->pm.pm_display_cfg.num_display > 1) ||
si_dpm_vblank_too_short(adev))
disable_mclk_switching = true;
@@ -3671,7 +3667,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
ps->performance_levels[i].mclk,
max_limits->vddc, &ps->performance_levels[i].vddc);
btc_apply_voltage_dependency_rules(&adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk,
- adev->clock.current_dispclk,
+ display_cfg->display_clk,
max_limits->vddc, &ps->performance_levels[i].vddc);
}
@@ -4196,16 +4192,16 @@ static void si_program_ds_registers(struct amdgpu_device *adev)
static void si_program_display_gap(struct amdgpu_device *adev)
{
+ const struct amd_pp_display_configuration *cfg = &adev->pm.pm_display_cfg;
u32 tmp, pipe;
- int i;
tmp = RREG32(mmCG_DISPLAY_GAP_CNTL) & ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK);
- if (adev->pm.dpm.new_active_crtc_count > 0)
+ if (cfg->num_display > 0)
tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT;
else
tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT;
- if (adev->pm.dpm.new_active_crtc_count > 1)
+ if (cfg->num_display > 1)
tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT;
else
tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT;
@@ -4215,17 +4211,8 @@ static void si_program_display_gap(struct amdgpu_device *adev)
tmp = RREG32(DCCG_DISP_SLOW_SELECT_REG);
pipe = (tmp & DCCG_DISP1_SLOW_SELECT_MASK) >> DCCG_DISP1_SLOW_SELECT_SHIFT;
- if ((adev->pm.dpm.new_active_crtc_count > 0) &&
- (!(adev->pm.dpm.new_active_crtcs & (1 << pipe)))) {
- /* find the first active crtc */
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->pm.dpm.new_active_crtcs & (1 << i))
- break;
- }
- if (i == adev->mode_info.num_crtc)
- pipe = 0;
- else
- pipe = i;
+ if (cfg->num_display > 0 && pipe != cfg->crtc_index) {
+ pipe = cfg->crtc_index;
tmp &= ~DCCG_DISP1_SLOW_SELECT_MASK;
tmp |= DCCG_DISP1_SLOW_SELECT(pipe);
@@ -4236,7 +4223,7 @@ static void si_program_display_gap(struct amdgpu_device *adev)
* This can be a problem on PowerXpress systems or if you want to use the card
* for offscreen rendering or compute if there are no crtcs enabled.
*/
- si_notify_smc_display_change(adev, adev->pm.dpm.new_active_crtc_count > 0);
+ si_notify_smc_display_change(adev, cfg->num_display > 0);
}
static void si_enable_spread_spectrum(struct amdgpu_device *adev, bool enable)
@@ -5545,7 +5532,7 @@ static int si_convert_power_level_to_smc(struct amdgpu_device *adev,
(pl->mclk <= pi->mclk_stutter_mode_threshold) &&
!eg_pi->uvd_enabled &&
(RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) &&
- (adev->pm.dpm.new_active_crtc_count <= 2)) {
+ (adev->pm.pm_display_cfg.num_display <= 2)) {
level->mcFlags |= SISLANDS_SMC_MC_STUTTER_EN;
}
@@ -5694,7 +5681,7 @@ static bool si_is_state_ulv_compatible(struct amdgpu_device *adev,
/* XXX validate against display requirements! */
for (i = 0; i < adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count; i++) {
- if (adev->clock.current_dispclk <=
+ if (adev->pm.pm_display_cfg.display_clk <=
adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[i].clk) {
if (ulv->pl.vddc <
adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[i].v)
@@ -5848,30 +5835,22 @@ static int si_upload_ulv_state(struct amdgpu_device *adev)
static int si_upload_smc_data(struct amdgpu_device *adev)
{
- struct amdgpu_crtc *amdgpu_crtc = NULL;
- int i;
+ const struct amd_pp_display_configuration *cfg = &adev->pm.pm_display_cfg;
u32 crtc_index = 0;
u32 mclk_change_block_cp_min = 0;
u32 mclk_change_block_cp_max = 0;
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->pm.dpm.new_active_crtcs & (1 << i)) {
- amdgpu_crtc = adev->mode_info.crtcs[i];
- break;
- }
- }
-
/* When a display is plugged in, program these so that the SMC
* performs MCLK switching when it doesn't cause flickering.
* When no display is plugged in, there is no need to restrict
* MCLK switching, so program them to zero.
*/
- if (adev->pm.dpm.new_active_crtc_count && amdgpu_crtc) {
- crtc_index = amdgpu_crtc->crtc_id;
+ if (cfg->num_display) {
+ crtc_index = cfg->crtc_index;
- if (amdgpu_crtc->line_time) {
- mclk_change_block_cp_min = 200 / amdgpu_crtc->line_time;
- mclk_change_block_cp_max = 100 / amdgpu_crtc->line_time;
+ if (cfg->line_time_in_us) {
+ mclk_change_block_cp_min = 200 / cfg->line_time_in_us;
+ mclk_change_block_cp_max = 100 / cfg->line_time_in_us;
}
}
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index b48a031cbba0..554492dfa3c0 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1554,16 +1554,7 @@ static void pp_pm_compute_clocks(void *handle)
struct amdgpu_device *adev = hwmgr->adev;
if (!adev->dc_enabled) {
- amdgpu_dpm_get_active_displays(adev);
- adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count;
- adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev);
- adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev);
- /* we have issues with mclk switching with
- * refresh rates over 120 hz on the non-DC code.
- */
- if (adev->pm.pm_display_cfg.vrefresh > 120)
- adev->pm.pm_display_cfg.min_vblank_time = 0;
-
+ amdgpu_dpm_get_display_cfg(adev);
pp_display_configuration_change(handle,
&adev->pm.pm_display_cfg);
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index c5965924e7c6..fb8086859857 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -766,7 +766,6 @@ static int smu_set_funcs(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 14):
case IP_VERSION(13, 0, 12):
smu_v13_0_6_set_ppt_funcs(smu);
- smu_v13_0_6_set_temp_funcs(smu);
/* Enable pp_od_clk_voltage node */
smu->od_enabled = true;
break;
@@ -1316,6 +1315,33 @@ static void smu_init_power_profile(struct smu_context *smu)
smu_power_profile_mode_get(smu, smu->power_profile_mode);
}
+void smu_feature_cap_set(struct smu_context *smu, enum smu_feature_cap_id fea_id)
+{
+ struct smu_feature_cap *fea_cap = &smu->fea_cap;
+
+ if (fea_id >= SMU_FEATURE_CAP_ID__COUNT)
+ return;
+
+ set_bit(fea_id, fea_cap->cap_map);
+}
+
+bool smu_feature_cap_test(struct smu_context *smu, enum smu_feature_cap_id fea_id)
+{
+ struct smu_feature_cap *fea_cap = &smu->fea_cap;
+
+ if (fea_id >= SMU_FEATURE_CAP_ID__COUNT)
+ return false;
+
+ return test_bit(fea_id, fea_cap->cap_map);
+}
+
+static void smu_feature_cap_init(struct smu_context *smu)
+{
+ struct smu_feature_cap *fea_cap = &smu->fea_cap;
+
+ bitmap_zero(fea_cap->cap_map, SMU_FEATURE_CAP_ID__COUNT);
+}
+
static int smu_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1348,6 +1374,8 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block)
INIT_DELAYED_WORK(&smu->swctf_delayed_work,
smu_swctf_delayed_work_handler);
+ smu_feature_cap_init(smu);
+
ret = smu_smc_table_sw_init(smu);
if (ret) {
dev_err(adev->dev, "Failed to sw init smc table!\n");
@@ -1897,7 +1925,6 @@ static int smu_hw_init(struct amdgpu_ip_block *ip_block)
for (i = 0; i < adev->vcn.num_vcn_inst; i++)
smu_dpm_set_vcn_enable(smu, true, i);
smu_dpm_set_jpeg_enable(smu, true);
- smu_dpm_set_vpe_enable(smu, true);
smu_dpm_set_umsch_mm_enable(smu, true);
smu_set_mall_enable(smu);
smu_set_gfx_cgpg(smu, true);
@@ -2105,7 +2132,6 @@ static int smu_hw_fini(struct amdgpu_ip_block *ip_block)
}
smu_dpm_set_jpeg_enable(smu, false);
adev->jpeg.cur_state = AMD_PG_STATE_GATE;
- smu_dpm_set_vpe_enable(smu, false);
smu_dpm_set_umsch_mm_enable(smu, false);
if (!smu->pm_enabled)
@@ -2237,7 +2263,7 @@ static int smu_resume(struct amdgpu_ip_block *ip_block)
return ret;
}
- if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) {
+ if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL && smu->od_enabled) {
ret = smu_od_edit_dpm_table(smu, PP_OD_COMMIT_DPM_TABLE, NULL, 0);
if (ret)
return ret;
@@ -3508,15 +3534,10 @@ bool smu_mode1_reset_is_support(struct smu_context *smu)
bool smu_link_reset_is_support(struct smu_context *smu)
{
- bool ret = false;
-
if (!smu->pm_enabled)
return false;
- if (smu->ppt_funcs && smu->ppt_funcs->link_reset_is_support)
- ret = smu->ppt_funcs->link_reset_is_support(smu);
-
- return ret;
+ return smu_feature_cap_test(smu, SMU_FEATURE_CAP_ID__LINK_RESET);
}
int smu_mode1_reset(struct smu_context *smu)
@@ -4106,12 +4127,7 @@ int smu_send_rma_reason(struct smu_context *smu)
*/
bool smu_reset_sdma_is_supported(struct smu_context *smu)
{
- bool ret = false;
-
- if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma_is_supported)
- ret = smu->ppt_funcs->reset_sdma_is_supported(smu);
-
- return ret;
+ return smu_feature_cap_test(smu, SMU_FEATURE_CAP_ID__SDMA_RESET);
}
int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
@@ -4126,12 +4142,7 @@ int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
bool smu_reset_vcn_is_supported(struct smu_context *smu)
{
- bool ret = false;
-
- if (smu->ppt_funcs && smu->ppt_funcs->reset_vcn_is_supported)
- ret = smu->ppt_funcs->reset_vcn_is_supported(smu);
-
- return ret;
+ return smu_feature_cap_test(smu, SMU_FEATURE_CAP_ID__VCN_RESET);
}
int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 5976eda80035..582c186d8b62 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -528,6 +528,17 @@ enum smu_fw_status {
*/
#define SMU_WBRF_EVENT_HANDLING_PACE 10
+enum smu_feature_cap_id {
+ SMU_FEATURE_CAP_ID__LINK_RESET = 0,
+ SMU_FEATURE_CAP_ID__SDMA_RESET,
+ SMU_FEATURE_CAP_ID__VCN_RESET,
+ SMU_FEATURE_CAP_ID__COUNT,
+};
+
+struct smu_feature_cap {
+ DECLARE_BITMAP(cap_map, SMU_FEATURE_CAP_ID__COUNT);
+};
+
struct smu_context {
struct amdgpu_device *adev;
struct amdgpu_irq_src irq_source;
@@ -550,6 +561,7 @@ struct smu_context {
struct amd_pp_display_configuration *display_config;
struct smu_baco_context smu_baco;
struct smu_temperature_range thermal_range;
+ struct smu_feature_cap fea_cap;
void *od_settings;
struct smu_umd_pstate_table pstate_table;
@@ -1273,11 +1285,6 @@ struct pptable_funcs {
bool (*mode1_reset_is_support)(struct smu_context *smu);
/**
- * @link_reset_is_support: Check if GPU supports link reset.
- */
- bool (*link_reset_is_support)(struct smu_context *smu);
-
- /**
* @mode1_reset: Perform mode1 reset.
*
* Complete GPU reset.
@@ -1427,19 +1434,11 @@ struct pptable_funcs {
* @reset_sdma: message SMU to soft reset sdma instance.
*/
int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask);
- /**
- * @reset_sdma_is_supported: Check if support resets the SDMA engine.
- */
- bool (*reset_sdma_is_supported)(struct smu_context *smu);
/**
* @reset_vcn: message SMU to soft reset vcn instance.
*/
int (*dpm_reset_vcn)(struct smu_context *smu, uint32_t inst_mask);
- /**
- * @reset_vcn_is_supported: Check if support resets vcn.
- */
- bool (*reset_vcn_is_supported)(struct smu_context *smu);
/**
* @get_ecc_table: message SMU to get ECC INFO table.
@@ -1788,4 +1787,7 @@ ssize_t smu_get_pm_policy_info(struct smu_context *smu,
enum pp_pm_policy p_type, char *sysbuf);
#endif
+
+void smu_feature_cap_set(struct smu_context *smu, enum smu_feature_cap_id fea_id);
+bool smu_feature_cap_test(struct smu_context *smu, enum smu_feature_cap_id fea_id);
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h
index 1c407a8e96ee..bf6aa9620911 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h
@@ -191,7 +191,7 @@ typedef enum {
#define SMU_METRICS_TABLE_VERSION 0x14
-#define SMU_SYSTEM_METRICS_TABLE_VERSION 0x0
+#define SMU_SYSTEM_METRICS_TABLE_VERSION 0x1
typedef struct __attribute__((packed, aligned(4))) {
uint64_t AccumulationCounter;
@@ -304,7 +304,12 @@ typedef struct {
int16_t SystemTemperatures[SYSTEM_TEMP_MAX_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF
int16_t NodeTemperatures[NODE_TEMP_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF
int16_t VrTemperatures[SVI_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius
- int16_t spare[3];
+ int16_t spare[7];
+
+ //NPM: NODE POWER MANAGEMENT
+ uint32_t NodePowerLimit;
+ uint32_t NodePower;
+ uint32_t GlobalPPTResidencyAcc;
} SystemMetricsTable_t;
#pragma pack(pop)
@@ -359,6 +364,9 @@ typedef struct {
// General info
uint32_t pldmVersion[2];
+
+ //Node Power Limit
+ uint32_t MaxNodePowerLimit;
} StaticMetricsTable_t;
#pragma pack(pop)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
index aff2776a8b6f..4b066c42e0ec 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
@@ -120,7 +120,8 @@
#define PPSMC_MSG_GetBadPageSeverity 0x5B
#define PPSMC_MSG_GetSystemMetricsTable 0x5C
#define PPSMC_MSG_GetSystemMetricsVersion 0x5D
-#define PPSMC_Message_Count 0x5E
+#define PPSMC_MSG_ResetVCN 0x5E
+#define PPSMC_Message_Count 0x5F
//PPSMC Reset Types for driver msg argument
#define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h
index 251ed011b3b0..251ed011b3b0 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 599eddb5a67d..4fff78da81ff 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -1745,10 +1745,10 @@ static int arcturus_i2c_control_init(struct smu_context *smu)
snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i);
i2c_set_adapdata(control, smu_i2c);
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res) {
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
- goto Out_err;
+ return res;
}
}
@@ -1756,27 +1756,12 @@ static int arcturus_i2c_control_init(struct smu_context *smu)
adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[1].adapter;
return 0;
-Out_err:
- for ( ; i >= 0; i--) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
- return res;
}
static void arcturus_i2c_control_fini(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- int i;
- for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
adev->pm.ras_eeprom_i2c_bus = NULL;
adev->pm.fru_eeprom_i2c_bus = NULL;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index aac202d0c30e..0028f10ead42 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3145,10 +3145,10 @@ static int navi10_i2c_control_init(struct smu_context *smu)
control->quirks = &navi10_i2c_control_quirks;
i2c_set_adapdata(control, smu_i2c);
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res) {
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
- goto Out_err;
+ return res;
}
}
@@ -3156,27 +3156,12 @@ static int navi10_i2c_control_init(struct smu_context *smu)
adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[1].adapter;
return 0;
-Out_err:
- for ( ; i >= 0; i--) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
- return res;
}
static void navi10_i2c_control_fini(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- int i;
- for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
adev->pm.ras_eeprom_i2c_bus = NULL;
adev->pm.fru_eeprom_i2c_bus = NULL;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index d57591509aed..31c2c0386b1f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2648,10 +2648,10 @@ static int sienna_cichlid_i2c_control_init(struct smu_context *smu)
control->quirks = &sienna_cichlid_i2c_control_quirks;
i2c_set_adapdata(control, smu_i2c);
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res) {
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
- goto Out_err;
+ return res;
}
}
/* assign the buses used for the FRU EEPROM and RAS EEPROM */
@@ -2660,27 +2660,12 @@ static int sienna_cichlid_i2c_control_init(struct smu_context *smu)
adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
return 0;
-Out_err:
- for ( ; i >= 0; i--) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
- return res;
}
static void sienna_cichlid_i2c_control_fini(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- int i;
- for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
adev->pm.ras_eeprom_i2c_bus = NULL;
adev->pm.fru_eeprom_i2c_bus = NULL;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index b067147b7c41..18d5d0704509 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1641,33 +1641,22 @@ static int aldebaran_i2c_control_init(struct smu_context *smu)
control->quirks = &aldebaran_i2c_control_quirks;
i2c_set_adapdata(control, smu_i2c);
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res) {
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
- goto Out_err;
+ return res;
}
adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
return 0;
-Out_err:
- i2c_del_adapter(control);
-
- return res;
}
static void aldebaran_i2c_control_fini(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- int i;
-
- for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
- i2c_del_adapter(control);
- }
adev->pm.ras_eeprom_i2c_bus = NULL;
adev->pm.fru_eeprom_i2c_bus = NULL;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 1a1f2a6b2e52..a89075e25717 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -288,7 +288,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
* Considering above, we just leave user a verbal message instead
* of halt driver loading.
*/
- if (if_version != smu->smc_driver_if_version) {
+ if (smu->smc_driver_if_version != SMU_IGNORE_IF_VERSION &&
+ if_version != smu->smc_driver_if_version) {
dev_info(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, "
"smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n",
smu->smc_driver_if_version, if_version,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index e084ed99ec0e..c1062e5f0393 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2825,10 +2825,10 @@ static int smu_v13_0_0_i2c_control_init(struct smu_context *smu)
control->quirks = &smu_v13_0_0_i2c_control_quirks;
i2c_set_adapdata(control, smu_i2c);
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res) {
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
- goto Out_err;
+ return res;
}
}
@@ -2838,27 +2838,12 @@ static int smu_v13_0_0_i2c_control_init(struct smu_context *smu)
adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
return 0;
-Out_err:
- for ( ; i >= 0; i--) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
- return res;
}
static void smu_v13_0_0_i2c_control_fini(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- int i;
- for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
adev->pm.ras_eeprom_i2c_bus = NULL;
adev->pm.fru_eeprom_i2c_bus = NULL;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
index 0bec12b348ce..cb3fea9e8cf3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
@@ -136,8 +136,9 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0),
MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0),
MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0),
+ MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0),
MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1),
- MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 0),
+ MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 1),
};
int smu_v13_0_12_tables_init(struct smu_context *smu)
@@ -341,6 +342,9 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu)
static_metrics->pldmVersion[0] != 0xFFFFFFFF)
smu->adev->firmware.pldm_version =
static_metrics->pldmVersion[0];
+ if (smu_v13_0_6_cap_supported(smu, SMU_CAP(NPM_METRICS)))
+ pptable->MaxNodePowerLimit =
+ SMUQ10_ROUND(static_metrics->MaxNodePowerLimit);
smu_v13_0_12_init_xgmi_data(smu, static_metrics);
pptable->Init = true;
}
@@ -580,6 +584,50 @@ static bool smu_v13_0_12_is_temp_metrics_supported(struct smu_context *smu,
return false;
}
+int smu_v13_0_12_get_npm_data(struct smu_context *smu,
+ enum amd_pp_sensors sensor,
+ uint32_t *value)
+{
+ struct smu_table_context *smu_table = &smu->smu_table;
+ struct PPTable_t *pptable =
+ (struct PPTable_t *)smu_table->driver_pptable;
+ struct smu_table *tables = smu_table->tables;
+ SystemMetricsTable_t *metrics;
+ struct smu_table *sys_table;
+ int ret;
+
+ if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(NPM_METRICS)))
+ return -EOPNOTSUPP;
+
+ if (sensor == AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT) {
+ *value = pptable->MaxNodePowerLimit;
+ return 0;
+ }
+
+ ret = smu_v13_0_12_get_system_metrics_table(smu);
+ if (ret)
+ return ret;
+
+ sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS];
+ metrics = (SystemMetricsTable_t *)sys_table->cache.buffer;
+
+ switch (sensor) {
+ case AMDGPU_PP_SENSOR_NODEPOWERLIMIT:
+ *value = SMUQ10_ROUND(metrics->NodePowerLimit);
+ break;
+ case AMDGPU_PP_SENSOR_NODEPOWER:
+ *value = SMUQ10_ROUND(metrics->NodePower);
+ break;
+ case AMDGPU_PP_SENSOR_GPPTRESIDENCY:
+ *value = SMUQ10_ROUND(metrics->GlobalPPTResidencyAcc);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
static ssize_t smu_v13_0_12_get_temp_metrics(struct smu_context *smu,
enum smu_temp_metric_type type, void *table)
{
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index ebee659f8a1c..285cf7979693 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -143,7 +143,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
MSG_MAP(GetMinDpmFreq, PPSMC_MSG_GetMinDpmFreq, 1),
MSG_MAP(GetMaxDpmFreq, PPSMC_MSG_GetMaxDpmFreq, 1),
MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1),
- MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0),
+ MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 1),
MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1),
MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK),
MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0),
@@ -353,8 +353,15 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu)
smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
}
+ if (fw_ver > 0x04560900)
+ smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
+
if (fw_ver >= 0x04560700) {
- if (!amdgpu_sriov_vf(smu->adev))
+ if (fw_ver >= 0x04560900) {
+ smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS));
+ if (smu->adev->gmc.xgmi.physical_node_id == 0)
+ smu_v13_0_6_cap_set(smu, SMU_CAP(NPM_METRICS));
+ } else if (!amdgpu_sriov_vf(smu->adev))
smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS));
} else {
smu_v13_0_12_tables_fini(smu);
@@ -413,6 +420,10 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu)
smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS));
if (amdgpu_sriov_vf(adev)) {
+ if (fw_ver >= 0x00558200)
+ amdgpu_virt_attr_set(&adev->virt.virt_caps,
+ AMDGPU_VIRT_CAP_POWER_LIMIT,
+ AMDGPU_CAP_ATTR_RW);
if ((pgm == 0 && fw_ver >= 0x00558000) ||
(pgm == 7 && fw_ver >= 0x7551000)) {
smu_v13_0_6_cap_set(smu,
@@ -439,8 +450,7 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu)
((pgm == 4) && (fw_ver >= 0x4557000)))
smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));
- if (((pgm == 0) && (fw_ver >= 0x00558200)) ||
- ((pgm == 4) && (fw_ver >= 0x04557100)))
+ if ((pgm == 0) && (fw_ver >= 0x00558200))
smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
}
@@ -1795,6 +1805,15 @@ static int smu_v13_0_6_read_sensor(struct smu_context *smu,
ret = -EOPNOTSUPP;
break;
}
+ case AMDGPU_PP_SENSOR_NODEPOWERLIMIT:
+ case AMDGPU_PP_SENSOR_NODEPOWER:
+ case AMDGPU_PP_SENSOR_GPPTRESIDENCY:
+ case AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT:
+ ret = smu_v13_0_12_get_npm_data(smu, sensor, (uint32_t *)data);
+ if (ret)
+ return ret;
+ *size = 4;
+ break;
case AMDGPU_PP_SENSOR_GPU_AVG_POWER:
default:
ret = -EOPNOTSUPP;
@@ -2490,10 +2509,10 @@ static int smu_v13_0_6_i2c_control_init(struct smu_context *smu)
control->quirks = &smu_v13_0_6_i2c_control_quirks;
i2c_set_adapdata(control, smu_i2c);
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res) {
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
- goto Out_err;
+ return res;
}
}
@@ -2501,27 +2520,12 @@ static int smu_v13_0_6_i2c_control_init(struct smu_context *smu)
adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
return 0;
-Out_err:
- for ( ; i >= 0; i--) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
- return res;
}
static void smu_v13_0_6_i2c_control_fini(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- int i;
-
- for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
- i2c_del_adapter(control);
- }
adev->pm.ras_eeprom_i2c_bus = NULL;
adev->pm.fru_eeprom_i2c_bus = NULL;
}
@@ -3223,6 +3227,20 @@ static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask)
}
+static int smu_v13_0_6_post_init(struct smu_context *smu)
+{
+ if (smu_v13_0_6_is_link_reset_supported(smu))
+ smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__LINK_RESET);
+
+ if (smu_v13_0_6_reset_sdma_is_supported(smu))
+ smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__SDMA_RESET);
+
+ if (smu_v13_0_6_reset_vcn_is_supported(smu))
+ smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__VCN_RESET);
+
+ return 0;
+}
+
static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
{
struct smu_context *smu = adev->powerplay.pp_handle;
@@ -3839,6 +3857,12 @@ static const struct aca_smu_funcs smu_v13_0_6_aca_smu_funcs = {
.parse_error_code = aca_smu_parse_error_code,
};
+static void smu_v13_0_6_set_temp_funcs(struct smu_context *smu)
+{
+ smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)
+ == IP_VERSION(13, 0, 12)) ? &smu_v13_0_12_temp_funcs : NULL;
+}
+
static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
/* init dpm */
.get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask,
@@ -3886,7 +3910,6 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
.get_xcp_metrics = smu_v13_0_6_get_xcp_metrics,
.get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range,
.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
- .link_reset_is_support = smu_v13_0_6_is_link_reset_supported,
.mode1_reset = smu_v13_0_6_mode1_reset,
.mode2_reset = smu_v13_0_6_mode2_reset,
.link_reset = smu_v13_0_6_link_reset,
@@ -3896,9 +3919,8 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
.send_rma_reason = smu_v13_0_6_send_rma_reason,
.reset_sdma = smu_v13_0_6_reset_sdma,
- .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported,
.dpm_reset_vcn = smu_v13_0_6_reset_vcn,
- .reset_vcn_is_supported = smu_v13_0_6_reset_vcn_is_supported,
+ .post_init = smu_v13_0_6_post_init,
};
void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
@@ -3910,15 +3932,11 @@ void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
smu->feature_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ?
smu_v13_0_12_feature_mask_map : smu_v13_0_6_feature_mask_map;
smu->table_map = smu_v13_0_6_table_map;
- smu->smc_driver_if_version = SMU13_0_6_DRIVER_IF_VERSION;
+ smu->smc_driver_if_version = SMU_IGNORE_IF_VERSION;
smu->smc_fw_caps |= SMU_FW_CAP_RAS_PRI;
smu_v13_0_set_smu_mailbox_registers(smu);
+ smu_v13_0_6_set_temp_funcs(smu);
amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs);
amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs);
}
-void smu_v13_0_6_set_temp_funcs(struct smu_context *smu)
-{
- smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)
- == IP_VERSION(13, 0, 12)) ? &smu_v13_0_12_temp_funcs : NULL;
-}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h
index aae9a546a67e..7ef5f3e66c27 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h
@@ -49,6 +49,7 @@ struct PPTable_t {
uint32_t MaxLclkDpmRange;
uint32_t MinLclkDpmRange;
uint64_t PublicSerialNumber_AID;
+ uint32_t MaxNodePowerLimit;
bool Init;
};
@@ -70,11 +71,11 @@ enum smu_v13_0_6_caps {
SMU_CAP(BOARD_VOLTAGE),
SMU_CAP(PLDM_VERSION),
SMU_CAP(TEMP_METRICS),
+ SMU_CAP(NPM_METRICS),
SMU_CAP(ALL),
};
extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu);
-extern void smu_v13_0_6_set_temp_funcs(struct smu_context *smu);
bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap);
int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu);
int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table,
@@ -92,6 +93,9 @@ ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu,
void *smu_metrics);
int smu_v13_0_12_tables_init(struct smu_context *smu);
void smu_v13_0_12_tables_fini(struct smu_context *smu);
+int smu_v13_0_12_get_npm_data(struct smu_context *smu,
+ enum amd_pp_sensors sensor,
+ uint32_t *value);
extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[];
extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[];
extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index f32474af90b3..086501cc5213 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -2087,10 +2087,10 @@ static int smu_v14_0_2_i2c_control_init(struct smu_context *smu)
control->quirks = &smu_v14_0_2_i2c_control_quirks;
i2c_set_adapdata(control, smu_i2c);
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res) {
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
- goto Out_err;
+ return res;
}
}
@@ -2100,27 +2100,12 @@ static int smu_v14_0_2_i2c_control_init(struct smu_context *smu)
adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
return 0;
-Out_err:
- for ( ; i >= 0; i--) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
- return res;
}
static void smu_v14_0_2_i2c_control_fini(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- int i;
- for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
- struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
- struct i2c_adapter *control = &smu_i2c->adapter;
-
- i2c_del_adapter(control);
- }
adev->pm.ras_eeprom_i2c_bus = NULL;
adev->pm.fru_eeprom_i2c_bus = NULL;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index d588f74b98de..0ae91c8b6d72 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -40,6 +40,8 @@
#define SMU_IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8
#define SMU_IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9
+#define SMU_IGNORE_IF_VERSION 0xFFFFFFFF
+
#define smu_cmn_init_soft_gpu_metrics(ptr, frev, crev) \
do { \
typecheck(struct gpu_metrics_v##frev##_##crev *, (ptr)); \
diff --git a/drivers/gpu/drm/amd/ras/rascore/Makefile b/drivers/gpu/drm/amd/ras/rascore/Makefile
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/ras/rascore/Makefile
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h b/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h
index 0d878ca3acba..144fbe4ceb9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: MIT */
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2025 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -21,12 +22,16 @@
*
*/
-#ifndef __DCE_V11_0_H__
-#define __DCE_V11_0_H__
-
-extern const struct amdgpu_ip_block_version dce_v11_0_ip_block;
-extern const struct amdgpu_ip_block_version dce_v11_2_ip_block;
-
-void dce_v11_0_disable_dce(struct amdgpu_device *adev);
+#ifndef __RAS_CORE_STATUS_H__
+#define __RAS_CORE_STATUS_H__
+#define RAS_CORE_OK 0
+#define RAS_CORE_NOT_SUPPORTED 248
+#define RAS_CORE_FAIL_ERROR_QUERY 249
+#define RAS_CORE_FAIL_ERROR_INJECTION 250
+#define RAS_CORE_FAIL_FATAL_RECOVERY 251
+#define RAS_CORE_FAIL_POISON_CONSUMPTION 252
+#define RAS_CORE_FAIL_POISON_CREATION 253
+#define RAS_CORE_FAIL_NO_VALID_BANKS 254
+#define RAS_CORE_GPU_IN_MODE1_RESET 255
#endif
diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c
index 19c04687b0fe..8e650a02c528 100644
--- a/drivers/gpu/drm/ast/ast_dp.c
+++ b/drivers/gpu/drm/ast/ast_dp.c
@@ -134,7 +134,7 @@ static int ast_astdp_read_edid_block(void *data, u8 *buf, unsigned int block, si
* 3. The Delays are often longer a lot when system resume from S3/S4.
*/
if (j)
- mdelay(j + 1);
+ msleep(j + 1);
/* Wait for EDID offset to show up in mirror register */
vgacrd7 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xd7);
diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c
index 609cdb9d371e..6f3fdcb6afdb 100644
--- a/drivers/gpu/drm/bridge/analogix/anx7625.c
+++ b/drivers/gpu/drm/bridge/analogix/anx7625.c
@@ -2678,7 +2678,7 @@ static int anx7625_i2c_probe(struct i2c_client *client)
ret = devm_request_threaded_irq(dev, platform->pdata.intp_irq,
NULL, anx7625_intr_hpd_isr,
IRQF_TRIGGER_FALLING |
- IRQF_ONESHOT,
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
"anx7625-intp", platform);
if (ret) {
DRM_DEV_ERROR(dev, "fail to request irq\n");
@@ -2747,8 +2747,10 @@ static int anx7625_i2c_probe(struct i2c_client *client)
}
/* Add work function */
- if (platform->pdata.intp_irq)
+ if (platform->pdata.intp_irq) {
+ enable_irq(platform->pdata.intp_irq);
queue_work(platform->workqueue, &platform->work);
+ }
if (platform->pdata.audio_en)
anx7625_register_audio(dev, platform);
diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
index a614d1384f71..38726ae1bf15 100644
--- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
+++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c
@@ -1984,8 +1984,10 @@ static void cdns_mhdp_atomic_enable(struct drm_bridge *bridge,
mhdp_state = to_cdns_mhdp_bridge_state(new_state);
mhdp_state->current_mode = drm_mode_duplicate(bridge->dev, mode);
- if (!mhdp_state->current_mode)
- return;
+ if (!mhdp_state->current_mode) {
+ ret = -EINVAL;
+ goto out;
+ }
drm_mode_set_name(mhdp_state->current_mode);
diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c
index b5dd71f6a990..eabc4c32f6ab 100644
--- a/drivers/gpu/drm/bridge/samsung-dsim.c
+++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -31,11 +31,10 @@
/* returns true iff both arguments logically differs */
#define NEQV(a, b) (!(a) ^ !(b))
-/* DSIM_STATUS */
+/* DSIM_STATUS or DSIM_DPHY_STATUS */
#define DSIM_STOP_STATE_DAT(x) (((x) & 0xf) << 0)
#define DSIM_STOP_STATE_CLK BIT(8)
#define DSIM_TX_READY_HS_CLK BIT(10)
-#define DSIM_PLL_STABLE BIT(31)
/* DSIM_SWRST */
#define DSIM_FUNCRST BIT(16)
@@ -46,17 +45,13 @@
#define DSIM_BTA_TIMEOUT(x) ((x) << 16)
/* DSIM_CLKCTRL */
-#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0)
-#define DSIM_ESC_PRESCALER_MASK (0xffff << 0)
-#define DSIM_LANE_ESC_CLK_EN_CLK BIT(19)
-#define DSIM_LANE_ESC_CLK_EN_DATA(x) (((x) & 0xf) << 20)
-#define DSIM_LANE_ESC_CLK_EN_DATA_MASK (0xf << 20)
-#define DSIM_BYTE_CLKEN BIT(24)
-#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25)
-#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25)
-#define DSIM_PLL_BYPASS BIT(27)
-#define DSIM_ESC_CLKEN BIT(28)
-#define DSIM_TX_REQUEST_HSCLK BIT(31)
+#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0)
+#define DSIM_ESC_PRESCALER_MASK (0xffff << 0)
+#define DSIM_LANE_ESC_CLK_EN_DATA(x, offset) (((x) & 0xf) << offset)
+#define DSIM_LANE_ESC_CLK_EN_DATA_MASK(offset) (0xf << offset)
+#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25)
+#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25)
+#define DSIM_PLL_BYPASS BIT(27)
/* DSIM_CONFIG */
#define DSIM_LANE_EN_CLK BIT(0)
@@ -91,7 +86,6 @@
*/
#define DSIM_HSE_DISABLE_MODE BIT(23)
#define DSIM_AUTO_MODE BIT(24)
-#define DSIM_VIDEO_MODE BIT(25)
#define DSIM_BURST_MODE BIT(26)
#define DSIM_SYNC_INFORM BIT(27)
#define DSIM_EOT_DISABLE BIT(28)
@@ -129,9 +123,9 @@
#define DSIM_MAIN_HBP_MASK ((0xffff) << 0)
/* DSIM_MSYNC */
-#define DSIM_MAIN_VSA(x) ((x) << 22)
+#define DSIM_MAIN_VSA(x, offset) ((x) << offset)
#define DSIM_MAIN_HSA(x) ((x) << 0)
-#define DSIM_MAIN_VSA_MASK ((0x3ff) << 22)
+#define DSIM_MAIN_VSA_MASK(offset) ((0x3ff) << offset)
#define DSIM_MAIN_HSA_MASK ((0xffff) << 0)
/* DSIM_SDRESOL */
@@ -157,6 +151,11 @@
#define DSIM_INT_RX_ECC_ERR BIT(15)
#define DSIM_INT_RX_CRC_ERR BIT(14)
+/* DSIM_SFRCTRL */
+#define DSIM_SFR_CTRL_STAND_BY BIT(4)
+#define DSIM_SFR_CTRL_SHADOW_UPDATE BIT(1)
+#define DSIM_SFR_CTRL_SHADOW_EN BIT(0)
+
/* DSIM_FIFOCTRL */
#define DSIM_RX_DATA_FULL BIT(25)
#define DSIM_RX_DATA_EMPTY BIT(24)
@@ -191,9 +190,7 @@
#define DSIM_PLL_DPDNSWAP_DAT (1 << 24)
#define DSIM_FREQ_BAND(x) ((x) << 24)
#define DSIM_PLL_EN BIT(23)
-#define DSIM_PLL_P(x, offset) ((x) << (offset))
-#define DSIM_PLL_M(x) ((x) << 4)
-#define DSIM_PLL_S(x) ((x) << 1)
+#define DSIM_PLL(x, offset) ((x) << (offset))
/* DSIM_PHYCTRL */
#define DSIM_PHYCTRL_ULPS_EXIT(x) (((x) & 0x1ff) << 0)
@@ -222,25 +219,42 @@
#define DSI_XFER_TIMEOUT_MS 100
#define DSI_RX_FIFO_EMPTY 0x30800002
-#define OLD_SCLK_MIPI_CLK_NAME "pll_clk"
-
#define PS_TO_CYCLE(ps, hz) DIV64_U64_ROUND_CLOSEST(((ps) * (hz)), 1000000000000ULL)
-static const char *const clk_names[5] = {
- "bus_clk",
- "sclk_mipi",
- "phyclk_mipidphy0_bitclkdiv8",
- "phyclk_mipidphy0_rxclkesc0",
- "sclk_rgb_vclk_to_dsim0"
-};
-
enum samsung_dsim_transfer_type {
EXYNOS_DSI_TX,
EXYNOS_DSI_RX,
};
+static struct clk_bulk_data exynos3_clk_bulk_data[] = {
+ { .id = "bus_clk" },
+ { .id = "pll_clk" },
+};
+
+static struct clk_bulk_data exynos4_clk_bulk_data[] = {
+ { .id = "bus_clk" },
+ { .id = "sclk_mipi" },
+};
+
+static struct clk_bulk_data exynos5433_clk_bulk_data[] = {
+ { .id = "bus_clk" },
+ { .id = "sclk_mipi" },
+ { .id = "phyclk_mipidphy0_bitclkdiv8" },
+ { .id = "phyclk_mipidphy0_rxclkesc0" },
+ { .id = "sclk_rgb_vclk_to_dsim0" },
+};
+
+static struct clk_bulk_data exynos7870_clk_bulk_data[] = {
+ { .id = "bus" },
+ { .id = "pll" },
+ { .id = "byte" },
+ { .id = "esc" },
+};
+
enum reg_idx {
- DSIM_STATUS_REG, /* Status register */
+ DSIM_STATUS_REG, /* Status register (legacy) */
+ DSIM_LINK_STATUS_REG, /* Link status register */
+ DSIM_DPHY_STATUS_REG, /* D-PHY status register */
DSIM_SWRST_REG, /* Software reset register */
DSIM_CLKCTRL_REG, /* Clock control register */
DSIM_TIMEOUT_REG, /* Time out register */
@@ -255,6 +269,7 @@ enum reg_idx {
DSIM_PKTHDR_REG, /* Packet Header FIFO register */
DSIM_PAYLOAD_REG, /* Payload FIFO register */
DSIM_RXFIFO_REG, /* Read FIFO register */
+ DSIM_SFRCTRL_REG, /* SFR standby and shadow control register */
DSIM_FIFOCTRL_REG, /* FIFO status and control register */
DSIM_PLLCTRL_REG, /* PLL control register */
DSIM_PHYCTRL_REG,
@@ -312,6 +327,32 @@ static const unsigned int exynos5433_reg_ofs[] = {
[DSIM_PHYTIMING2_REG] = 0xBC,
};
+static const unsigned int exynos7870_reg_ofs[] = {
+ [DSIM_LINK_STATUS_REG] = 0x04,
+ [DSIM_DPHY_STATUS_REG] = 0x08,
+ [DSIM_SWRST_REG] = 0x0C,
+ [DSIM_CLKCTRL_REG] = 0x10,
+ [DSIM_TIMEOUT_REG] = 0x14,
+ [DSIM_ESCMODE_REG] = 0x1C,
+ [DSIM_MDRESOL_REG] = 0x20,
+ [DSIM_MVPORCH_REG] = 0x24,
+ [DSIM_MHPORCH_REG] = 0x28,
+ [DSIM_MSYNC_REG] = 0x2C,
+ [DSIM_CONFIG_REG] = 0x30,
+ [DSIM_INTSRC_REG] = 0x34,
+ [DSIM_INTMSK_REG] = 0x38,
+ [DSIM_PKTHDR_REG] = 0x3C,
+ [DSIM_PAYLOAD_REG] = 0x40,
+ [DSIM_RXFIFO_REG] = 0x44,
+ [DSIM_SFRCTRL_REG] = 0x48,
+ [DSIM_FIFOCTRL_REG] = 0x4C,
+ [DSIM_PLLCTRL_REG] = 0x94,
+ [DSIM_PHYCTRL_REG] = 0xA4,
+ [DSIM_PHYTIMING_REG] = 0xB4,
+ [DSIM_PHYTIMING1_REG] = 0xB8,
+ [DSIM_PHYTIMING2_REG] = 0xBC,
+};
+
enum reg_value_idx {
RESET_TYPE,
PLL_TIMER,
@@ -384,6 +425,24 @@ static const unsigned int exynos5433_reg_values[] = {
[PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c),
};
+static const unsigned int exynos7870_reg_values[] = {
+ [RESET_TYPE] = DSIM_SWRST,
+ [PLL_TIMER] = 80000,
+ [STOP_STATE_CNT] = 0xa,
+ [PHYCTRL_ULPS_EXIT] = DSIM_PHYCTRL_ULPS_EXIT(0x177),
+ [PHYCTRL_VREG_LP] = 0,
+ [PHYCTRL_SLEW_UP] = 0,
+ [PHYTIMING_LPX] = DSIM_PHYTIMING_LPX(0x07),
+ [PHYTIMING_HS_EXIT] = DSIM_PHYTIMING_HS_EXIT(0x0c),
+ [PHYTIMING_CLK_PREPARE] = DSIM_PHYTIMING1_CLK_PREPARE(0x08),
+ [PHYTIMING_CLK_ZERO] = DSIM_PHYTIMING1_CLK_ZERO(0x2b),
+ [PHYTIMING_CLK_POST] = DSIM_PHYTIMING1_CLK_POST(0x0d),
+ [PHYTIMING_CLK_TRAIL] = DSIM_PHYTIMING1_CLK_TRAIL(0x09),
+ [PHYTIMING_HS_PREPARE] = DSIM_PHYTIMING2_HS_PREPARE(0x09),
+ [PHYTIMING_HS_ZERO] = DSIM_PHYTIMING2_HS_ZERO(0x0f),
+ [PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c),
+};
+
static const unsigned int imx8mm_dsim_reg_values[] = {
[RESET_TYPE] = DSIM_SWRST,
[PLL_TIMER] = 500,
@@ -405,13 +464,26 @@ static const unsigned int imx8mm_dsim_reg_values[] = {
static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = {
.reg_ofs = exynos_reg_ofs,
.plltmr_reg = 0x50,
+ .has_legacy_status_reg = 1,
.has_freqband = 1,
.has_clklane_stop = 1,
- .num_clks = 2,
+ .clk_data = exynos3_clk_bulk_data,
+ .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data),
.max_freq = 1000,
+ .wait_for_hdr_fifo = 1,
.wait_for_reset = 1,
.num_bits_resol = 11,
+ .video_mode_bit = 25,
+ .pll_stable_bit = 31,
+ .esc_clken_bit = 28,
+ .byte_clken_bit = 24,
+ .tx_req_hsclk_bit = 31,
+ .lane_esc_clk_bit = 19,
+ .lane_esc_data_offset = 20,
.pll_p_offset = 13,
+ .pll_m_offset = 4,
+ .pll_s_offset = 1,
+ .main_vsa_offset = 22,
.reg_values = reg_values,
.pll_fin_min = 6,
.pll_fin_max = 12,
@@ -424,13 +496,26 @@ static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = {
static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = {
.reg_ofs = exynos_reg_ofs,
.plltmr_reg = 0x50,
+ .has_legacy_status_reg = 1,
.has_freqband = 1,
.has_clklane_stop = 1,
- .num_clks = 2,
+ .clk_data = exynos4_clk_bulk_data,
+ .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data),
.max_freq = 1000,
+ .wait_for_hdr_fifo = 1,
.wait_for_reset = 1,
.num_bits_resol = 11,
+ .video_mode_bit = 25,
+ .pll_stable_bit = 31,
+ .esc_clken_bit = 28,
+ .byte_clken_bit = 24,
+ .tx_req_hsclk_bit = 31,
+ .lane_esc_clk_bit = 19,
+ .lane_esc_data_offset = 20,
.pll_p_offset = 13,
+ .pll_m_offset = 4,
+ .pll_s_offset = 1,
+ .main_vsa_offset = 22,
.reg_values = reg_values,
.pll_fin_min = 6,
.pll_fin_max = 12,
@@ -443,11 +528,24 @@ static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = {
static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = {
.reg_ofs = exynos_reg_ofs,
.plltmr_reg = 0x58,
- .num_clks = 2,
+ .has_legacy_status_reg = 1,
+ .clk_data = exynos3_clk_bulk_data,
+ .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data),
.max_freq = 1000,
+ .wait_for_hdr_fifo = 1,
.wait_for_reset = 1,
.num_bits_resol = 11,
+ .video_mode_bit = 25,
+ .pll_stable_bit = 31,
+ .esc_clken_bit = 28,
+ .byte_clken_bit = 24,
+ .tx_req_hsclk_bit = 31,
+ .lane_esc_clk_bit = 19,
+ .lane_esc_data_offset = 20,
.pll_p_offset = 13,
+ .pll_m_offset = 4,
+ .pll_s_offset = 1,
+ .main_vsa_offset = 22,
.reg_values = reg_values,
.pll_fin_min = 6,
.pll_fin_max = 12,
@@ -459,12 +557,25 @@ static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = {
static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = {
.reg_ofs = exynos5433_reg_ofs,
.plltmr_reg = 0xa0,
+ .has_legacy_status_reg = 1,
.has_clklane_stop = 1,
- .num_clks = 5,
+ .clk_data = exynos5433_clk_bulk_data,
+ .num_clks = ARRAY_SIZE(exynos5433_clk_bulk_data),
.max_freq = 1500,
+ .wait_for_hdr_fifo = 1,
.wait_for_reset = 0,
.num_bits_resol = 12,
+ .video_mode_bit = 25,
+ .pll_stable_bit = 31,
+ .esc_clken_bit = 28,
+ .byte_clken_bit = 24,
+ .tx_req_hsclk_bit = 31,
+ .lane_esc_clk_bit = 19,
+ .lane_esc_data_offset = 20,
.pll_p_offset = 13,
+ .pll_m_offset = 4,
+ .pll_s_offset = 1,
+ .main_vsa_offset = 22,
.reg_values = exynos5433_reg_values,
.pll_fin_min = 6,
.pll_fin_max = 12,
@@ -476,12 +587,25 @@ static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = {
static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = {
.reg_ofs = exynos5433_reg_ofs,
.plltmr_reg = 0xa0,
+ .has_legacy_status_reg = 1,
.has_clklane_stop = 1,
- .num_clks = 2,
+ .clk_data = exynos3_clk_bulk_data,
+ .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data),
.max_freq = 1500,
+ .wait_for_hdr_fifo = 1,
.wait_for_reset = 1,
.num_bits_resol = 12,
+ .video_mode_bit = 25,
+ .pll_stable_bit = 31,
+ .esc_clken_bit = 28,
+ .byte_clken_bit = 24,
+ .tx_req_hsclk_bit = 31,
+ .lane_esc_clk_bit = 19,
+ .lane_esc_data_offset = 20,
.pll_p_offset = 13,
+ .pll_m_offset = 4,
+ .pll_s_offset = 1,
+ .main_vsa_offset = 22,
.reg_values = exynos5422_reg_values,
.pll_fin_min = 6,
.pll_fin_max = 12,
@@ -490,19 +614,62 @@ static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = {
.min_freq = 500,
};
+static const struct samsung_dsim_driver_data exynos7870_dsi_driver_data = {
+ .reg_ofs = exynos7870_reg_ofs,
+ .plltmr_reg = 0xa0,
+ .has_clklane_stop = 1,
+ .has_sfrctrl = 1,
+ .clk_data = exynos7870_clk_bulk_data,
+ .num_clks = ARRAY_SIZE(exynos7870_clk_bulk_data),
+ .max_freq = 1500,
+ .wait_for_hdr_fifo = 0,
+ .wait_for_reset = 1,
+ .num_bits_resol = 12,
+ .video_mode_bit = 18,
+ .pll_stable_bit = 24,
+ .esc_clken_bit = 16,
+ .byte_clken_bit = 17,
+ .tx_req_hsclk_bit = 20,
+ .lane_esc_clk_bit = 8,
+ .lane_esc_data_offset = 9,
+ .pll_p_offset = 13,
+ .pll_m_offset = 3,
+ .pll_s_offset = 0,
+ .main_vsa_offset = 16,
+ .reg_values = exynos7870_reg_values,
+ .pll_fin_min = 6,
+ .pll_fin_max = 12,
+ .m_min = 41,
+ .m_max = 125,
+ .min_freq = 500,
+};
+
static const struct samsung_dsim_driver_data imx8mm_dsi_driver_data = {
.reg_ofs = exynos5433_reg_ofs,
.plltmr_reg = 0xa0,
+ .has_legacy_status_reg = 1,
.has_clklane_stop = 1,
- .num_clks = 2,
+ .clk_data = exynos4_clk_bulk_data,
+ .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data),
.max_freq = 2100,
+ .wait_for_hdr_fifo = 1,
.wait_for_reset = 0,
.num_bits_resol = 12,
+ .video_mode_bit = 25,
+ .pll_stable_bit = 31,
+ .esc_clken_bit = 28,
+ .byte_clken_bit = 24,
+ .tx_req_hsclk_bit = 31,
+ .lane_esc_clk_bit = 19,
+ .lane_esc_data_offset = 20,
/*
* Unlike Exynos, PLL_P(PMS_P) offset 14 is used in i.MX8M Mini/Nano/Plus
* downstream driver - drivers/gpu/drm/bridge/sec-dsim.c
*/
.pll_p_offset = 14,
+ .pll_m_offset = 4,
+ .pll_s_offset = 1,
+ .main_vsa_offset = 22,
.reg_values = imx8mm_dsim_reg_values,
.pll_fin_min = 2,
.pll_fin_max = 30,
@@ -518,6 +685,7 @@ samsung_dsim_types[DSIM_TYPE_COUNT] = {
[DSIM_TYPE_EXYNOS5410] = &exynos5_dsi_driver_data,
[DSIM_TYPE_EXYNOS5422] = &exynos5422_dsi_driver_data,
[DSIM_TYPE_EXYNOS5433] = &exynos5433_dsi_driver_data,
+ [DSIM_TYPE_EXYNOS7870] = &exynos7870_dsi_driver_data,
[DSIM_TYPE_IMX8MM] = &imx8mm_dsi_driver_data,
[DSIM_TYPE_IMX8MP] = &imx8mm_dsi_driver_data,
};
@@ -653,8 +821,9 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi,
writel(driver_data->reg_values[PLL_TIMER],
dsi->reg_base + driver_data->plltmr_reg);
- reg = DSIM_PLL_EN | DSIM_PLL_P(p, driver_data->pll_p_offset) |
- DSIM_PLL_M(m) | DSIM_PLL_S(s);
+ reg = DSIM_PLL_EN | DSIM_PLL(p, driver_data->pll_p_offset)
+ | DSIM_PLL(m, driver_data->pll_m_offset)
+ | DSIM_PLL(s, driver_data->pll_s_offset);
if (driver_data->has_freqband) {
static const unsigned long freq_bands[] = {
@@ -682,14 +851,17 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi,
samsung_dsim_write(dsi, DSIM_PLLCTRL_REG, reg);
- timeout = 1000;
+ timeout = 3000;
do {
if (timeout-- == 0) {
dev_err(dsi->dev, "PLL failed to stabilize\n");
return 0;
}
- reg = samsung_dsim_read(dsi, DSIM_STATUS_REG);
- } while ((reg & DSIM_PLL_STABLE) == 0);
+ if (driver_data->has_legacy_status_reg)
+ reg = samsung_dsim_read(dsi, DSIM_STATUS_REG);
+ else
+ reg = samsung_dsim_read(dsi, DSIM_LINK_STATUS_REG);
+ } while ((reg & BIT(driver_data->pll_stable_bit)) == 0);
dsi->hs_clock = fout;
@@ -698,6 +870,7 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi,
static int samsung_dsim_enable_clock(struct samsung_dsim *dsi)
{
+ const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
unsigned long hs_clk, byte_clk, esc_clk, pix_clk;
unsigned long esc_div;
u32 reg;
@@ -731,15 +904,17 @@ static int samsung_dsim_enable_clock(struct samsung_dsim *dsi)
hs_clk, byte_clk, esc_clk);
reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG);
- reg &= ~(DSIM_ESC_PRESCALER_MASK | DSIM_LANE_ESC_CLK_EN_CLK
- | DSIM_LANE_ESC_CLK_EN_DATA_MASK | DSIM_PLL_BYPASS
- | DSIM_BYTE_CLK_SRC_MASK);
- reg |= DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN
- | DSIM_ESC_PRESCALER(esc_div)
- | DSIM_LANE_ESC_CLK_EN_CLK
- | DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1)
- | DSIM_BYTE_CLK_SRC(0)
- | DSIM_TX_REQUEST_HSCLK;
+ reg &= ~(DSIM_ESC_PRESCALER_MASK | BIT(driver_data->lane_esc_clk_bit)
+ | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset)
+ | DSIM_PLL_BYPASS
+ | DSIM_BYTE_CLK_SRC_MASK);
+ reg |= BIT(driver_data->esc_clken_bit) | BIT(driver_data->byte_clken_bit)
+ | DSIM_ESC_PRESCALER(esc_div)
+ | BIT(driver_data->lane_esc_clk_bit)
+ | DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1,
+ driver_data->lane_esc_data_offset)
+ | DSIM_BYTE_CLK_SRC(0)
+ | BIT(driver_data->tx_req_hsclk_bit);
samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg);
return 0;
@@ -843,11 +1018,14 @@ static void samsung_dsim_set_phy_ctrl(struct samsung_dsim *dsi)
static void samsung_dsim_disable_clock(struct samsung_dsim *dsi)
{
+ const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
u32 reg;
reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG);
- reg &= ~(DSIM_LANE_ESC_CLK_EN_CLK | DSIM_LANE_ESC_CLK_EN_DATA_MASK
- | DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN);
+ reg &= ~(BIT(driver_data->lane_esc_clk_bit)
+ | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset)
+ | BIT(driver_data->esc_clken_bit)
+ | BIT(driver_data->byte_clken_bit));
samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg);
reg = samsung_dsim_read(dsi, DSIM_PLLCTRL_REG);
@@ -891,7 +1069,7 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi)
* mode, otherwise it will support command mode.
*/
if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
- reg |= DSIM_VIDEO_MODE;
+ reg |= BIT(driver_data->video_mode_bit);
/*
* The user manual describes that following bits are ignored in
@@ -962,7 +1140,10 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi)
return -EFAULT;
}
- reg = samsung_dsim_read(dsi, DSIM_STATUS_REG);
+ if (driver_data->has_legacy_status_reg)
+ reg = samsung_dsim_read(dsi, DSIM_STATUS_REG);
+ else
+ reg = samsung_dsim_read(dsi, DSIM_DPHY_STATUS_REG);
if ((reg & DSIM_STOP_STATE_DAT(lanes_mask))
!= DSIM_STOP_STATE_DAT(lanes_mask))
continue;
@@ -983,6 +1164,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi)
{
struct drm_display_mode *m = &dsi->mode;
unsigned int num_bits_resol = dsi->driver_data->num_bits_resol;
+ unsigned int main_vsa_offset = dsi->driver_data->main_vsa_offset;
u32 reg;
if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
@@ -1009,7 +1191,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi)
reg = DSIM_MAIN_HFP(hfp) | DSIM_MAIN_HBP(hbp);
samsung_dsim_write(dsi, DSIM_MHPORCH_REG, reg);
- reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start)
+ reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start, main_vsa_offset)
| DSIM_MAIN_HSA(hsa);
samsung_dsim_write(dsi, DSIM_MSYNC_REG, reg);
}
@@ -1023,6 +1205,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi)
static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enable)
{
+ const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
u32 reg;
reg = samsung_dsim_read(dsi, DSIM_MDRESOL_REG);
@@ -1031,6 +1214,15 @@ static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enabl
else
reg &= ~DSIM_MAIN_STAND_BY;
samsung_dsim_write(dsi, DSIM_MDRESOL_REG, reg);
+
+ if (driver_data->has_sfrctrl) {
+ reg = samsung_dsim_read(dsi, DSIM_SFRCTRL_REG);
+ if (enable)
+ reg |= DSIM_SFR_CTRL_STAND_BY;
+ else
+ reg &= ~DSIM_SFR_CTRL_STAND_BY;
+ samsung_dsim_write(dsi, DSIM_SFRCTRL_REG, reg);
+ }
}
static int samsung_dsim_wait_for_hdr_fifo(struct samsung_dsim *dsi)
@@ -1087,6 +1279,7 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi,
{
struct device *dev = dsi->dev;
struct mipi_dsi_packet *pkt = &xfer->packet;
+ const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
const u8 *payload = pkt->payload + xfer->tx_done;
u16 length = pkt->payload_length - xfer->tx_done;
bool first = !xfer->tx_done;
@@ -1127,9 +1320,11 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi,
return;
reg = get_unaligned_le32(pkt->header);
- if (samsung_dsim_wait_for_hdr_fifo(dsi)) {
- dev_err(dev, "waiting for header FIFO timed out\n");
- return;
+ if (driver_data->wait_for_hdr_fifo) {
+ if (samsung_dsim_wait_for_hdr_fifo(dsi)) {
+ dev_err(dev, "waiting for header FIFO timed out\n");
+ return;
+ }
}
if (NEQV(xfer->flags & MIPI_DSI_MSG_USE_LPM,
@@ -1922,7 +2117,7 @@ int samsung_dsim_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct samsung_dsim *dsi;
- int ret, i;
+ int ret;
dsi = devm_drm_bridge_alloc(dev, struct samsung_dsim, bridge, &samsung_dsim_bridge_funcs);
if (IS_ERR(dsi))
@@ -1946,23 +2141,11 @@ int samsung_dsim_probe(struct platform_device *pdev)
if (ret)
return dev_err_probe(dev, ret, "failed to get regulators\n");
- dsi->clks = devm_kcalloc(dev, dsi->driver_data->num_clks,
- sizeof(*dsi->clks), GFP_KERNEL);
- if (!dsi->clks)
- return -ENOMEM;
-
- for (i = 0; i < dsi->driver_data->num_clks; i++) {
- dsi->clks[i] = devm_clk_get(dev, clk_names[i]);
- if (IS_ERR(dsi->clks[i])) {
- if (strcmp(clk_names[i], "sclk_mipi") == 0) {
- dsi->clks[i] = devm_clk_get(dev, OLD_SCLK_MIPI_CLK_NAME);
- if (!IS_ERR(dsi->clks[i]))
- continue;
- }
-
- dev_info(dev, "failed to get the clock: %s\n", clk_names[i]);
- return PTR_ERR(dsi->clks[i]);
- }
+ ret = devm_clk_bulk_get(dev, dsi->driver_data->num_clks,
+ dsi->driver_data->clk_data);
+ if (ret) {
+ dev_err(dev, "failed to get clocks in bulk (%d)\n", ret);
+ return ret;
}
dsi->reg_base = devm_platform_ioremap_resource(pdev, 0);
@@ -2035,7 +2218,7 @@ static int samsung_dsim_suspend(struct device *dev)
{
struct samsung_dsim *dsi = dev_get_drvdata(dev);
const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
- int ret, i;
+ int ret;
usleep_range(10000, 20000);
@@ -2051,8 +2234,7 @@ static int samsung_dsim_suspend(struct device *dev)
phy_power_off(dsi->phy);
- for (i = driver_data->num_clks - 1; i > -1; i--)
- clk_disable_unprepare(dsi->clks[i]);
+ clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data);
ret = regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies);
if (ret < 0)
@@ -2065,7 +2247,7 @@ static int samsung_dsim_resume(struct device *dev)
{
struct samsung_dsim *dsi = dev_get_drvdata(dev);
const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
- int ret, i;
+ int ret;
ret = regulator_bulk_enable(ARRAY_SIZE(dsi->supplies), dsi->supplies);
if (ret < 0) {
@@ -2073,11 +2255,9 @@ static int samsung_dsim_resume(struct device *dev)
return ret;
}
- for (i = 0; i < driver_data->num_clks; i++) {
- ret = clk_prepare_enable(dsi->clks[i]);
- if (ret < 0)
- goto err_clk;
- }
+ ret = clk_bulk_prepare_enable(driver_data->num_clks, driver_data->clk_data);
+ if (ret < 0)
+ goto err_clk;
ret = phy_power_on(dsi->phy);
if (ret < 0) {
@@ -2088,8 +2268,7 @@ static int samsung_dsim_resume(struct device *dev)
return 0;
err_clk:
- while (--i > -1)
- clk_disable_unprepare(dsi->clks[i]);
+ clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data);
regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies);
return ret;
diff --git a/drivers/gpu/drm/bridge/waveshare-dsi.c b/drivers/gpu/drm/bridge/waveshare-dsi.c
index 01c70e7d3d3b..43f4e7412d72 100644
--- a/drivers/gpu/drm/bridge/waveshare-dsi.c
+++ b/drivers/gpu/drm/bridge/waveshare-dsi.c
@@ -147,8 +147,8 @@ static int ws_bridge_probe(struct i2c_client *i2c)
int ret;
ws = devm_drm_bridge_alloc(dev, struct ws_bridge, bridge, &ws_bridge_bridge_funcs);
- if (!ws)
- return -ENOMEM;
+ if (IS_ERR(ws))
+ return PTR_ERR(ws);
ws->dev = dev;
diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c
index 3b50d817c839..436bfe9f9081 100644
--- a/drivers/gpu/drm/display/drm_dp_cec.c
+++ b/drivers/gpu/drm/display/drm_dp_cec.c
@@ -42,7 +42,7 @@
*
* https://hverkuil.home.xs4all.nl/cec-status.txt
*
- * Please mail me (hverkuil@xs4all.nl) if you find an adapter that works
+ * Please mail me (hverkuil@kernel.org) if you find an adapter that works
* and is not yet listed there.
*
* Note that the current implementation does not support CEC over an MST hub.
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 0ac723a46a91..8e3cb08241c8 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -696,7 +696,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
mutex_destroy(&dev->master_mutex);
mutex_destroy(&dev->clientlist_mutex);
mutex_destroy(&dev->filelist_mutex);
- mutex_destroy(&dev->struct_mutex);
}
static int drm_dev_init(struct drm_device *dev,
@@ -737,7 +736,6 @@ static int drm_dev_init(struct drm_device *dev,
INIT_LIST_HEAD(&dev->vblank_event_list);
spin_lock_init(&dev->event_lock);
- mutex_init(&dev->struct_mutex);
mutex_init(&dev->filelist_mutex);
mutex_init(&dev->clientlist_mutex);
mutex_init(&dev->master_mutex);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index cbeb76b2124f..a1a9c828938b 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -626,7 +626,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
struct page **pages;
struct folio *folio;
struct folio_batch fbatch;
- long i, j, npages;
+ unsigned long i, j, npages;
if (WARN_ON(!obj->filp))
return ERR_PTR(-EINVAL);
@@ -650,7 +650,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
i = 0;
while (i < npages) {
- long nr;
+ unsigned long nr;
folio = shmem_read_folio_gfp(mapping, i,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index e2a9a6ae1d54..cb906765897e 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -361,7 +361,6 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
* @name: Name of the GPU SVM.
* @drm: Pointer to the DRM device structure.
* @mm: Pointer to the mm_struct for the address space.
- * @device_private_page_owner: Device private pages owner.
* @mm_start: Start address of GPU SVM.
* @mm_range: Range of the GPU SVM.
* @notifier_size: Size of individual notifiers.
@@ -373,23 +372,35 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
*
* This function initializes the GPU SVM.
*
+ * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free),
+ * then only @gpusvm, @name, and @drm are expected. However, the same base
+ * @gpusvm can also be used with both modes together in which case the full
+ * setup is needed, where the core drm_gpusvm_pages API will simply never use
+ * the other fields.
+ *
* Return: 0 on success, a negative error code on failure.
*/
int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
const char *name, struct drm_device *drm,
- struct mm_struct *mm, void *device_private_page_owner,
+ struct mm_struct *mm,
unsigned long mm_start, unsigned long mm_range,
unsigned long notifier_size,
const struct drm_gpusvm_ops *ops,
const unsigned long *chunk_sizes, int num_chunks)
{
- if (!ops->invalidate || !num_chunks)
- return -EINVAL;
+ if (mm) {
+ if (!ops->invalidate || !num_chunks)
+ return -EINVAL;
+ mmgrab(mm);
+ } else {
+ /* No full SVM mode, only core drm_gpusvm_pages API. */
+ if (ops || num_chunks || mm_range || notifier_size)
+ return -EINVAL;
+ }
gpusvm->name = name;
gpusvm->drm = drm;
gpusvm->mm = mm;
- gpusvm->device_private_page_owner = device_private_page_owner;
gpusvm->mm_start = mm_start;
gpusvm->mm_range = mm_range;
gpusvm->notifier_size = notifier_size;
@@ -397,7 +408,6 @@ int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
gpusvm->chunk_sizes = chunk_sizes;
gpusvm->num_chunks = num_chunks;
- mmgrab(mm);
gpusvm->root = RB_ROOT_CACHED;
INIT_LIST_HEAD(&gpusvm->notifier_list);
@@ -489,7 +499,8 @@ void drm_gpusvm_fini(struct drm_gpusvm *gpusvm)
drm_gpusvm_range_remove(gpusvm, range);
}
- mmdrop(gpusvm->mm);
+ if (gpusvm->mm)
+ mmdrop(gpusvm->mm);
WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root));
}
EXPORT_SYMBOL_GPL(drm_gpusvm_fini);
@@ -629,18 +640,48 @@ drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
range->itree.start = ALIGN_DOWN(fault_addr, chunk_size);
range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1;
INIT_LIST_HEAD(&range->entry);
- range->notifier_seq = LONG_MAX;
- range->flags.migrate_devmem = migrate_devmem ? 1 : 0;
+ range->pages.notifier_seq = LONG_MAX;
+ range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0;
return range;
}
/**
+ * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order.
+ * @hmm_pfn: The current hmm_pfn.
+ * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array.
+ * @npages: Number of pages within the pfn array i.e the hmm range size.
+ *
+ * To allow skipping PFNs with the same flags (like when they belong to
+ * the same huge PTE) when looping over the pfn array, take a given a hmm_pfn,
+ * and return the largest order that will fit inside the CPU PTE, but also
+ * crucially accounting for the original hmm range boundaries.
+ *
+ * Return: The largest order that will safely fit within the size of the hmm_pfn
+ * CPU PTE.
+ */
+static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn,
+ unsigned long hmm_pfn_index,
+ unsigned long npages)
+{
+ unsigned long size;
+
+ size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
+ size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1);
+ hmm_pfn_index += size;
+ if (hmm_pfn_index > npages)
+ size -= (hmm_pfn_index - npages);
+
+ return ilog2(size);
+}
+
+/**
* drm_gpusvm_check_pages() - Check pages
* @gpusvm: Pointer to the GPU SVM structure
* @notifier: Pointer to the GPU SVM notifier structure
* @start: Start address
* @end: End address
+ * @dev_private_owner: The device private page owner
*
* Check if pages between start and end have been faulted in on the CPU. Use to
* prevent migration of pages without CPU backing store.
@@ -649,14 +690,15 @@ drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm,
*/
static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
struct drm_gpusvm_notifier *notifier,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ void *dev_private_owner)
{
struct hmm_range hmm_range = {
.default_flags = 0,
.notifier = &notifier->notifier,
.start = start,
.end = end,
- .dev_private_owner = gpusvm->device_private_page_owner,
+ .dev_private_owner = dev_private_owner,
};
unsigned long timeout =
jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
@@ -693,7 +735,7 @@ static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
err = -EFAULT;
goto err_free;
}
- i += 0x1 << hmm_pfn_to_map_order(pfns[i]);
+ i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
}
err_free:
@@ -710,6 +752,7 @@ err_free:
* @gpuva_start: Start address of GPUVA which mirrors CPU
* @gpuva_end: End address of GPUVA which mirrors CPU
* @check_pages_threshold: Check CPU pages for present threshold
+ * @dev_private_owner: The device private page owner
*
* This function determines the chunk size for the GPU SVM range based on the
* fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual
@@ -724,7 +767,8 @@ drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
unsigned long fault_addr,
unsigned long gpuva_start,
unsigned long gpuva_end,
- unsigned long check_pages_threshold)
+ unsigned long check_pages_threshold,
+ void *dev_private_owner)
{
unsigned long start, end;
int i = 0;
@@ -771,7 +815,7 @@ retry:
* process-many-malloc' mallocs at least 64k at a time.
*/
if (end - start <= check_pages_threshold &&
- !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) {
+ !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) {
++i;
goto retry;
}
@@ -914,7 +958,8 @@ drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas,
fault_addr, gpuva_start,
gpuva_end,
- ctx->check_pages_threshold);
+ ctx->check_pages_threshold,
+ ctx->device_private_page_owner);
if (chunk_size == LONG_MAX) {
err = -EINVAL;
goto err_notifier_remove;
@@ -951,31 +996,31 @@ err_mmunlock:
EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert);
/**
- * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal)
+ * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal)
* @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
* @npages: Number of pages to unmap
*
- * This function unmap pages associated with a GPU SVM range. Assumes and
+ * This function unmap pages associated with a GPU SVM pages struct. Assumes and
* asserts correct locking is in place when called.
*/
-static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
- struct drm_gpusvm_range *range,
- unsigned long npages)
+static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ unsigned long npages)
{
- unsigned long i, j;
- struct drm_pagemap *dpagemap = range->dpagemap;
+ struct drm_pagemap *dpagemap = svm_pages->dpagemap;
struct device *dev = gpusvm->drm->dev;
+ unsigned long i, j;
lockdep_assert_held(&gpusvm->notifier_lock);
- if (range->flags.has_dma_mapping) {
- struct drm_gpusvm_range_flags flags = {
- .__flags = range->flags.__flags,
+ if (svm_pages->flags.has_dma_mapping) {
+ struct drm_gpusvm_pages_flags flags = {
+ .__flags = svm_pages->flags.__flags,
};
for (i = 0, j = 0; i < npages; j++) {
- struct drm_pagemap_addr *addr = &range->dma_addr[j];
+ struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j];
if (addr->proto == DRM_INTERCONNECT_SYSTEM)
dma_unmap_page(dev,
@@ -991,31 +1036,52 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
flags.has_devmem_pages = false;
flags.has_dma_mapping = false;
- WRITE_ONCE(range->flags.__flags, flags.__flags);
+ WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
- range->dpagemap = NULL;
+ svm_pages->dpagemap = NULL;
}
}
/**
- * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range
+ * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages
* @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
*
* This function frees the dma address array associated with a GPU SVM range.
*/
-static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm,
- struct drm_gpusvm_range *range)
+static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages)
{
lockdep_assert_held(&gpusvm->notifier_lock);
- if (range->dma_addr) {
- kvfree(range->dma_addr);
- range->dma_addr = NULL;
+ if (svm_pages->dma_addr) {
+ kvfree(svm_pages->dma_addr);
+ svm_pages->dma_addr = NULL;
}
}
/**
+ * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages
+ * struct
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ * @npages: Number of mapped pages
+ *
+ * This function unmaps and frees the dma address array associated with a GPU
+ * SVM pages struct.
+ */
+void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ unsigned long npages)
+{
+ drm_gpusvm_notifier_lock(gpusvm);
+ __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
+ __drm_gpusvm_free_pages(gpusvm, svm_pages);
+ drm_gpusvm_notifier_unlock(gpusvm);
+}
+EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages);
+
+/**
* drm_gpusvm_range_remove() - Remove GPU SVM range
* @gpusvm: Pointer to the GPU SVM structure
* @range: Pointer to the GPU SVM range to be removed
@@ -1040,8 +1106,8 @@ void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
return;
drm_gpusvm_notifier_lock(gpusvm);
- __drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
- drm_gpusvm_range_free_pages(gpusvm, range);
+ __drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages);
+ __drm_gpusvm_free_pages(gpusvm, &range->pages);
__drm_gpusvm_range_remove(notifier, range);
drm_gpusvm_notifier_unlock(gpusvm);
@@ -1107,6 +1173,28 @@ void drm_gpusvm_range_put(struct drm_gpusvm_range *range)
EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
/**
+ * drm_gpusvm_pages_valid() - GPU SVM range pages valid
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ *
+ * This function determines if a GPU SVM range pages are valid. Expected be
+ * called holding gpusvm->notifier_lock and as the last step before committing a
+ * GPU binding. This is akin to a notifier seqno check in the HMM documentation
+ * but due to wider notifiers (i.e., notifiers which span multiple ranges) this
+ * function is required for finer grained checking (i.e., per range) if pages
+ * are valid.
+ *
+ * Return: True if GPU SVM range has valid pages, False otherwise
+ */
+static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages)
+{
+ lockdep_assert_held(&gpusvm->notifier_lock);
+
+ return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping;
+}
+
+/**
* drm_gpusvm_range_pages_valid() - GPU SVM range pages valid
* @gpusvm: Pointer to the GPU SVM structure
* @range: Pointer to the GPU SVM range structure
@@ -1123,9 +1211,7 @@ EXPORT_SYMBOL_GPL(drm_gpusvm_range_put);
bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm,
struct drm_gpusvm_range *range)
{
- lockdep_assert_held(&gpusvm->notifier_lock);
-
- return range->flags.has_devmem_pages || range->flags.has_dma_mapping;
+ return drm_gpusvm_pages_valid(gpusvm, &range->pages);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
@@ -1139,66 +1225,71 @@ EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid);
*
* Return: True if GPU SVM range has valid pages, False otherwise
*/
-static bool
-drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
- struct drm_gpusvm_range *range)
+static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages)
{
bool pages_valid;
- if (!range->dma_addr)
+ if (!svm_pages->dma_addr)
return false;
drm_gpusvm_notifier_lock(gpusvm);
- pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range);
+ pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages);
if (!pages_valid)
- drm_gpusvm_range_free_pages(gpusvm, range);
+ __drm_gpusvm_free_pages(gpusvm, svm_pages);
drm_gpusvm_notifier_unlock(gpusvm);
return pages_valid;
}
/**
- * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
+ * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct
* @gpusvm: Pointer to the GPU SVM structure
- * @range: Pointer to the GPU SVM range structure
+ * @svm_pages: The SVM pages to populate. This will contain the dma-addresses
+ * @mm: The mm corresponding to the CPU range
+ * @notifier: The corresponding notifier for the given CPU range
+ * @pages_start: Start CPU address for the pages
+ * @pages_end: End CPU address for the pages (exclusive)
* @ctx: GPU SVM context
*
- * This function gets pages for a GPU SVM range and ensures they are mapped for
- * DMA access.
+ * This function gets and maps pages for CPU range and ensures they are
+ * mapped for DMA access.
*
* Return: 0 on success, negative error code on failure.
*/
-int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
- struct drm_gpusvm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ struct mm_struct *mm,
+ struct mmu_interval_notifier *notifier,
+ unsigned long pages_start, unsigned long pages_end,
+ const struct drm_gpusvm_ctx *ctx)
{
- struct mmu_interval_notifier *notifier = &range->notifier->notifier;
struct hmm_range hmm_range = {
.default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 :
HMM_PFN_REQ_WRITE),
.notifier = notifier,
- .start = drm_gpusvm_range_start(range),
- .end = drm_gpusvm_range_end(range),
- .dev_private_owner = gpusvm->device_private_page_owner,
+ .start = pages_start,
+ .end = pages_end,
+ .dev_private_owner = ctx->device_private_page_owner,
};
- struct mm_struct *mm = gpusvm->mm;
void *zdd;
unsigned long timeout =
jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
unsigned long i, j;
- unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
- drm_gpusvm_range_end(range));
+ unsigned long npages = npages_in_range(pages_start, pages_end);
unsigned long num_dma_mapped;
unsigned int order = 0;
unsigned long *pfns;
int err = 0;
struct dev_pagemap *pagemap;
struct drm_pagemap *dpagemap;
- struct drm_gpusvm_range_flags flags;
+ struct drm_gpusvm_pages_flags flags;
+ enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE :
+ DMA_BIDIRECTIONAL;
retry:
hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
- if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range))
+ if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages))
goto set_seqno;
pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
@@ -1238,7 +1329,7 @@ map_pages:
*/
drm_gpusvm_notifier_lock(gpusvm);
- flags.__flags = range->flags.__flags;
+ flags.__flags = svm_pages->flags.__flags;
if (flags.unmapped) {
drm_gpusvm_notifier_unlock(gpusvm);
err = -EFAULT;
@@ -1251,13 +1342,12 @@ map_pages:
goto retry;
}
- if (!range->dma_addr) {
+ if (!svm_pages->dma_addr) {
/* Unlock and restart mapping to allocate memory. */
drm_gpusvm_notifier_unlock(gpusvm);
- range->dma_addr = kvmalloc_array(npages,
- sizeof(*range->dma_addr),
- GFP_KERNEL);
- if (!range->dma_addr) {
+ svm_pages->dma_addr =
+ kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL);
+ if (!svm_pages->dma_addr) {
err = -ENOMEM;
goto err_free;
}
@@ -1270,7 +1360,7 @@ map_pages:
for (i = 0, j = 0; i < npages; ++j) {
struct page *page = hmm_pfn_to_page(pfns[i]);
- order = hmm_pfn_to_map_order(pfns[i]);
+ order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
if (is_device_private_page(page) ||
is_device_coherent_page(page)) {
if (zdd != page->zone_device_data && i > 0) {
@@ -1296,13 +1386,13 @@ map_pages:
goto err_unmap;
}
}
- range->dma_addr[j] =
+ svm_pages->dma_addr[j] =
dpagemap->ops->device_map(dpagemap,
gpusvm->drm->dev,
page, order,
- DMA_BIDIRECTIONAL);
+ dma_dir);
if (dma_mapping_error(gpusvm->drm->dev,
- range->dma_addr[j].addr)) {
+ svm_pages->dma_addr[j].addr)) {
err = -EFAULT;
goto err_unmap;
}
@@ -1322,15 +1412,15 @@ map_pages:
addr = dma_map_page(gpusvm->drm->dev,
page, 0,
PAGE_SIZE << order,
- DMA_BIDIRECTIONAL);
+ dma_dir);
if (dma_mapping_error(gpusvm->drm->dev, addr)) {
err = -EFAULT;
goto err_unmap;
}
- range->dma_addr[j] = drm_pagemap_addr_encode
+ svm_pages->dma_addr[j] = drm_pagemap_addr_encode
(addr, DRM_INTERCONNECT_SYSTEM, order,
- DMA_BIDIRECTIONAL);
+ dma_dir);
}
i += 1 << order;
num_dma_mapped = i;
@@ -1339,21 +1429,21 @@ map_pages:
if (pagemap) {
flags.has_devmem_pages = true;
- range->dpagemap = dpagemap;
+ svm_pages->dpagemap = dpagemap;
}
/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
- WRITE_ONCE(range->flags.__flags, flags.__flags);
+ WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
drm_gpusvm_notifier_unlock(gpusvm);
kvfree(pfns);
set_seqno:
- range->notifier_seq = hmm_range.notifier_seq;
+ svm_pages->notifier_seq = hmm_range.notifier_seq;
return 0;
err_unmap:
- __drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped);
+ __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped);
drm_gpusvm_notifier_unlock(gpusvm);
err_free:
kvfree(pfns);
@@ -1361,11 +1451,62 @@ err_free:
goto retry;
return err;
}
+EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages);
+
+/**
+ * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @range: Pointer to the GPU SVM range structure
+ * @ctx: GPU SVM context
+ *
+ * This function gets pages for a GPU SVM range and ensures they are mapped for
+ * DMA access.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_range *range,
+ const struct drm_gpusvm_ctx *ctx)
+{
+ return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm,
+ &range->notifier->notifier,
+ drm_gpusvm_range_start(range),
+ drm_gpusvm_range_end(range), ctx);
+}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages);
/**
+ * drm_gpusvm_unmap_pages() - Unmap GPU svm pages
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ * @npages: Number of pages in @svm_pages.
+ * @ctx: GPU SVM context
+ *
+ * This function unmaps pages associated with a GPU SVM pages struct. If
+ * @in_notifier is set, it is assumed that gpusvm->notifier_lock is held in
+ * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode.
+ * Must be called in the invalidate() callback of the corresponding notifier for
+ * IOMMU security model.
+ */
+void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
+ struct drm_gpusvm_pages *svm_pages,
+ unsigned long npages,
+ const struct drm_gpusvm_ctx *ctx)
+{
+ if (ctx->in_notifier)
+ lockdep_assert_held_write(&gpusvm->notifier_lock);
+ else
+ drm_gpusvm_notifier_lock(gpusvm);
+
+ __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
+
+ if (!ctx->in_notifier)
+ drm_gpusvm_notifier_unlock(gpusvm);
+}
+EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages);
+
+/**
* drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range
- * drm_gpusvm_range_evict() - Evict GPU SVM range
* @gpusvm: Pointer to the GPU SVM structure
* @range: Pointer to the GPU SVM range structure
* @ctx: GPU SVM context
@@ -1383,15 +1524,7 @@ void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
unsigned long npages = npages_in_range(drm_gpusvm_range_start(range),
drm_gpusvm_range_end(range));
- if (ctx->in_notifier)
- lockdep_assert_held_write(&gpusvm->notifier_lock);
- else
- drm_gpusvm_notifier_lock(gpusvm);
-
- __drm_gpusvm_range_unmap_pages(gpusvm, range, npages);
-
- if (!ctx->in_notifier)
- drm_gpusvm_notifier_unlock(gpusvm);
+ return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx);
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages);
@@ -1489,10 +1622,10 @@ void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range,
{
lockdep_assert_held_write(&range->gpusvm->notifier_lock);
- range->flags.unmapped = true;
+ range->pages.flags.unmapped = true;
if (drm_gpusvm_range_start(range) < mmu_range->start ||
drm_gpusvm_range_end(range) > mmu_range->end)
- range->flags.partial_unmap = true;
+ range->pages.flags.partial_unmap = true;
}
EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped);
diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index a52e95555549..af63f4d00315 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -2533,8 +2533,6 @@ static const struct drm_gpuvm_ops lock_ops = {
*
* The expected usage is::
*
- * .. code-block:: c
- *
* vm_bind {
* struct drm_exec exec;
*
diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs
index 50c286c5cee8..ac27e86c601c 100644
--- a/drivers/gpu/drm/drm_panic_qr.rs
+++ b/drivers/gpu/drm/drm_panic_qr.rs
@@ -968,7 +968,7 @@ pub unsafe extern "C" fn drm_panic_qr_generate(
// nul-terminated string.
let url_cstr: &CStr = unsafe { CStr::from_char_ptr(url) };
let segments = &[
- &Segment::Binary(url_cstr.as_bytes()),
+ &Segment::Binary(url_cstr.to_bytes()),
&Segment::Numeric(&data_slice[0..data_len]),
];
match EncodedMsg::new(segments, tmp_slice) {
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index 805aa28c1723..b8d9b7251319 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -69,7 +69,6 @@ struct decon_context {
void __iomem *regs;
unsigned long irq_flags;
bool i80_if;
- bool suspended;
wait_queue_head_t wait_vsync_queue;
atomic_t wait_vsync_event;
@@ -132,9 +131,6 @@ static void decon_shadow_protect_win(struct decon_context *ctx,
static void decon_wait_for_vblank(struct decon_context *ctx)
{
- if (ctx->suspended)
- return;
-
atomic_set(&ctx->wait_vsync_event, 1);
/*
@@ -210,9 +206,6 @@ static void decon_commit(struct exynos_drm_crtc *crtc)
struct drm_display_mode *mode = &crtc->base.state->adjusted_mode;
u32 val, clkdiv;
- if (ctx->suspended)
- return;
-
/* nothing to do if we haven't set the mode yet */
if (mode->htotal == 0 || mode->vtotal == 0)
return;
@@ -274,9 +267,6 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc)
struct decon_context *ctx = crtc->ctx;
u32 val;
- if (ctx->suspended)
- return -EPERM;
-
if (!test_and_set_bit(0, &ctx->irq_flags)) {
val = readl(ctx->regs + VIDINTCON0);
@@ -299,9 +289,6 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc)
struct decon_context *ctx = crtc->ctx;
u32 val;
- if (ctx->suspended)
- return;
-
if (test_and_clear_bit(0, &ctx->irq_flags)) {
val = readl(ctx->regs + VIDINTCON0);
@@ -404,9 +391,6 @@ static void decon_atomic_begin(struct exynos_drm_crtc *crtc)
struct decon_context *ctx = crtc->ctx;
int i;
- if (ctx->suspended)
- return;
-
for (i = 0; i < WINDOWS_NR; i++)
decon_shadow_protect_win(ctx, i, true);
}
@@ -427,9 +411,6 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc,
unsigned int pitch = fb->pitches[0];
unsigned int vidw_addr0_base = ctx->data->vidw_buf_start_base;
- if (ctx->suspended)
- return;
-
/*
* SHADOWCON/PRTCON register is used for enabling timing.
*
@@ -517,9 +498,6 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc,
unsigned int win = plane->index;
u32 val;
- if (ctx->suspended)
- return;
-
/* protect windows */
decon_shadow_protect_win(ctx, win, true);
@@ -538,9 +516,6 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc)
struct decon_context *ctx = crtc->ctx;
int i;
- if (ctx->suspended)
- return;
-
for (i = 0; i < WINDOWS_NR; i++)
decon_shadow_protect_win(ctx, i, false);
exynos_crtc_handle_event(crtc);
@@ -568,9 +543,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc)
struct decon_context *ctx = crtc->ctx;
int ret;
- if (!ctx->suspended)
- return;
-
ret = pm_runtime_resume_and_get(ctx->dev);
if (ret < 0) {
DRM_DEV_ERROR(ctx->dev, "failed to enable DECON device.\n");
@@ -584,8 +556,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc)
decon_enable_vblank(ctx->crtc);
decon_commit(ctx->crtc);
-
- ctx->suspended = false;
}
static void decon_atomic_disable(struct exynos_drm_crtc *crtc)
@@ -593,9 +563,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc)
struct decon_context *ctx = crtc->ctx;
int i;
- if (ctx->suspended)
- return;
-
/*
* We need to make sure that all windows are disabled before we
* suspend that connector. Otherwise we might try to scan from
@@ -605,8 +572,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc)
decon_disable_plane(crtc, &ctx->planes[i]);
pm_runtime_put_sync(ctx->dev);
-
- ctx->suspended = true;
}
static const struct exynos_drm_crtc_ops decon_crtc_ops = {
@@ -727,7 +692,6 @@ static int decon_probe(struct platform_device *pdev)
return -ENOMEM;
ctx->dev = dev;
- ctx->suspended = true;
ctx->data = of_device_get_match_data(dev);
i80_if_timings = of_get_child_by_name(dev->of_node, "i80-if-timings");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 896a03639e2d..c4d098ab7863 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -154,6 +154,11 @@ static const struct samsung_dsim_plat_data exynos5433_dsi_pdata = {
.host_ops = &exynos_dsi_exynos_host_ops,
};
+static const struct samsung_dsim_plat_data exynos7870_dsi_pdata = {
+ .hw_type = DSIM_TYPE_EXYNOS7870,
+ .host_ops = &exynos_dsi_exynos_host_ops,
+};
+
static const struct of_device_id exynos_dsi_of_match[] = {
{
.compatible = "samsung,exynos3250-mipi-dsi",
@@ -175,6 +180,10 @@ static const struct of_device_id exynos_dsi_of_match[] = {
.compatible = "samsung,exynos5433-mipi-dsi",
.data = &exynos5433_dsi_pdata,
},
+ {
+ .compatible = "samsung,exynos7870-mipi-dsi",
+ .data = &exynos7870_dsi_pdata,
+ },
{ /* sentinel. */ }
};
MODULE_DEVICE_TABLE(of, exynos_dsi_of_match);
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index 1cf394369127..c0feca58511d 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -726,8 +726,8 @@ void oaktrail_hdmi_teardown(struct drm_device *dev)
if (hdmi_dev) {
pdev = hdmi_dev->dev;
- pci_set_drvdata(pdev, NULL);
oaktrail_hdmi_i2c_exit(pdev);
+ pci_set_drvdata(pdev, NULL);
iounmap(hdmi_dev->regs);
kfree(hdmi_dev);
pci_dev_put(pdev);
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 853543443072..e58c0c158b3a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -32,6 +32,7 @@ i915-y += \
i915_scatterlist.o \
i915_switcheroo.o \
i915_sysfs.o \
+ i915_timer_util.o \
i915_utils.o \
intel_clock_gating.o \
intel_cpu_info.o \
@@ -280,6 +281,7 @@ i915-y += \
display/intel_modeset_setup.o \
display/intel_modeset_verify.o \
display/intel_overlay.o \
+ display/intel_panic.o \
display/intel_pch.o \
display/intel_pch_display.o \
display/intel_pch_refclk.o \
diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c
index 3eb96d8abba8..407deb5dfb57 100644
--- a/drivers/gpu/drm/i915/display/i9xx_plane.c
+++ b/drivers/gpu/drm/i915/display/i9xx_plane.c
@@ -15,7 +15,6 @@
#include "i9xx_plane.h"
#include "i9xx_plane_regs.h"
#include "intel_atomic.h"
-#include "intel_bo.h"
#include "intel_de.h"
#include "intel_display_irq.h"
#include "intel_display_regs.h"
@@ -23,6 +22,7 @@
#include "intel_fb.h"
#include "intel_fbc.h"
#include "intel_frontbuffer.h"
+#include "intel_panic.h"
#include "intel_plane.h"
#include "intel_sprite.h"
@@ -1178,7 +1178,7 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
drm_WARN_ON(display->drm, pipe != crtc->pipe);
- intel_fb = intel_bo_alloc_framebuffer();
+ intel_fb = intel_framebuffer_alloc();
if (!intel_fb) {
drm_dbg_kms(display->drm, "failed to alloc fb\n");
return;
diff --git a/drivers/gpu/drm/i915/display/intel_bo.c b/drivers/gpu/drm/i915/display/intel_bo.c
index d29c1508ccb9..6ae1374d5c2b 100644
--- a/drivers/gpu/drm/i915/display/intel_bo.c
+++ b/drivers/gpu/drm/i915/display/intel_bo.c
@@ -59,18 +59,3 @@ void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj)
{
i915_debugfs_describe_obj(m, to_intel_bo(obj));
}
-
-struct intel_framebuffer *intel_bo_alloc_framebuffer(void)
-{
- return i915_gem_object_alloc_framebuffer();
-}
-
-int intel_bo_panic_setup(struct drm_scanout_buffer *sb)
-{
- return i915_gem_object_panic_setup(sb);
-}
-
-void intel_bo_panic_finish(struct intel_framebuffer *fb)
-{
- return i915_gem_object_panic_finish(fb);
-}
diff --git a/drivers/gpu/drm/i915/display/intel_bo.h b/drivers/gpu/drm/i915/display/intel_bo.h
index 97087a64d23b..48d87019e48a 100644
--- a/drivers/gpu/drm/i915/display/intel_bo.h
+++ b/drivers/gpu/drm/i915/display/intel_bo.h
@@ -25,8 +25,5 @@ struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj,
struct intel_frontbuffer *front);
void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj);
-struct intel_framebuffer *intel_bo_alloc_framebuffer(void);
-int intel_bo_panic_setup(struct drm_scanout_buffer *sb);
-void intel_bo_panic_finish(struct intel_framebuffer *fb);
#endif /* __INTEL_BO__ */
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 46017091bb0b..c09aa759f4d4 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -26,6 +26,7 @@
*/
#include <linux/iopoll.h>
+#include <linux/seq_buf.h>
#include <linux/string_helpers.h>
#include <drm/display/drm_dp_helper.h>
@@ -596,8 +597,9 @@ intel_ddi_transcoder_func_reg_val_get(struct intel_encoder *encoder,
enum transcoder master;
master = crtc_state->mst_master_transcoder;
- drm_WARN_ON(display->drm,
- master == INVALID_TRANSCODER);
+ if (drm_WARN_ON(display->drm,
+ master == INVALID_TRANSCODER))
+ master = TRANSCODER_A;
temp |= TRANS_DDI_MST_TRANSPORT_SELECT(master);
}
} else {
@@ -5067,11 +5069,45 @@ static bool port_in_use(struct intel_display *display, enum port port)
return false;
}
+static const char *intel_ddi_encoder_name(struct intel_display *display,
+ enum port port, enum phy phy,
+ struct seq_buf *s)
+{
+ if (DISPLAY_VER(display) >= 13 && port >= PORT_D_XELPD) {
+ seq_buf_printf(s, "DDI %c/PHY %c",
+ port_name(port - PORT_D_XELPD + PORT_D),
+ phy_name(phy));
+ } else if (DISPLAY_VER(display) >= 12) {
+ enum tc_port tc_port = intel_port_to_tc(display, port);
+
+ seq_buf_printf(s, "DDI %s%c/PHY %s%c",
+ port >= PORT_TC1 ? "TC" : "",
+ port >= PORT_TC1 ? port_tc_name(port) : port_name(port),
+ tc_port != TC_PORT_NONE ? "TC" : "",
+ tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy));
+ } else if (DISPLAY_VER(display) >= 11) {
+ enum tc_port tc_port = intel_port_to_tc(display, port);
+
+ seq_buf_printf(s, "DDI %c%s/PHY %s%c",
+ port_name(port),
+ port >= PORT_C ? " (TC)" : "",
+ tc_port != TC_PORT_NONE ? "TC" : "",
+ tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy));
+ } else {
+ seq_buf_printf(s, "DDI %c/PHY %c", port_name(port), phy_name(phy));
+ }
+
+ drm_WARN_ON(display->drm, seq_buf_has_overflowed(s));
+
+ return seq_buf_str(s);
+}
+
void intel_ddi_init(struct intel_display *display,
const struct intel_bios_encoder_data *devdata)
{
struct intel_digital_port *dig_port;
struct intel_encoder *encoder;
+ DECLARE_SEQ_BUF(encoder_name, 20);
bool init_hdmi, init_dp;
enum port port;
enum phy phy;
@@ -5156,37 +5192,9 @@ void intel_ddi_init(struct intel_display *display,
encoder = &dig_port->base;
encoder->devdata = devdata;
- if (DISPLAY_VER(display) >= 13 && port >= PORT_D_XELPD) {
- drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs,
- DRM_MODE_ENCODER_TMDS,
- "DDI %c/PHY %c",
- port_name(port - PORT_D_XELPD + PORT_D),
- phy_name(phy));
- } else if (DISPLAY_VER(display) >= 12) {
- enum tc_port tc_port = intel_port_to_tc(display, port);
-
- drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs,
- DRM_MODE_ENCODER_TMDS,
- "DDI %s%c/PHY %s%c",
- port >= PORT_TC1 ? "TC" : "",
- port >= PORT_TC1 ? port_tc_name(port) : port_name(port),
- tc_port != TC_PORT_NONE ? "TC" : "",
- tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy));
- } else if (DISPLAY_VER(display) >= 11) {
- enum tc_port tc_port = intel_port_to_tc(display, port);
-
- drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs,
- DRM_MODE_ENCODER_TMDS,
- "DDI %c%s/PHY %s%c",
- port_name(port),
- port >= PORT_C ? " (TC)" : "",
- tc_port != TC_PORT_NONE ? "TC" : "",
- tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy));
- } else {
- drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs,
- DRM_MODE_ENCODER_TMDS,
- "DDI %c/PHY %c", port_name(port), phy_name(phy));
- }
+ drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs,
+ DRM_MODE_ENCODER_TMDS, "%s",
+ intel_ddi_encoder_name(display, port, phy, &encoder_name));
intel_encoder_link_check_init(encoder, intel_ddi_link_check);
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index c1a3a95c65f0..5dca7f96b425 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -7271,6 +7271,9 @@ static void intel_atomic_dsb_finish(struct intel_atomic_state *state,
intel_psr_trigger_frame_change_event(new_crtc_state->dsb_commit,
state, crtc);
+ intel_psr_wait_for_idle_dsb(new_crtc_state->dsb_commit,
+ new_crtc_state);
+
if (new_crtc_state->use_dsb)
intel_dsb_vblank_evade(state, new_crtc_state->dsb_commit);
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c
index 65f0efc35bb7..a002bc6ce7b0 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.c
+++ b/drivers/gpu/drm/i915/display/intel_display_device.c
@@ -1944,6 +1944,11 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf
drm_printf(p, "rawclk rate: %u kHz\n", runtime->rawclk_freq);
}
+bool intel_display_device_present(struct intel_display *display)
+{
+ return display && HAS_DISPLAY(display);
+}
+
/*
* Assuming the device has display hardware, should it be enabled?
*
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h
index 6e87b763fe7c..f329f1beafef 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.h
+++ b/drivers/gpu/drm/i915/display/intel_display_device.h
@@ -306,6 +306,7 @@ struct intel_display_device_info {
} color;
};
+bool intel_display_device_present(struct intel_display *display);
bool intel_display_device_enabled(struct intel_display *display);
struct intel_display *intel_display_device_probe(struct pci_dev *pdev);
void intel_display_device_remove(struct intel_display *display);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index fd9d2527889b..358ab922d7a7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -60,6 +60,7 @@ struct intel_ddi_buf_trans;
struct intel_fbc;
struct intel_global_objs_state;
struct intel_hdcp_shim;
+struct intel_panic;
struct intel_tc_port;
/*
@@ -149,6 +150,7 @@ struct intel_framebuffer {
unsigned int vtd_guard;
unsigned int (*panic_tiling)(unsigned int x, unsigned int y, unsigned int width);
+ struct intel_panic *panic;
};
enum intel_hotplug_state {
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index 12084a542fc5..eb05ef4bd9f6 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -508,9 +508,6 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state
struct intel_panel *panel = &connector->panel;
struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
- if (panel->backlight.edp.vesa.luminance_control_support)
- return;
-
drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info);
if (!panel->backlight.edp.vesa.info.aux_enable)
@@ -533,7 +530,7 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector,
luminance_range->max_luminance,
panel->vbt.backlight.pwm_freq_hz,
intel_dp->edp_dpcd, &current_level, &current_mode,
- false);
+ panel->backlight.edp.vesa.luminance_control_support);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index b210c3250501..22a4a1575d22 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -20,6 +20,7 @@
#include "intel_fb.h"
#include "intel_fb_bo.h"
#include "intel_frontbuffer.h"
+#include "intel_panic.h"
#include "intel_plane.h"
#define check_array_bounds(display, a, i) drm_WARN_ON((display)->drm, (i) >= ARRAY_SIZE(a))
@@ -2343,6 +2344,26 @@ intel_user_framebuffer_create(struct drm_device *dev,
return fb;
}
+struct intel_framebuffer *intel_framebuffer_alloc(void)
+{
+ struct intel_framebuffer *intel_fb;
+ struct intel_panic *panic;
+
+ intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
+ if (!intel_fb)
+ return NULL;
+
+ panic = intel_panic_alloc();
+ if (!panic) {
+ kfree(intel_fb);
+ return NULL;
+ }
+
+ intel_fb->panic = panic;
+
+ return intel_fb;
+}
+
struct drm_framebuffer *
intel_framebuffer_create(struct drm_gem_object *obj,
const struct drm_format_info *info,
@@ -2351,7 +2372,7 @@ intel_framebuffer_create(struct drm_gem_object *obj,
struct intel_framebuffer *intel_fb;
int ret;
- intel_fb = intel_bo_alloc_framebuffer();
+ intel_fb = intel_framebuffer_alloc();
if (!intel_fb)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h
index 403b8b63721a..22514d5f2bb6 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.h
+++ b/drivers/gpu/drm/i915/display/intel_fb.h
@@ -104,6 +104,9 @@ int intel_framebuffer_init(struct intel_framebuffer *ifb,
struct drm_gem_object *obj,
const struct drm_format_info *info,
struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct intel_framebuffer *intel_framebuffer_alloc(void);
+
struct drm_framebuffer *
intel_framebuffer_create(struct drm_gem_object *obj,
const struct drm_format_info *info,
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index d4c5deff9cbe..0d380c825791 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -98,11 +98,7 @@ struct intel_fbc {
struct intel_display *display;
const struct intel_fbc_funcs *funcs;
- /*
- * This is always the inner lock when overlapping with
- * struct_mutex and it's the outer lock when overlapping
- * with stolen_lock.
- */
+ /* This is always the outer lock when overlapping with stolen_lock */
struct mutex lock;
unsigned int busy_bits;
@@ -383,11 +379,11 @@ static void i8xx_fbc_program_cfb(struct intel_fbc *fbc)
struct drm_i915_private *i915 = to_i915(display->drm);
drm_WARN_ON(display->drm,
- range_overflows_end_t(u64, i915_gem_stolen_area_address(i915),
+ range_end_overflows_t(u64, i915_gem_stolen_area_address(i915),
i915_gem_stolen_node_offset(&fbc->compressed_fb),
U32_MAX));
drm_WARN_ON(display->drm,
- range_overflows_end_t(u64, i915_gem_stolen_area_address(i915),
+ range_end_overflows_t(u64, i915_gem_stolen_area_address(i915),
i915_gem_stolen_node_offset(&fbc->compressed_llb),
U32_MAX));
intel_de_write(display, FBC_CFB_BASE,
@@ -1550,14 +1546,14 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
* having a Y offset that isn't divisible by 4 causes FIFO underrun
* and screen flicker.
*/
- if (DISPLAY_VER(display) >= 9 &&
+ if (IS_DISPLAY_VER(display, 9, 12) &&
plane_state->view.color_plane[0].y & 3) {
plane_state->no_fbc_reason = "plane start Y offset misaligned";
return 0;
}
/* Wa_22010751166: icl, ehl, tgl, dg1, rkl */
- if (DISPLAY_VER(display) >= 11 &&
+ if (IS_DISPLAY_VER(display, 9, 12) &&
(plane_state->view.color_plane[0].y +
(drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) {
plane_state->no_fbc_reason = "plane end Y offset misaligned";
diff --git a/drivers/gpu/drm/i915/display/intel_panic.c b/drivers/gpu/drm/i915/display/intel_panic.c
new file mode 100644
index 000000000000..7311ce4e8b6c
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_panic.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: MIT
+/* Copyright © 2025 Intel Corporation */
+
+#include <drm/drm_panic.h>
+
+#include "gem/i915_gem_object.h"
+#include "intel_display_types.h"
+#include "intel_fb.h"
+#include "intel_panic.h"
+
+struct intel_panic *intel_panic_alloc(void)
+{
+ return i915_gem_object_alloc_panic();
+}
+
+int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb)
+{
+ struct intel_framebuffer *fb = sb->private;
+ struct drm_gem_object *obj = intel_fb_bo(&fb->base);
+
+ return i915_gem_object_panic_setup(panic, sb, obj, fb->panic_tiling);
+}
+
+void intel_panic_finish(struct intel_panic *panic)
+{
+ return i915_gem_object_panic_finish(panic);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_panic.h b/drivers/gpu/drm/i915/display/intel_panic.h
new file mode 100644
index 000000000000..afb472e924aa
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_panic.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright © 2025 Intel Corporation */
+
+#ifndef __INTEL_PANIC_H__
+#define __INTEL_PANIC_H__
+
+struct drm_scanout_buffer;
+struct intel_panic;
+
+struct intel_panic *intel_panic_alloc(void);
+int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb);
+void intel_panic_finish(struct intel_panic *panic);
+
+#endif /* __INTEL_PANIC_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c
index 81f05ee9a21a..2329f09d413d 100644
--- a/drivers/gpu/drm/i915/display/intel_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_plane.c
@@ -47,7 +47,6 @@
#include "gem/i915_gem_object.h"
#include "i915_scheduler_types.h"
#include "i9xx_plane_regs.h"
-#include "intel_bo.h"
#include "intel_cdclk.h"
#include "intel_cursor.h"
#include "intel_display_rps.h"
@@ -56,6 +55,7 @@
#include "intel_fb.h"
#include "intel_fb_pin.h"
#include "intel_fbdev.h"
+#include "intel_panic.h"
#include "intel_plane.h"
#include "intel_psr.h"
#include "skl_scaler.h"
@@ -1326,7 +1326,7 @@ static void intel_panic_flush(struct drm_plane *plane)
struct drm_framebuffer *fb = plane_state->hw.fb;
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
- intel_bo_panic_finish(intel_fb);
+ intel_panic_finish(intel_fb->panic);
if (crtc_state->enable_psr2_sel_fetch) {
/* Force a full update for psr2 */
@@ -1409,7 +1409,7 @@ static int intel_get_scanout_buffer(struct drm_plane *plane,
return -EOPNOTSUPP;
}
sb->private = intel_fb;
- ret = intel_bo_panic_setup(sb);
+ ret = intel_panic_setup(intel_fb->panic, sb);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 22433fe2ee14..01bf304c705f 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -42,6 +42,7 @@
#include "intel_dmc.h"
#include "intel_dp.h"
#include "intel_dp_aux.h"
+#include "intel_dsb.h"
#include "intel_frontbuffer.h"
#include "intel_hdmi.h"
#include "intel_psr.h"
@@ -494,12 +495,14 @@ static u8 intel_dp_get_su_capability(struct intel_dp *intel_dp)
{
u8 su_capability = 0;
- if (intel_dp->psr.sink_panel_replay_su_support)
- drm_dp_dpcd_readb(&intel_dp->aux,
- DP_PANEL_REPLAY_CAP_CAPABILITY,
- &su_capability);
- else
+ if (intel_dp->psr.sink_panel_replay_su_support) {
+ if (drm_dp_dpcd_read_byte(&intel_dp->aux,
+ DP_PANEL_REPLAY_CAP_CAPABILITY,
+ &su_capability) < 0)
+ return 0;
+ } else {
su_capability = intel_dp->psr_dpcd[1];
+ }
return su_capability;
}
@@ -2997,35 +3000,57 @@ void intel_psr_post_plane_update(struct intel_atomic_state *state,
}
}
-static int _psr2_ready_for_pipe_update_locked(struct intel_dp *intel_dp)
+/*
+ * From bspec: Panel Self Refresh (BDW+)
+ * Max. time for PSR to idle = Inverse of the refresh rate + 6 ms of
+ * exit training time + 1.5 ms of aux channel handshake. 50 ms is
+ * defensive enough to cover everything.
+ */
+#define PSR_IDLE_TIMEOUT_MS 50
+
+static int
+_psr2_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state,
+ struct intel_dsb *dsb)
{
- struct intel_display *display = to_intel_display(intel_dp);
- enum transcoder cpu_transcoder = intel_dp->psr.transcoder;
+ struct intel_display *display = to_intel_display(new_crtc_state);
+ enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder;
/*
* Any state lower than EDP_PSR2_STATUS_STATE_DEEP_SLEEP is enough.
* As all higher states has bit 4 of PSR2 state set we can just wait for
* EDP_PSR2_STATUS_STATE_DEEP_SLEEP to be cleared.
*/
+ if (dsb) {
+ intel_dsb_poll(dsb, EDP_PSR2_STATUS(display, cpu_transcoder),
+ EDP_PSR2_STATUS_STATE_DEEP_SLEEP, 0, 200,
+ PSR_IDLE_TIMEOUT_MS * 1000 / 200);
+ return true;
+ }
+
return intel_de_wait_for_clear(display,
EDP_PSR2_STATUS(display, cpu_transcoder),
- EDP_PSR2_STATUS_STATE_DEEP_SLEEP, 50);
+ EDP_PSR2_STATUS_STATE_DEEP_SLEEP,
+ PSR_IDLE_TIMEOUT_MS);
}
-static int _psr1_ready_for_pipe_update_locked(struct intel_dp *intel_dp)
+static int
+_psr1_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state,
+ struct intel_dsb *dsb)
{
- struct intel_display *display = to_intel_display(intel_dp);
- enum transcoder cpu_transcoder = intel_dp->psr.transcoder;
+ struct intel_display *display = to_intel_display(new_crtc_state);
+ enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder;
+
+ if (dsb) {
+ intel_dsb_poll(dsb, psr_status_reg(display, cpu_transcoder),
+ EDP_PSR_STATUS_STATE_MASK, 0, 200,
+ PSR_IDLE_TIMEOUT_MS * 1000 / 200);
+ return true;
+ }
- /*
- * From bspec: Panel Self Refresh (BDW+)
- * Max. time for PSR to idle = Inverse of the refresh rate + 6 ms of
- * exit training time + 1.5 ms of aux channel handshake. 50 ms is
- * defensive enough to cover everything.
- */
return intel_de_wait_for_clear(display,
psr_status_reg(display, cpu_transcoder),
- EDP_PSR_STATUS_STATE_MASK, 50);
+ EDP_PSR_STATUS_STATE_MASK,
+ PSR_IDLE_TIMEOUT_MS);
}
/**
@@ -3054,9 +3079,11 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat
continue;
if (intel_dp->psr.sel_update_enabled)
- ret = _psr2_ready_for_pipe_update_locked(intel_dp);
+ ret = _psr2_ready_for_pipe_update_locked(new_crtc_state,
+ NULL);
else
- ret = _psr1_ready_for_pipe_update_locked(intel_dp);
+ ret = _psr1_ready_for_pipe_update_locked(new_crtc_state,
+ NULL);
if (ret)
drm_err(display->drm,
@@ -3064,6 +3091,18 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat
}
}
+void intel_psr_wait_for_idle_dsb(struct intel_dsb *dsb,
+ const struct intel_crtc_state *new_crtc_state)
+{
+ if (!new_crtc_state->has_psr || new_crtc_state->has_panel_replay)
+ return;
+
+ if (new_crtc_state->has_sel_update)
+ _psr2_ready_for_pipe_update_locked(new_crtc_state, dsb);
+ else
+ _psr1_ready_for_pipe_update_locked(new_crtc_state, dsb);
+}
+
static bool __psr_wait_for_idle_locked(struct intel_dp *intel_dp)
{
struct intel_display *display = to_intel_display(intel_dp);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index 9b061a22361f..077751aa599f 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -52,6 +52,8 @@ void intel_psr_get_config(struct intel_encoder *encoder,
void intel_psr_irq_handler(struct intel_dp *intel_dp, u32 psr_iir);
void intel_psr_short_pulse(struct intel_dp *intel_dp);
void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_state);
+void intel_psr_wait_for_idle_dsb(struct intel_dsb *dsb,
+ const struct intel_crtc_state *new_crtc_state);
bool intel_psr_enabled(struct intel_dp *intel_dp);
int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
struct intel_crtc *crtc);
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 950dc79dbdd4..e13fb781e7b2 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -20,6 +20,7 @@
#include "intel_fb.h"
#include "intel_fbc.h"
#include "intel_frontbuffer.h"
+#include "intel_panic.h"
#include "intel_plane.h"
#include "intel_psr.h"
#include "intel_psr_regs.h"
@@ -3028,7 +3029,7 @@ skl_get_initial_plane_config(struct intel_crtc *crtc,
return;
}
- intel_fb = intel_bo_alloc_framebuffer();
+ intel_fb = intel_framebuffer_alloc();
if (!intel_fb) {
drm_dbg_kms(display->drm, "failed to alloc fb\n");
return;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index f243f8a5215d..39c7c32e1e74 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -182,7 +182,7 @@ enum {
* the object. Simple! ... The relocation entries are stored in user memory
* and so to access them we have to copy them into a local buffer. That copy
* has to avoid taking any pagefaults as they may lead back to a GEM object
- * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
+ * requiring the vm->mutex (i.e. recursive deadlock). So once again we split
* the relocation into multiple passes. First we try to do everything within an
* atomic context (avoid the pagefaults) which requires that we never wait. If
* we detect that we may wait, or if we need to fault, then we have to fallback
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 1f38e367c60b..478011e5ecb3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -459,8 +459,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
atomic_inc(&i915->mm.free_count);
/*
- * Since we require blocking on struct_mutex to unbind the freed
- * object from the GPU before releasing resources back to the
+ * Since we require blocking on drm_i915_gem_object->vma.lock to unbind
+ * the freed object from the GPU before releasing resources back to the
* system, we can not do that directly from the RCU callback (which may
* be a softirq context), but must instead then defer that work onto a
* kthread. We use the RCU callback rather than move the freed object
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 565f8fa330db..148034ef504d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -16,9 +16,9 @@
#include "i915_gem_ww.h"
#include "i915_vma_types.h"
-struct drm_scanout_buffer;
enum intel_region_id;
-struct intel_framebuffer;
+struct drm_scanout_buffer;
+struct intel_panic;
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
@@ -693,9 +693,10 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
int i915_gem_object_truncate(struct drm_i915_gem_object *obj);
-struct intel_framebuffer *i915_gem_object_alloc_framebuffer(void);
-int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb);
-void i915_gem_object_panic_finish(struct intel_framebuffer *fb);
+struct intel_panic *i915_gem_object_alloc_panic(void);
+int i915_gem_object_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb,
+ struct drm_gem_object *_obj, bool panic_tiling);
+void i915_gem_object_panic_finish(struct intel_panic *panic);
/**
* i915_gem_object_pin_map - return a contiguous mapping of the entire object
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index c16a57160b26..3f09cbce05bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -357,23 +357,13 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
return vaddr ?: ERR_PTR(-ENOMEM);
}
-struct i915_panic_data {
+struct intel_panic {
struct page **pages;
int page;
void *vaddr;
};
-struct i915_framebuffer {
- struct intel_framebuffer base;
- struct i915_panic_data panic;
-};
-
-static inline struct i915_panic_data *to_i915_panic_data(struct intel_framebuffer *fb)
-{
- return &container_of_const(fb, struct i915_framebuffer, base)->panic;
-}
-
-static void i915_panic_kunmap(struct i915_panic_data *panic)
+static void i915_panic_kunmap(struct intel_panic *panic)
{
if (panic->vaddr) {
drm_clflush_virt_range(panic->vaddr, PAGE_SIZE);
@@ -420,7 +410,7 @@ static void i915_gem_object_panic_page_set_pixel(struct drm_scanout_buffer *sb,
unsigned int new_page;
unsigned int offset;
struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private;
- struct i915_panic_data *panic = to_i915_panic_data(fb);
+ struct intel_panic *panic = fb->panic;
if (fb->panic_tiling)
offset = fb->panic_tiling(sb->width, x, y);
@@ -441,14 +431,13 @@ static void i915_gem_object_panic_page_set_pixel(struct drm_scanout_buffer *sb,
}
}
-struct intel_framebuffer *i915_gem_object_alloc_framebuffer(void)
+struct intel_panic *i915_gem_object_alloc_panic(void)
{
- struct i915_framebuffer *i915_fb;
+ struct intel_panic *panic;
+
+ panic = kzalloc(sizeof(*panic), GFP_KERNEL);
- i915_fb = kzalloc(sizeof(*i915_fb), GFP_KERNEL);
- if (i915_fb)
- return &i915_fb->base;
- return NULL;
+ return panic;
}
/*
@@ -456,12 +445,11 @@ struct intel_framebuffer *i915_gem_object_alloc_framebuffer(void)
* Use current vaddr if it exists, or setup a list of pages.
* pfn is not supported yet.
*/
-int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb)
+int i915_gem_object_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb,
+ struct drm_gem_object *_obj, bool panic_tiling)
{
enum i915_map_type has_type;
- struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private;
- struct i915_panic_data *panic = to_i915_panic_data(fb);
- struct drm_i915_gem_object *obj = to_intel_bo(intel_fb_bo(&fb->base));
+ struct drm_i915_gem_object *obj = to_intel_bo(_obj);
void *ptr;
ptr = page_unpack_bits(obj->mm.mapping, &has_type);
@@ -471,7 +459,7 @@ int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb)
else
iosys_map_set_vaddr(&sb->map[0], ptr);
- if (fb->panic_tiling)
+ if (panic_tiling)
sb->set_pixel = i915_gem_object_panic_map_set_pixel;
return 0;
}
@@ -486,10 +474,8 @@ int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb)
return -EOPNOTSUPP;
}
-void i915_gem_object_panic_finish(struct intel_framebuffer *fb)
+void i915_gem_object_panic_finish(struct intel_panic *panic)
{
- struct i915_panic_data *panic = to_i915_panic_data(fb);
-
i915_panic_kunmap(panic);
panic->page = -1;
kfree(panic->pages);
@@ -779,7 +765,7 @@ __i915_gem_object_get_page(struct drm_i915_gem_object *obj, pgoff_t n)
GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
sg = i915_gem_object_get_sg(obj, n, &offset);
- return nth_page(sg_page(sg), offset);
+ return sg_page(sg) + offset;
}
/* Like i915_gem_object_get_page(), but mark the returned page dirty */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index e3d188455f67..b9dae15c1d16 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -514,6 +514,13 @@ static int __create_shmem(struct drm_i915_private *i915,
if (IS_ERR(filp))
return PTR_ERR(filp);
+ /*
+ * Prevent -EFBIG by allowing large writes beyond MAX_NON_LFS on shmem
+ * objects by setting O_LARGEFILE.
+ */
+ if (force_o_largefile())
+ filp->f_flags |= O_LARGEFILE;
+
obj->filp = filp;
return 0;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b81e67504bbe..7a3e74a6676e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -170,7 +170,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
* Also note that although these lists do not hold a reference to
* the object we can safely grab one here: The final object
* unreferencing and the bound_list are both protected by the
- * dev->struct_mutex and so we won't ever be able to observe an
+ * i915->mm.obj_lock and so we won't ever be able to observe an
* object on the bound_list with a reference count equals 0.
*/
for (phase = phases; phase->list; phase++) {
@@ -185,7 +185,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
/*
* We serialize our access to unreferenced objects through
- * the use of the struct_mutex. While the objects are not
+ * the use of the obj_lock. While the objects are not
* yet freed (due to RCU then a workqueue) we still want
* to be able to shrink their pages, so they remain on
* the unbound/bound list until actually freed.
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index 991666fd9f85..54829801d3f7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -217,10 +217,10 @@ static unsigned long to_wait_timeout(s64 timeout_ns)
*
* The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
* non-zero timeout parameter the wait ioctl will wait for the given number of
- * nanoseconds on an object becoming unbusy. Since the wait itself does so
- * without holding struct_mutex the object may become re-busied before this
- * function completes. A similar but shorter * race condition exists in the busy
- * ioctl
+ * nanoseconds on an object becoming unbusy. Since the wait occurs without
+ * holding a global or exclusive lock the object may become re-busied before
+ * this function completes. A similar but shorter * race condition exists
+ * in the busy ioctl
*/
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
index a09e2eb47175..8f13ec4ff0d0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -11,11 +11,6 @@
#include "i915_gemfs.h"
#include "i915_utils.h"
-static int add_param(struct fs_context *fc, const char *key, const char *val)
-{
- return vfs_parse_fs_string(fc, key, val, strlen(val));
-}
-
void i915_gemfs_init(struct drm_i915_private *i915)
{
struct file_system_type *type;
@@ -48,9 +43,9 @@ void i915_gemfs_init(struct drm_i915_private *i915)
fc = fs_context_for_mount(type, SB_KERNMOUNT);
if (IS_ERR(fc))
goto err;
- ret = add_param(fc, "source", "tmpfs");
+ ret = vfs_parse_fs_string(fc, "source", "tmpfs");
if (!ret)
- ret = add_param(fc, "huge", "within_size");
+ ret = vfs_parse_fs_string(fc, "huge", "within_size");
if (!ret)
gemfs = fc_mount_longterm(fc);
put_fs_context(fc);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index e747f5ed195e..539c620364e3 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -5,7 +5,7 @@
#include "i915_selftest.h"
-#include "display/intel_display_core.h"
+#include "display/intel_display_device.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_engine_user.h"
@@ -122,7 +122,7 @@ static bool fastblit_supports_x_tiling(const struct drm_i915_private *i915)
if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
return false;
- return HAS_DISPLAY(display);
+ return intel_display_device_present(display);
}
static bool fast_blit_ok(const struct blit_buffer *buf)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 98c7f6052069..10070ee4d74c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -14,7 +14,6 @@
#include "i915_active_types.h"
#include "i915_sw_fence.h"
-#include "i915_utils.h"
#include "intel_engine_types.h"
#include "intel_sseu.h"
#include "intel_wakeref.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 03baa7fa0a27..7f389cb0bde4 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -106,14 +106,18 @@
* preemption, but just sampling the new tail pointer).
*
*/
+
#include <linux/interrupt.h>
#include <linux/string_helpers.h>
+#include "gen8_engine_cs.h"
#include "i915_drv.h"
+#include "i915_list_util.h"
#include "i915_reg.h"
+#include "i915_timer_util.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
-#include "gen8_engine_cs.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_heartbeat.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 86b5a9ba323d..c7befc5c20d0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -7,6 +7,7 @@
#include "gem/i915_gem_object.h"
#include "i915_drv.h"
+#include "i915_list_util.h"
#include "intel_engine_pm.h"
#include "intel_gt_buffer_pool.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index a60822e2b5d4..c3afa321fe30 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -4,6 +4,7 @@
*/
#include "i915_drv.h"
+#include "i915_wait_util.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 9ca42589da4d..bf38cc5fe872 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -341,7 +341,7 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
return PTR_ERR(pctx);
}
- GEM_BUG_ON(range_overflows_end_t(u64,
+ GEM_BUG_ON(range_end_overflows_t(u64,
i915->dsm.stolen.start,
pctx->stolen->start,
U32_MAX));
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 4a1675dea1c7..41b5036dc538 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -9,18 +9,17 @@
#include "display/intel_display_reset.h"
#include "display/intel_overlay.h"
-
#include "gem/i915_gem_context.h"
-
#include "gt/intel_gt_regs.h"
-
#include "gt/uc/intel_gsc_fw.h"
+#include "uc/intel_guc.h"
#include "i915_drv.h"
#include "i915_file_private.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
@@ -32,8 +31,6 @@
#include "intel_pci_config.h"
#include "intel_reset.h"
-#include "uc/intel_guc.h"
-
#define RESET_MAX_RETRIES 3
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h
index 4f5fd393af6f..ee4eb574a219 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h
@@ -20,7 +20,7 @@ struct intel_reset {
* FENCE registers).
*
* #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
- * acquire the struct_mutex to reset an engine, we need an explicit
+ * acquire a global lock to reset an engine, we need an explicit
* flag to prevent two concurrent reset attempts in the same engine.
* As the number of engines continues to grow, allocate the flags from
* the most significant bits.
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 2a6d79abf25b..8314a4b0505e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -15,18 +15,19 @@
#include "i915_irq.h"
#include "i915_mitigations.h"
#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
+#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "shmem_utils.h"
-#include "intel_engine_heartbeat.h"
-#include "intel_engine_pm.h"
-#include "intel_gt_print.h"
/* Rough estimate of the typical request size, performing a flush,
* set-context and then emitting the batch.
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 006042e0b229..4da94098bd3e 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -10,9 +10,11 @@
#include "display/intel_display.h"
#include "display/intel_display_rps.h"
#include "soc/intel_dram.h"
+
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index 57308c4d664a..85b43f9b9d95 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -9,6 +9,7 @@
#include <linux/lockdep.h>
#include "i915_active.h"
+#include "i915_list_util.h"
#include "i915_syncmap.h"
#include "intel_timeline_types.h"
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 69ed946a39e5..a5184f09d1de 100644
--- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -3,17 +3,17 @@
* Copyright © 2022 Intel Corporation
*/
-#include "i915_selftest.h"
-
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gen8_engine_cs.h"
#include "i915_gem_ww.h"
+#include "i915_selftest.h"
+#include "i915_wait_util.h"
+#include "intel_context.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
-#include "intel_context.h"
#include "intel_gt.h"
#include "intel_ring.h"
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index aab2759067d2..4a81bc396b21 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -7,6 +7,7 @@
#include <linux/sysfs.h>
#include "i915_drv.h"
+#include "i915_timer_util.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "sysfs_engines.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index d8edd7c054c8..e7444ebc373e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -10,11 +10,13 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_gsc_proxy.h"
#include "intel_gsc_uc.h"
#include "intel_gsc_uc_heci_cmd_submit.h"
-#include "i915_drv.h"
-#include "i915_reg.h"
/*
* GSC proxy:
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
index 2fde5c360cff..9bd29be7656f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
@@ -8,6 +8,8 @@
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_ring.h"
+
+#include "i915_wait_util.h"
#include "intel_gsc_uc_heci_cmd_submit.h"
struct gsc_heci_pkt {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index f360f020d8f1..52ec4421a211 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -8,15 +8,17 @@
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm_irq.h"
#include "gt/intel_gt_regs.h"
+
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_slpc.h"
#include "intel_guc_submission.h"
-#include "i915_drv.h"
-#include "i915_irq.h"
-#include "i915_reg.h"
/**
* DOC: GuC
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 380a11c92d63..3e7e5badcc2b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -5,11 +5,12 @@
#include <linux/circ_buf.h>
#include <linux/ktime.h>
-#include <linux/time64.h>
#include <linux/string_helpers.h>
+#include <linux/time64.h>
#include <linux/timekeeping.h>
#include "i915_drv.h"
+#include "i915_wait_util.h"
#include "intel_guc_ct.h"
#include "intel_guc_print.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 384d1400134d..b1bda1b84f0a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -13,9 +13,11 @@
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_rps.h"
+
+#include "i915_drv.h"
+#include "i915_wait_util.h"
#include "intel_guc_fw.h"
#include "intel_guc_print.h"
-#include "i915_drv.h"
static void guc_prepare_xfer(struct intel_gt *gt)
{
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 09a64f224c49..cdff48920ee6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -6,6 +6,8 @@
#include <linux/debugfs.h>
#include <linux/string_helpers.h>
+#include <drm/drm_managed.h>
+
#include "gt/intel_gt.h"
#include "i915_drv.h"
#include "i915_irq.h"
@@ -511,7 +513,11 @@ static void guc_log_relay_unmap(struct intel_guc_log *log)
void intel_guc_log_init_early(struct intel_guc_log *log)
{
- mutex_init(&log->relay.lock);
+ struct intel_guc *guc = log_to_guc(log);
+ struct drm_i915_private *i915 = guc_to_i915(guc);
+
+ drmm_mutex_init(&i915->drm, &log->relay.lock);
+ drmm_mutex_init(&i915->drm, &log->guc_lock);
INIT_WORK(&log->relay.flush_work, copy_debug_logs_work);
log->relay.started = false;
}
@@ -677,7 +683,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
if (level < GUC_LOG_LEVEL_DISABLED || level > GUC_LOG_LEVEL_MAX)
return -EINVAL;
- mutex_lock(&i915->drm.struct_mutex);
+ mutex_lock(&log->guc_lock);
if (log->level == level)
goto out_unlock;
@@ -695,7 +701,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
log->level = level;
out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
+ mutex_unlock(&log->guc_lock);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index 02127703be80..13cb93ad0710 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -42,6 +42,14 @@ enum {
struct intel_guc_log {
u32 level;
+ /*
+ * Protects concurrent access and modification of intel_guc_log->level.
+ *
+ * This lock replaces the legacy struct_mutex usage in
+ * intel_guc_log system.
+ */
+ struct mutex guc_lock;
+
/* Allocation settings */
struct {
s32 bytes; /* Size in bytes */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index d5ee6e5e1443..fa9af08f9708 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -3,17 +3,20 @@
* Copyright © 2021 Intel Corporation
*/
-#include <drm/drm_cache.h>
#include <linux/string_helpers.h>
+#include <drm/drm_cache.h>
+
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_rps.h"
+
#include "i915_drv.h"
#include "i915_reg.h"
-#include "intel_guc_slpc.h"
+#include "i915_wait_util.h"
#include "intel_guc_print.h"
+#include "intel_guc_slpc.h"
#include "intel_mchbar_regs.h"
-#include "gt/intel_gt.h"
-#include "gt/intel_gt_regs.h"
-#include "gt/intel_rps.h"
/**
* DOC: SLPC - Dynamic Frequency management
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 127316d2c8aa..68f2b8d363ac 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -25,16 +25,16 @@
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i915_trace.h"
+#include "i915_wait_util.h"
#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"
-#include "i915_drv.h"
-#include "i915_reg.h"
-#include "i915_irq.h"
-#include "i915_trace.h"
-
/**
* DOC: GuC-based command submission
*
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index a91e23c22ea1..d432fdd69833 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1921,7 +1921,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
if (!bb)
return -ENOMEM;
- bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true;
+ bb->ppgtt = s->buf_addr_type != GTT_BUFFER;
/*
* The start_offset stores the batch buffer's start gma's
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 2f7208843367..0b810baad20a 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -33,14 +33,16 @@
*
*/
-#include "i915_drv.h"
-#include "i915_reg.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_ring.h"
+
#include "gvt.h"
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "trace.h"
#define GEN9_MOCS_SIZE 64
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 23fa098c4479..c2e38d4bcd01 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -26,11 +26,11 @@
*
*/
+#include <linux/debugfs.h>
#include <linux/sched/mm.h>
#include <linux/sort.h>
#include <linux/string_helpers.h>
-#include <linux/debugfs.h>
#include <drm/drm_debugfs.h>
#include "gem/i915_gem_context.h"
@@ -54,6 +54,7 @@
#include "i915_irq.h"
#include "i915_reg.h"
#include "i915_scheduler.h"
+#include "i915_wait_util.h"
#include "intel_mchbar_regs.h"
static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 70f042ce8705..a28c3710c4d5 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -51,13 +51,15 @@
#include "display/intel_bw.h"
#include "display/intel_cdclk.h"
#include "display/intel_crtc.h"
-#include "display/intel_display_core.h"
+#include "display/intel_display_device.h"
#include "display/intel_display_driver.h"
+#include "display/intel_display_power.h"
#include "display/intel_dmc.h"
#include "display/intel_dp.h"
#include "display/intel_dpt.h"
#include "display/intel_encoder.h"
#include "display/intel_fbdev.h"
+#include "display/intel_gmbus.h"
#include "display/intel_hotplug.h"
#include "display/intel_opregion.h"
#include "display/intel_overlay.h"
@@ -977,7 +979,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915)
intel_power_domains_disable(display);
drm_client_dev_suspend(&i915->drm, false);
- if (HAS_DISPLAY(display)) {
+ if (intel_display_device_present(display)) {
drm_kms_helper_poll_disable(&i915->drm);
intel_display_driver_disable_user_access(display);
@@ -989,7 +991,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915)
intel_irq_suspend(i915);
intel_hpd_cancel_work(display);
- if (HAS_DISPLAY(display))
+ if (intel_display_device_present(display))
intel_display_driver_suspend_access(display);
intel_encoder_suspend_all(display);
@@ -1060,7 +1062,7 @@ static int i915_drm_suspend(struct drm_device *dev)
* properly. */
intel_power_domains_disable(display);
drm_client_dev_suspend(dev, false);
- if (HAS_DISPLAY(display)) {
+ if (intel_display_device_present(display)) {
drm_kms_helper_poll_disable(dev);
intel_display_driver_disable_user_access(display);
}
@@ -1072,7 +1074,7 @@ static int i915_drm_suspend(struct drm_device *dev)
intel_irq_suspend(dev_priv);
intel_hpd_cancel_work(display);
- if (HAS_DISPLAY(display))
+ if (intel_display_device_present(display))
intel_display_driver_suspend_access(display);
intel_encoder_suspend_all(display);
@@ -1219,7 +1221,7 @@ static int i915_drm_resume(struct drm_device *dev)
*/
intel_irq_resume(dev_priv);
- if (HAS_DISPLAY(display))
+ if (intel_display_device_present(display))
drm_mode_config_reset(dev);
i915_gem_resume(dev_priv);
@@ -1228,14 +1230,14 @@ static int i915_drm_resume(struct drm_device *dev)
intel_clock_gating_init(dev_priv);
- if (HAS_DISPLAY(display))
+ if (intel_display_device_present(display))
intel_display_driver_resume_access(display);
intel_hpd_init(display);
intel_display_driver_resume(display);
- if (HAS_DISPLAY(display)) {
+ if (intel_display_device_present(display)) {
intel_display_driver_enable_user_access(display);
drm_kms_helper_poll_enable(dev);
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2f3965feada1..6a768aad8edd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -114,8 +114,7 @@ struct i915_gem_mm {
struct intel_memory_region *stolen_region;
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
- /** Protects the usage of the GTT stolen memory allocator. This is
- * always the inner lock when overlapping with struct_mutex. */
+ /** Protects the usage of the GTT stolen memory allocator */
struct mutex stolen_lock;
/* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */
@@ -222,6 +221,9 @@ struct drm_i915_private {
bool irqs_enabled;
+ /* LPT/WPT IOSF sideband protection */
+ struct mutex sbi_lock;
+
/* VLV/CHV IOSF sideband */
struct {
struct mutex lock; /* protect sideband access */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8c8d43451f35..e14a0c3db999 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -847,8 +847,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
/*
* Only called during RPM suspend. All users of the userfault_list
* must be holding an RPM wakeref to ensure that this can not
- * run concurrently with themselves (and use the struct_mutex for
- * protection between themselves).
+ * run concurrently with themselves.
*/
list_for_each_entry_safe(obj, on,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a5fa40ab5de2..8d5da222a187 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -163,11 +163,6 @@ static void ivb_parity_work(struct work_struct *work)
u32 misccpctl;
u8 slice = 0;
- /* We must turn off DOP level clock gating to access the L3 registers.
- * In order to prevent a get/put style interface, acquire struct mutex
- * any time we access those registers.
- */
- mutex_lock(&dev_priv->drm.struct_mutex);
/* If we've screwed up tracking, just let the interrupt fire again */
if (drm_WARN_ON(&dev_priv->drm, !dev_priv->l3_parity.which_slice))
@@ -225,7 +220,6 @@ out:
gen5_gt_enable_irq(gt, GT_PARITY_ERROR(dev_priv));
spin_unlock_irq(gt->irq_lock);
- mutex_unlock(&dev_priv->drm.struct_mutex);
}
static irqreturn_t valleyview_irq_handler(int irq, void *arg)
diff --git a/drivers/gpu/drm/i915/i915_list_util.h b/drivers/gpu/drm/i915/i915_list_util.h
new file mode 100644
index 000000000000..4e515dc8a3e0
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_list_util.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright © 2025 Intel Corporation */
+
+#ifndef __I915_LIST_UTIL_H__
+#define __I915_LIST_UTIL_H__
+
+#include <linux/list.h>
+#include <asm/rwonce.h>
+
+static inline void __list_del_many(struct list_head *head,
+ struct list_head *first)
+{
+ first->prev = head;
+ WRITE_ONCE(head->next, first);
+}
+
+static inline int list_is_last_rcu(const struct list_head *list,
+ const struct list_head *head)
+{
+ return READ_ONCE(list->next) == head;
+}
+
+#endif /* __I915_LIST_UTIL_H__ */
diff --git a/drivers/gpu/drm/i915/i915_ptr_util.h b/drivers/gpu/drm/i915/i915_ptr_util.h
new file mode 100644
index 000000000000..9f8931d7d99b
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_ptr_util.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright © 2025 Intel Corporation */
+
+#ifndef __I915_PTR_UTIL_H__
+#define __I915_PTR_UTIL_H__
+
+#include <linux/types.h>
+
+#define ptr_mask_bits(ptr, n) ({ \
+ unsigned long __v = (unsigned long)(ptr); \
+ (typeof(ptr))(__v & -BIT(n)); \
+})
+
+#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1))
+
+#define ptr_unpack_bits(ptr, bits, n) ({ \
+ unsigned long __v = (unsigned long)(ptr); \
+ *(bits) = __v & (BIT(n) - 1); \
+ (typeof(ptr))(__v & -BIT(n)); \
+})
+
+#define ptr_pack_bits(ptr, bits, n) ({ \
+ unsigned long __bits = (bits); \
+ GEM_BUG_ON(__bits & -BIT(n)); \
+ ((typeof(ptr))((unsigned long)(ptr) | __bits)); \
+})
+
+#define ptr_dec(ptr) ({ \
+ unsigned long __v = (unsigned long)(ptr); \
+ (typeof(ptr))(__v - 1); \
+})
+
+#define ptr_inc(ptr) ({ \
+ unsigned long __v = (unsigned long)(ptr); \
+ (typeof(ptr))(__v + 1); \
+})
+
+#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
+#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
+#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
+#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
+
+static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b)
+{
+ return a - b;
+}
+
+#define u64_to_ptr(T, x) ({ \
+ typecheck(u64, x); \
+ (T *)(uintptr_t)(x); \
+})
+
+/*
+ * container_of_user: Extract the superclass from a pointer to a member.
+ *
+ * Exactly like container_of() with the exception that it plays nicely
+ * with sparse for __user @ptr.
+ */
+#define container_of_user(ptr, type, member) ({ \
+ void __user *__mptr = (void __user *)(ptr); \
+ BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \
+ !__same_type(*(ptr), void), \
+ "pointer type mismatch in container_of()"); \
+ ((type __user *)(__mptr - offsetof(type, member))); })
+
+#endif /* __I915_PTR_UTIL_H__ */
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 5f7e8138ec14..b09135301f39 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -31,19 +31,20 @@
#include <linux/llist.h>
#include <linux/lockdep.h>
+#include <uapi/drm/i915_drm.h>
+
#include "gem/i915_gem_context_types.h"
#include "gt/intel_context_types.h"
#include "gt/intel_engine_types.h"
#include "gt/intel_timeline_types.h"
#include "i915_gem.h"
+#include "i915_ptr_util.h"
#include "i915_scheduler.h"
#include "i915_selftest.h"
#include "i915_sw_fence.h"
#include "i915_vma_resource.h"
-#include <uapi/drm/i915_drm.h>
-
struct drm_file;
struct drm_i915_gem_object;
struct drm_printer;
diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c
index 3a95a55b2e87..d5b6d8ab31a2 100644
--- a/drivers/gpu/drm/i915/i915_switcheroo.c
+++ b/drivers/gpu/drm/i915/i915_switcheroo.c
@@ -5,7 +5,7 @@
#include <linux/vga_switcheroo.h>
-#include "display/intel_display_core.h"
+#include "display/intel_display_device.h"
#include "i915_driver.h"
#include "i915_drv.h"
@@ -22,7 +22,7 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev,
dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n");
return;
}
- if (!HAS_DISPLAY(display)) {
+ if (!intel_display_device_present(display)) {
dev_err(&pdev->dev, "Device state not initialized, aborting switch.\n");
return;
}
@@ -52,7 +52,8 @@ static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
* locking inversion with the driver load path. And the access here is
* completely racy anyway. So don't bother with locking for now.
*/
- return i915 && HAS_DISPLAY(display) && atomic_read(&i915->drm.open_count) == 0;
+ return i915 && intel_display_device_present(display) &&
+ atomic_read(&i915->drm.open_count) == 0;
}
static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
diff --git a/drivers/gpu/drm/i915/i915_timer_util.c b/drivers/gpu/drm/i915/i915_timer_util.c
new file mode 100644
index 000000000000..ee4cfd8b3c07
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_timer_util.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: MIT
+/* Copyright © 2025 Intel Corporation */
+
+#include <linux/jiffies.h>
+
+#include "i915_timer_util.h"
+
+void cancel_timer(struct timer_list *t)
+{
+ if (!timer_active(t))
+ return;
+
+ timer_delete(t);
+ WRITE_ONCE(t->expires, 0);
+}
+
+void set_timer_ms(struct timer_list *t, unsigned long timeout)
+{
+ if (!timeout) {
+ cancel_timer(t);
+ return;
+ }
+
+ timeout = msecs_to_jiffies(timeout);
+
+ /*
+ * Paranoia to make sure the compiler computes the timeout before
+ * loading 'jiffies' as jiffies is volatile and may be updated in
+ * the background by a timer tick. All to reduce the complexity
+ * of the addition and reduce the risk of losing a jiffy.
+ */
+ barrier();
+
+ /* Keep t->expires = 0 reserved to indicate a canceled timer. */
+ mod_timer(t, jiffies + timeout ?: 1);
+}
diff --git a/drivers/gpu/drm/i915/i915_timer_util.h b/drivers/gpu/drm/i915/i915_timer_util.h
new file mode 100644
index 000000000000..f35ad730820c
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_timer_util.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright © 2025 Intel Corporation */
+
+#ifndef __I915_TIMER_UTIL_H__
+#define __I915_TIMER_UTIL_H__
+
+#include <linux/timer.h>
+#include <asm/rwonce.h>
+
+void cancel_timer(struct timer_list *t);
+void set_timer_ms(struct timer_list *t, unsigned long timeout);
+
+static inline bool timer_active(const struct timer_list *t)
+{
+ return READ_ONCE(t->expires);
+}
+
+static inline bool timer_expired(const struct timer_list *t)
+{
+ return timer_active(t) && !timer_pending(t);
+}
+
+#endif /* __I915_TIMER_UTIL_H__ */
diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c
index b60c28fbd207..49f7ed413132 100644
--- a/drivers/gpu/drm/i915/i915_utils.c
+++ b/drivers/gpu/drm/i915/i915_utils.c
@@ -47,36 +47,6 @@ bool i915_error_injected(void)
#endif
-void cancel_timer(struct timer_list *t)
-{
- if (!timer_active(t))
- return;
-
- timer_delete(t);
- WRITE_ONCE(t->expires, 0);
-}
-
-void set_timer_ms(struct timer_list *t, unsigned long timeout)
-{
- if (!timeout) {
- cancel_timer(t);
- return;
- }
-
- timeout = msecs_to_jiffies(timeout);
-
- /*
- * Paranoia to make sure the compiler computes the timeout before
- * loading 'jiffies' as jiffies is volatile and may be updated in
- * the background by a timer tick. All to reduce the complexity
- * of the addition and reduce the risk of losing a jiffy.
- */
- barrier();
-
- /* Keep t->expires = 0 reserved to indicate a canceled timer. */
- mod_timer(t, jiffies + timeout ?: 1);
-}
-
bool i915_vtd_active(struct drm_i915_private *i915)
{
if (device_iommu_mapped(i915->drm.dev))
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 9cb40c2c4b12..a0c892e4c40d 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -25,7 +25,6 @@
#ifndef __I915_UTILS_H
#define __I915_UTILS_H
-#include <linux/list.h>
#include <linux/overflow.h>
#include <linux/sched.h>
#include <linux/string_helpers.h>
@@ -38,7 +37,6 @@
#endif
struct drm_i915_private;
-struct timer_list;
#define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
__stringify(x), (long)(x))
@@ -67,88 +65,12 @@ bool i915_error_injected(void);
drm_err(&(i915)->drm, fmt, ##__VA_ARGS__); \
})
-#define range_overflows(start, size, max) ({ \
- typeof(start) start__ = (start); \
- typeof(size) size__ = (size); \
- typeof(max) max__ = (max); \
- (void)(&start__ == &size__); \
- (void)(&start__ == &max__); \
- start__ >= max__ || size__ > max__ - start__; \
-})
-
-#define range_overflows_t(type, start, size, max) \
- range_overflows((type)(start), (type)(size), (type)(max))
-
-#define range_overflows_end(start, size, max) ({ \
- typeof(start) start__ = (start); \
- typeof(size) size__ = (size); \
- typeof(max) max__ = (max); \
- (void)(&start__ == &size__); \
- (void)(&start__ == &max__); \
- start__ > max__ || size__ > max__ - start__; \
-})
-
-#define range_overflows_end_t(type, start, size, max) \
- range_overflows_end((type)(start), (type)(size), (type)(max))
-
-#define ptr_mask_bits(ptr, n) ({ \
- unsigned long __v = (unsigned long)(ptr); \
- (typeof(ptr))(__v & -BIT(n)); \
-})
-
-#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1))
-
-#define ptr_unpack_bits(ptr, bits, n) ({ \
- unsigned long __v = (unsigned long)(ptr); \
- *(bits) = __v & (BIT(n) - 1); \
- (typeof(ptr))(__v & -BIT(n)); \
-})
-
-#define ptr_pack_bits(ptr, bits, n) ({ \
- unsigned long __bits = (bits); \
- GEM_BUG_ON(__bits & -BIT(n)); \
- ((typeof(ptr))((unsigned long)(ptr) | __bits)); \
-})
-
-#define ptr_dec(ptr) ({ \
- unsigned long __v = (unsigned long)(ptr); \
- (typeof(ptr))(__v - 1); \
-})
-
-#define ptr_inc(ptr) ({ \
- unsigned long __v = (unsigned long)(ptr); \
- (typeof(ptr))(__v + 1); \
-})
-
-#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
-#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
-#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
-#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
-
#define fetch_and_zero(ptr) ({ \
typeof(*ptr) __T = *(ptr); \
*(ptr) = (typeof(*ptr))0; \
__T; \
})
-static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b)
-{
- return a - b;
-}
-
-/*
- * container_of_user: Extract the superclass from a pointer to a member.
- *
- * Exactly like container_of() with the exception that it plays nicely
- * with sparse for __user @ptr.
- */
-#define container_of_user(ptr, type, member) ({ \
- void __user *__mptr = (void __user *)(ptr); \
- BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \
- !__same_type(*(ptr), void), \
- "pointer type mismatch in container_of()"); \
- ((type __user *)(__mptr - offsetof(type, member))); })
-
/*
* check_user_mbz: Check that a user value exists and is zero
*
@@ -167,11 +89,6 @@ static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b)
get_user(mbz__, (U)) ? -EFAULT : mbz__ ? -EINVAL : 0; \
})
-#define u64_to_ptr(T, x) ({ \
- typecheck(u64, x); \
- (T *)(uintptr_t)(x); \
-})
-
#define __mask_next_bit(mask) ({ \
int __idx = ffs(mask) - 1; \
mask &= ~BIT(__idx); \
@@ -183,19 +100,6 @@ static inline bool is_power_of_2_u64(u64 n)
return (n != 0 && ((n & (n - 1)) == 0));
}
-static inline void __list_del_many(struct list_head *head,
- struct list_head *first)
-{
- first->prev = head;
- WRITE_ONCE(head->next, first);
-}
-
-static inline int list_is_last_rcu(const struct list_head *list,
- const struct list_head *head)
-{
- return READ_ONCE(list->next) == head;
-}
-
static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m)
{
unsigned long j = msecs_to_jiffies(m);
@@ -230,112 +134,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
}
}
-/*
- * __wait_for - magic wait macro
- *
- * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
- * important that we check the condition again after having timed out, since the
- * timeout could be due to preemption or similar and we've never had a chance to
- * check the condition before the timeout.
- */
-#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
- const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
- long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
- int ret__; \
- might_sleep(); \
- for (;;) { \
- const bool expired__ = ktime_after(ktime_get_raw(), end__); \
- OP; \
- /* Guarantee COND check prior to timeout */ \
- barrier(); \
- if (COND) { \
- ret__ = 0; \
- break; \
- } \
- if (expired__) { \
- ret__ = -ETIMEDOUT; \
- break; \
- } \
- usleep_range(wait__, wait__ * 2); \
- if (wait__ < (Wmax)) \
- wait__ <<= 1; \
- } \
- ret__; \
-})
-
-#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \
- (Wmax))
-#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
-
-/*
- * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false.
- * On PREEMPT_RT the context isn't becoming atomic because it is used in an
- * interrupt handler or because a spinlock_t is acquired. This leads to
- * warnings which don't occur otherwise and therefore the check is disabled.
- */
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
-# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
-#else
-# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
-#endif
-
-#define _wait_for_atomic(COND, US, ATOMIC) \
-({ \
- int cpu, ret, timeout = (US) * 1000; \
- u64 base; \
- _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
- if (!(ATOMIC)) { \
- preempt_disable(); \
- cpu = smp_processor_id(); \
- } \
- base = local_clock(); \
- for (;;) { \
- u64 now = local_clock(); \
- if (!(ATOMIC)) \
- preempt_enable(); \
- /* Guarantee COND check prior to timeout */ \
- barrier(); \
- if (COND) { \
- ret = 0; \
- break; \
- } \
- if (now - base >= timeout) { \
- ret = -ETIMEDOUT; \
- break; \
- } \
- cpu_relax(); \
- if (!(ATOMIC)) { \
- preempt_disable(); \
- if (unlikely(cpu != smp_processor_id())) { \
- timeout -= now - base; \
- cpu = smp_processor_id(); \
- base = local_clock(); \
- } \
- } \
- } \
- ret; \
-})
-
-#define wait_for_us(COND, US) \
-({ \
- int ret__; \
- BUILD_BUG_ON(!__builtin_constant_p(US)); \
- if ((US) > 10) \
- ret__ = _wait_for((COND), (US), 10, 10); \
- else \
- ret__ = _wait_for_atomic((COND), (US), 0); \
- ret__; \
-})
-
-#define wait_for_atomic_us(COND, US) \
-({ \
- BUILD_BUG_ON(!__builtin_constant_p(US)); \
- BUILD_BUG_ON((US) > 50000); \
- _wait_for_atomic((COND), (US), 1); \
-})
-
-#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000)
-
#define KHz(x) (1000 * (x))
#define MHz(x) KHz(1000 * (x))
@@ -351,19 +149,6 @@ static inline void __add_taint_for_CI(unsigned int taint)
add_taint(taint, LOCKDEP_STILL_OK);
}
-void cancel_timer(struct timer_list *t);
-void set_timer_ms(struct timer_list *t, unsigned long timeout);
-
-static inline bool timer_active(const struct timer_list *t)
-{
- return READ_ONCE(t->expires);
-}
-
-static inline bool timer_expired(const struct timer_list *t)
-{
- return timer_active(t) && !timer_pending(t);
-}
-
static inline bool i915_run_as_guest(void)
{
#if IS_ENABLED(CONFIG_X86)
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 0f9eee6d18d2..8054047840aa 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -30,12 +30,12 @@
#include <drm/drm_mm.h>
-#include "gt/intel_ggtt_fencing.h"
#include "gem/i915_gem_object.h"
-
-#include "i915_gem_gtt.h"
+#include "gt/intel_ggtt_fencing.h"
#include "i915_active.h"
+#include "i915_gem_gtt.h"
+#include "i915_ptr_util.h"
#include "i915_request.h"
#include "i915_vma_resource.h"
#include "i915_vma_types.h"
diff --git a/drivers/gpu/drm/i915/i915_wait_util.h b/drivers/gpu/drm/i915/i915_wait_util.h
new file mode 100644
index 000000000000..7376898e3bf8
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_wait_util.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright © 2025 Intel Corporation */
+
+#ifndef __I915_WAIT_UTIL_H__
+#define __I915_WAIT_UTIL_H__
+
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/ktime.h>
+#include <linux/sched/clock.h>
+#include <linux/smp.h>
+
+/*
+ * __wait_for - magic wait macro
+ *
+ * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
+ * important that we check the condition again after having timed out, since the
+ * timeout could be due to preemption or similar and we've never had a chance to
+ * check the condition before the timeout.
+ */
+#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
+ const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
+ long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
+ int ret__; \
+ might_sleep(); \
+ for (;;) { \
+ const bool expired__ = ktime_after(ktime_get_raw(), end__); \
+ OP; \
+ /* Guarantee COND check prior to timeout */ \
+ barrier(); \
+ if (COND) { \
+ ret__ = 0; \
+ break; \
+ } \
+ if (expired__) { \
+ ret__ = -ETIMEDOUT; \
+ break; \
+ } \
+ usleep_range(wait__, wait__ * 2); \
+ if (wait__ < (Wmax)) \
+ wait__ <<= 1; \
+ } \
+ ret__; \
+})
+
+#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \
+ (Wmax))
+#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
+
+/*
+ * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false.
+ * On PREEMPT_RT the context isn't becoming atomic because it is used in an
+ * interrupt handler or because a spinlock_t is acquired. This leads to
+ * warnings which don't occur otherwise and therefore the check is disabled.
+ */
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
+# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
+#else
+# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
+#endif
+
+#define _wait_for_atomic(COND, US, ATOMIC) \
+({ \
+ int cpu, ret, timeout = (US) * 1000; \
+ u64 base; \
+ _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
+ if (!(ATOMIC)) { \
+ preempt_disable(); \
+ cpu = smp_processor_id(); \
+ } \
+ base = local_clock(); \
+ for (;;) { \
+ u64 now = local_clock(); \
+ if (!(ATOMIC)) \
+ preempt_enable(); \
+ /* Guarantee COND check prior to timeout */ \
+ barrier(); \
+ if (COND) { \
+ ret = 0; \
+ break; \
+ } \
+ if (now - base >= timeout) { \
+ ret = -ETIMEDOUT; \
+ break; \
+ } \
+ cpu_relax(); \
+ if (!(ATOMIC)) { \
+ preempt_disable(); \
+ if (unlikely(cpu != smp_processor_id())) { \
+ timeout -= now - base; \
+ cpu = smp_processor_id(); \
+ base = local_clock(); \
+ } \
+ } \
+ } \
+ ret; \
+})
+
+#define wait_for_us(COND, US) \
+({ \
+ int ret__; \
+ BUILD_BUG_ON(!__builtin_constant_p(US)); \
+ if ((US) > 10) \
+ ret__ = _wait_for((COND), (US), 10, 10); \
+ else \
+ ret__ = _wait_for_atomic((COND), (US), 0); \
+ ret__; \
+})
+
+#define wait_for_atomic_us(COND, US) \
+({ \
+ BUILD_BUG_ON(!__builtin_constant_p(US)); \
+ BUILD_BUG_ON((US) > 50000); \
+ _wait_for_atomic((COND), (US), 1); \
+})
+
+#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000)
+
+#endif /* __I915_WAIT_UTIL_H__ */
diff --git a/drivers/gpu/drm/i915/intel_pcode.c b/drivers/gpu/drm/i915/intel_pcode.c
index 81da75108c60..55ffedad2490 100644
--- a/drivers/gpu/drm/i915/intel_pcode.c
+++ b/drivers/gpu/drm/i915/intel_pcode.c
@@ -5,6 +5,7 @@
#include "i915_drv.h"
#include "i915_reg.h"
+#include "i915_wait_util.h"
#include "intel_pcode.h"
static int gen6_check_mailbox_status(u32 mbox)
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 4ccba7c8ffb3..8cb59f8d1f4c 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -21,19 +21,20 @@
* IN THE SOFTWARE.
*/
-#include <drm/drm_managed.h>
#include <linux/pm_runtime.h>
-#include "display/intel_display_core.h"
+#include <drm/drm_managed.h>
-#include "gt/intel_gt.h"
+#include "display/intel_display_core.h"
#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt.h"
#include "gt/intel_gt_regs.h"
#include "i915_drv.h"
#include "i915_iosf_mbi.h"
#include "i915_reg.h"
#include "i915_vgpu.h"
+#include "i915_wait_util.h"
#include "intel_uncore_trace.h"
#define FORCEWAKE_ACK_TIMEOUT_MS 50
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c
index f8da693ad3ce..27d545c4e6a5 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c
@@ -2,15 +2,15 @@
/*
* Copyright(c) 2020 Intel Corporation.
*/
+
#include <linux/workqueue.h>
#include "gem/i915_gem_context.h"
-
#include "gt/intel_context.h"
#include "gt/intel_gt.h"
#include "i915_drv.h"
-
+#include "i915_wait_util.h"
#include "intel_pxp.h"
#include "intel_pxp_gsccs.h"
#include "intel_pxp_irq.h"
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 2fb7a9e7efec..48cd617247d1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -22,14 +22,13 @@
*
*/
-#include <linux/prime_numbers.h>
#include <linux/pm_qos.h>
+#include <linux/prime_numbers.h>
#include <linux/sort.h>
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"
-
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
@@ -40,11 +39,11 @@
#include "i915_random.h"
#include "i915_selftest.h"
+#include "i915_wait_util.h"
#include "igt_flush_test.h"
#include "igt_live_test.h"
#include "igt_spinner.h"
#include "lib_sw_fence.h"
-
#include "mock_drm.h"
#include "mock_gem_device.h"
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 889281819c5b..9c276c9d0a75 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -31,7 +31,7 @@
#include "i915_driver.h"
#include "i915_drv.h"
#include "i915_selftest.h"
-
+#include "i915_wait_util.h"
#include "igt_flush_test.h"
struct i915_selftest i915_selftest __read_mostly = {
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 8c3e1f20e5a1..820364171ebe 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -3,12 +3,13 @@
*
* Copyright © 2018 Intel Corporation
*/
-#include "gt/intel_gpu_commands.h"
-#include "gt/intel_gt.h"
#include "gem/i915_gem_internal.h"
#include "gem/selftests/igt_gem_utils.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt.h"
+#include "i915_wait_util.h"
#include "igt_spinner.h"
int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/soc/intel_dram.c b/drivers/gpu/drm/i915/soc/intel_dram.c
index 149527827624..edffaed8f9a7 100644
--- a/drivers/gpu/drm/i915/soc/intel_dram.c
+++ b/drivers/gpu/drm/i915/soc/intel_dram.c
@@ -732,7 +732,7 @@ int intel_dram_detect(struct drm_i915_private *i915)
struct dram_info *dram_info;
int ret;
- if (IS_DG2(i915) || !HAS_DISPLAY(display))
+ if (IS_DG2(i915) || !intel_display_device_present(display))
return 0;
dram_info = drmm_kzalloc(&i915->drm, sizeof(*dram_info), GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/vlv_suspend.c b/drivers/gpu/drm/i915/vlv_suspend.c
index fc9f311ea1db..221e4c0b2c58 100644
--- a/drivers/gpu/drm/i915/vlv_suspend.c
+++ b/drivers/gpu/drm/i915/vlv_suspend.c
@@ -8,16 +8,17 @@
#include <drm/drm_print.h>
+#include "gt/intel_gt_regs.h"
+
#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_utils.h"
+#include "i915_wait_util.h"
#include "intel_clock_gating.h"
#include "intel_uncore_trace.h"
#include "vlv_suspend.h"
-#include "gt/intel_gt_regs.h"
-
struct vlv_s0ix_state {
/* GAM */
u32 wr_watermark;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c
index 00e1afd46b81..44df6410bce1 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c
@@ -913,6 +913,11 @@ static const struct adreno_info a6xx_gpus[] = {
{ /* sentinel */ },
},
},
+ .speedbins = ADRENO_SPEEDBINS(
+ { 0, 0 },
+ { 185, 0 },
+ { 127, 1 },
+ ),
}, {
.chip_ids = ADRENO_CHIP_IDS(
0x06030001,
@@ -1024,6 +1029,11 @@ static const struct adreno_info a6xx_gpus[] = {
.gmu_cgc_mode = 0x00020200,
.prim_fifo_threshold = 0x00300200,
},
+ .speedbins = ADRENO_SPEEDBINS(
+ { 0, 0 },
+ { 169, 0 },
+ { 113, 1 },
+ ),
}, {
.chip_ids = ADRENO_CHIP_IDS(0x06030500),
.family = ADRENO_6XX_GEN4,
@@ -1343,6 +1353,69 @@ static const uint32_t a7xx_pwrup_reglist_regs[] = {
DECLARE_ADRENO_REGLIST_LIST(a7xx_pwrup_reglist);
+/* Applicable for X185, A750 */
+static const u32 a750_ifpc_reglist_regs[] = {
+ REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0),
+ REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1),
+ REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2),
+ REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3),
+ REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4),
+ REG_A6XX_TPL1_NC_MODE_CNTL,
+ REG_A6XX_SP_NC_MODE_CNTL,
+ REG_A6XX_CP_DBG_ECO_CNTL,
+ REG_A6XX_CP_PROTECT_CNTL,
+ REG_A6XX_CP_PROTECT(0),
+ REG_A6XX_CP_PROTECT(1),
+ REG_A6XX_CP_PROTECT(2),
+ REG_A6XX_CP_PROTECT(3),
+ REG_A6XX_CP_PROTECT(4),
+ REG_A6XX_CP_PROTECT(5),
+ REG_A6XX_CP_PROTECT(6),
+ REG_A6XX_CP_PROTECT(7),
+ REG_A6XX_CP_PROTECT(8),
+ REG_A6XX_CP_PROTECT(9),
+ REG_A6XX_CP_PROTECT(10),
+ REG_A6XX_CP_PROTECT(11),
+ REG_A6XX_CP_PROTECT(12),
+ REG_A6XX_CP_PROTECT(13),
+ REG_A6XX_CP_PROTECT(14),
+ REG_A6XX_CP_PROTECT(15),
+ REG_A6XX_CP_PROTECT(16),
+ REG_A6XX_CP_PROTECT(17),
+ REG_A6XX_CP_PROTECT(18),
+ REG_A6XX_CP_PROTECT(19),
+ REG_A6XX_CP_PROTECT(20),
+ REG_A6XX_CP_PROTECT(21),
+ REG_A6XX_CP_PROTECT(22),
+ REG_A6XX_CP_PROTECT(23),
+ REG_A6XX_CP_PROTECT(24),
+ REG_A6XX_CP_PROTECT(25),
+ REG_A6XX_CP_PROTECT(26),
+ REG_A6XX_CP_PROTECT(27),
+ REG_A6XX_CP_PROTECT(28),
+ REG_A6XX_CP_PROTECT(29),
+ REG_A6XX_CP_PROTECT(30),
+ REG_A6XX_CP_PROTECT(31),
+ REG_A6XX_CP_PROTECT(32),
+ REG_A6XX_CP_PROTECT(33),
+ REG_A6XX_CP_PROTECT(34),
+ REG_A6XX_CP_PROTECT(35),
+ REG_A6XX_CP_PROTECT(36),
+ REG_A6XX_CP_PROTECT(37),
+ REG_A6XX_CP_PROTECT(38),
+ REG_A6XX_CP_PROTECT(39),
+ REG_A6XX_CP_PROTECT(40),
+ REG_A6XX_CP_PROTECT(41),
+ REG_A6XX_CP_PROTECT(42),
+ REG_A6XX_CP_PROTECT(43),
+ REG_A6XX_CP_PROTECT(44),
+ REG_A6XX_CP_PROTECT(45),
+ REG_A6XX_CP_PROTECT(46),
+ REG_A6XX_CP_PROTECT(47),
+};
+
+DECLARE_ADRENO_REGLIST_LIST(a750_ifpc_reglist);
+
static const struct adreno_info a7xx_gpus[] = {
{
.chip_ids = ADRENO_CHIP_IDS(0x07000200),
@@ -1432,14 +1505,27 @@ static const struct adreno_info a7xx_gpus[] = {
.inactive_period = DRM_MSM_INACTIVE_PERIOD,
.quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT |
ADRENO_QUIRK_HAS_HW_APRIV |
- ADRENO_QUIRK_PREEMPTION,
+ ADRENO_QUIRK_PREEMPTION |
+ ADRENO_QUIRK_IFPC,
.init = a6xx_gpu_init,
.a6xx = &(const struct a6xx_info) {
.hwcg = a740_hwcg,
.protect = &a730_protect,
.pwrup_reglist = &a7xx_pwrup_reglist,
+ .ifpc_reglist = &a750_ifpc_reglist,
.gmu_chipid = 0x7050001,
.gmu_cgc_mode = 0x00020202,
+ .bcms = (const struct a6xx_bcm[]) {
+ { .name = "SH0", .buswidth = 16 },
+ { .name = "MC0", .buswidth = 4 },
+ {
+ .name = "ACV",
+ .fixed = true,
+ .perfmode = BIT(3),
+ .perfmode_bw = 16500000,
+ },
+ { /* sentinel */ },
+ },
},
.preempt_record_size = 4192 * SZ_1K,
.speedbins = ADRENO_SPEEDBINS(
@@ -1460,12 +1546,14 @@ static const struct adreno_info a7xx_gpus[] = {
.inactive_period = DRM_MSM_INACTIVE_PERIOD,
.quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT |
ADRENO_QUIRK_HAS_HW_APRIV |
- ADRENO_QUIRK_PREEMPTION,
+ ADRENO_QUIRK_PREEMPTION |
+ ADRENO_QUIRK_IFPC,
.init = a6xx_gpu_init,
.zapfw = "gen70900_zap.mbn",
.a6xx = &(const struct a6xx_info) {
.protect = &a730_protect,
.pwrup_reglist = &a7xx_pwrup_reglist,
+ .ifpc_reglist = &a750_ifpc_reglist,
.gmu_chipid = 0x7090100,
.gmu_cgc_mode = 0x00020202,
.bcms = (const struct a6xx_bcm[]) {
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 28e6705c6da6..fc62fef2fed8 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -93,14 +93,25 @@ bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu)
/* Check to see if the GX rail is still powered */
bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
{
+ struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
u32 val;
/* This can be called from gpu state code so make sure GMU is valid */
if (!gmu->initialized)
return false;
+ /* If GMU is absent, then GX power domain is ON as long as GPU is in active state */
+ if (adreno_has_gmu_wrapper(adreno_gpu))
+ return true;
+
val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
+ if (adreno_is_a7xx(adreno_gpu))
+ return !(val &
+ (A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF |
+ A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
+
return !(val &
(A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF |
A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
@@ -272,6 +283,8 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu)
if (ret)
DRM_DEV_ERROR(gmu->dev, "GMU firmware initialization timed out\n");
+ set_bit(GMU_STATUS_FW_START, &gmu->status);
+
return ret;
}
@@ -403,7 +416,10 @@ int a6xx_sptprac_enable(struct a6xx_gmu *gmu)
int ret;
u32 val;
- if (!gmu->legacy)
+ WARN_ON(!gmu->legacy);
+
+ /* Nothing to do if GMU does the power management */
+ if (gmu->idle_level > GMU_IDLE_STATE_ACTIVE)
return 0;
gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778000);
@@ -518,6 +534,9 @@ static int a6xx_rpmh_start(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (!test_and_clear_bit(GMU_STATUS_PDC_SLEEP, &gmu->status))
+ return 0;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1));
ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val,
@@ -545,6 +564,9 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (test_and_clear_bit(GMU_STATUS_FW_START, &gmu->status))
+ return;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1);
ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0,
@@ -553,6 +575,8 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
DRM_DEV_ERROR(gmu->dev, "Unable to power off the GPU RSC\n");
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
+
+ set_bit(GMU_STATUS_PDC_SLEEP, &gmu->status);
}
static inline void pdc_write(void __iomem *ptr, u32 offset, u32 value)
@@ -681,8 +705,6 @@ setup_pdc:
/* ensure no writes happen before the uCode is fully written */
wmb();
- a6xx_rpmh_stop(gmu);
-
err:
if (!IS_ERR_OR_NULL(pdcptr))
iounmap(pdcptr);
@@ -842,19 +864,15 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
else
gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
- if (state == GMU_WARM_BOOT) {
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
- } else {
+ ret = a6xx_rpmh_start(gmu);
+ if (ret)
+ return ret;
+
+ if (state == GMU_COLD_BOOT) {
if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU],
"GMU firmware is not loaded\n"))
return -ENOENT;
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
-
ret = a6xx_gmu_fw_load(gmu);
if (ret)
return ret;
@@ -925,10 +943,7 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
ret = a6xx_gmu_gfx_rail_on(gmu);
if (ret)
return ret;
- }
- /* Enable SPTP_PC if the CPU is responsible for it */
- if (gmu->idle_level < GMU_IDLE_STATE_SPTP) {
ret = a6xx_sptprac_enable(gmu);
if (ret)
return ret;
@@ -980,6 +995,22 @@ static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu)
val, (val & 1), 100, 10000);
gmu_poll_timeout_rscc(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS + seqmem_off,
val, (val & 1), 100, 1000);
+
+ if (!adreno_is_a740_family(adreno_gpu))
+ return;
+
+ gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS4_DRV0_STATUS + seqmem_off,
+ val, (val & 1), 100, 10000);
+ gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS5_DRV0_STATUS + seqmem_off,
+ val, (val & 1), 100, 10000);
+ gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS6_DRV0_STATUS + seqmem_off,
+ val, (val & 1), 100, 10000);
+ gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS7_DRV0_STATUS + seqmem_off,
+ val, (val & 1), 100, 1000);
+ gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS8_DRV0_STATUS + seqmem_off,
+ val, (val & 1), 100, 10000);
+ gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS9_DRV0_STATUS + seqmem_off,
+ val, (val & 1), 100, 1000);
}
/* Force the GMU off in case it isn't responsive */
@@ -1023,6 +1054,8 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
/* Reset GPU core blocks */
a6xx_gpu_sw_reset(gpu, true);
+
+ a6xx_rpmh_stop(gmu);
}
static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
@@ -1128,6 +1161,11 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
/* Set the GPU to the current freq */
a6xx_gmu_set_initial_freq(gpu, gmu);
+ if (refcount_read(&gpu->sysprof_active) > 1) {
+ ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
+ if (!ret)
+ set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status);
+ }
out:
/* On failure, shut down the GMU to leave it in a good state */
if (ret) {
@@ -1175,6 +1213,9 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
}
+ if (test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status))
+ a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
+
ret = a6xx_gmu_wait_for_idle(gmu);
/* If the GMU isn't responding assume it is hung */
@@ -1318,8 +1359,6 @@ static int a6xx_gmu_memory_probe(struct drm_device *drm, struct a6xx_gmu *gmu)
struct msm_mmu *mmu;
mmu = msm_iommu_new(gmu->dev, 0);
- if (!mmu)
- return -ENODEV;
if (IS_ERR(mmu))
return PTR_ERR(mmu);
@@ -1692,6 +1731,7 @@ static int a6xx_gmu_acd_probe(struct a6xx_gmu *gmu)
u32 val;
freq = gmu->gpu_freqs[i];
+ /* This is unlikely to fail because we are passing back a known freq */
opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, freq, true);
np = dev_pm_opp_get_of_node(opp);
@@ -1790,6 +1830,35 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev,
return irq;
}
+void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+ struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+ unsigned int sysprof_active;
+
+ /* Nothing to do if GPU is suspended. We will handle this during GMU resume */
+ if (!pm_runtime_get_if_active(&gpu->pdev->dev))
+ return;
+
+ mutex_lock(&gmu->lock);
+
+ sysprof_active = refcount_read(&gpu->sysprof_active);
+
+ /*
+ * 'Perfcounter select' register values are lost during IFPC collapse. To avoid that,
+ * use the currently unused perfcounter oob vote to block IFPC when sysprof is active
+ */
+ if ((sysprof_active > 1) && !test_and_set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status))
+ a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
+ else if ((sysprof_active == 1) && test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status))
+ a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
+
+ mutex_unlock(&gmu->lock);
+
+ pm_runtime_put(&gpu->pdev->dev);
+}
+
void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
{
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
@@ -1932,8 +2001,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
if (ret)
return ret;
- /* Fow now, don't do anything fancy until we get our feet under us */
- gmu->idle_level = GMU_IDLE_STATE_ACTIVE;
+ /* Set GMU idle level */
+ gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ?
+ GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE;
pm_runtime_enable(gmu->dev);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index d1ce11131ba6..06cfc294016f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -50,6 +50,9 @@ struct a6xx_bcm {
/* The GMU does not do any idle state management */
#define GMU_IDLE_STATE_ACTIVE 0
+/* Unknown power state. Not exposed by the firmware. For documentation purpose only */
+#define GMU_IDLE_STATE_RESERVED 1
+
/* The GMU manages SPTP power collapse */
#define GMU_IDLE_STATE_SPTP 2
@@ -117,6 +120,14 @@ struct a6xx_gmu {
struct qmp *qmp;
struct a6xx_hfi_msg_bw_table *bw_table;
+
+/* To check if we can trigger sleep seq at PDC. Cleared in a6xx_rpmh_stop() */
+#define GMU_STATUS_FW_START 0
+/* To track if PDC sleep seq was done */
+#define GMU_STATUS_PDC_SLEEP 1
+/* To track Perfcounter OOB set status */
+#define GMU_STATUS_OOB_PERF_SET 2
+ unsigned long status;
};
static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset)
@@ -158,6 +169,9 @@ static inline u64 gmu_read64(struct a6xx_gmu *gmu, u32 lo, u32 hi)
#define gmu_poll_timeout(gmu, addr, val, cond, interval, timeout) \
readl_poll_timeout((gmu)->mmio + ((addr) << 2), val, cond, \
interval, timeout)
+#define gmu_poll_timeout_atomic(gmu, addr, val, cond, interval, timeout) \
+ readl_poll_timeout_atomic((gmu)->mmio + ((addr) << 2), val, cond, \
+ interval, timeout)
static inline u32 gmu_read_rscc(struct a6xx_gmu *gmu, u32 offset)
{
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 45dd5fd1c2bf..b8f8ae940b55 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -16,6 +16,97 @@
#define GPU_PAS_ID 13
+static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu)
+{
+ u64 count_hi, count_lo, temp;
+
+ do {
+ count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
+ count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L);
+ temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
+ } while (unlikely(count_hi != temp));
+
+ return (count_hi << 32) | count_lo;
+}
+
+static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
+{
+ /* Success if !writedropped0/1 */
+ if (!(status & mask))
+ return true;
+
+ udelay(10);
+
+ /* Try to update fenced register again */
+ gpu_write(gpu, offset, value);
+
+ /* We can't do a posted write here because the power domain could be
+ * in collapse state. So use the heaviest barrier instead
+ */
+ mb();
+ return false;
+}
+
+static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
+{
+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ struct msm_gpu *gpu = &adreno_gpu->base;
+ struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+ u32 status;
+
+ gpu_write(gpu, offset, value);
+
+ /* Nothing else to be done in the case of no-GMU */
+ if (adreno_has_gmu_wrapper(adreno_gpu))
+ return 0;
+
+ /* We can't do a posted write here because the power domain could be
+ * in collapse state. So use the heaviest barrier instead
+ */
+ mb();
+
+ if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
+ fence_status_check(gpu, offset, value, status, mask), 0, 1000))
+ return 0;
+
+ /* Try again for another 1ms before failing */
+ gpu_write(gpu, offset, value);
+ mb();
+
+ if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
+ fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
+ /*
+ * The 'delay' warning is here because the pause to print this
+ * warning will allow gpu to move to power collapse which
+ * defeats the purpose of continuous polling for 2 ms
+ */
+ dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
+ offset);
+ return 0;
+ }
+
+ dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
+ offset);
+
+ return -ETIMEDOUT;
+}
+
+int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
+{
+ int ret;
+
+ ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
+ if (ret)
+ return ret;
+
+ if (!is_64b)
+ return 0;
+
+ ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
+
+ return ret;
+}
+
static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -86,7 +177,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
/* Update HW if this is the current ring and we are not in preempt*/
if (!a6xx_in_preempt(a6xx_gpu)) {
if (a6xx_gpu->cur_ring == ring)
- gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+ a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
else
ring->restore_wptr = true;
} else {
@@ -173,8 +264,8 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
* Needed for preemption
*/
OUT_PKT7(ring, CP_MEM_WRITE, 5);
- OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
- OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
+ OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr)));
+ OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr)));
OUT_RING(ring, lower_32_bits(ttbr));
OUT_RING(ring, upper_32_bits(ttbr));
OUT_RING(ring, ctx->seqno);
@@ -204,9 +295,9 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
*/
OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
- OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
+ OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(
REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
- OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
+ OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
@@ -298,8 +389,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, submit->seqno);
- trace_msm_gpu_submit_flush(submit,
- gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
+ trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));
a6xx_flush(gpu, ring);
}
@@ -499,8 +589,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
}
- trace_msm_gpu_submit_flush(submit,
- gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
+ trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));
a6xx_flush(gpu, ring);
@@ -739,11 +828,10 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
u32 *dest = (u32 *)&lock->regs[0];
int i;
- reglist = adreno_gpu->info->a6xx->pwrup_reglist;
-
lock->gpu_req = lock->cpu_req = lock->turn = 0;
- lock->ifpc_list_len = 0;
- lock->preemption_list_len = reglist->count;
+
+ reglist = adreno_gpu->info->a6xx->ifpc_reglist;
+ lock->ifpc_list_len = reglist->count;
/*
* For each entry in each of the lists, write the offset and the current
@@ -754,6 +842,14 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
*dest++ = gpu_read(gpu, reglist->regs[i]);
}
+ reglist = adreno_gpu->info->a6xx->pwrup_reglist;
+ lock->preemption_list_len = reglist->count;
+
+ for (i = 0; i < reglist->count; i++) {
+ *dest++ = reglist->regs[i];
+ *dest++ = gpu_read(gpu, reglist->regs[i]);
+ }
+
/*
* The overall register list is composed of
* 1. Static IFPC-only registers
@@ -1241,14 +1337,14 @@ static int hw_init(struct msm_gpu *gpu)
/* Set weights for bicubic filtering */
if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1),
0x3fe05ff4);
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2),
0x3fa0ebee);
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3),
0x3f5193ed);
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4),
0x3f0243f0);
}
@@ -1448,21 +1544,25 @@ static void a6xx_recover(struct msm_gpu *gpu)
adreno_dump_info(gpu);
- for (i = 0; i < 8; i++)
- DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
- gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
+ if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) {
+ /* Sometimes crashstate capture is skipped, so SQE should be halted here again */
+ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
+
+ for (i = 0; i < 8; i++)
+ DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
+ gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
- if (hang_debug)
- a6xx_dump(gpu);
+ if (hang_debug)
+ a6xx_dump(gpu);
+
+ }
/*
* To handle recovery specific sequences during the rpm suspend we are
* about to trigger
*/
- a6xx_gpu->hung = true;
- /* Halt SQE first */
- gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
+ a6xx_gpu->hung = true;
pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
@@ -1693,8 +1793,6 @@ static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
{
- struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
/*
@@ -1706,13 +1804,6 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
return;
- /*
- * Force the GPU to stay on until after we finish
- * collecting information
- */
- if (!adreno_has_gmu_wrapper(adreno_gpu))
- gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
-
DRM_DEV_ERROR(&gpu->pdev->dev,
"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
@@ -1727,6 +1818,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
/* Turn off the hangcheck timer to keep it from bothering us */
timer_delete(&gpu->hangcheck_timer);
+ /* Turn off interrupts to avoid triggering recovery again */
+ gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0);
+
kthread_queue_work(gpu->worker, &gpu->recover_work);
}
@@ -1751,9 +1845,49 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
}
}
+static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+
+ if (adreno_has_gmu_wrapper(adreno_gpu))
+ return;
+
+ gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on);
+}
+
+static int irq_poll_fence(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+ struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+ u32 status;
+
+ if (adreno_has_gmu_wrapper(adreno_gpu))
+ return 0;
+
+ if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) {
+ u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS);
+
+ dev_err_ratelimited(&gpu->pdev->dev,
+ "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n",
+ status, rbbm_unmasked);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
{
struct msm_drm_private *priv = gpu->dev->dev_private;
+
+ /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */
+ a6xx_gpu_keepalive_vote(gpu, true);
+
+ if (irq_poll_fence(gpu))
+ goto done;
+
u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
@@ -1790,6 +1924,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
a6xx_preempt_irq(gpu);
+done:
+ a6xx_gpu_keepalive_vote(gpu, false);
+
return IRQ_HANDLED;
}
@@ -2179,16 +2316,7 @@ static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
- mutex_lock(&a6xx_gpu->gmu.lock);
-
- /* Force the GPU power on so we can read this register */
- a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
-
- *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
-
- a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
-
- mutex_unlock(&a6xx_gpu->gmu.lock);
+ *value = read_gmu_ao_counter(a6xx_gpu);
return 0;
}
@@ -2298,18 +2426,36 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
return a6xx_gpu->shadow[ring->id];
+ /*
+ * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware
+ * without 'whereami' support
+ */
+ WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC),
+ "Can't read CP_RB_RPTR register reliably\n");
+
return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
}
static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
- struct msm_cp_state cp_state = {
+ struct msm_cp_state cp_state;
+ bool progress;
+
+ /*
+ * With IFPC, KMD doesn't know whether GX power domain is collapsed
+ * or not. So, we can't blindly read the below registers in GX domain.
+ * Lets trust the hang detection in HW and lie to the caller that
+ * there was progress.
+ */
+ if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC)
+ return true;
+
+ cp_state = (struct msm_cp_state) {
.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
.ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
.ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
};
- bool progress;
/*
* Adjust the remaining data to account for what has already been
@@ -2408,6 +2554,7 @@ static const struct adreno_gpu_funcs funcs = {
.create_private_vm = a6xx_create_private_vm,
.get_rptr = a6xx_get_rptr,
.progress = a6xx_progress,
+ .sysprof_setup = a6xx_gmu_sysprof_setup,
},
.get_timestamp = a6xx_gmu_get_timestamp,
};
@@ -2468,6 +2615,7 @@ static const struct adreno_gpu_funcs funcs_a7xx = {
.create_private_vm = a6xx_create_private_vm,
.get_rptr = a6xx_get_rptr,
.progress = a6xx_progress,
+ .sysprof_setup = a6xx_gmu_sysprof_setup,
},
.get_timestamp = a6xx_gmu_get_timestamp,
};
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 6e71f617fc3d..0b17d36c36a9 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -45,6 +45,7 @@ struct a6xx_info {
const struct adreno_reglist *hwcg;
const struct adreno_protect *protect;
const struct adreno_reglist_list *pwrup_reglist;
+ const struct adreno_reglist_list *ifpc_reglist;
u32 gmu_chipid;
u32 gmu_cgc_mode;
u32 prim_fifo_threshold;
@@ -254,6 +255,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
+void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu);
void a6xx_preempt_init(struct msm_gpu *gpu);
void a6xx_preempt_hw_init(struct msm_gpu *gpu);
@@ -295,5 +297,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state);
void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert);
+int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b);
#endif /* __A6XX_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index d5d1271fce61..4c7f3c642f6a 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -1586,8 +1586,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
GFP_KERNEL);
- bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
- A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
+ bool stalled;
if (!a6xx_state)
return ERR_PTR(-ENOMEM);
@@ -1608,15 +1607,20 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
}
/* If GX isn't on the rest of the data isn't going to be accessible */
- if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
+ if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
return &a6xx_state->base;
+ /* Halt SQE first */
+ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
+
/* Get the banks of indexed registers */
if (adreno_is_a7xx(adreno_gpu))
a7xx_get_indexed_registers(gpu, a6xx_state);
else
a6xx_get_indexed_registers(gpu, a6xx_state);
+ stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
+ A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
/*
* Try to initialize the crashdumper, if we are not dumping state
* with the SMMU stalled. The crashdumper needs memory access to
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
index 8e69b1e84657..550de6ad68ef 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
@@ -21,6 +21,7 @@ static const char * const a6xx_hfi_msg_id[] = {
HFI_MSG_ID(HFI_H2F_MSG_PERF_TABLE),
HFI_MSG_ID(HFI_H2F_MSG_TEST),
HFI_MSG_ID(HFI_H2F_MSG_START),
+ HFI_MSG_ID(HFI_H2F_FEATURE_CTRL),
HFI_MSG_ID(HFI_H2F_MSG_CORE_FW_START),
HFI_MSG_ID(HFI_H2F_MSG_GX_BW_PERF_VOTE),
HFI_MSG_ID(HFI_H2F_MSG_PREPARE_SLUMBER),
@@ -765,23 +766,40 @@ send:
NULL, 0);
}
+static int a6xx_hfi_feature_ctrl_msg(struct a6xx_gmu *gmu, u32 feature, u32 enable, u32 data)
+{
+ struct a6xx_hfi_msg_feature_ctrl msg = {
+ .feature = feature,
+ .enable = enable,
+ .data = data,
+ };
+
+ return a6xx_hfi_send_msg(gmu, HFI_H2F_FEATURE_CTRL, &msg, sizeof(msg), NULL, 0);
+}
+
+#define HFI_FEATURE_IFPC 9
+#define IFPC_LONG_HYST 0x1680
+
+static int a6xx_hfi_enable_ifpc(struct a6xx_gmu *gmu)
+{
+ if (gmu->idle_level != GMU_IDLE_STATE_IFPC)
+ return 0;
+
+ return a6xx_hfi_feature_ctrl_msg(gmu, HFI_FEATURE_IFPC, 1, IFPC_LONG_HYST);
+}
+
#define HFI_FEATURE_ACD 12
static int a6xx_hfi_enable_acd(struct a6xx_gmu *gmu)
{
struct a6xx_hfi_acd_table *acd_table = &gmu->acd_table;
- struct a6xx_hfi_msg_feature_ctrl msg = {
- .feature = HFI_FEATURE_ACD,
- .enable = 1,
- .data = 0,
- };
int ret;
if (!acd_table->enable_by_level)
return 0;
/* Enable ACD feature at GMU */
- ret = a6xx_hfi_send_msg(gmu, HFI_H2F_FEATURE_CTRL, &msg, sizeof(msg), NULL, 0);
+ ret = a6xx_hfi_feature_ctrl_msg(gmu, HFI_FEATURE_ACD, 1, 0);
if (ret) {
DRM_DEV_ERROR(gmu->dev, "Unable to enable ACD (%d)\n", ret);
return ret;
@@ -898,6 +916,10 @@ int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state)
if (ret)
return ret;
+ ret = a6xx_hfi_enable_ifpc(gmu);
+ if (ret)
+ return ret;
+
ret = a6xx_hfi_send_core_fw_start(gmu);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
index 6a12a35dabff..afc5f4aa3b17 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
@@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu,
}
/* Write the most recent wptr for the given ring into the hardware */
-static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring)
{
unsigned long flags;
uint32_t wptr;
@@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
if (ring->restore_wptr) {
wptr = get_wptr(ring);
- gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+ a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
ring->restore_wptr = false;
}
@@ -111,9 +111,9 @@ static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
- postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
+ postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_LO(
REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
- postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0);
+ postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_HI(0);
postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);
@@ -136,6 +136,21 @@ static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu)
a6xx_gpu->postamble_enabled = false;
}
+/*
+ * Set preemption keepalive vote. Please note that this vote is different from the one used in
+ * a6xx_irq()
+ */
+static void a6xx_preempt_keepalive_vote(struct msm_gpu *gpu, bool on)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+
+ if (adreno_has_gmu_wrapper(adreno_gpu))
+ return;
+
+ gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, on);
+}
+
void a6xx_preempt_irq(struct msm_gpu *gpu)
{
uint32_t status;
@@ -172,10 +187,12 @@ void a6xx_preempt_irq(struct msm_gpu *gpu)
set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
- update_wptr(gpu, a6xx_gpu->cur_ring);
+ update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
set_preempt_state(a6xx_gpu, PREEMPT_NONE);
+ a6xx_preempt_keepalive_vote(gpu, false);
+
trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id);
/*
@@ -268,7 +285,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
*/
if (!ring || (a6xx_gpu->cur_ring == ring)) {
set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
- update_wptr(gpu, a6xx_gpu->cur_ring);
+ update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
set_preempt_state(a6xx_gpu, PREEMPT_NONE);
spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
return;
@@ -302,13 +319,16 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
spin_unlock_irqrestore(&ring->preempt_lock, flags);
- gpu_write64(gpu,
- REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
- a6xx_gpu->preempt_smmu_iova[ring->id]);
+ /* Set the keepalive bit to keep the GPU ON until preemption is complete */
+ a6xx_preempt_keepalive_vote(gpu, true);
+
+ a6xx_fenced_write(a6xx_gpu,
+ REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id],
+ BIT(1), true);
- gpu_write64(gpu,
+ a6xx_fenced_write(a6xx_gpu,
REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
- a6xx_gpu->preempt_iova[ring->id]);
+ a6xx_gpu->preempt_iova[ring->id], BIT(1), true);
a6xx_gpu->next_ring = ring;
@@ -328,7 +348,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);
/* Trigger the preemption */
- gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl);
+ a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false);
}
static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 50945bfe9b49..28f744f3caf7 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -24,6 +24,10 @@ bool disable_acd;
MODULE_PARM_DESC(disable_acd, "Forcefully disable GPU ACD");
module_param_unsafe(disable_acd, bool, 0400);
+static bool skip_gpu;
+MODULE_PARM_DESC(no_gpu, "Disable GPU driver register (0=enable GPU driver register (default), 1=skip GPU driver register");
+module_param(skip_gpu, bool, 0400);
+
extern const struct adreno_gpulist a2xx_gpulist;
extern const struct adreno_gpulist a3xx_gpulist;
extern const struct adreno_gpulist a4xx_gpulist;
@@ -184,6 +188,9 @@ bool adreno_has_gpu(struct device_node *node)
uint32_t chip_id;
int ret;
+ if (skip_gpu)
+ return false;
+
ret = find_chipid(node, &chip_id);
if (ret)
return false;
@@ -404,10 +411,16 @@ static struct platform_driver adreno_driver = {
void __init adreno_register(void)
{
+ if (skip_gpu)
+ return;
+
platform_driver_register(&adreno_driver);
}
void __exit adreno_unregister(void)
{
+ if (skip_gpu)
+ return;
+
platform_driver_unregister(&adreno_driver);
}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index f1230465bf0d..afaa3cfefd35 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -10,7 +10,7 @@
#include <linux/interconnect.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/kernel.h>
-#include <linux/of_address.h>
+#include <linux/of_reserved_mem.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/soc/qcom/mdt_loader.h>
@@ -33,7 +33,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
struct device *dev = &gpu->pdev->dev;
const struct firmware *fw;
const char *signed_fwname = NULL;
- struct device_node *np, *mem_np;
+ struct device_node *np;
struct resource r;
phys_addr_t mem_phys;
ssize_t mem_size;
@@ -51,18 +51,11 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
return -ENODEV;
}
- mem_np = of_parse_phandle(np, "memory-region", 0);
- of_node_put(np);
- if (!mem_np) {
+ ret = of_reserved_mem_region_to_resource(np, 0, &r);
+ if (ret) {
zap_available = false;
- return -EINVAL;
- }
-
- ret = of_address_to_resource(mem_np, 0, &r);
- of_node_put(mem_np);
- if (ret)
return ret;
-
+ }
mem_phys = r.start;
/*
@@ -209,9 +202,7 @@ adreno_iommu_create_vm(struct msm_gpu *gpu,
u64 start, size;
mmu = msm_iommu_gpu_new(&pdev->dev, gpu, quirks);
- if (!mmu)
- return ERR_PTR(-ENODEV);
- else if (IS_ERR_OR_NULL(mmu))
+ if (IS_ERR(mmu))
return ERR_CAST(mmu);
geometry = msm_iommu_get_geometry(mmu);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 9dc93c247196..390fa6720d9b 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -59,6 +59,7 @@ enum adreno_family {
#define ADRENO_QUIRK_HAS_CACHED_COHERENT BIT(4)
#define ADRENO_QUIRK_PREEMPTION BIT(5)
#define ADRENO_QUIRK_4GB_VA BIT(6)
+#define ADRENO_QUIRK_IFPC BIT(7)
/* Helper for formating the chip_id in the way that userspace tools like
* crashdec expect.
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
index 0fb5789c60d0..13cc658065c5 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
@@ -32,6 +32,26 @@ enum dpu_perf_mode {
};
/**
+ * dpu_core_perf_adjusted_mode_clk - Adjust given mode clock rate according to
+ * the perf clock factor.
+ * @crtc_clk_rate - Unadjusted mode clock rate
+ * @perf_cfg: performance configuration
+ */
+u64 dpu_core_perf_adjusted_mode_clk(u64 mode_clk_rate,
+ const struct dpu_perf_cfg *perf_cfg)
+{
+ u32 clk_factor;
+
+ clk_factor = perf_cfg->clk_inefficiency_factor;
+ if (clk_factor) {
+ mode_clk_rate *= clk_factor;
+ do_div(mode_clk_rate, 100);
+ }
+
+ return mode_clk_rate;
+}
+
+/**
* _dpu_core_perf_calc_bw() - to calculate BW per crtc
* @perf_cfg: performance configuration
* @crtc: pointer to a crtc
@@ -75,28 +95,21 @@ static u64 _dpu_core_perf_calc_clk(const struct dpu_perf_cfg *perf_cfg,
struct drm_plane *plane;
struct dpu_plane_state *pstate;
struct drm_display_mode *mode;
- u64 crtc_clk;
- u32 clk_factor;
+ u64 mode_clk;
mode = &state->adjusted_mode;
- crtc_clk = (u64)mode->vtotal * mode->hdisplay * drm_mode_vrefresh(mode);
+ mode_clk = (u64)mode->vtotal * mode->hdisplay * drm_mode_vrefresh(mode);
drm_atomic_crtc_for_each_plane(plane, crtc) {
pstate = to_dpu_plane_state(plane->state);
if (!pstate)
continue;
- crtc_clk = max(pstate->plane_clk, crtc_clk);
- }
-
- clk_factor = perf_cfg->clk_inefficiency_factor;
- if (clk_factor) {
- crtc_clk *= clk_factor;
- do_div(crtc_clk, 100);
+ mode_clk = max(pstate->plane_clk, mode_clk);
}
- return crtc_clk;
+ return dpu_core_perf_adjusted_mode_clk(mode_clk, perf_cfg);
}
static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
index d2f21d34e501..3740bc97422c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
@@ -54,6 +54,9 @@ struct dpu_core_perf {
u32 fix_core_ab_vote;
};
+u64 dpu_core_perf_adjusted_mode_clk(u64 clk_rate,
+ const struct dpu_perf_cfg *perf_cfg);
+
int dpu_core_perf_crtc_check(struct drm_crtc *crtc,
struct drm_crtc_state *state);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index 94912b4708fb..4b970a59deaf 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -377,11 +377,10 @@ static void _dpu_crtc_setup_blend_cfg(struct dpu_crtc_mixer *mixer,
static void _dpu_crtc_program_lm_output_roi(struct drm_crtc *crtc)
{
struct dpu_crtc_state *crtc_state;
- int lm_idx, lm_horiz_position;
+ int lm_idx;
crtc_state = to_dpu_crtc_state(crtc->state);
- lm_horiz_position = 0;
for (lm_idx = 0; lm_idx < crtc_state->num_mixers; lm_idx++) {
const struct drm_rect *lm_roi = &crtc_state->lm_bounds[lm_idx];
struct dpu_hw_mixer *hw_lm = crtc_state->mixers[lm_idx].hw_lm;
@@ -392,7 +391,7 @@ static void _dpu_crtc_program_lm_output_roi(struct drm_crtc *crtc)
cfg.out_width = drm_rect_width(lm_roi);
cfg.out_height = drm_rect_height(lm_roi);
- cfg.right_mixer = lm_horiz_position++;
+ cfg.right_mixer = lm_idx & 0x1;
cfg.flags = 0;
hw_lm->ops.setup_mixer_out(hw_lm, &cfg);
}
@@ -1534,6 +1533,7 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc,
const struct drm_display_mode *mode)
{
struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc);
+ u64 adjusted_mode_clk;
/* if there is no 3d_mux block we cannot merge LMs so we cannot
* split the large layer into 2 LMs, filter out such modes
@@ -1541,6 +1541,17 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc,
if (!dpu_kms->catalog->caps->has_3d_merge &&
mode->hdisplay > dpu_kms->catalog->caps->max_mixer_width)
return MODE_BAD_HVALUE;
+
+ adjusted_mode_clk = dpu_core_perf_adjusted_mode_clk(mode->clock,
+ dpu_kms->perf.perf_cfg);
+
+ /*
+ * The given mode, adjusted for the perf clock factor, should not exceed
+ * the max core clock rate
+ */
+ if (dpu_kms->perf.max_core_clk_rate < adjusted_mode_clk * 1000)
+ return MODE_CLOCK_HIGH;
+
/*
* max crtc width is equal to the max mixer width * 2 and max height is 4K
*/
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index 56a5b596554d..46f348972a97 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -446,7 +446,7 @@ static void _dpu_encoder_phys_wb_handle_wbdone_timeout(
static int dpu_encoder_phys_wb_wait_for_commit_done(
struct dpu_encoder_phys *phys_enc)
{
- unsigned long ret;
+ int ret;
struct dpu_encoder_wait_info wait_info;
struct dpu_encoder_phys_wb *wb_enc = to_dpu_encoder_phys_wb(phys_enc);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index e824cd64fd3f..6641455c4ec6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -338,7 +338,6 @@ static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK();
*************************************************************/
static const struct dpu_lm_sub_blks msm8998_lm_sblk = {
- .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
.maxblendstages = 7, /* excluding base layer */
.blendstage_base = { /* offsets relative to mixer base */
0x20, 0x50, 0x80, 0xb0, 0x230,
@@ -347,7 +346,6 @@ static const struct dpu_lm_sub_blks msm8998_lm_sblk = {
};
static const struct dpu_lm_sub_blks sdm845_lm_sblk = {
- .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
.maxblendstages = 11, /* excluding base layer */
.blendstage_base = { /* offsets relative to mixer base */
0x20, 0x38, 0x50, 0x68, 0x80, 0x98,
@@ -356,7 +354,6 @@ static const struct dpu_lm_sub_blks sdm845_lm_sblk = {
};
static const struct dpu_lm_sub_blks sc7180_lm_sblk = {
- .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
.maxblendstages = 7, /* excluding base layer */
.blendstage_base = { /* offsets relative to mixer base */
0x20, 0x38, 0x50, 0x68, 0x80, 0x98, 0xb0
@@ -364,7 +361,6 @@ static const struct dpu_lm_sub_blks sc7180_lm_sblk = {
};
static const struct dpu_lm_sub_blks sm8750_lm_sblk = {
- .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
.maxblendstages = 11, /* excluding base layer */
.blendstage_base = { /* offsets relative to mixer base */
/* 0x40 + n*0x30 */
@@ -374,7 +370,6 @@ static const struct dpu_lm_sub_blks sm8750_lm_sblk = {
};
static const struct dpu_lm_sub_blks qcm2290_lm_sblk = {
- .maxwidth = DEFAULT_DPU_LINE_WIDTH,
.maxblendstages = 4, /* excluding base layer */
.blendstage_base = { /* offsets relative to mixer base */
0x20, 0x38, 0x50, 0x68
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index a78bb2c334e3..f0768f54e9b3 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -307,7 +307,6 @@ struct dpu_sspp_sub_blks {
* @blendstage_base: Blend-stage register base offset
*/
struct dpu_lm_sub_blks {
- u32 maxwidth;
u32 maxblendstages;
u32 blendstage_base[MAX_BLOCKS];
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index a306077647c3..4e5a8ecd31f7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -1110,7 +1110,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms)
{
struct drm_gpuvm *vm;
- vm = msm_kms_init_vm(dpu_kms->dev);
+ vm = msm_kms_init_vm(dpu_kms->dev, dpu_kms->dev->dev->parent);
if (IS_ERR(vm))
return PTR_ERR(vm);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 6859e8ef6b05..f54cf0faa1c7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -922,6 +922,9 @@ static int dpu_plane_is_multirect_capable(struct dpu_hw_sspp *sspp,
if (MSM_FORMAT_IS_YUV(fmt))
return false;
+ if (!sspp)
+ return true;
+
if (!test_bit(DPU_SSPP_SMART_DMA_V1, &sspp->cap->features) &&
!test_bit(DPU_SSPP_SMART_DMA_V2, &sspp->cap->features))
return false;
@@ -1028,6 +1031,7 @@ static int dpu_plane_try_multirect_shared(struct dpu_plane_state *pstate,
prev_pipe->multirect_mode != DPU_SSPP_MULTIRECT_NONE)
return false;
+ /* Do not validate SSPP of current plane when it is not ready */
if (!dpu_plane_is_multirect_capable(pipe->sspp, pipe_cfg, fmt) ||
!dpu_plane_is_multirect_capable(prev_pipe->sspp, prev_pipe_cfg, prev_fmt))
return false;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
index 25382120cb1a..2c77c74fac0f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c
@@ -865,6 +865,21 @@ void dpu_rm_release_all_sspp(struct dpu_global_state *global_state,
ARRAY_SIZE(global_state->sspp_to_crtc_id), crtc_id);
}
+static char *dpu_hw_blk_type_name[] = {
+ [DPU_HW_BLK_TOP] = "TOP",
+ [DPU_HW_BLK_SSPP] = "SSPP",
+ [DPU_HW_BLK_LM] = "LM",
+ [DPU_HW_BLK_CTL] = "CTL",
+ [DPU_HW_BLK_PINGPONG] = "pingpong",
+ [DPU_HW_BLK_INTF] = "INTF",
+ [DPU_HW_BLK_WB] = "WB",
+ [DPU_HW_BLK_DSPP] = "DSPP",
+ [DPU_HW_BLK_MERGE_3D] = "merge_3d",
+ [DPU_HW_BLK_DSC] = "DSC",
+ [DPU_HW_BLK_CDM] = "CDM",
+ [DPU_HW_BLK_MAX] = "unknown",
+};
+
/**
* dpu_rm_get_assigned_resources - Get hw resources of the given type that are
* assigned to this encoder
@@ -946,13 +961,13 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm,
}
if (num_blks == blks_size) {
- DPU_ERROR("More than %d resources assigned to crtc %d\n",
- blks_size, crtc_id);
+ DPU_ERROR("More than %d %s assigned to crtc %d\n",
+ blks_size, dpu_hw_blk_type_name[type], crtc_id);
break;
}
if (!hw_blks[i]) {
- DPU_ERROR("Allocated resource %d unavailable to assign to crtc %d\n",
- type, crtc_id);
+ DPU_ERROR("%s unavailable to assign to crtc %d\n",
+ dpu_hw_blk_type_name[type], crtc_id);
break;
}
blks[num_blks++] = hw_blks[i];
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c
index 8ff496082902..cd73468e369a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c
@@ -80,7 +80,6 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector,
static const struct drm_connector_funcs dpu_wb_conn_funcs = {
.reset = drm_atomic_helper_connector_reset,
.fill_modes = drm_helper_probe_single_connector_modes,
- .destroy = drm_connector_cleanup,
.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
};
@@ -131,12 +130,9 @@ int dpu_writeback_init(struct drm_device *dev, struct drm_encoder *enc,
drm_connector_helper_add(&dpu_wb_conn->base.base, &dpu_wb_conn_helper_funcs);
- /* DPU initializes the encoder and sets it up completely for writeback
- * cases and hence should use the new API drm_writeback_connector_init_with_encoder
- * to initialize the writeback connector
- */
- rc = drm_writeback_connector_init_with_encoder(dev, &dpu_wb_conn->base, enc,
- &dpu_wb_conn_funcs, format_list, num_formats);
+ rc = drmm_writeback_connector_init(dev, &dpu_wb_conn->base,
+ &dpu_wb_conn_funcs, enc,
+ format_list, num_formats);
if (!rc)
dpu_wb_conn->wb_enc = enc;
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
index 0952c7f18abd..809ca191e9de 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
@@ -391,11 +391,9 @@ static void read_mdp_hw_revision(struct mdp4_kms *mdp4_kms,
static int mdp4_kms_init(struct drm_device *dev)
{
- struct platform_device *pdev = to_platform_device(dev->dev);
struct msm_drm_private *priv = dev->dev_private;
struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(priv->kms));
struct msm_kms *kms = NULL;
- struct msm_mmu *mmu;
struct drm_gpuvm *vm;
int ret;
u32 major, minor;
@@ -458,29 +456,14 @@ static int mdp4_kms_init(struct drm_device *dev)
mdp4_disable(mdp4_kms);
mdelay(16);
- mmu = msm_iommu_new(&pdev->dev, 0);
- if (IS_ERR(mmu)) {
- ret = PTR_ERR(mmu);
+ vm = msm_kms_init_vm(mdp4_kms->dev, NULL);
+ if (IS_ERR(vm)) {
+ ret = PTR_ERR(vm);
goto fail;
- } else if (!mmu) {
- DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys "
- "contig buffers for scanout\n");
- vm = NULL;
- } else {
- vm = msm_gem_vm_create(dev, mmu, "mdp4",
- 0x1000, 0x100000000 - 0x1000,
- true);
-
- if (IS_ERR(vm)) {
- if (!IS_ERR(mmu))
- mmu->funcs->destroy(mmu);
- ret = PTR_ERR(vm);
- goto fail;
- }
-
- kms->vm = vm;
}
+ kms->vm = vm;
+
ret = modeset_init(mdp4_kms);
if (ret) {
DRM_DEV_ERROR(dev->dev, "modeset_init failed: %d\n", ret);
@@ -529,7 +512,7 @@ static int mdp4_probe(struct platform_device *pdev)
mdp4_kms = devm_kzalloc(dev, sizeof(*mdp4_kms), GFP_KERNEL);
if (!mdp4_kms)
- return dev_err_probe(dev, -ENOMEM, "failed to allocate kms\n");
+ return -ENOMEM;
mdp4_kms->mmio = msm_ioremap(pdev, NULL);
if (IS_ERR(mdp4_kms->mmio))
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h
index fb348583dc84..06458d4ee48c 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h
@@ -202,6 +202,6 @@ static inline struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev)
}
#endif
-struct clk *mpd4_get_lcdc_clock(struct drm_device *dev);
+struct clk *mdp4_get_lcdc_clock(struct drm_device *dev);
#endif /* __MDP4_KMS_H__ */
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
index 06a307c1272d..1051873057f6 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
@@ -375,7 +375,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev)
drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs);
- mdp4_lcdc_encoder->lcdc_clk = mpd4_get_lcdc_clock(dev);
+ mdp4_lcdc_encoder->lcdc_clk = mdp4_get_lcdc_clock(dev);
if (IS_ERR(mdp4_lcdc_encoder->lcdc_clk)) {
DRM_DEV_ERROR(dev->dev, "failed to get lvds_clk\n");
return ERR_CAST(mdp4_lcdc_encoder->lcdc_clk);
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c
index fa2c29470510..04c49bf3d854 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c
@@ -54,7 +54,7 @@ static const struct pll_rate *find_rate(unsigned long rate)
return &freqtbl[i-1];
}
-static int mpd4_lvds_pll_enable(struct clk_hw *hw)
+static int mdp4_lvds_pll_enable(struct clk_hw *hw)
{
struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw);
struct mdp4_kms *mdp4_kms = get_kms(lvds_pll);
@@ -80,7 +80,7 @@ static int mpd4_lvds_pll_enable(struct clk_hw *hw)
return 0;
}
-static void mpd4_lvds_pll_disable(struct clk_hw *hw)
+static void mdp4_lvds_pll_disable(struct clk_hw *hw)
{
struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw);
struct mdp4_kms *mdp4_kms = get_kms(lvds_pll);
@@ -91,21 +91,24 @@ static void mpd4_lvds_pll_disable(struct clk_hw *hw)
mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_PLL_CTRL_0, 0x0);
}
-static unsigned long mpd4_lvds_pll_recalc_rate(struct clk_hw *hw,
+static unsigned long mdp4_lvds_pll_recalc_rate(struct clk_hw *hw,
unsigned long parent_rate)
{
struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw);
return lvds_pll->pixclk;
}
-static long mpd4_lvds_pll_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *parent_rate)
+static int mdp4_lvds_pll_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
- const struct pll_rate *pll_rate = find_rate(rate);
- return pll_rate->rate;
+ const struct pll_rate *pll_rate = find_rate(req->rate);
+
+ req->rate = pll_rate->rate;
+
+ return 0;
}
-static int mpd4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate,
+static int mdp4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate,
unsigned long parent_rate)
{
struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw);
@@ -114,26 +117,26 @@ static int mpd4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate,
}
-static const struct clk_ops mpd4_lvds_pll_ops = {
- .enable = mpd4_lvds_pll_enable,
- .disable = mpd4_lvds_pll_disable,
- .recalc_rate = mpd4_lvds_pll_recalc_rate,
- .round_rate = mpd4_lvds_pll_round_rate,
- .set_rate = mpd4_lvds_pll_set_rate,
+static const struct clk_ops mdp4_lvds_pll_ops = {
+ .enable = mdp4_lvds_pll_enable,
+ .disable = mdp4_lvds_pll_disable,
+ .recalc_rate = mdp4_lvds_pll_recalc_rate,
+ .determine_rate = mdp4_lvds_pll_determine_rate,
+ .set_rate = mdp4_lvds_pll_set_rate,
};
-static const struct clk_parent_data mpd4_lvds_pll_parents[] = {
+static const struct clk_parent_data mdp4_lvds_pll_parents[] = {
{ .fw_name = "pxo", .name = "pxo", },
};
static struct clk_init_data pll_init = {
- .name = "mpd4_lvds_pll",
- .ops = &mpd4_lvds_pll_ops,
- .parent_data = mpd4_lvds_pll_parents,
- .num_parents = ARRAY_SIZE(mpd4_lvds_pll_parents),
+ .name = "mdp4_lvds_pll",
+ .ops = &mdp4_lvds_pll_ops,
+ .parent_data = mdp4_lvds_pll_parents,
+ .num_parents = ARRAY_SIZE(mdp4_lvds_pll_parents),
};
-static struct clk_hw *mpd4_lvds_pll_init(struct drm_device *dev)
+static struct clk_hw *mdp4_lvds_pll_init(struct drm_device *dev)
{
struct mdp4_lvds_pll *lvds_pll;
int ret;
@@ -156,14 +159,14 @@ static struct clk_hw *mpd4_lvds_pll_init(struct drm_device *dev)
return &lvds_pll->pll_hw;
}
-struct clk *mpd4_get_lcdc_clock(struct drm_device *dev)
+struct clk *mdp4_get_lcdc_clock(struct drm_device *dev)
{
struct clk_hw *hw;
struct clk *clk;
/* TODO: do we need different pll in other cases? */
- hw = mpd4_lvds_pll_init(dev);
+ hw = mdp4_lvds_pll_init(dev);
if (IS_ERR(hw)) {
DRM_DEV_ERROR(dev->dev, "failed to register LVDS PLL\n");
return ERR_CAST(hw);
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 5b6ca8dd929e..61edf6864092 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -534,7 +534,7 @@ static int mdp5_kms_init(struct drm_device *dev)
}
mdelay(16);
- vm = msm_kms_init_vm(mdp5_kms->dev);
+ vm = msm_kms_init_vm(mdp5_kms->dev, pdev->dev.parent);
if (IS_ERR(vm)) {
ret = PTR_ERR(vm);
goto fail;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index 3cbf08231492..e391505fdaf0 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -109,6 +109,7 @@ struct msm_dsi_phy {
struct msm_dsi_dphy_timing timing;
const struct msm_dsi_phy_cfg *cfg;
void *tuning_cfg;
+ void *pll_data;
enum msm_dsi_phy_usecase usecase;
bool regulator_ldo_mode;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
index af2e30f3f842..ec486ff02c9b 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
@@ -444,21 +444,19 @@ static unsigned long dsi_pll_10nm_vco_recalc_rate(struct clk_hw *hw,
return (unsigned long)vco_rate;
}
-static long dsi_pll_10nm_clk_round_rate(struct clk_hw *hw,
- unsigned long rate, unsigned long *parent_rate)
+static int dsi_pll_10nm_clk_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
struct dsi_pll_10nm *pll_10nm = to_pll_10nm(hw);
- if (rate < pll_10nm->phy->cfg->min_pll_rate)
- return pll_10nm->phy->cfg->min_pll_rate;
- else if (rate > pll_10nm->phy->cfg->max_pll_rate)
- return pll_10nm->phy->cfg->max_pll_rate;
- else
- return rate;
+ req->rate = clamp_t(unsigned long, req->rate,
+ pll_10nm->phy->cfg->min_pll_rate, pll_10nm->phy->cfg->max_pll_rate);
+
+ return 0;
}
static const struct clk_ops clk_ops_dsi_pll_10nm_vco = {
- .round_rate = dsi_pll_10nm_clk_round_rate,
+ .determine_rate = dsi_pll_10nm_clk_determine_rate,
.set_rate = dsi_pll_10nm_vco_set_rate,
.recalc_rate = dsi_pll_10nm_vco_recalc_rate,
.prepare = dsi_pll_10nm_vco_prepare,
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
index 3a1c8ece6657..fdefcbd9c284 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
@@ -578,21 +578,19 @@ static void dsi_pll_14nm_vco_unprepare(struct clk_hw *hw)
pll_14nm->phy->pll_on = false;
}
-static long dsi_pll_14nm_clk_round_rate(struct clk_hw *hw,
- unsigned long rate, unsigned long *parent_rate)
+static int dsi_pll_14nm_clk_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
struct dsi_pll_14nm *pll_14nm = to_pll_14nm(hw);
- if (rate < pll_14nm->phy->cfg->min_pll_rate)
- return pll_14nm->phy->cfg->min_pll_rate;
- else if (rate > pll_14nm->phy->cfg->max_pll_rate)
- return pll_14nm->phy->cfg->max_pll_rate;
- else
- return rate;
+ req->rate = clamp_t(unsigned long, req->rate,
+ pll_14nm->phy->cfg->min_pll_rate, pll_14nm->phy->cfg->max_pll_rate);
+
+ return 0;
}
static const struct clk_ops clk_ops_dsi_pll_14nm_vco = {
- .round_rate = dsi_pll_14nm_clk_round_rate,
+ .determine_rate = dsi_pll_14nm_clk_determine_rate,
.set_rate = dsi_pll_14nm_vco_set_rate,
.recalc_rate = dsi_pll_14nm_vco_recalc_rate,
.prepare = dsi_pll_14nm_vco_prepare,
@@ -622,18 +620,20 @@ static unsigned long dsi_pll_14nm_postdiv_recalc_rate(struct clk_hw *hw,
postdiv->flags, width);
}
-static long dsi_pll_14nm_postdiv_round_rate(struct clk_hw *hw,
- unsigned long rate,
- unsigned long *prate)
+static int dsi_pll_14nm_postdiv_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
struct dsi_pll_14nm_postdiv *postdiv = to_pll_14nm_postdiv(hw);
struct dsi_pll_14nm *pll_14nm = postdiv->pll;
- DBG("DSI%d PLL parent rate=%lu", pll_14nm->phy->id, rate);
+ DBG("DSI%d PLL parent rate=%lu", pll_14nm->phy->id, req->rate);
- return divider_round_rate(hw, rate, prate, NULL,
- postdiv->width,
- postdiv->flags);
+ req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate,
+ NULL,
+ postdiv->width,
+ postdiv->flags);
+
+ return 0;
}
static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -680,7 +680,7 @@ static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate,
static const struct clk_ops clk_ops_dsi_pll_14nm_postdiv = {
.recalc_rate = dsi_pll_14nm_postdiv_recalc_rate,
- .round_rate = dsi_pll_14nm_postdiv_round_rate,
+ .determine_rate = dsi_pll_14nm_postdiv_determine_rate,
.set_rate = dsi_pll_14nm_postdiv_set_rate,
};
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
index 90348a2af3e9..d00e415b9a99 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
@@ -533,21 +533,20 @@ static void dsi_pll_28nm_vco_unprepare(struct clk_hw *hw)
pll_28nm->phy->pll_on = false;
}
-static long dsi_pll_28nm_clk_round_rate(struct clk_hw *hw,
- unsigned long rate, unsigned long *parent_rate)
+static int dsi_pll_28nm_clk_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
struct dsi_pll_28nm *pll_28nm = to_pll_28nm(hw);
- if (rate < pll_28nm->phy->cfg->min_pll_rate)
- return pll_28nm->phy->cfg->min_pll_rate;
- else if (rate > pll_28nm->phy->cfg->max_pll_rate)
- return pll_28nm->phy->cfg->max_pll_rate;
- else
- return rate;
+ req->rate = clamp_t(unsigned long, req->rate,
+ pll_28nm->phy->cfg->min_pll_rate,
+ pll_28nm->phy->cfg->max_pll_rate);
+
+ return 0;
}
static const struct clk_ops clk_ops_dsi_pll_28nm_vco_hpm = {
- .round_rate = dsi_pll_28nm_clk_round_rate,
+ .determine_rate = dsi_pll_28nm_clk_determine_rate,
.set_rate = dsi_pll_28nm_clk_set_rate,
.recalc_rate = dsi_pll_28nm_clk_recalc_rate,
.prepare = dsi_pll_28nm_vco_prepare_hpm,
@@ -556,7 +555,7 @@ static const struct clk_ops clk_ops_dsi_pll_28nm_vco_hpm = {
};
static const struct clk_ops clk_ops_dsi_pll_28nm_vco_lp = {
- .round_rate = dsi_pll_28nm_clk_round_rate,
+ .determine_rate = dsi_pll_28nm_clk_determine_rate,
.set_rate = dsi_pll_28nm_clk_set_rate,
.recalc_rate = dsi_pll_28nm_clk_recalc_rate,
.prepare = dsi_pll_28nm_vco_prepare_lp,
@@ -565,7 +564,7 @@ static const struct clk_ops clk_ops_dsi_pll_28nm_vco_lp = {
};
static const struct clk_ops clk_ops_dsi_pll_28nm_vco_8226 = {
- .round_rate = dsi_pll_28nm_clk_round_rate,
+ .determine_rate = dsi_pll_28nm_clk_determine_rate,
.set_rate = dsi_pll_28nm_clk_set_rate,
.recalc_rate = dsi_pll_28nm_clk_recalc_rate,
.prepare = dsi_pll_28nm_vco_prepare_8226,
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
index f3643320ff2f..8dcce9581dc3 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
@@ -231,21 +231,19 @@ static void dsi_pll_28nm_vco_unprepare(struct clk_hw *hw)
pll_28nm->phy->pll_on = false;
}
-static long dsi_pll_28nm_clk_round_rate(struct clk_hw *hw,
- unsigned long rate, unsigned long *parent_rate)
+static int dsi_pll_28nm_clk_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
struct dsi_pll_28nm *pll_28nm = to_pll_28nm(hw);
- if (rate < pll_28nm->phy->cfg->min_pll_rate)
- return pll_28nm->phy->cfg->min_pll_rate;
- else if (rate > pll_28nm->phy->cfg->max_pll_rate)
- return pll_28nm->phy->cfg->max_pll_rate;
- else
- return rate;
+ req->rate = clamp_t(unsigned long, req->rate,
+ pll_28nm->phy->cfg->min_pll_rate, pll_28nm->phy->cfg->max_pll_rate);
+
+ return 0;
}
static const struct clk_ops clk_ops_dsi_pll_28nm_vco = {
- .round_rate = dsi_pll_28nm_clk_round_rate,
+ .determine_rate = dsi_pll_28nm_clk_determine_rate,
.set_rate = dsi_pll_28nm_clk_set_rate,
.recalc_rate = dsi_pll_28nm_clk_recalc_rate,
.prepare = dsi_pll_28nm_vco_prepare,
@@ -296,18 +294,20 @@ static unsigned int get_vco_mul_factor(unsigned long byte_clk_rate)
return 8;
}
-static long clk_bytediv_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int clk_bytediv_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
unsigned long best_parent;
unsigned int factor;
- factor = get_vco_mul_factor(rate);
+ factor = get_vco_mul_factor(req->rate);
+
+ best_parent = req->rate * factor;
+ req->best_parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent);
- best_parent = rate * factor;
- *prate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent);
+ req->rate = req->best_parent_rate / factor;
- return *prate / factor;
+ return 0;
}
static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -328,7 +328,7 @@ static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate,
/* Our special byte clock divider ops */
static const struct clk_ops clk_bytediv_ops = {
- .round_rate = clk_bytediv_round_rate,
+ .determine_rate = clk_bytediv_determine_rate,
.set_rate = clk_bytediv_set_rate,
.recalc_rate = clk_bytediv_recalc_rate,
};
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
index 8c98f91a5930..32f06edd21a9 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
@@ -90,6 +90,13 @@ struct dsi_pll_7nm {
/* protects REG_DSI_7nm_PHY_CMN_CLK_CFG1 register */
spinlock_t pclk_mux_lock;
+ /*
+ * protects REG_DSI_7nm_PHY_CMN_CTRL_0 register and pll_enable_cnt
+ * member
+ */
+ spinlock_t pll_enable_lock;
+ int pll_enable_cnt;
+
struct pll_7nm_cached_state cached_state;
struct dsi_pll_7nm *slave;
@@ -103,6 +110,9 @@ struct dsi_pll_7nm {
*/
static struct dsi_pll_7nm *pll_7nm_list[DSI_MAX];
+static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll);
+static void dsi_pll_disable_pll_bias(struct dsi_pll_7nm *pll);
+
static void dsi_pll_setup_config(struct dsi_pll_config *config)
{
config->ssc_freq = 31500;
@@ -340,6 +350,7 @@ static int dsi_pll_7nm_vco_set_rate(struct clk_hw *hw, unsigned long rate,
struct dsi_pll_7nm *pll_7nm = to_pll_7nm(hw);
struct dsi_pll_config config;
+ dsi_pll_enable_pll_bias(pll_7nm);
DBG("DSI PLL%d rate=%lu, parent's=%lu", pll_7nm->phy->id, rate,
parent_rate);
@@ -357,6 +368,7 @@ static int dsi_pll_7nm_vco_set_rate(struct clk_hw *hw, unsigned long rate,
dsi_pll_ssc_commit(pll_7nm, &config);
+ dsi_pll_disable_pll_bias(pll_7nm);
/* flush, ensure all register writes are done*/
wmb();
@@ -385,19 +397,47 @@ static int dsi_pll_7nm_lock_status(struct dsi_pll_7nm *pll)
static void dsi_pll_disable_pll_bias(struct dsi_pll_7nm *pll)
{
- u32 data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0);
+ unsigned long flags;
+ u32 data;
+ spin_lock_irqsave(&pll->pll_enable_lock, flags);
+ --pll->pll_enable_cnt;
+ if (pll->pll_enable_cnt < 0) {
+ spin_unlock_irqrestore(&pll->pll_enable_lock, flags);
+ DRM_DEV_ERROR_RATELIMITED(&pll->phy->pdev->dev,
+ "bug: imbalance in disabling PLL bias\n");
+ return;
+ } else if (pll->pll_enable_cnt > 0) {
+ spin_unlock_irqrestore(&pll->pll_enable_lock, flags);
+ return;
+ } /* else: == 0 */
+
+ data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0);
+ data &= ~DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB;
writel(0, pll->phy->pll_base + REG_DSI_7nm_PHY_PLL_SYSTEM_MUXES);
- writel(data & ~BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0);
+ writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0);
+ spin_unlock_irqrestore(&pll->pll_enable_lock, flags);
ndelay(250);
}
static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll)
{
- u32 data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0);
+ unsigned long flags;
+ u32 data;
+
+ spin_lock_irqsave(&pll->pll_enable_lock, flags);
+ if (pll->pll_enable_cnt++) {
+ spin_unlock_irqrestore(&pll->pll_enable_lock, flags);
+ WARN_ON(pll->pll_enable_cnt == INT_MAX);
+ return;
+ }
+
+ data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0);
+ data |= DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB;
+ writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0);
- writel(data | BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0);
writel(0xc0, pll->phy->pll_base + REG_DSI_7nm_PHY_PLL_SYSTEM_MUXES);
+ spin_unlock_irqrestore(&pll->pll_enable_lock, flags);
ndelay(250);
}
@@ -491,6 +531,10 @@ static int dsi_pll_7nm_vco_prepare(struct clk_hw *hw)
if (pll_7nm->slave)
dsi_pll_enable_global_clk(pll_7nm->slave);
+ writel(0x1, pll_7nm->phy->base + REG_DSI_7nm_PHY_CMN_RBUF_CTRL);
+ if (pll_7nm->slave)
+ writel(0x1, pll_7nm->slave->phy->base + REG_DSI_7nm_PHY_CMN_RBUF_CTRL);
+
error:
return rc;
}
@@ -534,6 +578,7 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw,
u32 dec;
u64 pll_freq, tmp64;
+ dsi_pll_enable_pll_bias(pll_7nm);
dec = readl(base + REG_DSI_7nm_PHY_PLL_DECIMAL_DIV_START_1);
dec &= 0xff;
@@ -558,24 +603,24 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw,
DBG("DSI PLL%d returning vco rate = %lu, dec = %x, frac = %x",
pll_7nm->phy->id, (unsigned long)vco_rate, dec, frac);
+ dsi_pll_disable_pll_bias(pll_7nm);
+
return (unsigned long)vco_rate;
}
-static long dsi_pll_7nm_clk_round_rate(struct clk_hw *hw,
- unsigned long rate, unsigned long *parent_rate)
+static int dsi_pll_7nm_clk_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
struct dsi_pll_7nm *pll_7nm = to_pll_7nm(hw);
- if (rate < pll_7nm->phy->cfg->min_pll_rate)
- return pll_7nm->phy->cfg->min_pll_rate;
- else if (rate > pll_7nm->phy->cfg->max_pll_rate)
- return pll_7nm->phy->cfg->max_pll_rate;
- else
- return rate;
+ req->rate = clamp_t(unsigned long, req->rate,
+ pll_7nm->phy->cfg->min_pll_rate, pll_7nm->phy->cfg->max_pll_rate);
+
+ return 0;
}
static const struct clk_ops clk_ops_dsi_pll_7nm_vco = {
- .round_rate = dsi_pll_7nm_clk_round_rate,
+ .determine_rate = dsi_pll_7nm_clk_determine_rate,
.set_rate = dsi_pll_7nm_vco_set_rate,
.recalc_rate = dsi_pll_7nm_vco_recalc_rate,
.prepare = dsi_pll_7nm_vco_prepare,
@@ -593,6 +638,7 @@ static void dsi_7nm_pll_save_state(struct msm_dsi_phy *phy)
void __iomem *phy_base = pll_7nm->phy->base;
u32 cmn_clk_cfg0, cmn_clk_cfg1;
+ dsi_pll_enable_pll_bias(pll_7nm);
cached->pll_out_div = readl(pll_7nm->phy->pll_base +
REG_DSI_7nm_PHY_PLL_PLL_OUTDIV_RATE);
cached->pll_out_div &= 0x3;
@@ -604,6 +650,7 @@ static void dsi_7nm_pll_save_state(struct msm_dsi_phy *phy)
cmn_clk_cfg1 = readl(phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1);
cached->pll_mux = FIELD_GET(DSI_7nm_PHY_CMN_CLK_CFG1_DSICLK_SEL__MASK, cmn_clk_cfg1);
+ dsi_pll_disable_pll_bias(pll_7nm);
DBG("DSI PLL%d outdiv %x bit_clk_div %x pix_clk_div %x pll_mux %x",
pll_7nm->phy->id, cached->pll_out_div, cached->bit_clk_div,
cached->pix_clk_div, cached->pll_mux);
@@ -826,8 +873,10 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy)
spin_lock_init(&pll_7nm->postdiv_lock);
spin_lock_init(&pll_7nm->pclk_mux_lock);
+ spin_lock_init(&pll_7nm->pll_enable_lock);
pll_7nm->phy = phy;
+ phy->pll_data = pll_7nm;
ret = pll_7nm_register(pll_7nm, phy->provided_clocks->hws);
if (ret) {
@@ -839,6 +888,12 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy)
/* TODO: Remove this when we have proper display handover support */
msm_dsi_phy_pll_save_state(phy);
+ /*
+ * Store also proper vco_current_rate, because its value will be used in
+ * dsi_7nm_pll_restore_state().
+ */
+ if (!dsi_pll_7nm_vco_recalc_rate(&pll_7nm->clk_hw, VCO_REF_CLK_RATE))
+ pll_7nm->vco_current_rate = pll_7nm->phy->cfg->min_pll_rate;
return 0;
}
@@ -910,8 +965,10 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy,
u32 const delay_us = 5;
u32 const timeout_us = 1000;
struct msm_dsi_dphy_timing *timing = &phy->timing;
+ struct dsi_pll_7nm *pll = phy->pll_data;
void __iomem *base = phy->base;
bool less_than_1500_mhz;
+ unsigned long flags;
u32 vreg_ctrl_0, vreg_ctrl_1, lane_ctrl0;
u32 glbl_pemph_ctrl_0;
u32 glbl_str_swi_cal_sel_ctrl, glbl_hstx_str_ctrl_0;
@@ -1033,9 +1090,13 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy,
glbl_rescode_bot_ctrl = 0x3c;
}
+ spin_lock_irqsave(&pll->pll_enable_lock, flags);
+ pll->pll_enable_cnt = 1;
/* de-assert digital and pll power down */
- data = BIT(6) | BIT(5);
+ data = DSI_7nm_PHY_CMN_CTRL_0_DIGTOP_PWRDN_B |
+ DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB;
writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0);
+ spin_unlock_irqrestore(&pll->pll_enable_lock, flags);
/* Assert PLL core reset */
writel(0x00, base + REG_DSI_7nm_PHY_CMN_PLL_CNTRL);
@@ -1148,7 +1209,9 @@ static bool dsi_7nm_set_continuous_clock(struct msm_dsi_phy *phy, bool enable)
static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy)
{
+ struct dsi_pll_7nm *pll = phy->pll_data;
void __iomem *base = phy->base;
+ unsigned long flags;
u32 data;
DBG("");
@@ -1175,8 +1238,12 @@ static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy)
writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0);
writel(0, base + REG_DSI_7nm_PHY_CMN_LANE_CTRL0);
+ spin_lock_irqsave(&pll->pll_enable_lock, flags);
+ pll->pll_enable_cnt = 0;
/* Turn off all PHY blocks */
writel(0x00, base + REG_DSI_7nm_PHY_CMN_CTRL_0);
+ spin_unlock_irqrestore(&pll->pll_enable_lock, flags);
+
/* make sure phy is turned off */
wmb();
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c
index 8c8d80b59573..36e928b0fd5a 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c
@@ -629,16 +629,12 @@ static int hdmi_8996_pll_prepare(struct clk_hw *hw)
return 0;
}
-static long hdmi_8996_pll_round_rate(struct clk_hw *hw,
- unsigned long rate,
- unsigned long *parent_rate)
+static int hdmi_8996_pll_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
- if (rate < HDMI_PCLK_MIN_FREQ)
- return HDMI_PCLK_MIN_FREQ;
- else if (rate > HDMI_PCLK_MAX_FREQ)
- return HDMI_PCLK_MAX_FREQ;
- else
- return rate;
+ req->rate = clamp_t(unsigned long, req->rate, HDMI_PCLK_MIN_FREQ, HDMI_PCLK_MAX_FREQ);
+
+ return 0;
}
static unsigned long hdmi_8996_pll_recalc_rate(struct clk_hw *hw,
@@ -684,7 +680,7 @@ static int hdmi_8996_pll_is_enabled(struct clk_hw *hw)
static const struct clk_ops hdmi_8996_pll_ops = {
.set_rate = hdmi_8996_pll_set_clk_rate,
- .round_rate = hdmi_8996_pll_round_rate,
+ .determine_rate = hdmi_8996_pll_determine_rate,
.recalc_rate = hdmi_8996_pll_recalc_rate,
.prepare = hdmi_8996_pll_prepare,
.unprepare = hdmi_8996_pll_unprepare,
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c
index 33bb48ae58a2..a86ff3706369 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c
@@ -646,16 +646,12 @@ static int hdmi_8998_pll_prepare(struct clk_hw *hw)
return 0;
}
-static long hdmi_8998_pll_round_rate(struct clk_hw *hw,
- unsigned long rate,
- unsigned long *parent_rate)
+static int hdmi_8998_pll_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
- if (rate < HDMI_PCLK_MIN_FREQ)
- return HDMI_PCLK_MIN_FREQ;
- else if (rate > HDMI_PCLK_MAX_FREQ)
- return HDMI_PCLK_MAX_FREQ;
- else
- return rate;
+ req->rate = clamp_t(unsigned long, req->rate, HDMI_PCLK_MIN_FREQ, HDMI_PCLK_MAX_FREQ);
+
+ return 0;
}
static unsigned long hdmi_8998_pll_recalc_rate(struct clk_hw *hw,
@@ -688,7 +684,7 @@ static int hdmi_8998_pll_is_enabled(struct clk_hw *hw)
static const struct clk_ops hdmi_8998_pll_ops = {
.set_rate = hdmi_8998_pll_set_clk_rate,
- .round_rate = hdmi_8998_pll_round_rate,
+ .determine_rate = hdmi_8998_pll_determine_rate,
.recalc_rate = hdmi_8998_pll_recalc_rate,
.prepare = hdmi_8998_pll_prepare,
.unprepare = hdmi_8998_pll_unprepare,
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c
index 83c8781fcc3f..6ba6bbdb7e05 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c
@@ -373,12 +373,14 @@ static unsigned long hdmi_pll_recalc_rate(struct clk_hw *hw,
return pll->pixclk;
}
-static long hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *parent_rate)
+static int hdmi_pll_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
- const struct pll_rate *pll_rate = find_rate(rate);
+ const struct pll_rate *pll_rate = find_rate(req->rate);
+
+ req->rate = pll_rate->rate;
- return pll_rate->rate;
+ return 0;
}
static int hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -402,7 +404,7 @@ static const struct clk_ops hdmi_pll_ops = {
.enable = hdmi_pll_enable,
.disable = hdmi_pll_disable,
.recalc_rate = hdmi_pll_recalc_rate,
- .round_rate = hdmi_pll_round_rate,
+ .determine_rate = hdmi_pll_determine_rate,
.set_rate = hdmi_pll_set_rate,
};
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 9dcc7a596a11..7e977fec4100 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -826,6 +826,7 @@ static const struct file_operations fops = {
#define DRIVER_FEATURES_KMS ( \
DRIVER_GEM | \
+ DRIVER_GEM_GPUVA | \
DRIVER_ATOMIC | \
DRIVER_MODESET | \
0 )
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 985db9febd98..6d847d593f1a 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -229,7 +229,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc);
int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu);
void msm_unregister_mmu(struct drm_device *dev, struct msm_mmu *mmu);
-struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev);
+struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev, struct device *mdss_dev);
bool msm_use_mmu(struct drm_device *dev);
int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index a6635ba426d4..688705a871cf 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -193,7 +193,7 @@ static struct page **get_pages(struct drm_gem_object *obj)
if (!msm_obj->pages) {
struct drm_device *dev = obj->dev;
struct page **p;
- int npages = obj->size >> PAGE_SHIFT;
+ size_t npages = obj->size >> PAGE_SHIFT;
p = drm_gem_get_pages(obj);
@@ -1171,7 +1171,7 @@ static int msm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct
/* convenience method to construct a GEM buffer object, and userspace handle */
int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file,
- uint32_t size, uint32_t flags, uint32_t *handle,
+ size_t size, uint32_t flags, uint32_t *handle,
char *name)
{
struct drm_gem_object *obj;
@@ -1237,9 +1237,8 @@ static const struct drm_gem_object_funcs msm_gem_object_funcs = {
.vm_ops = &vm_ops,
};
-static int msm_gem_new_impl(struct drm_device *dev,
- uint32_t size, uint32_t flags,
- struct drm_gem_object **obj)
+static int msm_gem_new_impl(struct drm_device *dev, uint32_t flags,
+ struct drm_gem_object **obj)
{
struct msm_drm_private *priv = dev->dev_private;
struct msm_gem_object *msm_obj;
@@ -1273,7 +1272,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
return 0;
}
-struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags)
+struct drm_gem_object *msm_gem_new(struct drm_device *dev, size_t size, uint32_t flags)
{
struct msm_drm_private *priv = dev->dev_private;
struct msm_gem_object *msm_obj;
@@ -1288,7 +1287,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32
if (size == 0)
return ERR_PTR(-EINVAL);
- ret = msm_gem_new_impl(dev, size, flags, &obj);
+ ret = msm_gem_new_impl(dev, flags, &obj);
if (ret)
return ERR_PTR(ret);
@@ -1328,12 +1327,12 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev,
struct msm_drm_private *priv = dev->dev_private;
struct msm_gem_object *msm_obj;
struct drm_gem_object *obj;
- uint32_t size;
- int ret, npages;
+ size_t size, npages;
+ int ret;
size = PAGE_ALIGN(dmabuf->size);
- ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj);
+ ret = msm_gem_new_impl(dev, MSM_BO_WC, &obj);
if (ret)
return ERR_PTR(ret);
@@ -1376,7 +1375,7 @@ fail:
return ERR_PTR(ret);
}
-void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags,
+void *msm_gem_kernel_new(struct drm_device *dev, size_t size, uint32_t flags,
struct drm_gpuvm *vm, struct drm_gem_object **bo,
uint64_t *iova)
{
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 751c3b4965bc..a4cf31853c50 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -297,10 +297,10 @@ bool msm_gem_active(struct drm_gem_object *obj);
int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout);
int msm_gem_cpu_fini(struct drm_gem_object *obj);
int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file,
- uint32_t size, uint32_t flags, uint32_t *handle, char *name);
+ size_t size, uint32_t flags, uint32_t *handle, char *name);
struct drm_gem_object *msm_gem_new(struct drm_device *dev,
- uint32_t size, uint32_t flags);
-void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags,
+ size_t size, uint32_t flags);
+void *msm_gem_kernel_new(struct drm_device *dev, size_t size, uint32_t flags,
struct drm_gpuvm *vm, struct drm_gem_object **bo,
uint64_t *iova);
void msm_gem_kernel_put(struct drm_gem_object *bo, struct drm_gpuvm *vm);
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
index c0a33ac839cb..036d34c674d9 100644
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -15,7 +15,7 @@
struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
- int npages = obj->size >> PAGE_SHIFT;
+ size_t npages = obj->size >> PAGE_SHIFT;
if (msm_obj->flags & MSM_BO_NO_SHARE)
return ERR_PTR(-EINVAL);
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index 6df6b7c0984d..8316af1723c2 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -1030,6 +1030,7 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args
struct drm_device *dev = job->vm->drm;
int ret = 0;
int cnt = 0;
+ int i = -1;
if (args->nr_ops == 1) {
/* Single op case, the op is inlined: */
@@ -1063,11 +1064,12 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args
spin_lock(&file->table_lock);
- for (unsigned i = 0; i < args->nr_ops; i++) {
+ for (i = 0; i < args->nr_ops; i++) {
+ struct msm_vm_bind_op *op = &job->ops[i];
struct drm_gem_object *obj;
- if (!job->ops[i].handle) {
- job->ops[i].obj = NULL;
+ if (!op->handle) {
+ op->obj = NULL;
continue;
}
@@ -1075,16 +1077,22 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args
* normally use drm_gem_object_lookup(), but for bulk lookup
* all under single table_lock just hit object_idr directly:
*/
- obj = idr_find(&file->object_idr, job->ops[i].handle);
+ obj = idr_find(&file->object_idr, op->handle);
if (!obj) {
- ret = UERR(EINVAL, dev, "invalid handle %u at index %u\n", job->ops[i].handle, i);
+ ret = UERR(EINVAL, dev, "invalid handle %u at index %u\n", op->handle, i);
goto out_unlock;
}
drm_gem_object_get(obj);
- job->ops[i].obj = obj;
+ op->obj = obj;
cnt++;
+
+ if ((op->range + op->obj_offset) > obj->size) {
+ ret = UERR(EINVAL, dev, "invalid range: %016llx + %016llx > %016zx\n",
+ op->range, op->obj_offset, obj->size);
+ goto out_unlock;
+ }
}
*nr_bos = cnt;
@@ -1092,6 +1100,17 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args
out_unlock:
spin_unlock(&file->table_lock);
+ if (ret) {
+ for (; i >= 0; i--) {
+ struct msm_vm_bind_op *op = &job->ops[i];
+
+ if (!op->obj)
+ continue;
+
+ drm_gem_object_put(op->obj);
+ op->obj = NULL;
+ }
+ }
out:
return ret;
}
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 26c5ce897cbb..17759abc46d7 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -304,7 +304,7 @@ static void crashstate_get_bos(struct msm_gpu_state *state, struct msm_gem_submi
sizeof(struct msm_gpu_state_bo), GFP_KERNEL);
for (int i = 0; state->bos && i < submit->nr_bos; i++) {
- struct drm_gem_object *obj = submit->bos[i].obj;;
+ struct drm_gem_object *obj = submit->bos[i].obj;
bool dump = rd_full || (submit->bos[i].flags & MSM_SUBMIT_BO_DUMP);
msm_gem_lock(obj);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index b2a96544f92a..a597f2bee30b 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -16,6 +16,7 @@
#include "msm_drv.h"
#include "msm_fence.h"
+#include "msm_gpu_trace.h"
#include "msm_ringbuffer.h"
#include "msm_gem.h"
@@ -91,6 +92,7 @@ struct msm_gpu_funcs {
* for cmdstream that is buffered in this FIFO upstream of the CP fw.
*/
bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
+ void (*sysprof_setup)(struct msm_gpu *gpu);
};
/* Additional state for iommu faults: */
@@ -613,16 +615,19 @@ struct msm_gpu_state {
static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
+ trace_msm_gpu_regaccess(reg);
writel(data, gpu->mmio + (reg << 2));
}
static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
+ trace_msm_gpu_regaccess(reg);
return readl(gpu->mmio + (reg << 2));
}
static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
+ trace_msm_gpu_regaccess(reg);
msm_rmw(gpu->mmio + (reg << 2), mask, or);
}
@@ -644,7 +649,9 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
* when the lo is read, so make sure to read the lo first to trigger
* that
*/
+ trace_msm_gpu_regaccess(reg);
val = (u64) readl(gpu->mmio + (reg << 2));
+ trace_msm_gpu_regaccess(reg+1);
val |= ((u64) readl(gpu->mmio + ((reg + 1) << 2)) << 32);
return val;
@@ -652,8 +659,10 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
{
+ trace_msm_gpu_regaccess(reg);
/* Why not a writeq here? Read the screed above */
writel(lower_32_bits(val), gpu->mmio + (reg << 2));
+ trace_msm_gpu_regaccess(reg+1);
writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2));
}
diff --git a/drivers/gpu/drm/msm/msm_gpu_trace.h b/drivers/gpu/drm/msm/msm_gpu_trace.h
index 781bbe5540bd..5417f8d389a3 100644
--- a/drivers/gpu/drm/msm/msm_gpu_trace.h
+++ b/drivers/gpu/drm/msm/msm_gpu_trace.h
@@ -219,6 +219,18 @@ TRACE_EVENT(msm_mmu_prealloc_cleanup,
TP_printk("count=%u, remaining=%u", __entry->count, __entry->remaining)
);
+TRACE_EVENT(msm_gpu_regaccess,
+ TP_PROTO(u32 offset),
+ TP_ARGS(offset),
+ TP_STRUCT__entry(
+ __field(u32, offset)
+ ),
+ TP_fast_assign(
+ __entry->offset = offset;
+ ),
+ TP_printk("offset=0x%x", __entry->offset)
+);
+
#endif
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 76cdd5ea06a0..0e18619f96cb 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -721,7 +721,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks)
int ret;
if (!device_iommu_mapped(dev))
- return NULL;
+ return ERR_PTR(-ENODEV);
domain = iommu_paging_domain_alloc(dev);
if (IS_ERR(domain))
@@ -756,7 +756,7 @@ struct msm_mmu *msm_iommu_disp_new(struct device *dev, unsigned long quirks)
struct msm_mmu *mmu;
mmu = msm_iommu_new(dev, quirks);
- if (IS_ERR_OR_NULL(mmu))
+ if (IS_ERR(mmu))
return mmu;
iommu = to_msm_iommu(mmu);
@@ -772,11 +772,11 @@ struct msm_mmu *msm_iommu_gpu_new(struct device *dev, struct msm_gpu *gpu, unsig
struct msm_mmu *mmu;
mmu = msm_iommu_new(dev, quirks);
- if (IS_ERR_OR_NULL(mmu))
+ if (IS_ERR(mmu))
return mmu;
iommu = to_msm_iommu(mmu);
- if (adreno_smmu && adreno_smmu->cookie) {
+ if (adreno_smmu->cookie) {
const struct io_pgtable_cfg *cfg =
adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie);
size_t tblsz = get_tblsz(cfg);
diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
index 56828d218e88..6e5e94f5c9a7 100644
--- a/drivers/gpu/drm/msm/msm_kms.c
+++ b/drivers/gpu/drm/msm/msm_kms.c
@@ -177,12 +177,11 @@ static int msm_kms_fault_handler(void *arg, unsigned long iova, int flags, void
return -ENOSYS;
}
-struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev)
+struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev, struct device *mdss_dev)
{
struct drm_gpuvm *vm;
struct msm_mmu *mmu;
struct device *mdp_dev = dev->dev;
- struct device *mdss_dev = mdp_dev->parent;
struct msm_drm_private *priv = dev->dev_private;
struct msm_kms *kms = priv->kms;
struct device *iommu_dev;
@@ -193,18 +192,17 @@ struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev)
*/
if (device_iommu_mapped(mdp_dev))
iommu_dev = mdp_dev;
- else
+ else if (mdss_dev && device_iommu_mapped(mdss_dev))
iommu_dev = mdss_dev;
+ else {
+ drm_info(dev, "no IOMMU, bailing out\n");
+ return ERR_PTR(-ENODEV);
+ }
mmu = msm_iommu_disp_new(iommu_dev, 0);
if (IS_ERR(mmu))
return ERR_CAST(mmu);
- if (!mmu) {
- drm_info(dev, "no IOMMU, fallback to phys contig buffers for scanout\n");
- return NULL;
- }
-
vm = msm_gem_vm_create(dev, mmu, "mdp_kms",
0x1000, 0x100000000 - 0x1000, true);
if (IS_ERR(vm)) {
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 39885b333910..2d0e3e784c04 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -154,8 +154,7 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss)
dev = msm_mdss->dev;
- domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), 32,
- &msm_mdss_irqdomain_ops, msm_mdss);
+ domain = irq_domain_create_linear(dev_fwnode(dev), 32, &msm_mdss_irqdomain_ops, msm_mdss);
if (!domain) {
dev_err(dev, "failed to add irq_domain\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index 8617a82cd6b3..d53dfad16bde 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -40,6 +40,10 @@ int msm_context_set_sysprof(struct msm_context *ctx, struct msm_gpu *gpu, int sy
break;
}
+ /* Some gpu families require additional setup for sysprof */
+ if (gpu->funcs->sysprof_setup)
+ gpu->funcs->sysprof_setup(gpu);
+
ctx->sysprof = sysprof;
return 0;
diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml
index 86fab2750ba7..9459b6038217 100644
--- a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml
+++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml
@@ -814,7 +814,7 @@ by a particular renderpass/blit.
<bitfield name="Y" low="16" high="29" type="uint"/>
</bitset>
- <reg32 offset="0x8000" name="GRAS_CL_CNTL" usage="rp_blit">
+ <bitset name="a6xx_gras_cl_cntl" inline="yes">
<bitfield name="CLIP_DISABLE" pos="0" type="boolean"/>
<bitfield name="ZNEAR_CLIP_DISABLE" pos="1" type="boolean"/>
<bitfield name="ZFAR_CLIP_DISABLE" pos="2" type="boolean"/>
@@ -826,18 +826,20 @@ by a particular renderpass/blit.
<bitfield name="VP_CLIP_CODE_IGNORE" pos="7" type="boolean"/>
<bitfield name="VP_XFORM_DISABLE" pos="8" type="boolean"/>
<bitfield name="PERSP_DIVISION_DISABLE" pos="9" type="boolean"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x8000" name="GRAS_CL_CNTL" type="a6xx_gras_cl_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
<bitset name="a6xx_gras_xs_clip_cull_distance" inline="yes">
<bitfield name="CLIP_MASK" low="0" high="7"/>
<bitfield name="CULL_MASK" low="8" high="15"/>
</bitset>
- <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/>
- <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/>
- <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/>
- <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="rp_blit"/>
+ <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" />
+ <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" />
+ <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" />
+ <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="rp_blit" variants="A6XX-A7XX" />
- <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" usage="rp_blit">
+ <bitset name="a6xx_gras_cl_interp_cntl" inline="yes">
<!-- see also RB_INTERP_CNTL -->
<bitfield name="IJ_PERSP_PIXEL" pos="0" type="boolean"/>
<bitfield name="IJ_PERSP_CENTROID" pos="1" type="boolean"/>
@@ -848,26 +850,69 @@ by a particular renderpass/blit.
<bitfield name="COORD_MASK" low="6" high="9" type="hex"/>
<bitfield name="UNK10" pos="10" type="boolean" variants="A7XX-"/>
<bitfield name="UNK11" pos="11" type="boolean" variants="A7XX-"/>
- </reg32>
- <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" usage="rp_blit">
+ </bitset>
+
+ <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" type="a6xx_gras_cl_interp_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_cl_guardband_clip_adj" inline="true">
<bitfield name="HORZ" low="0" high="8" type="uint"/>
<bitfield name="VERT" low="10" high="18" type="uint"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" type="a6xx_gras_cl_guardband_clip_adj" variants="A6XX-A7XX" usage="rp_blit"/>
<!-- Something connected to depth-stencil attachment size -->
<reg32 offset="0x8007" name="GRAS_UNKNOWN_8007" variants="A7XX-" usage="rp_blit"/>
- <reg32 offset="0x8008" name="GRAS_UNKNOWN_8008" variants="A7XX-" usage="cmd"/>
+ <!-- the scale/offset is per view, with up to 6 views -->
+ <bitset name="a6xx_gras_bin_foveat" inline="yes">
+ <bitfield name="BINSCALEEN" pos="6" type="boolean"/>
+ <enum name="a7xx_bin_scale">
+ <value value="0" name="NOSCALE"/>
+ <value value="1" name="SCALE2X"/>
+ <value value="2" name="SCALE4X"/>
+ </enum>
+ <bitfield name="XSCALE_0" low="8" high="9" type="a7xx_bin_scale"/>
+ <bitfield name="YSCALE_0" low="10" high="11" type="a7xx_bin_scale"/>
+ <bitfield name="XSCALE_1" low="12" high="13" type="a7xx_bin_scale"/>
+ <bitfield name="YSCALE_1" low="14" high="15" type="a7xx_bin_scale"/>
+ <bitfield name="XSCALE_2" low="16" high="17" type="a7xx_bin_scale"/>
+ <bitfield name="YSCALE_2" low="18" high="19" type="a7xx_bin_scale"/>
+ <bitfield name="XSCALE_3" low="20" high="21" type="a7xx_bin_scale"/>
+ <bitfield name="YSCALE_3" low="22" high="23" type="a7xx_bin_scale"/>
+ <bitfield name="XSCALE_4" low="24" high="25" type="a7xx_bin_scale"/>
+ <bitfield name="YSCALE_4" low="26" high="27" type="a7xx_bin_scale"/>
+ <bitfield name="XSCALE_5" low="28" high="29" type="a7xx_bin_scale"/>
+ <bitfield name="YSCALE_5" low="30" high="31" type="a7xx_bin_scale"/>
+ </bitset>
- <reg32 offset="0x8009" name="GRAS_UNKNOWN_8009" variants="A7XX-" usage="cmd"/>
- <reg32 offset="0x800a" name="GRAS_UNKNOWN_800A" variants="A7XX-" usage="cmd"/>
- <reg32 offset="0x800b" name="GRAS_UNKNOWN_800B" variants="A7XX-" usage="cmd"/>
- <reg32 offset="0x800c" name="GRAS_UNKNOWN_800C" variants="A7XX-" usage="cmd"/>
+ <reg32 offset="0x8008" name="GRAS_BIN_FOVEAT" type="a6xx_gras_bin_foveat" variants="A7XX" usage="cmd"/>
+
+ <reg32 offset="0x8009" name="GRAS_BIN_FOVEAT_OFFSET_0" variants="A7XX-" usage="cmd">
+ <bitfield name="XOFFSET_0" low="0" high="9" shr="2" type="uint"/>
+ <bitfield name="XOFFSET_1" low="10" high="19" shr="2" type="uint"/>
+ <bitfield name="XOFFSET_2" low="20" high="29" shr="2" type="uint"/>
+ </reg32>
+ <reg32 offset="0x800a" name="GRAS_BIN_FOVEAT_OFFSET_1" variants="A7XX-" usage="cmd">
+ <bitfield name="XOFFSET_3" low="0" high="9" shr="2" type="uint"/>
+ <bitfield name="XOFFSET_4" low="10" high="19" shr="2" type="uint"/>
+ <bitfield name="XOFFSET_5" low="20" high="29" shr="2" type="uint"/>
+ </reg32>
+ <reg32 offset="0x800b" name="GRAS_BIN_FOVEAT_OFFSET_2" variants="A7XX-" usage="cmd">
+ <bitfield name="YOFFSET_0" low="0" high="9" shr="2" type="uint"/>
+ <bitfield name="YOFFSET_1" low="10" high="19" shr="2" type="uint"/>
+ <bitfield name="YOFFSET_2" low="20" high="29" shr="2" type="uint"/>
+ </reg32>
+ <reg32 offset="0x800c" name="GRAS_BIN_FOVEAT_OFFSET_3" variants="A7XX-" usage="cmd">
+ <bitfield name="YOFFSET_3" low="0" high="9" shr="2" type="uint"/>
+ <bitfield name="YOFFSET_4" low="10" high="19" shr="2" type="uint"/>
+ <bitfield name="YOFFSET_5" low="20" high="29" shr="2" type="uint"/>
+ </reg32>
<!-- <reg32 offset="0x80f0" name="GRAS_UNKNOWN_80F0" type="a6xx_reg_xy"/> -->
<!-- 0x8006-0x800f invalid -->
- <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" usage="rp_blit">
+ <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" variants="A6XX-A7XX" usage="rp_blit">
<reg32 offset="0" name="XOFFSET" type="float"/>
<reg32 offset="1" name="XSCALE" type="float"/>
<reg32 offset="2" name="YOFFSET" type="float"/>
@@ -875,12 +920,13 @@ by a particular renderpass/blit.
<reg32 offset="4" name="ZOFFSET" type="float"/>
<reg32 offset="5" name="ZSCALE" type="float"/>
</array>
- <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" usage="rp_blit">
+
+ <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit">
<reg32 offset="0" name="MIN" type="float"/>
<reg32 offset="1" name="MAX" type="float"/>
</array>
- <reg32 offset="0x8090" name="GRAS_SU_CNTL" usage="rp_blit">
+ <bitset name="a6xx_gras_su_cntl" varset="chip">
<bitfield name="CULL_FRONT" pos="0" type="boolean"/>
<bitfield name="CULL_BACK" pos="1" type="boolean"/>
<bitfield name="FRONT_CW" pos="2" type="boolean"/>
@@ -890,39 +936,66 @@ by a particular renderpass/blit.
<bitfield name="LINE_MODE" pos="13" type="a5xx_line_mode"/>
<bitfield name="UNK15" low="15" high="16"/>
<!--
- On gen1 only MULTIVIEW_ENABLE exists. On gen3 we have
- the ability to add the view index to either the RT array
- index or the viewport index, and it seems that
- MULTIVIEW_ENABLE doesn't do anything, instead we need to
- set at least one of RENDERTARGETINDEXINCR or
- VIEWPORTINDEXINCR to enable multiview. The blob still
- sets MULTIVIEW_ENABLE regardless.
- TODO: what about gen2 (a640)?
+ On gen1 only MULTIVIEW_ENABLE exists. On gen3 we have
+ the ability to add the view index to either the RT array
+ index or the viewport index, and it seems that
+ MULTIVIEW_ENABLE doesn't do anything, instead we need to
+ set at least one of RENDERTARGETINDEXINCR or
+ VIEWPORTINDEXINCR to enable multiview. The blob still
+ sets MULTIVIEW_ENABLE regardless.
+ TODO: what about gen2 (a640)?
-->
<bitfield name="MULTIVIEW_ENABLE" pos="17" type="boolean"/>
- <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean"/>
- <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean"/>
- <bitfield name="UNK20" low="20" high="22"/>
- </reg32>
- <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" usage="rp_blit">
+ <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean" variants="A6XX-A7XX"/>
+ <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean" variants="A6XX-A7XX"/>
+ <bitfield name="UNK20" low="20" high="22" variants="A6XX-A7XX"/>
+ </bitset>
+ <reg32 offset="0x8090" name="GRAS_SU_CNTL" type="a6xx_gras_su_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_su_point_minmax" inline="yes">
<bitfield name="MIN" low="0" high="15" type="ufixed" radix="4"/>
<bitfield name="MAX" low="16" high="31" type="ufixed" radix="4"/>
- </reg32>
- <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" usage="rp_blit"/>
+ </bitset>
+
+ <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" type="a6xx_gras_su_point_minmax" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_su_depth_cntl" inline="yes">
+ <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/>
+ </bitset>
+
+ <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" variants="A6XX-A7XX" type="a6xx_gras_su_depth_cntl" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_su_stencil_cntl" inline="yes">
+ <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/>
+ </bitset>
+
+ <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" type="a6xx_gras_su_stencil_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_su_render_cntl" inline="yes">
+ <bitfield name="FS_DISABLE" pos="7" type="boolean"/>
+ </bitset>
+
+ <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" type="a6xx_gras_su_render_cntl" variants="A7XX" usage="rp_blit"/>
+
<!-- 0x8093 invalid -->
- <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" usage="rp_blit">
+ <bitset name="a6xx_depth_plane_cntl" inline="yes">
<bitfield name="Z_MODE" low="0" high="1" type="a6xx_ztest_mode"/>
- </reg32>
- <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" usage="rp_blit"/>
- <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" usage="rp_blit"/>
- <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" usage="rp_blit"/>
- <!-- duplicates RB_DEPTH_BUFFER_INFO: -->
- <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" usage="rp_blit">
+ </bitset>
+
+ <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" variants="A6XX-A7XX" usage="rp_blit"/>
+ <bitset name="a6xx_depth_buffer_info" inline="yes">
<bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/>
<bitfield name="UNK3" pos="3"/>
- </reg32>
+ </bitset>
- <reg32 offset="0x8099" name="GRAS_SU_CONSERVATIVE_RAS_CNTL" usage="cmd">
+ <!-- duplicates RB_DEPTH_BUFFER_INFO: -->
+ <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_su_conservative_ras_cntl" inline="yes">
<bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/>
<enum name="a6xx_shift_amount">
<value value="0" name="NO_SHIFT"/>
@@ -932,7 +1005,10 @@ by a particular renderpass/blit.
<bitfield name="SHIFTAMOUNT" low="1" high="2" type="a6xx_shift_amount"/>
<bitfield name="INNERCONSERVATIVERASEN" pos="3" type="boolean"/>
<bitfield name="UNK4" low="4" high="5"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x8099" name="GRAS_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_gras_su_conservative_ras_cntl" variants="A6XX-A7XX" usage="cmd"/>
+
<reg32 offset="0x809a" name="GRAS_SU_PATH_RENDERING_CNTL">
<bitfield name="UNK0" pos="0" type="boolean"/>
<bitfield name="LINELENGTHEN" pos="1" type="boolean"/>
@@ -942,10 +1018,13 @@ by a particular renderpass/blit.
<bitfield name="WRITES_LAYER" pos="0" type="boolean"/>
<bitfield name="WRITES_VIEW" pos="1" type="boolean"/>
</bitset>
- <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/>
- <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/>
- <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/>
- <!-- 0x809e/0x809f invalid -->
+ <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_rast_cntl" inline="yes">
+ <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/>
+ </bitset>
<enum name="a6xx_sequenced_thread_dist">
<value value="0x0" name="DIST_SCREEN_COORD"/>
@@ -993,7 +1072,7 @@ by a particular renderpass/blit.
<value value="0x3" name="RB_BT"/>
</enum>
- <reg32 offset="0x80a0" name="GRAS_SC_CNTL" usage="rp_blit">
+ <bitset name="a6xx_gras_sc_cntl" inline="yes">
<bitfield name="CCUSINGLECACHELINESIZE" low="0" high="2"/>
<bitfield name="SINGLE_PRIM_MODE" low="3" high="4" type="a6xx_single_prim_mode"/>
<bitfield name="RASTER_MODE" pos="5" type="a6xx_raster_mode"/>
@@ -1003,7 +1082,9 @@ by a particular renderpass/blit.
<bitfield name="UNK9" pos="9" type="boolean"/>
<bitfield name="ROTATION" low="10" high="11" type="uint"/>
<bitfield name="EARLYVIZOUTEN" pos="12" type="boolean"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x80a0" name="GRAS_SC_CNTL" type="a6xx_gras_sc_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
<enum name="a6xx_render_mode">
<value value="0x0" name="RENDERING_PASS"/>
@@ -1024,7 +1105,7 @@ by a particular renderpass/blit.
<value value="0x4" name="LRZ_FEEDBACK_LATE_Z"/>
</enum>
- <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" usage="rp_blit">
+ <bitset name="a6xx_bin_cntl" inline="yes">
<bitfield name="BINW" low="0" high="5" shr="5" type="uint"/>
<bitfield name="BINH" low="8" high="14" shr="4" type="uint"/>
<bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/>
@@ -1037,18 +1118,25 @@ by a particular renderpass/blit.
In sysmem mode GRAS_LRZ_CNTL.LRZ_WRITE is not considered.
</doc>
<bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/>
- <bitfield name="UNK27" pos="27"/>
- </reg32>
+ <bitfield name="FORCE_LRZ_DIS" pos="27" type="boolean"/>
+ </bitset>
+
+ <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" type="a6xx_bin_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
- <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" usage="rp_blit">
+ <bitset name="a6xx_gras_sc_ras_msaa_cntl" inline="yes">
<bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/>
<bitfield name="UNK2" pos="2"/>
<bitfield name="UNK3" pos="3"/>
- </reg32>
- <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" usage="rp_blit">
+ </bitset>
+
+ <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" type="a6xx_gras_sc_ras_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_sc_dest_msaa_cntl" inline="yes">
<bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/>
<bitfield name="MSAA_DISABLE" pos="2" type="boolean"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" type="a6xx_gras_sc_dest_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
<bitset name="a6xx_msaa_sample_pos_cntl" inline="yes">
<bitfield name="UNK0" pos="0"/>
@@ -1066,30 +1154,35 @@ by a particular renderpass/blit.
<bitfield name="SAMPLE_3_Y" low="28" high="31" radix="4" type="fixed"/>
</bitset>
- <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="rp_blit"/>
- <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit"/>
- <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit"/>
+ <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/>
- <reg32 offset="0x80a7" name="GRAS_UNKNOWN_80A7" variants="A7XX-" usage="cmd"/>
+ <reg32 offset="0x80a7" name="GRAS_ROTATION_CNTL" variants="A7XX" usage="cmd"/>
- <!-- 0x80a7-0x80ae invalid -->
- <reg32 offset="0x80af" name="GRAS_UNKNOWN_80AF" pos="0" usage="cmd"/>
+ <bitset name="a6xx_screen_scissor_cntl" inline="yes">
+ <bitfield name="SCISSOR_DISABLE" pos="0" type="boolean"/>
+ </bitset>
+
+ <reg32 offset="0x80af" name="GRAS_SC_SCREEN_SCISSOR_CNTL" type="a6xx_screen_scissor_cntl" variants="A6XX-A7XX" pos="0" usage="cmd"/>
<bitset name="a6xx_scissor_xy" inline="yes">
<bitfield name="X" low="0" high="15" type="uint"/>
<bitfield name="Y" low="16" high="31" type="uint"/>
</bitset>
- <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" usage="rp_blit">
+
+ <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit">
<reg32 offset="0" name="TL" type="a6xx_scissor_xy"/>
<reg32 offset="1" name="BR" type="a6xx_scissor_xy"/>
</array>
- <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" usage="rp_blit">
+
+ <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit">
<reg32 offset="0" name="TL" type="a6xx_scissor_xy"/>
<reg32 offset="1" name="BR" type="a6xx_scissor_xy"/>
</array>
- <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" usage="rp_blit"/>
- <reg32 offset="0x80f1" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" usage="rp_blit"/>
+ <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x80f1" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/>
<enum name="a6xx_fsr_combiner">
<value value="0" name="FSR_COMBINER_OP_KEEP"/>
@@ -1099,7 +1192,7 @@ by a particular renderpass/blit.
<value value="4" name="FSR_COMBINER_OP_MUL"/>
</enum>
- <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" variants="A7XX-" usage="rp_blit">
+ <bitset name="a6xx_gras_vrs_config">
<bitfield name="PIPELINE_FSR_ENABLE" pos="0" type="boolean"/>
<bitfield name="FRAG_SIZE_X" low="1" high="2" type="uint"/>
<bitfield name="FRAG_SIZE_Y" low="3" high="4" type="uint"/>
@@ -1107,20 +1200,32 @@ by a particular renderpass/blit.
<bitfield name="COMBINER_OP_2" low="8" high="10" type="a6xx_fsr_combiner"/>
<bitfield name="ATTACHMENT_FSR_ENABLE" pos="13" type="boolean"/>
<bitfield name="PRIMITIVE_FSR_ENABLE" pos="20" type="boolean"/>
- </reg32>
- <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" variants="A7XX-" usage="rp_blit">
+ </bitset>
+
+ <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" type="a6xx_gras_vrs_config" variants="A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_quality_buffer_info" inline="yes">
<bitfield name="LAYERED" pos="0" type="boolean"/>
<bitfield name="TILE_MODE" low="1" high="2" type="a6xx_tile_mode"/>
- </reg32>
- <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" variants="A7XX-" usage="rp_blit">
+ </bitset>
+
+ <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" type="a6xx_gras_quality_buffer_info" variants="A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_quality_buffer_dimension" inline="yes">
<bitfield name="WIDTH" low="0" high="15" type="uint"/>
<bitfield name="HEIGHT" low="16" high="31" type="uint"/>
- </reg32>
- <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX-" type="waddress" usage="rp_blit"/>
- <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" variants="A7XX-" usage="rp_blit">
+ </bitset>
+
+ <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" type="a6xx_gras_quality_buffer_dimension" variants="A7XX" usage="rp_blit"/>
+
+ <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX" type="waddress" usage="rp_blit"/>
+
+ <bitset name="a6xx_gras_quality_buffer_pitch" inline="yes">
<bitfield name="PITCH" shr="6" low="0" high="7" type="uint"/>
<bitfield name="ARRAY_PITCH" shr="6" low="10" high="28" type="uint"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" type="a6xx_gras_quality_buffer_pitch" variants="A7XX" usage="rp_blit"/>
<enum name="a6xx_lrz_dir_status">
<value value="0x1" name="LRZ_DIR_LE"/>
@@ -1128,7 +1233,7 @@ by a particular renderpass/blit.
<value value="0x3" name="LRZ_DIR_INVALID"/>
</enum>
- <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" usage="rp_blit">
+ <bitset name="a6xx_gras_lrz_cntl" inline="yes">
<bitfield name="ENABLE" pos="0" type="boolean"/>
<doc>LRZ write also disabled for blend/etc.</doc>
<bitfield name="LRZ_WRITE" pos="1" type="boolean"/>
@@ -1155,26 +1260,36 @@ by a particular renderpass/blit.
</doc>
<bitfield name="DISABLE_ON_WRONG_DIR" pos="9" type="boolean" variants="A6XX"/>
<bitfield name="Z_FUNC" low="11" high="13" type="adreno_compare_func" variants="A7XX-"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="rp_blit" variants="A6XX-A7XX"/>
<enum name="a6xx_fragcoord_sample_mode">
<value value="0" name="FRAGCOORD_CENTER"/>
<value value="3" name="FRAGCOORD_SAMPLE"/>
</enum>
- <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" low="0" high="2" usage="rp_blit">
+ <bitset name="a6xx_gras_lrz_ps_input_cntl" inline="yes">
<bitfield name="SAMPLEID" pos="0" type="boolean"/>
<bitfield name="FRAGCOORDSAMPLEMODE" low="1" high="2" type="a6xx_fragcoord_sample_mode"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" type="a6xx_gras_lrz_ps_input_cntl" usage="rp_blit" variants="A6XX-A7XX"/>
- <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" usage="rp_blit">
+ <bitset name="a6xx_gras_lrz_mrt_buffer_info_0" inline="yes">
<bitfield name="COLOR_FORMAT" low="0" high="7" type="a6xx_format"/>
- </reg32>
- <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit"/>
- <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" usage="rp_blit">
+ </bitset>
+
+ <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" type="a6xx_gras_lrz_mrt_buffer_info_0" usage="rp_blit" variants="A6XX-A7XX"/>
+
+ <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit" variants="A6XX-A7XX"/>
+
+ <bitset name="a6xx_gras_lrz_buffer_pitch" inline="yes">
<bitfield name="PITCH" low="0" high="7" shr="5" type="uint"/>
<bitfield name="ARRAY_PITCH" low="10" high="28" shr="8" type="uint"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" type="a6xx_gras_lrz_buffer_pitch" usage="rp_blit" variants="A6XX-A7XX"/>
<!--
The LRZ "fast clear" buffer is initialized to zero's by blob, and
@@ -1207,7 +1322,6 @@ by a particular renderpass/blit.
not.
-->
<reg64 offset="0x8106" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE" align="64" type="waddress" usage="rp_blit"/>
- <!-- 0x8108 invalid -->
<reg32 offset="0x8109" name="GRAS_LRZ_PS_SAMPLEFREQ_CNTL" usage="rp_blit">
<bitfield name="PER_SAMP_MODE" pos="0" type="boolean"/>
</reg32>
@@ -1232,19 +1346,20 @@ by a particular renderpass/blit.
<!-- 0x810c-0x810f invalid -->
- <reg32 offset="0x8110" name="GRAS_UNKNOWN_8110" low="0" high="1" usage="cmd"/>
+ <reg32 offset="0x8110" name="GRAS_MODE_CNTL" low="0" high="1" variants="A6XX-A7XX" usage="cmd"/>
<!-- A bit tentative but it's a color and it is followed by LRZ_CLEAR -->
- <reg32 offset="0x8111" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A7XX-"/>
+ <reg32 offset="0x8111" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A7XX"/>
- <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit">
+ <bitset name="a6xx_gras_lrz_depth_buffer_info" inline="yes">
<bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/>
<bitfield name="UNK3" pos="3"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" type="a6xx_gras_lrz_depth_buffer_info" variants="A7XX" usage="rp_blit"/>
- <!-- Always written together and always equal 09510840 00000a62 -->
- <reg32 offset="0x8120" name="GRAS_UNKNOWN_8120" variants="A7XX-" usage="cmd"/>
- <reg32 offset="0x8121" name="GRAS_UNKNOWN_8121" variants="A7XX-" usage="cmd"/>
+ <doc>LUT used to convert quality buffer values to HW shading rate values. An array of 4-bit values.</doc>
+ <array offset="0x8120" name="GRAS_LRZ_QUALITY_LOOKUP_TABLE" variants="A7XX-" stride="1" length="2"/>
<!-- 0x8112-0x83ff invalid -->
@@ -1269,28 +1384,29 @@ by a particular renderpass/blit.
<bitfield name="D24S8" pos="19" type="boolean"/>
<!-- some sort of channel mask, disabled channels are set to zero ? -->
<bitfield name="MASK" low="20" high="23"/>
- <bitfield name="IFMT" low="24" high="28" type="a6xx_2d_ifmt"/>
+ <bitfield name="IFMT" low="24" high="26" type="a6xx_2d_ifmt"/>
+ <bitfield name="UNK27" pos="27" type="boolean"/>
+ <bitfield name="UNK28" pos="28" type="boolean"/>
<bitfield name="RASTER_MODE" pos="29" type="a6xx_raster_mode"/>
- <bitfield name="UNK30" pos="30" type="boolean" variants="A7XX-"/>
+ <bitfield name="COPY" pos="30" type="boolean" variants="A7XX-"/>
</bitset>
- <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_bit_cntl" usage="rp_blit"/>
+ <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_bit_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
<!-- note: the low 8 bits for src coords are valid, probably fixed point
it would be a bit weird though, since we subtract 1 from BR coords
apparently signed, gallium driver uses negative coords and it works?
-->
- <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" usage="rp_blit"/>
- <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" usage="rp_blit"/>
- <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" usage="rp_blit"/>
- <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" usage="rp_blit"/>
- <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" usage="rp_blit"/>
- <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" usage="rp_blit"/>
+ <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/>
<reg32 offset="0x8407" name="GRAS_2D_UNKNOWN_8407" low="0" high="31"/>
<reg32 offset="0x8408" name="GRAS_2D_UNKNOWN_8408" low="0" high="31"/>
<reg32 offset="0x8409" name="GRAS_2D_UNKNOWN_8409" low="0" high="31"/>
- <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" usage="rp_blit"/>
- <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" usage="rp_blit"/>
- <!-- 0x840c-0x85ff invalid -->
+ <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/>
<!-- always 0x880 ? (and 0 in a640/a650 traces?) -->
<reg32 offset="0x8600" name="GRAS_DBG_ECO_CNTL" usage="cmd">
@@ -1308,22 +1424,7 @@ by a particular renderpass/blit.
-->
<!-- same as GRAS_BIN_CONTROL, but without bit 27: -->
- <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX" usage="rp_blit">
- <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/>
- <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/>
- <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/>
- <bitfield name="FORCE_LRZ_WRITE_DIS" pos="21" type="boolean"/>
- <bitfield name="BUFFERS_LOCATION" low="22" high="23" type="a6xx_buffers_location"/>
- <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/>
- </reg32>
-
- <reg32 offset="0x8800" name="RB_CNTL" variants="A7XX-" usage="rp_blit">
- <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/>
- <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/>
- <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/>
- <bitfield name="FORCE_LRZ_WRITE_DIS" pos="21" type="boolean"/>
- <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/>
- </reg32>
+ <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX-A7XX" type="a6xx_bin_cntl" usage="rp_blit"/>
<reg32 offset="0x8801" name="RB_RENDER_CNTL" variants="A6XX" usage="rp_blit">
<bitfield name="CCUSINGLECACHELINESIZE" low="3" high="5"/>
@@ -1347,9 +1448,6 @@ by a particular renderpass/blit.
<bitfield name="CONSERVATIVERASEN" pos="11" type="boolean"/>
<bitfield name="INNERCONSERVATIVERASEN" pos="12" type="boolean"/>
</reg32>
- <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" variants="A7XX-" usage="rp_blit">
- <bitfield name="FS_DISABLE" pos="7" type="boolean"/>
- </reg32>
<reg32 offset="0x8802" name="RB_RAS_MSAA_CNTL" usage="rp_blit">
<bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/>
@@ -1516,9 +1614,7 @@ by a particular renderpass/blit.
<bitfield name="SAMPLE_MASK" low="16" high="31"/>
</reg32>
<!-- 0x8866-0x886f invalid -->
- <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" usage="rp_blit">
- <bitfield name="Z_MODE" low="0" high="1" type="a6xx_ztest_mode"/>
- </reg32>
+ <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" usage="rp_blit"/>
<reg32 offset="0x8871" name="RB_DEPTH_CNTL" usage="rp_blit">
<bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/>
@@ -1532,14 +1628,9 @@ by a particular renderpass/blit.
<bitfield name="Z_READ_ENABLE" pos="6" type="boolean"/>
<bitfield name="Z_BOUNDS_ENABLE" pos="7" type="boolean"/>
</reg32>
- <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" usage="rp_blit">
- <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/>
- </reg32>
+
<!-- duplicates GRAS_SU_DEPTH_BUFFER_INFO: -->
- <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" usage="rp_blit">
- <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/>
- <bitfield name="UNK3" low="3" high="4"/>
- </reg32>
+ <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" type="a6xx_depth_buffer_info" usage="rp_blit"/>
<!-- first 4 bits duplicates GRAS_SU_DEPTH_BUFFER_INFO -->
<reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit">
<bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/>
@@ -1575,9 +1666,7 @@ by a particular renderpass/blit.
<bitfield name="ZPASS_BF" low="26" high="28" type="adreno_stencil_op"/>
<bitfield name="ZFAIL_BF" low="29" high="31" type="adreno_stencil_op"/>
</reg32>
- <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" usage="rp_blit">
- <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/>
- </reg32>
+
<reg32 offset="0x8881" name="RB_STENCIL_BUFFER_INFO" variants="A6XX" usage="rp_blit">
<bitfield name="SEPARATE_STENCIL" pos="0" type="boolean"/>
<bitfield name="UNK1" pos="1" type="boolean"/>
@@ -1616,8 +1705,9 @@ by a particular renderpass/blit.
<reg32 offset="0x8899" name="RB_UNKNOWN_8899" variants="A7XX-" usage="cmd"/>
<!-- 0x8899-0x88bf invalid -->
<!-- clamps depth value for depth test/write -->
- <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="rp_blit"/>
- <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="rp_blit"/>
+ <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="rp_blit" variants="A6XX-A7XX"/>
+ <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="rp_blit" variants="A6XX-A7XX"/>
+
<!-- 0x88c2-0x88cf invalid-->
<reg32 offset="0x88d0" name="RB_RESOLVE_CNTL_0" usage="rp_blit">
<bitfield name="UNK0" low="0" high="12"/>
@@ -1626,7 +1716,7 @@ by a particular renderpass/blit.
<reg32 offset="0x88d1" name="RB_RESOLVE_CNTL_1" type="a6xx_reg_xy" usage="rp_blit"/>
<reg32 offset="0x88d2" name="RB_RESOLVE_CNTL_2" type="a6xx_reg_xy" usage="rp_blit"/>
<!-- weird to duplicate other regs from same block?? -->
- <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" usage="rp_blit">
+ <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" variants="A6XX-A7XX" usage="rp_blit">
<bitfield name="BINW" low="0" high="5" shr="5" type="uint"/>
<bitfield name="BINH" low="8" high="14" shr="4" type="uint"/>
</reg32>
@@ -1650,10 +1740,13 @@ by a particular renderpass/blit.
<!-- array-pitch is size of layer -->
<reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint" usage="rp_blit"/>
<reg64 offset="0x88dc" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/>
- <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" usage="rp_blit">
+
+ <bitset name="a6xx_flag_buffer_pitch" inline="yes">
<bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/>
- <bitfield name="ARRAY_PITCH" low="11" high="27" shr="7" type="uint"/>
- </reg32>
+ <bitfield name="ARRAY_PITCH" low="11" high="28" shr="7" type="uint"/>
+ </bitset>
+
+ <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/>
<reg32 offset="0x88df" name="RB_RESOLVE_CLEAR_COLOR_DW0" usage="rp_blit"/>
<reg32 offset="0x88e0" name="RB_RESOLVE_CLEAR_COLOR_DW1" usage="rp_blit"/>
@@ -1726,10 +1819,7 @@ by a particular renderpass/blit.
<reg32 offset="0x88f0" name="RB_UNKNOWN_88F0" low="0" high="11" usage="cmd"/>
<!-- could be for separate stencil? (or may not be a flag buffer at all) -->
<reg64 offset="0x88f1" name="RB_UNK_FLAG_BUFFER_BASE" type="waddress" align="64"/>
- <reg32 offset="0x88f3" name="RB_UNK_FLAG_BUFFER_PITCH">
- <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/>
- <bitfield name="ARRAY_PITCH" low="11" high="23" shr="7" type="uint"/>
- </reg32>
+ <reg32 offset="0x88f3" name="RB_UNK_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch"/>
<reg32 offset="0x88f4" name="RB_VRS_CONFIG" usage="rp_blit">
<bitfield name="UNK2" pos="2" type="boolean"/>
@@ -1737,8 +1827,9 @@ by a particular renderpass/blit.
<bitfield name="ATTACHMENT_FSR_ENABLE" pos="5" type="boolean"/>
<bitfield name="PRIMITIVE_FSR_ENABLE" pos="18" type="boolean"/>
</reg32>
- <!-- Connected to VK_EXT_fragment_density_map? -->
- <reg32 offset="0x88f5" name="RB_UNKNOWN_88F5" variants="A7XX-"/>
+ <reg32 offset="0x88f5" name="RB_BIN_FOVEAT" variants="A7XX-" usage="cmd">
+ <bitfield name="BINSCALEEN" pos="6" type="boolean"/>
+ </reg32>
<!-- 0x88f6-0x88ff invalid -->
<reg64 offset="0x8900" name="RB_DEPTH_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/>
<reg32 offset="0x8902" name="RB_DEPTH_FLAG_BUFFER_PITCH" usage="rp_blit">
@@ -1747,12 +1838,10 @@ by a particular renderpass/blit.
<bitfield name="UNK8" low="8" high="10"/>
<bitfield name="ARRAY_PITCH" low="11" high="27" shr="7" type="uint"/>
</reg32>
+
<array offset="0x8903" name="RB_COLOR_FLAG_BUFFER" stride="3" length="8" usage="rp_blit">
<reg64 offset="0" name="ADDR" type="waddress" align="64"/>
- <reg32 offset="2" name="PITCH">
- <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/>
- <bitfield name="ARRAY_PITCH" low="11" high="28" shr="7" type="uint"/>
- </reg32>
+ <reg32 offset="2" name="PITCH" type="a6xx_flag_buffer_pitch"/>
</array>
<!-- 0x891b-0x8926 invalid -->
<doc>
@@ -1815,7 +1904,7 @@ by a particular renderpass/blit.
<reg64 offset="0x8c1e" name="RB_A2D_DEST_BUFFER_BASE_2" type="waddress" align="64" usage="rp_blit"/>
<reg64 offset="0x8c20" name="RB_A2D_DEST_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/>
- <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" low="0" high="7" shr="6" type="uint" usage="rp_blit"/>
+ <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/>
<!-- this is a guess but seems likely (for NV12 with UBWC): -->
<reg64 offset="0x8c23" name="RB_A2D_DEST_FLAG_BUFFER_BASE_1" type="waddress" align="64" usage="rp_blit"/>
<reg32 offset="0x8c25" name="RB_A2D_DEST_FLAG_BUFFER_PITCH_1" low="0" high="7" shr="6" type="uint" usage="rp_blit"/>
@@ -1921,13 +2010,13 @@ by a particular renderpass/blit.
<bitfield name="CLIP_DIST_03_LOC" low="8" high="15" type="uint"/>
<bitfield name="CLIP_DIST_47_LOC" low="16" high="23" type="uint"/>
</bitset>
- <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/>
- <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/>
- <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/>
+ <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
- <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/>
- <reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/>
- <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/>
+ <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
<bitset name="a6xx_vpc_xs_siv_cntl" inline="yes">
<bitfield name="LAYERLOC" low="0" high="7" type="uint"/>
@@ -1935,23 +2024,33 @@ by a particular renderpass/blit.
<bitfield name="SHADINGRATELOC" low="16" high="23" type="uint" variants="A7XX-"/>
</bitset>
- <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/>
- <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/>
- <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/>
+ <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
- <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/>
- <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/>
- <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/>
+ <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_vpc_rast_stream_cntl" inline="yes">
+ <!-- which stream to send to GRAS -->
+ <bitfield name="STREAM" low="0" high="1" type="uint"/>
+ <!-- discard primitives before rasterization -->
+ <bitfield name="DISCARD" pos="2" type="boolean"/>
+ </bitset>
+
+ <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A6XX" usage="rp_blit"/>
+ <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/>
<reg32 offset="0x9107" name="VPC_UNKNOWN_9107" variants="A6XX" usage="rp_blit">
<!-- this mirrors VPC_RAST_STREAM_CNTL::DISCARD, although it seems it's unused -->
<bitfield name="RASTER_DISCARD" pos="0" type="boolean"/>
<bitfield name="UNK2" pos="2" type="boolean"/>
</reg32>
- <reg32 offset="0x9108" name="VPC_RAST_CNTL" usage="rp_blit">
- <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/>
- </reg32>
+ <reg32 offset="0x9108" name="VPC_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
<bitset name="a6xx_pc_cntl" inline="yes">
<bitfield name="PRIMITIVE_RESTART" pos="0" type="boolean"/>
<bitfield name="PROVOKING_VTX_LAST" pos="1" type="boolean"/>
@@ -1991,10 +2090,10 @@ by a particular renderpass/blit.
<bitfield name="VIEWS" low="2" high="6" type="uint"/>
</bitset>
- <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX-" usage="rp_blit"/>
- <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX-" usage="rp_blit"/>
- <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX-" usage="rp_blit"/>
- <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX-" usage="rp_blit"/>
+ <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX" usage="rp_blit"/>
+ <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX" usage="rp_blit"/>
+ <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX" usage="rp_blit"/>
+ <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX" usage="rp_blit"/>
<enum name="a6xx_varying_interp_mode">
<value value="0" name="INTERP_SMOOTH"/>
@@ -2011,11 +2110,11 @@ by a particular renderpass/blit.
</enum>
<!-- 0x9109-0x91ff invalid -->
- <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" usage="rp_blit">
+ <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit">
<doc>Packed array of a6xx_varying_interp_mode</doc>
<reg32 offset="0x0" name="MODE"/>
</array>
- <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE_0" stride="1" length="8" usage="rp_blit">
+ <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit">
<doc>Packed array of a6xx_varying_ps_repl_mode</doc>
<reg32 offset="0x0" name="MODE"/>
</array>
@@ -2024,12 +2123,12 @@ by a particular renderpass/blit.
<reg32 offset="0x9210" name="VPC_UNKNOWN_9210" low="0" high="31" variants="A6XX" usage="cmd"/>
<reg32 offset="0x9211" name="VPC_UNKNOWN_9211" low="0" high="31" variants="A6XX" usage="cmd"/>
- <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL_0" stride="1" length="4" usage="rp_blit">
+ <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL" stride="1" length="4" variants="A6XX-A7XX" usage="rp_blit">
<!-- one bit per varying component: -->
<reg32 offset="0" name="DISABLE"/>
</array>
- <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" usage="rp_blit">
+ <bitset name="a6xx_vpc_so_mapping_wptr" inline="yes">
<!--
Choose which DWORD to write to. There is an array of
(4 * 64) DWORD's, dumped in the devcoredump at
@@ -2056,20 +2155,25 @@ by a particular renderpass/blit.
<bitfield name="ADDR" low="0" high="7" type="hex"/>
<!-- clear all A_EN and B_EN bits for all DWORD's -->
<bitfield name="RESET" pos="16" type="boolean"/>
- </reg32>
- <!-- special register, write multiple times to load SO program (not readable) -->
- <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" usage="rp_blit">
+ </bitset>
+
+ <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" type="a6xx_vpc_so_mapping_wptr" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_vpc_so_mapping_port" inline="yes">
<bitfield name="A_BUF" low="0" high="1" type="uint"/>
<bitfield name="A_OFF" low="2" high="10" shr="2" type="uint"/>
<bitfield name="A_EN" pos="11" type="boolean"/>
<bitfield name="B_BUF" low="12" high="13" type="uint"/>
<bitfield name="B_OFF" low="14" high="22" shr="2" type="uint"/>
<bitfield name="B_EN" pos="23" type="boolean"/>
- </reg32>
+ </bitset>
+
+ <!-- special register, write multiple times to load SO program (not readable) -->
+ <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" type="a6xx_vpc_so_mapping_port" variants="A6XX-A7XX" usage="rp_blit"/>
- <reg64 offset="0x9218" name="VPC_SO_QUERY_BASE" type="waddress" align="32" usage="cmd"/>
+ <reg64 offset="0x9218" name="VPC_SO_QUERY_BASE" type="waddress" align="32" variants="A6XX-A7XX" usage="cmd"/>
- <array offset="0x921a" name="VPC_SO" stride="7" length="4" usage="cmd">
+ <array offset="0x921a" name="VPC_SO" stride="7" length="4" variants="A6XX-A7XX" usage="cmd">
<reg64 offset="0" name="BUFFER_BASE" type="waddress" align="32"/>
<reg32 offset="2" name="BUFFER_SIZE" low="2" high="31" shr="2"/>
<reg32 offset="3" name="BUFFER_STRIDE" low="0" high="9" shr="2"/>
@@ -2077,12 +2181,13 @@ by a particular renderpass/blit.
<reg64 offset="5" name="FLUSH_BASE" type="waddress" align="32"/>
</array>
- <reg32 offset="0x9236" name="VPC_REPLACE_MODE_CNTL" usage="cmd">
+ <bitset name="a6xx_vpc_replace_mode_cntl" inline="yes">
<bitfield name="INVERT" pos="0" type="boolean"/>
- </reg32>
- <!-- 0x9237-0x92ff invalid -->
- <!-- always 0x0 ? -->
- <reg32 offset="0x9300" name="VPC_UNKNOWN_9300" low="0" high="2" usage="cmd"/>
+ </bitset>
+
+ <reg32 offset="0x9236" name="VPC_REPLACE_MODE_CNTL" type="a6xx_vpc_replace_mode_cntl" variants="A6XX-A7XX" usage="cmd"/>
+
+ <reg32 offset="0x9300" name="VPC_ROTATION_CNTL" low="0" high="2" variants="A6XX-A7XX" usage="cmd"/>
<bitset name="a6xx_vpc_xs_cntl" inline="yes">
<doc>
@@ -2101,11 +2206,12 @@ by a particular renderpass/blit.
</doc>
</bitfield>
</bitset>
- <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/>
- <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/>
- <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/>
- <reg32 offset="0x9304" name="VPC_PS_CNTL" usage="rp_blit">
+ <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_vpc_ps_cntl" inline="yes">
<bitfield name="NUMNONPOSVAR" low="0" high="7" type="uint"/>
<!-- for fixed-function (i.e. no GS) gl_PrimitiveID in FS -->
<bitfield name="PRIMIDLOC" low="8" high="15" type="uint"/>
@@ -2122,9 +2228,11 @@ by a particular renderpass/blit.
ViewID through the VS.
</doc>
</bitfield>
- </reg32>
+ </bitset>
- <reg32 offset="0x9305" name="VPC_SO_CNTL" usage="rp_blit">
+ <reg32 offset="0x9304" name="VPC_PS_CNTL" type="a6xx_vpc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_vpc_so_cntl" inline="yes">
<!--
It's offset by 1, and 0 means "disabled"
-->
@@ -2133,22 +2241,28 @@ by a particular renderpass/blit.
<bitfield name="BUF2_STREAM" low="6" high="8" type="uint"/>
<bitfield name="BUF3_STREAM" low="9" high="11" type="uint"/>
<bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/>
- </reg32>
- <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" usage="rp_blit">
+ </bitset>
+
+ <reg32 offset="0x9305" name="VPC_SO_CNTL" type="a6xx_vpc_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_so_override" inline="yes">
<bitfield name="DISABLE" pos="0" type="boolean"/>
- </reg32>
- <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" variants="A6XX-" usage="rp_blit"> <!-- A702 + A7xx -->
- <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/>
- </reg32>
- <reg32 offset="0x9308" name="VPC_ATTR_BUF_GMEM_SIZE" variants="A7XX-" usage="rp_blit">
- <bitfield name="SIZE_GMEM" low="0" high="31"/>
- </reg32>
- <reg32 offset="0x9309" name="VPC_ATTR_BUF_GMEM_BASE" variants="A7XX-" usage="rp_blit">
- <bitfield name="BASE_GMEM" low="0" high="31"/>
- </reg32>
- <reg32 offset="0x9b09" name="PC_ATTR_BUF_GMEM_SIZE" variants="A7XX-" usage="rp_blit">
- <bitfield name="SIZE_GMEM" low="0" high="31"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" type="a6xx_so_override" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <reg32 offset="0x9807" name="PC_DGEN_SO_OVERRIDE" type="a6xx_so_override" variants="A7XX" usage="rp_blit"/>
+
+ <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <reg32 offset="0x9308" name="VPC_ATTR_BUF_GMEM_SIZE" variants="A7XX" type="uint" usage="rp_blit"/>
+ <reg32 offset="0x9309" name="VPC_ATTR_BUF_GMEM_BASE" variants="A7XX" type="uint" usage="rp_blit"/>
+
+ <reg32 offset="0x9b09" name="PC_ATTR_BUF_GMEM_SIZE" variants="A7XX" type="uint" usage="rp_blit"/>
+
+ <reg32 offset="0x930a" name="VPC_UNKNOWN_930A" variants="A7XX"/>
+
+ <reg32 offset="0x960a" name="VPC_FLATSHADE_MODE_CNTL" variants="A7XX"/>
<!-- 0x9307-0x95ff invalid -->
@@ -2163,52 +2277,62 @@ by a particular renderpass/blit.
<!-- TODO: regs from 0x9624-0x963a -->
<!-- 0x963b-0x97ff invalid -->
- <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" usage="rp_blit"/>
+ <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" variants="A6XX-A7XX" usage="rp_blit"/>
- <!-- always 0x0 ? -->
- <reg32 offset="0x9801" name="PC_HS_PARAM_1" usage="rp_blit">
+ <bitset name="a6xx_pc_hs_param_1" inline="yes">
<bitfield name="SIZE" low="0" high="10" type="uint"/>
<bitfield name="UNK13" pos="13"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x9801" name="PC_HS_PARAM_1" type="a6xx_pc_hs_param_1" variants="A6XX-A7XX" usage="rp_blit"/>
- <reg32 offset="0x9802" name="PC_DS_PARAM" usage="rp_blit">
+ <bitset name="a6xx_pc_ds_param" inline="yes">
<bitfield name="SPACING" low="0" high="1" type="a6xx_tess_spacing"/>
<bitfield name="OUTPUT" low="2" high="3" type="a6xx_tess_output"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x9802" name="PC_DS_PARAM" type="a6xx_pc_ds_param" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" variants="A6XX-A7XX" usage="rp_blit"/>
- <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" usage="rp_blit"/>
- <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" usage="rp_blit"/>
+ <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" variants="A6XX-A7XX" usage="rp_blit"/>
<reg32 offset="0x9805" name="PC_POWER_CNTL" low="0" high="2" usage="rp_blit"/>
- <reg32 offset="0x9806" name="PC_PS_CNTL" usage="rp_blit">
+ <bitset name="a6xx_pc_ps_cntl" inline="yes">
<bitfield name="PRIMITIVEIDEN" pos="0" type="boolean"/>
- </reg32>
+ </bitset>
- <!-- New in a6xx gen3+ -->
- <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" usage="rp_blit">
+ <reg32 offset="0x9806" name="PC_PS_CNTL" type="a6xx_pc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+
+ <bitset name="a6xx_pc_dgen_so_cntl" inline="yes">
<bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/>
- </reg32>
+ </bitset>
+
+ <!-- New in a6xx gen3+ -->
+ <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" type="a6xx_pc_dgen_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
- <reg32 offset="0x980a" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL">
+ <bitset name="a6xx_pc_dgen_su_conservative_ras_cntl" inline="yes">
<bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/>
- </reg32>
- <!-- 0x980b-0x983f invalid -->
+ </bitset>
+
+ <reg32 offset="0x980a" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_pc_dgen_su_conservative_ras_cntl" variants="A6XX-A7XX"/>
<!-- 0x9840 - 0x9842 are not readable -->
- <reg32 offset="0x9840" name="PC_DRAW_INITIATOR">
+ <bitset name="a6xx_draw_initiator" inline="yes">
<bitfield name="STATE_ID" low="0" high="7"/>
- </reg32>
+ </bitset>
- <reg32 offset="0x9841" name="PC_KERNEL_INITIATOR">
- <bitfield name="STATE_ID" low="0" high="7"/>
- </reg32>
+ <reg32 offset="0x9840" name="PC_DRAW_INITIATOR" type="a6xx_draw_initiator" variants="A6XX-A7XX"/>
+ <reg32 offset="0x9841" name="PC_KERNEL_INITIATOR" type="a6xx_draw_initiator" variants="A6XX-A7XX"/>
- <reg32 offset="0x9842" name="PC_EVENT_INITIATOR">
+ <bitset name="a6xx_event_initiator" inline="yes">
<!-- I think only the low bit is actually used? -->
<bitfield name="STATE_ID" low="16" high="23"/>
<bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x9842" name="PC_EVENT_INITIATOR" type="a6xx_event_initiator" variants="A6XX-A7XX"/>
<!--
0x9880 written in a lot of places by SQE, same value gets written
@@ -2219,45 +2343,21 @@ by a particular renderpass/blit.
<!-- 0x9843-0x997f invalid -->
- <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" variants="A6XX" usage="rp_blit">
- <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/>
- </reg32>
- <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" variants="A7XX-" usage="rp_blit">
- <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/>
- </reg32>
-
- <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" variants="A6XX" usage="rp_blit">
- <!-- which stream to send to GRAS -->
- <bitfield name="STREAM" low="0" high="1" type="uint"/>
- <!-- discard primitives before rasterization -->
- <bitfield name="DISCARD" pos="2" type="boolean"/>
- </reg32>
- <!-- VPC_RAST_STREAM_CNTL -->
- <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" variants="A7XX-" usage="rp_blit">
- <!-- which stream to send to GRAS -->
- <bitfield name="STREAM" low="0" high="1" type="uint"/>
- <!-- discard primitives before rasterization -->
- <bitfield name="DISCARD" pos="2" type="boolean"/>
- </reg32>
- <reg32 offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" variants="A7XX-" usage="rp_blit">
- <!-- which stream to send to GRAS -->
- <bitfield name="STREAM" low="0" high="1" type="uint"/>
- <!-- discard primitives before rasterization -->
- <bitfield name="DISCARD" pos="2" type="boolean"/>
- </reg32>
+ <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX" usage="rp_blit"/>
+ <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A7XX" usage="rp_blit"/>
<!-- Both are a750+.
Probably needed to correctly overlap execution of several draws.
-->
- <reg32 offset="0x9885" name="PC_HS_BUFFER_SIZE" variants="A7XX-" usage="cmd"/>
+ <reg32 offset="0x9885" name="PC_HS_BUFFER_SIZE" variants="A7XX" usage="cmd"/>
<!-- Blob adds a bit more space {0x10, 0x20, 0x30, 0x40} bytes, but the meaning of
this additional space is not known.
-->
- <reg32 offset="0x9886" name="PC_TF_BUFFER_SIZE" variants="A7XX-" usage="cmd"/>
+ <reg32 offset="0x9886" name="PC_TF_BUFFER_SIZE" variants="A7XX" usage="cmd"/>
<!-- 0x9982-0x9aff invalid -->
- <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" usage="rp_blit"/>
+ <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
<bitset name="a6xx_pc_xs_cntl" inline="yes">
<doc>
@@ -2270,18 +2370,18 @@ by a particular renderpass/blit.
<bitfield name="LAYER" pos="9" type="boolean"/>
<bitfield name="VIEW" pos="10" type="boolean"/>
<!-- note: PC_VS_CNTL doesn't have the PRIMITIVE_ID bit -->
+ <!-- since HS can't output anything, only PRIMITIVE_ID is valid -->
<bitfield name="PRIMITIVE_ID" pos="11" type="boolean"/>
<bitfield name="CLIP_MASK" low="16" high="23" type="uint"/>
<bitfield name="SHADINGRATE" pos="24" type="boolean" variants="A7XX-"/>
</bitset>
- <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/>
- <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/>
- <!-- since HS can't output anything, only PRIMITIVE_ID is valid -->
- <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/>
- <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/>
+ <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
+ <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
- <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" usage="rp_blit"/>
+ <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A6XX-A7XX" usage="rp_blit"/>
<reg32 offset="0x9b06" name="PC_PRIMITIVE_CNTL_6" variants="A6XX" usage="rp_blit">
<doc>
@@ -2290,9 +2390,9 @@ by a particular renderpass/blit.
<bitfield name="STRIDE_IN_VPC" low="0" high="10" type="uint"/>
</reg32>
- <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" usage="rp_blit"/>
+ <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
<!-- mask of enabled views, doesn't exist on A630 -->
- <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" usage="rp_blit"/>
+ <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A6XX-A7XX" usage="rp_blit"/>
<!-- 0x9b09-0x9bff invalid -->
<reg32 offset="0x9c00" name="PC_2D_EVENT_CMD">
<!-- special register (but note first 8 bits can be written/read) -->
@@ -2303,34 +2403,39 @@ by a particular renderpass/blit.
<!-- TODO: 0x9e00-0xa000 range incomplete -->
<reg32 offset="0x9e00" name="PC_DBG_ECO_CNTL"/>
<reg32 offset="0x9e01" name="PC_ADDR_MODE_CNTL" type="a5xx_address_mode"/>
- <reg64 offset="0x9e04" name="PC_DMA_BASE"/>
- <reg32 offset="0x9e06" name="PC_DMA_OFFSET" type="uint"/>
- <reg32 offset="0x9e07" name="PC_DMA_SIZE" type="uint"/>
+ <reg64 offset="0x9e04" name="PC_DMA_BASE" type="address" variants="A6XX-A7XX"/>
+ <reg32 offset="0x9e06" name="PC_DMA_OFFSET" type="uint" variants="A6XX-A7XX"/>
+ <reg32 offset="0x9e07" name="PC_DMA_SIZE" type="uint" variants="A6XX-A7XX"/>
+
<reg64 offset="0x9e08" name="PC_TESS_BASE" variants="A6XX" type="waddress" align="32" usage="cmd"/>
- <reg64 offset="0x9810" name="PC_TESS_BASE" variants="A7XX-" type="waddress" align="32" usage="cmd"/>
+ <reg64 offset="0x9810" name="PC_TESS_BASE" variants="A7XX" type="waddress" align="32" usage="cmd"/>
- <reg32 offset="0x9e0b" name="PC_DRAWCALL_CNTL" type="vgt_draw_initiator_a4xx">
+ <reg32 offset="0x9e0b" name="PC_DRAWCALL_CNTL" type="vgt_draw_initiator_a4xx" variants="A6XX-A7XX">
<doc>
Possibly not really "initiating" the draw but the layout is similar
to VGT_DRAW_INITIATOR on older gens
</doc>
</reg32>
- <reg32 offset="0x9e0c" name="PC_DRAWCALL_INSTANCE_NUM" type="uint"/>
- <reg32 offset="0x9e0d" name="PC_DRAWCALL_SIZE" type="uint"/>
+ <reg32 offset="0x9e0c" name="PC_DRAWCALL_INSTANCE_NUM" type="uint" variants="A6XX-A7XX"/>
+ <reg32 offset="0x9e0d" name="PC_DRAWCALL_SIZE" type="uint" variants="A6XX-A7XX"/>
<!-- These match the contents of CP_SET_BIN_DATA (not written directly) -->
- <reg32 offset="0x9e11" name="PC_VIS_STREAM_CNTL">
+ <bitset name="a6xx_pc_vis_stream_cntl" inline="yes">
<bitfield name="UNK0" low="0" high="15"/>
<bitfield name="VSC_SIZE" low="16" high="21" type="uint"/>
<bitfield name="VSC_N" low="22" high="26" type="uint"/>
- </reg32>
- <reg64 offset="0x9e12" name="PC_PVIS_STREAM_BIN_BASE" type="waddress" align="32"/>
- <reg64 offset="0x9e14" name="PC_DVIS_STREAM_BIN_BASE" type="waddress" align="32"/>
+ </bitset>
+
+ <reg32 offset="0x9e11" name="PC_VIS_STREAM_CNTL" type="a6xx_pc_vis_stream_cntl" variants="A6XX-A7XX"/>
+ <reg64 offset="0x9e12" name="PC_PVIS_STREAM_BIN_BASE" type="waddress" align="32" variants="A6XX-A7XX"/>
+ <reg64 offset="0x9e14" name="PC_DVIS_STREAM_BIN_BASE" type="waddress" align="32" variants="A6XX-A7XX"/>
- <reg32 offset="0x9e1c" name="PC_DRAWCALL_CNTL_OVERRIDE">
+ <bitset name="a6xx_pc_drawcall_cntl_override" inline="yes">
<doc>Written by CP_SET_VISIBILITY_OVERRIDE handler</doc>
<bitfield name="OVERRIDE" pos="0" type="boolean"/>
- </reg32>
+ </bitset>
+
+ <reg32 offset="0x9e1c" name="PC_DRAWCALL_CNTL_OVERRIDE" type="a6xx_pc_drawcall_cntl_override" variants="A6XX-A7XX"/>
<reg32 offset="0x9e24" name="PC_UNKNOWN_9E24" variants="A7XX-" usage="cmd"/>
@@ -2936,7 +3041,7 @@ by a particular renderpass/blit.
<reg32 offset="0xa9b3" name="SP_CS_PROGRAM_COUNTER_OFFSET" type="uint" usage="cmd"/>
<reg64 offset="0xa9b4" name="SP_CS_BASE" type="address" align="32" usage="cmd"/>
<reg32 offset="0xa9b6" name="SP_CS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="cmd"/>
- <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" align="32" usage="cmd"/>
+ <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" type="waddress" align="32" usage="cmd"/>
<reg32 offset="0xa9b9" name="SP_CS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="cmd"/>
<reg32 offset="0xa9ba" name="SP_CS_TSIZE" low="0" high="7" type="uint" usage="cmd"/>
<reg32 offset="0xa9bb" name="SP_CS_CONFIG" type="a6xx_sp_xs_config" usage="cmd"/>
@@ -3021,7 +3126,7 @@ by a particular renderpass/blit.
UAV state for compute shader:
-->
<reg64 offset="0xa9f2" name="SP_CS_UAV_BASE" type="address" align="16" variants="A6XX"/>
- <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX"/>
+ <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX-"/>
<reg32 offset="0xaa00" name="SP_CS_USIZE" low="0" high="6" type="uint"/>
<!-- Correlated with avgs/uvgs usage in FS -->
@@ -3104,14 +3209,19 @@ by a particular renderpass/blit.
instructions VS/HS/DS/GS/FS. See SP_CS_UAV_BASE_* for compute shaders.
-->
<reg64 offset="0xab1a" name="SP_GFX_UAV_BASE" type="address" align="16" usage="cmd"/>
- <reg32 offset="0xab20" name="SP_GFX_USIZE" low="0" high="6" type="uint" usage="cmd"/>
+ <reg32 offset="0xab20" name="SP_GFX_USIZE" low="0" high="6" type="uint" variants="A6XX-A7XX" usage="cmd"/>
- <reg32 offset="0xab22" name="SP_UNKNOWN_AB22" variants="A7XX-" usage="cmd"/>
+ <reg32 offset="0xab22" name="SP_UNKNOWN_AB22" variants="A7XX" usage="cmd"/>
+
+ <enum name="a6xx_sp_a2d_output_ifmt_type">
+ <value name="OUTPUT_IFMT_2D_FLOAT" value="0"/>
+ <value name="OUTPUT_IFMT_2D_SINT" value="1"/>
+ <value name="OUTPUT_IFMT_2D_UINT" value="2"/>
+ </enum>
<bitset name="a6xx_sp_a2d_output_info" inline="yes">
- <bitfield name="NORM" pos="0" type="boolean"/>
- <bitfield name="SINT" pos="1" type="boolean"/>
- <bitfield name="UINT" pos="2" type="boolean"/>
+ <bitfield name="HALF_PRECISION" pos="0" type="boolean"/>
+ <bitfield name="IFMT_TYPE" low="1" high="2" type="a6xx_sp_a2d_output_ifmt_type"/>
<!-- looks like HW only cares about the base type of this format,
which matches the ifmt? -->
<bitfield name="COLOR_FORMAT" low="3" high="10" type="a6xx_format"/>
@@ -3156,7 +3266,7 @@ by a particular renderpass/blit.
<reg32 offset="0xae6b" name="SP_UNKNOWN_AE6B" variants="A7XX-" usage="cmd"/>
<reg32 offset="0xae6c" name="SP_HLSQ_DBG_ECO_CNTL" variants="A7XX-" usage="cmd"/>
<reg32 offset="0xae6d" name="SP_READ_SEL" variants="A7XX-">
- <bitfield name="LOCATION" low="18" high="19" type="a7xx_state_location"/>
+ <bitfield name="LOCATION" low="18" high="20" type="a7xx_state_location"/>
<bitfield name="PIPE" low="16" high="17" type="a7xx_pipe"/>
<bitfield name="STATETYPE" low="8" high="15" type="a7xx_statetype_id"/>
<bitfield name="USPTP" low="4" high="7"/>
@@ -3192,7 +3302,7 @@ by a particular renderpass/blit.
<!-- looks to work in the same way as a5xx: -->
<reg64 offset="0xb302" name="TPL1_GFX_BORDER_COLOR_BASE" type="address" align="128" usage="cmd"/>
- <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="rp_blit"/>
+ <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/>
<reg32 offset="0xb305" name="TPL1_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit"/>
<reg32 offset="0xb306" name="TPL1_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit"/>
<reg32 offset="0xb307" name="TPL1_WINDOW_OFFSET" type="a6xx_reg_xy" usage="rp_blit"/>
@@ -3232,12 +3342,12 @@ by a particular renderpass/blit.
</reg32>
<reg32 offset="0xb2c0" name="TPL1_A2D_SRC_TEXTURE_INFO" type="a6xx_a2d_src_texture_info" variants="A7XX-" usage="rp_blit"/>
- <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX">
+ <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX-">
<bitfield name="WIDTH" low="0" high="14" type="uint"/>
<bitfield name="HEIGHT" low="15" high="29" type="uint"/>
</reg32>
<reg64 offset="0xb2c2" name="TPL1_A2D_SRC_TEXTURE_BASE" type="address" align="16" variants="A7XX-" usage="rp_blit"/>
- <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX">
+ <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX-">
<!--
Bits from 3..9 must be zero unless 'TPL1_A2D_BLT_CNTL::TYPE'
is A6XX_TEX_IMG_BUFFER, which allows for lower alignment.
@@ -3270,13 +3380,13 @@ by a particular renderpass/blit.
<reg32 offset="0xb2ce" name="SP_PS_UNKNOWN_B4CE" low="0" high="31" variants="A7XX"/>
<reg32 offset="0xb2cf" name="SP_PS_UNKNOWN_B4CF" low="0" high="30" variants="A7XX"/>
<reg32 offset="0xb2d0" name="SP_PS_UNKNOWN_B4D0" low="0" high="29" variants="A7XX"/>
- <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX"/>
+ <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-"/>
<reg32 offset="0xb2d2" name="TPL1_A2D_BLT_CNTL" variants="A7XX-" usage="rp_blit">
<bitfield name="RAW_COPY" pos="0" type="boolean"/>
<bitfield name="START_OFFSET_TEXELS" low="16" high="21"/>
<bitfield name="TYPE" low="29" high="31" type="a6xx_tex_type"/>
</reg32>
- <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-" usage="rp_blit"/>
+ <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX" usage="rp_blit"/>
<!-- always 0x100000 or 0x1000000? -->
<reg32 offset="0xb600" name="TPL1_DBG_ECO_CNTL" low="0" high="25" usage="cmd"/>
@@ -3296,17 +3406,13 @@ by a particular renderpass/blit.
</reg32>
<reg32 offset="0xb605" name="TPL1_UNKNOWN_B605" low="0" high="7" type="uint" variants="A6XX" usage="cmd"/> <!-- always 0x0 or 0x44 ? -->
- <reg32 offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE_0" low="0" high="29" variants="A6XX"/>
- <reg32 offset="0xb609" name="TPL1_BICUBIC_WEIGHTS_TABLE_1" low="0" high="29" variants="A6XX"/>
- <reg32 offset="0xb60a" name="TPL1_BICUBIC_WEIGHTS_TABLE_2" low="0" high="29" variants="A6XX"/>
- <reg32 offset="0xb60b" name="TPL1_BICUBIC_WEIGHTS_TABLE_3" low="0" high="29" variants="A6XX"/>
- <reg32 offset="0xb60c" name="TPL1_BICUBIC_WEIGHTS_TABLE_4" low="0" high="29" variants="A6XX"/>
+ <array offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE" stride="1" length="5" variants="A6XX">
+ <reg32 offset="0" name="REG" low="0" high="29"/>
+ </array>
- <reg32 offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE_0" low="0" high="29" variants="A7XX" usage="cmd"/>
- <reg32 offset="0xb609" name="TPL1_BICUBIC_WEIGHTS_TABLE_1" low="0" high="29" variants="A7XX" usage="cmd"/>
- <reg32 offset="0xb60a" name="TPL1_BICUBIC_WEIGHTS_TABLE_2" low="0" high="29" variants="A7XX" usage="cmd"/>
- <reg32 offset="0xb60b" name="TPL1_BICUBIC_WEIGHTS_TABLE_3" low="0" high="29" variants="A7XX" usage="cmd"/>
- <reg32 offset="0xb60c" name="TPL1_BICUBIC_WEIGHTS_TABLE_4" low="0" high="29" variants="A7XX" usage="cmd"/>
+ <array offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE" stride="1" length="5" variants="A7XX">
+ <reg32 offset="0" name="REG" low="0" high="29" usage="cmd"/>
+ </array>
<array offset="0xb610" name="TPL1_PERFCTR_TP_SEL" stride="1" length="12" variants="A6XX"/>
<array offset="0xb610" name="TPL1_PERFCTR_TP_SEL" stride="1" length="18" variants="A7XX"/>
@@ -3638,7 +3744,7 @@ by a particular renderpass/blit.
<reg32 offset="0xbb10" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="rp_blit"/>
<reg32 offset="0xab03" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="rp_blit"/>
- <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX_0" stride="1" length="64" variants="A7XX-"/>
+ <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX" stride="1" length="64" variants="A7XX"/>
<reg32 offset="0xbb11" name="HLSQ_SHARED_CONSTS" variants="A6XX" usage="cmd">
<doc>
@@ -3800,7 +3906,7 @@ by a particular renderpass/blit.
<reg32 offset="0x0030" name="CFG_DBGBUS_TRACE_BUF2"/>
</domain>
-<domain name="A7XX_CX_DBGC" width="32">
+<domain name="A7XX_CX_DBGC" width="32" varset="chip">
<!-- Bitfields shifted, but otherwise the same: -->
<reg32 offset="0x0000" name="CFG_DBGBUS_SEL_A" variants="A7XX-">
<bitfield high="7" low="0" name="PING_INDEX"/>
diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml
index 307d43dda8a2..56cfaff614a4 100644
--- a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml
+++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml
@@ -9,38 +9,6 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<domain name="A6XX_TEX_SAMP" width="32">
<doc>Texture sampler dwords</doc>
- <enum name="a6xx_tex_filter"> <!-- same as a4xx? -->
- <value name="A6XX_TEX_NEAREST" value="0"/>
- <value name="A6XX_TEX_LINEAR" value="1"/>
- <value name="A6XX_TEX_ANISO" value="2"/>
- <value name="A6XX_TEX_CUBIC" value="3"/> <!-- a650 only -->
- </enum>
- <enum name="a6xx_tex_clamp"> <!-- same as a4xx? -->
- <value name="A6XX_TEX_REPEAT" value="0"/>
- <value name="A6XX_TEX_CLAMP_TO_EDGE" value="1"/>
- <value name="A6XX_TEX_MIRROR_REPEAT" value="2"/>
- <value name="A6XX_TEX_CLAMP_TO_BORDER" value="3"/>
- <value name="A6XX_TEX_MIRROR_CLAMP" value="4"/>
- </enum>
- <enum name="a6xx_tex_aniso"> <!-- same as a4xx? -->
- <value name="A6XX_TEX_ANISO_1" value="0"/>
- <value name="A6XX_TEX_ANISO_2" value="1"/>
- <value name="A6XX_TEX_ANISO_4" value="2"/>
- <value name="A6XX_TEX_ANISO_8" value="3"/>
- <value name="A6XX_TEX_ANISO_16" value="4"/>
- </enum>
- <enum name="a6xx_reduction_mode">
- <value name="A6XX_REDUCTION_MODE_AVERAGE" value="0"/>
- <value name="A6XX_REDUCTION_MODE_MIN" value="1"/>
- <value name="A6XX_REDUCTION_MODE_MAX" value="2"/>
- </enum>
- <enum name="a6xx_fast_border_color">
- <!-- R B G A -->
- <value name="A6XX_BORDER_COLOR_0_0_0_0" value="0"/>
- <value name="A6XX_BORDER_COLOR_0_0_0_1" value="1"/>
- <value name="A6XX_BORDER_COLOR_1_1_1_0" value="2"/>
- <value name="A6XX_BORDER_COLOR_1_1_1_1" value="3"/>
- </enum>
<reg32 offset="0" name="0">
<bitfield name="MIPFILTER_LINEAR_NEAR" pos="0" type="boolean"/>
@@ -79,14 +47,6 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<domain name="A6XX_TEX_CONST" width="32" varset="chip">
<doc>Texture constant dwords</doc>
- <enum name="a6xx_tex_swiz"> <!-- same as a4xx? -->
- <value name="A6XX_TEX_X" value="0"/>
- <value name="A6XX_TEX_Y" value="1"/>
- <value name="A6XX_TEX_Z" value="2"/>
- <value name="A6XX_TEX_W" value="3"/>
- <value name="A6XX_TEX_ZERO" value="4"/>
- <value name="A6XX_TEX_ONE" value="5"/>
- </enum>
<reg32 offset="0" name="0">
<bitfield name="TILE_MODE" low="0" high="1" type="a6xx_tile_mode"/>
<bitfield name="SRGB" pos="2" type="boolean"/>
diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml
index 665539b098c6..4e42f055b85f 100644
--- a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml
+++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml
@@ -320,14 +320,14 @@ to upconvert to 32b float internally?
16b float: 3
-->
<enum name="a6xx_2d_ifmt">
- <value value="0x10" name="R2D_UNORM8"/>
<value value="0x7" name="R2D_INT32"/>
<value value="0x6" name="R2D_INT16"/>
<value value="0x5" name="R2D_INT8"/>
<value value="0x4" name="R2D_FLOAT32"/>
<value value="0x3" name="R2D_FLOAT16"/>
+ <value value="0x2" name="R2D_SNORM8"/>
<value value="0x1" name="R2D_UNORM8_SRGB"/>
- <value value="0x0" name="R2D_RAW"/>
+ <value value="0x0" name="R2D_UNORM8"/>
</enum>
<enum name="a6xx_tex_type">
@@ -380,4 +380,50 @@ to upconvert to 32b float internally?
<value value="0x3" name="TESS_CCW_TRIS"/>
</enum>
+<enum name="a6xx_tex_filter"> <!-- same as a4xx? -->
+ <value name="A6XX_TEX_NEAREST" value="0"/>
+ <value name="A6XX_TEX_LINEAR" value="1"/>
+ <value name="A6XX_TEX_ANISO" value="2"/>
+ <value name="A6XX_TEX_CUBIC" value="3"/> <!-- a650 only -->
+</enum>
+
+<enum name="a6xx_tex_clamp"> <!-- same as a4xx? -->
+ <value name="A6XX_TEX_REPEAT" value="0"/>
+ <value name="A6XX_TEX_CLAMP_TO_EDGE" value="1"/>
+ <value name="A6XX_TEX_MIRROR_REPEAT" value="2"/>
+ <value name="A6XX_TEX_CLAMP_TO_BORDER" value="3"/>
+ <value name="A6XX_TEX_MIRROR_CLAMP" value="4"/>
+</enum>
+
+<enum name="a6xx_tex_aniso"> <!-- same as a4xx? -->
+ <value name="A6XX_TEX_ANISO_1" value="0"/>
+ <value name="A6XX_TEX_ANISO_2" value="1"/>
+ <value name="A6XX_TEX_ANISO_4" value="2"/>
+ <value name="A6XX_TEX_ANISO_8" value="3"/>
+ <value name="A6XX_TEX_ANISO_16" value="4"/>
+</enum>
+
+<enum name="a6xx_reduction_mode">
+ <value name="A6XX_REDUCTION_MODE_AVERAGE" value="0"/>
+ <value name="A6XX_REDUCTION_MODE_MIN" value="1"/>
+ <value name="A6XX_REDUCTION_MODE_MAX" value="2"/>
+</enum>
+
+<enum name="a6xx_fast_border_color">
+ <!-- R B G A -->
+ <value name="A6XX_BORDER_COLOR_0_0_0_0" value="0"/>
+ <value name="A6XX_BORDER_COLOR_0_0_0_1" value="1"/>
+ <value name="A6XX_BORDER_COLOR_1_1_1_0" value="2"/>
+ <value name="A6XX_BORDER_COLOR_1_1_1_1" value="3"/>
+</enum>
+
+<enum name="a6xx_tex_swiz"> <!-- same as a4xx? -->
+ <value name="A6XX_TEX_X" value="0"/>
+ <value name="A6XX_TEX_Y" value="1"/>
+ <value name="A6XX_TEX_Z" value="2"/>
+ <value name="A6XX_TEX_W" value="3"/>
+ <value name="A6XX_TEX_ZERO" value="4"/>
+ <value name="A6XX_TEX_ONE" value="5"/>
+</enum>
+
</database>
diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml
index 3d2cc339b8f1..b15a242d974d 100644
--- a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml
+++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml
@@ -99,6 +99,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<bitfield name="GX_HM_GDSC_POWER_OFF" pos="6" type="boolean"/>
<bitfield name="GX_HM_CLK_OFF" pos="7" type="boolean"/>
</reg32>
+ <reg32 offset="0x50d0" name="GMU_SPTPRAC_PWR_CLK_STATUS" variants="A7XX">
+ <bitfield name="GX_HM_GDSC_POWER_OFF" pos="0" type="boolean"/>
+ <bitfield name="GX_HM_CLK_OFF" pos="1" type="boolean"/>
+ </reg32>
<reg32 offset="0x50e4" name="GMU_GPU_NAP_CTRL">
<bitfield name="HW_NAP_ENABLE" pos="0"/>
<bitfield name="SID" low="4" high="8"/>
@@ -127,6 +131,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<reg32 offset="0x5088" name="GMU_ALWAYS_ON_COUNTER_L"/>
<reg32 offset="0x5089" name="GMU_ALWAYS_ON_COUNTER_H"/>
<reg32 offset="0x50c3" name="GMU_GMU_PWR_COL_KEEPALIVE"/>
+ <reg32 offset="0x50c4" name="GMU_PWR_COL_PREEMPT_KEEPALIVE"/>
<reg32 offset="0x5180" name="GMU_HFI_CTRL_STATUS"/>
<reg32 offset="0x5181" name="GMU_HFI_VERSION_INFO"/>
<reg32 offset="0x5182" name="GMU_HFI_SFR_ADDR"/>
@@ -228,6 +233,12 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<reg32 offset="0x03ee" name="RSCC_TCS1_DRV0_STATUS"/>
<reg32 offset="0x0496" name="RSCC_TCS2_DRV0_STATUS"/>
<reg32 offset="0x053e" name="RSCC_TCS3_DRV0_STATUS"/>
+ <reg32 offset="0x05e6" name="RSCC_TCS4_DRV0_STATUS" variants="A7XX"/>
+ <reg32 offset="0x068e" name="RSCC_TCS5_DRV0_STATUS" variants="A7XX"/>
+ <reg32 offset="0x0736" name="RSCC_TCS6_DRV0_STATUS" variants="A7XX"/>
+ <reg32 offset="0x07de" name="RSCC_TCS7_DRV0_STATUS" variants="A7XX"/>
+ <reg32 offset="0x0886" name="RSCC_TCS8_DRV0_STATUS" variants="A7XX"/>
+ <reg32 offset="0x092e" name="RSCC_TCS9_DRV0_STATUS" variants="A7XX"/>
</domain>
</database>
diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
index 7abc08635495..0e10e1c6d263 100644
--- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
+++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
@@ -120,12 +120,12 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<value name="LRZ_FLUSH" value="38" variants="A5XX-"/>
<value name="BLIT_OP_FILL_2D" value="39" variants="A5XX-"/>
<value name="BLIT_OP_COPY_2D" value="40" variants="A5XX-A6XX"/>
- <value name="UNK_40" value="40" variants="A7XX"/>
+ <value name="LRZ_CACHE_INVALIDATE" value="40" variants="A7XX"/>
<value name="LRZ_Q_CACHE_INVALIDATE" value="41" variants="A7XX"/>
<value name="BLIT_OP_SCALE_2D" value="42" variants="A5XX-"/>
<value name="CONTEXT_DONE_2D" value="43" variants="A5XX-"/>
- <value name="UNK_2C" value="44" variants="A5XX-"/>
- <value name="UNK_2D" value="45" variants="A5XX-"/>
+ <value name="VSC_BINNING_START" value="44" variants="A5XX-"/>
+ <value name="VSC_BINNING_END" value="45" variants="A5XX-"/>
<!-- a6xx events -->
<doc>
@@ -523,7 +523,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<!--
Seems to set the mode flags which control which CP_SET_DRAW_STATE
packets are executed, based on their ENABLE_MASK values
-
+
CP_SET_MODE w/ payload of 0x1 seems to cause CP_SET_DRAW_STATE
packets w/ ENABLE_MASK & 0x6 to execute immediately
-->
@@ -640,8 +640,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<value name="CP_BV_BR_COUNT_OPS" value="0x1b" variants="A7XX-"/>
<doc> Clears, adds to local, or adds to global timestamp </doc>
<value name="CP_MODIFY_TIMESTAMP" value="0x1c" variants="A7XX-"/>
- <!-- similar to CP_CONTEXT_REG_BUNCH, but discards first two dwords?? -->
- <value name="CP_CONTEXT_REG_BUNCH2" value="0x5d" variants="A7XX-"/>
+ <value name="CP_NON_CONTEXT_REG_BUNCH" value="0x5d" variants="A7XX-"/>
<doc>
Write to a scratch memory that is read by CP_REG_TEST with
SOURCE_SCRATCH_MEM set. It's not the same scratch as scratch registers.
@@ -918,12 +917,6 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</reg32>
<stripe varset="chip" variants="A5XX-">
- <reg32 offset="4" name="4">
- <bitfield name="INDX_BASE_LO" low="0" high="31"/>
- </reg32>
- <reg32 offset="5" name="5">
- <bitfield name="INDX_BASE_HI" low="0" high="31"/>
- </reg32>
<reg64 offset="4" name="INDX_BASE" type="address"/>
<reg32 offset="6" name="6">
<!-- max # of elements in index buffer -->
@@ -1099,8 +1092,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/>
<bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/>
<bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/>
- <bitfield name="GROUP_ID" low="24" high="28" type="uint"/>
+ <!-- high bit is 28 until a750: -->
+ <bitfield name="GROUP_ID" low="24" high="29" type="uint"/>
</reg32>
+ <reg64 offset="1" name="ADDR" type="address"/>
<reg32 offset="1" name="1">
<bitfield name="ADDR_LO" low="0" high="31" type="hex"/>
</reg32>
@@ -1166,26 +1161,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</reg32>
<stripe varset="a7xx_abs_mask_mode" variants="NO_ABS_MASK">
<!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS -->
- <reg32 offset="1" name="1">
- <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/>
- </reg32>
- <reg32 offset="2" name="2">
- <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/>
- </reg32>
+ <reg64 offset="1" name="BIN_DATA_ADDR" type="address"/>
<!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)-->
- <reg32 offset="3" name="3">
- <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/>
- </reg32>
- <reg32 offset="4" name="4">
- <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/>
- </reg32>
+ <reg64 offset="3" name="BIN_SIZE_ADDR" type="address"/>
<!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: -->
- <reg32 offset="5" name="5">
- <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/>
- </reg32>
- <reg32 offset="6" name="6">
- <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/>
- </reg32>
+ <reg64 offset="5" name="BIN_PRIM_STRM" type="address"/>
<!--
a7xx adds a few more addresses to the end of the pkt
-->
@@ -1195,26 +1175,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<stripe varset="a7xx_abs_mask_mode" variants="ABS_MASK">
<reg32 offset="1" name="ABS_MASK"/>
<!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS -->
- <reg32 offset="2" name="2">
- <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/>
- </reg32>
- <reg32 offset="3" name="3">
- <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/>
- </reg32>
+ <reg64 offset="2" name="BIN_DATA_ADDR" type="address"/>
<!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)-->
- <reg32 offset="4" name="4">
- <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/>
- </reg32>
- <reg32 offset="5" name="5">
- <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/>
- </reg32>
+ <reg64 offset="4" name="BIN_SIZE_ADDR" type="address"/>
<!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: -->
- <reg32 offset="6" name="6">
- <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/>
- </reg32>
- <reg32 offset="7" name="7">
- <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/>
- </reg32>
+ <reg64 offset="6" name="BIN_PRIM_STRM" type="address"/>
<!--
a7xx adds a few more addresses to the end of the pkt
-->
@@ -1300,7 +1265,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</reg32>
</domain>
-<domain name="CP_REG_TO_MEM" width="32">
+<domain name="CP_REG_TO_MEM" width="32" prefix="chip">
<reg32 offset="0" name="0">
<bitfield name="REG" low="0" high="17" type="hex"/>
<!-- number of registers/dwords copied is max(CNT, 1). -->
@@ -1308,12 +1273,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<bitfield name="64B" pos="30" type="boolean"/>
<bitfield name="ACCUMULATE" pos="31" type="boolean"/>
</reg32>
- <reg32 offset="1" name="1">
- <bitfield name="DEST" low="0" high="31"/>
- </reg32>
- <reg32 offset="2" name="2" varset="chip" variants="A5XX-">
- <bitfield name="DEST_HI" low="0" high="31"/>
- </reg32>
+ <stripe varset="chip" variants="A2XX-A4XX">
+ <reg32 offset="1" name="DEST" type="address"/>
+ </stripe>
+ <stripe varset="chip" variants="A5XX-">
+ <reg64 offset="1" name="DEST" type="address"/>
+ </stripe>
</domain>
<domain name="CP_REG_TO_MEM_OFFSET_REG" width="32">
@@ -1329,12 +1294,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<bitfield name="64B" pos="30" type="boolean"/>
<bitfield name="ACCUMULATE" pos="31" type="boolean"/>
</reg32>
- <reg32 offset="1" name="1">
- <bitfield name="DEST" low="0" high="31"/>
- </reg32>
- <reg32 offset="2" name="2" varset="chip" variants="A5XX-">
- <bitfield name="DEST_HI" low="0" high="31"/>
- </reg32>
+ <reg64 offset="1" name="DEST" type="waddress"/>
<reg32 offset="3" name="3">
<bitfield name="OFFSET0" low="0" high="17" type="hex"/>
<bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/>
@@ -1354,18 +1314,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<bitfield name="64B" pos="30" type="boolean"/>
<bitfield name="ACCUMULATE" pos="31" type="boolean"/>
</reg32>
- <reg32 offset="1" name="1">
- <bitfield name="DEST" low="0" high="31"/>
- </reg32>
- <reg32 offset="2" name="2" varset="chip" variants="A5XX-">
- <bitfield name="DEST_HI" low="0" high="31"/>
- </reg32>
- <reg32 offset="3" name="3">
- <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/>
- </reg32>
- <reg32 offset="4" name="4">
- <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/>
- </reg32>
+ <reg64 offset="1" name="DEST" type="waddress"/>
+ <reg64 offset="3" name="OFFSET" type="waddress"/>
</domain>
<domain name="CP_MEM_TO_REG" width="32">
@@ -1378,12 +1328,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<!-- does the same thing as CP_MEM_TO_MEM::UNK31 -->
<bitfield name="UNK31" pos="31" type="boolean"/>
</reg32>
- <reg32 offset="1" name="1">
- <bitfield name="SRC" low="0" high="31"/>
- </reg32>
- <reg32 offset="2" name="2" varset="chip" variants="A5XX-">
- <bitfield name="SRC_HI" low="0" high="31"/>
- </reg32>
+ <stripe varset="chip" variants="A2XX-A4XX">
+ <reg32 offset="1" name="SRC" type="address"/>
+ </stripe>
+ <stripe varset="chip" variants="A5XX-">
+ <reg64 offset="1" name="SRC" type="address"/>
+ </stripe>
</domain>
<domain name="CP_MEM_TO_MEM" width="32">
@@ -1403,6 +1353,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<!-- some other kind of wait -->
<bitfield name="UNK31" pos="31" type="boolean"/>
</reg32>
+ <reg64 offset="1" name="DST" type="waddress"/>
+ <reg64 offset="3" name="SRC_A" type="address"/>
+ <reg64 offset="5" name="SRC_B" type="address"/>
+ <reg64 offset="7" name="SRC_C" type="address"/>
<!--
followed by sequence of addresses.. the first is the
destination and the rest are N src addresses which are
@@ -1461,12 +1415,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</domain>
<domain name="CP_MEM_WRITE" width="32">
- <reg32 offset="0" name="0">
- <bitfield name="ADDR_LO" low="0" high="31"/>
- </reg32>
- <reg32 offset="1" name="1">
- <bitfield name="ADDR_HI" low="0" high="31"/>
- </reg32>
+ <stripe varset="chip" variants="A2XX-A4XX">
+ <reg32 offset="0" name="ADDR" type="address"/>
+ </stripe>
+ <stripe varset="chip" variants="A5XX-">
+ <reg64 offset="0" name="ADDR" type="address"/>
+ </stripe>
<!-- followed by the DWORDs to write -->
</domain>
@@ -1518,24 +1472,14 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<bitfield name="POLL" low="4" high="5" type="poll_memory_type"/>
<bitfield name="WRITE_MEMORY" pos="8" type="boolean"/>
</reg32>
- <reg32 offset="1" name="1">
- <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/>
- </reg32>
- <reg32 offset="2" name="2">
- <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/>
- </reg32>
+ <reg64 offset="1" name="POLL_ADDR" type="address"/>
<reg32 offset="3" name="3">
<bitfield name="REF" low="0" high="31"/>
</reg32>
<reg32 offset="4" name="4">
<bitfield name="MASK" low="0" high="31"/>
</reg32>
- <reg32 offset="5" name="5">
- <bitfield name="WRITE_ADDR_LO" low="0" high="31" type="hex"/>
- </reg32>
- <reg32 offset="6" name="6">
- <bitfield name="WRITE_ADDR_HI" low="0" high="31" type="hex"/>
- </reg32>
+ <reg64 offset="5" name="WRITE_ADDR" type="waddress"/>
<reg32 offset="7" name="7">
<bitfield name="WRITE_DATA" low="0" high="31"/>
</reg32>
@@ -1550,12 +1494,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<!-- Reserved for flags, presumably? Unused in FW -->
<bitfield name="RESERVED" low="0" high="31" type="hex"/>
</reg32>
- <reg32 offset="1" name="1">
- <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/>
- </reg32>
- <reg32 offset="2" name="2">
- <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/>
- </reg32>
+ <reg64 offset="1" name="POLL_ADDR" type="address"/>
<reg32 offset="3" name="3">
<bitfield name="REF" low="0" high="31"/>
</reg32>
@@ -1573,12 +1512,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<bitfield name="POLL" low="4" high="5" type="poll_memory_type"/>
<bitfield name="WRITE_MEMORY" pos="8" type="boolean"/>
</reg32>
- <reg32 offset="1" name="1">
- <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/>
- </reg32>
- <reg32 offset="2" name="2">
- <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/>
- </reg32>
+ <reg64 offset="1" name="POLL_ADDR" type="address"/>
<reg32 offset="3" name="3">
<bitfield name="REF" low="0" high="31"/>
</reg32>
@@ -1712,12 +1646,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
TODO what is gpuaddr for, seems to be all 0's.. maybe needed for
context switch?
-->
- <reg32 offset="1" name="1">
- <bitfield name="ADDR_0_LO" low="0" high="31"/>
- </reg32>
- <reg32 offset="2" name="2">
- <bitfield name="ADDR_0_HI" low="0" high="31"/>
- </reg32>
+ <reg64 offset="1" name="ADDR" type="waddress"/>
<reg32 offset="3" name="3">
<!-- ??? -->
</reg32>
@@ -1832,9 +1761,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<reg32 offset="0" name="0">
</reg32>
<stripe varset="chip" variants="A4XX">
- <reg32 offset="1" name="1">
- <bitfield name="ADDR" low="0" high="31"/>
- </reg32>
+ <reg32 offset="1" name="ADDR" type="address"/>
<reg32 offset="2" name="2">
<!-- localsize is value minus one: -->
<bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/>
@@ -1843,12 +1770,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</reg32>
</stripe>
<stripe varset="chip" variants="A5XX-">
- <reg32 offset="1" name="1">
- <bitfield name="ADDR_LO" low="0" high="31"/>
- </reg32>
- <reg32 offset="2" name="2">
- <bitfield name="ADDR_HI" low="0" high="31"/>
- </reg32>
+ <reg64 offset="1" name="ADDR" type="address"/>
<reg32 offset="3" name="3">
<!-- localsize is value minus one: -->
<bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/>
@@ -2161,12 +2083,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</doc>
</value>
</enum>
- <reg32 offset="0" name="0">
- <bitfield name="ADDR_LO" low="0" high="31"/>
- </reg32>
- <reg32 offset="1" name="1">
- <bitfield name="ADDR_HI" low="0" high="31"/>
- </reg32>
+ <reg64 offset="0" name="ADDR" type="address"/>
<reg32 offset="2" name="2">
<bitfield name="DWORDS" low="0" high="19" type="uint"/>
<bitfield name="TYPE" low="20" high="21" type="amble_type"/>
diff --git a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml
index 4e5ac0f25dea..f41516dd0567 100644
--- a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml
+++ b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml
@@ -22,7 +22,16 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<reg32 offset="0x00018" name="GLBL_CTRL"/>
<reg32 offset="0x0001c" name="RBUF_CTRL"/>
<reg32 offset="0x00020" name="VREG_CTRL_0"/>
- <reg32 offset="0x00024" name="CTRL_0"/>
+ <reg32 offset="0x00024" name="CTRL_0">
+ <bitfield name="CLKSL_SHUTDOWNB" pos="7" type="boolean"/>
+ <bitfield name="DIGTOP_PWRDN_B" pos="6" type="boolean"/>
+ <bitfield name="PLL_SHUTDOWNB" pos="5" type="boolean"/>
+ <bitfield name="DLN3_SHUTDOWNB" pos="4" type="boolean"/>
+ <bitfield name="DLN2_SHUTDOWNB" pos="3" type="boolean"/>
+ <bitfield name="CLK_SHUTDOWNB" pos="2" type="boolean"/>
+ <bitfield name="DLN1_SHUTDOWNB" pos="1" type="boolean"/>
+ <bitfield name="DLN0_SHUTDOWNB" pos="0" type="boolean"/>
+ </reg32>
<reg32 offset="0x00028" name="CTRL_1"/>
<reg32 offset="0x0002c" name="CTRL_2"/>
<reg32 offset="0x00030" name="CTRL_3"/>
diff --git a/drivers/gpu/drm/msm/registers/gen_header.py b/drivers/gpu/drm/msm/registers/gen_header.py
index a409404627c7..1d603dadfabd 100644
--- a/drivers/gpu/drm/msm/registers/gen_header.py
+++ b/drivers/gpu/drm/msm/registers/gen_header.py
@@ -11,7 +11,6 @@ import collections
import argparse
import time
import datetime
-import re
class Error(Exception):
def __init__(self, message):
@@ -31,7 +30,7 @@ class Enum(object):
def names(self):
return [n for (n, value) in self.values]
- def dump(self):
+ def dump(self, is_deprecated):
use_hex = False
for (name, value) in self.values:
if value > 0x1000:
@@ -45,7 +44,7 @@ class Enum(object):
print("\t%s = %d," % (name, value))
print("};\n")
- def dump_pack_struct(self):
+ def dump_pack_struct(self, is_deprecated):
pass
class Field(object):
@@ -70,11 +69,11 @@ class Field(object):
raise parser.error("booleans should be 1 bit fields")
elif self.type == "float" and not (high - low == 31 or high - low == 15):
raise parser.error("floats should be 16 or 32 bit fields")
- elif not self.type in builtin_types and not self.type in parser.enums:
+ elif self.type not in builtin_types and self.type not in parser.enums:
raise parser.error("unknown type '%s'" % self.type)
def ctype(self, var_name):
- if self.type == None:
+ if self.type is None:
type = "uint32_t"
val = var_name
elif self.type == "boolean":
@@ -124,7 +123,7 @@ def field_name(reg, f):
name = f.name.lower()
else:
# We hit this path when a reg is defined with no bitset fields, ie.
- # <reg32 offset="0x88db" name="RB_BLIT_DST_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/>
+ # <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/>
name = reg.name.lower()
if (name in [ "double", "float", "int" ]) or not (name[0].isalpha()):
@@ -146,10 +145,23 @@ def indices_strides(indices):
"%s(i%d)" % (offset, idx)
for (idx, (ctype, stride, offset)) in enumerate(indices)])
+def is_number(str):
+ try:
+ int(str)
+ return True
+ except ValueError:
+ return False
+
+def sanitize_variant(variant):
+ if variant and "-" in variant:
+ return variant[:variant.index("-")]
+ return variant
+
class Bitset(object):
def __init__(self, name, template):
self.name = name
self.inline = False
+ self.reg = None
if template:
self.fields = template.fields[:]
else:
@@ -175,11 +187,7 @@ class Bitset(object):
print("#endif\n")
print(" return (struct fd_reg_pair) {")
- if reg.array:
- print(" .reg = REG_%s(__i)," % reg.full_name)
- else:
- print(" .reg = REG_%s," % reg.full_name)
-
+ print(" .reg = (uint32_t)%s," % reg.reg_offset())
print(" .value =")
for f in self.fields:
if f.type in [ "address", "waddress" ]:
@@ -204,7 +212,7 @@ class Bitset(object):
print(" };")
- def dump_pack_struct(self, reg=None):
+ def dump_pack_struct(self, is_deprecated, reg=None):
if not reg:
return
@@ -229,12 +237,15 @@ class Bitset(object):
tab_to(" uint32_t", "dword;")
print("};\n")
+ depcrstr = ""
+ if is_deprecated:
+ depcrstr = " FD_DEPRECATED"
if reg.array:
- print("static inline struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" %
- (prefix, prefix))
+ print("static inline%s struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" %
+ (depcrstr, prefix, prefix))
else:
- print("static inline struct fd_reg_pair\npack_%s(struct %s fields)\n{" %
- (prefix, prefix))
+ print("static inline%s struct fd_reg_pair\npack_%s(struct %s fields)\n{" %
+ (depcrstr, prefix, prefix))
self.dump_regpair_builder(reg)
@@ -253,18 +264,23 @@ class Bitset(object):
(prefix, prefix, prefix, skip))
- def dump(self, prefix=None):
- if prefix == None:
+ def dump(self, is_deprecated, prefix=None):
+ if prefix is None:
prefix = self.name
+ if self.reg and self.reg.bit_size == 64:
+ print("static inline uint32_t %s_LO(uint32_t val)\n{" % prefix)
+ print("\treturn val;\n}")
+ print("static inline uint32_t %s_HI(uint32_t val)\n{" % prefix)
+ print("\treturn val;\n}")
for f in self.fields:
if f.name:
name = prefix + "_" + f.name
else:
name = prefix
- if not f.name and f.low == 0 and f.shr == 0 and not f.type in ["float", "fixed", "ufixed"]:
+ if not f.name and f.low == 0 and f.shr == 0 and f.type not in ["float", "fixed", "ufixed"]:
pass
- elif f.type == "boolean" or (f.type == None and f.low == f.high):
+ elif f.type == "boolean" or (f.type is None and f.low == f.high):
tab_to("#define %s" % name, "0x%08x" % (1 << f.low))
else:
tab_to("#define %s__MASK" % name, "0x%08x" % mask(f.low, f.high))
@@ -286,6 +302,7 @@ class Array(object):
self.domain = domain
self.variant = variant
self.parent = parent
+ self.children = []
if self.parent:
self.name = self.parent.name + "_" + self.local_name
else:
@@ -337,12 +354,15 @@ class Array(object):
offset += self.parent.total_offset()
return offset
- def dump(self):
+ def dump(self, is_deprecated):
+ depcrstr = ""
+ if is_deprecated:
+ depcrstr = " FD_DEPRECATED"
proto = indices_varlist(self.indices())
strides = indices_strides(self.indices())
array_offset = self.total_offset()
if self.fixed_offsets:
- print("static inline uint32_t __offset_%s(%s idx)" % (self.local_name, self.index_ctype()))
+ print("static inline%s uint32_t __offset_%s(%s idx)" % (depcrstr, self.local_name, self.index_ctype()))
print("{\n\tswitch (idx) {")
if self.index_type:
for val, offset in zip(self.index_type.names(), self.offsets):
@@ -357,7 +377,7 @@ class Array(object):
else:
tab_to("#define REG_%s_%s(%s)" % (self.domain, self.name, proto), "(0x%08x + %s )\n" % (array_offset, strides))
- def dump_pack_struct(self):
+ def dump_pack_struct(self, is_deprecated):
pass
def dump_regpair_builder(self):
@@ -373,6 +393,7 @@ class Reg(object):
self.bit_size = bit_size
if array:
self.name = array.name + "_" + self.name
+ array.children.append(self)
self.full_name = self.domain + "_" + self.name
if "stride" in attrs:
self.stride = int(attrs["stride"], 0)
@@ -397,25 +418,34 @@ class Reg(object):
else:
return self.offset
- def dump(self):
+ def reg_offset(self):
+ if self.array:
+ offset = self.array.offset + self.offset
+ return "(0x%08x + 0x%x*__i)" % (offset, self.array.stride)
+ return "0x%08x" % self.offset
+
+ def dump(self, is_deprecated):
+ depcrstr = ""
+ if is_deprecated:
+ depcrstr = " FD_DEPRECATED "
proto = indices_prototype(self.indices())
strides = indices_strides(self.indices())
offset = self.total_offset()
if proto == '':
tab_to("#define REG_%s" % self.full_name, "0x%08x" % offset)
else:
- print("static inline uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (self.full_name, proto, offset, strides))
+ print("static inline%s uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (depcrstr, self.full_name, proto, offset, strides))
if self.bitset.inline:
- self.bitset.dump(self.full_name)
+ self.bitset.dump(is_deprecated, self.full_name)
+ print("")
- def dump_pack_struct(self):
+ def dump_pack_struct(self, is_deprecated):
if self.bitset.inline:
- self.bitset.dump_pack_struct(self)
+ self.bitset.dump_pack_struct(is_deprecated, self)
def dump_regpair_builder(self):
- if self.bitset.inline:
- self.bitset.dump_regpair_builder(self)
+ self.bitset.dump_regpair_builder(self)
def dump_py(self):
print("\tREG_%s = 0x%08x" % (self.full_name, self.offset))
@@ -444,9 +474,6 @@ class Parser(object):
self.variants = set()
self.file = []
self.xml_files = []
- self.copyright_year = None
- self.authors = []
- self.license = None
def error(self, message):
parser, filename = self.stack[-1]
@@ -454,7 +481,7 @@ class Parser(object):
def prefix(self, variant=None):
if self.current_prefix_type == "variant" and variant:
- return variant
+ return sanitize_variant(variant)
elif self.current_stripe:
return self.current_stripe + "_" + self.current_domain
elif self.current_prefix:
@@ -500,15 +527,22 @@ class Parser(object):
return varset
def parse_variants(self, attrs):
- if not "variants" in attrs:
+ if "variants" not in attrs:
return None
- variant = attrs["variants"].split(",")[0]
- if "-" in variant:
- variant = variant[:variant.index("-")]
+ variant = attrs["variants"].split(",")[0]
varset = self.parse_varset(attrs)
- assert varset.has_name(variant)
+ if "-" in variant:
+ # if we have a range, validate that both the start and end
+ # of the range are valid enums:
+ start = variant[:variant.index("-")]
+ end = variant[variant.index("-") + 1:]
+ assert varset.has_name(start)
+ if end != "":
+ assert varset.has_name(end)
+ else:
+ assert varset.has_name(variant)
return variant
@@ -572,9 +606,6 @@ class Parser(object):
error_str = str(xmlschema.error_log.filter_from_errors()[0])
raise self.error("Schema validation failed for: " + filename + "\n" + error_str)
except ImportError as e:
- if self.validate:
- raise e
-
print("lxml not found, skipping validation", file=sys.stderr)
def do_parse(self, filename):
@@ -620,6 +651,7 @@ class Parser(object):
self.current_reg = Reg(attrs, self.prefix(variant), self.current_array, bit_size)
self.current_reg.bitset = self.current_bitset
+ self.current_bitset.reg = self.current_reg
if len(self.stack) == 1:
self.file.append(self.current_reg)
@@ -643,7 +675,7 @@ class Parser(object):
elif name == "domain":
self.current_domain = attrs["name"]
if "prefix" in attrs:
- self.current_prefix = self.parse_variants(attrs)
+ self.current_prefix = sanitize_variant(self.parse_variants(attrs))
self.current_prefix_type = attrs["prefix"]
else:
self.current_prefix = None
@@ -651,7 +683,7 @@ class Parser(object):
if "varset" in attrs:
self.current_varset = self.enums[attrs["varset"]]
elif name == "stripe":
- self.current_stripe = self.parse_variants(attrs)
+ self.current_stripe = sanitize_variant(self.parse_variants(attrs))
elif name == "enum":
self.current_enum_value = 0
self.current_enum = Enum(attrs["name"])
@@ -686,10 +718,6 @@ class Parser(object):
self.parse_field(attrs["name"], attrs)
elif name == "database":
self.do_validate(attrs["xsi:schemaLocation"])
- elif name == "copyright":
- self.copyright_year = attrs["year"]
- elif name == "author":
- self.authors.append(attrs["name"] + " <" + attrs["email"] + "> " + attrs["name"])
def end_element(self, name):
if name == "domain":
@@ -703,11 +731,16 @@ class Parser(object):
elif name == "reg32":
self.current_reg = None
elif name == "array":
+ # if the array has no Reg children, push an implicit reg32:
+ if len(self.current_array.children) == 0:
+ attrs = {
+ "name": "REG",
+ "offset": "0",
+ }
+ self.parse_reg(attrs, 32)
self.current_array = self.current_array.parent
elif name == "enum":
self.current_enum = None
- elif name == "license":
- self.license = self.cdata
def character_data(self, data):
self.cdata += data
@@ -720,10 +753,10 @@ class Parser(object):
if variants:
for variant, vreg in variants.items():
if reg == vreg:
- d[(usage, variant)].append(reg)
+ d[(usage, sanitize_variant(variant))].append(reg)
else:
for variant in self.variants:
- d[(usage, variant)].append(reg)
+ d[(usage, sanitize_variant(variant))].append(reg)
print("#ifdef __cplusplus")
@@ -753,6 +786,9 @@ class Parser(object):
print("#endif")
+ def has_variants(self, reg):
+ return reg.name in self.variant_regs and not is_number(reg.name) and not is_number(reg.name[1:])
+
def dump(self):
enums = []
bitsets = []
@@ -766,7 +802,7 @@ class Parser(object):
regs.append(e)
for e in enums + bitsets + regs:
- e.dump()
+ e.dump(self.has_variants(e))
self.dump_reg_usages()
@@ -782,8 +818,7 @@ class Parser(object):
def dump_reg_variants(self, regname, variants):
- # Don't bother for things that only have a single variant:
- if len(variants) == 1:
+ if is_number(regname) or is_number(regname[1:]):
return
print("#ifdef __cplusplus")
print("struct __%s {" % regname)
@@ -834,11 +869,20 @@ class Parser(object):
xtravar = "__i, "
print("__%s(%sstruct __%s fields) {" % (regname, xtra, regname))
for variant in variants.keys():
- print(" if (%s == %s) {" % (varenum.upper(), variant))
+ if "-" in variant:
+ start = variant[:variant.index("-")]
+ end = variant[variant.index("-") + 1:]
+ if end != "":
+ print(" if ((%s >= %s) && (%s <= %s)) {" % (varenum.upper(), start, varenum.upper(), end))
+ else:
+ print(" if (%s >= %s) {" % (varenum.upper(), start))
+ else:
+ print(" if (%s == %s) {" % (varenum.upper(), variant))
reg = variants[variant]
reg.dump_regpair_builder()
print(" } else")
print(" assert(!\"invalid variant\");")
+ print(" return (struct fd_reg_pair){};")
print("}")
if bit_size == 64:
@@ -851,7 +895,7 @@ class Parser(object):
def dump_structs(self):
for e in self.file:
- e.dump_pack_struct()
+ e.dump_pack_struct(self.has_variants(e))
for regname in self.variant_regs:
self.dump_reg_variants(regname, self.variant_regs[regname])
@@ -868,33 +912,7 @@ def dump_c(args, guard, func):
print("#ifndef %s\n#define %s\n" % (guard, guard))
- print("""/* Autogenerated file, DO NOT EDIT manually!
-
-This file was generated by the rules-ng-ng gen_header.py tool in this git repository:
-http://gitlab.freedesktop.org/mesa/mesa/
-git clone https://gitlab.freedesktop.org/mesa/mesa.git
-
-The rules-ng-ng source files this header was generated from are:
-""")
- maxlen = 0
- for filepath in p.xml_files:
- new_filepath = re.sub("^.+drivers","drivers",filepath)
- maxlen = max(maxlen, len(new_filepath))
- for filepath in p.xml_files:
- pad = " " * (maxlen - len(new_filepath))
- filesize = str(os.path.getsize(filepath))
- filesize = " " * (7 - len(filesize)) + filesize
- filetime = time.ctime(os.path.getmtime(filepath))
- print("- " + new_filepath + pad + " (" + filesize + " bytes, from <stripped>)")
- if p.copyright_year:
- current_year = str(datetime.date.today().year)
- print()
- print("Copyright (C) %s-%s by the following authors:" % (p.copyright_year, current_year))
- for author in p.authors:
- print("- " + author)
- if p.license:
- print(p.license)
- print("*/")
+ print("/* Autogenerated file, DO NOT EDIT manually! */")
print()
print("#ifdef __KERNEL__")
@@ -912,9 +930,20 @@ The rules-ng-ng source files this header was generated from are:
print("#endif")
print()
+ print("#ifndef FD_NO_DEPRECATED_PACK")
+ print("#define FD_DEPRECATED __attribute__((deprecated))")
+ print("#else")
+ print("#define FD_DEPRECATED")
+ print("#endif")
+ print()
+
func(p)
- print("\n#endif /* %s */" % guard)
+ print()
+ print("#undef FD_DEPRECATED")
+ print()
+
+ print("#endif /* %s */" % guard)
def dump_c_defines(args):
@@ -931,7 +960,7 @@ def dump_py_defines(args):
p = Parser()
try:
- p.parse(args.rnn, args.xml)
+ p.parse(args.rnn, args.xml, args.validate)
except Error as e:
print(e, file=sys.stderr)
exit(1)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index b96f0555ca14..f26562eafffc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -929,7 +929,7 @@ done:
nvif_vmm_put(vmm, &old_mem->vma[1]);
nvif_vmm_put(vmm, &old_mem->vma[0]);
}
- return 0;
+ return ret;
}
static int
diff --git a/drivers/gpu/drm/nova/driver.rs b/drivers/gpu/drm/nova/driver.rs
index b28b2e05cc15..91b7380f83ab 100644
--- a/drivers/gpu/drm/nova/driver.rs
+++ b/drivers/gpu/drm/nova/driver.rs
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-use kernel::{auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, types::ARef};
+use kernel::{
+ auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, sync::aref::ARef,
+};
use crate::file::File;
use crate::gem::NovaObject;
diff --git a/drivers/gpu/drm/nova/gem.rs b/drivers/gpu/drm/nova/gem.rs
index 33b62d21400c..2760ba4f3450 100644
--- a/drivers/gpu/drm/nova/gem.rs
+++ b/drivers/gpu/drm/nova/gem.rs
@@ -4,7 +4,7 @@ use kernel::{
drm,
drm::{gem, gem::BaseObject},
prelude::*,
- types::ARef,
+ sync::aref::ARef,
};
use crate::{
@@ -16,16 +16,14 @@ use crate::{
#[pin_data]
pub(crate) struct NovaObject {}
-impl gem::BaseDriverObject<gem::Object<NovaObject>> for NovaObject {
+impl gem::DriverObject for NovaObject {
+ type Driver = NovaDriver;
+
fn new(_dev: &NovaDevice, _size: usize) -> impl PinInit<Self, Error> {
try_pin_init!(NovaObject {})
}
}
-impl gem::DriverObject for NovaObject {
- type Driver = NovaDriver;
-}
-
impl NovaObject {
/// Create a new DRM GEM object.
pub(crate) fn new(dev: &NovaDevice, size: usize) -> Result<ARef<gem::Object<Self>>> {
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 0cc9055f4ee5..f5e01cb16cfc 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -898,8 +898,7 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
if (IS_ERR_OR_NULL(queue))
return;
- if (queue->entity.fence_context)
- drm_sched_entity_destroy(&queue->entity);
+ drm_sched_entity_destroy(&queue->entity);
if (queue->scheduler.ops)
drm_sched_fini(&queue->scheduler);
@@ -3609,11 +3608,6 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
if (!group)
return -EINVAL;
- for (u32 i = 0; i < group->queue_count; i++) {
- if (group->queues[i])
- drm_sched_entity_destroy(&group->queues[i]->entity);
- }
-
mutex_lock(&sched->reset.lock);
mutex_lock(&sched->lock);
group->destroyed = true;
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index 3398160ad75e..5523911b990d 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -7,6 +7,7 @@
*/
#include <linux/clk.h>
+#include <linux/hw_bitfield.h>
#include <linux/iopoll.h>
#include <linux/math64.h>
#include <linux/mfd/syscon.h>
@@ -148,7 +149,7 @@
#define DW_MIPI_NEEDS_GRF_CLK BIT(1)
#define PX30_GRF_PD_VO_CON1 0x0438
-#define PX30_DSI_FORCETXSTOPMODE (0xf << 7)
+#define PX30_DSI_FORCETXSTOPMODE (0xfUL << 7)
#define PX30_DSI_FORCERXMODE BIT(6)
#define PX30_DSI_TURNDISABLE BIT(5)
#define PX30_DSI_LCDC_SEL BIT(0)
@@ -167,16 +168,16 @@
#define RK3399_DSI1_LCDC_SEL BIT(4)
#define RK3399_GRF_SOC_CON22 0x6258
-#define RK3399_DSI0_TURNREQUEST (0xf << 12)
-#define RK3399_DSI0_TURNDISABLE (0xf << 8)
-#define RK3399_DSI0_FORCETXSTOPMODE (0xf << 4)
-#define RK3399_DSI0_FORCERXMODE (0xf << 0)
+#define RK3399_DSI0_TURNREQUEST (0xfUL << 12)
+#define RK3399_DSI0_TURNDISABLE (0xfUL << 8)
+#define RK3399_DSI0_FORCETXSTOPMODE (0xfUL << 4)
+#define RK3399_DSI0_FORCERXMODE (0xfUL << 0)
#define RK3399_GRF_SOC_CON23 0x625c
-#define RK3399_DSI1_TURNDISABLE (0xf << 12)
-#define RK3399_DSI1_FORCETXSTOPMODE (0xf << 8)
-#define RK3399_DSI1_FORCERXMODE (0xf << 4)
-#define RK3399_DSI1_ENABLE (0xf << 0)
+#define RK3399_DSI1_TURNDISABLE (0xfUL << 12)
+#define RK3399_DSI1_FORCETXSTOPMODE (0xfUL << 8)
+#define RK3399_DSI1_FORCERXMODE (0xfUL << 4)
+#define RK3399_DSI1_ENABLE (0xfUL << 0)
#define RK3399_GRF_SOC_CON24 0x6260
#define RK3399_TXRX_MASTERSLAVEZ BIT(7)
@@ -186,8 +187,8 @@
#define RK3399_TXRX_TURNREQUEST GENMASK(3, 0)
#define RK3568_GRF_VO_CON2 0x0368
-#define RK3568_DSI0_SKEWCALHS (0x1f << 11)
-#define RK3568_DSI0_FORCETXSTOPMODE (0xf << 4)
+#define RK3568_DSI0_SKEWCALHS (0x1fUL << 11)
+#define RK3568_DSI0_FORCETXSTOPMODE (0xfUL << 4)
#define RK3568_DSI0_TURNDISABLE BIT(2)
#define RK3568_DSI0_FORCERXMODE BIT(0)
@@ -197,18 +198,16 @@
* come from. Name GRF_VO_CON3 is assumed.
*/
#define RK3568_GRF_VO_CON3 0x36c
-#define RK3568_DSI1_SKEWCALHS (0x1f << 11)
-#define RK3568_DSI1_FORCETXSTOPMODE (0xf << 4)
+#define RK3568_DSI1_SKEWCALHS (0x1fUL << 11)
+#define RK3568_DSI1_FORCETXSTOPMODE (0xfUL << 4)
#define RK3568_DSI1_TURNDISABLE BIT(2)
#define RK3568_DSI1_FORCERXMODE BIT(0)
#define RV1126_GRF_DSIPHY_CON 0x10220
-#define RV1126_DSI_FORCETXSTOPMODE (0xf << 4)
+#define RV1126_DSI_FORCETXSTOPMODE (0xfUL << 4)
#define RV1126_DSI_TURNDISABLE BIT(2)
#define RV1126_DSI_FORCERXMODE BIT(0)
-#define HIWORD_UPDATE(val, mask) (val | (mask) << 16)
-
enum {
DW_DSI_USAGE_IDLE,
DW_DSI_USAGE_DSI,
@@ -1484,14 +1483,13 @@ static const struct rockchip_dw_dsi_chip_data px30_chip_data[] = {
{
.reg = 0xff450000,
.lcdsel_grf_reg = PX30_GRF_PD_VO_CON1,
- .lcdsel_big = HIWORD_UPDATE(0, PX30_DSI_LCDC_SEL),
- .lcdsel_lit = HIWORD_UPDATE(PX30_DSI_LCDC_SEL,
- PX30_DSI_LCDC_SEL),
+ .lcdsel_big = FIELD_PREP_WM16_CONST(PX30_DSI_LCDC_SEL, 0),
+ .lcdsel_lit = FIELD_PREP_WM16_CONST(PX30_DSI_LCDC_SEL, 1),
.lanecfg1_grf_reg = PX30_GRF_PD_VO_CON1,
- .lanecfg1 = HIWORD_UPDATE(0, PX30_DSI_TURNDISABLE |
- PX30_DSI_FORCERXMODE |
- PX30_DSI_FORCETXSTOPMODE),
+ .lanecfg1 = FIELD_PREP_WM16_CONST((PX30_DSI_TURNDISABLE |
+ PX30_DSI_FORCERXMODE |
+ PX30_DSI_FORCETXSTOPMODE), 0),
.max_data_lanes = 4,
},
@@ -1502,9 +1500,9 @@ static const struct rockchip_dw_dsi_chip_data rk3128_chip_data[] = {
{
.reg = 0x10110000,
.lanecfg1_grf_reg = RK3128_GRF_LVDS_CON0,
- .lanecfg1 = HIWORD_UPDATE(0, RK3128_DSI_TURNDISABLE |
- RK3128_DSI_FORCERXMODE |
- RK3128_DSI_FORCETXSTOPMODE),
+ .lanecfg1 = FIELD_PREP_WM16_CONST((RK3128_DSI_TURNDISABLE |
+ RK3128_DSI_FORCERXMODE |
+ RK3128_DSI_FORCETXSTOPMODE), 0),
.max_data_lanes = 4,
},
{ /* sentinel */ }
@@ -1514,16 +1512,16 @@ static const struct rockchip_dw_dsi_chip_data rk3288_chip_data[] = {
{
.reg = 0xff960000,
.lcdsel_grf_reg = RK3288_GRF_SOC_CON6,
- .lcdsel_big = HIWORD_UPDATE(0, RK3288_DSI0_LCDC_SEL),
- .lcdsel_lit = HIWORD_UPDATE(RK3288_DSI0_LCDC_SEL, RK3288_DSI0_LCDC_SEL),
+ .lcdsel_big = FIELD_PREP_WM16_CONST(RK3288_DSI0_LCDC_SEL, 0),
+ .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3288_DSI0_LCDC_SEL, 1),
.max_data_lanes = 4,
},
{
.reg = 0xff964000,
.lcdsel_grf_reg = RK3288_GRF_SOC_CON6,
- .lcdsel_big = HIWORD_UPDATE(0, RK3288_DSI1_LCDC_SEL),
- .lcdsel_lit = HIWORD_UPDATE(RK3288_DSI1_LCDC_SEL, RK3288_DSI1_LCDC_SEL),
+ .lcdsel_big = FIELD_PREP_WM16_CONST(RK3288_DSI1_LCDC_SEL, 0),
+ .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3288_DSI1_LCDC_SEL, 1),
.max_data_lanes = 4,
},
@@ -1539,13 +1537,13 @@ static int rk3399_dphy_tx1rx1_init(struct phy *phy)
* Assume ISP0 is supplied by the RX0 dphy.
*/
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24,
- HIWORD_UPDATE(0, RK3399_TXRX_SRC_SEL_ISP0));
+ FIELD_PREP_WM16(RK3399_TXRX_SRC_SEL_ISP0, 0));
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24,
- HIWORD_UPDATE(0, RK3399_TXRX_MASTERSLAVEZ));
+ FIELD_PREP_WM16(RK3399_TXRX_MASTERSLAVEZ, 0));
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24,
- HIWORD_UPDATE(0, RK3399_TXRX_BASEDIR));
+ FIELD_PREP_WM16(RK3399_TXRX_BASEDIR, 0));
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23,
- HIWORD_UPDATE(0, RK3399_DSI1_ENABLE));
+ FIELD_PREP_WM16(RK3399_DSI1_ENABLE, 0));
return 0;
}
@@ -1559,21 +1557,20 @@ static int rk3399_dphy_tx1rx1_power_on(struct phy *phy)
usleep_range(100, 150);
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24,
- HIWORD_UPDATE(0, RK3399_TXRX_MASTERSLAVEZ));
+ FIELD_PREP_WM16(RK3399_TXRX_MASTERSLAVEZ, 0));
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24,
- HIWORD_UPDATE(RK3399_TXRX_BASEDIR, RK3399_TXRX_BASEDIR));
+ FIELD_PREP_WM16(RK3399_TXRX_BASEDIR, 1));
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23,
- HIWORD_UPDATE(0, RK3399_DSI1_FORCERXMODE));
+ FIELD_PREP_WM16(RK3399_DSI1_FORCERXMODE, 0));
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23,
- HIWORD_UPDATE(0, RK3399_DSI1_FORCETXSTOPMODE));
+ FIELD_PREP_WM16(RK3399_DSI1_FORCETXSTOPMODE, 0));
/* Disable lane turn around, which is ignored in receive mode */
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24,
- HIWORD_UPDATE(0, RK3399_TXRX_TURNREQUEST));
+ FIELD_PREP_WM16(RK3399_TXRX_TURNREQUEST, 0));
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23,
- HIWORD_UPDATE(RK3399_DSI1_TURNDISABLE,
- RK3399_DSI1_TURNDISABLE));
+ FIELD_PREP_WM16(RK3399_DSI1_TURNDISABLE, 0xf));
usleep_range(100, 150);
dsi_write(dsi, DSI_PHY_TST_CTRL0, PHY_TESTCLK | PHY_UNTESTCLR);
@@ -1581,8 +1578,8 @@ static int rk3399_dphy_tx1rx1_power_on(struct phy *phy)
/* Enable dphy lanes */
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23,
- HIWORD_UPDATE(GENMASK(dsi->dphy_config.lanes - 1, 0),
- RK3399_DSI1_ENABLE));
+ FIELD_PREP_WM16(RK3399_DSI1_ENABLE,
+ GENMASK(dsi->dphy_config.lanes - 1, 0)));
usleep_range(100, 150);
@@ -1594,7 +1591,7 @@ static int rk3399_dphy_tx1rx1_power_off(struct phy *phy)
struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy);
regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23,
- HIWORD_UPDATE(0, RK3399_DSI1_ENABLE));
+ FIELD_PREP_WM16(RK3399_DSI1_ENABLE, 0));
return 0;
}
@@ -1603,15 +1600,14 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = {
{
.reg = 0xff960000,
.lcdsel_grf_reg = RK3399_GRF_SOC_CON20,
- .lcdsel_big = HIWORD_UPDATE(0, RK3399_DSI0_LCDC_SEL),
- .lcdsel_lit = HIWORD_UPDATE(RK3399_DSI0_LCDC_SEL,
- RK3399_DSI0_LCDC_SEL),
+ .lcdsel_big = FIELD_PREP_WM16_CONST(RK3399_DSI0_LCDC_SEL, 0),
+ .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3399_DSI0_LCDC_SEL, 1),
.lanecfg1_grf_reg = RK3399_GRF_SOC_CON22,
- .lanecfg1 = HIWORD_UPDATE(0, RK3399_DSI0_TURNREQUEST |
- RK3399_DSI0_TURNDISABLE |
- RK3399_DSI0_FORCETXSTOPMODE |
- RK3399_DSI0_FORCERXMODE),
+ .lanecfg1 = FIELD_PREP_WM16_CONST((RK3399_DSI0_TURNREQUEST |
+ RK3399_DSI0_TURNDISABLE |
+ RK3399_DSI0_FORCETXSTOPMODE |
+ RK3399_DSI0_FORCERXMODE), 0),
.flags = DW_MIPI_NEEDS_PHY_CFG_CLK | DW_MIPI_NEEDS_GRF_CLK,
.max_data_lanes = 4,
@@ -1619,25 +1615,23 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = {
{
.reg = 0xff968000,
.lcdsel_grf_reg = RK3399_GRF_SOC_CON20,
- .lcdsel_big = HIWORD_UPDATE(0, RK3399_DSI1_LCDC_SEL),
- .lcdsel_lit = HIWORD_UPDATE(RK3399_DSI1_LCDC_SEL,
- RK3399_DSI1_LCDC_SEL),
+ .lcdsel_big = FIELD_PREP_WM16_CONST(RK3399_DSI1_LCDC_SEL, 0),
+ .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3399_DSI1_LCDC_SEL, 1),
+
.lanecfg1_grf_reg = RK3399_GRF_SOC_CON23,
- .lanecfg1 = HIWORD_UPDATE(0, RK3399_DSI1_TURNDISABLE |
- RK3399_DSI1_FORCETXSTOPMODE |
- RK3399_DSI1_FORCERXMODE |
- RK3399_DSI1_ENABLE),
+ .lanecfg1 = FIELD_PREP_WM16_CONST((RK3399_DSI1_TURNDISABLE |
+ RK3399_DSI1_FORCETXSTOPMODE |
+ RK3399_DSI1_FORCERXMODE |
+ RK3399_DSI1_ENABLE), 0),
.lanecfg2_grf_reg = RK3399_GRF_SOC_CON24,
- .lanecfg2 = HIWORD_UPDATE(RK3399_TXRX_MASTERSLAVEZ |
- RK3399_TXRX_ENABLECLK,
- RK3399_TXRX_MASTERSLAVEZ |
- RK3399_TXRX_ENABLECLK |
- RK3399_TXRX_BASEDIR),
+ .lanecfg2 = (FIELD_PREP_WM16_CONST(RK3399_TXRX_MASTERSLAVEZ, 1) |
+ FIELD_PREP_WM16_CONST(RK3399_TXRX_ENABLECLK, 1) |
+ FIELD_PREP_WM16_CONST(RK3399_TXRX_BASEDIR, 0)),
.enable_grf_reg = RK3399_GRF_SOC_CON23,
- .enable = HIWORD_UPDATE(RK3399_DSI1_ENABLE, RK3399_DSI1_ENABLE),
+ .enable = FIELD_PREP_WM16_CONST(RK3399_DSI1_ENABLE, RK3399_DSI1_ENABLE),
.flags = DW_MIPI_NEEDS_PHY_CFG_CLK | DW_MIPI_NEEDS_GRF_CLK,
.max_data_lanes = 4,
@@ -1653,19 +1647,19 @@ static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = {
{
.reg = 0xfe060000,
.lanecfg1_grf_reg = RK3568_GRF_VO_CON2,
- .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI0_SKEWCALHS |
- RK3568_DSI0_FORCETXSTOPMODE |
- RK3568_DSI0_TURNDISABLE |
- RK3568_DSI0_FORCERXMODE),
+ .lanecfg1 = (FIELD_PREP_WM16_CONST(RK3568_DSI0_SKEWCALHS, 0) |
+ FIELD_PREP_WM16_CONST(RK3568_DSI0_FORCETXSTOPMODE, 0) |
+ FIELD_PREP_WM16_CONST(RK3568_DSI0_TURNDISABLE, 0) |
+ FIELD_PREP_WM16_CONST(RK3568_DSI0_FORCERXMODE, 0)),
.max_data_lanes = 4,
},
{
.reg = 0xfe070000,
.lanecfg1_grf_reg = RK3568_GRF_VO_CON3,
- .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI1_SKEWCALHS |
- RK3568_DSI1_FORCETXSTOPMODE |
- RK3568_DSI1_TURNDISABLE |
- RK3568_DSI1_FORCERXMODE),
+ .lanecfg1 = (FIELD_PREP_WM16_CONST(RK3568_DSI1_SKEWCALHS, 0) |
+ FIELD_PREP_WM16_CONST(RK3568_DSI1_FORCETXSTOPMODE, 0) |
+ FIELD_PREP_WM16_CONST(RK3568_DSI1_TURNDISABLE, 0) |
+ FIELD_PREP_WM16_CONST(RK3568_DSI1_FORCERXMODE, 0)),
.max_data_lanes = 4,
},
{ /* sentinel */ }
@@ -1675,9 +1669,9 @@ static const struct rockchip_dw_dsi_chip_data rv1126_chip_data[] = {
{
.reg = 0xffb30000,
.lanecfg1_grf_reg = RV1126_GRF_DSIPHY_CON,
- .lanecfg1 = HIWORD_UPDATE(0, RV1126_DSI_TURNDISABLE |
- RV1126_DSI_FORCERXMODE |
- RV1126_DSI_FORCETXSTOPMODE),
+ .lanecfg1 = (FIELD_PREP_WM16_CONST(RV1126_DSI_TURNDISABLE, 0) |
+ FIELD_PREP_WM16_CONST(RV1126_DSI_FORCERXMODE, 0) |
+ FIELD_PREP_WM16_CONST(RV1126_DSI_FORCETXSTOPMODE, 0)),
.max_data_lanes = 4,
},
{ /* sentinel */ }
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index acb59b25d928..7b613997bb50 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -4,6 +4,7 @@
*/
#include <linux/clk.h>
+#include <linux/hw_bitfield.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/platform_device.h>
@@ -54,8 +55,6 @@
#define RK3568_HDMI_SDAIN_MSK BIT(15)
#define RK3568_HDMI_SCLIN_MSK BIT(14)
-#define HIWORD_UPDATE(val, mask) (val | (mask) << 16)
-
/**
* struct rockchip_hdmi_chip_data - splite the grf setting of kind of chips
* @lcdsel_grf_reg: grf register offset of lcdc select
@@ -355,17 +354,14 @@ static void dw_hdmi_rk3228_setup_hpd(struct dw_hdmi *dw_hdmi, void *data)
dw_hdmi_phy_setup_hpd(dw_hdmi, data);
- regmap_write(hdmi->regmap,
- RK3228_GRF_SOC_CON6,
- HIWORD_UPDATE(RK3228_HDMI_HPD_VSEL | RK3228_HDMI_SDA_VSEL |
- RK3228_HDMI_SCL_VSEL,
- RK3228_HDMI_HPD_VSEL | RK3228_HDMI_SDA_VSEL |
- RK3228_HDMI_SCL_VSEL));
-
- regmap_write(hdmi->regmap,
- RK3228_GRF_SOC_CON2,
- HIWORD_UPDATE(RK3228_HDMI_SDAIN_MSK | RK3228_HDMI_SCLIN_MSK,
- RK3228_HDMI_SDAIN_MSK | RK3228_HDMI_SCLIN_MSK));
+ regmap_write(hdmi->regmap, RK3228_GRF_SOC_CON6,
+ FIELD_PREP_WM16(RK3228_HDMI_HPD_VSEL, 1) |
+ FIELD_PREP_WM16(RK3228_HDMI_SDA_VSEL, 1) |
+ FIELD_PREP_WM16(RK3228_HDMI_SCL_VSEL, 1));
+
+ regmap_write(hdmi->regmap, RK3228_GRF_SOC_CON2,
+ FIELD_PREP_WM16(RK3228_HDMI_SDAIN_MSK, 1) |
+ FIELD_PREP_WM16(RK3328_HDMI_SCLIN_MSK, 1));
}
static enum drm_connector_status
@@ -377,15 +373,13 @@ dw_hdmi_rk3328_read_hpd(struct dw_hdmi *dw_hdmi, void *data)
status = dw_hdmi_phy_read_hpd(dw_hdmi, data);
if (status == connector_status_connected)
- regmap_write(hdmi->regmap,
- RK3328_GRF_SOC_CON4,
- HIWORD_UPDATE(RK3328_HDMI_SDA_5V | RK3328_HDMI_SCL_5V,
- RK3328_HDMI_SDA_5V | RK3328_HDMI_SCL_5V));
+ regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON4,
+ FIELD_PREP_WM16(RK3328_HDMI_SDA_5V, 1) |
+ FIELD_PREP_WM16(RK3328_HDMI_SCL_5V, 1));
else
- regmap_write(hdmi->regmap,
- RK3328_GRF_SOC_CON4,
- HIWORD_UPDATE(0, RK3328_HDMI_SDA_5V |
- RK3328_HDMI_SCL_5V));
+ regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON4,
+ FIELD_PREP_WM16(RK3328_HDMI_SDA_5V, 0) |
+ FIELD_PREP_WM16(RK3328_HDMI_SCL_5V, 0));
return status;
}
@@ -396,21 +390,21 @@ static void dw_hdmi_rk3328_setup_hpd(struct dw_hdmi *dw_hdmi, void *data)
dw_hdmi_phy_setup_hpd(dw_hdmi, data);
/* Enable and map pins to 3V grf-controlled io-voltage */
- regmap_write(hdmi->regmap,
- RK3328_GRF_SOC_CON4,
- HIWORD_UPDATE(0, RK3328_HDMI_HPD_SARADC | RK3328_HDMI_CEC_5V |
- RK3328_HDMI_SDA_5V | RK3328_HDMI_SCL_5V |
- RK3328_HDMI_HPD_5V));
- regmap_write(hdmi->regmap,
- RK3328_GRF_SOC_CON3,
- HIWORD_UPDATE(0, RK3328_HDMI_SDA5V_GRF | RK3328_HDMI_SCL5V_GRF |
- RK3328_HDMI_HPD5V_GRF |
- RK3328_HDMI_CEC5V_GRF));
- regmap_write(hdmi->regmap,
- RK3328_GRF_SOC_CON2,
- HIWORD_UPDATE(RK3328_HDMI_SDAIN_MSK | RK3328_HDMI_SCLIN_MSK,
- RK3328_HDMI_SDAIN_MSK | RK3328_HDMI_SCLIN_MSK |
- RK3328_HDMI_HPD_IOE));
+ regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON4,
+ FIELD_PREP_WM16(RK3328_HDMI_HPD_SARADC, 0) |
+ FIELD_PREP_WM16(RK3328_HDMI_CEC_5V, 0) |
+ FIELD_PREP_WM16(RK3328_HDMI_SDA_5V, 0) |
+ FIELD_PREP_WM16(RK3328_HDMI_SCL_5V, 0) |
+ FIELD_PREP_WM16(RK3328_HDMI_HPD_5V, 0));
+ regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON3,
+ FIELD_PREP_WM16(RK3328_HDMI_SDA5V_GRF, 0) |
+ FIELD_PREP_WM16(RK3328_HDMI_SCL5V_GRF, 0) |
+ FIELD_PREP_WM16(RK3328_HDMI_HPD5V_GRF, 0) |
+ FIELD_PREP_WM16(RK3328_HDMI_CEC5V_GRF, 0));
+ regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON2,
+ FIELD_PREP_WM16(RK3328_HDMI_SDAIN_MSK, 1) |
+ FIELD_PREP_WM16(RK3328_HDMI_SCLIN_MSK, 1) |
+ FIELD_PREP_WM16(RK3328_HDMI_HPD_IOE, 0));
dw_hdmi_rk3328_read_hpd(dw_hdmi, data);
}
@@ -438,8 +432,8 @@ static const struct dw_hdmi_plat_data rk3228_hdmi_drv_data = {
static struct rockchip_hdmi_chip_data rk3288_chip_data = {
.lcdsel_grf_reg = RK3288_GRF_SOC_CON6,
- .lcdsel_big = HIWORD_UPDATE(0, RK3288_HDMI_LCDC_SEL),
- .lcdsel_lit = HIWORD_UPDATE(RK3288_HDMI_LCDC_SEL, RK3288_HDMI_LCDC_SEL),
+ .lcdsel_big = FIELD_PREP_WM16_CONST(RK3288_HDMI_LCDC_SEL, 0),
+ .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3288_HDMI_LCDC_SEL, 1),
.max_tmds_clock = 340000,
};
@@ -475,8 +469,8 @@ static const struct dw_hdmi_plat_data rk3328_hdmi_drv_data = {
static struct rockchip_hdmi_chip_data rk3399_chip_data = {
.lcdsel_grf_reg = RK3399_GRF_SOC_CON20,
- .lcdsel_big = HIWORD_UPDATE(0, RK3399_HDMI_LCDC_SEL),
- .lcdsel_lit = HIWORD_UPDATE(RK3399_HDMI_LCDC_SEL, RK3399_HDMI_LCDC_SEL),
+ .lcdsel_big = FIELD_PREP_WM16_CONST(RK3399_HDMI_LCDC_SEL, 0),
+ .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3399_HDMI_LCDC_SEL, 1),
.max_tmds_clock = 594000,
};
@@ -589,10 +583,8 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
if (hdmi->chip_data == &rk3568_chip_data) {
regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1,
- HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK |
- RK3568_HDMI_SCLIN_MSK,
- RK3568_HDMI_SDAIN_MSK |
- RK3568_HDMI_SCLIN_MSK));
+ FIELD_PREP_WM16(RK3568_HDMI_SDAIN_MSK, 1) |
+ FIELD_PREP_WM16(RK3568_HDMI_SCLIN_MSK, 1));
}
drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs);
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c
index 7d531b6f4c09..ed6e8f036f4b 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c
@@ -9,6 +9,7 @@
#include <linux/clk.h>
#include <linux/gpio/consumer.h>
+#include <linux/hw_bitfield.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/platform_device.h>
@@ -66,7 +67,8 @@
#define RK3588_HDMI1_HPD_INT_MSK BIT(15)
#define RK3588_HDMI1_HPD_INT_CLR BIT(14)
#define RK3588_GRF_SOC_CON7 0x031c
-#define RK3588_SET_HPD_PATH_MASK GENMASK(13, 12)
+#define RK3588_HPD_HDMI0_IO_EN_MASK BIT(12)
+#define RK3588_HPD_HDMI1_IO_EN_MASK BIT(13)
#define RK3588_GRF_SOC_STATUS1 0x0384
#define RK3588_HDMI0_LEVEL_INT BIT(16)
#define RK3588_HDMI1_LEVEL_INT BIT(24)
@@ -80,7 +82,6 @@
#define RK3588_HDMI0_GRANT_SEL BIT(10)
#define RK3588_HDMI1_GRANT_SEL BIT(12)
-#define HIWORD_UPDATE(val, mask) ((val) | (mask) << 16)
#define HOTPLUG_DEBOUNCE_MS 150
#define MAX_HDMI_PORT_NUM 2
@@ -185,11 +186,11 @@ static void dw_hdmi_qp_rk3588_setup_hpd(struct dw_hdmi_qp *dw_hdmi, void *data)
u32 val;
if (hdmi->port_id)
- val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR,
- RK3588_HDMI1_HPD_INT_CLR | RK3588_HDMI1_HPD_INT_MSK);
+ val = (FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_CLR, 1) |
+ FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 0));
else
- val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR,
- RK3588_HDMI0_HPD_INT_CLR | RK3588_HDMI0_HPD_INT_MSK);
+ val = (FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_CLR, 1) |
+ FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 0));
regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
}
@@ -218,8 +219,8 @@ static void dw_hdmi_qp_rk3576_setup_hpd(struct dw_hdmi_qp *dw_hdmi, void *data)
struct rockchip_hdmi_qp *hdmi = (struct rockchip_hdmi_qp *)data;
u32 val;
- val = HIWORD_UPDATE(RK3576_HDMI_HPD_INT_CLR,
- RK3576_HDMI_HPD_INT_CLR | RK3576_HDMI_HPD_INT_MSK);
+ val = (FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_CLR, 1) |
+ FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 0));
regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val);
regmap_write(hdmi->regmap, 0xa404, 0xffff0102);
@@ -254,7 +255,7 @@ static irqreturn_t dw_hdmi_qp_rk3576_hardirq(int irq, void *dev_id)
regmap_read(hdmi->regmap, RK3576_IOC_HDMI_HPD_STATUS, &intr_stat);
if (intr_stat) {
- val = HIWORD_UPDATE(RK3576_HDMI_HPD_INT_MSK, RK3576_HDMI_HPD_INT_MSK);
+ val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 1);
regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val);
return IRQ_WAKE_THREAD;
@@ -273,12 +274,12 @@ static irqreturn_t dw_hdmi_qp_rk3576_irq(int irq, void *dev_id)
if (!intr_stat)
return IRQ_NONE;
- val = HIWORD_UPDATE(RK3576_HDMI_HPD_INT_CLR, RK3576_HDMI_HPD_INT_CLR);
+ val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_CLR, 1);
regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val);
mod_delayed_work(system_wq, &hdmi->hpd_work,
msecs_to_jiffies(HOTPLUG_DEBOUNCE_MS));
- val = HIWORD_UPDATE(0, RK3576_HDMI_HPD_INT_MSK);
+ val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 0);
regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val);
return IRQ_HANDLED;
@@ -293,11 +294,9 @@ static irqreturn_t dw_hdmi_qp_rk3588_hardirq(int irq, void *dev_id)
if (intr_stat) {
if (hdmi->port_id)
- val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK,
- RK3588_HDMI1_HPD_INT_MSK);
+ val = FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 1);
else
- val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK,
- RK3588_HDMI0_HPD_INT_MSK);
+ val = FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 1);
regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
return IRQ_WAKE_THREAD;
}
@@ -315,20 +314,18 @@ static irqreturn_t dw_hdmi_qp_rk3588_irq(int irq, void *dev_id)
return IRQ_NONE;
if (hdmi->port_id)
- val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR,
- RK3588_HDMI1_HPD_INT_CLR);
+ val = FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_CLR, 1);
else
- val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR,
- RK3588_HDMI0_HPD_INT_CLR);
+ val = FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_CLR, 1);
regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
mod_delayed_work(system_wq, &hdmi->hpd_work,
msecs_to_jiffies(HOTPLUG_DEBOUNCE_MS));
if (hdmi->port_id)
- val |= HIWORD_UPDATE(0, RK3588_HDMI1_HPD_INT_MSK);
+ val |= FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 0);
else
- val |= HIWORD_UPDATE(0, RK3588_HDMI0_HPD_INT_MSK);
+ val |= FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 0);
regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
return IRQ_HANDLED;
@@ -338,14 +335,14 @@ static void dw_hdmi_qp_rk3576_io_init(struct rockchip_hdmi_qp *hdmi)
{
u32 val;
- val = HIWORD_UPDATE(RK3576_SCLIN_MASK, RK3576_SCLIN_MASK) |
- HIWORD_UPDATE(RK3576_SDAIN_MASK, RK3576_SDAIN_MASK) |
- HIWORD_UPDATE(RK3576_HDMI_GRANT_SEL, RK3576_HDMI_GRANT_SEL) |
- HIWORD_UPDATE(RK3576_I2S_SEL_MASK, RK3576_I2S_SEL_MASK);
+ val = FIELD_PREP_WM16(RK3576_SCLIN_MASK, 1) |
+ FIELD_PREP_WM16(RK3576_SDAIN_MASK, 1) |
+ FIELD_PREP_WM16(RK3576_HDMI_GRANT_SEL, 1) |
+ FIELD_PREP_WM16(RK3576_I2S_SEL_MASK, 1);
regmap_write(hdmi->vo_regmap, RK3576_VO0_GRF_SOC_CON14, val);
- val = HIWORD_UPDATE(0, RK3576_HDMI_HPD_INT_MSK);
+ val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 0);
regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val);
}
@@ -353,27 +350,28 @@ static void dw_hdmi_qp_rk3588_io_init(struct rockchip_hdmi_qp *hdmi)
{
u32 val;
- val = HIWORD_UPDATE(RK3588_SCLIN_MASK, RK3588_SCLIN_MASK) |
- HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) |
- HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) |
- HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK);
+ val = FIELD_PREP_WM16(RK3588_SCLIN_MASK, 1) |
+ FIELD_PREP_WM16(RK3588_SDAIN_MASK, 1) |
+ FIELD_PREP_WM16(RK3588_MODE_MASK, 1) |
+ FIELD_PREP_WM16(RK3588_I2S_SEL_MASK, 1);
regmap_write(hdmi->vo_regmap,
hdmi->port_id ? RK3588_GRF_VO1_CON6 : RK3588_GRF_VO1_CON3,
val);
- val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK, RK3588_SET_HPD_PATH_MASK);
+ val = FIELD_PREP_WM16(RK3588_HPD_HDMI0_IO_EN_MASK, 1) |
+ FIELD_PREP_WM16(RK3588_HPD_HDMI1_IO_EN_MASK, 1);
regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val);
if (hdmi->port_id)
- val = HIWORD_UPDATE(RK3588_HDMI1_GRANT_SEL, RK3588_HDMI1_GRANT_SEL);
+ val = FIELD_PREP_WM16(RK3588_HDMI1_GRANT_SEL, 1);
else
- val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL, RK3588_HDMI0_GRANT_SEL);
+ val = FIELD_PREP_WM16(RK3588_HDMI0_GRANT_SEL, 1);
regmap_write(hdmi->vo_regmap, RK3588_GRF_VO1_CON9, val);
if (hdmi->port_id)
- val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK, RK3588_HDMI1_HPD_INT_MSK);
+ val = FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 1);
else
- val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, RK3588_HDMI0_HPD_INT_MSK);
+ val = FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 1);
regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
}
diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c
index 1ab3ad4bde9e..f24827dc1421 100644
--- a/drivers/gpu/drm/rockchip/inno_hdmi.c
+++ b/drivers/gpu/drm/rockchip/inno_hdmi.c
@@ -10,6 +10,7 @@
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/hdmi.h>
+#include <linux/hw_bitfield.h>
#include <linux/mfd/syscon.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
@@ -382,8 +383,6 @@ enum {
#define HDMI_CEC_BUSFREETIME_H 0xdd
#define HDMI_CEC_LOGICADDR 0xde
-#define HIWORD_UPDATE(val, mask) ((val) | (mask) << 16)
-
#define RK3036_GRF_SOC_CON2 0x148
#define RK3036_HDMI_PHSYNC BIT(4)
#define RK3036_HDMI_PVSYNC BIT(5)
@@ -756,10 +755,10 @@ static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi,
int value, psync;
if (hdmi->variant->dev_type == RK3036_HDMI) {
- psync = mode->flags & DRM_MODE_FLAG_PHSYNC ? RK3036_HDMI_PHSYNC : 0;
- value = HIWORD_UPDATE(psync, RK3036_HDMI_PHSYNC);
- psync = mode->flags & DRM_MODE_FLAG_PVSYNC ? RK3036_HDMI_PVSYNC : 0;
- value |= HIWORD_UPDATE(psync, RK3036_HDMI_PVSYNC);
+ psync = mode->flags & DRM_MODE_FLAG_PHSYNC ? 1 : 0;
+ value = FIELD_PREP_WM16(RK3036_HDMI_PHSYNC, psync);
+ psync = mode->flags & DRM_MODE_FLAG_PVSYNC ? 1 : 0;
+ value |= FIELD_PREP_WM16(RK3036_HDMI_PVSYNC, psync);
regmap_write(hdmi->grf, RK3036_GRF_SOC_CON2, value);
}
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
index fa5c56f16047..9124191899ba 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h
@@ -33,7 +33,6 @@
#define WIN_FEATURE_AFBDC BIT(0)
#define WIN_FEATURE_CLUSTER BIT(1)
-#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l)))
/*
* the delay number of a window in different mode.
*/
diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.h b/drivers/gpu/drm/rockchip/rockchip_lvds.h
index ca83d7b6bea7..2d92447d819b 100644
--- a/drivers/gpu/drm/rockchip/rockchip_lvds.h
+++ b/drivers/gpu/drm/rockchip/rockchip_lvds.h
@@ -9,6 +9,9 @@
#ifndef _ROCKCHIP_LVDS_
#define _ROCKCHIP_LVDS_
+#include <linux/bits.h>
+#include <linux/hw_bitfield.h>
+
#define RK3288_LVDS_CH0_REG0 0x00
#define RK3288_LVDS_CH0_REG0_LVDS_EN BIT(7)
#define RK3288_LVDS_CH0_REG0_TTL_EN BIT(6)
@@ -106,18 +109,16 @@
#define LVDS_VESA_18 2
#define LVDS_JEIDA_18 3
-#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l)))
-
#define PX30_LVDS_GRF_PD_VO_CON0 0x434
-#define PX30_LVDS_TIE_CLKS(val) HIWORD_UPDATE(val, 8, 8)
-#define PX30_LVDS_INVERT_CLKS(val) HIWORD_UPDATE(val, 9, 9)
-#define PX30_LVDS_INVERT_DCLK(val) HIWORD_UPDATE(val, 5, 5)
+#define PX30_LVDS_TIE_CLKS(val) FIELD_PREP_WM16(BIT(8), (val))
+#define PX30_LVDS_INVERT_CLKS(val) FIELD_PREP_WM16(BIT(9), (val))
+#define PX30_LVDS_INVERT_DCLK(val) FIELD_PREP_WM16(BIT(5), (val))
#define PX30_LVDS_GRF_PD_VO_CON1 0x438
-#define PX30_LVDS_FORMAT(val) HIWORD_UPDATE(val, 14, 13)
-#define PX30_LVDS_MODE_EN(val) HIWORD_UPDATE(val, 12, 12)
-#define PX30_LVDS_MSBSEL(val) HIWORD_UPDATE(val, 11, 11)
-#define PX30_LVDS_P2S_EN(val) HIWORD_UPDATE(val, 6, 6)
-#define PX30_LVDS_VOP_SEL(val) HIWORD_UPDATE(val, 1, 1)
+#define PX30_LVDS_FORMAT(val) FIELD_PREP_WM16(GENMASK(14, 13), (val))
+#define PX30_LVDS_MODE_EN(val) FIELD_PREP_WM16(BIT(12), (val))
+#define PX30_LVDS_MSBSEL(val) FIELD_PREP_WM16(BIT(11), (val))
+#define PX30_LVDS_P2S_EN(val) FIELD_PREP_WM16(BIT(6), (val))
+#define PX30_LVDS_VOP_SEL(val) FIELD_PREP_WM16(BIT(1), (val))
#endif /* _ROCKCHIP_LVDS_ */
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
index 45c5e3987813..38c49030c7ab 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -7,6 +7,7 @@
#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/component.h>
+#include <linux/hw_bitfield.h>
#include <linux/mod_devicetable.h>
#include <linux/platform_device.h>
#include <linux/of.h>
@@ -1695,8 +1696,9 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32
die |= RK3588_SYS_DSP_INFACE_EN_HDMI0 |
FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id);
val = rk3588_get_hdmi_pol(polflags);
- regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 1, 1));
- regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 6, 5));
+ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(1), 1));
+ regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0,
+ FIELD_PREP_WM16(GENMASK(6, 5), val));
break;
case ROCKCHIP_VOP2_EP_HDMI1:
div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV;
@@ -1707,8 +1709,9 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32
die |= RK3588_SYS_DSP_INFACE_EN_HDMI1 |
FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id);
val = rk3588_get_hdmi_pol(polflags);
- regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 4, 4));
- regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 8, 7));
+ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(4), 1));
+ regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0,
+ FIELD_PREP_WM16(GENMASK(8, 7), val));
break;
case ROCKCHIP_VOP2_EP_EDP0:
div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV;
@@ -1718,7 +1721,7 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32
die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX;
die |= RK3588_SYS_DSP_INFACE_EN_EDP0 |
FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id);
- regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 0, 0));
+ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(0), 1));
break;
case ROCKCHIP_VOP2_EP_EDP1:
div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV;
@@ -1728,7 +1731,7 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32
die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX;
die |= RK3588_SYS_DSP_INFACE_EN_EDP1 |
FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id);
- regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 3, 3));
+ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(3), 1));
break;
case ROCKCHIP_VOP2_EP_MIPI0:
div &= ~RK3588_DSP_IF_MIPI0_PCLK_DIV;
diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
index 94a5bf61a115..7d9e85e932d7 100644
--- a/drivers/gpu/drm/tiny/Kconfig
+++ b/drivers/gpu/drm/tiny/Kconfig
@@ -86,7 +86,7 @@ config DRM_PIXPAPER
tristate "DRM support for PIXPAPER display panels"
depends on DRM && SPI
select DRM_CLIENT_SELECTION
- select DRM_GEM_DMA_HELPER
+ select DRM_GEM_SHMEM_HELPER
select DRM_KMS_HELPER
help
DRM driver for the Mayqueen Pixpaper e-ink display panel.
diff --git a/drivers/gpu/drm/tiny/pixpaper.c b/drivers/gpu/drm/tiny/pixpaper.c
index b1379cb5f030..32598fb2fee7 100644
--- a/drivers/gpu/drm/tiny/pixpaper.c
+++ b/drivers/gpu/drm/tiny/pixpaper.c
@@ -968,8 +968,8 @@ static const struct drm_crtc_funcs pixpaper_crtc_funcs = {
.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
};
-static int pixpaper_mode_valid(struct drm_crtc *crtc,
- const struct drm_display_mode *mode)
+static enum drm_mode_status
+pixpaper_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode)
{
if (mode->hdisplay == PIXPAPER_WIDTH &&
mode->vdisplay == PIXPAPER_HEIGHT) {
diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig
new file mode 100644
index 000000000000..4b55308fd2eb
--- /dev/null
+++ b/drivers/gpu/drm/tyr/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0 or MIT
+
+config DRM_TYR
+ tristate "Tyr (Rust DRM support for ARM Mali CSF-based GPUs)"
+ depends on DRM=y
+ depends on RUST
+ depends on ARM || ARM64 || COMPILE_TEST
+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
+ default n
+ help
+ Rust DRM driver for ARM Mali CSF-based GPUs.
+
+ This driver is for Mali (or Immortalis) Valhall Gxxx GPUs.
+
+ Note that the Mali-G68 and Mali-G78, while Valhall architecture, will
+ be supported with the panfrost driver as they are not CSF GPUs.
+
+ if M is selected, the module will be called tyr. This driver is work
+ in progress and may not be functional.
diff --git a/drivers/gpu/drm/tyr/Makefile b/drivers/gpu/drm/tyr/Makefile
new file mode 100644
index 000000000000..ba545f65f2c0
--- /dev/null
+++ b/drivers/gpu/drm/tyr/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0 or MIT
+
+obj-$(CONFIG_DRM_TYR) += tyr.o
diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs
new file mode 100644
index 000000000000..d5625dd1e41c
--- /dev/null
+++ b/drivers/gpu/drm/tyr/driver.rs
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+
+use kernel::c_str;
+use kernel::clk::Clk;
+use kernel::clk::OptionalClk;
+use kernel::device::Bound;
+use kernel::device::Core;
+use kernel::device::Device;
+use kernel::devres::Devres;
+use kernel::drm;
+use kernel::drm::ioctl;
+use kernel::new_mutex;
+use kernel::of;
+use kernel::platform;
+use kernel::prelude::*;
+use kernel::regulator;
+use kernel::regulator::Regulator;
+use kernel::sizes::SZ_2M;
+use kernel::sync::Arc;
+use kernel::sync::Mutex;
+use kernel::time;
+use kernel::types::ARef;
+
+use crate::file::File;
+use crate::gem::TyrObject;
+use crate::gpu;
+use crate::gpu::GpuInfo;
+use crate::regs;
+
+pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>;
+
+/// Convenience type alias for the DRM device type for this driver.
+pub(crate) type TyrDevice = drm::Device<TyrDriver>;
+
+#[pin_data(PinnedDrop)]
+pub(crate) struct TyrDriver {
+ device: ARef<TyrDevice>,
+}
+
+#[pin_data(PinnedDrop)]
+pub(crate) struct TyrData {
+ pub(crate) pdev: ARef<platform::Device>,
+
+ #[pin]
+ clks: Mutex<Clocks>,
+
+ #[pin]
+ regulators: Mutex<Regulators>,
+
+ /// Some information on the GPU.
+ ///
+ /// This is mainly queried by userspace, i.e.: Mesa.
+ pub(crate) gpu_info: GpuInfo,
+}
+
+// Both `Clk` and `Regulator` do not implement `Send` or `Sync`, but they
+// should. There are patches on the mailing list to address this, but they have
+// not landed yet.
+//
+// For now, add this workaround so that this patch compiles with the promise
+// that it will be removed in a future patch.
+//
+// SAFETY: This will be removed in a future patch.
+unsafe impl Send for TyrData {}
+// SAFETY: This will be removed in a future patch.
+unsafe impl Sync for TyrData {}
+
+fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
+ regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
+
+ // TODO: We cannot poll, as there is no support in Rust currently, so we
+ // sleep. Change this when read_poll_timeout() is implemented in Rust.
+ kernel::time::delay::fsleep(time::Delta::from_millis(100));
+
+ if regs::GPU_IRQ_RAWSTAT.read(dev, iomem)? & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED == 0 {
+ dev_err!(dev, "GPU reset failed with errno\n");
+ dev_err!(
+ dev,
+ "GPU_INT_RAWSTAT is {}\n",
+ regs::GPU_IRQ_RAWSTAT.read(dev, iomem)?
+ );
+
+ return Err(EIO);
+ }
+
+ Ok(())
+}
+
+kernel::of_device_table!(
+ OF_TABLE,
+ MODULE_OF_TABLE,
+ <TyrDriver as platform::Driver>::IdInfo,
+ [
+ (of::DeviceId::new(c_str!("rockchip,rk3588-mali")), ()),
+ (of::DeviceId::new(c_str!("arm,mali-valhall-csf")), ())
+ ]
+);
+
+impl platform::Driver for TyrDriver {
+ type IdInfo = ();
+ const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
+
+ fn probe(
+ pdev: &platform::Device<Core>,
+ _info: Option<&Self::IdInfo>,
+ ) -> Result<Pin<KBox<Self>>> {
+ let core_clk = Clk::get(pdev.as_ref(), Some(c_str!("core")))?;
+ let stacks_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("stacks")))?;
+ let coregroup_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("coregroup")))?;
+
+ core_clk.prepare_enable()?;
+ stacks_clk.prepare_enable()?;
+ coregroup_clk.prepare_enable()?;
+
+ let mali_regulator = Regulator::<regulator::Enabled>::get(pdev.as_ref(), c_str!("mali"))?;
+ let sram_regulator = Regulator::<regulator::Enabled>::get(pdev.as_ref(), c_str!("sram"))?;
+
+ let request = pdev.io_request_by_index(0).ok_or(ENODEV)?;
+ let iomem = Arc::pin_init(request.iomap_sized::<SZ_2M>(), GFP_KERNEL)?;
+
+ issue_soft_reset(pdev.as_ref(), &iomem)?;
+ gpu::l2_power_on(pdev.as_ref(), &iomem)?;
+
+ let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?;
+ gpu_info.log(pdev);
+
+ let platform: ARef<platform::Device> = pdev.into();
+
+ let data = try_pin_init!(TyrData {
+ pdev: platform.clone(),
+ clks <- new_mutex!(Clocks {
+ core: core_clk,
+ stacks: stacks_clk,
+ coregroup: coregroup_clk,
+ }),
+ regulators <- new_mutex!(Regulators {
+ mali: mali_regulator,
+ sram: sram_regulator,
+ }),
+ gpu_info,
+ });
+
+ let tdev: ARef<TyrDevice> = drm::Device::new(pdev.as_ref(), data)?;
+ drm::driver::Registration::new_foreign_owned(&tdev, pdev.as_ref(), 0)?;
+
+ let driver = KBox::pin_init(try_pin_init!(TyrDriver { device: tdev }), GFP_KERNEL)?;
+
+ // We need this to be dev_info!() because dev_dbg!() does not work at
+ // all in Rust for now, and we need to see whether probe succeeded.
+ dev_info!(pdev.as_ref(), "Tyr initialized correctly.\n");
+ Ok(driver)
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for TyrDriver {
+ fn drop(self: Pin<&mut Self>) {}
+}
+
+#[pinned_drop]
+impl PinnedDrop for TyrData {
+ fn drop(self: Pin<&mut Self>) {
+ // TODO: the type-state pattern for Clks will fix this.
+ let clks = self.clks.lock();
+ clks.core.disable_unprepare();
+ clks.stacks.disable_unprepare();
+ clks.coregroup.disable_unprepare();
+ }
+}
+
+// We need to retain the name "panthor" to achieve drop-in compatibility with
+// the C driver in the userspace stack.
+const INFO: drm::DriverInfo = drm::DriverInfo {
+ major: 1,
+ minor: 5,
+ patchlevel: 0,
+ name: c_str!("panthor"),
+ desc: c_str!("ARM Mali Tyr DRM driver"),
+};
+
+#[vtable]
+impl drm::Driver for TyrDriver {
+ type Data = TyrData;
+ type File = File;
+ type Object = drm::gem::Object<TyrObject>;
+
+ const INFO: drm::DriverInfo = INFO;
+
+ kernel::declare_drm_ioctls! {
+ (PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query),
+ }
+}
+
+#[pin_data]
+struct Clocks {
+ core: Clk,
+ stacks: OptionalClk,
+ coregroup: OptionalClk,
+}
+
+#[pin_data]
+struct Regulators {
+ mali: Regulator<regulator::Enabled>,
+ sram: Regulator<regulator::Enabled>,
+}
diff --git a/drivers/gpu/drm/tyr/file.rs b/drivers/gpu/drm/tyr/file.rs
new file mode 100644
index 000000000000..0ef432947b73
--- /dev/null
+++ b/drivers/gpu/drm/tyr/file.rs
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+
+use kernel::drm;
+use kernel::prelude::*;
+use kernel::uaccess::UserSlice;
+use kernel::uapi;
+
+use crate::driver::TyrDevice;
+use crate::TyrDriver;
+
+#[pin_data]
+pub(crate) struct File {}
+
+/// Convenience type alias for our DRM `File` type
+pub(crate) type DrmFile = drm::file::File<File>;
+
+impl drm::file::DriverFile for File {
+ type Driver = TyrDriver;
+
+ fn open(_dev: &drm::Device<Self::Driver>) -> Result<Pin<KBox<Self>>> {
+ KBox::try_pin_init(try_pin_init!(Self {}), GFP_KERNEL)
+ }
+}
+
+impl File {
+ pub(crate) fn dev_query(
+ tdev: &TyrDevice,
+ devquery: &mut uapi::drm_panthor_dev_query,
+ _file: &DrmFile,
+ ) -> Result<u32> {
+ if devquery.pointer == 0 {
+ match devquery.type_ {
+ uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => {
+ devquery.size = core::mem::size_of_val(&tdev.gpu_info) as u32;
+ Ok(0)
+ }
+ _ => Err(EINVAL),
+ }
+ } else {
+ match devquery.type_ {
+ uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => {
+ let mut writer = UserSlice::new(
+ UserPtr::from_addr(devquery.pointer as usize),
+ devquery.size as usize,
+ )
+ .writer();
+
+ writer.write(&tdev.gpu_info)?;
+
+ Ok(0)
+ }
+ _ => Err(EINVAL),
+ }
+ }
+ }
+}
diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs
new file mode 100644
index 000000000000..1273bf89dbd5
--- /dev/null
+++ b/drivers/gpu/drm/tyr/gem.rs
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+
+use crate::driver::TyrDevice;
+use crate::driver::TyrDriver;
+use kernel::drm::gem;
+use kernel::prelude::*;
+
+/// GEM Object inner driver data
+#[pin_data]
+pub(crate) struct TyrObject {}
+
+impl gem::DriverObject for TyrObject {
+ type Driver = TyrDriver;
+
+ fn new(_dev: &TyrDevice, _size: usize) -> impl PinInit<Self, Error> {
+ try_pin_init!(TyrObject {})
+ }
+}
diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs
new file mode 100644
index 000000000000..6c582910dd5d
--- /dev/null
+++ b/drivers/gpu/drm/tyr/gpu.rs
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+
+use kernel::bits::genmask_u32;
+use kernel::device::Bound;
+use kernel::device::Device;
+use kernel::devres::Devres;
+use kernel::platform;
+use kernel::prelude::*;
+use kernel::time;
+use kernel::transmute::AsBytes;
+
+use crate::driver::IoMem;
+use crate::regs;
+
+/// Struct containing information that can be queried by userspace. This is read from
+/// the GPU's registers.
+///
+/// # Invariants
+///
+/// - The layout of this struct identical to the C `struct drm_panthor_gpu_info`.
+#[repr(C)]
+pub(crate) struct GpuInfo {
+ pub(crate) gpu_id: u32,
+ pub(crate) gpu_rev: u32,
+ pub(crate) csf_id: u32,
+ pub(crate) l2_features: u32,
+ pub(crate) tiler_features: u32,
+ pub(crate) mem_features: u32,
+ pub(crate) mmu_features: u32,
+ pub(crate) thread_features: u32,
+ pub(crate) max_threads: u32,
+ pub(crate) thread_max_workgroup_size: u32,
+ pub(crate) thread_max_barrier_size: u32,
+ pub(crate) coherency_features: u32,
+ pub(crate) texture_features: [u32; 4],
+ pub(crate) as_present: u32,
+ pub(crate) pad0: u32,
+ pub(crate) shader_present: u64,
+ pub(crate) l2_present: u64,
+ pub(crate) tiler_present: u64,
+ pub(crate) core_features: u32,
+ pub(crate) pad: u32,
+}
+
+impl GpuInfo {
+ pub(crate) fn new(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<Self> {
+ let gpu_id = regs::GPU_ID.read(dev, iomem)?;
+ let csf_id = regs::GPU_CSF_ID.read(dev, iomem)?;
+ let gpu_rev = regs::GPU_REVID.read(dev, iomem)?;
+ let core_features = regs::GPU_CORE_FEATURES.read(dev, iomem)?;
+ let l2_features = regs::GPU_L2_FEATURES.read(dev, iomem)?;
+ let tiler_features = regs::GPU_TILER_FEATURES.read(dev, iomem)?;
+ let mem_features = regs::GPU_MEM_FEATURES.read(dev, iomem)?;
+ let mmu_features = regs::GPU_MMU_FEATURES.read(dev, iomem)?;
+ let thread_features = regs::GPU_THREAD_FEATURES.read(dev, iomem)?;
+ let max_threads = regs::GPU_THREAD_MAX_THREADS.read(dev, iomem)?;
+ let thread_max_workgroup_size = regs::GPU_THREAD_MAX_WORKGROUP_SIZE.read(dev, iomem)?;
+ let thread_max_barrier_size = regs::GPU_THREAD_MAX_BARRIER_SIZE.read(dev, iomem)?;
+ let coherency_features = regs::GPU_COHERENCY_FEATURES.read(dev, iomem)?;
+
+ let texture_features = regs::GPU_TEXTURE_FEATURES0.read(dev, iomem)?;
+
+ let as_present = regs::GPU_AS_PRESENT.read(dev, iomem)?;
+
+ let shader_present = u64::from(regs::GPU_SHADER_PRESENT_LO.read(dev, iomem)?);
+ let shader_present =
+ shader_present | u64::from(regs::GPU_SHADER_PRESENT_HI.read(dev, iomem)?) << 32;
+
+ let tiler_present = u64::from(regs::GPU_TILER_PRESENT_LO.read(dev, iomem)?);
+ let tiler_present =
+ tiler_present | u64::from(regs::GPU_TILER_PRESENT_HI.read(dev, iomem)?) << 32;
+
+ let l2_present = u64::from(regs::GPU_L2_PRESENT_LO.read(dev, iomem)?);
+ let l2_present = l2_present | u64::from(regs::GPU_L2_PRESENT_HI.read(dev, iomem)?) << 32;
+
+ Ok(Self {
+ gpu_id,
+ gpu_rev,
+ csf_id,
+ l2_features,
+ tiler_features,
+ mem_features,
+ mmu_features,
+ thread_features,
+ max_threads,
+ thread_max_workgroup_size,
+ thread_max_barrier_size,
+ coherency_features,
+ // TODO: Add texture_features_{1,2,3}.
+ texture_features: [texture_features, 0, 0, 0],
+ as_present,
+ pad0: 0,
+ shader_present,
+ l2_present,
+ tiler_present,
+ core_features,
+ pad: 0,
+ })
+ }
+
+ pub(crate) fn log(&self, pdev: &platform::Device) {
+ let major = (self.gpu_id >> 16) & 0xff;
+ let minor = (self.gpu_id >> 8) & 0xff;
+ let status = self.gpu_id & 0xff;
+
+ let model_name = if let Some(model) = GPU_MODELS
+ .iter()
+ .find(|&f| f.major == major && f.minor == minor)
+ {
+ model.name
+ } else {
+ "unknown"
+ };
+
+ dev_info!(
+ pdev.as_ref(),
+ "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}",
+ model_name,
+ self.gpu_id >> 16,
+ major,
+ minor,
+ status
+ );
+
+ dev_info!(
+ pdev.as_ref(),
+ "Features: L2:{:#x} Tiler:{:#x} Mem:{:#x} MMU:{:#x} AS:{:#x}",
+ self.l2_features,
+ self.tiler_features,
+ self.mem_features,
+ self.mmu_features,
+ self.as_present
+ );
+
+ dev_info!(
+ pdev.as_ref(),
+ "shader_present=0x{:016x} l2_present=0x{:016x} tiler_present=0x{:016x}",
+ self.shader_present,
+ self.l2_present,
+ self.tiler_present
+ );
+ }
+
+ /// Returns the number of virtual address bits supported by the GPU.
+ #[expect(dead_code)]
+ pub(crate) fn va_bits(&self) -> u32 {
+ self.mmu_features & genmask_u32(0..=7)
+ }
+
+ /// Returns the number of physical address bits supported by the GPU.
+ #[expect(dead_code)]
+ pub(crate) fn pa_bits(&self) -> u32 {
+ (self.mmu_features >> 8) & genmask_u32(0..=7)
+ }
+}
+
+// SAFETY: `GpuInfo`'s invariant guarantees that it is the same type that is
+// already exposed to userspace by the C driver. This implies that it fulfills
+// the requirements for `AsBytes`.
+//
+// This means:
+//
+// - No implicit padding,
+// - No kernel pointers,
+// - No interior mutability.
+unsafe impl AsBytes for GpuInfo {}
+
+struct GpuModels {
+ name: &'static str,
+ major: u32,
+ minor: u32,
+}
+
+const GPU_MODELS: [GpuModels; 1] = [GpuModels {
+ name: "g610",
+ major: 10,
+ minor: 7,
+}];
+
+#[allow(dead_code)]
+pub(crate) struct GpuId {
+ pub(crate) arch_major: u32,
+ pub(crate) arch_minor: u32,
+ pub(crate) arch_rev: u32,
+ pub(crate) prod_major: u32,
+ pub(crate) ver_major: u32,
+ pub(crate) ver_minor: u32,
+ pub(crate) ver_status: u32,
+}
+
+impl From<u32> for GpuId {
+ fn from(value: u32) -> Self {
+ GpuId {
+ arch_major: (value & genmask_u32(28..=31)) >> 28,
+ arch_minor: (value & genmask_u32(24..=27)) >> 24,
+ arch_rev: (value & genmask_u32(20..=23)) >> 20,
+ prod_major: (value & genmask_u32(16..=19)) >> 16,
+ ver_major: (value & genmask_u32(12..=15)) >> 12,
+ ver_minor: (value & genmask_u32(4..=11)) >> 4,
+ ver_status: value & genmask_u32(0..=3),
+ }
+ }
+}
+
+/// Powers on the l2 block.
+pub(crate) fn l2_power_on(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
+ regs::L2_PWRON_LO.write(dev, iomem, 1)?;
+
+ // TODO: We cannot poll, as there is no support in Rust currently, so we
+ // sleep. Change this when read_poll_timeout() is implemented in Rust.
+ kernel::time::delay::fsleep(time::Delta::from_millis(100));
+
+ if regs::L2_READY_LO.read(dev, iomem)? != 1 {
+ dev_err!(dev, "Failed to power on the GPU\n");
+ return Err(EIO);
+ }
+
+ Ok(())
+}
diff --git a/drivers/gpu/drm/tyr/regs.rs b/drivers/gpu/drm/tyr/regs.rs
new file mode 100644
index 000000000000..f46933aaa221
--- /dev/null
+++ b/drivers/gpu/drm/tyr/regs.rs
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+
+// We don't expect that all the registers and fields will be used, even in the
+// future.
+//
+// Nevertheless, it is useful to have most of them defined, like the C driver
+// does.
+#![allow(dead_code)]
+
+use kernel::bits::bit_u32;
+use kernel::device::Bound;
+use kernel::device::Device;
+use kernel::devres::Devres;
+use kernel::prelude::*;
+
+use crate::driver::IoMem;
+
+/// Represents a register in the Register Set
+///
+/// TODO: Replace this with the Nova `register!()` macro when it is available.
+/// In particular, this will automatically give us 64bit register reads and
+/// writes.
+pub(crate) struct Register<const OFFSET: usize>;
+
+impl<const OFFSET: usize> Register<OFFSET> {
+ #[inline]
+ pub(crate) fn read(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<u32> {
+ let value = (*iomem).access(dev)?.read32(OFFSET);
+ Ok(value)
+ }
+
+ #[inline]
+ pub(crate) fn write(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>, value: u32) -> Result {
+ (*iomem).access(dev)?.write32(value, OFFSET);
+ Ok(())
+ }
+}
+
+pub(crate) const GPU_ID: Register<0x0> = Register;
+pub(crate) const GPU_L2_FEATURES: Register<0x4> = Register;
+pub(crate) const GPU_CORE_FEATURES: Register<0x8> = Register;
+pub(crate) const GPU_CSF_ID: Register<0x1c> = Register;
+pub(crate) const GPU_REVID: Register<0x280> = Register;
+pub(crate) const GPU_TILER_FEATURES: Register<0xc> = Register;
+pub(crate) const GPU_MEM_FEATURES: Register<0x10> = Register;
+pub(crate) const GPU_MMU_FEATURES: Register<0x14> = Register;
+pub(crate) const GPU_AS_PRESENT: Register<0x18> = Register;
+pub(crate) const GPU_IRQ_RAWSTAT: Register<0x20> = Register;
+
+pub(crate) const GPU_IRQ_RAWSTAT_FAULT: u32 = bit_u32(0);
+pub(crate) const GPU_IRQ_RAWSTAT_PROTECTED_FAULT: u32 = bit_u32(1);
+pub(crate) const GPU_IRQ_RAWSTAT_RESET_COMPLETED: u32 = bit_u32(8);
+pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_SINGLE: u32 = bit_u32(9);
+pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_ALL: u32 = bit_u32(10);
+pub(crate) const GPU_IRQ_RAWSTAT_CLEAN_CACHES_COMPLETED: u32 = bit_u32(17);
+pub(crate) const GPU_IRQ_RAWSTAT_DOORBELL_STATUS: u32 = bit_u32(18);
+pub(crate) const GPU_IRQ_RAWSTAT_MCU_STATUS: u32 = bit_u32(19);
+
+pub(crate) const GPU_IRQ_CLEAR: Register<0x24> = Register;
+pub(crate) const GPU_IRQ_MASK: Register<0x28> = Register;
+pub(crate) const GPU_IRQ_STAT: Register<0x2c> = Register;
+pub(crate) const GPU_CMD: Register<0x30> = Register;
+pub(crate) const GPU_CMD_SOFT_RESET: u32 = 1 | (1 << 8);
+pub(crate) const GPU_CMD_HARD_RESET: u32 = 1 | (2 << 8);
+pub(crate) const GPU_THREAD_FEATURES: Register<0xac> = Register;
+pub(crate) const GPU_THREAD_MAX_THREADS: Register<0xa0> = Register;
+pub(crate) const GPU_THREAD_MAX_WORKGROUP_SIZE: Register<0xa4> = Register;
+pub(crate) const GPU_THREAD_MAX_BARRIER_SIZE: Register<0xa8> = Register;
+pub(crate) const GPU_TEXTURE_FEATURES0: Register<0xb0> = Register;
+pub(crate) const GPU_SHADER_PRESENT_LO: Register<0x100> = Register;
+pub(crate) const GPU_SHADER_PRESENT_HI: Register<0x104> = Register;
+pub(crate) const GPU_TILER_PRESENT_LO: Register<0x110> = Register;
+pub(crate) const GPU_TILER_PRESENT_HI: Register<0x114> = Register;
+pub(crate) const GPU_L2_PRESENT_LO: Register<0x120> = Register;
+pub(crate) const GPU_L2_PRESENT_HI: Register<0x124> = Register;
+pub(crate) const L2_READY_LO: Register<0x160> = Register;
+pub(crate) const L2_READY_HI: Register<0x164> = Register;
+pub(crate) const L2_PWRON_LO: Register<0x1a0> = Register;
+pub(crate) const L2_PWRON_HI: Register<0x1a4> = Register;
+pub(crate) const L2_PWRTRANS_LO: Register<0x220> = Register;
+pub(crate) const L2_PWRTRANS_HI: Register<0x204> = Register;
+pub(crate) const L2_PWRACTIVE_LO: Register<0x260> = Register;
+pub(crate) const L2_PWRACTIVE_HI: Register<0x264> = Register;
+
+pub(crate) const MCU_CONTROL: Register<0x700> = Register;
+pub(crate) const MCU_CONTROL_ENABLE: u32 = 1;
+pub(crate) const MCU_CONTROL_AUTO: u32 = 2;
+pub(crate) const MCU_CONTROL_DISABLE: u32 = 0;
+
+pub(crate) const MCU_STATUS: Register<0x704> = Register;
+pub(crate) const MCU_STATUS_DISABLED: u32 = 0;
+pub(crate) const MCU_STATUS_ENABLED: u32 = 1;
+pub(crate) const MCU_STATUS_HALT: u32 = 2;
+pub(crate) const MCU_STATUS_FATAL: u32 = 3;
+
+pub(crate) const GPU_COHERENCY_FEATURES: Register<0x300> = Register;
+
+pub(crate) const JOB_IRQ_RAWSTAT: Register<0x1000> = Register;
+pub(crate) const JOB_IRQ_CLEAR: Register<0x1004> = Register;
+pub(crate) const JOB_IRQ_MASK: Register<0x1008> = Register;
+pub(crate) const JOB_IRQ_STAT: Register<0x100c> = Register;
+
+pub(crate) const JOB_IRQ_GLOBAL_IF: u32 = bit_u32(31);
+
+pub(crate) const MMU_IRQ_RAWSTAT: Register<0x2000> = Register;
+pub(crate) const MMU_IRQ_CLEAR: Register<0x2004> = Register;
+pub(crate) const MMU_IRQ_MASK: Register<0x2008> = Register;
+pub(crate) const MMU_IRQ_STAT: Register<0x200c> = Register;
diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs
new file mode 100644
index 000000000000..861d1db43072
--- /dev/null
+++ b/drivers/gpu/drm/tyr/tyr.rs
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+
+//! Arm Mali Tyr DRM driver.
+//!
+//! The name "Tyr" is inspired by Norse mythology, reflecting Arm's tradition of
+//! naming their GPUs after Nordic mythological figures and places.
+
+use crate::driver::TyrDriver;
+
+mod driver;
+mod file;
+mod gem;
+mod gpu;
+mod regs;
+
+kernel::module_platform_driver! {
+ type: TyrDriver,
+ name: "tyr",
+ authors: ["The Tyr driver authors"],
+ description: "Arm Mali Tyr DRM driver",
+ license: "Dual MIT/GPL",
+}
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 0317f3d7452a..1884686985b8 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -62,6 +62,8 @@ struct v3d_queue_state {
/* Currently active job for this queue */
struct v3d_job *active_job;
spinlock_t queue_lock;
+ /* Protect dma fence for signalling job completion */
+ spinlock_t fence_lock;
};
/* Performance monitor object. The perform lifetime is controlled by userspace
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index 8f8471adae34..c82500a1df73 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -15,7 +15,7 @@ struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q)
fence->dev = &v3d->drm;
fence->queue = q;
fence->seqno = ++queue->emit_seqno;
- dma_fence_init(&fence->base, &v3d_fence_ops, &queue->queue_lock,
+ dma_fence_init(&fence->base, &v3d_fence_ops, &queue->fence_lock,
queue->fence_context, fence->seqno);
return &fence->base;
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index c77d90aa9b82..bb110d35f749 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -273,6 +273,7 @@ v3d_gem_init(struct drm_device *dev)
seqcount_init(&queue->stats.lock);
spin_lock_init(&queue->queue_lock);
+ spin_lock_init(&queue->fence_lock);
}
spin_lock_init(&v3d->mm_lock);
diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c
index 8ec6ed82b3d9..c1a30166c099 100644
--- a/drivers/gpu/drm/v3d/v3d_gemfs.c
+++ b/drivers/gpu/drm/v3d/v3d_gemfs.c
@@ -7,11 +7,6 @@
#include "v3d_drv.h"
-static int add_param(struct fs_context *fc, const char *key, const char *val)
-{
- return vfs_parse_fs_string(fc, key, val, strlen(val));
-}
-
void v3d_gemfs_init(struct v3d_dev *v3d)
{
struct file_system_type *type;
@@ -38,9 +33,9 @@ void v3d_gemfs_init(struct v3d_dev *v3d)
fc = fs_context_for_mount(type, SB_KERNMOUNT);
if (IS_ERR(fc))
goto err;
- ret = add_param(fc, "source", "tmpfs");
+ ret = vfs_parse_fs_string(fc, "source", "tmpfs");
if (!ret)
- ret = add_param(fc, "huge", "within_size");
+ ret = vfs_parse_fs_string(fc, "huge", "within_size");
if (!ret)
gemfs = fc_mount_longterm(fc);
put_fs_context(fc);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 819704ac675d..d539f25b5fbe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -1497,6 +1497,7 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
SVGA3dCmdHeader *header)
{
struct vmw_bo *vmw_bo = NULL;
+ struct vmw_resource *res;
struct vmw_surface *srf = NULL;
VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdSurfaceDMA);
int ret;
@@ -1532,18 +1533,24 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
dirty = (cmd->body.transfer == SVGA3D_WRITE_HOST_VRAM) ?
VMW_RES_DIRTY_SET : 0;
- ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
- dirty, user_surface_converter,
- &cmd->body.host.sid, NULL);
+ ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, dirty,
+ user_surface_converter, &cmd->body.host.sid,
+ NULL);
if (unlikely(ret != 0)) {
if (unlikely(ret != -ERESTARTSYS))
VMW_DEBUG_USER("could not find surface for DMA.\n");
return ret;
}
- srf = vmw_res_to_srf(sw_context->res_cache[vmw_res_surface].res);
+ res = sw_context->res_cache[vmw_res_surface].res;
+ if (!res) {
+ VMW_DEBUG_USER("Invalid DMA surface.\n");
+ return -EINVAL;
+ }
- vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->tbo, header);
+ srf = vmw_res_to_srf(res);
+ vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->tbo,
+ header);
return 0;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
index 7ee93e7191c7..35dc94c3db39 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
@@ -308,8 +308,10 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx,
hash_add_rcu(ctx->sw_context->res_ht, &node->hash.head, node->hash.key);
}
node->res = vmw_resource_reference_unless_doomed(res);
- if (!node->res)
+ if (!node->res) {
+ hash_del_rcu(&node->hash.head);
return -ESRCH;
+ }
node->first_usage = 1;
if (!res->dev_priv->has_mob) {
@@ -636,7 +638,7 @@ void vmw_validation_drop_ht(struct vmw_validation_context *ctx)
hash_del_rcu(&val->hash.head);
list_for_each_entry(val, &ctx->resource_ctx_list, head)
- hash_del_rcu(&entry->hash.head);
+ hash_del_rcu(&val->hash.head);
ctx->sw_context = NULL;
}
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 714d5702dfd7..7219f6b884b6 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -40,12 +40,12 @@ config DRM_XE
select DRM_TTM
select DRM_TTM_HELPER
select DRM_EXEC
+ select DRM_GPUSVM if !UML && DEVICE_PRIVATE
select DRM_GPUVM
select DRM_SCHED
select MMU_NOTIFIER
select WANT_DEV_COREDUMP
select AUXILIARY_BUS
- select HMM_MIRROR
select REGMAP if I2C
help
Driver for Intel Xe2 series GPUs and later. Experimental support
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 01735c6ece8b..87902b4bd6d3 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -104,6 +104,7 @@ config DRM_XE_DEBUG_GUC
config DRM_XE_USERPTR_INVAL_INJECT
bool "Inject userptr invalidation -EINVAL errors"
+ depends on DRM_GPUSVM
default n
help
Choose this option when debugging error paths that
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 987e4fe10538..d9c6cf0f189e 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -84,6 +84,7 @@ xe-y += xe_bb.o \
xe_hw_error.o \
xe_hw_fence.o \
xe_irq.o \
+ xe_late_bind_fw.o \
xe_lrc.o \
xe_migrate.o \
xe_mmio.o \
@@ -130,6 +131,7 @@ xe-y += xe_bb.o \
xe_tuning.o \
xe_uc.o \
xe_uc_fw.o \
+ xe_validation.o \
xe_vm.o \
xe_vm_madvise.o \
xe_vram.o \
@@ -140,8 +142,8 @@ xe-y += xe_bb.o \
xe_wopcm.o
xe-$(CONFIG_I2C) += xe_i2c.o
-xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o
xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o
+xe-$(CONFIG_DRM_GPUSVM) += xe_userptr.o
# graphics hardware monitoring (HWMON) support
xe-$(CONFIG_HWMON) += xe_hwmon.o
@@ -210,6 +212,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
display/xe_dsb_buffer.o \
display/xe_fb_pin.o \
display/xe_hdcp_gsc.o \
+ display/xe_panic.o \
display/xe_plane_initial.o \
display/xe_tdf.o
@@ -325,6 +328,7 @@ ifeq ($(CONFIG_DEBUG_FS),y)
xe_gt_stats.o \
xe_guc_debugfs.o \
xe_huc_debugfs.o \
+ xe_tile_debugfs.o \
xe_uc_debugfs.o
xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index d8cf68a0516d..31090c69dfbe 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -117,6 +117,7 @@ enum xe_guc_action {
XE_GUC_ACTION_ENTER_S_STATE = 0x501,
XE_GUC_ACTION_EXIT_S_STATE = 0x502,
XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+ XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509,
XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
@@ -154,6 +155,8 @@ enum xe_guc_action {
XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+ XE_GUC_ACTION_TEST_G2G_SEND = 0xF001,
+ XE_GUC_ACTION_TEST_G2G_RECV = 0xF002,
XE_GUC_ACTION_LIMIT
};
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
index b28c8fa061f7..ce5c59517528 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -210,6 +210,11 @@ struct slpc_shared_data {
u8 reserved_mode_definition[4096];
} __packed;
+enum slpc_power_profile {
+ SLPC_POWER_PROFILE_BASE = 0x0,
+ SLPC_POWER_PROFILE_POWER_SAVING = 0x1
+};
+
/**
* DOC: SLPC H2G MESSAGE FORMAT
*
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 0e78351c6ef5..265a135e7061 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -17,6 +17,7 @@
* | 0 | 31:16 | **KEY** - KLV key identifier |
* | | | - `GuC Self Config KLVs`_ |
* | | | - `GuC Opt In Feature KLVs`_ |
+ * | | | - `GuC Scheduling Policies KLVs`_ |
* | | | - `GuC VGT Policy KLVs`_ |
* | | | - `GuC VF Configuration KLVs`_ |
* | | | |
@@ -153,6 +154,30 @@ enum {
#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u
/**
+ * DOC: GuC Scheduling Policies KLVs
+ *
+ * `GuC KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV.
+ *
+ * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001
+ * Some platforms do not allow concurrent execution of RCS and CCS
+ * workloads from different address spaces. By default, the GuC prioritizes
+ * RCS submissions over CCS ones, which can lead to CCS workloads being
+ * significantly (or completely) starved of execution time. This KLV allows
+ * the driver to specify a quantum (in ms) and a ratio (percentage value
+ * between 0 and 100), and the GuC will prioritize the CCS for that
+ * percentage of each quantum. For example, specifying 100ms and 30% will
+ * make the GuC prioritize the CCS for 30ms of every 100ms.
+ * Note that this does not necessarly mean that RCS and CCS engines will
+ * only be active for their percentage of the quantum, as the restriction
+ * only kicks in if both classes are fully busy with non-compatible address
+ * spaces; i.e., if one engine is idle or running the same address space,
+ * a pending job on the other engine will still be submitted to the HW no
+ * matter what the ratio is
+ */
+#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY 0x1001
+#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN 2u
+
+/**
* DOC: GuC VGT Policy KLVs
*
* `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
index 41d39d67817a..f097fc6d5127 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
@@ -8,6 +8,7 @@
#include "xe_ttm_stolen_mgr.h"
#include "xe_res_cursor.h"
+#include "xe_validation.h"
struct xe_bo;
@@ -21,7 +22,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
u32 start, u32 end)
{
struct xe_bo *bo;
- int err;
+ int err = 0;
u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN;
if (start < SZ_4K)
@@ -32,21 +33,13 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
start = ALIGN(start, align);
}
- bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
- NULL, size, start, end,
- ttm_bo_type_kernel, flags, 0);
+ bo = xe_bo_create_pin_range_novm(xe, xe_device_get_root_tile(xe),
+ size, start, end, ttm_bo_type_kernel, flags);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
bo = NULL;
return err;
}
- err = xe_bo_pin(bo);
- xe_bo_unlock_vm_held(bo);
-
- if (err) {
- xe_bo_put(fb->bo);
- bo = NULL;
- }
fb->bo = bo;
diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c
index 910632f57c3d..27437c22bd70 100644
--- a/drivers/gpu/drm/xe/display/intel_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_bo.c
@@ -1,12 +1,7 @@
// SPDX-License-Identifier: MIT
/* Copyright © 2024 Intel Corporation */
-#include <drm/drm_cache.h>
#include <drm/drm_gem.h>
-#include <drm/drm_panic.h>
-
-#include "intel_fb.h"
-#include "intel_display_types.h"
#include "xe_bo.h"
#include "intel_bo.h"
@@ -64,89 +59,3 @@ void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj)
{
/* FIXME */
}
-
-struct xe_panic_data {
- struct page **pages;
- int page;
- void *vaddr;
-};
-
-struct xe_framebuffer {
- struct intel_framebuffer base;
- struct xe_panic_data panic;
-};
-
-static inline struct xe_panic_data *to_xe_panic_data(struct intel_framebuffer *fb)
-{
- return &container_of_const(fb, struct xe_framebuffer, base)->panic;
-}
-
-static void xe_panic_kunmap(struct xe_panic_data *panic)
-{
- if (panic->vaddr) {
- drm_clflush_virt_range(panic->vaddr, PAGE_SIZE);
- kunmap_local(panic->vaddr);
- panic->vaddr = NULL;
- }
-}
-
-/*
- * The scanout buffer pages are not mapped, so for each pixel,
- * use kmap_local_page_try_from_panic() to map the page, and write the pixel.
- * Try to keep the map from the previous pixel, to avoid too much map/unmap.
- */
-static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x,
- unsigned int y, u32 color)
-{
- struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private;
- struct xe_panic_data *panic = to_xe_panic_data(fb);
- struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base));
- unsigned int new_page;
- unsigned int offset;
-
- if (fb->panic_tiling)
- offset = fb->panic_tiling(sb->width, x, y);
- else
- offset = y * sb->pitch[0] + x * sb->format->cpp[0];
-
- new_page = offset >> PAGE_SHIFT;
- offset = offset % PAGE_SIZE;
- if (new_page != panic->page) {
- xe_panic_kunmap(panic);
- panic->page = new_page;
- panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm,
- panic->page);
- }
- if (panic->vaddr) {
- u32 *pix = panic->vaddr + offset;
- *pix = color;
- }
-}
-
-struct intel_framebuffer *intel_bo_alloc_framebuffer(void)
-{
- struct xe_framebuffer *xe_fb;
-
- xe_fb = kzalloc(sizeof(*xe_fb), GFP_KERNEL);
- if (xe_fb)
- return &xe_fb->base;
- return NULL;
-}
-
-int intel_bo_panic_setup(struct drm_scanout_buffer *sb)
-{
- struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private;
- struct xe_panic_data *panic = to_xe_panic_data(fb);
-
- panic->page = -1;
- sb->set_pixel = xe_panic_page_set_pixel;
- return 0;
-}
-
-void intel_bo_panic_finish(struct intel_framebuffer *fb)
-{
- struct xe_panic_data *panic = to_xe_panic_data(fb);
-
- xe_panic_kunmap(panic);
- panic->page = -1;
-}
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
index d96ba2b51065..8ea9a472113c 100644
--- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -42,11 +42,11 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
obj = ERR_PTR(-ENODEV);
if (!IS_DGFX(xe) && !XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) {
- obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
- NULL, size,
- ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
- XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT);
+ obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe),
+ size,
+ ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT, false);
if (!IS_ERR(obj))
drm_info(&xe->drm, "Allocated fbdev into stolen\n");
else
@@ -54,10 +54,10 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
}
if (IS_ERR(obj)) {
- obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size,
- ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
- XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_GGTT);
+ obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size,
+ ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+ XE_BO_FLAG_GGTT, false);
}
if (IS_ERR(obj)) {
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 8b68d70db6c8..19e691fccf8c 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -20,7 +20,7 @@
#include "intel_audio.h"
#include "intel_bw.h"
#include "intel_display.h"
-#include "intel_display_core.h"
+#include "intel_display_device.h"
#include "intel_display_driver.h"
#include "intel_display_irq.h"
#include "intel_display_types.h"
@@ -37,13 +37,6 @@
/* Xe device functions */
-static bool has_display(struct xe_device *xe)
-{
- struct intel_display *display = xe->display;
-
- return HAS_DISPLAY(display);
-}
-
/**
* xe_display_driver_probe_defer - Detect if we need to wait for other drivers
* early on
@@ -290,7 +283,7 @@ static void xe_display_enable_d3cold(struct xe_device *xe)
intel_dmc_suspend(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_hpd_poll_enable(display);
}
@@ -303,14 +296,14 @@ static void xe_display_disable_d3cold(struct xe_device *xe)
intel_dmc_resume(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
drm_mode_config_reset(&xe->drm);
intel_display_driver_init_hw(display);
intel_hpd_init(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_hpd_poll_disable(display);
intel_opregion_resume(display);
@@ -333,7 +326,7 @@ void xe_display_pm_suspend(struct xe_device *xe)
intel_power_domains_disable(display);
drm_client_dev_suspend(&xe->drm, false);
- if (has_display(xe)) {
+ if (intel_display_device_present(display)) {
drm_kms_helper_poll_disable(&xe->drm);
intel_display_driver_disable_user_access(display);
intel_display_driver_suspend(display);
@@ -345,7 +338,7 @@ void xe_display_pm_suspend(struct xe_device *xe)
intel_hpd_cancel_work(display);
- if (has_display(xe)) {
+ if (intel_display_device_present(display)) {
intel_display_driver_suspend_access(display);
intel_encoder_suspend_all(display);
}
@@ -365,7 +358,7 @@ void xe_display_pm_shutdown(struct xe_device *xe)
intel_power_domains_disable(display);
drm_client_dev_suspend(&xe->drm, false);
- if (has_display(xe)) {
+ if (intel_display_device_present(display)) {
drm_kms_helper_poll_disable(&xe->drm);
intel_display_driver_disable_user_access(display);
intel_display_driver_suspend(display);
@@ -376,7 +369,7 @@ void xe_display_pm_shutdown(struct xe_device *xe)
intel_encoder_block_all_hpds(display);
intel_hpd_cancel_work(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_display_driver_suspend_access(display);
intel_encoder_suspend_all(display);
@@ -465,25 +458,25 @@ void xe_display_pm_resume(struct xe_device *xe)
intel_dmc_resume(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
drm_mode_config_reset(&xe->drm);
intel_display_driver_init_hw(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_display_driver_resume_access(display);
intel_hpd_init(display);
intel_encoder_unblock_all_hpds(display);
- if (has_display(xe)) {
+ if (intel_display_device_present(display)) {
intel_display_driver_resume(display);
drm_kms_helper_poll_enable(&xe->drm);
intel_display_driver_enable_user_access(display);
}
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_hpd_poll_disable(display);
intel_opregion_resume(display);
@@ -548,7 +541,7 @@ int xe_display_probe(struct xe_device *xe)
xe->display = display;
- if (has_display(xe))
+ if (intel_display_device_present(display))
return 0;
no_display:
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index 9f941fc2e36b..58581d7aaae6 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -43,11 +43,11 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
return false;
/* Set scanout flag for WC mapping */
- obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
- NULL, PAGE_ALIGN(size),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
+ obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe),
+ PAGE_ALIGN(size),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+ XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT, false);
if (IS_ERR(obj)) {
kfree(vma);
return false;
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index f1f8b5ab53ef..1fd4a815e784 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -102,29 +102,29 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
XE_PAGE_SIZE);
if (IS_DGFX(xe))
- dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
- dpt_size, ~0ull,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM0 |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE,
- alignment);
+ dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM0 |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ alignment, false);
else
- dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
- dpt_size, ~0ull,
- ttm_bo_type_kernel,
- XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE,
- alignment);
+ dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ alignment, false);
if (IS_ERR(dpt))
- dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
- dpt_size, ~0ull,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE,
- alignment);
+ dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ alignment, false);
if (IS_ERR(dpt))
return PTR_ERR(dpt);
@@ -281,7 +281,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
struct drm_gem_object *obj = intel_fb_bo(&fb->base);
struct xe_bo *bo = gem_to_xe_bo(obj);
- int ret;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ int ret = 0;
if (!vma)
return ERR_PTR(-ENODEV);
@@ -308,17 +310,22 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
* Pin the framebuffer, we can't use xe_bo_(un)pin functions as the
* assumptions are incorrect for framebuffers
*/
- ret = ttm_bo_reserve(&bo->ttm, false, false, NULL);
- if (ret)
- goto err;
-
- if (IS_DGFX(xe))
- ret = xe_bo_migrate(bo, XE_PL_VRAM0);
- else
- ret = xe_bo_validate(bo, NULL, true);
- if (!ret)
- ttm_bo_pin(&bo->ttm);
- ttm_bo_unreserve(&bo->ttm);
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ break;
+
+ if (IS_DGFX(xe))
+ ret = xe_bo_migrate(bo, XE_PL_VRAM0, NULL, &exec);
+ else
+ ret = xe_bo_validate(bo, NULL, true, &exec);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ if (!ret)
+ ttm_bo_pin(&bo->ttm);
+ }
if (ret)
goto err;
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 30f1073141fc..4ae847b628e2 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -72,10 +72,10 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
int ret = 0;
/* allocate object of two page for HDCP command memory and store it */
- bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), PAGE_SIZE * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo)) {
drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n");
diff --git a/drivers/gpu/drm/xe/display/xe_panic.c b/drivers/gpu/drm/xe/display/xe_panic.c
new file mode 100644
index 000000000000..f32b23338331
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_panic.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: MIT
+/* Copyright © 2025 Intel Corporation */
+
+#include <drm/drm_cache.h>
+#include <drm/drm_panic.h>
+
+#include "intel_display_types.h"
+#include "intel_fb.h"
+#include "intel_panic.h"
+#include "xe_bo.h"
+
+struct intel_panic {
+ struct page **pages;
+ int page;
+ void *vaddr;
+};
+
+static void xe_panic_kunmap(struct intel_panic *panic)
+{
+ if (panic->vaddr) {
+ drm_clflush_virt_range(panic->vaddr, PAGE_SIZE);
+ kunmap_local(panic->vaddr);
+ panic->vaddr = NULL;
+ }
+}
+
+/*
+ * The scanout buffer pages are not mapped, so for each pixel,
+ * use kmap_local_page_try_from_panic() to map the page, and write the pixel.
+ * Try to keep the map from the previous pixel, to avoid too much map/unmap.
+ */
+static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x,
+ unsigned int y, u32 color)
+{
+ struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private;
+ struct intel_panic *panic = fb->panic;
+ struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base));
+ unsigned int new_page;
+ unsigned int offset;
+
+ if (fb->panic_tiling)
+ offset = fb->panic_tiling(sb->width, x, y);
+ else
+ offset = y * sb->pitch[0] + x * sb->format->cpp[0];
+
+ new_page = offset >> PAGE_SHIFT;
+ offset = offset % PAGE_SIZE;
+ if (new_page != panic->page) {
+ xe_panic_kunmap(panic);
+ panic->page = new_page;
+ panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm,
+ panic->page);
+ }
+ if (panic->vaddr) {
+ u32 *pix = panic->vaddr + offset;
+ *pix = color;
+ }
+}
+
+struct intel_panic *intel_panic_alloc(void)
+{
+ struct intel_panic *panic;
+
+ panic = kzalloc(sizeof(*panic), GFP_KERNEL);
+
+ return panic;
+}
+
+int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb)
+{
+ panic->page = -1;
+ sb->set_pixel = xe_panic_page_set_pixel;
+ return 0;
+}
+
+void intel_panic_finish(struct intel_panic *panic)
+{
+ xe_panic_kunmap(panic);
+ panic->page = -1;
+}
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index 826ac3d578b7..94f00def811b 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -140,8 +140,8 @@ initial_plane_bo(struct xe_device *xe,
page_size);
size -= base;
- bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base,
- ttm_bo_type_kernel, flags);
+ bo = xe_bo_create_pin_map_at_novm(xe, tile0, size, phys_base,
+ ttm_bo_type_kernel, flags, 0, false);
if (IS_ERR(bo)) {
drm_dbg(&xe->drm,
"Failed to create bo phys_base=%pa size %u with flags %x: %li\n",
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index f96b2e2b3064..06cb6b02ec64 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -522,6 +522,7 @@
#define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED)
#define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12)
+#define EUSTALL_PERF_SAMPLING_DISABLE REG_BIT(5)
#define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8)
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 1b101edb838b..b5eff383902c 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -40,7 +40,4 @@
#define INDIRECT_CTX_RING_START_UDW (0x08 + 1)
#define INDIRECT_CTX_RING_CTL (0x0a + 1)
-#define CTX_INDIRECT_CTX_OFFSET_MASK REG_GENMASK(15, 6)
-#define CTX_INDIRECT_CTX_OFFSET_DEFAULT REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd)
-
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 7b40cc8be1c9..2294cf89f3e1 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -23,7 +23,7 @@
static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
bool clear, u64 get_val, u64 assign_val,
- struct kunit *test)
+ struct kunit *test, struct drm_exec *exec)
{
struct dma_fence *fence;
struct ttm_tt *ttm;
@@ -35,7 +35,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
u32 offset;
/* Move bo to VRAM if not already there. */
- ret = xe_bo_validate(bo, NULL, false);
+ ret = xe_bo_validate(bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate bo.\n");
return ret;
@@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
}
/* Evict to system. CCS data should be copied. */
- ret = xe_bo_evict(bo);
+ ret = xe_bo_evict(bo, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to evict bo.\n");
return ret;
@@ -132,14 +132,15 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
/* TODO: Sanity check */
unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
if (IS_DGFX(xe))
kunit_info(test, "Testing vram id %u\n", tile->id);
else
kunit_info(test, "Testing system memory\n");
- bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags);
+ bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
+ bo_flags, exec);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "Failed to create bo.\n");
return;
@@ -149,18 +150,18 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
- test);
+ test, exec);
if (ret)
goto out_unlock;
kunit_info(test, "Verifying that CCS data survives migration.\n");
ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
- 0xdeadbeefdeadbeefULL, test);
+ 0xdeadbeefdeadbeefULL, test, exec);
if (ret)
goto out_unlock;
kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
- ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);
+ ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test, exec);
out_unlock:
xe_bo_unlock(bo);
@@ -210,6 +211,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
struct xe_bo *bo, *external;
unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
struct xe_gt *__gt;
int err, i, id;
@@ -218,25 +220,25 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
for (i = 0; i < 2; ++i) {
xe_vm_lock(vm, false);
- bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
+ bo = xe_bo_create_user(xe, vm, 0x10000,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags);
+ bo_flags, exec);
xe_vm_unlock(vm);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "bo create err=%pe\n", bo);
break;
}
- external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
+ external = xe_bo_create_user(xe, NULL, 0x10000,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags);
+ bo_flags, NULL);
if (IS_ERR(external)) {
KUNIT_FAIL(test, "external bo create err=%pe\n", external);
goto cleanup_bo;
}
xe_bo_lock(external, false);
- err = xe_bo_pin_external(external, false);
+ err = xe_bo_pin_external(external, false, exec);
xe_bo_unlock(external);
if (err) {
KUNIT_FAIL(test, "external bo pin err=%pe\n",
@@ -294,7 +296,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
if (i) {
down_read(&vm->lock);
xe_vm_lock(vm, false);
- err = xe_bo_validate(bo, bo->vm, false);
+ err = xe_bo_validate(bo, bo->vm, false, exec);
xe_vm_unlock(vm);
up_read(&vm->lock);
if (err) {
@@ -303,7 +305,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
goto cleanup_all;
}
xe_bo_lock(external, false);
- err = xe_bo_validate(external, NULL, false);
+ err = xe_bo_validate(external, NULL, false, exec);
xe_bo_unlock(external);
if (err) {
KUNIT_FAIL(test, "external bo valid err=%pe\n",
@@ -495,9 +497,9 @@ static int shrink_test_run_device(struct xe_device *xe)
INIT_LIST_HEAD(&link->link);
/* We can create bos using WC caching here. But it is slower. */
- bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
+ bo = xe_bo_create_user(xe, NULL, XE_BO_SHRINK_SIZE,
DRM_XE_GEM_CPU_CACHING_WB,
- XE_BO_FLAG_SYSTEM);
+ XE_BO_FLAG_SYSTEM, NULL);
if (IS_ERR(bo)) {
if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 5baeab6b6fb7..a7e548a2bdfb 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -27,7 +27,8 @@ static bool is_dynamic(struct dma_buf_test_params *params)
}
static void check_residency(struct kunit *test, struct xe_bo *exported,
- struct xe_bo *imported, struct dma_buf *dmabuf)
+ struct xe_bo *imported, struct dma_buf *dmabuf,
+ struct drm_exec *exec)
{
struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
u32 mem_type;
@@ -62,7 +63,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
* importer is on a different device. If they're on the same device,
* the exporter and the importer should be the same bo.
*/
- ret = xe_bo_evict(exported);
+ ret = xe_bo_evict(exported, exec);
if (ret) {
if (ret != -EINTR && ret != -ERESTARTSYS)
KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n",
@@ -77,7 +78,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
}
/* Re-validate the importer. This should move also exporter in. */
- ret = xe_bo_validate(imported, NULL, false);
+ ret = xe_bo_validate(imported, NULL, false, exec);
if (ret) {
if (ret != -EINTR && ret != -ERESTARTSYS)
KUNIT_FAIL(test, "Validating importer failed with err=%d.\n",
@@ -113,8 +114,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
size = SZ_64K;
kunit_info(test, "running %s\n", __func__);
- bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
- params->mem_mask);
+ bo = xe_bo_create_user(xe, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
+ params->mem_mask, NULL);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(bo));
@@ -142,11 +143,12 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
KUNIT_FAIL(test,
"xe_gem_prime_import() succeeded when it shouldn't have\n");
} else {
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
int err;
/* Is everything where we expect it to be? */
xe_bo_lock(import_bo, false);
- err = xe_bo_validate(import_bo, NULL, false);
+ err = xe_bo_validate(import_bo, NULL, false, exec);
/* Pinning in VRAM is not allowed. */
if (!is_dynamic(params) &&
@@ -159,7 +161,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
err == -ERESTARTSYS);
if (!err)
- check_residency(test, bo, import_bo, dmabuf);
+ check_residency(test, bo, import_bo, dmabuf, exec);
xe_bo_unlock(import_bo);
}
drm_gem_object_put(import);
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
new file mode 100644
index 000000000000..3b213fcae916
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
@@ -0,0 +1,776 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/delay.h>
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_kunit_helpers.h"
+#include "tests/xe_pci_test.h"
+#include "tests/xe_test.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_pm.h"
+
+/*
+ * There are different ways to allocate the G2G buffers. The plan for this test
+ * is to make sure that all the possible options work. The particular option
+ * chosen by the driver may vary from one platform to another, it may also change
+ * with time. So to ensure consistency of testing, the relevant driver code is
+ * replicated here to guarantee it won't change without the test being updated
+ * to keep testing the other options.
+ *
+ * In order to test the actual code being used by the driver, there is also the
+ * 'default' scheme. That will use the official driver routines to test whatever
+ * method the driver is using on the current platform at the current time.
+ */
+enum {
+ /* Driver defined allocation scheme */
+ G2G_CTB_TYPE_DEFAULT,
+ /* Single buffer in host memory */
+ G2G_CTB_TYPE_HOST,
+ /* Single buffer in a specific tile, loops across all tiles */
+ G2G_CTB_TYPE_TILE,
+};
+
+/*
+ * Payload is opaque to GuC. So KMD can define any structure or size it wants.
+ */
+struct g2g_test_payload {
+ u32 tx_dev;
+ u32 tx_tile;
+ u32 rx_dev;
+ u32 rx_tile;
+ u32 seqno;
+};
+
+static void g2g_test_send(struct kunit *test, struct xe_guc *guc,
+ u32 far_tile, u32 far_dev,
+ struct g2g_test_payload *payload)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 *action, total;
+ size_t payload_len;
+ int ret;
+
+ static_assert(IS_ALIGNED(sizeof(*payload), sizeof(u32)));
+ payload_len = sizeof(*payload) / sizeof(u32);
+
+ total = 4 + payload_len;
+ action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action);
+
+ action[0] = XE_GUC_ACTION_TEST_G2G_SEND;
+ action[1] = far_tile;
+ action[2] = far_dev;
+ action[3] = payload_len;
+ memcpy(action + 4, payload, payload_len * sizeof(u32));
+
+ atomic_inc(&xe->g2g_test_count);
+
+ /*
+ * Should specify the expected response notification here. Problem is that
+ * the response will be coming from a different GuC. By the end, it should
+ * all add up as long as an equal number of messages are sent from each GuC
+ * and to each GuC. However, in the middle negative reservation space errors
+ * and such like can occur. Rather than add intrusive changes to the CT layer
+ * it is simpler to just not bother counting it at all. The system should be
+ * idle when running the selftest, and the selftest's notification total size
+ * is well within the G2H allocation size. So there should be no issues with
+ * needing to block for space, which is all the tracking code is really for.
+ */
+ ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0);
+ kunit_kfree(test, action);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret,
+ gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev);
+}
+
+/*
+ * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously
+ * from the G2H notification handler. Need that to actually complete rather than
+ * thread-abort in order to keep the rest of the driver alive!
+ */
+int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL;
+ u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len;
+ struct g2g_test_payload *payload;
+ size_t payload_len;
+ int ret = 0, i;
+
+ payload_len = sizeof(*payload) / sizeof(u32);
+
+ if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) {
+ xe_gt_err(rx_gt, "G2G test notification invalid length %u", len);
+ ret = -EPROTO;
+ goto done;
+ }
+
+ tx_tile = msg[0];
+ tx_dev = msg[1];
+ got_len = msg[2];
+ payload = (struct g2g_test_payload *)(msg + 3);
+
+ rx_tile = gt_to_tile(rx_gt)->id;
+ rx_dev = G2G_DEV(rx_gt);
+
+ if (got_len != payload_len) {
+ xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len);
+ ret = -EPROTO;
+ goto done;
+ }
+
+ if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile ||
+ payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) {
+ xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! [%d]\n",
+ payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev,
+ tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno);
+ ret = -EPROTO;
+ goto done;
+ }
+
+ if (!xe->g2g_test_array) {
+ xe_gt_err(rx_gt, "G2G: Missing test array!\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ for_each_gt(test_gt, xe, i) {
+ if (gt_to_tile(test_gt)->id != tx_tile)
+ continue;
+
+ if (G2G_DEV(test_gt) != tx_dev)
+ continue;
+
+ if (tx_gt) {
+ xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n",
+ tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ tx_gt = test_gt;
+ }
+ if (!tx_gt) {
+ xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id;
+
+ if (xe->g2g_test_array[idx] != payload->seqno - 1) {
+ xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n",
+ xe->g2g_test_array[idx], payload->seqno - 1,
+ tx_tile, tx_dev, rx_tile, rx_dev);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ xe->g2g_test_array[idx] = payload->seqno;
+
+done:
+ atomic_dec(&xe->g2g_test_count);
+ return ret;
+}
+
+/*
+ * Send the given seqno from all GuCs to all other GuCs in tile/GT order
+ */
+static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno)
+{
+ struct xe_gt *near_gt, *far_gt;
+ int i, j;
+
+ for_each_gt(near_gt, xe, i) {
+ u32 near_tile = gt_to_tile(near_gt)->id;
+ u32 near_dev = G2G_DEV(near_gt);
+
+ for_each_gt(far_gt, xe, j) {
+ u32 far_tile = gt_to_tile(far_gt)->id;
+ u32 far_dev = G2G_DEV(far_gt);
+ struct g2g_test_payload payload;
+
+ if (far_gt->info.id == near_gt->info.id)
+ continue;
+
+ payload.tx_dev = near_dev;
+ payload.tx_tile = near_tile;
+ payload.rx_dev = far_dev;
+ payload.rx_tile = far_tile;
+ payload.seqno = seqno;
+ g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload);
+ }
+ }
+}
+
+#define WAIT_TIME_MS 100
+#define WAIT_COUNT (1000 / WAIT_TIME_MS)
+
+static void g2g_wait_for_complete(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+ struct kunit *test = kunit_get_current_test();
+ int wait = 0;
+
+ /* Wait for all G2H messages to be received */
+ while (atomic_read(&xe->g2g_test_count)) {
+ if (++wait > WAIT_COUNT)
+ break;
+
+ msleep(WAIT_TIME_MS);
+ }
+
+ KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count),
+ "Timed out waiting for notifications\n");
+ kunit_info(test, "Got all notifications back\n");
+}
+
+#undef WAIT_TIME_MS
+#undef WAIT_COUNT
+
+static void g2g_clean_array(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+
+ xe->g2g_test_array = NULL;
+}
+
+#define NUM_LOOPS 16
+
+static void g2g_run_test(struct kunit *test, struct xe_device *xe)
+{
+ u32 seqno, max_array;
+ int ret, i, j;
+
+ max_array = xe->info.gt_count * xe->info.gt_count;
+ xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array);
+
+ ret = kunit_add_action_or_reset(test, g2g_clean_array, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n");
+
+ /*
+ * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order.
+ * Tile/GT order doesn't really mean anything to the hardware but it is going
+ * to be a fixed sequence every time.
+ *
+ * Verify that each one comes back having taken the correct route.
+ */
+ ret = kunit_add_action(test, g2g_wait_for_complete, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n");
+ for (seqno = 1; seqno < NUM_LOOPS; seqno++)
+ g2g_test_in_order(test, xe, seqno);
+ seqno--;
+
+ kunit_release_action(test, &g2g_wait_for_complete, xe);
+
+ /* Check for the final seqno in each slot */
+ for (i = 0; i < xe->info.gt_count; i++) {
+ for (j = 0; j < xe->info.gt_count; j++) {
+ u32 idx = (j * xe->info.gt_count) + i;
+
+ if (i == j)
+ KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx],
+ "identity seqno modified: %d for %dx%d!\n",
+ xe->g2g_test_array[idx], i, j);
+ else
+ KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx],
+ "invalid seqno: %d vs %d for %dx%d!\n",
+ xe->g2g_test_array[idx], seqno, i, j);
+ }
+ }
+
+ kunit_kfree(test, xe->g2g_test_array);
+ kunit_release_action(test, &g2g_clean_array, xe);
+
+ kunit_info(test, "Test passed\n");
+}
+
+#undef NUM_LOOPS
+
+static void g2g_ct_stop(struct xe_guc *guc)
+{
+ struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ int i, t;
+
+ for_each_gt(remote_gt, xe, i) {
+ u32 tile, dev;
+
+ if (remote_gt->info.id == gt->info.id)
+ continue;
+
+ tile = gt_to_tile(remote_gt)->id;
+ dev = G2G_DEV(remote_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++)
+ guc_g2g_deregister(guc, tile, dev, t);
+ }
+}
+
+/* Size of a single allocation that contains all G2G CTBs across all GTs */
+static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe)
+{
+ unsigned int count = xe->info.gt_count;
+ u32 num_channels = (count * (count - 1)) / 2;
+
+ kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n",
+ count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, G2G_DESC_AREA_SIZE,
+ num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE,
+ num_channels * XE_G2G_TYPE_LIMIT);
+
+ return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
+}
+
+/*
+ * Use the driver's regular CTB allocation scheme.
+ */
+static void g2g_alloc_default(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int i;
+
+ kunit_info(test, "Default [tiles = %d, GTs = %d]\n",
+ xe->info.tile_count, xe->info.gt_count);
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+ int ret;
+
+ ret = guc_g2g_alloc(guc);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret));
+ continue;
+ }
+}
+
+static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo)
+{
+ struct xe_gt *root_gt, *gt;
+ int i;
+
+ root_gt = xe_device_get_gt(xe, 0);
+ root_gt->uc.guc.g2g.bo = bo;
+ root_gt->uc.guc.g2g.owned = true;
+ kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo);
+
+ for_each_gt(gt, xe, i) {
+ if (gt->info.id != 0) {
+ gt->uc.guc.g2g.owned = false;
+ gt->uc.guc.g2g.bo = xe_bo_get(bo);
+ kunit_info(test, "[%d.%d] Pinned 0x%p\n",
+ gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo);
+ }
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo);
+ }
+}
+
+/*
+ * Allocate a single blob on the host and split between all G2G CTBs.
+ */
+static void g2g_alloc_host(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_bo *bo;
+ u32 g2g_size;
+
+ kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count);
+
+ g2g_size = g2g_ctb_size(test, xe);
+ bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_ALL |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
+ kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo);
+
+ xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
+
+ g2g_distribute(test, xe, bo);
+}
+
+/*
+ * Allocate a single blob on the given tile and split between all G2G CTBs.
+ */
+static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile)
+{
+ struct xe_bo *bo;
+ u32 g2g_size;
+
+ KUNIT_ASSERT_TRUE(test, IS_DGFX(xe));
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile);
+
+ kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n",
+ tile->id, xe->info.tile_count, xe->info.gt_count);
+
+ g2g_size = g2g_ctb_size(test, xe);
+ bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_ALL |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
+ kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo);
+
+ xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
+
+ g2g_distribute(test, xe, bo);
+}
+
+static void g2g_free(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ struct xe_bo *bo;
+ int i;
+
+ for_each_gt(gt, xe, i) {
+ bo = gt->uc.guc.g2g.bo;
+ if (!bo)
+ continue;
+
+ if (gt->uc.guc.g2g.owned) {
+ xe_managed_bo_unpin_map_no_vm(bo);
+ kunit_info(test, "[%d.%d] Unmapped 0x%p\n",
+ gt_to_tile(gt)->id, gt->info.id, bo);
+ } else {
+ xe_bo_put(bo);
+ kunit_info(test, "[%d.%d] Unpinned 0x%p\n",
+ gt_to_tile(gt)->id, gt->info.id, bo);
+ }
+
+ gt->uc.guc.g2g.bo = NULL;
+ }
+}
+
+static void g2g_stop(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int i;
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ if (!guc->g2g.bo)
+ continue;
+
+ g2g_ct_stop(guc);
+ }
+
+ g2g_free(test, xe);
+}
+
+/*
+ * Generate a unique id for each bi-directional CTB for each pair of
+ * near and far tiles/devices. The id can then be used as an index into
+ * a single allocation that is sub-divided into multiple CTBs.
+ *
+ * For example, with two devices per tile and two tiles, the table should
+ * look like:
+ * Far <tile>.<dev>
+ * 0.0 0.1 1.0 1.1
+ * N 0.0 --/-- 00/01 02/03 04/05
+ * e 0.1 01/00 --/-- 06/07 08/09
+ * a 1.0 03/02 07/06 --/-- 10/11
+ * r 1.1 05/04 09/08 11/10 --/--
+ *
+ * Where each entry is Rx/Tx channel id.
+ *
+ * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would
+ * be reading from channel #11 and writing to channel #10. Whereas,
+ * GuC #2 talking to GuC #3 would be read on #10 and write to #11.
+ */
+static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev,
+ u32 type, u32 max_inst, bool have_dev)
+{
+ u32 near = near_tile, far = far_tile;
+ u32 idx = 0, x, y, direction;
+ int i;
+
+ if (have_dev) {
+ near = (near << 1) | near_dev;
+ far = (far << 1) | far_dev;
+ }
+
+ /* No need to send to one's self */
+ if (far == near)
+ return -1;
+
+ if (far > near) {
+ /* Top right table half */
+ x = far;
+ y = near;
+
+ /* T/R is 'forwards' direction */
+ direction = type;
+ } else {
+ /* Bottom left table half */
+ x = near;
+ y = far;
+
+ /* B/L is 'backwards' direction */
+ direction = (1 - type);
+ }
+
+ /* Count the rows prior to the target */
+ for (i = y; i > 0; i--)
+ idx += max_inst - i;
+
+ /* Count this row up to the target */
+ idx += (x - 1 - y);
+
+ /* Slots are in Rx/Tx pairs */
+ idx *= 2;
+
+ /* Pick Rx/Tx direction */
+ idx += direction;
+
+ return idx;
+}
+
+static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 near_tile = gt_to_tile(gt)->id;
+ u32 near_dev = G2G_DEV(gt);
+ u32 max = xe->info.gt_count;
+ int idx;
+ u32 base, desc, buf;
+
+ if (!guc->g2g.bo)
+ return -ENODEV;
+
+ idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev);
+ xe_assert(xe, idx >= 0);
+
+ base = guc_bo_ggtt_addr(guc, guc->g2g.bo);
+ desc = base + idx * G2G_DESC_SIZE;
+ buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
+
+ xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE);
+ xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo));
+
+ return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev,
+ desc, buf, G2G_BUFFER_SIZE);
+}
+
+static void g2g_start(struct kunit *test, struct xe_guc *guc)
+{
+ struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int i;
+ int t, ret;
+ bool have_dev;
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo);
+
+ /* GuC interface will need extending if more GT device types are ever created. */
+ KUNIT_ASSERT_TRUE(test,
+ (gt->info.type == XE_GT_TYPE_MAIN) ||
+ (gt->info.type == XE_GT_TYPE_MEDIA));
+
+ /* Channel numbering depends on whether there are multiple GTs per tile */
+ have_dev = xe->info.gt_count > xe->info.tile_count;
+
+ for_each_gt(remote_gt, xe, i) {
+ u32 tile, dev;
+
+ if (remote_gt->info.id == gt->info.id)
+ continue;
+
+ tile = gt_to_tile(remote_gt)->id;
+ dev = G2G_DEV(remote_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) {
+ ret = g2g_register_flat(guc, tile, dev, t, have_dev);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret));
+ }
+ }
+}
+
+static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile)
+{
+ struct xe_gt *gt;
+ int i, found = 0;
+
+ g2g_stop(test, xe);
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ KUNIT_ASSERT_NULL(test, guc->g2g.bo);
+ }
+
+ switch (ctb_type) {
+ case G2G_CTB_TYPE_DEFAULT:
+ g2g_alloc_default(test, xe);
+ break;
+
+ case G2G_CTB_TYPE_HOST:
+ g2g_alloc_host(test, xe);
+ break;
+
+ case G2G_CTB_TYPE_TILE:
+ g2g_alloc_tile(test, xe, tile);
+ break;
+
+ default:
+ KUNIT_ASSERT_TRUE(test, false);
+ }
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ if (!guc->g2g.bo)
+ continue;
+
+ if (ctb_type == G2G_CTB_TYPE_DEFAULT)
+ guc_g2g_start(guc);
+ else
+ g2g_start(test, guc);
+ found++;
+ }
+
+ KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found);
+
+ kunit_info(test, "Testing across %d GTs\n", found);
+}
+
+static void g2g_recreate_ctb(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+ struct kunit *test = kunit_get_current_test();
+
+ g2g_stop(test, xe);
+
+ if (xe_guc_g2g_wanted(xe))
+ g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
+}
+
+static void g2g_pm_runtime_put(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+
+ xe_pm_runtime_put(xe);
+}
+
+static void g2g_pm_runtime_get(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n");
+}
+
+static void g2g_check_skip(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ struct xe_gt *gt;
+ int i;
+
+ if (IS_SRIOV_VF(xe))
+ kunit_skip(test, "not supported from a VF");
+
+ if (xe->info.gt_count <= 1)
+ kunit_skip(test, "not enough GTs");
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD)
+ kunit_skip(test,
+ "G2G test interface not available in production firmware builds\n");
+ }
+}
+
+/*
+ * Simple test that does not try to recreate the CTBs.
+ * Requires that the platform already enables G2G comms
+ * but has no risk of leaving the system in a broken state
+ * afterwards.
+ */
+static void xe_live_guc_g2g_kunit_default(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+
+ if (!xe_guc_g2g_wanted(xe))
+ kunit_skip(test, "G2G not enabled");
+
+ g2g_check_skip(test);
+
+ g2g_pm_runtime_get(test);
+
+ kunit_info(test, "Testing default CTBs\n");
+ g2g_run_test(test, xe);
+
+ kunit_release_action(test, &g2g_pm_runtime_put, xe);
+}
+
+/*
+ * More complex test that re-creates the CTBs in various location to
+ * test access to each location from each GuC. Can be run even on
+ * systems that do not enable G2G by default. On the other hand,
+ * because it recreates the CTBs, if something goes wrong it could
+ * leave the system with broken G2G comms.
+ */
+static void xe_live_guc_g2g_kunit_allmem(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ int ret;
+
+ g2g_check_skip(test);
+
+ g2g_pm_runtime_get(test);
+
+ /* Make sure to leave the system as we found it */
+ ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n");
+
+ kunit_info(test, "Testing CTB type 'default'...\n");
+ g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
+ g2g_run_test(test, xe);
+
+ kunit_info(test, "Testing CTB type 'host'...\n");
+ g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL);
+ g2g_run_test(test, xe);
+
+ if (IS_DGFX(xe)) {
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id);
+
+ g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile);
+ g2g_run_test(test, xe);
+ }
+ } else {
+ kunit_info(test, "Skipping local memory on integrated platform\n");
+ }
+
+ kunit_release_action(test, g2g_recreate_ctb, xe);
+ kunit_release_action(test, g2g_pm_runtime_put, xe);
+}
+
+static struct kunit_case xe_guc_g2g_tests[] = {
+ KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param),
+ {}
+};
+
+VISIBLE_IF_KUNIT
+struct kunit_suite xe_guc_g2g_test_suite = {
+ .name = "xe_guc_g2g",
+ .test_cases = xe_guc_g2g_tests,
+ .init = xe_kunit_helper_xe_device_live_test_init,
+};
+EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
index 81277c77016d..c55e46f1ae92 100644
--- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
+++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
@@ -10,12 +10,14 @@ extern struct kunit_suite xe_bo_shrink_test_suite;
extern struct kunit_suite xe_dma_buf_test_suite;
extern struct kunit_suite xe_migrate_test_suite;
extern struct kunit_suite xe_mocs_test_suite;
+extern struct kunit_suite xe_guc_g2g_test_suite;
kunit_test_suite(xe_bo_test_suite);
kunit_test_suite(xe_bo_shrink_test_suite);
kunit_test_suite(xe_dma_buf_test_suite);
kunit_test_suite(xe_migrate_test_suite);
kunit_test_suite(xe_mocs_test_suite);
+kunit_test_suite(xe_guc_g2g_test_suite);
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index edd1e701aa1c..5904d658d1f2 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -70,7 +70,7 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
} } while (0)
static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
- struct kunit *test, u32 region)
+ struct kunit *test, u32 region, struct drm_exec *exec)
{
struct xe_device *xe = tile_to_xe(m->tile);
u64 retval, expected = 0;
@@ -84,14 +84,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
ttm_bo_type_kernel,
region |
XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED,
+ exec);
if (IS_ERR(remote)) {
KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n",
str, remote);
return;
}
- err = xe_bo_validate(remote, NULL, false);
+ err = xe_bo_validate(remote, NULL, false, exec);
if (err) {
KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n",
str, err);
@@ -161,13 +162,13 @@ out_unlock:
}
static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
- struct kunit *test)
+ struct drm_exec *exec, struct kunit *test)
{
- test_copy(m, bo, test, XE_BO_FLAG_SYSTEM);
+ test_copy(m, bo, test, XE_BO_FLAG_SYSTEM, exec);
}
static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
- struct kunit *test)
+ struct drm_exec *exec, struct kunit *test)
{
u32 region;
@@ -178,10 +179,11 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
region = XE_BO_FLAG_VRAM1;
else
region = XE_BO_FLAG_VRAM0;
- test_copy(m, bo, test, region);
+ test_copy(m, bo, test, region, exec);
}
-static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
+static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test,
+ struct drm_exec *exec)
{
struct xe_tile *tile = m->tile;
struct xe_device *xe = tile_to_xe(tile);
@@ -202,7 +204,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile));
+ XE_BO_FLAG_VRAM_IF_DGFX(tile),
+ exec);
if (IS_ERR(big)) {
KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
goto vunmap;
@@ -210,7 +213,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile));
+ XE_BO_FLAG_VRAM_IF_DGFX(tile),
+ exec);
if (IS_ERR(pt)) {
KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
PTR_ERR(pt));
@@ -220,7 +224,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
tiny = xe_bo_create_pin_map(xe, tile, m->q->vm,
2 * SZ_4K,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile));
+ XE_BO_FLAG_VRAM_IF_DGFX(tile),
+ exec);
if (IS_ERR(tiny)) {
KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n",
PTR_ERR(tiny));
@@ -290,10 +295,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
check(retval, expected, "Command clear small last value", test);
kunit_info(test, "Copying small buffer object to system\n");
- test_copy_sysmem(m, tiny, test);
+ test_copy_sysmem(m, tiny, exec, test);
if (xe->info.tile_count > 1) {
kunit_info(test, "Copying small buffer object to other vram\n");
- test_copy_vram(m, tiny, test);
+ test_copy_vram(m, tiny, exec, test);
}
/* Clear a big bo */
@@ -312,10 +317,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
check(retval, expected, "Command clear big last value", test);
kunit_info(test, "Copying big buffer object to system\n");
- test_copy_sysmem(m, big, test);
+ test_copy_sysmem(m, big, exec, test);
if (xe->info.tile_count > 1) {
kunit_info(test, "Copying big buffer object to other vram\n");
- test_copy_vram(m, big, test);
+ test_copy_vram(m, big, exec, test);
}
out:
@@ -343,10 +348,11 @@ static int migrate_test_run_device(struct xe_device *xe)
for_each_tile(tile, xe, id) {
struct xe_migrate *m = tile->migrate;
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
kunit_info(test, "Testing tile id %d.\n", id);
xe_vm_lock(m->q->vm, false);
- xe_migrate_sanity_test(m, test);
+ xe_migrate_sanity_test(m, test, exec);
xe_vm_unlock(m->q->vm);
}
@@ -490,7 +496,7 @@ err_sync:
static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo,
- struct kunit *test)
+ struct drm_exec *exec, struct kunit *test)
{
struct dma_fence *fence;
u64 expected, retval;
@@ -509,7 +515,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
dma_fence_put(fence);
kunit_info(test, "Evict vram buffer object\n");
- ret = xe_bo_evict(vram_bo);
+ ret = xe_bo_evict(vram_bo, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to evict bo.\n");
return;
@@ -538,7 +544,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
dma_fence_put(fence);
kunit_info(test, "Restore vram buffer object\n");
- ret = xe_bo_validate(vram_bo, NULL, false);
+ ret = xe_bo_validate(vram_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
return;
@@ -636,13 +642,14 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
{
struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL;
unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+ struct drm_exec *exec;
long ret;
- sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ sys_bo = xe_bo_create_user(xe, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED, NULL);
if (IS_ERR(sys_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -650,8 +657,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
return;
}
+ exec = XE_VALIDATION_OPT_OUT;
xe_bo_lock(sys_bo, false);
- ret = xe_bo_validate(sys_bo, NULL, false);
+ ret = xe_bo_validate(sys_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
goto free_sysbo;
@@ -664,10 +672,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_unlock(sys_bo);
- ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ ccs_bo = xe_bo_create_user(xe, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED, NULL);
if (IS_ERR(ccs_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -676,7 +684,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_lock(ccs_bo, false);
- ret = xe_bo_validate(ccs_bo, NULL, false);
+ ret = xe_bo_validate(ccs_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
goto free_ccsbo;
@@ -689,10 +697,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_unlock(ccs_bo);
- vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ vram_bo = xe_bo_create_user(xe, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED, NULL);
if (IS_ERR(vram_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(vram_bo));
@@ -700,7 +708,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_lock(vram_bo, false);
- ret = xe_bo_validate(vram_bo, NULL, false);
+ ret = xe_bo_validate(vram_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
goto free_vrambo;
@@ -713,7 +721,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
test_clear(xe, tile, sys_bo, vram_bo, test);
- test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test);
+ test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, exec, test);
xe_bo_unlock(vram_bo);
xe_bo_lock(vram_bo, false);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index db30c5156d0c..69e2840c7ef0 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -12,12 +12,219 @@
#include <kunit/test-bug.h>
#include <kunit/visibility.h>
+#define PLATFORM_CASE(platform__, graphics_step__) \
+ { \
+ .platform = XE_ ## platform__, \
+ .subplatform = XE_SUBPLATFORM_NONE, \
+ .step = { .graphics = STEP_ ## graphics_step__ } \
+ }
+
+#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \
+ { \
+ .platform = XE_ ## platform__, \
+ .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \
+ .step = { .graphics = STEP_ ## graphics_step__ } \
+ }
+
+#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \
+ media_verx100__, media_step__) \
+ { \
+ .platform = XE_ ## platform__, \
+ .subplatform = XE_SUBPLATFORM_NONE, \
+ .graphics_verx100 = graphics_verx100__, \
+ .media_verx100 = media_verx100__, \
+ .step = { .graphics = STEP_ ## graphics_step__, \
+ .media = STEP_ ## media_step__ } \
+ }
+
+static const struct xe_pci_fake_data cases[] = {
+ PLATFORM_CASE(TIGERLAKE, B0),
+ PLATFORM_CASE(DG1, A0),
+ PLATFORM_CASE(DG1, B0),
+ PLATFORM_CASE(ALDERLAKE_S, A0),
+ PLATFORM_CASE(ALDERLAKE_S, B0),
+ PLATFORM_CASE(ALDERLAKE_S, C0),
+ PLATFORM_CASE(ALDERLAKE_S, D0),
+ PLATFORM_CASE(ALDERLAKE_P, A0),
+ PLATFORM_CASE(ALDERLAKE_P, B0),
+ PLATFORM_CASE(ALDERLAKE_P, C0),
+ SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
+ SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
+ SUBPLATFORM_CASE(DG2, G10, C0),
+ SUBPLATFORM_CASE(DG2, G11, B1),
+ SUBPLATFORM_CASE(DG2, G12, A1),
+ GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
+ GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
+ GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
+ GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
+ GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
+ GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1),
+ GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0),
+};
+
+KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc);
+
+/**
+ * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters
+ * @prev: the pointer to the previous parameter to iterate from or NULL
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
+ *
+ * This function prepares struct xe_pci_fake_data parameter.
+ *
+ * To be used only as a parameter generator function in &KUNIT_CASE_PARAM.
+ *
+ * Return: pointer to the next parameter or NULL if no more parameters
+ */
+const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc)
+{
+ return platform_gen_params(test, prev, desc);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_gen_params);
+
+static const struct xe_device_desc *lookup_desc(enum xe_platform p)
+{
+ const struct xe_device_desc *desc;
+ const struct pci_device_id *ids;
+
+ for (ids = pciidlist; ids->driver_data; ids++) {
+ desc = (const void *)ids->driver_data;
+ if (desc->platform == p)
+ return desc;
+ }
+ return NULL;
+}
+
+static const struct xe_subplatform_desc *lookup_sub_desc(enum xe_platform p, enum xe_subplatform s)
+{
+ const struct xe_device_desc *desc = lookup_desc(p);
+ const struct xe_subplatform_desc *spd;
+
+ if (desc && desc->subplatforms)
+ for (spd = desc->subplatforms; spd->subplatform; spd++)
+ if (spd->subplatform == s)
+ return spd;
+ return NULL;
+}
+
+static const char *lookup_platform_name(enum xe_platform p)
+{
+ const struct xe_device_desc *desc = lookup_desc(p);
+
+ return desc ? desc->platform_name : "INVALID";
+}
+
+static const char *__lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s)
+{
+ const struct xe_subplatform_desc *desc = lookup_sub_desc(p, s);
+
+ return desc ? desc->name : "INVALID";
+}
+
+static const char *lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s)
+{
+ return s == XE_SUBPLATFORM_NONE ? "" : __lookup_subplatform_name(p, s);
+}
+
+static const char *subplatform_prefix(enum xe_subplatform s)
+{
+ return s == XE_SUBPLATFORM_NONE ? "" : " ";
+}
+
+static const char *step_prefix(enum xe_step step)
+{
+ return step == STEP_NONE ? "" : " ";
+}
+
+static const char *step_name(enum xe_step step)
+{
+ return step == STEP_NONE ? "" : xe_step_name(step);
+}
+
+static const char *sriov_prefix(enum xe_sriov_mode mode)
+{
+ return mode <= XE_SRIOV_MODE_NONE ? "" : " ";
+}
+
+static const char *sriov_name(enum xe_sriov_mode mode)
+{
+ return mode <= XE_SRIOV_MODE_NONE ? "" : xe_sriov_mode_to_string(mode);
+}
+
+static const char *lookup_graphics_name(unsigned int verx100)
+{
+ const struct xe_ip *ip = find_graphics_ip(verx100);
+
+ return ip ? ip->name : "";
+}
+
+static const char *lookup_media_name(unsigned int verx100)
+{
+ const struct xe_ip *ip = find_media_ip(verx100);
+
+ return ip ? ip->name : "";
+}
+
+/**
+ * xe_pci_fake_data_desc - Describe struct xe_pci_fake_data parameter
+ * @param: the &struct xe_pci_fake_data parameter to describe
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
+ *
+ * This function prepares description of the struct xe_pci_fake_data parameter.
+ *
+ * It is tailored for use in parameterized KUnit tests where parameter generator
+ * is based on the struct xe_pci_fake_data arrays.
+ */
+void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc)
+{
+ if (param->graphics_verx100 || param->media_verx100)
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s %u.%02u(%s)%s%s %u.%02u(%s)%s%s%s%s",
+ lookup_platform_name(param->platform),
+ subplatform_prefix(param->subplatform),
+ lookup_subplatform_name(param->platform, param->subplatform),
+ param->graphics_verx100 / 100, param->graphics_verx100 % 100,
+ lookup_graphics_name(param->graphics_verx100),
+ step_prefix(param->step.graphics), step_name(param->step.graphics),
+ param->media_verx100 / 100, param->media_verx100 % 100,
+ lookup_media_name(param->media_verx100),
+ step_prefix(param->step.media), step_name(param->step.media),
+ sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode));
+ else
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s%s%s%s%s",
+ lookup_platform_name(param->platform),
+ subplatform_prefix(param->subplatform),
+ lookup_subplatform_name(param->platform, param->subplatform),
+ step_prefix(param->step.graphics), step_name(param->step.graphics),
+ sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode));
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_desc);
+
static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc)
{
snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s",
param->verx100 / 100, param->verx100 % 100, param->name);
}
+/*
+ * Pre-GMDID Graphics and Media IPs definitions.
+ *
+ * Mimic the way GMDID IPs are declared so the same
+ * param generator can be used for both
+ */
+static const struct xe_ip pre_gmdid_graphics_ips[] = {
+ { 1200, "Xe_LP", &graphics_xelp },
+ { 1210, "Xe_LP+", &graphics_xelp },
+ { 1255, "Xe_HPG", &graphics_xehpg },
+ { 1260, "Xe_HPC", &graphics_xehpc },
+};
+
+static const struct xe_ip pre_gmdid_media_ips[] = {
+ { 1200, "Xe_M", &media_xem },
+ { 1255, "Xe_HPM", &media_xem },
+};
+
+KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc);
+KUNIT_ARRAY_PARAM(pre_gmdid_media_ip, pre_gmdid_media_ips, xe_ip_kunit_desc);
+
KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc);
KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc);
@@ -44,9 +251,16 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc);
*
* Return: pointer to the next parameter or NULL if no more parameters
*/
-const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc)
+const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc)
{
- return graphics_ip_gen_params(prev, desc);
+ const void *next = pre_gmdid_graphics_ip_gen_params(test, prev, desc);
+
+ if (next)
+ return next;
+ if (is_insidevar(prev, pre_gmdid_graphics_ips))
+ prev = NULL;
+
+ return graphics_ip_gen_params(test, prev, desc);
}
EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param);
@@ -61,9 +275,16 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param);
*
* Return: pointer to the next parameter or NULL if no more parameters
*/
-const void *xe_pci_media_ip_gen_param(const void *prev, char *desc)
+const void *xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc)
{
- return media_ip_gen_params(prev, desc);
+ const void *next = pre_gmdid_media_ip_gen_params(test, prev, desc);
+
+ if (next)
+ return next;
+ if (is_insidevar(prev, pre_gmdid_media_ips))
+ prev = NULL;
+
+ return media_ip_gen_params(test, prev, desc);
}
EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param);
@@ -78,9 +299,9 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param);
*
* Return: pointer to the next parameter or NULL if no more parameters
*/
-const void *xe_pci_id_gen_param(const void *prev, char *desc)
+const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc)
{
- const struct pci_device_id *pci = pci_id_gen_params(prev, desc);
+ const struct pci_device_id *pci = pci_id_gen_params(test, prev, desc);
return pci->driver_data ? pci : NULL;
}
@@ -94,10 +315,10 @@ static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type,
if (type == GMDID_MEDIA) {
*ver = data->media_verx100;
- *revid = xe_step_to_gmdid(data->media_step);
+ *revid = xe_step_to_gmdid(data->step.media);
} else {
*ver = data->graphics_verx100;
- *revid = xe_step_to_gmdid(data->graphics_step);
+ *revid = xe_step_to_gmdid(data->step.graphics);
}
}
@@ -166,7 +387,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
* Return: pointer to the next &struct xe_device ready to be used as a parameter
* or NULL if there are no more Xe devices on the system.
*/
-const void *xe_pci_live_device_gen_param(const void *prev, char *desc)
+const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc)
{
const struct xe_device *xe = prev;
struct device *dev = xe ? xe->drm.dev : NULL;
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index ce4d2b86b778..30505d1cbefc 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -7,9 +7,11 @@
#define _XE_PCI_TEST_H_
#include <linux/types.h>
+#include <kunit/test.h>
#include "xe_platform_types.h"
#include "xe_sriov_types.h"
+#include "xe_step_types.h"
struct xe_device;
@@ -17,17 +19,18 @@ struct xe_pci_fake_data {
enum xe_sriov_mode sriov_mode;
enum xe_platform platform;
enum xe_subplatform subplatform;
+ struct xe_step_info step;
u32 graphics_verx100;
u32 media_verx100;
- u32 graphics_step;
- u32 media_step;
};
int xe_pci_fake_device_init(struct xe_device *xe);
+const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc);
+void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc);
-const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc);
-const void *xe_pci_media_ip_gen_param(const void *prev, char *desc);
-const void *xe_pci_id_gen_param(const void *prev, char *desc);
-const void *xe_pci_live_device_gen_param(const void *prev, char *desc);
+const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc);
+const void *xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc);
+const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc);
+const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc);
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index 416258c193f6..49d191043dfa 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -15,87 +15,10 @@
#include "xe_tuning.h"
#include "xe_wa.h"
-struct platform_test_case {
- const char *name;
- enum xe_platform platform;
- enum xe_subplatform subplatform;
- u32 graphics_verx100;
- u32 media_verx100;
- struct xe_step_info step;
-};
-
-#define PLATFORM_CASE(platform__, graphics_step__) \
- { \
- .name = #platform__ " (" #graphics_step__ ")", \
- .platform = XE_ ## platform__, \
- .subplatform = XE_SUBPLATFORM_NONE, \
- .step = { .graphics = STEP_ ## graphics_step__ } \
- }
-
-
-#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \
- { \
- .name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")", \
- .platform = XE_ ## platform__, \
- .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \
- .step = { .graphics = STEP_ ## graphics_step__ } \
- }
-
-#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \
- media_verx100__, media_step__) \
- { \
- .name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\
- .platform = XE_ ## platform__, \
- .subplatform = XE_SUBPLATFORM_NONE, \
- .graphics_verx100 = graphics_verx100__, \
- .media_verx100 = media_verx100__, \
- .step = { .graphics = STEP_ ## graphics_step__, \
- .media = STEP_ ## media_step__ } \
- }
-
-static const struct platform_test_case cases[] = {
- PLATFORM_CASE(TIGERLAKE, B0),
- PLATFORM_CASE(DG1, A0),
- PLATFORM_CASE(DG1, B0),
- PLATFORM_CASE(ALDERLAKE_S, A0),
- PLATFORM_CASE(ALDERLAKE_S, B0),
- PLATFORM_CASE(ALDERLAKE_S, C0),
- PLATFORM_CASE(ALDERLAKE_S, D0),
- PLATFORM_CASE(ALDERLAKE_P, A0),
- PLATFORM_CASE(ALDERLAKE_P, B0),
- PLATFORM_CASE(ALDERLAKE_P, C0),
- SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
- SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
- SUBPLATFORM_CASE(DG2, G10, C0),
- SUBPLATFORM_CASE(DG2, G11, B1),
- SUBPLATFORM_CASE(DG2, G12, A1),
- GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
- GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
- GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
- GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
- GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
- GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1),
- GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0),
-};
-
-static void platform_desc(const struct platform_test_case *t, char *desc)
-{
- strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
-}
-
-KUNIT_ARRAY_PARAM(platform, cases, platform_desc);
-
static int xe_wa_test_init(struct kunit *test)
{
- const struct platform_test_case *param = test->param_value;
- struct xe_pci_fake_data data = {
- .platform = param->platform,
- .subplatform = param->subplatform,
- .graphics_verx100 = param->graphics_verx100,
- .media_verx100 = param->media_verx100,
- .graphics_step = param->step.graphics,
- .media_step = param->step.media,
- };
+ const struct xe_pci_fake_data *param = test->param_value;
+ struct xe_pci_fake_data data = *param;
struct xe_device *xe;
struct device *dev;
int ret;
@@ -120,13 +43,6 @@ static int xe_wa_test_init(struct kunit *test)
return 0;
}
-static void xe_wa_test_exit(struct kunit *test)
-{
- struct xe_device *xe = test->priv;
-
- drm_kunit_helper_free_device(test, xe->drm.dev);
-}
-
static void xe_wa_gt(struct kunit *test)
{
struct xe_device *xe = test->priv;
@@ -144,14 +60,13 @@ static void xe_wa_gt(struct kunit *test)
}
static struct kunit_case xe_wa_tests[] = {
- KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params),
+ KUNIT_CASE_PARAM(xe_wa_gt, xe_pci_fake_data_gen_params),
{}
};
static struct kunit_suite xe_rtp_test_suite = {
.name = "xe_wa",
.init = xe_wa_test_init,
- .exit = xe_wa_test_exit,
.test_cases = xe_wa_tests,
};
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index feb6e013dc38..6d20229c11de 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -64,7 +64,7 @@ struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
{
struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
- struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = gt_to_xe(gt);
struct xe_sa_manager *bb_pool;
int err;
@@ -78,7 +78,7 @@ struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
* So, this extra DW acts as a guard here.
*/
- bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool;
+ bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1));
if (IS_ERR(bb->bo)) {
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 81bae7a59038..fbe81cda15c2 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -975,11 +975,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
* CCS meta data is migrated from TT -> SMEM. So, let us detach the
* BBs from BO as it is no longer needed.
*/
- if (IS_VF_CCS_BB_VALID(xe, bo) && old_mem_type == XE_PL_TT &&
+ if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT &&
new_mem->mem_type == XE_PL_SYSTEM)
xe_sriov_vf_ccs_detach_bo(bo);
- if (IS_SRIOV_VF(xe) &&
+ if (IS_VF_CCS_READY(xe) &&
((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
(old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
handle_system_ccs)
@@ -995,7 +995,7 @@ out:
if (timeout < 0)
ret = timeout;
- if (IS_VF_CCS_BB_VALID(xe, bo))
+ if (IS_VF_CCS_READY(xe))
xe_sriov_vf_ccs_detach_bo(bo);
xe_tt_unmap_sg(xe, ttm_bo->ttm);
@@ -1142,42 +1142,47 @@ out_unref:
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
{
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
struct xe_bo *backup;
int ret = 0;
- xe_bo_lock(bo, false);
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ xe_assert(xe, !ret);
+ xe_assert(xe, !bo->backup_obj);
- xe_assert(xe, !bo->backup_obj);
+ /*
+ * Since this is called from the PM notifier we might have raced with
+ * someone unpinning this after we dropped the pinned list lock and
+ * grabbing the above bo lock.
+ */
+ if (!xe_bo_is_pinned(bo))
+ break;
- /*
- * Since this is called from the PM notifier we might have raced with
- * someone unpinning this after we dropped the pinned list lock and
- * grabbing the above bo lock.
- */
- if (!xe_bo_is_pinned(bo))
- goto out_unlock_bo;
+ if (!xe_bo_is_vram(bo))
+ break;
- if (!xe_bo_is_vram(bo))
- goto out_unlock_bo;
+ if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
+ break;
- if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
- goto out_unlock_bo;
+ backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
+ DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED, &exec);
+ if (IS_ERR(backup)) {
+ drm_exec_retry_on_contention(&exec);
+ ret = PTR_ERR(backup);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ break;
+ }
- backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
- DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
- if (IS_ERR(backup)) {
- ret = PTR_ERR(backup);
- goto out_unlock_bo;
+ backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
+ ttm_bo_pin(&backup->ttm);
+ bo->backup_obj = backup;
}
- backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
- ttm_bo_pin(&backup->ttm);
- bo->backup_obj = backup;
-
-out_unlock_bo:
- xe_bo_unlock(bo);
return ret;
}
@@ -1203,57 +1208,12 @@ int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
return 0;
}
-/**
- * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
- * @bo: The buffer object to move.
- *
- * On successful completion, the object memory will be moved to system memory.
- *
- * This is needed to for special handling of pinned VRAM object during
- * suspend-resume.
- *
- * Return: 0 on success. Negative error code on failure.
- */
-int xe_bo_evict_pinned(struct xe_bo *bo)
+static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup)
{
- struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
- struct xe_bo *backup = bo->backup_obj;
- bool backup_created = false;
+ struct xe_device *xe = xe_bo_device(bo);
bool unmap = false;
int ret = 0;
- xe_bo_lock(bo, false);
-
- if (WARN_ON(!bo->ttm.resource)) {
- ret = -EINVAL;
- goto out_unlock_bo;
- }
-
- if (WARN_ON(!xe_bo_is_pinned(bo))) {
- ret = -EINVAL;
- goto out_unlock_bo;
- }
-
- if (!xe_bo_is_vram(bo))
- goto out_unlock_bo;
-
- if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
- goto out_unlock_bo;
-
- if (!backup) {
- backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv,
- NULL, xe_bo_size(bo),
- DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
- if (IS_ERR(backup)) {
- ret = PTR_ERR(backup);
- goto out_unlock_bo;
- }
- backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
- backup_created = true;
- }
-
if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
struct xe_migrate *migrate;
struct dma_fence *fence;
@@ -1263,14 +1223,11 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
else
migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
+ xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv);
ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
if (ret)
goto out_backup;
- ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
- if (ret)
- goto out_backup;
-
fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
backup->ttm.resource, false);
if (IS_ERR(fence)) {
@@ -1280,8 +1237,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
dma_resv_add_fence(bo->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
- dma_resv_add_fence(backup->ttm.base.resv, fence,
- DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
} else {
ret = xe_bo_vmap(backup);
@@ -1291,7 +1246,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
if (iosys_map_is_null(&bo->vmap)) {
ret = xe_bo_vmap(bo);
if (ret)
- goto out_backup;
+ goto out_vunmap;
unmap = true;
}
@@ -1301,15 +1256,78 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
if (!bo->backup_obj)
bo->backup_obj = backup;
-
-out_backup:
+out_vunmap:
xe_bo_vunmap(backup);
- if (ret && backup_created)
- xe_bo_put(backup);
-out_unlock_bo:
+out_backup:
if (unmap)
xe_bo_vunmap(bo);
- xe_bo_unlock(bo);
+
+ return ret;
+}
+
+/**
+ * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved to system memory.
+ *
+ * This is needed to for special handling of pinned VRAM object during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict_pinned(struct xe_bo *bo)
+{
+ struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *backup = bo->backup_obj;
+ bool backup_created = false;
+ int ret = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ xe_assert(xe, !ret);
+
+ if (WARN_ON(!bo->ttm.resource)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (WARN_ON(!xe_bo_is_pinned(bo))) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (!xe_bo_is_vram(bo))
+ break;
+
+ if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
+ break;
+
+ if (!backup) {
+ backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL,
+ xe_bo_size(bo),
+ DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED, &exec);
+ if (IS_ERR(backup)) {
+ drm_exec_retry_on_contention(&exec);
+ ret = PTR_ERR(backup);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ break;
+ }
+ backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
+ backup_created = true;
+ }
+
+ ret = xe_bo_evict_pinned_copy(bo, backup);
+ }
+
+ if (ret && backup_created)
+ xe_bo_put(backup);
+
return ret;
}
@@ -1359,10 +1377,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
if (ret)
goto out_unlock_bo;
- ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
- if (ret)
- goto out_unlock_bo;
-
fence = xe_migrate_copy(migrate, backup, bo,
backup->ttm.resource, bo->ttm.resource,
false);
@@ -1373,8 +1387,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
dma_resv_add_fence(bo->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
- dma_resv_add_fence(backup->ttm.base.resv, fence,
- DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
} else {
ret = xe_bo_vmap(backup);
@@ -1530,7 +1542,7 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
if (!xe_bo_is_xe_bo(ttm_bo))
return;
- if (IS_VF_CCS_BB_VALID(ttm_to_xe_device(ttm_bo->bdev), bo))
+ if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev)))
xe_sriov_vf_ccs_detach_bo(bo);
/*
@@ -1726,65 +1738,246 @@ static bool should_migrate_to_smem(struct xe_bo *bo)
bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
}
-static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx)
+{
+ long lerr;
+
+ if (ctx->no_wait_gpu)
+ return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ?
+ 0 : -EBUSY;
+
+ lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ ctx->interruptible, MAX_SCHEDULE_TIMEOUT);
+ if (lerr < 0)
+ return lerr;
+ if (lerr == 0)
+ return -EBUSY;
+
+ return 0;
+}
+
+/* Populate the bo if swapped out, or migrate if the access mode requires that. */
+static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
+ struct drm_exec *exec)
+{
+ struct ttm_buffer_object *tbo = &bo->ttm;
+ int err = 0;
+
+ if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
+ err = xe_bo_wait_usage_kernel(bo, ctx);
+ if (!err)
+ err = ttm_bo_populate(&bo->ttm, ctx);
+ } else if (should_migrate_to_smem(bo)) {
+ xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
+ err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
+ }
+
+ return err;
+}
+
+/* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */
+static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo)
+{
+ vm_fault_t ret;
+
+ trace_xe_bo_cpu_fault(bo);
+
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+ /*
+ * When TTM is actually called to insert PTEs, ensure no blocking conditions
+ * remain, in which case TTM may drop locks and return VM_FAULT_RETRY.
+ */
+ xe_assert(xe, ret != VM_FAULT_RETRY);
+
+ if (ret == VM_FAULT_NOPAGE &&
+ mem_type_is_vram(bo->ttm.resource->mem_type)) {
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ if (list_empty(&bo->vram_userfault_link))
+ list_add(&bo->vram_userfault_link,
+ &xe->mem_access.vram_userfault.list);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+ }
+
+ return ret;
+}
+
+static vm_fault_t xe_err_to_fault_t(int err)
+{
+ switch (err) {
+ case 0:
+ case -EINTR:
+ case -ERESTARTSYS:
+ case -EAGAIN:
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ case -ENOSPC:
+ return VM_FAULT_OOM;
+ default:
+ break;
+ }
+ return VM_FAULT_SIGBUS;
+}
+
+static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo)
+{
+ dma_resv_assert_held(tbo->base.resv);
+
+ return tbo->ttm &&
+ (tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) ==
+ TTM_TT_FLAG_EXTERNAL;
+}
+
+static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe,
+ struct xe_bo *bo, bool needs_rpm)
+{
+ struct ttm_buffer_object *tbo = &bo->ttm;
+ vm_fault_t ret = VM_FAULT_RETRY;
+ struct xe_validation_ctx ctx;
+ struct ttm_operation_ctx tctx = {
+ .interruptible = true,
+ .no_wait_gpu = true,
+ .gfp_retry_mayfail = true,
+
+ };
+ int err;
+
+ if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
+ return VM_FAULT_RETRY;
+
+ err = xe_validation_ctx_init(&ctx, &xe->val, NULL,
+ (struct xe_val_flags) {
+ .interruptible = true,
+ .no_block = true
+ });
+ if (err)
+ goto out_pm;
+
+ if (!dma_resv_trylock(tbo->base.resv))
+ goto out_validation;
+
+ if (xe_ttm_bo_is_imported(tbo)) {
+ ret = VM_FAULT_SIGBUS;
+ drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
+ goto out_unlock;
+ }
+
+ err = xe_bo_fault_migrate(bo, &tctx, NULL);
+ if (err) {
+ /* Return VM_FAULT_RETRY on these errors. */
+ if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY)
+ ret = xe_err_to_fault_t(err);
+ goto out_unlock;
+ }
+
+ if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL))
+ ret = __xe_bo_cpu_fault(vmf, xe, bo);
+
+out_unlock:
+ dma_resv_unlock(tbo->base.resv);
+out_validation:
+ xe_validation_ctx_fini(&ctx);
+out_pm:
+ if (needs_rpm)
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
struct drm_device *ddev = tbo->base.dev;
struct xe_device *xe = to_xe_device(ddev);
struct xe_bo *bo = ttm_to_xe_bo(tbo);
bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
+ bool retry_after_wait = false;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
vm_fault_t ret;
- int idx, r = 0;
+ int err = 0;
+ int idx;
+
+ if (!drm_dev_enter(&xe->drm, &idx))
+ return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+
+ ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
+ if (ret != VM_FAULT_RETRY)
+ goto out;
+
+ if (fault_flag_allow_retry_first(vmf->flags)) {
+ if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+ goto out;
+ retry_after_wait = true;
+ xe_bo_get(bo);
+ mmap_read_unlock(vmf->vma->vm_mm);
+ } else {
+ ret = VM_FAULT_NOPAGE;
+ }
+
+ /*
+ * The fastpath failed and we were not required to return and retry immediately.
+ * We're now running in one of two modes:
+ *
+ * 1) retry_after_wait == true: The mmap_read_lock() is dropped, and we're trying
+ * to resolve blocking waits. But we can't resolve the fault since the
+ * mmap_read_lock() is dropped. After retrying the fault, the aim is that the fastpath
+ * should succeed. But it may fail since we drop the bo lock.
+ *
+ * 2) retry_after_wait == false: The fastpath failed, typically even after
+ * a retry. Do whatever's necessary to resolve the fault.
+ *
+ * This construct is recommended to avoid excessive waits under the mmap_lock.
+ */
if (needs_rpm)
xe_pm_runtime_get(xe);
- ret = ttm_bo_vm_reserve(tbo, vmf);
- if (ret)
- goto out;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ err) {
+ struct ttm_operation_ctx tctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ .gfp_retry_mayfail = retry_after_wait,
+ };
- if (drm_dev_enter(ddev, &idx)) {
- trace_xe_bo_cpu_fault(bo);
+ err = drm_exec_lock_obj(&exec, &tbo->base);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ break;
- if (should_migrate_to_smem(bo)) {
- xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM);
+ if (xe_ttm_bo_is_imported(tbo)) {
+ err = -EFAULT;
+ drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
+ break;
+ }
- r = xe_bo_migrate(bo, XE_PL_TT);
- if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
- ret = VM_FAULT_NOPAGE;
- else if (r)
- ret = VM_FAULT_SIGBUS;
+ err = xe_bo_fault_migrate(bo, &tctx, &exec);
+ if (err) {
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
}
- if (!ret)
- ret = ttm_bo_vm_fault_reserved(vmf,
- vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT);
- drm_dev_exit(idx);
- if (ret == VM_FAULT_RETRY &&
- !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
- goto out;
+ err = xe_bo_wait_usage_kernel(bo, &tctx);
+ if (err)
+ break;
- /*
- * ttm_bo_vm_reserve() already has dma_resv_lock.
- */
- if (ret == VM_FAULT_NOPAGE &&
- mem_type_is_vram(tbo->resource->mem_type)) {
- mutex_lock(&xe->mem_access.vram_userfault.lock);
- if (list_empty(&bo->vram_userfault_link))
- list_add(&bo->vram_userfault_link,
- &xe->mem_access.vram_userfault.list);
- mutex_unlock(&xe->mem_access.vram_userfault.lock);
- }
- } else {
- ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+ if (!retry_after_wait)
+ ret = __xe_bo_cpu_fault(vmf, xe, bo);
}
+ /* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. */
+ if (err && !retry_after_wait)
+ ret = xe_err_to_fault_t(err);
- dma_resv_unlock(tbo->base.resv);
-out:
if (needs_rpm)
xe_pm_runtime_put(xe);
+ if (retry_after_wait)
+ xe_bo_put(bo);
+out:
+ drm_dev_exit(idx);
+
return ret;
}
@@ -1828,7 +2021,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
}
static const struct vm_operations_struct xe_gem_vm_ops = {
- .fault = xe_gem_fault,
+ .fault = xe_bo_cpu_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
.access = xe_bo_vm_access,
@@ -1876,11 +2069,32 @@ void xe_bo_free(struct xe_bo *bo)
kfree(bo);
}
-struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
- struct xe_tile *tile, struct dma_resv *resv,
- struct ttm_lru_bulk_move *bulk, size_t size,
- u16 cpu_caching, enum ttm_bo_type type,
- u32 flags)
+/**
+ * xe_bo_init_locked() - Initialize or create an xe_bo.
+ * @xe: The xe device.
+ * @bo: An already allocated buffer object or NULL
+ * if the function should allocate a new one.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @resv: Pointer to a locked shared reservation object to use fo this bo,
+ * or NULL for the xe_bo to use its own.
+ * @bulk: The bulk move to use for LRU bumping, or NULL for external bos.
+ * @size: The storage size to use for the bo.
+ * @cpu_caching: The cpu caching used for system memory backing store.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
+ *
+ * Initialize or create an xe buffer object. On failure, any allocated buffer
+ * object passed in @bo will have been unreferenced.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
+struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
+ struct xe_tile *tile, struct dma_resv *resv,
+ struct ttm_lru_bulk_move *bulk, size_t size,
+ u16 cpu_caching, enum ttm_bo_type type,
+ u32 flags, struct drm_exec *exec)
{
struct ttm_operation_ctx ctx = {
.interruptible = true,
@@ -1949,6 +2163,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
ctx.resv = resv;
}
+ xe_validation_assert_exec(xe, exec, &bo->ttm.base);
if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
if (WARN_ON(err)) {
@@ -2050,7 +2265,7 @@ __xe_bo_create_locked(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
u16 cpu_caching, enum ttm_bo_type type, u32 flags,
- u64 alignment)
+ u64 alignment, struct drm_exec *exec)
{
struct xe_bo *bo = NULL;
int err;
@@ -2071,11 +2286,11 @@ __xe_bo_create_locked(struct xe_device *xe,
}
}
- bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
- vm && !xe_vm_in_fault_mode(vm) &&
- flags & XE_BO_FLAG_USER ?
- &vm->lru_bulk_move : NULL, size,
- cpu_caching, type, flags);
+ bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
+ vm && !xe_vm_in_fault_mode(vm) &&
+ flags & XE_BO_FLAG_USER ?
+ &vm->lru_bulk_move : NULL, size,
+ cpu_caching, type, flags, exec);
if (IS_ERR(bo))
return bo;
@@ -2109,9 +2324,10 @@ __xe_bo_create_locked(struct xe_device *xe,
if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
- start + xe_bo_size(bo), U64_MAX);
+ start + xe_bo_size(bo), U64_MAX,
+ exec);
} else {
- err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
+ err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec);
}
if (err)
goto err_unlock_put_bo;
@@ -2128,82 +2344,166 @@ err_unlock_put_bo:
return ERR_PTR(err);
}
-struct xe_bo *
-xe_bo_create_locked_range(struct xe_device *xe,
- struct xe_tile *tile, struct xe_vm *vm,
- size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags, u64 alignment)
-{
- return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
- flags, alignment);
-}
-
+/**
+ * xe_bo_create_locked() - Create a BO
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @vm: The local vm or NULL for external objects.
+ * @size: The storage size to use for the bo.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
+ *
+ * Create a locked xe BO with no range- nor alignment restrictions.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec)
{
return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
- flags, 0);
+ flags, 0, exec);
}
-struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- u16 cpu_caching,
- u32 flags)
+static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u16 cpu_caching,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment, bool intr)
{
- struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
- cpu_caching, ttm_bo_type_device,
- flags | XE_BO_FLAG_USER, 0);
- if (!IS_ERR(bo))
- xe_bo_unlock_vm_held(bo);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int ret = 0;
- return bo;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
+ ret) {
+ bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL,
+ cpu_caching, type, flags, alignment, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ } else {
+ xe_bo_unlock(bo);
+ }
+ }
+
+ return ret ? ERR_PTR(ret) : bo;
}
-struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags)
+/**
+ * xe_bo_create_user() - Create a user BO
+ * @xe: The xe device.
+ * @vm: The local vm or NULL for external objects.
+ * @size: The storage size to use for the bo.
+ * @cpu_caching: The caching mode to be used for system backing store.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL
+ * if such a transaction should be initiated by the call.
+ *
+ * Create a bo on behalf of user-space.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
+struct xe_bo *xe_bo_create_user(struct xe_device *xe,
+ struct xe_vm *vm, size_t size,
+ u16 cpu_caching,
+ u32 flags, struct drm_exec *exec)
{
- struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
+ struct xe_bo *bo;
- if (!IS_ERR(bo))
- xe_bo_unlock_vm_held(bo);
+ flags |= XE_BO_FLAG_USER;
+
+ if (vm || exec) {
+ xe_assert(xe, exec);
+ bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL,
+ cpu_caching, ttm_bo_type_device,
+ flags, 0, exec);
+ if (!IS_ERR(bo))
+ xe_bo_unlock_vm_held(bo);
+ } else {
+ bo = xe_bo_create_novm(xe, NULL, size, cpu_caching,
+ ttm_bo_type_device, flags, 0, true);
+ }
return bo;
}
-struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm,
- size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags)
+/**
+ * xe_bo_create_pin_range_novm() - Create and pin a BO with range options.
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @start: Start of fixed VRAM range or 0.
+ * @end: End of fixed VRAM range or ~0ULL.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ *
+ * Create an Xe BO with range- and options. If @start and @end indicate
+ * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM placement
+ * only.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
+struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 start, u64 end,
+ enum ttm_bo_type type, u32 flags)
{
- return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
- type, flags, 0);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int err = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+ bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end,
+ 0, type, flags, 0, &exec);
+ if (IS_ERR(bo)) {
+ drm_exec_retry_on_contention(&exec);
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+
+ err = xe_bo_pin(bo, &exec);
+ xe_bo_unlock(bo);
+ if (err) {
+ xe_bo_put(bo);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+ }
+
+ return err ? ERR_PTR(err) : bo;
}
-struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
- struct xe_tile *tile,
- struct xe_vm *vm,
- size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags,
- u64 alignment)
+static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment, struct drm_exec *exec)
{
struct xe_bo *bo;
int err;
u64 start = offset == ~0ull ? 0 : offset;
- u64 end = offset == ~0ull ? offset : start + size;
+ u64 end = offset == ~0ull ? ~0ull : start + size;
if (flags & XE_BO_FLAG_STOLEN &&
xe_ttm_stolen_cpu_access_needs_ggtt(xe))
flags |= XE_BO_FLAG_GGTT;
- bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
- flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
- alignment);
+ bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
+ flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
+ alignment, exec);
if (IS_ERR(bo))
return bo;
- err = xe_bo_pin(bo);
+ err = xe_bo_pin(bo, exec);
if (err)
goto err_put;
@@ -2223,11 +2523,100 @@ err_put:
return ERR_PTR(err);
}
+/**
+ * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @offset: Optional VRAM offset or %~0ull for don't care.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @alignment: GGTT alignment.
+ * @intr: Whether to execute any waits for backing store interruptible.
+ *
+ * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment
+ * options. The bo will be external and not associated with a VM.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
+ * to true on entry.
+ */
+struct xe_bo *
+xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 offset, enum ttm_bo_type type, u32 flags,
+ u64 alignment, bool intr)
+{
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int ret = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
+ ret) {
+ bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset,
+ type, flags, alignment, &exec);
+ if (IS_ERR(bo)) {
+ drm_exec_retry_on_contention(&exec);
+ ret = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ }
+ }
+
+ return ret ? ERR_PTR(ret) : bo;
+}
+
+/**
+ * xe_bo_create_pin_map() - Create pinned and mapped bo
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * @vm: The vm to associate the buffer object with. The vm's resv must be locked
+ * with the transaction represented by @exec.
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction, and
+ * previously used for locking @vm's resv.
+ *
+ * Create a pinned and mapped bo. The bo will be external and not associated
+ * with a VM.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ * In particular, the function may return ERR_PTR(%-EINTR) if @exec was
+ * configured for interruptible locking.
+ */
struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec)
{
- return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
+ return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags,
+ 0, exec);
+}
+
+/**
+ * xe_bo_create_pin_map_novm() - Create pinned and mapped bo
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @intr: Whether to execut any waits for backing store interruptible.
+ *
+ * Create a pinned and mapped bo. The bo will be external and not associated
+ * with a VM.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
+ * to true on entry.
+ */
+struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, enum ttm_bo_type type, u32 flags,
+ bool intr)
+{
+ return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr);
}
static void __xe_bo_unpin_map_no_vm(void *arg)
@@ -2242,8 +2631,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
int ret;
KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
-
- bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
+ bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true);
if (IS_ERR(bo))
return bo;
@@ -2254,6 +2642,11 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
return bo;
}
+void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo)
+{
+ devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo);
+}
+
struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size, u32 flags)
{
@@ -2326,6 +2719,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
* xe_bo_pin_external - pin an external BO
* @bo: buffer object to be pinned
* @in_place: Pin in current placement, don't attempt to migrate.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
* BO. Unique call compared to xe_bo_pin as this function has it own set of
@@ -2333,7 +2727,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
*
* Returns 0 for success, negative error code otherwise.
*/
-int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec)
{
struct xe_device *xe = xe_bo_device(bo);
int err;
@@ -2343,7 +2737,7 @@ int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
if (!xe_bo_is_pinned(bo)) {
if (!in_place) {
- err = xe_bo_validate(bo, NULL, false);
+ err = xe_bo_validate(bo, NULL, false, exec);
if (err)
return err;
}
@@ -2366,7 +2760,17 @@ int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
return 0;
}
-int xe_bo_pin(struct xe_bo *bo)
+/**
+ * xe_bo_pin() - Pin a kernel bo after potentially migrating it
+ * @bo: The kernel bo to pin.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
+ *
+ * Attempts to migrate a bo to @bo->placement. If that succeeds,
+ * pins the bo.
+ *
+ * Return: %0 on success, negative error code on migration failure.
+ */
+int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec)
{
struct ttm_place *place = &bo->placements[0];
struct xe_device *xe = xe_bo_device(bo);
@@ -2388,7 +2792,7 @@ int xe_bo_pin(struct xe_bo *bo)
/* We only expect at most 1 pin */
xe_assert(xe, !xe_bo_is_pinned(bo));
- err = xe_bo_validate(bo, NULL, false);
+ err = xe_bo_validate(bo, NULL, false, exec);
if (err)
return err;
@@ -2481,6 +2885,7 @@ void xe_bo_unpin(struct xe_bo *bo)
* NULL. Used together with @allow_res_evict.
* @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
* reservation object.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Make sure the bo is in allowed placement, migrating it if necessary. If
* needed, other bos will be evicted. If bos selected for eviction shares
@@ -2490,7 +2895,8 @@ void xe_bo_unpin(struct xe_bo *bo)
* Return: 0 on success, negative error code on failure. May return
* -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
*/
-int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
+ struct drm_exec *exec)
{
struct ttm_operation_ctx ctx = {
.interruptible = true,
@@ -2512,6 +2918,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
xe_vm_set_validating(vm, allow_res_evict);
trace_xe_bo_validate(bo);
+ xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
xe_vm_clear_validating(vm, allow_res_evict);
@@ -2707,8 +3114,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_gem_create *args = data;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
struct xe_vm *vm = NULL;
- ktime_t end = 0;
struct xe_bo *bo;
unsigned int bo_flags;
u32 handle;
@@ -2782,25 +3190,26 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
}
-retry:
- if (vm) {
- err = xe_vm_lock(vm, true);
- if (err)
- goto out_vm;
+ err = 0;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ err) {
+ if (vm) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ break;
+ }
+ bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching,
+ bo_flags, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
}
-
- bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
- bo_flags);
-
- if (vm)
- xe_vm_unlock(vm);
-
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- if (xe_vm_validate_should_retry(NULL, err, &end))
- goto retry;
+ if (err)
goto out_vm;
- }
if (args->extensions) {
err = gem_create_user_extensions(xe, bo, args->extensions, 0);
@@ -2949,6 +3358,9 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
* xe_bo_migrate - Migrate an object to the desired region id
* @bo: The buffer object to migrate.
* @mem_type: The TTM region type to migrate to.
+ * @tctx: A pointer to a struct ttm_operation_ctx or NULL if
+ * a default interruptibe ctx is to be used.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Attempt to migrate the buffer object to the desired memory region. The
* buffer object may not be pinned, and must be locked.
@@ -2960,7 +3372,8 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
* Return: 0 on success. Negative error code on failure. In particular may
* return -EINTR or -ERESTARTSYS if signal pending.
*/
-int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx,
+ struct drm_exec *exec)
{
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
struct ttm_operation_ctx ctx = {
@@ -2972,6 +3385,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
struct ttm_place requested;
xe_bo_assert_held(bo);
+ tctx = tctx ? tctx : &ctx;
if (bo->ttm.resource->mem_type == mem_type)
return 0;
@@ -2998,19 +3412,22 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
}
- return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+ if (!tctx->no_wait_gpu)
+ xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
+ return ttm_bo_validate(&bo->ttm, &placement, tctx);
}
/**
* xe_bo_evict - Evict an object to evict placement
* @bo: The buffer object to migrate.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* On successful completion, the object memory will be moved to evict
* placement. This function blocks until the object has been fully moved.
*
* Return: 0 on success. Negative error code on failure.
*/
-int xe_bo_evict(struct xe_bo *bo)
+int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec)
{
struct ttm_operation_ctx ctx = {
.interruptible = false,
@@ -3169,11 +3586,11 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
if (err)
return err;
- bo = xe_bo_create_user(xe, NULL, NULL, args->size,
+ bo = xe_bo_create_user(xe, NULL, args->size,
DRM_XE_GEM_CPU_CACHING_WC,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
XE_BO_FLAG_SCANOUT |
- XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index cfb1ec266a6d..a77af42b5f9e 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -10,6 +10,7 @@
#include "xe_bo_types.h"
#include "xe_macros.h"
+#include "xe_validation.h"
#include "xe_vm_types.h"
#include "xe_vm.h"
#include "xe_vram_types.h"
@@ -88,40 +89,34 @@ struct sg_table;
struct xe_bo *xe_bo_alloc(void);
void xe_bo_free(struct xe_bo *bo);
-struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
- struct xe_tile *tile, struct dma_resv *resv,
- struct ttm_lru_bulk_move *bulk, size_t size,
- u16 cpu_caching, enum ttm_bo_type type,
- u32 flags);
-struct xe_bo *
-xe_bo_create_locked_range(struct xe_device *xe,
- struct xe_tile *tile, struct xe_vm *vm,
- size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags, u64 alignment);
+struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
+ struct xe_tile *tile, struct dma_resv *resv,
+ struct ttm_lru_bulk_move *bulk, size_t size,
+ u16 cpu_caching, enum ttm_bo_type type,
+ u32 flags, struct drm_exec *exec);
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- u16 cpu_caching,
- u32 flags);
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec);
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_vm *vm, size_t size,
+ u16 cpu_caching, u32 flags, struct drm_exec *exec);
struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
- struct xe_tile *tile,
- struct xe_vm *vm,
- size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags,
- u64 alignment);
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec);
+struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, enum ttm_bo_type type, u32 flags,
+ bool intr);
+struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 start, u64 end,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *
+xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 offset, enum ttm_bo_type type,
+ u32 flags, u64 alignment, bool intr);
struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
size_t size, u32 flags);
+void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo);
struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size, u32 flags);
int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src);
@@ -200,11 +195,12 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
}
}
-int xe_bo_pin_external(struct xe_bo *bo, bool in_place);
-int xe_bo_pin(struct xe_bo *bo);
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec);
+int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec);
void xe_bo_unpin_external(struct xe_bo *bo);
void xe_bo_unpin(struct xe_bo *bo);
-int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict);
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
+ struct drm_exec *exec);
static inline bool xe_bo_is_pinned(struct xe_bo *bo)
{
@@ -285,8 +281,9 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res);
bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
-int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
-int xe_bo_evict(struct xe_bo *bo);
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *ctc,
+ struct drm_exec *exec);
+int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec);
int xe_bo_evict_pinned(struct xe_bo *bo);
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo);
@@ -315,6 +312,21 @@ static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
return PAGE_ALIGN(xe_bo_size(bo));
}
+/**
+ * xe_bo_has_valid_ccs_bb - Check if CCS's BBs were setup for the BO.
+ * @bo: the &xe_bo to check
+ *
+ * The CCS's BBs should only be setup by the driver VF, but it is safe
+ * to call this function also by non-VF driver.
+ *
+ * Return: true iff the CCS's BBs are setup, false otherwise.
+ */
+static inline bool xe_bo_has_valid_ccs_bb(struct xe_bo *bo)
+{
+ return bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] &&
+ bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX];
+}
+
static inline bool xe_bo_has_pages(struct xe_bo *bo)
{
if ((bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) ||
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 7484ce55a303..d5dbc51e8612 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -158,8 +158,8 @@ int xe_bo_evict_all(struct xe_device *xe)
if (ret)
return ret;
- ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
- &xe->pinned.late.evicted, xe_bo_evict_pinned);
+ ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
+ &xe->pinned.late.external, xe_bo_evict_pinned);
if (!ret)
ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 314652afdca7..d4fe3c8dca5b 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -25,7 +25,9 @@ struct xe_vm;
/* TODO: To be selected with VM_MADVISE */
#define XE_BO_PRIORITY_NORMAL 1
-/** @xe_bo: XE buffer object */
+/**
+ * struct xe_bo - Xe buffer object
+ */
struct xe_bo {
/** @ttm: TTM base buffer object */
struct ttm_buffer_object ttm;
@@ -47,7 +49,7 @@ struct xe_bo {
struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE];
/** @vmap: iosys map of this buffer */
struct iosys_map vmap;
- /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
+ /** @kmap: TTM bo kmap object for internal use only. Keep off. */
struct ttm_bo_kmap_obj kmap;
/** @pinned_link: link to present / evicted list of pinned BO */
struct list_head pinned_link;
@@ -82,10 +84,10 @@ struct xe_bo {
/** @created: Whether the bo has passed initial creation */
bool created;
- /** @ccs_cleared */
+ /** @ccs_cleared: true means that CCS region of BO is already cleared */
bool ccs_cleared;
- /** @bb_ccs_rw: BB instructions of CCS read/write. Valid only for VF */
+ /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
/**
@@ -99,9 +101,10 @@ struct xe_bo {
struct drm_pagemap_devmem devmem_allocation;
/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
- struct list_head vram_userfault_link;
+ struct list_head vram_userfault_link;
- /** @min_align: minimum alignment needed for this BO if different
+ /**
+ * @min_align: minimum alignment needed for this BO if different
* from default
*/
u64 min_align;
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index 1025d3979b06..139663423185 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -4,6 +4,7 @@
*/
#include <linux/bitops.h>
+#include <linux/ctype.h>
#include <linux/configfs.h>
#include <linux/cleanup.h>
#include <linux/find.h>
@@ -12,6 +13,7 @@
#include <linux/pci.h>
#include <linux/string.h>
+#include "instructions/xe_mi_commands.h"
#include "xe_configfs.h"
#include "xe_hw_engine_types.h"
#include "xe_module.h"
@@ -21,7 +23,7 @@
* DOC: Xe Configfs
*
* Overview
- * =========
+ * ========
*
* Configfs is a filesystem-based manager of kernel objects. XE KMD registers a
* configfs subsystem called ``xe`` that creates a directory in the mounted
@@ -34,7 +36,7 @@
*
* To create a device, the ``xe`` module should already be loaded, but some
* attributes can only be set before binding the device. It can be accomplished
- * by blocking the driver autoprobe:
+ * by blocking the driver autoprobe::
*
* # echo 0 > /sys/bus/pci/drivers_autoprobe
* # modprobe xe
@@ -115,6 +117,58 @@
*
* This attribute can only be set before binding to the device.
*
+ * Context restore BB
+ * ------------------
+ *
+ * Allow to execute a batch buffer during any context switches. When the
+ * GPU is restoring the context, it executes additional commands. It's useful
+ * for testing additional workarounds and validating certain HW behaviors: it's
+ * not intended for normal execution and will taint the kernel with TAINT_TEST
+ * when used.
+ *
+ * The syntax allows to pass straight instructions to be executed by the engine
+ * in a batch buffer or set specific registers.
+ *
+ * #. Generic instruction::
+ *
+ * <engine-class> cmd <instr> [[dword0] [dword1] [...]]
+ *
+ * #. Simple register setting::
+ *
+ * <engine-class> reg <address> <value>
+ *
+ * Commands are saved per engine class: all instances of that class will execute
+ * those commands during context switch. The instruction, dword arguments,
+ * addresses and values are in hex format like in the examples below.
+ *
+ * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 after the
+ * normal context restore::
+ *
+ * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \
+ * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb
+ *
+ * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 at the
+ * beginning of the context restore::
+ *
+ * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \
+ * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_mid_bb
+
+ * #. Load certain values in a couple of registers (it can be used as a simpler
+ * alternative to the `cmd`) action::
+ *
+ * # cat > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb <<EOF
+ * rcs reg 4F100 DEADBEEF
+ * rcs reg 4F104 FFFFFFFF
+ * EOF
+ *
+ * .. note::
+ *
+ * When using multiple lines, make sure to use a command that is
+ * implemented with a single write syscall, like HEREDOC.
+ *
+ * Currently this is implemented only for post and mid context restore and
+ * these attributes can only be set before binding to the device.
+ *
* Remove devices
* ==============
*
@@ -123,17 +177,27 @@
* # rmdir /sys/kernel/config/xe/0000:03:00.0/
*/
+/* Similar to struct xe_bb, but not tied to HW (yet) */
+struct wa_bb {
+ u32 *cs;
+ u32 len; /* in dwords */
+};
+
struct xe_config_group_device {
struct config_group group;
struct xe_config_device {
u64 engines_allowed;
+ struct wa_bb ctx_restore_post_bb[XE_ENGINE_CLASS_MAX];
+ struct wa_bb ctx_restore_mid_bb[XE_ENGINE_CLASS_MAX];
bool survivability_mode;
bool enable_psmi;
} config;
/* protects attributes */
struct mutex lock;
+ /* matching descriptor */
+ const struct xe_device_desc *desc;
};
static const struct xe_config_device device_defaults = {
@@ -150,6 +214,7 @@ static void set_device_defaults(struct xe_config_device *config)
struct engine_info {
const char *cls;
u64 mask;
+ enum xe_engine_class engine_class;
};
/* Some helpful macros to aid on the sizing of buffer allocation when parsing */
@@ -157,12 +222,12 @@ struct engine_info {
#define MAX_ENGINE_INSTANCE_CHARS 2
static const struct engine_info engine_info[] = {
- { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK },
- { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK },
- { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK },
- { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK },
- { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK },
- { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK },
+ { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER },
+ { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK, .engine_class = XE_ENGINE_CLASS_COPY },
+ { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_DECODE },
+ { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_ENHANCE },
+ { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK, .engine_class = XE_ENGINE_CLASS_COMPUTE },
+ { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK, .engine_class = XE_ENGINE_CLASS_OTHER },
};
static struct xe_config_group_device *to_xe_config_group_device(struct config_item *item)
@@ -251,7 +316,18 @@ static ssize_t engines_allowed_show(struct config_item *item, char *page)
return p - page;
}
-static bool lookup_engine_mask(const char *pattern, u64 *mask)
+/*
+ * Lookup engine_info. If @mask is not NULL, reduce the mask according to the
+ * instance in @pattern.
+ *
+ * Examples of inputs:
+ * - lookup_engine_info("rcs0", &mask): return "rcs" entry from @engine_info and
+ * mask == BIT_ULL(XE_HW_ENGINE_RCS0)
+ * - lookup_engine_info("rcs*", &mask): return "rcs" entry from @engine_info and
+ * mask == XE_HW_ENGINE_RCS_MASK
+ * - lookup_engine_info("rcs", NULL): return "rcs" entry from @engine_info
+ */
+static const struct engine_info *lookup_engine_info(const char *pattern, u64 *mask)
{
for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) {
u8 instance;
@@ -261,44 +337,62 @@ static bool lookup_engine_mask(const char *pattern, u64 *mask)
continue;
pattern += strlen(engine_info[i].cls);
+ if (!mask)
+ return *pattern ? NULL : &engine_info[i];
if (!strcmp(pattern, "*")) {
*mask = engine_info[i].mask;
- return true;
+ return &engine_info[i];
}
if (kstrtou8(pattern, 10, &instance))
- return false;
+ return NULL;
bit = __ffs64(engine_info[i].mask) + instance;
if (bit >= fls64(engine_info[i].mask))
- return false;
+ return NULL;
*mask = BIT_ULL(bit);
- return true;
+ return &engine_info[i];
}
- return false;
+ return NULL;
+}
+
+static int parse_engine(const char *s, const char *end_chars, u64 *mask,
+ const struct engine_info **pinfo)
+{
+ char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1];
+ const struct engine_info *info;
+ size_t len;
+
+ len = strcspn(s, end_chars);
+ if (len >= sizeof(buf))
+ return -EINVAL;
+
+ memcpy(buf, s, len);
+ buf[len] = '\0';
+
+ info = lookup_engine_info(buf, mask);
+ if (!info)
+ return -ENOENT;
+
+ if (pinfo)
+ *pinfo = info;
+
+ return len;
}
static ssize_t engines_allowed_store(struct config_item *item, const char *page,
size_t len)
{
struct xe_config_group_device *dev = to_xe_config_group_device(item);
- size_t patternlen, p;
+ ssize_t patternlen, p;
u64 mask, val = 0;
for (p = 0; p < len; p += patternlen + 1) {
- char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1];
-
- patternlen = strcspn(page + p, ",\n");
- if (patternlen >= sizeof(buf))
- return -EINVAL;
-
- memcpy(buf, page + p, patternlen);
- buf[patternlen] = '\0';
-
- if (!lookup_engine_mask(buf, &mask))
+ patternlen = parse_engine(page + p, ",\n", &mask, NULL);
+ if (patternlen < 0)
return -EINVAL;
val |= mask;
@@ -339,11 +433,250 @@ static ssize_t enable_psmi_store(struct config_item *item, const char *page, siz
return len;
}
+static bool wa_bb_read_advance(bool dereference, char **p,
+ const char *append, size_t len,
+ size_t *max_size)
+{
+ if (dereference) {
+ if (len >= *max_size)
+ return false;
+ *max_size -= len;
+ if (append)
+ memcpy(*p, append, len);
+ }
+
+ *p += len;
+
+ return true;
+}
+
+static ssize_t wa_bb_show(struct xe_config_group_device *dev,
+ struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX],
+ char *data, size_t sz)
+{
+ char *p = data;
+
+ guard(mutex)(&dev->lock);
+
+ for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) {
+ enum xe_engine_class ec = engine_info[i].engine_class;
+ size_t len;
+
+ if (!wa_bb[ec].len)
+ continue;
+
+ len = snprintf(p, sz, "%s:", engine_info[i].cls);
+ if (!wa_bb_read_advance(data, &p, NULL, len, &sz))
+ return -ENOBUFS;
+
+ for (size_t j = 0; j < wa_bb[ec].len; j++) {
+ len = snprintf(p, sz, " %08x", wa_bb[ec].cs[j]);
+ if (!wa_bb_read_advance(data, &p, NULL, len, &sz))
+ return -ENOBUFS;
+ }
+
+ if (!wa_bb_read_advance(data, &p, "\n", 1, &sz))
+ return -ENOBUFS;
+ }
+
+ if (!wa_bb_read_advance(data, &p, "", 1, &sz))
+ return -ENOBUFS;
+
+ /* Reserve one more to match check for '\0' */
+ if (!data)
+ p++;
+
+ return p - data;
+}
+
+static ssize_t ctx_restore_mid_bb_show(struct config_item *item, char *page)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ return wa_bb_show(dev, dev->config.ctx_restore_mid_bb, page, SZ_4K);
+}
+
+static ssize_t ctx_restore_post_bb_show(struct config_item *item, char *page)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ return wa_bb_show(dev, dev->config.ctx_restore_post_bb, page, SZ_4K);
+}
+
+static void wa_bb_append(struct wa_bb *wa_bb, u32 val)
+{
+ if (wa_bb->cs)
+ wa_bb->cs[wa_bb->len] = val;
+
+ wa_bb->len++;
+}
+
+static ssize_t parse_hex(const char *line, u32 *pval)
+{
+ char numstr[12];
+ const char *p;
+ ssize_t numlen;
+
+ p = line + strspn(line, " \t");
+ if (!*p || *p == '\n')
+ return 0;
+
+ numlen = strcspn(p, " \t\n");
+ if (!numlen || numlen >= sizeof(numstr) - 1)
+ return -EINVAL;
+
+ memcpy(numstr, p, numlen);
+ numstr[numlen] = '\0';
+ p += numlen;
+
+ if (kstrtou32(numstr, 16, pval))
+ return -EINVAL;
+
+ return p - line;
+}
+
+/*
+ * Parse lines with the format
+ *
+ * <engine-class> cmd <u32> <u32...>
+ * <engine-class> reg <u32_addr> <u32_val>
+ *
+ * and optionally save them in @wa_bb[i].cs is non-NULL.
+ *
+ * Return the number of dwords parsed.
+ */
+static ssize_t parse_wa_bb_lines(const char *lines,
+ struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX])
+{
+ ssize_t dwords = 0, ret;
+ const char *p;
+
+ for (p = lines; *p; p++) {
+ const struct engine_info *info = NULL;
+ u32 val, val2;
+
+ /* Also allow empty lines */
+ p += strspn(p, " \t\n");
+ if (!*p)
+ break;
+
+ ret = parse_engine(p, " \t\n", NULL, &info);
+ if (ret < 0)
+ return ret;
+
+ p += ret;
+ p += strspn(p, " \t");
+
+ if (str_has_prefix(p, "cmd")) {
+ for (p += strlen("cmd"); *p;) {
+ ret = parse_hex(p, &val);
+ if (ret < 0)
+ return -EINVAL;
+ if (!ret)
+ break;
+
+ p += ret;
+ dwords++;
+ wa_bb_append(&wa_bb[info->engine_class], val);
+ }
+ } else if (str_has_prefix(p, "reg")) {
+ p += strlen("reg");
+ ret = parse_hex(p, &val);
+ if (ret <= 0)
+ return -EINVAL;
+
+ p += ret;
+ ret = parse_hex(p, &val2);
+ if (ret <= 0)
+ return -EINVAL;
+
+ p += ret;
+ dwords += 3;
+ wa_bb_append(&wa_bb[info->engine_class],
+ MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1));
+ wa_bb_append(&wa_bb[info->engine_class], val);
+ wa_bb_append(&wa_bb[info->engine_class], val2);
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ return dwords;
+}
+
+static ssize_t wa_bb_store(struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX],
+ struct xe_config_group_device *dev,
+ const char *page, size_t len)
+{
+ /* tmp_wa_bb must match wa_bb's size */
+ struct wa_bb tmp_wa_bb[XE_ENGINE_CLASS_MAX] = { };
+ ssize_t count, class;
+ u32 *tmp;
+
+ /* 1. Count dwords - wa_bb[i].cs is NULL for all classes */
+ count = parse_wa_bb_lines(page, tmp_wa_bb);
+ if (count < 0)
+ return count;
+
+ guard(mutex)(&dev->lock);
+
+ if (is_bound(dev))
+ return -EBUSY;
+
+ /*
+ * 2. Allocate a u32 array and set the pointers to the right positions
+ * according to the length of each class' wa_bb
+ */
+ tmp = krealloc(wa_bb[0].cs, count * sizeof(u32), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (!count) {
+ memset(wa_bb, 0, sizeof(tmp_wa_bb));
+ return len;
+ }
+
+ for (class = 0, count = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+ tmp_wa_bb[class].cs = tmp + count;
+ count += tmp_wa_bb[class].len;
+ tmp_wa_bb[class].len = 0;
+ }
+
+ /* 3. Parse wa_bb lines again, this time saving the values */
+ count = parse_wa_bb_lines(page, tmp_wa_bb);
+ if (count < 0)
+ return count;
+
+ memcpy(wa_bb, tmp_wa_bb, sizeof(tmp_wa_bb));
+
+ return len;
+}
+
+static ssize_t ctx_restore_mid_bb_store(struct config_item *item,
+ const char *data, size_t sz)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ return wa_bb_store(dev->config.ctx_restore_mid_bb, dev, data, sz);
+}
+
+static ssize_t ctx_restore_post_bb_store(struct config_item *item,
+ const char *data, size_t sz)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ return wa_bb_store(dev->config.ctx_restore_post_bb, dev, data, sz);
+}
+
+CONFIGFS_ATTR(, ctx_restore_mid_bb);
+CONFIGFS_ATTR(, ctx_restore_post_bb);
CONFIGFS_ATTR(, enable_psmi);
CONFIGFS_ATTR(, engines_allowed);
CONFIGFS_ATTR(, survivability_mode);
static struct configfs_attribute *xe_config_device_attrs[] = {
+ &attr_ctx_restore_mid_bb,
+ &attr_ctx_restore_post_bb,
&attr_enable_psmi,
&attr_engines_allowed,
&attr_survivability_mode,
@@ -355,6 +688,8 @@ static void xe_config_device_release(struct config_item *item)
struct xe_config_group_device *dev = to_xe_config_group_device(item);
mutex_destroy(&dev->lock);
+
+ kfree(dev->config.ctx_restore_post_bb[0].cs);
kfree(dev);
}
@@ -362,8 +697,26 @@ static struct configfs_item_operations xe_config_device_ops = {
.release = xe_config_device_release,
};
+static bool xe_config_device_is_visible(struct config_item *item,
+ struct configfs_attribute *attr, int n)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ if (attr == &attr_survivability_mode) {
+ if (!dev->desc->is_dgfx || dev->desc->platform < XE_BATTLEMAGE)
+ return false;
+ }
+
+ return true;
+}
+
+static struct configfs_group_operations xe_config_device_group_ops = {
+ .is_visible = xe_config_device_is_visible,
+};
+
static const struct config_item_type xe_config_device_type = {
.ct_item_ops = &xe_config_device_ops,
+ .ct_group_ops = &xe_config_device_group_ops,
.ct_attrs = xe_config_device_attrs,
.ct_owner = THIS_MODULE,
};
@@ -442,6 +795,7 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro
if (!dev)
return ERR_PTR(-ENOMEM);
+ dev->desc = match;
set_device_defaults(&dev->config);
config_group_init_type_name(&dev->group, name, &xe_config_device_type);
@@ -451,12 +805,12 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro
return &dev->group;
}
-static struct configfs_group_operations xe_config_device_group_ops = {
+static struct configfs_group_operations xe_config_group_ops = {
.make_group = xe_config_make_device_group,
};
static const struct config_item_type xe_configfs_type = {
- .ct_group_ops = &xe_config_device_group_ops,
+ .ct_group_ops = &xe_config_group_ops,
.ct_owner = THIS_MODULE,
};
@@ -543,23 +897,6 @@ bool xe_configfs_get_survivability_mode(struct pci_dev *pdev)
}
/**
- * xe_configfs_clear_survivability_mode - clear configfs survivability mode
- * @pdev: pci device
- */
-void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
-{
- struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
-
- if (!dev)
- return;
-
- guard(mutex)(&dev->lock);
- dev->config.survivability_mode = 0;
-
- config_group_put(&dev->group);
-}
-
-/**
* xe_configfs_get_engines_allowed - get engine allowed mask from configfs
* @pdev: pci device
*
@@ -594,11 +931,63 @@ bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev)
return false;
ret = dev->config.enable_psmi;
- config_item_put(&dev->group.cg_item);
+ config_group_put(&dev->group);
return ret;
}
+/**
+ * xe_configfs_get_ctx_restore_mid_bb - get configfs ctx_restore_mid_bb setting
+ * @pdev: pci device
+ * @class: hw engine class
+ * @cs: pointer to the bb to use - only valid during probe
+ *
+ * Return: Number of dwords used in the mid_ctx_restore setting in configfs
+ */
+u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev,
+ enum xe_engine_class class,
+ const u32 **cs)
+{
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+ u32 len;
+
+ if (!dev)
+ return 0;
+
+ if (cs)
+ *cs = dev->config.ctx_restore_mid_bb[class].cs;
+
+ len = dev->config.ctx_restore_mid_bb[class].len;
+ config_group_put(&dev->group);
+
+ return len;
+}
+
+/**
+ * xe_configfs_get_ctx_restore_post_bb - get configfs ctx_restore_post_bb setting
+ * @pdev: pci device
+ * @class: hw engine class
+ * @cs: pointer to the bb to use - only valid during probe
+ *
+ * Return: Number of dwords used in the post_ctx_restore setting in configfs
+ */
+u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev,
+ enum xe_engine_class class,
+ const u32 **cs)
+{
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+ u32 len;
+
+ if (!dev)
+ return 0;
+
+ *cs = dev->config.ctx_restore_post_bb[class].cs;
+ len = dev->config.ctx_restore_post_bb[class].len;
+ config_group_put(&dev->group);
+
+ return len;
+}
+
int __init xe_configfs_init(void)
{
int ret;
@@ -614,7 +1003,7 @@ int __init xe_configfs_init(void)
return 0;
}
-void __exit xe_configfs_exit(void)
+void xe_configfs_exit(void)
{
configfs_unregister_subsystem(&xe_configfs);
mutex_destroy(&xe_configfs.su_mutex);
diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
index 58c8c3164000..c61e0e47ed94 100644
--- a/drivers/gpu/drm/xe/xe_configfs.h
+++ b/drivers/gpu/drm/xe/xe_configfs.h
@@ -8,6 +8,8 @@
#include <linux/limits.h>
#include <linux/types.h>
+#include <xe_hw_engine_types.h>
+
struct pci_dev;
#if IS_ENABLED(CONFIG_CONFIGFS_FS)
@@ -15,17 +17,23 @@ int xe_configfs_init(void);
void xe_configfs_exit(void);
void xe_configfs_check_device(struct pci_dev *pdev);
bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
-void xe_configfs_clear_survivability_mode(struct pci_dev *pdev);
u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev);
bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev);
+u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs);
+u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs);
#else
static inline int xe_configfs_init(void) { return 0; }
static inline void xe_configfs_exit(void) { }
static inline void xe_configfs_check_device(struct pci_dev *pdev) { }
static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; }
-static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { }
static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; }
static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; }
+static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs) { return 0; }
+static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs) { return 0; }
#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 8d6df6bd885e..cd977dbd1ef6 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -24,7 +24,9 @@
#include "xe_pxp_debugfs.h"
#include "xe_sriov.h"
#include "xe_sriov_pf.h"
+#include "xe_sriov_vf.h"
#include "xe_step.h"
+#include "xe_tile_debugfs.h"
#include "xe_wa.h"
#include "xe_vsec.h"
@@ -38,7 +40,7 @@ DECLARE_FAULT_ATTR(gt_reset_failure);
DECLARE_FAULT_ATTR(inject_csc_hw_error);
static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio,
- u32 offset, char *name, struct drm_printer *p)
+ u32 offset, const char *name, struct drm_printer *p)
{
u64 residency = 0;
int ret;
@@ -134,9 +136,9 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data)
p = drm_seq_file_printer(m);
xe_pm_runtime_get(xe);
mmio = xe_root_tile_mmio(xe);
- struct {
+ static const struct {
u32 offset;
- char *name;
+ const char *name;
} residencies[] = {
{BMG_G2_RESIDENCY_OFFSET, "Package G2"},
{BMG_G6_RESIDENCY_OFFSET, "Package G6"},
@@ -163,9 +165,9 @@ static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data)
xe_pm_runtime_get(xe);
mmio = xe_root_tile_mmio(xe);
- struct {
+ static const struct {
u32 offset;
- char *name;
+ const char *name;
} residencies[] = {
{BMG_PCIE_LINK_L0_RESIDENCY_OFFSET, "PCIE LINK L0 RESIDENCY"},
{BMG_PCIE_LINK_L1_RESIDENCY_OFFSET, "PCIE LINK L1 RESIDENCY"},
@@ -329,23 +331,44 @@ static const struct file_operations atomic_svm_timeslice_ms_fops = {
.write = atomic_svm_timeslice_ms_set,
};
-static void create_tile_debugfs(struct xe_tile *tile, struct dentry *root)
+static ssize_t disable_late_binding_show(struct file *f, char __user *ubuf,
+ size_t size, loff_t *pos)
{
- char name[8];
+ struct xe_device *xe = file_inode(f)->i_private;
+ struct xe_late_bind *late_bind = &xe->late_bind;
+ char buf[32];
+ int len;
- snprintf(name, sizeof(name), "tile%u", tile->id);
- tile->debugfs = debugfs_create_dir(name, root);
- if (IS_ERR(tile->debugfs))
- return;
+ len = scnprintf(buf, sizeof(buf), "%d\n", late_bind->disable);
+
+ return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t disable_late_binding_set(struct file *f, const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ struct xe_late_bind *late_bind = &xe->late_bind;
+ u32 uval;
+ ssize_t ret;
- /*
- * Store the xe_tile pointer as private data of the tile/ directory
- * node so other tile specific attributes under that directory may
- * refer to it by looking at its parent node private data.
- */
- tile->debugfs->d_inode->i_private = tile;
+ ret = kstrtouint_from_user(ubuf, size, sizeof(uval), &uval);
+ if (ret)
+ return ret;
+
+ if (uval > 1)
+ return -EINVAL;
+
+ late_bind->disable = !!uval;
+ return size;
}
+static const struct file_operations disable_late_binding_fops = {
+ .owner = THIS_MODULE,
+ .read = disable_late_binding_show,
+ .write = disable_late_binding_set,
+};
+
void xe_debugfs_register(struct xe_device *xe)
{
struct ttm_device *bdev = &xe->ttm;
@@ -362,7 +385,7 @@ void xe_debugfs_register(struct xe_device *xe)
ARRAY_SIZE(debugfs_list),
root, minor);
- if (xe->info.platform == XE_BATTLEMAGE) {
+ if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) {
drm_debugfs_create_files(debugfs_residencies,
ARRAY_SIZE(debugfs_residencies),
root, minor);
@@ -379,6 +402,9 @@ void xe_debugfs_register(struct xe_device *xe)
debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe,
&atomic_svm_timeslice_ms_fops);
+ debugfs_create_file("disable_late_binding", 0600, root, xe,
+ &disable_late_binding_fops);
+
for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
man = ttm_manager_type(bdev, mem_type);
@@ -398,7 +424,7 @@ void xe_debugfs_register(struct xe_device *xe)
ttm_resource_manager_create_debugfs(man, root, "stolen_mm");
for_each_tile(tile, xe, tile_id)
- create_tile_debugfs(tile, root);
+ xe_tile_debugfs_register(tile);
for_each_gt(gt, xe, id)
xe_gt_debugfs_register(gt);
@@ -411,4 +437,6 @@ void xe_debugfs_register(struct xe_device *xe)
if (IS_SRIOV_PF(xe))
xe_sriov_pf_debugfs_register(xe, root);
+ else if (IS_SRIOV_VF(xe))
+ xe_sriov_vf_debugfs_register(xe, root);
}
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 9e4773a17ef8..2883b39c9b37 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -45,6 +45,7 @@
#include "xe_hwmon.h"
#include "xe_i2c.h"
#include "xe_irq.h"
+#include "xe_late_bind_fw.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_nvm.h"
@@ -457,6 +458,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
if (err)
goto err;
+ xe_validation_device_init(&xe->val);
+
init_waitqueue_head(&xe->ufence_wq);
init_rwsem(&xe->usm.lock);
@@ -530,7 +533,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe)
* re-init and saving/restoring (or re-populating) the wiped memory. Since we
* perform the FLR as the very last action before releasing access to the HW
* during the driver release flow, we don't attempt recovery at all, because
- * if/when a new instance of i915 is bound to the device it will do a full
+ * if/when a new instance of Xe is bound to the device it will do a full
* re-init anyway.
*/
static void __xe_driver_flr(struct xe_device *xe)
@@ -682,16 +685,16 @@ static int wait_for_lmem_ready(struct xe_device *xe)
}
ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
-static void sriov_update_device_info(struct xe_device *xe)
+static void vf_update_device_info(struct xe_device *xe)
{
+ xe_assert(xe, IS_SRIOV_VF(xe));
/* disable features that are not available/applicable to VFs */
- if (IS_SRIOV_VF(xe)) {
- xe->info.probe_display = 0;
- xe->info.has_heci_cscfi = 0;
- xe->info.has_heci_gscfi = 0;
- xe->info.skip_guc_pc = 1;
- xe->info.skip_pcode = 1;
- }
+ xe->info.probe_display = 0;
+ xe->info.has_heci_cscfi = 0;
+ xe->info.has_heci_gscfi = 0;
+ xe->info.has_late_bind = 0;
+ xe->info.skip_guc_pc = 1;
+ xe->info.skip_pcode = 1;
}
static int xe_device_vram_alloc(struct xe_device *xe)
@@ -732,7 +735,8 @@ int xe_device_probe_early(struct xe_device *xe)
xe_sriov_probe_early(xe);
- sriov_update_device_info(xe);
+ if (IS_SRIOV_VF(xe))
+ vf_update_device_info(xe);
err = xe_pcode_probe_early(xe);
if (err || xe_survivability_mode_is_requested(xe)) {
@@ -901,6 +905,10 @@ int xe_device_probe(struct xe_device *xe)
if (err)
return err;
+ err = xe_late_bind_init(&xe->late_bind);
+ if (err)
+ return err;
+
err = xe_oa_init(xe);
if (err)
return err;
@@ -950,7 +958,7 @@ int xe_device_probe(struct xe_device *xe)
xe_vsec_init(xe);
- err = xe_sriov_late_init(xe);
+ err = xe_sriov_init_late(xe);
if (err)
goto err_unregister_display;
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
index 6ee422594b56..c5151c86a98a 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -71,6 +71,15 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR_RW(vram_d3cold_threshold);
+static struct attribute *vram_attrs[] = {
+ &dev_attr_vram_d3cold_threshold.attr,
+ NULL
+};
+
+static const struct attribute_group vram_attr_group = {
+ .attrs = vram_attrs,
+};
+
static ssize_t
lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf)
{
@@ -149,41 +158,16 @@ out:
}
static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version);
-static int late_bind_create_files(struct device *dev)
-{
- struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
- struct xe_tile *root = xe_device_get_root_tile(xe);
- u32 cap = 0;
- int ret;
-
- xe_pm_runtime_get(xe);
-
- ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0),
- &cap, NULL);
- if (ret) {
- if (ret == -ENXIO) {
- drm_dbg(&xe->drm, "Late binding not supported by firmware\n");
- ret = 0;
- }
- goto out;
- }
-
- if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) {
- ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr);
- if (ret)
- goto out;
- }
-
- if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap))
- ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr);
-out:
- xe_pm_runtime_put(xe);
-
- return ret;
-}
+static struct attribute *late_bind_attrs[] = {
+ &dev_attr_lb_fan_control_version.attr,
+ &dev_attr_lb_voltage_regulator_version.attr,
+ NULL
+};
-static void late_bind_remove_files(struct device *dev)
+static umode_t late_bind_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
{
+ struct device *dev = kobj_to_dev(kobj);
struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
struct xe_tile *root = xe_device_get_root_tile(xe);
u32 cap = 0;
@@ -193,18 +177,25 @@ static void late_bind_remove_files(struct device *dev)
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0),
&cap, NULL);
+ xe_pm_runtime_put(xe);
if (ret)
- goto out;
+ return 0;
- if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap))
- sysfs_remove_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr);
+ if (attr == &dev_attr_lb_fan_control_version.attr &&
+ REG_FIELD_GET(V1_FAN_SUPPORTED, cap))
+ return attr->mode;
+ if (attr == &dev_attr_lb_voltage_regulator_version.attr &&
+ REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap))
+ return attr->mode;
- if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap))
- sysfs_remove_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr);
-out:
- xe_pm_runtime_put(xe);
+ return 0;
}
+static const struct attribute_group late_bind_attr_group = {
+ .attrs = late_bind_attrs,
+ .is_visible = late_bind_attr_is_visible,
+};
+
/**
* DOC: PCIe Gen5 Limitations
*
@@ -278,24 +269,15 @@ auto_link_downgrade_status_show(struct device *dev, struct device_attribute *att
}
static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status);
-static const struct attribute *auto_link_downgrade_attrs[] = {
+static struct attribute *auto_link_downgrade_attrs[] = {
&dev_attr_auto_link_downgrade_capable.attr,
&dev_attr_auto_link_downgrade_status.attr,
NULL
};
-static void xe_device_sysfs_fini(void *arg)
-{
- struct xe_device *xe = arg;
-
- if (xe->d3cold.capable)
- sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
-
- if (xe->info.platform == XE_BATTLEMAGE) {
- sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs);
- late_bind_remove_files(xe->drm.dev);
- }
-}
+static const struct attribute_group auto_link_downgrade_attr_group = {
+ .attrs = auto_link_downgrade_attrs,
+};
int xe_device_sysfs_init(struct xe_device *xe)
{
@@ -303,20 +285,20 @@ int xe_device_sysfs_init(struct xe_device *xe)
int ret;
if (xe->d3cold.capable) {
- ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
+ ret = devm_device_add_group(dev, &vram_attr_group);
if (ret)
return ret;
}
- if (xe->info.platform == XE_BATTLEMAGE) {
- ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs);
+ if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) {
+ ret = devm_device_add_group(dev, &auto_link_downgrade_attr_group);
if (ret)
return ret;
- ret = late_bind_create_files(dev);
+ ret = devm_device_add_group(dev, &late_bind_attr_group);
if (ret)
return ret;
}
- return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe);
+ return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 1e780f8a2a8c..74d7af830b85 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -14,6 +14,7 @@
#include "xe_devcoredump_types.h"
#include "xe_heci_gsc.h"
+#include "xe_late_bind_fw_types.h"
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
#include "xe_oa_types.h"
@@ -26,6 +27,7 @@
#include "xe_sriov_vf_ccs_types.h"
#include "xe_step_types.h"
#include "xe_survivability_mode_types.h"
+#include "xe_validation.h"
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
#define TEST_VM_OPS_ERROR
@@ -183,9 +185,6 @@ struct xe_tile {
struct {
/** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
struct xe_ggtt_node *ggtt_balloon[2];
-
- /** @sriov.vf.ccs: CCS read and write contexts for VF. */
- struct xe_tile_vf_ccs ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
} vf;
} sriov;
@@ -282,6 +281,8 @@ struct xe_device {
u8 has_heci_cscfi:1;
/** @info.has_heci_gscfi: device has heci gscfi */
u8 has_heci_gscfi:1;
+ /** @info.has_late_bind: Device has firmware late binding support */
+ u8 has_late_bind:1;
/** @info.has_llc: Device has a shared CPU+GPU last level cache */
u8 has_llc:1;
/** @info.has_mbx_power_limits: Device has support to manage power limits using
@@ -535,6 +536,9 @@ struct xe_device {
/** @nvm: discrete graphics non-volatile memory */
struct intel_dg_nvm_dev *nvm;
+ /** @late_bind: xe mei late bind interface */
+ struct xe_late_bind late_bind;
+
/** @oa: oa observation subsystem */
struct xe_oa oa;
@@ -586,6 +590,8 @@ struct xe_device {
*/
atomic64_t global_total_pages;
#endif
+ /** @val: The domain for exhaustive eviction, which is currently per device. */
+ struct xe_validation_device val;
/** @psmi: GPU debugging via additional validation HW */
struct {
@@ -595,6 +601,13 @@ struct xe_device {
u8 region_mask;
} psmi;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+ /** @g2g_test_array: for testing G2G communications */
+ u32 *g2g_test_array;
+ /** @g2g_test_count: for testing G2G communications */
+ atomic_t g2g_test_count;
+#endif
+
/* private: */
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 95d06bd65b0f..a7d67725c3ee 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -51,6 +51,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
struct drm_gem_object *obj = attach->dmabuf->priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
struct xe_device *xe = xe_bo_device(bo);
+ struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
int ret;
/*
@@ -63,7 +64,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
return -EINVAL;
}
- ret = xe_bo_migrate(bo, XE_PL_TT);
+ ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
if (ret) {
if (ret != -EINTR && ret != -ERESTARTSYS)
drm_dbg(&xe->drm,
@@ -72,7 +73,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
return ret;
}
- ret = xe_bo_pin_external(bo, true);
+ ret = xe_bo_pin_external(bo, true, exec);
xe_assert(xe, !ret);
return 0;
@@ -92,6 +93,7 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
struct sg_table *sgt;
int r = 0;
@@ -100,9 +102,9 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
if (!xe_bo_is_pinned(bo)) {
if (!attach->peer2peer)
- r = xe_bo_migrate(bo, XE_PL_TT);
+ r = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
else
- r = xe_bo_validate(bo, NULL, false);
+ r = xe_bo_validate(bo, NULL, false, exec);
if (r)
return ERR_PTR(r);
}
@@ -161,15 +163,26 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
struct xe_bo *bo = gem_to_xe_bo(obj);
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ int ret = 0;
if (!reads)
return 0;
/* Can we do interruptible lock here? */
- xe_bo_lock(bo, false);
- (void)xe_bo_migrate(bo, XE_PL_TT);
- xe_bo_unlock(bo);
+ xe_validation_guard(&ctx, &xe_bo_device(bo)->val, &exec, (struct xe_val_flags) {}, ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ break;
+
+ ret = xe_bo_migrate(bo, XE_PL_TT, NULL, &exec);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ }
+ /* If we failed, cpu-access takes place in current placement. */
return 0;
}
@@ -220,32 +233,45 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
{
struct dma_resv *resv = dma_buf->resv;
struct xe_device *xe = to_xe_device(dev);
+ struct xe_validation_ctx ctx;
+ struct drm_gem_object *dummy_obj;
+ struct drm_exec exec;
struct xe_bo *bo;
- int ret;
-
- dma_resv_lock(resv, NULL);
- bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
- 0, /* Will require 1way or 2way for vm_bind */
- ttm_bo_type_sg, XE_BO_FLAG_SYSTEM);
- if (IS_ERR(bo)) {
- ret = PTR_ERR(bo);
- goto error;
+ int ret = 0;
+
+ dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
+ if (!dummy_obj)
+ return ERR_PTR(-ENOMEM);
+
+ dummy_obj->resv = resv;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) {
+ ret = drm_exec_lock_obj(&exec, dummy_obj);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ break;
+
+ bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
+ 0, /* Will require 1way or 2way for vm_bind */
+ ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ break;
+ }
}
- dma_resv_unlock(resv);
-
- return &bo->ttm.base;
+ drm_gem_object_put(dummy_obj);
-error:
- dma_resv_unlock(resv);
- return ERR_PTR(ret);
+ return ret ? ERR_PTR(ret) : &bo->ttm.base;
}
static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = attach->importer_priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
- XE_WARN_ON(xe_bo_evict(bo));
+ XE_WARN_ON(xe_bo_evict(bo, exec));
}
static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index fdd514fec5ef..f5cfdf29fde3 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -617,9 +617,8 @@ static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
size = stream->per_xecore_buf_size * last_xecore;
- bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
- size, ~0ull, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
+ bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false);
if (IS_ERR(bo)) {
kfree(stream->xecore_buf);
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 374c831e691b..7715e74bb945 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -19,6 +19,7 @@
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
+#include "xe_svm.h"
#include "xe_vm.h"
/**
@@ -97,9 +98,13 @@
static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{
struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
+ int ret;
/* The fence slot added here is intended for the exec sched job. */
- return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
+ xe_vm_set_validation_exec(vm, &vm_exec->exec);
+ ret = xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
+ xe_vm_set_validation_exec(vm, NULL);
+ return ret;
}
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -115,10 +120,10 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
struct drm_exec *exec = &vm_exec.exec;
u32 i, num_syncs, num_ufence = 0;
+ struct xe_validation_ctx ctx;
struct xe_sched_job *job;
struct xe_vm *vm;
bool write_locked, skip_retry = false;
- ktime_t end = 0;
int err = 0;
struct xe_hw_engine_group *group;
enum xe_hw_engine_group_execution_mode mode, previous_mode;
@@ -246,17 +251,12 @@ retry:
if (err)
goto err_unlock_list;
- vm_exec.vm = &vm->gpuvm;
- vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
- if (xe_vm_in_lr_mode(vm)) {
- drm_exec_init(exec, vm_exec.flags, 0);
- } else {
- err = drm_gpuvm_exec_lock(&vm_exec);
- if (err) {
- if (xe_vm_validate_should_retry(exec, err, &end))
- err = -EAGAIN;
+ if (!xe_vm_in_lr_mode(vm)) {
+ vm_exec.vm = &vm->gpuvm;
+ vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
+ err = xe_validation_exec_lock(&ctx, &vm_exec, &xe->val);
+ if (err)
goto err_unlock_list;
- }
}
if (xe_vm_is_closed_or_banned(q->vm)) {
@@ -303,7 +303,7 @@ retry:
if (err)
goto err_put_job;
- err = down_read_interruptible(&vm->userptr.notifier_lock);
+ err = xe_svm_notifier_lock_interruptible(vm);
if (err)
goto err_put_job;
@@ -345,12 +345,13 @@ retry:
err_repin:
if (!xe_vm_in_lr_mode(vm))
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
err_put_job:
if (err)
xe_sched_job_put(job);
err_exec:
- drm_exec_fini(exec);
+ if (!xe_vm_in_lr_mode(vm))
+ xe_validation_ctx_fini(&ctx);
err_unlock_list:
up_read(&vm->lock);
if (err == -EAGAIN && !skip_retry)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 063c89d981e5..37b2b93b73d6 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -199,6 +199,16 @@ err_lrc:
return err;
}
+static void __xe_exec_queue_fini(struct xe_exec_queue *q)
+{
+ int i;
+
+ q->ops->fini(q);
+
+ for (i = 0; i < q->width; ++i)
+ xe_lrc_put(q->lrc[i]);
+}
+
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
u32 logical_mask, u16 width,
struct xe_hw_engine *hwe, u32 flags,
@@ -229,11 +239,13 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
if (xe_exec_queue_uses_pxp(q)) {
err = xe_pxp_exec_queue_add(xe->pxp, q);
if (err)
- goto err_post_alloc;
+ goto err_post_init;
}
return q;
+err_post_init:
+ __xe_exec_queue_fini(q);
err_post_alloc:
__xe_exec_queue_free(q);
return ERR_PTR(err);
@@ -331,13 +343,11 @@ void xe_exec_queue_destroy(struct kref *ref)
xe_exec_queue_put(eq);
}
- q->ops->fini(q);
+ q->ops->destroy(q);
}
void xe_exec_queue_fini(struct xe_exec_queue *q)
{
- int i;
-
/*
* Before releasing our ref to lrc and xef, accumulate our run ticks
* and wakeup any waiters.
@@ -346,9 +356,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
wake_up_var(&q->xef->exec_queue.pending_removal);
- for (i = 0; i < q->width; ++i)
- xe_lrc_put(q->lrc[i]);
-
+ __xe_exec_queue_fini(q);
__xe_exec_queue_free(q);
}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index ba443a497b38..27b76cf9da89 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -181,8 +181,14 @@ struct xe_exec_queue_ops {
int (*init)(struct xe_exec_queue *q);
/** @kill: Kill inflight submissions for backend */
void (*kill)(struct xe_exec_queue *q);
- /** @fini: Fini exec queue for submission backend */
+ /** @fini: Undoes the init() for submission backend */
void (*fini)(struct xe_exec_queue *q);
+ /**
+ * @destroy: Destroy exec queue for submission backend. The backend
+ * function must call xe_exec_queue_fini() (which will in turn call the
+ * fini() backend function) to ensure the queue is properly cleaned up.
+ */
+ void (*destroy)(struct xe_exec_queue *q);
/** @set_priority: Set priority for exec queue */
int (*set_priority)(struct xe_exec_queue *q,
enum xe_exec_queue_priority priority);
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 788f56b066b6..f83d421ac9d3 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -385,10 +385,20 @@ err_free:
return err;
}
-static void execlist_exec_queue_fini_async(struct work_struct *w)
+static void execlist_exec_queue_fini(struct xe_exec_queue *q)
+{
+ struct xe_execlist_exec_queue *exl = q->execlist;
+
+ drm_sched_entity_fini(&exl->entity);
+ drm_sched_fini(&exl->sched);
+
+ kfree(exl);
+}
+
+static void execlist_exec_queue_destroy_async(struct work_struct *w)
{
struct xe_execlist_exec_queue *ee =
- container_of(w, struct xe_execlist_exec_queue, fini_async);
+ container_of(w, struct xe_execlist_exec_queue, destroy_async);
struct xe_exec_queue *q = ee->q;
struct xe_execlist_exec_queue *exl = q->execlist;
struct xe_device *xe = gt_to_xe(q->gt);
@@ -401,10 +411,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w)
list_del(&exl->active_link);
spin_unlock_irqrestore(&exl->port->lock, flags);
- drm_sched_entity_fini(&exl->entity);
- drm_sched_fini(&exl->sched);
- kfree(exl);
-
xe_exec_queue_fini(q);
}
@@ -413,10 +419,10 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q)
/* NIY */
}
-static void execlist_exec_queue_fini(struct xe_exec_queue *q)
+static void execlist_exec_queue_destroy(struct xe_exec_queue *q)
{
- INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
- queue_work(system_unbound_wq, &q->execlist->fini_async);
+ INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async);
+ queue_work(system_unbound_wq, &q->execlist->destroy_async);
}
static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
@@ -467,6 +473,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
.init = execlist_exec_queue_init,
.kill = execlist_exec_queue_kill,
.fini = execlist_exec_queue_fini,
+ .destroy = execlist_exec_queue_destroy,
.set_priority = execlist_exec_queue_set_priority,
.set_timeslice = execlist_exec_queue_set_timeslice,
.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h
index 415140936f11..92c4ba52db0c 100644
--- a/drivers/gpu/drm/xe/xe_execlist_types.h
+++ b/drivers/gpu/drm/xe/xe_execlist_types.h
@@ -42,7 +42,7 @@ struct xe_execlist_exec_queue {
bool has_run;
- struct work_struct fini_async;
+ struct work_struct destroy_async;
enum xe_exec_queue_priority active_priority;
struct list_head active_link;
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 71c7690a92b3..7fdd0a97a628 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -28,6 +28,7 @@
#include "xe_pm.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
+#include "xe_tile_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_tlb_inval.h"
#include "xe_wa.h"
@@ -269,7 +270,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
gsm_size = probe_gsm_size(pdev);
if (gsm_size == 0) {
- drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
+ xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n");
return -ENOMEM;
}
@@ -466,8 +467,8 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf));
- xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n",
- node->start, node->start + node->size, buf, description);
+ xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n",
+ node->start, node->start + node->size, buf, description);
}
}
@@ -499,9 +500,8 @@ int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64
err = drm_mm_reserve_node(&ggtt->mm, &node->base);
- if (xe_gt_WARN(ggtt->tile->primary_gt, err,
- "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
- node->base.start, node->base.start + node->base.size, ERR_PTR(err)))
+ if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
+ node->base.start, node->base.start + node->base.size, ERR_PTR(err)))
return err;
xe_ggtt_dump_node(ggtt, &node->base, "balloon");
@@ -731,7 +731,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo)
}
static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
- u64 start, u64 end)
+ u64 start, u64 end, struct drm_exec *exec)
{
u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
u8 tile_id = ggtt->tile->id;
@@ -746,7 +746,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
return 0;
}
- err = xe_bo_validate(bo, NULL, false);
+ err = xe_bo_validate(bo, NULL, false, exec);
if (err)
return err;
@@ -788,25 +788,28 @@ out:
* @bo: the &xe_bo to be inserted
* @start: address where it will be inserted
* @end: end of the range where it will be inserted
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Return: 0 on success or a negative error code on failure.
*/
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
- u64 start, u64 end)
+ u64 start, u64 end, struct drm_exec *exec)
{
- return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
+ return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec);
}
/**
* xe_ggtt_insert_bo - Insert BO into GGTT
* @ggtt: the &xe_ggtt where bo will be inserted
* @bo: the &xe_bo to be inserted
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo,
+ struct drm_exec *exec)
{
- return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
+ return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index fbe1e397d05d..75fc7a1efea7 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -10,6 +10,7 @@
struct drm_printer;
struct xe_tile;
+struct drm_exec;
struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile);
int xe_ggtt_init_early(struct xe_ggtt *ggtt);
@@ -31,9 +32,9 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node);
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
struct xe_bo *bo, u16 pat_index);
void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo);
-int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, struct drm_exec *exec);
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
- u64 start, u64 end);
+ u64 start, u64 end, struct drm_exec *exec);
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare);
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index f5ae28af60d4..83d61bf8ec62 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -136,10 +136,10 @@ static int query_compatibility_version(struct xe_gsc *gsc)
u64 ggtt_offset;
int err;
- bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(xe, tile, GSC_VER_PKT_SZ * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo)) {
xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 34505a6d93ed..3e0ad7e5b5df 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -98,7 +98,7 @@ void xe_gt_sanitize(struct xe_gt *gt)
* FIXME: if xe_uc_sanitize is called here, on TGL driver will not
* reload
*/
- gt->uc.guc.submission_state.enabled = false;
+ xe_guc_submit_disable(&gt->uc.guc);
}
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index bf3a67b5951c..f253e2df4907 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -31,6 +31,7 @@
#include "xe_reg_whitelist.h"
#include "xe_sa.h"
#include "xe_sriov.h"
+#include "xe_sriov_vf_ccs.h"
#include "xe_tuning.h"
#include "xe_uc_debugfs.h"
#include "xe_wa.h"
@@ -123,45 +124,6 @@ static int powergate_info(struct xe_gt *gt, struct drm_printer *p)
return ret;
}
-static int sa_info(struct xe_gt *gt, struct drm_printer *p)
-{
- struct xe_tile *tile = gt_to_tile(gt);
-
- xe_pm_runtime_get(gt_to_xe(gt));
- drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
- xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool));
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
-static int sa_info_vf_ccs(struct xe_gt *gt, struct drm_printer *p)
-{
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_sa_manager *bb_pool;
- enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
-
- if (!IS_VF_CCS_READY(gt_to_xe(gt)))
- return 0;
-
- xe_pm_runtime_get(gt_to_xe(gt));
-
- for_each_ccs_rw_ctx(ctx_id) {
- bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool;
- if (!bb_pool)
- break;
-
- drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
- drm_printf(p, "-------------------------\n");
- drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
- drm_puts(p, "\n");
- }
-
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
static int topology(struct xe_gt *gt, struct drm_printer *p)
{
xe_pm_runtime_get(gt_to_xe(gt));
@@ -316,7 +278,6 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
* - without access to the PF specific data
*/
static const struct drm_info_list vf_safe_debugfs_list[] = {
- {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info},
{"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
{"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
{"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
@@ -327,17 +288,9 @@ static const struct drm_info_list vf_safe_debugfs_list[] = {
{"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
{"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc},
{"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc},
- {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info},
{"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig},
};
-/*
- * only for GT debugfs files which are valid on VF. Not valid on PF.
- */
-static const struct drm_info_list vf_only_debugfs_list[] = {
- {"sa_info_vf_ccs", .show = xe_gt_debugfs_simple_show, .data = sa_info_vf_ccs},
-};
-
/* everything else should be added here */
static const struct drm_info_list pf_only_debugfs_list[] = {
{"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
@@ -363,6 +316,24 @@ static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t
return count;
}
+static ssize_t stats_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *s = file->private_data;
+ struct xe_gt *gt = s->private;
+
+ return write_to_gt_call(userbuf, count, ppos, xe_gt_stats_clear, gt);
+}
+
+static int stats_show(struct seq_file *s, void *unused)
+{
+ struct drm_printer p = drm_seq_file_printer(s);
+ struct xe_gt *gt = s->private;
+
+ return xe_gt_stats_print_info(gt, &p);
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(stats);
+
static void force_reset(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -448,6 +419,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
root->d_inode->i_private = gt;
/* VF safe */
+ debugfs_create_file("stats", 0600, root, gt, &stats_fops);
debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops);
debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops);
@@ -459,11 +431,6 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
drm_debugfs_create_files(pf_only_debugfs_list,
ARRAY_SIZE(pf_only_debugfs_list),
root, minor);
- else
- drm_debugfs_create_files(vf_only_debugfs_list,
- ARRAY_SIZE(vf_only_debugfs_list),
- root, minor);
-
xe_uc_debugfs_register(&gt->uc, root);
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 60d9354e7dbf..4ff1b6b58d6b 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -227,6 +227,33 @@ static ssize_t max_freq_store(struct kobject *kobj,
}
static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq);
+static ssize_t power_profile_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+
+ xe_guc_pc_get_power_profile(dev_to_pc(dev), buff);
+
+ return strlen(buff);
+}
+
+static ssize_t power_profile_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buff, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ int err;
+
+ xe_pm_runtime_get(dev_to_xe(dev));
+ err = xe_guc_pc_set_power_profile(pc, buff);
+ xe_pm_runtime_put(dev_to_xe(dev));
+
+ return err ?: count;
+}
+static struct kobj_attribute attr_power_profile = __ATTR_RW(power_profile);
+
static const struct attribute *freq_attrs[] = {
&attr_act_freq.attr,
&attr_cur_freq.attr,
@@ -236,6 +263,7 @@ static const struct attribute *freq_attrs[] = {
&attr_rpn_freq.attr,
&attr_min_freq.attr,
&attr_max_freq.attr,
+ &attr_power_profile.attr,
NULL
};
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 683ac021a06d..8fb1cae91724 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -362,7 +362,7 @@ fallback:
* @group: pointer to storage for steering group ID
* @instance: pointer to storage for steering instance ID
*/
-void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance)
+void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance)
{
xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index bc06520befab..283a1c9770e2 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -31,7 +31,8 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
u8 *group, u8 *instance);
void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
-void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
+void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt,
+ unsigned int dss, u16 *group, u16 *instance);
u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance);
/*
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index d02d22fb3659..a054d6010ae0 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -87,10 +87,8 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
if (!bo)
return 0;
- err = need_vram_move ? xe_bo_migrate(bo, vram->placement) :
- xe_bo_validate(bo, vm, true);
-
- return err;
+ return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) :
+ xe_bo_validate(bo, vm, true, exec);
}
static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma,
@@ -98,9 +96,9 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma,
{
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct dma_fence *fence;
- ktime_t end = 0;
int err, needs_vram;
lockdep_assert_held_write(&vm->lock);
@@ -129,22 +127,22 @@ retry_userptr:
}
/* Lock VM and BOs dma-resv */
- drm_exec_init(&exec, 0, 0);
+ xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
drm_exec_until_all_locked(&exec) {
err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram);
drm_exec_retry_on_contention(&exec);
- if (xe_vm_validate_should_retry(&exec, err, &end))
- err = -EAGAIN;
+ xe_validation_retry_on_oom(&ctx, &err);
if (err)
goto unlock_dma_resv;
/* Bind VMA only to the GT that has faulted */
trace_xe_vma_pf_bind(vma);
+ xe_vm_set_validation_exec(vm, &exec);
fence = xe_vma_rebind(vm, vma, BIT(tile->id));
+ xe_vm_set_validation_exec(vm, NULL);
if (IS_ERR(fence)) {
err = PTR_ERR(fence);
- if (xe_vm_validate_should_retry(&exec, err, &end))
- err = -EAGAIN;
+ xe_validation_retry_on_oom(&ctx, &err);
goto unlock_dma_resv;
}
}
@@ -153,7 +151,7 @@ retry_userptr:
dma_fence_put(fence);
unlock_dma_resv:
- drm_exec_fini(&exec);
+ xe_validation_ctx_fini(&ctx);
if (err == -EAGAIN)
goto retry_userptr;
@@ -535,6 +533,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
{
struct xe_device *xe = gt_to_xe(gt);
struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct xe_vm *vm;
struct xe_vma *vma;
@@ -564,15 +563,14 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc)
goto unlock_vm;
/* Lock VM and BOs dma-resv */
- drm_exec_init(&exec, 0, 0);
+ xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
drm_exec_until_all_locked(&exec) {
ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram);
drm_exec_retry_on_contention(&exec);
- if (ret)
- break;
+ xe_validation_retry_on_oom(&ctx, &ret);
}
- drm_exec_fini(&exec);
+ xe_validation_ctx_fini(&ctx);
unlock_vm:
up_read(&vm->lock);
xe_vm_put(vm);
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index 11da0228cea7..1313d32862db 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -6,18 +6,22 @@
#ifndef _XE_GT_PRINTK_H_
#define _XE_GT_PRINTK_H_
-#include <drm/drm_print.h>
-
#include "xe_gt_types.h"
+#include "xe_tile_printk.h"
+
+#define __XE_GT_PRINTK_FMT(_gt, _fmt, _args...) "GT%u: " _fmt, (_gt)->info.id, ##_args
#define xe_gt_printk(_gt, _level, _fmt, ...) \
- drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+ xe_tile_printk((_gt)->tile, _level, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
+
+#define xe_gt_err(_gt, _fmt, ...) \
+ xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
#define xe_gt_err_once(_gt, _fmt, ...) \
xe_gt_printk((_gt), err_once, _fmt, ##__VA_ARGS__)
-#define xe_gt_err(_gt, _fmt, ...) \
- xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
+#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
+ xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
#define xe_gt_warn(_gt, _fmt, ...) \
xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__)
@@ -31,20 +35,20 @@
#define xe_gt_dbg(_gt, _fmt, ...) \
xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__)
-#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
- xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
+#define xe_gt_WARN_type(_gt, _type, _condition, _fmt, ...) \
+ xe_tile_WARN##_type((_gt)->tile, _condition, _fmt, ## __VA_ARGS__)
#define xe_gt_WARN(_gt, _condition, _fmt, ...) \
- drm_WARN(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+ xe_gt_WARN_type((_gt),, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
#define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \
- drm_WARN_ONCE(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+ xe_gt_WARN_type((_gt), _ONCE, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
#define xe_gt_WARN_ON(_gt, _condition) \
- xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition))
+ xe_gt_WARN((_gt), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
#define xe_gt_WARN_ON_ONCE(_gt, _condition) \
- xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition))
+ xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf)
{
@@ -67,12 +71,12 @@ static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format *
/*
* The original xe_gt_dbg() callsite annotations are useless here,
- * redirect to the tweaked drm_dbg_printer() instead.
+ * redirect to the tweaked xe_tile_dbg_printer() instead.
*/
- dbg = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, NULL);
+ dbg = xe_tile_dbg_printer((gt)->tile);
dbg.origin = p->origin;
- drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf);
+ drm_printf(&dbg, __XE_GT_PRINTK_FMT(gt, "%pV", vaf));
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index c8f0320d032f..6344b5205c08 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -1478,23 +1478,16 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
return 0;
xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M);
- bo = xe_bo_create_locked(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_NEEDS_2M |
- XE_BO_FLAG_PINNED |
- XE_BO_FLAG_PINNED_LATE_RESTORE);
+ bo = xe_bo_create_pin_range_novm(xe, tile,
+ ALIGN(size, PAGE_SIZE), 0, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_NEEDS_2M |
+ XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_PINNED_LATE_RESTORE);
if (IS_ERR(bo))
return PTR_ERR(bo);
- err = xe_bo_pin(bo);
- xe_bo_unlock(bo);
- if (unlikely(err)) {
- xe_bo_put(bo);
- return err;
- }
-
config->lmem_obj = bo;
if (xe_device_has_lmtt(xe)) {
@@ -1636,7 +1629,6 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs)
u64 fair;
fair = div_u64(available, num_vfs);
- fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */
fair = ALIGN_DOWN(fair, alignment);
#ifdef MAX_FAIR_LMEM
fair = min_t(u64, MAX_FAIR_LMEM, fair);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
index c712111aa30d..44cc612b0a75 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -55,12 +55,12 @@ static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
xe_gt_assert(gt, size % sizeof(u32) == 0);
xe_gt_assert(gt, size == ndwords * sizeof(u32));
- bo = xe_bo_create_pin_map(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ bo = xe_bo_create_pin_map_novm(xe, tile,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE, false);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -91,12 +91,12 @@ static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
xe_gt_assert(gt, size % sizeof(u32) == 0);
xe_gt_assert(gt, size == ndwords * sizeof(u32));
- bo = xe_bo_create_pin_map(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ bo = xe_bo_create_pin_map_novm(xe, tile,
+ ALIGN(size, PAGE_SIZE),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE, false);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
index 30f942671c2b..5f74706bab81 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.c
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -26,11 +26,46 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
atomic64_add(incr, &gt->stats.counters[id]);
}
+#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name
+
static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
- "svm_pagefault_count",
- "tlb_inval_count",
- "vma_pagefault_count",
- "vma_pagefault_kb",
+ DEF_STAT_STR(SVM_PAGEFAULT_COUNT, "svm_pagefault_count"),
+ DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"),
+ DEF_STAT_STR(SVM_TLB_INVAL_COUNT, "svm_tlb_inval_count"),
+ DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"),
+ DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"),
+ DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"),
+ DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"),
+ DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"),
+ DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"),
+ DEF_STAT_STR(SVM_4K_VALID_PAGEFAULT_COUNT, "svm_4K_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_64K_VALID_PAGEFAULT_COUNT, "svm_64K_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_2M_VALID_PAGEFAULT_COUNT, "svm_2M_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_4K_PAGEFAULT_US, "svm_4K_pagefault_us"),
+ DEF_STAT_STR(SVM_64K_PAGEFAULT_US, "svm_64K_pagefault_us"),
+ DEF_STAT_STR(SVM_2M_PAGEFAULT_US, "svm_2M_pagefault_us"),
+ DEF_STAT_STR(SVM_4K_MIGRATE_COUNT, "svm_4K_migrate_count"),
+ DEF_STAT_STR(SVM_64K_MIGRATE_COUNT, "svm_64K_migrate_count"),
+ DEF_STAT_STR(SVM_2M_MIGRATE_COUNT, "svm_2M_migrate_count"),
+ DEF_STAT_STR(SVM_4K_MIGRATE_US, "svm_4K_migrate_us"),
+ DEF_STAT_STR(SVM_64K_MIGRATE_US, "svm_64K_migrate_us"),
+ DEF_STAT_STR(SVM_2M_MIGRATE_US, "svm_2M_migrate_us"),
+ DEF_STAT_STR(SVM_DEVICE_COPY_US, "svm_device_copy_us"),
+ DEF_STAT_STR(SVM_4K_DEVICE_COPY_US, "svm_4K_device_copy_us"),
+ DEF_STAT_STR(SVM_64K_DEVICE_COPY_US, "svm_64K_device_copy_us"),
+ DEF_STAT_STR(SVM_2M_DEVICE_COPY_US, "svm_2M_device_copy_us"),
+ DEF_STAT_STR(SVM_CPU_COPY_US, "svm_cpu_copy_us"),
+ DEF_STAT_STR(SVM_4K_CPU_COPY_US, "svm_4K_cpu_copy_us"),
+ DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"),
+ DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"),
+ DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"),
+ DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"),
+ DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"),
+ DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"),
+ DEF_STAT_STR(SVM_2M_GET_PAGES_US, "svm_2M_get_pages_us"),
+ DEF_STAT_STR(SVM_4K_BIND_US, "svm_4K_bind_us"),
+ DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"),
+ DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"),
};
/**
@@ -50,3 +85,17 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
return 0;
}
+
+/**
+ * xe_gt_stats_clear - Clear the GT stats
+ * @gt: GT structure
+ *
+ * This clear (zeros) all the available GT stats.
+ */
+void xe_gt_stats_clear(struct xe_gt *gt)
+{
+ int id;
+
+ for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id)
+ atomic64_set(&gt->stats.counters[id], 0);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h
index 38325ef53617..e8aea32bc971 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats.h
@@ -13,6 +13,7 @@ struct drm_printer;
#ifdef CONFIG_DEBUG_FS
int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
+void xe_gt_stats_clear(struct xe_gt *gt);
void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
#else
static inline void
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
index be3244d7133c..d8348a8de2e1 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -9,8 +9,41 @@
enum xe_gt_stats_id {
XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
XE_GT_STATS_ID_TLB_INVAL,
+ XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT,
+ XE_GT_STATS_ID_SVM_TLB_INVAL_US,
XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT,
XE_GT_STATS_ID_VMA_PAGEFAULT_KB,
+ XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_4K_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_64K_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_2M_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_4K_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_64K_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_2M_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_4K_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_64K_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_2M_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_4K_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_64K_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_2M_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_DEVICE_COPY_KB,
+ XE_GT_STATS_ID_SVM_CPU_COPY_KB,
+ XE_GT_STATS_ID_SVM_4K_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_64K_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_2M_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_4K_BIND_US,
+ XE_GT_STATS_ID_SVM_64K_BIND_US,
+ XE_GT_STATS_ID_SVM_2M_BIND_US,
/* must be the last entry */
__XE_GT_STATS_NUM_IDS,
};
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index a0baa560dd71..4e61c5e39bcb 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -12,6 +12,7 @@
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
+#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_mmio.h"
#include "xe_wa.h"
@@ -122,6 +123,21 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
}
}
+bool xe_gt_topology_report_l3(struct xe_gt *gt)
+{
+ /*
+ * No known userspace needs/uses the L3 bank mask reported by
+ * the media GT, and the hardware itself is known to report bogus
+ * values on several platforms. Only report L3 bank mask as part
+ * of the media GT's topology on pre-Xe3 platforms since that's
+ * already part of our ABI.
+ */
+ if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) >= 30)
+ return false;
+
+ return true;
+}
+
static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
@@ -129,16 +145,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
struct xe_mmio *mmio = &gt->mmio;
u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3);
- /*
- * PTL platforms with media version 30.00 do not provide proper values
- * for the media GT's L3 bank registers. Skip the readout since we
- * don't have any way to obtain real values.
- *
- * This may get re-described as an official workaround in the future,
- * but there's no tracking number assigned yet so we use a custom
- * OOB workaround descriptor.
- */
- if (XE_GT_WA(gt, no_media_l3))
+ if (!xe_gt_topology_report_l3(gt))
return;
if (GRAPHICS_VER(xe) >= 30) {
@@ -275,8 +282,9 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "EU type: %s\n",
eu_type_to_str(gt->fuse_topo.eu_type));
- drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
- gt->fuse_topo.l3_bank_mask);
+ if (xe_gt_topology_report_l3(gt))
+ drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
+ gt->fuse_topo.l3_bank_mask);
}
/*
@@ -328,3 +336,19 @@ bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
{
return test_bit(dss, gt->fuse_topo.c_dss_mask);
}
+
+bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt)
+{
+ unsigned int xecore;
+ int last_group = -1;
+ u16 group, instance;
+
+ for_each_dss_steering(xecore, gt, group, instance) {
+ if (last_group != group) {
+ if (group - last_group > 1)
+ return true;
+ last_group = group;
+ }
+ }
+ return false;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index c8140704ad4c..5e62f5949b7b 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -47,4 +47,8 @@ xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss);
bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss);
+bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt);
+
+bool xe_gt_topology_report_l3(struct xe_gt *gt);
+
#endif /* _XE_GT_TOPOLOGY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 37d06c51180c..00789844ea4d 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -74,8 +74,7 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc)
if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
flags |= GUC_LOG_DISABLED;
else
- flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
- GUC_LOG_VERBOSITY_SHIFT;
+ flags |= FIELD_PREP(GUC_LOG_VERBOSITY, GUC_LOG_LEVEL_TO_VERBOSITY(level));
return flags;
}
@@ -122,22 +121,14 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
- BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
- (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
- BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
- (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
- BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
- (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
-
flags = GUC_LOG_VALID |
GUC_LOG_NOTIFY_ON_HALF_FULL |
CAPTURE_FLAG |
LOG_FLAG |
- ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
- ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
- ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
- GUC_LOG_CAPTURE_SHIFT) |
- (offset << GUC_LOG_BUF_ADDR_SHIFT);
+ FIELD_PREP(GUC_LOG_CRASH, CRASH_BUFFER_SIZE / LOG_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_DEBUG, DEBUG_BUFFER_SIZE / LOG_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_CAPTURE, CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_BUF_ADDR, offset);
#undef LOG_UNIT
#undef LOG_FLAG
@@ -150,7 +141,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
static u32 guc_ctl_ads_flags(struct xe_guc *guc)
{
u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT;
- u32 flags = ads << GUC_ADS_ADDR_SHIFT;
+ u32 flags = FIELD_PREP(GUC_ADS_ADDR, ads);
return flags;
}
@@ -709,10 +700,6 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
- ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo);
- if (ret)
- return ret;
-
return 0;
}
@@ -847,6 +834,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
+ ret = xe_guc_ct_init_post_hwconfig(&guc->ct);
+ if (ret)
+ return ret;
+
guc_init_params_post_hwconfig(guc);
ret = xe_guc_submit_init(guc, ~0);
@@ -888,9 +879,7 @@ int xe_guc_post_load_init(struct xe_guc *guc)
return ret;
}
- guc->submission_state.enabled = true;
-
- return 0;
+ return xe_guc_submit_enable(guc);
}
int xe_guc_reset(struct xe_guc *guc)
@@ -1066,7 +1055,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
#endif
#define GUC_LOAD_TIME_WARN_MS 200
-static void guc_wait_ucode(struct xe_guc *guc)
+static int guc_wait_ucode(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
struct xe_mmio *mmio = &gt->mmio;
@@ -1173,7 +1162,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
break;
}
- xe_device_declare_wedged(gt_to_xe(gt));
+ return -EPROTO;
} else if (delta_ms > GUC_LOAD_TIME_WARN_MS) {
xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n",
delta_ms, status, count);
@@ -1185,7 +1174,10 @@ static void guc_wait_ucode(struct xe_guc *guc)
delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
before_freq, status, count);
}
+
+ return 0;
}
+ALLOW_ERROR_INJECTION(guc_wait_ucode, ERRNO);
static int __xe_guc_upload(struct xe_guc *guc)
{
@@ -1217,14 +1209,16 @@ static int __xe_guc_upload(struct xe_guc *guc)
goto out;
/* Wait for authentication */
- guc_wait_ucode(guc);
+ ret = guc_wait_ucode(guc);
+ if (ret)
+ goto out;
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
return 0;
out:
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
- return 0 /* FIXME: ret, don't want to stop load currently */;
+ return ret;
}
static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc)
@@ -1602,7 +1596,7 @@ void xe_guc_sanitize(struct xe_guc *guc)
{
xe_uc_fw_sanitize(&guc->fw);
xe_guc_ct_disable(&guc->ct);
- guc->submission_state.enabled = false;
+ xe_guc_submit_disable(guc);
}
int xe_guc_reset_prepare(struct xe_guc *guc)
@@ -1695,3 +1689,7 @@ void xe_guc_declare_wedged(struct xe_guc *guc)
xe_guc_ct_stop(&guc->ct);
xe_guc_submit_wedge(guc);
}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_g2g_test.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 22cf019a11bf..1cca05967e62 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -53,6 +53,10 @@ void xe_guc_stop(struct xe_guc *guc);
int xe_guc_start(struct xe_guc *guc);
void xe_guc_declare_wedged(struct xe_guc *guc);
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len);
+#endif
+
static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
{
switch (class) {
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 5631722f34f5..58e0b0294a5b 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -339,7 +339,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
if (XE_GT_WA(gt, 13011645652)) {
u32 data = 0xC40;
- guc_waklv_enable(ads, &data, sizeof(data) / sizeof(u32), &offset, &remain,
+ guc_waklv_enable(ads, &data, 1, &offset, &remain,
GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE);
}
@@ -355,7 +355,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
0x0,
0xF,
};
- guc_waklv_enable(ads, data, sizeof(data) / sizeof(u32), &offset, &remain,
+ guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain,
GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 848065a25c44..18f6327bf552 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -39,6 +39,8 @@ static void receive_g2h(struct xe_guc_ct *ct);
static void g2h_worker_func(struct work_struct *w);
static void safe_mode_worker_func(struct work_struct *w);
static void ct_exit_safe_mode(struct xe_guc_ct *ct);
+static void guc_ct_change_state(struct xe_guc_ct *ct,
+ enum xe_guc_ct_state state);
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
enum {
@@ -252,6 +254,13 @@ int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct)
}
ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */
+static void guc_action_disable_ct(void *arg)
+{
+ struct xe_guc_ct *ct = arg;
+
+ guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED);
+}
+
int xe_guc_ct_init(struct xe_guc_ct *ct)
{
struct xe_device *xe = ct_to_xe(ct);
@@ -268,10 +277,39 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
return PTR_ERR(bo);
ct->bo = bo;
- return 0;
+
+ return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct);
}
ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
+/**
+ * xe_guc_ct_init_post_hwconfig - Reinitialize the GuC CTB in VRAM
+ * @ct: the &xe_guc_ct
+ *
+ * Allocate a new BO in VRAM and free the previous BO that was allocated
+ * in system memory (SMEM). Applicable only for DGFX products.
+ *
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_tile *tile = gt_to_tile(gt);
+ int ret;
+
+ xe_assert(xe, !xe_guc_ct_enabled(ct));
+
+ if (IS_DGFX(xe)) {
+ ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo);
+ if (ret)
+ return ret;
+ }
+
+ devm_remove_action(xe->drm.dev, guc_action_disable_ct, ct);
+ return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct);
+}
+
#define desc_read(xe_, guc_ctb__, field_) \
xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
struct guc_ct_buffer_desc, field_)
@@ -1040,11 +1078,15 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
return true;
}
+#define GUC_SEND_RETRY_LIMIT 50
+#define GUC_SEND_RETRY_MSLEEP 5
+
static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buffer, bool no_fail)
{
struct xe_gt *gt = ct_to_gt(ct);
struct g2h_fence g2h_fence;
+ unsigned int retries = 0;
int ret = 0;
/*
@@ -1109,6 +1151,12 @@ retry_same_fence:
xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
action[0], g2h_fence.reason);
mutex_unlock(&ct->lock);
+ if (++retries > GUC_SEND_RETRY_LIMIT) {
+ xe_gt_err(gt, "H2G action %#x reached retry limit=%u, aborting\n",
+ action[0], GUC_SEND_RETRY_LIMIT);
+ return -ELOOP;
+ }
+ msleep(GUC_SEND_RETRY_MSLEEP * retries);
goto retry;
}
if (g2h_fence.fail) {
@@ -1438,6 +1486,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
case XE_GUC_ACTION_NOTIFY_EXCEPTION:
ret = guc_crash_process_msg(ct, action);
break;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+ case XE_GUC_ACTION_TEST_G2G_RECV:
+ ret = xe_guc_g2g_test_notification(guc, payload, adj_len);
+ break;
+#endif
default:
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 18d4225e6502..cf41210ab30a 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -13,6 +13,7 @@ struct xe_device;
int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct);
int xe_guc_ct_init(struct xe_guc_ct *ct);
+int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
void xe_guc_ct_disable(struct xe_guc_ct *ct);
void xe_guc_ct_stop(struct xe_guc_ct *ct);
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
index 92e1f9f41b8c..2b99c1ebdd58 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
@@ -94,16 +94,17 @@ static int allocate_engine_activity_buffers(struct xe_guc *guc,
struct xe_tile *tile = gt_to_tile(gt);
struct xe_bo *bo, *metadata_bo;
- metadata_bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(metadata_size),
- ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE);
+ metadata_bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(metadata_size),
+ ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE,
+ false);
if (IS_ERR(metadata_bo))
return PTR_ERR(metadata_bo);
- bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(size),
- ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE);
+ bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(size),
+ ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, false);
if (IS_ERR(bo)) {
xe_bo_unpin_map_no_vm(metadata_bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
index a3f421e2adc0..c30c0e3ccbbb 100644
--- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
@@ -35,8 +35,8 @@ struct xe_guc_exec_queue {
struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
/** @lr_tdr: long running TDR worker */
struct work_struct lr_tdr;
- /** @fini_async: do final fini async from this worker */
- struct work_struct fini_async;
+ /** @destroy_async: do final destroy async from this worker */
+ struct work_struct destroy_async;
/** @resume_time: time of last resume */
u64 resume_time;
/** @state: GuC specific state for this xe_exec_queue */
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 0508f1064178..50c4c2406132 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -15,6 +15,7 @@
#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4
#define G2H_LEN_DW_DEREGISTER_CONTEXT 3
#define G2H_LEN_DW_TLB_INVALIDATE 3
+#define G2H_LEN_DW_G2G_NOTIFY_MIN 3
#define GUC_ID_MAX 65535
#define GUC_ID_UNKNOWN 0xffffffff
@@ -65,6 +66,7 @@ struct guc_ctxt_registration_info {
u32 hwlrca_hi;
};
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
+#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1)
/* 32-bit KLV structure as used by policy updates and others */
struct guc_klv_generic_dw_t {
@@ -89,13 +91,10 @@ struct guc_update_exec_queue_policy {
#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1)
#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2)
#define GUC_LOG_LOG_ALLOC_UNITS BIT(3)
-#define GUC_LOG_CRASH_SHIFT 4
-#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT)
-#define GUC_LOG_DEBUG_SHIFT 6
-#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT)
-#define GUC_LOG_CAPTURE_SHIFT 10
-#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT)
-#define GUC_LOG_BUF_ADDR_SHIFT 12
+#define GUC_LOG_CRASH REG_GENMASK(5, 4)
+#define GUC_LOG_DEBUG REG_GENMASK(9, 6)
+#define GUC_LOG_CAPTURE REG_GENMASK(11, 10)
+#define GUC_LOG_BUF_ADDR REG_GENMASK(31, 12)
#define GUC_CTL_WA 1
#define GUC_WA_GAM_CREDITS BIT(10)
@@ -117,21 +116,14 @@ struct guc_update_exec_queue_policy {
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
#define GUC_CTL_DEBUG 3
-#define GUC_LOG_VERBOSITY_SHIFT 0
-#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_MIN 0
+#define GUC_LOG_VERBOSITY REG_GENMASK(1, 0)
#define GUC_LOG_VERBOSITY_MAX 3
-#define GUC_LOG_VERBOSITY_MASK 0x0000000f
-#define GUC_LOG_DESTINATION_MASK (3 << 4)
-#define GUC_LOG_DISABLED (1 << 6)
-#define GUC_PROFILE_ENABLED (1 << 7)
+#define GUC_LOG_DESTINATION REG_GENMASK(5, 4)
+#define GUC_LOG_DISABLED BIT(6)
+#define GUC_PROFILE_ENABLED BIT(7)
#define GUC_CTL_ADS 4
-#define GUC_ADS_ADDR_SHIFT 1
-#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT)
+#define GUC_ADS_ADDR REG_GENMASK(21, 1)
#define GUC_CTL_DEVID 5
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
index f1e2b0be90a9..98a47ac42b08 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.h
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -17,7 +17,7 @@ struct xe_device;
#define DEBUG_BUFFER_SIZE SZ_8M
#define CAPTURE_BUFFER_SIZE SZ_2M
#else
-#define CRASH_BUFFER_SIZE SZ_8K
+#define CRASH_BUFFER_SIZE SZ_16K
#define DEBUG_BUFFER_SIZE SZ_64K
#define CAPTURE_BUFFER_SIZE SZ_1M
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 88557e86d637..53fdf59524c4 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -79,6 +79,11 @@
* Xe driver enables SLPC with all of its defaults features and frequency
* selection, which varies per platform.
*
+ * Power profiles add another level of control to SLPC. When power saving
+ * profile is chosen, SLPC will use conservative thresholds to ramp frequency,
+ * thus saving power. Base profile is default and ensures balanced performance
+ * for any workload.
+ *
* Render-C States:
* ================
*
@@ -1171,6 +1176,61 @@ static int pc_action_set_strategy(struct xe_guc_pc *pc, u32 val)
return ret;
}
+static const char *power_profile_to_string(struct xe_guc_pc *pc)
+{
+ switch (pc->power_profile) {
+ case SLPC_POWER_PROFILE_BASE:
+ return "base";
+ case SLPC_POWER_PROFILE_POWER_SAVING:
+ return "power_saving";
+ default:
+ return "invalid";
+ }
+}
+
+void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile)
+{
+ switch (pc->power_profile) {
+ case SLPC_POWER_PROFILE_BASE:
+ sprintf(profile, "[%s] %s\n", "base", "power_saving");
+ break;
+ case SLPC_POWER_PROFILE_POWER_SAVING:
+ sprintf(profile, "%s [%s]\n", "base", "power_saving");
+ break;
+ default:
+ sprintf(profile, "invalid");
+ }
+}
+
+int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf)
+{
+ int ret = 0;
+ u32 val;
+
+ if (strncmp("base", buf, strlen("base")) == 0)
+ val = SLPC_POWER_PROFILE_BASE;
+ else if (strncmp("power_saving", buf, strlen("power_saving")) == 0)
+ val = SLPC_POWER_PROFILE_POWER_SAVING;
+ else
+ return -EINVAL;
+
+ guard(mutex)(&pc->freq_lock);
+ xe_pm_runtime_get_noresume(pc_to_xe(pc));
+
+ ret = pc_action_set_param(pc,
+ SLPC_PARAM_POWER_PROFILE,
+ val);
+ if (ret)
+ xe_gt_err_once(pc_to_gt(pc), "Failed to set power profile to %d: %pe\n",
+ val, ERR_PTR(ret));
+ else
+ pc->power_profile = val;
+
+ xe_pm_runtime_put(pc_to_xe(pc));
+
+ return ret;
+}
+
/**
* xe_guc_pc_start - Start GuC's Power Conservation component
* @pc: Xe_GuC_PC instance
@@ -1249,6 +1309,11 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
/* Enable SLPC Optimized Strategy for compute */
ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
+ /* Set cached value of power_profile */
+ ret = xe_guc_pc_set_power_profile(pc, power_profile_to_string(pc));
+ if (unlikely(ret))
+ xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret));
+
out:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return ret;
@@ -1327,6 +1392,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
pc->bo = bo;
+ pc->power_profile = SLPC_POWER_PROFILE_BASE;
+
return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 52ecdd5ddbff..0e31396f103c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -31,6 +31,8 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq);
int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq);
int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq);
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq);
+int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf);
+void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile);
enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index c02053948a57..5e4ea53fbee6 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -37,6 +37,8 @@ struct xe_guc_pc {
struct mutex freq_lock;
/** @freq_ready: Only handle freq changes, if they are really ready */
bool freq_ready;
+ /** @power_profile: Base or power_saving profile */
+ u32 power_profile;
};
#endif /* _XE_GUC_PC_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 1185b23b1384..53024eb5670b 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -32,6 +32,7 @@
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
+#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
@@ -316,6 +317,71 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}
+/*
+ * Given that we want to guarantee enough RCS throughput to avoid missing
+ * frames, we set the yield policy to 20% of each 80ms interval.
+ */
+#define RC_YIELD_DURATION 80 /* in ms */
+#define RC_YIELD_RATIO 20 /* in percent */
+static u32 *emit_render_compute_yield_klv(u32 *emit)
+{
+ *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
+ *emit++ = RC_YIELD_DURATION;
+ *emit++ = RC_YIELD_RATIO;
+
+ return emit;
+}
+
+#define SCHEDULING_POLICY_MAX_DWORDS 16
+static int guc_init_global_schedule_policy(struct xe_guc *guc)
+{
+ u32 data[SCHEDULING_POLICY_MAX_DWORDS];
+ u32 *emit = data;
+ u32 count = 0;
+ int ret;
+
+ if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
+ return 0;
+
+ *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
+
+ if (CCS_MASK(guc_to_gt(guc)))
+ emit = emit_render_compute_yield_klv(emit);
+
+ count = emit - data;
+ if (count > 1) {
+ xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);
+
+ ret = xe_guc_ct_send_block(&guc->ct, data, count);
+ if (ret < 0) {
+ xe_gt_err(guc_to_gt(guc),
+ "failed to enable GuC scheduling policies: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int xe_guc_submit_enable(struct xe_guc *guc)
+{
+ int ret;
+
+ ret = guc_init_global_schedule_policy(guc);
+ if (ret)
+ return ret;
+
+ guc->submission_state.enabled = true;
+
+ return 0;
+}
+
+void xe_guc_submit_disable(struct xe_guc *guc)
+{
+ guc->submission_state.enabled = false;
+}
+
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
int i;
@@ -558,10 +624,8 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
info.engine_submit_mask = q->logical_mask;
info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
- info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
-
- if (ctx_type != GUC_CONTEXT_NORMAL)
- info.flags |= BIT(ctx_type);
+ info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
+ FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
if (xe_exec_queue_is_parallel(q)) {
u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
@@ -1355,48 +1419,57 @@ rearm:
return DRM_GPU_SCHED_STAT_NO_HANG;
}
-static void __guc_exec_queue_fini_async(struct work_struct *w)
+static void guc_exec_queue_fini(struct xe_exec_queue *q)
+{
+ struct xe_guc_exec_queue *ge = q->guc;
+ struct xe_guc *guc = exec_queue_to_guc(q);
+
+ release_guc_id(guc, q);
+ xe_sched_entity_fini(&ge->entity);
+ xe_sched_fini(&ge->sched);
+
+ /*
+ * RCU free due sched being exported via DRM scheduler fences
+ * (timeline name).
+ */
+ kfree_rcu(ge, rcu);
+}
+
+static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
struct xe_guc_exec_queue *ge =
- container_of(w, struct xe_guc_exec_queue, fini_async);
+ container_of(w, struct xe_guc_exec_queue, destroy_async);
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
xe_pm_runtime_get(guc_to_xe(guc));
trace_xe_exec_queue_destroy(q);
- release_guc_id(guc, q);
if (xe_exec_queue_is_lr(q))
cancel_work_sync(&ge->lr_tdr);
/* Confirm no work left behind accessing device structures */
cancel_delayed_work_sync(&ge->sched.base.work_tdr);
- xe_sched_entity_fini(&ge->entity);
- xe_sched_fini(&ge->sched);
- /*
- * RCU free due sched being exported via DRM scheduler fences
- * (timeline name).
- */
- kfree_rcu(ge, rcu);
xe_exec_queue_fini(q);
+
xe_pm_runtime_put(guc_to_xe(guc));
}
-static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
- INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
+ INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);
/* We must block on kernel engines so slabs are empty on driver unload */
if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
- __guc_exec_queue_fini_async(&q->guc->fini_async);
+ __guc_exec_queue_destroy_async(&q->guc->destroy_async);
else
- queue_work(xe->destroy_wq, &q->guc->fini_async);
+ queue_work(xe->destroy_wq, &q->guc->destroy_async);
}
-static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
+static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
/*
* Might be done from within the GPU scheduler, need to do async as we
@@ -1405,7 +1478,7 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
* this we and don't really care when everything is fini'd, just that it
* is.
*/
- guc_exec_queue_fini_async(q);
+ guc_exec_queue_destroy_async(q);
}
static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
@@ -1419,7 +1492,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
if (exec_queue_registered(q))
disable_scheduling_deregister(guc, q);
else
- __guc_exec_queue_fini(guc, q);
+ __guc_exec_queue_destroy(guc, q);
}
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
@@ -1652,14 +1725,14 @@ static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
#define STATIC_MSG_CLEANUP 0
#define STATIC_MSG_SUSPEND 1
#define STATIC_MSG_RESUME 2
-static void guc_exec_queue_fini(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy(struct xe_exec_queue *q)
{
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
guc_exec_queue_add_msg(q, msg, CLEANUP);
else
- __guc_exec_queue_fini(exec_queue_to_guc(q), q);
+ __guc_exec_queue_destroy(exec_queue_to_guc(q), q);
}
static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
@@ -1789,6 +1862,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
.init = guc_exec_queue_init,
.kill = guc_exec_queue_kill,
.fini = guc_exec_queue_fini,
+ .destroy = guc_exec_queue_destroy,
.set_priority = guc_exec_queue_set_priority,
.set_timeslice = guc_exec_queue_set_timeslice,
.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
@@ -1810,7 +1884,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
xe_exec_queue_put(q);
else if (exec_queue_destroyed(q))
- __guc_exec_queue_fini(guc, q);
+ __guc_exec_queue_destroy(guc, q);
}
if (q->guc->suspend_pending) {
set_exec_queue_suspended(q);
@@ -2029,7 +2103,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
if (unlikely(!q)) {
- xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id);
+ xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
return NULL;
}
@@ -2139,7 +2213,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
xe_exec_queue_put(q);
else
- __guc_exec_queue_fini(guc, q);
+ __guc_exec_queue_destroy(guc, q);
}
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
@@ -2528,7 +2602,7 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
}
/**
- * xe_guc_register_exec_queue - Register exec queue for a given context type.
+ * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
* @q: Execution queue
* @ctx_type: Type of the context
*
@@ -2539,15 +2613,17 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
*
* Returns - None.
*/
-void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type)
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
- xe_assert(xe, IS_SRIOV_VF(xe));
- xe_assert(xe, !IS_DGFX(xe));
- xe_assert(xe, (ctx_type > GUC_CONTEXT_NORMAL &&
- ctx_type < GUC_CONTEXT_COUNT));
+ xe_gt_assert(gt, IS_SRIOV_VF(xe));
+ xe_gt_assert(gt, !IS_DGFX(xe));
+ xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
+ ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
+ xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
register_exec_queue(q, ctx_type);
enable_scheduling(q);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index 6b5df5d0956b..78c3f07e31a0 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -13,6 +13,8 @@ struct xe_exec_queue;
struct xe_guc;
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids);
+int xe_guc_submit_enable(struct xe_guc *guc);
+void xe_guc_submit_disable(struct xe_guc *guc);
int xe_guc_submit_reset_prepare(struct xe_guc *guc);
void xe_guc_submit_reset_wait(struct xe_guc *guc);
@@ -46,7 +48,7 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
void
xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot);
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
-void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type);
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type);
int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
deleted file mode 100644
index 57b71956ddf4..000000000000
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ /dev/null
@@ -1,325 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2024 Intel Corporation
- */
-
-#include <linux/scatterlist.h>
-#include <linux/mmu_notifier.h>
-#include <linux/dma-mapping.h>
-#include <linux/memremap.h>
-#include <linux/swap.h>
-#include <linux/hmm.h>
-#include <linux/mm.h>
-#include "xe_hmm.h"
-#include "xe_vm.h"
-#include "xe_bo.h"
-
-static u64 xe_npages_in_range(unsigned long start, unsigned long end)
-{
- return (end - start) >> PAGE_SHIFT;
-}
-
-static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
- struct hmm_range *range, struct rw_semaphore *notifier_sem)
-{
- unsigned long i, npages, hmm_pfn;
- unsigned long num_chunks = 0;
- int ret;
-
- /* HMM docs says this is needed. */
- ret = down_read_interruptible(notifier_sem);
- if (ret)
- return ret;
-
- if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
- up_read(notifier_sem);
- return -EAGAIN;
- }
-
- npages = xe_npages_in_range(range->start, range->end);
- for (i = 0; i < npages;) {
- unsigned long len;
-
- hmm_pfn = range->hmm_pfns[i];
- xe_assert(xe, hmm_pfn & HMM_PFN_VALID);
-
- len = 1UL << hmm_pfn_to_map_order(hmm_pfn);
-
- /* If order > 0 the page may extend beyond range->start */
- len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1);
- i += len;
- num_chunks++;
- }
- up_read(notifier_sem);
-
- return sg_alloc_table(st, num_chunks, GFP_KERNEL);
-}
-
-/**
- * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
- * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
- * and will be used to program GPU page table later.
- * @xe: the xe device who will access the dma-address in sg table
- * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
- * has the pfn numbers of pages that back up this hmm address range.
- * @st: pointer to the sg table.
- * @notifier_sem: The xe notifier lock.
- * @write: whether we write to this range. This decides dma map direction
- * for system pages. If write we map it bi-diretional; otherwise
- * DMA_TO_DEVICE
- *
- * All the contiguous pfns will be collapsed into one entry in
- * the scatter gather table. This is for the purpose of efficiently
- * programming GPU page table.
- *
- * The dma_address in the sg table will later be used by GPU to
- * access memory. So if the memory is system memory, we need to
- * do a dma-mapping so it can be accessed by GPU/DMA.
- *
- * FIXME: This function currently only support pages in system
- * memory. If the memory is GPU local memory (of the GPU who
- * is going to access memory), we need gpu dpa (device physical
- * address), and there is no need of dma-mapping. This is TBD.
- *
- * FIXME: dma-mapping for peer gpu device to access remote gpu's
- * memory. Add this when you support p2p
- *
- * This function allocates the storage of the sg table. It is
- * caller's responsibility to free it calling sg_free_table.
- *
- * Returns 0 if successful; -ENOMEM if fails to allocate memory
- */
-static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
- struct sg_table *st,
- struct rw_semaphore *notifier_sem,
- bool write)
-{
- unsigned long npages = xe_npages_in_range(range->start, range->end);
- struct device *dev = xe->drm.dev;
- struct scatterlist *sgl;
- struct page *page;
- unsigned long i, j;
-
- lockdep_assert_held(notifier_sem);
-
- i = 0;
- for_each_sg(st->sgl, sgl, st->nents, j) {
- unsigned long hmm_pfn, size;
-
- hmm_pfn = range->hmm_pfns[i];
- page = hmm_pfn_to_page(hmm_pfn);
- xe_assert(xe, !is_device_private_page(page));
-
- size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
- size -= page_to_pfn(page) & (size - 1);
- i += size;
-
- if (unlikely(j == st->nents - 1)) {
- xe_assert(xe, i >= npages);
- if (i > npages)
- size -= (i - npages);
-
- sg_mark_end(sgl);
- } else {
- xe_assert(xe, i < npages);
- }
-
- sg_set_page(sgl, page, size << PAGE_SHIFT, 0);
- }
-
- return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
-}
-
-static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
- struct xe_vm *vm = xe_vma_vm(&uvma->vma);
-
- lockdep_assert_held_write(&vm->lock);
- lockdep_assert_held(&vm->userptr.notifier_lock);
-
- mutex_lock(&userptr->unmap_mutex);
- xe_assert(vm->xe, !userptr->mapped);
- userptr->mapped = true;
- mutex_unlock(&userptr->unmap_mutex);
-}
-
-void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
- struct xe_vma *vma = &uvma->vma;
- bool write = !xe_vma_read_only(vma);
- struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_device *xe = vm->xe;
-
- if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) &&
- !lockdep_is_held_type(&vm->lock, 0) &&
- !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
- /* Don't unmap in exec critical section. */
- xe_vm_assert_held(vm);
- /* Don't unmap while mapping the sg. */
- lockdep_assert_held(&vm->lock);
- }
-
- mutex_lock(&userptr->unmap_mutex);
- if (userptr->sg && userptr->mapped)
- dma_unmap_sgtable(xe->drm.dev, userptr->sg,
- write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
- userptr->mapped = false;
- mutex_unlock(&userptr->unmap_mutex);
-}
-
-/**
- * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr
- * @uvma: the userptr vma which hold the scatter gather table
- *
- * With function xe_userptr_populate_range, we allocate storage of
- * the userptr sg table. This is a helper function to free this
- * sg table, and dma unmap the address in the table.
- */
-void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
-
- xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg);
- xe_hmm_userptr_unmap(uvma);
- sg_free_table(userptr->sg);
- userptr->sg = NULL;
-}
-
-/**
- * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual
- * address range
- *
- * @uvma: userptr vma which has information of the range to populate.
- * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller.
- *
- * This function populate the physical pages of a virtual
- * address range. The populated physical pages is saved in
- * userptr's sg table. It is similar to get_user_pages but call
- * hmm_range_fault.
- *
- * This function also read mmu notifier sequence # (
- * mmu_interval_read_begin), for the purpose of later
- * comparison (through mmu_interval_read_retry).
- *
- * This must be called with mmap read or write lock held.
- *
- * This function allocates the storage of the userptr sg table.
- * It is caller's responsibility to free it calling sg_free_table.
- *
- * returns: 0 for success; negative error no on failure
- */
-int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
- bool is_mm_mmap_locked)
-{
- unsigned long timeout =
- jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- unsigned long *pfns;
- struct xe_userptr *userptr;
- struct xe_vma *vma = &uvma->vma;
- u64 userptr_start = xe_vma_userptr(vma);
- u64 userptr_end = userptr_start + xe_vma_size(vma);
- struct xe_vm *vm = xe_vma_vm(vma);
- struct hmm_range hmm_range = {
- .pfn_flags_mask = 0, /* ignore pfns */
- .default_flags = HMM_PFN_REQ_FAULT,
- .start = userptr_start,
- .end = userptr_end,
- .notifier = &uvma->userptr.notifier,
- .dev_private_owner = vm->xe,
- };
- bool write = !xe_vma_read_only(vma);
- unsigned long notifier_seq;
- u64 npages;
- int ret;
-
- userptr = &uvma->userptr;
-
- if (is_mm_mmap_locked)
- mmap_assert_locked(userptr->notifier.mm);
-
- if (vma->gpuva.flags & XE_VMA_DESTROYED)
- return 0;
-
- notifier_seq = mmu_interval_read_begin(&userptr->notifier);
- if (notifier_seq == userptr->notifier_seq)
- return 0;
-
- if (userptr->sg)
- xe_hmm_userptr_free_sg(uvma);
-
- npages = xe_npages_in_range(userptr_start, userptr_end);
- pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
- if (unlikely(!pfns))
- return -ENOMEM;
-
- if (write)
- hmm_range.default_flags |= HMM_PFN_REQ_WRITE;
-
- if (!mmget_not_zero(userptr->notifier.mm)) {
- ret = -EFAULT;
- goto free_pfns;
- }
-
- hmm_range.hmm_pfns = pfns;
-
- while (true) {
- hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);
-
- if (!is_mm_mmap_locked)
- mmap_read_lock(userptr->notifier.mm);
-
- ret = hmm_range_fault(&hmm_range);
-
- if (!is_mm_mmap_locked)
- mmap_read_unlock(userptr->notifier.mm);
-
- if (ret == -EBUSY) {
- if (time_after(jiffies, timeout))
- break;
-
- continue;
- }
- break;
- }
-
- mmput(userptr->notifier.mm);
-
- if (ret)
- goto free_pfns;
-
- ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock);
- if (ret)
- goto free_pfns;
-
- ret = down_read_interruptible(&vm->userptr.notifier_lock);
- if (ret)
- goto free_st;
-
- if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) {
- ret = -EAGAIN;
- goto out_unlock;
- }
-
- ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt,
- &vm->userptr.notifier_lock, write);
- if (ret)
- goto out_unlock;
-
- userptr->sg = &userptr->sgt;
- xe_hmm_userptr_set_mapped(uvma);
- userptr->notifier_seq = hmm_range.notifier_seq;
- up_read(&vm->userptr.notifier_lock);
- kvfree(pfns);
- return 0;
-
-out_unlock:
- up_read(&vm->userptr.notifier_lock);
-free_st:
- sg_free_table(&userptr->sgt);
-free_pfns:
- kvfree(pfns);
- return ret;
-}
diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h
deleted file mode 100644
index 0ea98d8e7bbc..000000000000
--- a/drivers/gpu/drm/xe/xe_hmm.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Copyright © 2024 Intel Corporation
- */
-
-#ifndef _XE_HMM_H_
-#define _XE_HMM_H_
-
-#include <linux/types.h>
-
-struct xe_userptr_vma;
-
-int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked);
-
-void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma);
-
-void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma);
-#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
index 58bee3ffe881..fa4db5f23342 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -213,17 +213,13 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group
err = q->ops->suspend_wait(q);
if (err)
- goto err_suspend;
+ return err;
}
if (need_resume)
xe_hw_engine_group_resume_faulting_lr_jobs(group);
return 0;
-
-err_suspend:
- up_write(&group->mode_sem);
- return err;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 32a76ae6e9dc..b6790589e623 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -286,7 +286,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
*/
static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value)
{
- u64 reg_val = 0, min, max;
+ u32 reg_val = 0;
struct xe_device *xe = hwmon->xe;
struct xe_reg rapl_limit, pkg_power_sku;
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
@@ -294,7 +294,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe
mutex_lock(&hwmon->hwmon_lock);
if (hwmon->xe->info.has_mbx_power_limits) {
- xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)&reg_val);
+ xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val);
} else {
rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
@@ -304,19 +304,21 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe
/* Check if PL limits are disabled. */
if (!(reg_val & PWR_LIM_EN)) {
*value = PL_DISABLE;
- drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n",
+ drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n",
PWR_ATTR_TO_STR(attr), channel, reg_val);
goto unlock;
}
reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val);
- *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
+ *value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power;
/* For platforms with mailbox power limit support clamping would be done by pcode. */
if (!hwmon->xe->info.has_mbx_power_limits) {
- reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku);
- min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
- max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
+ u64 pkg_pwr, min, max;
+
+ pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku);
+ min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr);
+ max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr);
min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
if (min && max)
@@ -493,8 +495,8 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
- u32 x, y, x_w = 2; /* 2 bits */
- u64 r, tau4, out;
+ u32 reg_val, x, y, x_w = 2; /* 2 bits */
+ u64 tau4, out;
int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD;
u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR;
@@ -505,23 +507,24 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
mutex_lock(&hwmon->hwmon_lock);
if (hwmon->xe->info.has_mbx_power_limits) {
- ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r);
+ ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &reg_val);
if (ret) {
drm_err(&hwmon->xe->drm,
- "power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n",
- channel, power_attr, r, ret);
- r = 0;
+ "power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n",
+ channel, power_attr, reg_val, ret);
+ reg_val = 0;
}
} else {
- r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel));
+ reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
+ channel));
}
mutex_unlock(&hwmon->hwmon_lock);
xe_pm_runtime_put(hwmon->xe);
- x = REG_FIELD_GET(PWR_LIM_TIME_X, r);
- y = REG_FIELD_GET(PWR_LIM_TIME_Y, r);
+ x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val);
+ y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val);
/*
* tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17)
@@ -1294,13 +1297,6 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
xe_hwmon_fan_input_read(hwmon, channel, &fan_speed);
}
-static void xe_hwmon_mutex_destroy(void *arg)
-{
- struct xe_hwmon *hwmon = arg;
-
- mutex_destroy(&hwmon->hwmon_lock);
-}
-
int xe_hwmon_register(struct xe_device *xe)
{
struct device *dev = xe->drm.dev;
@@ -1319,8 +1315,7 @@ int xe_hwmon_register(struct xe_device *xe)
if (!hwmon)
return -ENOMEM;
- mutex_init(&hwmon->hwmon_lock);
- ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon);
+ ret = devm_mutex_init(dev, &hwmon->hwmon_lock);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c
index 044dda517b7c..48dfcb41fa08 100644
--- a/drivers/gpu/drm/xe/xe_i2c.c
+++ b/drivers/gpu/drm/xe/xe_i2c.c
@@ -259,7 +259,7 @@ void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold)
return;
if (d3cold)
- xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY);
+ xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0);
drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR));
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c
new file mode 100644
index 000000000000..768442ca7da6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_managed.h>
+#include <drm/intel/i915_component.h>
+#include <drm/intel/intel_lb_mei_interface.h>
+#include <drm/drm_print.h>
+
+#include "xe_device.h"
+#include "xe_late_bind_fw.h"
+#include "xe_pcode.h"
+#include "xe_pcode_api.h"
+#include "xe_pm.h"
+
+/*
+ * The component should load quite quickly in most cases, but it could take
+ * a bit. Using a very big timeout just to cover the worst case scenario
+ */
+#define LB_INIT_TIMEOUT_MS 20000
+
+/*
+ * Retry interval set to 6 seconds, in steps of 200 ms, to allow time for
+ * other OS components to release the MEI CL handle
+ */
+#define LB_FW_LOAD_RETRY_MAXCOUNT 30
+#define LB_FW_LOAD_RETRY_PAUSE_MS 200
+
+static const u32 fw_id_to_type[] = {
+ [XE_LB_FW_FAN_CONTROL] = INTEL_LB_TYPE_FAN_CONTROL,
+ };
+
+static const char * const fw_id_to_name[] = {
+ [XE_LB_FW_FAN_CONTROL] = "fan_control",
+ };
+
+static struct xe_device *
+late_bind_to_xe(struct xe_late_bind *late_bind)
+{
+ return container_of(late_bind, struct xe_device, late_bind);
+}
+
+static struct xe_device *
+late_bind_fw_to_xe(struct xe_late_bind_fw *lb_fw)
+{
+ return container_of(lb_fw, struct xe_device, late_bind.late_bind_fw[lb_fw->id]);
+}
+
+/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */
+static int parse_cpd_header(struct xe_late_bind_fw *lb_fw,
+ const void *data, size_t size, const char *manifest_entry)
+{
+ struct xe_device *xe = late_bind_fw_to_xe(lb_fw);
+ const struct gsc_cpd_header_v2 *header = data;
+ const struct gsc_manifest_header *manifest;
+ const struct gsc_cpd_entry *entry;
+ size_t min_size = sizeof(*header);
+ u32 offset = 0;
+ int i;
+
+ /* manifest_entry is mandatory */
+ xe_assert(xe, manifest_entry);
+
+ if (size < min_size || header->header_marker != GSC_CPD_HEADER_MARKER)
+ return -ENOENT;
+
+ if (header->header_length < sizeof(struct gsc_cpd_header_v2)) {
+ drm_err(&xe->drm, "%s late binding fw: Invalid CPD header length %u!\n",
+ fw_id_to_name[lb_fw->id], header->header_length);
+ return -EINVAL;
+ }
+
+ min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries;
+ if (size < min_size) {
+ drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+ fw_id_to_name[lb_fw->id], size, min_size);
+ return -ENODATA;
+ }
+
+ /* Look for the manifest first */
+ entry = (void *)header + header->header_length;
+ for (i = 0; i < header->num_of_entries; i++, entry++)
+ if (strcmp(entry->name, manifest_entry) == 0)
+ offset = entry->offset & GSC_CPD_ENTRY_OFFSET_MASK;
+
+ if (!offset) {
+ drm_err(&xe->drm, "%s late binding fw: Failed to find manifest_entry\n",
+ fw_id_to_name[lb_fw->id]);
+ return -ENODATA;
+ }
+
+ min_size = offset + sizeof(struct gsc_manifest_header);
+ if (size < min_size) {
+ drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+ fw_id_to_name[lb_fw->id], size, min_size);
+ return -ENODATA;
+ }
+
+ manifest = data + offset;
+
+ lb_fw->version = manifest->fw_version;
+
+ return 0;
+}
+
+/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */
+static int parse_lb_layout(struct xe_late_bind_fw *lb_fw,
+ const void *data, size_t size, const char *fpt_entry)
+{
+ struct xe_device *xe = late_bind_fw_to_xe(lb_fw);
+ const struct csc_fpt_header *header = data;
+ const struct csc_fpt_entry *entry;
+ size_t min_size = sizeof(*header);
+ u32 offset = 0;
+ int i;
+
+ /* fpt_entry is mandatory */
+ xe_assert(xe, fpt_entry);
+
+ if (size < min_size || header->header_marker != CSC_FPT_HEADER_MARKER)
+ return -ENOENT;
+
+ if (header->header_length < sizeof(struct csc_fpt_header)) {
+ drm_err(&xe->drm, "%s late binding fw: Invalid FPT header length %u!\n",
+ fw_id_to_name[lb_fw->id], header->header_length);
+ return -EINVAL;
+ }
+
+ min_size = header->header_length + sizeof(struct csc_fpt_entry) * header->num_of_entries;
+ if (size < min_size) {
+ drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+ fw_id_to_name[lb_fw->id], size, min_size);
+ return -ENODATA;
+ }
+
+ /* Look for the cpd header first */
+ entry = (void *)header + header->header_length;
+ for (i = 0; i < header->num_of_entries; i++, entry++)
+ if (strcmp(entry->name, fpt_entry) == 0)
+ offset = entry->offset;
+
+ if (!offset) {
+ drm_err(&xe->drm, "%s late binding fw: Failed to find fpt_entry\n",
+ fw_id_to_name[lb_fw->id]);
+ return -ENODATA;
+ }
+
+ min_size = offset + sizeof(struct gsc_cpd_header_v2);
+ if (size < min_size) {
+ drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+ fw_id_to_name[lb_fw->id], size, min_size);
+ return -ENODATA;
+ }
+
+ return parse_cpd_header(lb_fw, data + offset, size - offset, "LTES.man");
+}
+
+static const char *xe_late_bind_parse_status(uint32_t status)
+{
+ switch (status) {
+ case INTEL_LB_STATUS_SUCCESS:
+ return "success";
+ case INTEL_LB_STATUS_4ID_MISMATCH:
+ return "4Id Mismatch";
+ case INTEL_LB_STATUS_ARB_FAILURE:
+ return "ARB Failure";
+ case INTEL_LB_STATUS_GENERAL_ERROR:
+ return "General Error";
+ case INTEL_LB_STATUS_INVALID_PARAMS:
+ return "Invalid Params";
+ case INTEL_LB_STATUS_INVALID_SIGNATURE:
+ return "Invalid Signature";
+ case INTEL_LB_STATUS_INVALID_PAYLOAD:
+ return "Invalid Payload";
+ case INTEL_LB_STATUS_TIMEOUT:
+ return "Timeout";
+ default:
+ return "Unknown error";
+ }
+}
+
+static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind, u32 *num_fans)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+
+ return xe_pcode_read(root_tile,
+ PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), num_fans, NULL);
+}
+
+void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct xe_late_bind_fw *lbfw;
+ int fw_id;
+
+ for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+ lbfw = &late_bind->late_bind_fw[fw_id];
+ if (lbfw->payload && late_bind->wq) {
+ drm_dbg(&xe->drm, "Flush work: load %s firmware\n",
+ fw_id_to_name[lbfw->id]);
+ flush_work(&lbfw->work);
+ }
+ }
+}
+
+static void xe_late_bind_work(struct work_struct *work)
+{
+ struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work);
+ struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind,
+ late_bind_fw[lbfw->id]);
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
+ int ret;
+ int slept;
+
+ xe_device_assert_mem_access(xe);
+
+ /* we can queue this before the component is bound */
+ for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
+ if (late_bind->component.ops)
+ break;
+ msleep(100);
+ }
+
+ if (!late_bind->component.ops) {
+ drm_err(&xe->drm, "Late bind component not bound\n");
+ /* Do not re-attempt fw load */
+ drmm_kfree(&xe->drm, (void *)lbfw->payload);
+ lbfw->payload = NULL;
+ goto out;
+ }
+
+ drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]);
+
+ do {
+ ret = late_bind->component.ops->push_payload(late_bind->component.mei_dev,
+ lbfw->type,
+ lbfw->flags,
+ lbfw->payload,
+ lbfw->payload_size);
+ if (!ret)
+ break;
+ msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
+ } while (--retry && ret == -EBUSY);
+
+ if (!ret) {
+ drm_dbg(&xe->drm, "Load %s firmware successful\n",
+ fw_id_to_name[lbfw->id]);
+ goto out;
+ }
+
+ if (ret > 0)
+ drm_err(&xe->drm, "Load %s firmware failed with err %d, %s\n",
+ fw_id_to_name[lbfw->id], ret, xe_late_bind_parse_status(ret));
+ else
+ drm_err(&xe->drm, "Load %s firmware failed with err %d",
+ fw_id_to_name[lbfw->id], ret);
+ /* Do not re-attempt fw load */
+ drmm_kfree(&xe->drm, (void *)lbfw->payload);
+ lbfw->payload = NULL;
+
+out:
+ xe_pm_runtime_put(xe);
+}
+
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct xe_late_bind_fw *lbfw;
+ int fw_id;
+
+ if (!late_bind->component_added)
+ return -ENODEV;
+
+ if (late_bind->disable)
+ return 0;
+
+ for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+ lbfw = &late_bind->late_bind_fw[fw_id];
+ if (lbfw->payload) {
+ xe_pm_runtime_get_noresume(xe);
+ queue_work(late_bind->wq, &lbfw->work);
+ }
+ }
+ return 0;
+}
+
+static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct xe_late_bind_fw *lb_fw;
+ const struct firmware *fw;
+ u32 num_fans;
+ int ret;
+
+ if (fw_id >= XE_LB_FW_MAX_ID)
+ return -EINVAL;
+
+ lb_fw = &late_bind->late_bind_fw[fw_id];
+
+ lb_fw->id = fw_id;
+ lb_fw->type = fw_id_to_type[lb_fw->id];
+ lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT;
+
+ if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) {
+ ret = xe_late_bind_fw_num_fans(late_bind, &num_fans);
+ if (ret) {
+ drm_dbg(&xe->drm, "Failed to read number of fans: %d\n", ret);
+ return 0; /* Not a fatal error, continue without fan control */
+ }
+ drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans);
+ if (!num_fans)
+ return 0;
+ }
+
+ snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), "xe/%s_8086_%04x_%04x_%04x.bin",
+ fw_id_to_name[lb_fw->id], pdev->device,
+ pdev->subsystem_vendor, pdev->subsystem_device);
+
+ drm_dbg(&xe->drm, "Request late binding firmware %s\n", lb_fw->blob_path);
+ ret = firmware_request_nowarn(&fw, lb_fw->blob_path, xe->drm.dev);
+ if (ret) {
+ drm_dbg(&xe->drm, "%s late binding fw not available for current device",
+ fw_id_to_name[lb_fw->id]);
+ return 0;
+ }
+
+ if (fw->size > XE_LB_MAX_PAYLOAD_SIZE) {
+ drm_err(&xe->drm, "Firmware %s size %zu is larger than max pay load size %u\n",
+ lb_fw->blob_path, fw->size, XE_LB_MAX_PAYLOAD_SIZE);
+ release_firmware(fw);
+ return -ENODATA;
+ }
+
+ ret = parse_lb_layout(lb_fw, fw->data, fw->size, "LTES");
+ if (ret)
+ return ret;
+
+ lb_fw->payload_size = fw->size;
+ lb_fw->payload = drmm_kzalloc(&xe->drm, lb_fw->payload_size, GFP_KERNEL);
+ if (!lb_fw->payload) {
+ release_firmware(fw);
+ return -ENOMEM;
+ }
+
+ drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u.%u\n",
+ fw_id_to_name[lb_fw->id], lb_fw->blob_path,
+ lb_fw->version.major, lb_fw->version.minor,
+ lb_fw->version.hotfix, lb_fw->version.build);
+
+ memcpy((void *)lb_fw->payload, fw->data, lb_fw->payload_size);
+ release_firmware(fw);
+ INIT_WORK(&lb_fw->work, xe_late_bind_work);
+
+ return 0;
+}
+
+static int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
+{
+ int ret;
+ int fw_id;
+
+ late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0);
+ if (!late_bind->wq)
+ return -ENOMEM;
+
+ for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+ ret = __xe_late_bind_fw_init(late_bind, fw_id);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int xe_late_bind_component_bind(struct device *xe_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct xe_device *xe = kdev_to_xe_device(xe_kdev);
+ struct xe_late_bind *late_bind = &xe->late_bind;
+
+ late_bind->component.ops = data;
+ late_bind->component.mei_dev = mei_kdev;
+
+ return 0;
+}
+
+static void xe_late_bind_component_unbind(struct device *xe_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct xe_device *xe = kdev_to_xe_device(xe_kdev);
+ struct xe_late_bind *late_bind = &xe->late_bind;
+
+ xe_late_bind_wait_for_worker_completion(late_bind);
+
+ late_bind->component.ops = NULL;
+}
+
+static const struct component_ops xe_late_bind_component_ops = {
+ .bind = xe_late_bind_component_bind,
+ .unbind = xe_late_bind_component_unbind,
+};
+
+static void xe_late_bind_remove(void *arg)
+{
+ struct xe_late_bind *late_bind = arg;
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+
+ xe_late_bind_wait_for_worker_completion(late_bind);
+
+ late_bind->component_added = false;
+
+ component_del(xe->drm.dev, &xe_late_bind_component_ops);
+ if (late_bind->wq) {
+ destroy_workqueue(late_bind->wq);
+ late_bind->wq = NULL;
+ }
+}
+
+/**
+ * xe_late_bind_init() - add xe mei late binding component
+ * @late_bind: pointer to late bind structure.
+ *
+ * Return: 0 if the initialization was successful, a negative errno otherwise.
+ */
+int xe_late_bind_init(struct xe_late_bind *late_bind)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ int err;
+
+ if (!xe->info.has_late_bind)
+ return 0;
+
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_LB) || !IS_ENABLED(CONFIG_INTEL_MEI_GSC)) {
+ drm_info(&xe->drm, "Can't init xe mei late bind missing mei component\n");
+ return 0;
+ }
+
+ err = component_add_typed(xe->drm.dev, &xe_late_bind_component_ops,
+ INTEL_COMPONENT_LB);
+ if (err < 0) {
+ drm_err(&xe->drm, "Failed to add mei late bind component (%pe)\n", ERR_PTR(err));
+ return err;
+ }
+
+ late_bind->component_added = true;
+
+ err = devm_add_action_or_reset(xe->drm.dev, xe_late_bind_remove, late_bind);
+ if (err)
+ return err;
+
+ err = xe_late_bind_fw_init(late_bind);
+ if (err)
+ return err;
+
+ return xe_late_bind_fw_load(late_bind);
+}
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h
new file mode 100644
index 000000000000..07e437390539
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_FW_H_
+#define _XE_LATE_BIND_FW_H_
+
+#include <linux/types.h>
+
+struct xe_late_bind;
+
+int xe_late_bind_init(struct xe_late_bind *late_bind);
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
+void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
new file mode 100644
index 000000000000..0f5da89ce98b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_TYPES_H_
+#define _XE_LATE_BIND_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include "xe_uc_fw_abi.h"
+
+#define XE_LB_MAX_PAYLOAD_SIZE SZ_4K
+
+/**
+ * xe_late_bind_fw_id - enum to determine late binding fw index
+ */
+enum xe_late_bind_fw_id {
+ XE_LB_FW_FAN_CONTROL = 0,
+ XE_LB_FW_MAX_ID
+};
+
+/**
+ * struct xe_late_bind_fw
+ */
+struct xe_late_bind_fw {
+ /** @id: firmware index */
+ u32 id;
+ /** @blob_path: firmware binary path */
+ char blob_path[PATH_MAX];
+ /** @type: firmware type */
+ u32 type;
+ /** @flags: firmware flags */
+ u32 flags;
+ /** @payload: to store the late binding blob */
+ const u8 *payload;
+ /** @payload_size: late binding blob payload_size */
+ size_t payload_size;
+ /** @work: worker to upload latebind blob */
+ struct work_struct work;
+ /** @version: late binding blob manifest version */
+ struct gsc_version version;
+};
+
+/**
+ * struct xe_late_bind_component - Late Binding services component
+ * @mei_dev: device that provide Late Binding service.
+ * @ops: Ops implemented by Late Binding driver, used by Xe driver.
+ *
+ * Communication between Xe and MEI drivers for Late Binding services
+ */
+struct xe_late_bind_component {
+ struct device *mei_dev;
+ const struct intel_lb_component_ops *ops;
+};
+
+/**
+ * struct xe_late_bind
+ */
+struct xe_late_bind {
+ /** @component: struct for communication with mei component */
+ struct xe_late_bind_component component;
+ /** @late_bind_fw: late binding firmware array */
+ struct xe_late_bind_fw late_bind_fw[XE_LB_FW_MAX_ID];
+ /** @wq: workqueue to submit request to download late bind blob */
+ struct workqueue_struct *wq;
+ /** @component_added: whether the component has been added */
+ bool component_added;
+ /** @disable: to block late binding reload during pm resume flow*/
+ bool disable;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index f2bfbfa3efa1..62fc5a1a332d 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -67,12 +67,12 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
goto out;
}
- bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL,
- PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
- lmtt->ops->lmtt_pte_num(level)),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
- XE_BO_FLAG_NEEDS_64K);
+ bo = xe_bo_create_pin_map_novm(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt),
+ PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+ lmtt->ops->lmtt_pte_num(level)),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+ XE_BO_FLAG_NEEDS_64K, false);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_free_pt;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 8f6c3ba47882..47e9df775072 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -8,6 +8,7 @@
#include <generated/xe_wa_oob.h>
#include <linux/ascii85.h>
+#include <linux/panic.h>
#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
@@ -16,6 +17,7 @@
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
@@ -75,11 +77,17 @@ lrc_to_xe(struct xe_lrc *lrc)
static bool
gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
{
+ struct xe_device *xe = gt_to_xe(gt);
+
if (XE_GT_WA(gt, 16010904313) &&
(class == XE_ENGINE_CLASS_RENDER ||
class == XE_ENGINE_CLASS_COMPUTE))
return true;
+ if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+ class, NULL))
+ return true;
+
return false;
}
@@ -1102,6 +1110,64 @@ static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
return cmd - batch;
}
+static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc,
+ struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ struct xe_device *xe = gt_to_xe(lrc->gt);
+ const u32 *user_batch;
+ u32 *cmd = batch;
+ u32 count;
+
+ count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev),
+ hwe->class, &user_batch);
+ if (!count)
+ return 0;
+
+ if (count > max_len)
+ return -ENOSPC;
+
+ /*
+ * This should be used only for tests and validation. Taint the kernel
+ * as anything could be submitted directly in context switches
+ */
+ add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+ memcpy(cmd, user_batch, count * sizeof(u32));
+ cmd += count;
+
+ return cmd - batch;
+}
+
+static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc,
+ struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ struct xe_device *xe = gt_to_xe(lrc->gt);
+ const u32 *user_batch;
+ u32 *cmd = batch;
+ u32 count;
+
+ count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+ hwe->class, &user_batch);
+ if (!count)
+ return 0;
+
+ if (count > max_len)
+ return -ENOSPC;
+
+ /*
+ * This should be used only for tests and validation. Taint the kernel
+ * as anything could be submitted directly in context switches
+ */
+ add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+ memcpy(cmd, user_batch, count * sizeof(u32));
+ cmd += count;
+
+ return cmd - batch;
+}
+
static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
struct xe_hw_engine *hwe,
u32 *batch, size_t max_len)
@@ -1203,6 +1269,7 @@ int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe
{ .setup = setup_timestamp_wa },
{ .setup = setup_invalidate_state_cache_wa },
{ .setup = setup_utilization_wa },
+ { .setup = setup_configfs_post_ctx_restore_bb },
};
struct bo_setup_state state = {
.lrc = lrc,
@@ -1249,8 +1316,12 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
static int
setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
- static struct bo_setup rcs_funcs[] = {
+ static const struct bo_setup rcs_funcs[] = {
{ .setup = setup_timestamp_wa },
+ { .setup = setup_configfs_mid_ctx_restore_bb },
+ };
+ static const struct bo_setup xcs_funcs[] = {
+ { .setup = setup_configfs_mid_ctx_restore_bb },
};
struct bo_setup_state state = {
.lrc = lrc,
@@ -1268,6 +1339,9 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
hwe->class == XE_ENGINE_CLASS_COMPUTE) {
state.funcs = rcs_funcs;
state.num_funcs = ARRAY_SIZE(rcs_funcs);
+ } else {
+ state.funcs = xcs_funcs;
+ state.num_funcs = ARRAY_SIZE(xcs_funcs);
}
if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
@@ -1294,14 +1368,15 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
finish_bo(&state);
kfree(state.buffer);
+ /*
+ * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it
+ * varies per engine class, but the default is good enough
+ */
xe_lrc_write_ctx_reg(lrc,
CTX_CS_INDIRECT_CTX,
(xe_bo_ggtt_addr(lrc->bo) + state.offset) |
/* Size in CLs. */
(state.written * sizeof(u32) / 64));
- xe_lrc_write_ctx_reg(lrc,
- CTX_CS_INDIRECT_CTX_OFFSET,
- CTX_INDIRECT_CTX_OFFSET_DEFAULT);
return 0;
}
@@ -1340,9 +1415,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
if (vm && vm->xef) /* userspace */
bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
- lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
- ttm_bo_type_kernel,
- bo_flags);
+ lrc->bo = xe_bo_create_pin_map_novm(xe, tile,
+ bo_size,
+ ttm_bo_type_kernel,
+ bo_flags, false);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9643442ef101..1d667fa36cf3 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -35,6 +35,7 @@
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
+#include "xe_validation.h"
#include "xe_vm.h"
#include "xe_vram.h"
@@ -173,7 +174,7 @@ static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm,
}
static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
- struct xe_vm *vm)
+ struct xe_vm *vm, struct drm_exec *exec)
{
struct xe_device *xe = tile_to_xe(tile);
u16 pat_index = xe->pat.idx[XE_CACHE_WB];
@@ -200,7 +201,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
num_entries * XE_PAGE_SIZE,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PAGETABLE);
+ XE_BO_FLAG_PAGETABLE, exec);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -393,6 +394,24 @@ struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile)
return m;
}
+static int xe_migrate_lock_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ int err = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ err = xe_migrate_prepare_vm(tile, m, vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ }
+
+ return err;
+}
+
/**
* xe_migrate_init() - Initialize a migrate context
* @m: The migration context
@@ -413,11 +432,9 @@ int xe_migrate_init(struct xe_migrate *m)
if (IS_ERR(vm))
return PTR_ERR(vm);
- xe_vm_lock(vm, false);
- err = xe_migrate_prepare_vm(tile, m, vm);
- xe_vm_unlock(vm);
+ err = xe_migrate_lock_prepare_vm(tile, m, vm);
if (err)
- goto err_out;
+ return err;
if (xe->info.has_usm) {
struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
@@ -842,11 +859,15 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0,
&src_L0_ofs, &src_L0_pt, 0, 0,
avail_pts);
-
- pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
- batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0,
- &dst_L0_ofs, &dst_L0_pt, 0,
- avail_pts, avail_pts);
+ if (copy_only_ccs) {
+ dst_L0_ofs = src_L0_ofs;
+ } else {
+ pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
+ batch_size += pte_update_size(m, pte_flags, dst,
+ &dst_it, &src_L0,
+ &dst_L0_ofs, &dst_L0_pt,
+ 0, avail_pts, avail_pts);
+ }
if (copy_system_ccs) {
xe_assert(xe, type_device);
@@ -876,7 +897,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
xe_res_next(&dst_it, src_L0);
- else
+ else if (!copy_only_ccs)
emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs,
&dst_it, src_L0, dst);
@@ -908,7 +929,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (!fence) {
err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv,
DMA_RESV_USAGE_BOOKKEEP);
- if (!err && src_bo != dst_bo)
+ if (!err && src_bo->ttm.base.resv != dst_bo->ttm.base.resv)
err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv,
DMA_RESV_USAGE_BOOKKEEP);
if (err)
diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c
index 9ca4a5ae1693..33f4ac82fc80 100644
--- a/drivers/gpu/drm/xe/xe_nvm.c
+++ b/drivers/gpu/drm/xe/xe_nvm.c
@@ -35,6 +35,10 @@ static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = {
static void xe_nvm_release_dev(struct device *dev)
{
+ struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev);
+ struct intel_dg_nvm_dev *nvm = container_of(aux, struct intel_dg_nvm_dev, aux_dev);
+
+ kfree(nvm);
}
static bool xe_nvm_non_posted_erase(struct xe_device *xe)
@@ -162,6 +166,5 @@ void xe_nvm_fini(struct xe_device *xe)
auxiliary_device_delete(&nvm->aux_dev);
auxiliary_device_uninit(&nvm->aux_dev);
- kfree(nvm);
xe->nvm = NULL;
}
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index a188bad172ad..a4894eb0d7f3 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -883,9 +883,9 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
{
struct xe_bo *bo;
- bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
- size, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile,
+ size, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 046d330bad34..be91343829dd 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -334,6 +334,7 @@ static const struct xe_device_desc bmg_desc = {
.has_mbx_power_limits = true,
.has_gsc_nvm = 1,
.has_heci_cscfi = 1,
+ .has_late_bind = true,
.has_sriov = true,
.max_gt_per_tile = 2,
.needs_scratch = true,
@@ -510,6 +511,26 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
*revid = REG_FIELD_GET(GMD_ID_REVID, val);
}
+static const struct xe_ip *find_graphics_ip(unsigned int verx100)
+{
+ KUNIT_STATIC_STUB_REDIRECT(find_graphics_ip, verx100);
+
+ for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++)
+ if (graphics_ips[i].verx100 == verx100)
+ return &graphics_ips[i];
+ return NULL;
+}
+
+static const struct xe_ip *find_media_ip(unsigned int verx100)
+{
+ KUNIT_STATIC_STUB_REDIRECT(find_media_ip, verx100);
+
+ for (int i = 0; i < ARRAY_SIZE(media_ips); i++)
+ if (media_ips[i].verx100 == verx100)
+ return &media_ips[i];
+ return NULL;
+}
+
/*
* Read IP version from hardware and select graphics/media IP descriptors
* based on the result.
@@ -527,14 +548,7 @@ static void handle_gmdid(struct xe_device *xe,
read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid);
- for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) {
- if (ver == graphics_ips[i].verx100) {
- *graphics_ip = &graphics_ips[i];
-
- break;
- }
- }
-
+ *graphics_ip = find_graphics_ip(ver);
if (!*graphics_ip) {
drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n",
ver / 100, ver % 100);
@@ -545,14 +559,7 @@ static void handle_gmdid(struct xe_device *xe,
if (ver == 0)
return;
- for (int i = 0; i < ARRAY_SIZE(media_ips); i++) {
- if (ver == media_ips[i].verx100) {
- *media_ip = &media_ips[i];
-
- break;
- }
- }
-
+ *media_ip = find_media_ip(ver);
if (!*media_ip) {
drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n",
ver / 100, ver % 100);
@@ -581,6 +588,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.has_gsc_nvm = desc->has_gsc_nvm;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_heci_cscfi = desc->has_heci_cscfi;
+ xe->info.has_late_bind = desc->has_late_bind;
xe->info.has_llc = desc->has_llc;
xe->info.has_pxp = desc->has_pxp;
xe->info.has_sriov = desc->has_sriov;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index b63002fc0f67..9b9766a3baa3 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -39,6 +39,7 @@ struct xe_device_desc {
u8 has_gsc_nvm:1;
u8 has_heci_gscfi:1;
u8 has_heci_cscfi:1;
+ u8 has_late_bind:1;
u8 has_llc:1;
u8 has_mbx_power_limits:1;
u8 has_pxp:1;
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 6eea4190bbd2..2c5a44377994 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -21,6 +21,7 @@
#include "xe_gt_idle.h"
#include "xe_i2c.h"
#include "xe_irq.h"
+#include "xe_late_bind_fw.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_sriov_vf_ccs.h"
@@ -129,6 +130,8 @@ int xe_pm_suspend(struct xe_device *xe)
if (err)
goto err;
+ xe_late_bind_wait_for_worker_completion(&xe->late_bind);
+
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
@@ -198,7 +201,7 @@ int xe_pm_resume(struct xe_device *xe)
if (err)
goto err;
- xe_i2c_pm_resume(xe, xe->d3cold.allowed);
+ xe_i2c_pm_resume(xe, true);
xe_irq_resume(xe);
@@ -213,9 +216,11 @@ int xe_pm_resume(struct xe_device *xe)
xe_pxp_pm_resume(xe->pxp);
- if (IS_SRIOV_VF(xe))
+ if (IS_VF_CCS_READY(xe))
xe_sriov_vf_ccs_register_context(xe);
+ xe_late_bind_fw_load(&xe->late_bind);
+
drm_dbg(&xe->drm, "Device resumed\n");
return 0;
err:
@@ -598,9 +603,12 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe_pxp_pm_resume(xe->pxp);
- if (IS_SRIOV_VF(xe))
+ if (IS_VF_CCS_READY(xe))
xe_sriov_vf_ccs_register_context(xe);
+ if (xe->d3cold.allowed)
+ xe_late_bind_fw_load(&xe->late_bind);
+
out:
xe_rpm_lockmap_release(xe);
xe_pm_write_callback_task(xe, NULL);
diff --git a/drivers/gpu/drm/xe/xe_printk.h b/drivers/gpu/drm/xe/xe_printk.h
new file mode 100644
index 000000000000..c5be2385aa95
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_printk.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PRINTK_H_
+#define _XE_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+
+#define __XE_PRINTK_FMT(_xe, _fmt, _args...) _fmt, ##_args
+
+#define xe_printk(_xe, _level, _fmt, ...) \
+ drm_##_level(&(_xe)->drm, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_err(_xe, _fmt, ...) \
+ xe_printk((_xe), err, _fmt, ##__VA_ARGS__)
+
+#define xe_err_once(_xe, _fmt, ...) \
+ xe_printk((_xe), err_once, _fmt, ##__VA_ARGS__)
+
+#define xe_err_ratelimited(_xe, _fmt, ...) \
+ xe_printk((_xe), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_warn(_xe, _fmt, ...) \
+ xe_printk((_xe), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_notice(_xe, _fmt, ...) \
+ xe_printk((_xe), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_info(_xe, _fmt, ...) \
+ xe_printk((_xe), info, _fmt, ##__VA_ARGS__)
+
+#define xe_dbg(_xe, _fmt, ...) \
+ xe_printk((_xe), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_WARN_type(_xe, _type, _condition, _fmt, ...) \
+ drm_WARN##_type(&(_xe)->drm, _condition, _fmt, ## __VA_ARGS__)
+
+#define xe_WARN(_xe, _condition, _fmt, ...) \
+ xe_WARN_type((_xe),, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_WARN_ONCE(_xe, _condition, _fmt, ...) \
+ xe_WARN_type((_xe), _ONCE, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_WARN_ON(_xe, _condition) \
+ xe_WARN((_xe), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
+
+#define xe_WARN_ON_ONCE(_xe, _condition) \
+ xe_WARN_ONCE((_xe), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
+
+static inline void __xe_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_device *xe = p->arg;
+
+ xe_err(xe, "%pV", vaf);
+}
+
+static inline void __xe_printfn_info(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_device *xe = p->arg;
+
+ xe_info(xe, "%pV", vaf);
+}
+
+static inline void __xe_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_device *xe = p->arg;
+ struct drm_printer ddp;
+
+ /*
+ * The original xe_dbg() callsite annotations are useless here,
+ * redirect to the tweaked drm_dbg_printer() instead.
+ */
+ ddp = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
+ ddp.origin = p->origin;
+
+ drm_printf(&ddp, __XE_PRINTK_FMT(xe, "%pV", vaf));
+}
+
+/**
+ * xe_err_printer - Construct a &drm_printer that outputs to xe_err()
+ * @xe: the &xe_device pointer to use in xe_err()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_err_printer(struct xe_device *xe)
+{
+ struct drm_printer p = {
+ .printfn = __xe_printfn_err,
+ .arg = xe,
+ };
+ return p;
+}
+
+/**
+ * xe_info_printer - Construct a &drm_printer that outputs to xe_info()
+ * @xe: the &xe_device pointer to use in xe_info()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_info_printer(struct xe_device *xe)
+{
+ struct drm_printer p = {
+ .printfn = __xe_printfn_info,
+ .arg = xe,
+ };
+ return p;
+}
+
+/**
+ * xe_dbg_printer - Construct a &drm_printer that outputs like xe_dbg()
+ * @xe: the &xe_device pointer to use in xe_dbg()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_dbg_printer(struct xe_device *xe)
+{
+ struct drm_printer p = {
+ .printfn = __xe_printfn_dbg,
+ .arg = xe,
+ .origin = (const void *)_THIS_IP_,
+ };
+ return p;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_psmi.c b/drivers/gpu/drm/xe/xe_psmi.c
index a2c9ff5bfd59..45d142191d60 100644
--- a/drivers/gpu/drm/xe/xe_psmi.c
+++ b/drivers/gpu/drm/xe/xe_psmi.c
@@ -68,9 +68,7 @@ static void psmi_cleanup(struct xe_device *xe)
static struct xe_bo *psmi_alloc_object(struct xe_device *xe,
unsigned int id, size_t bo_size)
{
- struct xe_bo *bo = NULL;
struct xe_tile *tile;
- int err;
if (!id || !bo_size)
return NULL;
@@ -78,22 +76,12 @@ static struct xe_bo *psmi_alloc_object(struct xe_device *xe,
tile = &xe->tiles[id - 1];
/* VRAM: Allocate GEM object for the capture buffer */
- bo = xe_bo_create_locked(xe, tile, NULL, bo_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PINNED |
- XE_BO_FLAG_PINNED_LATE_RESTORE |
- XE_BO_FLAG_NEEDS_CPU_ACCESS);
-
- if (!IS_ERR(bo)) {
- /* Buffer written by HW, ensure stays resident */
- err = xe_bo_pin(bo);
- if (err)
- bo = ERR_PTR(err);
- xe_bo_unlock(bo);
- }
-
- return bo;
+ return xe_bo_create_pin_range_novm(xe, tile, bo_size, 0, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_PINNED_LATE_RESTORE |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS);
}
/*
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index c129048a9a09..a1c88f9a6c76 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -13,17 +13,17 @@
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
-#include "xe_tlb_inval_job.h"
#include "xe_migrate.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
#include "xe_sched_job.h"
-#include "xe_sync.h"
#include "xe_svm.h"
+#include "xe_sync.h"
#include "xe_tlb_inval_job.h"
#include "xe_trace.h"
#include "xe_ttm_stolen_mgr.h"
+#include "xe_userptr.h"
#include "xe_vm.h"
struct xe_pt_dir {
@@ -89,6 +89,7 @@ static void xe_pt_free(struct xe_pt *pt)
* @vm: The vm to create for.
* @tile: The tile to create for.
* @level: The page-table level.
+ * @exec: The drm_exec object used to lock the vm.
*
* Allocate and initialize a single struct xe_pt metadata structure. Also
* create the corresponding page-table bo, but don't initialize it. If the
@@ -100,7 +101,7 @@ static void xe_pt_free(struct xe_pt *pt)
* error.
*/
struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
- unsigned int level)
+ unsigned int level, struct drm_exec *exec)
{
struct xe_pt *pt;
struct xe_bo *bo;
@@ -124,9 +125,11 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
pt->level = level;
+
+ drm_WARN_ON(&vm->xe->drm, IS_ERR_OR_NULL(exec));
bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
ttm_bo_type_kernel,
- bo_flags);
+ bo_flags, exec);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto err_kfree;
@@ -590,7 +593,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (covers || !*child) {
u64 flags = 0;
- xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1);
+ xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1,
+ xe_vm_validation_exec(vm));
if (IS_ERR(xe_child))
return PTR_ERR(xe_child);
@@ -729,7 +733,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
return -EAGAIN;
}
if (xe_svm_range_has_dma_mapping(range)) {
- xe_res_first_dma(range->base.dma_addr, 0,
+ xe_res_first_dma(range->base.pages.dma_addr, 0,
range->base.itree.last + 1 - range->base.itree.start,
&curs);
xe_svm_range_debug(range, "BIND PREPARE - MIXED");
@@ -760,8 +764,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
if (!xe_vma_is_null(vma) && !range) {
if (xe_vma_is_userptr(vma))
- xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
- xe_vma_size(vma), &curs);
+ xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0,
+ xe_vma_size(vma), &curs);
else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
xe_vma_size(vma), &curs);
@@ -914,7 +918,7 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
if (xe_vma_bo(vma))
xe_bo_assert_held(xe_vma_bo(vma));
else if (xe_vma_is_userptr(vma))
- lockdep_assert_held(&xe_vma_vm(vma)->userptr.notifier_lock);
+ lockdep_assert_held(&xe_vma_vm(vma)->svm.gpusvm.notifier_lock);
if (!(pt_mask & BIT(tile->id)))
return false;
@@ -1049,7 +1053,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma)
xe_pt_commit_prepare_locks_assert(vma);
if (xe_vma_is_userptr(vma))
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
}
static void xe_pt_commit(struct xe_vma *vma,
@@ -1376,6 +1380,7 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
pt_update_ops, rftree);
}
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
@@ -1406,7 +1411,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
struct xe_userptr_vma *uvma;
unsigned long notifier_seq;
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
if (!xe_vma_is_userptr(vma))
return 0;
@@ -1415,7 +1420,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
if (xe_pt_userptr_inject_eagain(uvma))
xe_vma_userptr_force_invalidate(uvma);
- notifier_seq = uvma->userptr.notifier_seq;
+ notifier_seq = uvma->userptr.pages.notifier_seq;
if (!mmu_interval_read_retry(&uvma->userptr.notifier,
notifier_seq))
@@ -1431,12 +1436,12 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
return 0;
}
-static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
- struct xe_vm_pgtable_update_ops *pt_update)
+static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
+ struct xe_vm_pgtable_update_ops *pt_update)
{
int err = 0;
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
@@ -1454,9 +1459,40 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
case DRM_GPUVA_OP_UNMAP:
break;
case DRM_GPUVA_OP_PREFETCH:
- err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va),
- pt_update);
+ if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))) {
+ struct xe_svm_range *range = op->map_range.range;
+ unsigned long i;
+
+ xe_assert(vm->xe,
+ xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
+ xa_for_each(&op->prefetch_range.range, i, range) {
+ xe_svm_range_debug(range, "PRE-COMMIT");
+
+ if (!xe_svm_range_pages_valid(range)) {
+ xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+ return -ENODATA;
+ }
+ }
+ } else {
+ err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), pt_update);
+ }
+ break;
+#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
+ case DRM_GPUVA_OP_DRIVER:
+ if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
+ struct xe_svm_range *range = op->map_range.range;
+
+ xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
+
+ xe_svm_range_debug(range, "PRE-COMMIT");
+
+ if (!xe_svm_range_pages_valid(range)) {
+ xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+ return -EAGAIN;
+ }
+ }
break;
+#endif
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
@@ -1464,7 +1500,7 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
return err;
}
-static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
{
struct xe_vm *vm = pt_update->vops->vm;
struct xe_vma_ops *vops = pt_update->vops;
@@ -1477,69 +1513,18 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
if (err)
return err;
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
list_for_each_entry(op, &vops->list, link) {
- err = op_check_userptr(vm, op, pt_update_ops);
+ err = op_check_svm_userptr(vm, op, pt_update_ops);
if (err) {
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
break;
}
}
return err;
}
-
-#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
-static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
-{
- struct xe_vm *vm = pt_update->vops->vm;
- struct xe_vma_ops *vops = pt_update->vops;
- struct xe_vma_op *op;
- unsigned long i;
- int err;
-
- err = xe_pt_pre_commit(pt_update);
- if (err)
- return err;
-
- xe_svm_notifier_lock(vm);
-
- list_for_each_entry(op, &vops->list, link) {
- struct xe_svm_range *range = NULL;
-
- if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
- continue;
-
- if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
- xe_assert(vm->xe,
- xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
- xa_for_each(&op->prefetch_range.range, i, range) {
- xe_svm_range_debug(range, "PRE-COMMIT");
-
- if (!xe_svm_range_pages_valid(range)) {
- xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
- xe_svm_notifier_unlock(vm);
- return -ENODATA;
- }
- }
- } else {
- xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
- xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
- range = op->map_range.range;
-
- xe_svm_range_debug(range, "PRE-COMMIT");
-
- if (!xe_svm_range_pages_valid(range)) {
- xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
- xe_svm_notifier_unlock(vm);
- return -EAGAIN;
- }
- }
- }
-
- return 0;
-}
#endif
struct xe_pt_stage_unbind_walk {
@@ -1843,7 +1828,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
xe_vma_start(vma),
xe_vma_end(vma));
++pt_update_ops->current_op;
- pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+ pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma);
/*
* If rebind, we have to invalidate TLB on !LR vms to invalidate
@@ -1951,7 +1936,7 @@ static int unbind_op_prepare(struct xe_tile *tile,
xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma),
xe_vma_end(vma));
++pt_update_ops->current_op;
- pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+ pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma);
pt_update_ops->needs_invalidation = true;
xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries);
@@ -2199,7 +2184,7 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
vma->tile_invalidated & ~BIT(tile->id));
vma->tile_staged &= ~BIT(tile->id);
if (xe_vma_is_userptr(vma)) {
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
to_userptr_vma(vma)->userptr.initial_bind = true;
}
@@ -2235,7 +2220,7 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
if (!vma->tile_present) {
list_del_init(&vma->combined_links.rebind);
if (xe_vma_is_userptr(vma)) {
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
spin_lock(&vm->userptr.invalidated_lock);
list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
@@ -2338,20 +2323,14 @@ static const struct xe_migrate_pt_update_ops migrate_ops = {
.pre_commit = xe_pt_pre_commit,
};
-static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
- .populate = xe_vm_populate_pgtable,
- .clear = xe_migrate_clear_pgtable_callback,
- .pre_commit = xe_pt_userptr_pre_commit,
-};
-
-#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
-static const struct xe_migrate_pt_update_ops svm_migrate_ops = {
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops = {
.populate = xe_vm_populate_pgtable,
.clear = xe_migrate_clear_pgtable_callback,
- .pre_commit = xe_pt_svm_pre_commit,
+ .pre_commit = xe_pt_svm_userptr_pre_commit,
};
#else
-static const struct xe_migrate_pt_update_ops svm_migrate_ops;
+static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops;
#endif
static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q,
@@ -2389,9 +2368,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
int err = 0, i;
struct xe_migrate_pt_update update = {
.ops = pt_update_ops->needs_svm_lock ?
- &svm_migrate_ops :
- pt_update_ops->needs_userptr_lock ?
- &userptr_migrate_ops :
+ &svm_userptr_migrate_ops :
&migrate_ops,
.vops = vops,
.tile_id = tile->id,
@@ -2533,8 +2510,6 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
if (pt_update_ops->needs_svm_lock)
xe_svm_notifier_unlock(vm);
- if (pt_update_ops->needs_userptr_lock)
- up_read(&vm->userptr.notifier_lock);
xe_tlb_inval_job_put(mjob);
xe_tlb_inval_job_put(ijob);
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 5ecf003d513c..4daeebaab5a1 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -10,6 +10,7 @@
#include "xe_pt_types.h"
struct dma_fence;
+struct drm_exec;
struct xe_bo;
struct xe_device;
struct xe_exec_queue;
@@ -29,7 +30,7 @@ struct xe_vma_ops;
unsigned int xe_pt_shift(unsigned int level);
struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
- unsigned int level);
+ unsigned int level, struct drm_exec *exec);
void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
struct xe_pt *pt);
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 17cdd7c7e9f5..881f01e14db8 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -105,8 +105,6 @@ struct xe_vm_pgtable_update_ops {
u32 current_op;
/** @needs_svm_lock: Needs SVM lock */
bool needs_svm_lock;
- /** @needs_userptr_lock: Needs userptr lock */
- bool needs_userptr_lock;
/** @needs_invalidation: Needs invalidation */
bool needs_invalidation;
/**
diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
index 3d62008c99f1..bdbdbbf6a678 100644
--- a/drivers/gpu/drm/xe/xe_pxp.c
+++ b/drivers/gpu/drm/xe/xe_pxp.c
@@ -688,6 +688,7 @@ start:
return ret;
}
+ALLOW_ERROR_INJECTION(xe_pxp_exec_queue_add, ERRNO);
static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock)
{
diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c
index ca95f2a4d4ef..e60526e30030 100644
--- a/drivers/gpu/drm/xe/xe_pxp_submit.c
+++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
@@ -54,8 +54,9 @@ static int allocate_vcs_execution_resources(struct xe_pxp *pxp)
* Each termination is 16 DWORDS, so 4K is enough to contain a
* termination for each sessions.
*/
- bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT,
+ false);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_queue;
@@ -87,7 +88,9 @@ static int allocate_gsc_client_resources(struct xe_gt *gt,
{
struct xe_tile *tile = gt_to_tile(gt);
struct xe_device *xe = tile_to_xe(tile);
+ struct xe_validation_ctx ctx;
struct xe_hw_engine *hwe;
+ struct drm_exec exec;
struct xe_vm *vm;
struct xe_bo *bo;
struct xe_exec_queue *q;
@@ -106,15 +109,26 @@ static int allocate_gsc_client_resources(struct xe_gt *gt,
return PTR_ERR(vm);
/* We allocate a single object for the batch and the in/out memory */
- xe_vm_lock(vm, false);
- bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
- xe_vm_unlock(vm);
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- goto vm_out;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags){}, err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ break;
+
+ bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_NEEDS_UC, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
}
+ if (err)
+ goto vm_out;
fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB);
if (IS_ERR(fence)) {
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 4dbe5732cb7f..2e9ff33ed2fe 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -21,6 +21,7 @@
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
+#include "xe_gt_topology.h"
#include "xe_guc_hwconfig.h"
#include "xe_macros.h"
#include "xe_mmio.h"
@@ -275,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[0].instance = 0;
mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
- if (perfmon_capable())
- mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+ mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
mem_regions->num_mem_regions = 1;
for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
@@ -292,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
man->size;
- if (perfmon_capable()) {
- xe_ttm_vram_get_used(man,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].used,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].cpu_visible_used);
- }
+ xe_ttm_vram_get_used(man,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].used,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].cpu_visible_used);
mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
xe_ttm_vram_get_cpu_visible_size(man);
@@ -477,7 +475,7 @@ static size_t calc_topo_query_size(struct xe_device *xe)
sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss);
/* L3bank mask may not be available for some GTs */
- if (!XE_GT_WA(gt, no_media_l3))
+ if (xe_gt_topology_report_l3(gt))
query_size += sizeof(struct drm_xe_query_topology_mask) +
sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask);
}
@@ -540,7 +538,7 @@ static int query_gt_topology(struct xe_device *xe,
* mask, then it's better to omit L3 from the query rather than
* reporting bogus or zeroed information to userspace.
*/
- if (!XE_GT_WA(gt, no_media_l3)) {
+ if (xe_gt_topology_report_l3(gt)) {
topo.type = DRM_XE_TOPO_L3_BANK;
err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
sizeof(gt->fuse_topo.l3_bank_mask));
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 47ea1521dc80..b5f430d59f80 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -370,3 +370,9 @@ bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt,
{
return xe_configfs_get_psmi_enabled(to_pci_dev(gt_to_xe(gt)->drm.dev));
}
+
+bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
+{
+ return xe_gt_has_discontiguous_dss_groups(gt);
+}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 7951fefdbe04..ac12ddf6cde6 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -480,4 +480,7 @@ bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt,
bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
+bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index 87911fb4eea7..7d2d6de2aabf 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -160,19 +160,15 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size)
}
/**
- * xe_sriov_late_init() - SR-IOV late initialization functions.
+ * xe_sriov_init_late() - SR-IOV late initialization functions.
* @xe: the &xe_device to initialize
*
- * On VF this function will initialize code for CCS migration.
- *
* Return: 0 on success or a negative error code on failure.
*/
-int xe_sriov_late_init(struct xe_device *xe)
+int xe_sriov_init_late(struct xe_device *xe)
{
- int err = 0;
-
- if (IS_VF_CCS_INIT_NEEDED(xe))
- err = xe_sriov_vf_ccs_init(xe);
+ if (IS_SRIOV_VF(xe))
+ return xe_sriov_vf_init_late(xe);
- return err;
+ return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
index 0e0c1abf2d14..6db45df55615 100644
--- a/drivers/gpu/drm/xe/xe_sriov.h
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -18,7 +18,7 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len);
void xe_sriov_probe_early(struct xe_device *xe);
void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p);
int xe_sriov_init(struct xe_device *xe);
-int xe_sriov_late_init(struct xe_device *xe);
+int xe_sriov_init_late(struct xe_device *xe);
static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe)
{
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 5de81f213d83..cdd9f8e78b2a 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -3,6 +3,7 @@
* Copyright © 2023-2024 Intel Corporation
*/
+#include <drm/drm_debugfs.h>
#include <drm/drm_managed.h>
#include "xe_assert.h"
@@ -10,6 +11,7 @@
#include "xe_gt.h"
#include "xe_gt_sriov_printk.h"
#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_submit.h"
#include "xe_irq.h"
@@ -18,6 +20,7 @@
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
+#include "xe_sriov_vf_ccs.h"
#include "xe_tile_sriov_vf.h"
/**
@@ -127,16 +130,66 @@
* | | |
*/
-static bool vf_migration_supported(struct xe_device *xe)
+/**
+ * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is
+ * supported or not.
+ * @xe: the &xe_device to check
+ *
+ * Returns: true if VF migration is supported, false otherwise.
+ */
+bool xe_sriov_vf_migration_supported(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_VF(xe));
+ return xe->sriov.vf.migration.enabled;
+}
+
+static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list va_args;
+
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ va_start(va_args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &va_args;
+ xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf);
+ va_end(va_args);
+
+ xe->sriov.vf.migration.enabled = false;
+}
+
+static void migration_worker_func(struct work_struct *w);
+
+static void vf_migration_init_early(struct xe_device *xe)
{
/*
* TODO: Add conditions to allow specific platforms, when they're
* supported at production quality.
*/
- return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
-}
+ if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+ return vf_disable_migration(xe,
+ "experimental feature not available on production builds");
+
+ if (GRAPHICS_VER(xe) < 20)
+ return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found",
+ GRAPHICS_VER(xe));
+
+ if (!IS_DGFX(xe)) {
+ struct xe_uc_fw_version guc_version;
+
+ xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version);
+ if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0))
+ return vf_disable_migration(xe,
+ "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
+ guc_version.major, guc_version.minor);
+ }
-static void migration_worker_func(struct work_struct *w);
+ INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+
+ xe->sriov.vf.migration.enabled = true;
+ xe_sriov_dbg(xe, "migration support enabled\n");
+}
/**
* xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
@@ -144,10 +197,7 @@ static void migration_worker_func(struct work_struct *w);
*/
void xe_sriov_vf_init_early(struct xe_device *xe)
{
- INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
-
- if (!vf_migration_supported(xe))
- xe_sriov_info(xe, "migration not supported by this module version\n");
+ vf_migration_init_early(xe);
}
/**
@@ -302,8 +352,8 @@ static void vf_post_migration_recovery(struct xe_device *xe)
xe_pm_runtime_get(xe);
vf_post_migration_shutdown(xe);
- if (!vf_migration_supported(xe)) {
- xe_sriov_err(xe, "migration not supported by this module version\n");
+ if (!xe_sriov_vf_migration_supported(xe)) {
+ xe_sriov_err(xe, "migration is not supported\n");
err = -ENOTRECOVERABLE;
goto fail;
}
@@ -378,3 +428,48 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
drm_info(&xe->drm, "VF migration recovery %s\n", started ?
"scheduled" : "already in progress");
}
+
+/**
+ * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions.
+ * @xe: the &xe_device to initialize
+ *
+ * This function initializes code for CCS migration.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vf_init_late(struct xe_device *xe)
+{
+ int err = 0;
+
+ if (xe_sriov_vf_migration_supported(xe))
+ err = xe_sriov_vf_ccs_init(xe);
+
+ return err;
+}
+
+static int sa_info_vf_ccs(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = m->private;
+ struct xe_device *xe = to_xe_device(node->minor->dev);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ xe_sriov_vf_ccs_print(xe, &p);
+ return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+ { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs },
+};
+
+/**
+ * xe_sriov_vf_debugfs_register - Register VF debugfs attributes.
+ * @xe: the &xe_device
+ * @root: the root &dentry
+ *
+ * Prepare debugfs attributes exposed by the VF.
+ */
+void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root)
+{
+ drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list),
+ root, xe->drm.primary);
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h
index 7b8622cff2b7..9e752105ec2a 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
@@ -6,9 +6,15 @@
#ifndef _XE_SRIOV_VF_H_
#define _XE_SRIOV_VF_H_
+#include <linux/types.h>
+
+struct dentry;
struct xe_device;
void xe_sriov_vf_init_early(struct xe_device *xe);
+int xe_sriov_vf_init_late(struct xe_device *xe);
void xe_sriov_vf_start_migration_recovery(struct xe_device *xe);
+bool xe_sriov_vf_migration_supported(struct xe_device *xe);
+void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root);
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index 4872e43eb440..8dec616c37c9 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -13,8 +13,10 @@
#include "xe_guc_submit.h"
#include "xe_lrc.h"
#include "xe_migrate.h"
+#include "xe_pm.h"
#include "xe_sa.h"
#include "xe_sriov_printk.h"
+#include "xe_sriov_vf.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_sriov_vf_ccs_types.h"
@@ -135,7 +137,7 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)
return round_up(bb_pool_size * 2, SZ_1M);
}
-static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx)
+static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_sa_manager *sa_manager;
@@ -167,7 +169,7 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx)
return 0;
}
-static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx)
+static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
{
u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
@@ -184,9 +186,8 @@ static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx)
xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
}
-static int register_save_restore_context(struct xe_tile_vf_ccs *ctx)
+static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx)
{
- int err = -EINVAL;
int ctx_type;
switch (ctx->ctx_id) {
@@ -197,10 +198,10 @@ static int register_save_restore_context(struct xe_tile_vf_ccs *ctx)
ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE;
break;
default:
- return err;
+ return -EINVAL;
}
- xe_guc_register_exec_queue(ctx->mig_q, ctx_type);
+ xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type);
return 0;
}
@@ -215,16 +216,14 @@ static int register_save_restore_context(struct xe_tile_vf_ccs *ctx)
*/
int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
{
- struct xe_tile *tile = xe_device_get_root_tile(xe);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
- struct xe_tile_vf_ccs *ctx;
+ struct xe_sriov_vf_ccs_ctx *ctx;
int err;
- if (!IS_VF_CCS_READY(xe))
- return 0;
+ xe_assert(xe, IS_VF_CCS_READY(xe));
for_each_ccs_rw_ctx(ctx_id) {
- ctx = &tile->sriov.vf.ccs[ctx_id];
+ ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
err = register_save_restore_context(ctx);
if (err)
return err;
@@ -235,7 +234,7 @@ int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
static void xe_sriov_vf_ccs_fini(void *arg)
{
- struct xe_tile_vf_ccs *ctx = arg;
+ struct xe_sriov_vf_ccs_ctx *ctx = arg;
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
/*
@@ -259,17 +258,19 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
{
struct xe_tile *tile = xe_device_get_root_tile(xe);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
- struct xe_tile_vf_ccs *ctx;
+ struct xe_sriov_vf_ccs_ctx *ctx;
struct xe_exec_queue *q;
u32 flags;
int err;
xe_assert(xe, IS_SRIOV_VF(xe));
- xe_assert(xe, !IS_DGFX(xe));
- xe_assert(xe, xe_device_has_flat_ccs(xe));
+ xe_assert(xe, xe_sriov_vf_migration_supported(xe));
+
+ if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe))
+ return 0;
for_each_ccs_rw_ctx(ctx_id) {
- ctx = &tile->sriov.vf.ccs[ctx_id];
+ ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
ctx->ctx_id = ctx_id;
flags = EXEC_QUEUE_FLAG_KERNEL |
@@ -324,13 +325,12 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
{
struct xe_device *xe = xe_bo_device(bo);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
- struct xe_tile_vf_ccs *ctx;
+ struct xe_sriov_vf_ccs_ctx *ctx;
struct xe_tile *tile;
struct xe_bb *bb;
int err = 0;
- if (!IS_VF_CCS_READY(xe))
- return 0;
+ xe_assert(xe, IS_VF_CCS_READY(xe));
tile = xe_device_get_root_tile(xe);
@@ -339,7 +339,7 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
/* bb should be NULL here. Assert if not NULL */
xe_assert(xe, !bb);
- ctx = &tile->sriov.vf.ccs[ctx_id];
+ ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id);
}
return err;
@@ -361,7 +361,9 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
struct xe_bb *bb;
- if (!IS_VF_CCS_READY(xe))
+ xe_assert(xe, IS_VF_CCS_READY(xe));
+
+ if (!xe_bo_has_valid_ccs_bb(bo))
return 0;
for_each_ccs_rw_ctx(ctx_id) {
@@ -375,3 +377,34 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
}
return 0;
}
+
+/**
+ * xe_sriov_vf_ccs_print - Print VF CCS details.
+ * @xe: the &xe_device
+ * @p: the &drm_printer
+ *
+ * This function is for VF use only.
+ */
+void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_sa_manager *bb_pool;
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+
+ if (!IS_VF_CCS_READY(xe))
+ return;
+
+ xe_pm_runtime_get(xe);
+
+ for_each_ccs_rw_ctx(ctx_id) {
+ bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
+ if (!bb_pool)
+ break;
+
+ drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
+ drm_printf(p, "-------------------------\n");
+ drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
+ drm_puts(p, "\n");
+ }
+
+ xe_pm_runtime_put(xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
index 1f1baf685fec..0745c0ff0228 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
@@ -6,6 +6,11 @@
#ifndef _XE_SRIOV_VF_CCS_H_
#define _XE_SRIOV_VF_CCS_H_
+#include "xe_device_types.h"
+#include "xe_sriov.h"
+#include "xe_sriov_vf_ccs_types.h"
+
+struct drm_printer;
struct xe_device;
struct xe_bo;
@@ -13,5 +18,17 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe);
int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo);
int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);
int xe_sriov_vf_ccs_register_context(struct xe_device *xe);
+void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p);
+
+static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_VF(xe));
+ return xe->sriov.vf.ccs.initialized;
+}
+
+#define IS_VF_CCS_READY(xe) ({ \
+ struct xe_device *xe__ = (xe); \
+ IS_SRIOV_VF(xe__) && xe_sriov_vf_ccs_ready(xe__); \
+ })
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
index 93435a6f4cb6..22c499943d2a 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
@@ -6,48 +6,46 @@
#ifndef _XE_SRIOV_VF_CCS_TYPES_H_
#define _XE_SRIOV_VF_CCS_TYPES_H_
+#include <linux/types.h>
+
#define for_each_ccs_rw_ctx(id__) \
for ((id__) = 0; (id__) < XE_SRIOV_VF_CCS_CTX_COUNT; (id__)++)
-#define IS_VF_CCS_READY(xe) ({ \
- struct xe_device *___xe = (xe); \
- xe_assert(___xe, IS_SRIOV_VF(___xe)); \
- ___xe->sriov.vf.ccs.initialized; \
- })
-
-#define IS_VF_CCS_INIT_NEEDED(xe) ({\
- struct xe_device *___xe = (xe); \
- IS_SRIOV_VF(___xe) && !IS_DGFX(___xe) && \
- xe_device_has_flat_ccs(___xe) && GRAPHICS_VER(___xe) >= 20; \
- })
-
enum xe_sriov_vf_ccs_rw_ctxs {
XE_SRIOV_VF_CCS_READ_CTX,
XE_SRIOV_VF_CCS_WRITE_CTX,
XE_SRIOV_VF_CCS_CTX_COUNT
};
-#define IS_VF_CCS_BB_VALID(xe, bo) ({ \
- struct xe_device *___xe = (xe); \
- struct xe_bo *___bo = (bo); \
- IS_SRIOV_VF(___xe) && \
- ___bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && \
- ___bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; \
- })
-
struct xe_migrate;
struct xe_sa_manager;
-struct xe_tile_vf_ccs {
- /** @id: Id to which context it belongs to */
+/**
+ * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
+ */
+struct xe_sriov_vf_ccs_ctx {
+ /** @ctx_id: Id to which context it belongs to */
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+
/** @mig_q: exec queues used for migration */
struct xe_exec_queue *mig_q;
+ /** @mem: memory data */
struct {
- /** @ccs_bb_pool: Pool from which batch buffers are allocated. */
+ /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
struct xe_sa_manager *ccs_bb_pool;
} mem;
};
+/**
+ * struct xe_sriov_vf_ccs - The VF CCS migration support data.
+ */
+struct xe_sriov_vf_ccs {
+ /** @contexts: CCS read and write contexts for VF. */
+ struct xe_sriov_vf_ccs_ctx contexts[XE_SRIOV_VF_CCS_CTX_COUNT];
+
+ /** @initialized: Initialization of VF CCS is completed or not. */
+ bool initialized;
+};
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
index 24a873c50c49..426cc5841958 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
@@ -9,6 +9,8 @@
#include <linux/types.h>
#include <linux/workqueue_types.h>
+#include "xe_sriov_vf_ccs_types.h"
+
/**
* struct xe_sriov_vf_relay_version - PF ABI version details.
*/
@@ -35,13 +37,15 @@ struct xe_device_vf {
struct work_struct worker;
/** @migration.gt_flags: Per-GT request flags for VF migration recovery */
unsigned long gt_flags;
+ /**
+ * @migration.enabled: flag indicating if migration support
+ * was enabled or not due to missing prerequisites
+ */
+ bool enabled;
} migration;
/** @ccs: VF CCS state data */
- struct {
- /** @ccs.initialized: Initilalization of VF CCS is completed or not */
- bool initialized;
- } ccs;
+ struct xe_sriov_vf_ccs ccs;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index 7999cc5262a5..1662bfddd4bc 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -289,19 +289,10 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe)
u32 data;
bool survivability_mode;
- if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
+ if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE)
return false;
survivability_mode = xe_configfs_get_survivability_mode(pdev);
-
- if (xe->info.platform < XE_BATTLEMAGE) {
- if (survivability_mode) {
- dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n");
- xe_configfs_clear_survivability_mode(pdev);
- }
- return false;
- }
-
/* Enable survivability mode if set via configfs */
if (survivability_mode)
return true;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 76c6d74c1208..7e2db71ff34e 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -6,6 +6,7 @@
#include <drm/drm_drv.h>
#include "xe_bo.h"
+#include "xe_exec_queue_types.h"
#include "xe_gt_stats.h"
#include "xe_migrate.h"
#include "xe_module.h"
@@ -25,9 +26,9 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range)
* memory.
*/
- struct drm_gpusvm_range_flags flags = {
+ struct drm_gpusvm_pages_flags flags = {
/* Pairs with WRITE_ONCE in drm_gpusvm.c */
- .__flags = READ_ONCE(range->base.flags.__flags),
+ .__flags = READ_ONCE(range->base.pages.flags.__flags),
};
return flags.has_devmem_pages;
@@ -49,15 +50,15 @@ static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
return gpusvm_to_vm(r->gpusvm);
}
-#define range_debug(r__, operaton__) \
+#define range_debug(r__, operation__) \
vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \
"%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
"start=0x%014lx, end=0x%014lx, size=%lu", \
- (operaton__), range_to_vm(&(r__)->base)->usm.asid, \
+ (operation__), range_to_vm(&(r__)->base)->usm.asid, \
(r__)->base.gpusvm, \
xe_svm_range_in_vram((r__)) ? 1 : 0, \
xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \
- (r__)->base.notifier_seq, \
+ (r__)->base.pages.notifier_seq, \
xe_svm_range_start((r__)), xe_svm_range_end((r__)), \
xe_svm_range_size((r__)))
@@ -66,11 +67,6 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
range_debug(range, operation);
}
-static void *xe_svm_devm_owner(struct xe_device *xe)
-{
- return xe;
-}
-
static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
@@ -112,6 +108,11 @@ xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
&vm->svm.garbage_collector.work);
}
+static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt)
+{
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1);
+}
+
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
const struct mmu_notifier_range *mmu_range,
@@ -128,7 +129,7 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
range_debug(range, "NOTIFIER");
/* Skip if already unmapped or if no binding exist */
- if (range->base.flags.unmapped || !range->tile_present)
+ if (range->base.pages.flags.unmapped || !range->tile_present)
return 0;
range_debug(range, "NOTIFIER - EXECUTE");
@@ -144,13 +145,19 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
*/
for_each_tile(tile, xe, id)
if (xe_pt_zap_ptes_range(tile, vm, range)) {
- tile_mask |= BIT(id);
/*
* WRITE_ONCE pairs with READ_ONCE in
* xe_vm_has_valid_gpu_mapping()
*/
WRITE_ONCE(range->tile_invalidated,
range->tile_invalidated | BIT(id));
+
+ if (!(tile_mask & BIT(id))) {
+ xe_svm_tlb_inval_count_stats_incr(tile->primary_gt);
+ if (tile->media_gt)
+ xe_svm_tlb_inval_count_stats_incr(tile->media_gt);
+ tile_mask |= BIT(id);
+ }
}
return tile_mask;
@@ -170,6 +177,24 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
mmu_range);
}
+static s64 xe_svm_stats_ktime_us_delta(ktime_t start)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ?
+ ktime_us_delta(ktime_get(), start) : 0;
+}
+
+static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start)
+{
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta);
+}
+
+static ktime_t xe_svm_stats_ktime_get(void)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ? ktime_get() : 0;
+}
+
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
struct drm_gpusvm_notifier *notifier,
const struct mmu_notifier_range *mmu_range)
@@ -177,8 +202,10 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
struct xe_vm *vm = gpusvm_to_vm(gpusvm);
struct xe_device *xe = vm->xe;
struct drm_gpusvm_range *r, *first;
+ struct xe_tile *tile;
+ ktime_t start = xe_svm_stats_ktime_get();
u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
- u8 tile_mask = 0;
+ u8 tile_mask = 0, id;
long err;
xe_svm_assert_in_notifier(vm);
@@ -231,6 +258,13 @@ range_notifier_event_end:
r = first;
drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
xe_svm_range_notifier_event_end(vm, r, mmu_range);
+ for_each_tile(tile, xe, id) {
+ if (tile_mask & BIT(id)) {
+ xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start);
+ if (tile->media_gt)
+ xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start);
+ }
+ }
}
static int __xe_svm_garbage_collector(struct xe_vm *vm,
@@ -308,8 +342,8 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
if (xe_vm_is_closed_or_banned(vm))
return -ENOENT;
- spin_lock(&vm->svm.garbage_collector.lock);
for (;;) {
+ spin_lock(&vm->svm.garbage_collector.lock);
range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
typeof(*range),
garbage_collector_link);
@@ -338,8 +372,6 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
else
return err;
}
-
- spin_lock(&vm->svm.garbage_collector.lock);
}
spin_unlock(&vm->svm.garbage_collector.lock);
@@ -384,11 +416,66 @@ enum xe_svm_copy_dir {
XE_SVM_COPY_TO_SRAM,
};
+static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt,
+ const enum xe_svm_copy_dir dir,
+ int kb)
+{
+ if (dir == XE_SVM_COPY_TO_VRAM)
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb);
+ else
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb);
+}
+
+static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,
+ const enum xe_svm_copy_dir dir,
+ unsigned long npages,
+ ktime_t start)
+{
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+
+ if (dir == XE_SVM_COPY_TO_VRAM) {
+ switch (npages) {
+ case 1:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
+ us_delta);
+ break;
+ case 16:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
+ us_delta);
+ break;
+ case 512:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
+ us_delta);
+ break;
+ }
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
+ us_delta);
+ } else {
+ switch (npages) {
+ case 1:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
+ us_delta);
+ break;
+ case 16:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
+ us_delta);
+ break;
+ case 512:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
+ us_delta);
+ break;
+ }
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US,
+ us_delta);
+ }
+}
+
static int xe_svm_copy(struct page **pages,
struct drm_pagemap_addr *pagemap_addr,
unsigned long npages, const enum xe_svm_copy_dir dir)
{
struct xe_vram_region *vr = NULL;
+ struct xe_gt *gt = NULL;
struct xe_device *xe;
struct dma_fence *fence = NULL;
unsigned long i;
@@ -396,6 +483,7 @@ static int xe_svm_copy(struct page **pages,
u64 vram_addr = XE_VRAM_ADDR_INVALID;
int err = 0, pos = 0;
bool sram = dir == XE_SVM_COPY_TO_SRAM;
+ ktime_t start = xe_svm_stats_ktime_get();
/*
* This flow is complex: it locates physically contiguous device pages,
@@ -422,6 +510,7 @@ static int xe_svm_copy(struct page **pages,
if (!vr && spage) {
vr = page_to_vr(spage);
+ gt = xe_migrate_exec_queue(vr->migrate)->gt;
xe = vr->xe;
}
XE_WARN_ON(spage && page_to_vr(spage) != vr);
@@ -461,6 +550,9 @@ static int xe_svm_copy(struct page **pages,
int incr = (match && last) ? 1 : 0;
if (vram_addr != XE_VRAM_ADDR_INVALID) {
+ xe_svm_copy_kb_stats_incr(gt, dir,
+ (i - pos + incr) *
+ (PAGE_SIZE / SZ_1K));
if (sram) {
vm_dbg(&xe->drm,
"COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
@@ -499,6 +591,8 @@ static int xe_svm_copy(struct page **pages,
/* Extra mismatched device page, copy it */
if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
+ xe_svm_copy_kb_stats_incr(gt, dir,
+ (PAGE_SIZE / SZ_1K));
if (sram) {
vm_dbg(&xe->drm,
"COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
@@ -532,6 +626,14 @@ err_out:
dma_fence_put(fence);
}
+ /*
+ * XXX: We can't derive the GT here (or anywhere in this functions, but
+ * compute always uses the primary GT so accumlate stats on the likely
+ * GT of the fault.
+ */
+ if (gt)
+ xe_svm_copy_us_stats_incr(gt, dir, npages, start);
+
return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
@@ -630,22 +732,25 @@ int xe_svm_init(struct xe_vm *vm)
{
int err;
- spin_lock_init(&vm->svm.garbage_collector.lock);
- INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
- INIT_WORK(&vm->svm.garbage_collector.work,
- xe_svm_garbage_collector_work_func);
-
- err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
- current->mm, xe_svm_devm_owner(vm->xe), 0,
- vm->size, xe_modparam.svm_notifier_size * SZ_1M,
- &gpusvm_ops, fault_chunk_sizes,
- ARRAY_SIZE(fault_chunk_sizes));
- if (err)
- return err;
-
- drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
+ if (vm->flags & XE_VM_FLAG_FAULT_MODE) {
+ spin_lock_init(&vm->svm.garbage_collector.lock);
+ INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
+ INIT_WORK(&vm->svm.garbage_collector.work,
+ xe_svm_garbage_collector_work_func);
+
+ err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
+ current->mm, 0, vm->size,
+ xe_modparam.svm_notifier_size * SZ_1M,
+ &gpusvm_ops, fault_chunk_sizes,
+ ARRAY_SIZE(fault_chunk_sizes));
+ drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
+ } else {
+ err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
+ &vm->xe->drm, NULL, 0, 0, 0, NULL,
+ NULL, 0);
+ }
- return 0;
+ return err;
}
/**
@@ -716,7 +821,7 @@ bool xe_svm_range_validate(struct xe_vm *vm,
xe_svm_notifier_lock(vm);
ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask &&
- (devmem_preferred == range->base.flags.has_devmem_pages);
+ (devmem_preferred == range->base.pages.flags.has_devmem_pages);
xe_svm_notifier_unlock(vm);
@@ -755,49 +860,48 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
struct xe_device *xe = vr->xe;
struct device *dev = xe->drm.dev;
struct drm_buddy_block *block;
+ struct xe_validation_ctx vctx;
struct list_head *blocks;
+ struct drm_exec exec;
struct xe_bo *bo;
- ktime_t time_end = 0;
- int err, idx;
+ int err = 0, idx;
if (!drm_dev_enter(&xe->drm, &idx))
return -ENODEV;
xe_pm_runtime_get(xe);
- retry:
- bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start,
- ttm_bo_type_device,
- (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
- XE_BO_FLAG_CPU_ADDR_MIRROR);
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- if (xe_vm_validate_should_retry(NULL, err, &time_end))
- goto retry;
- goto out_pm_put;
- }
-
- drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
- &dpagemap_devmem_ops, dpagemap, end - start);
+ xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+ bo = xe_bo_create_locked(xe, NULL, NULL, end - start,
+ ttm_bo_type_device,
+ (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
+ XE_BO_FLAG_CPU_ADDR_MIRROR, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&vctx, &err);
+ break;
+ }
- blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
- list_for_each_entry(block, blocks, link)
- block->private = vr;
+ drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
+ &dpagemap_devmem_ops, dpagemap, end - start);
- xe_bo_get(bo);
+ blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
+ list_for_each_entry(block, blocks, link)
+ block->private = vr;
- /* Ensure the device has a pm ref while there are device pages active. */
- xe_pm_runtime_get_noresume(xe);
- err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
- start, end, timeslice_ms,
- xe_svm_devm_owner(xe));
- if (err)
- xe_svm_devmem_release(&bo->devmem_allocation);
+ xe_bo_get(bo);
- xe_bo_unlock(bo);
- xe_bo_put(bo);
-
-out_pm_put:
+ /* Ensure the device has a pm ref while there are device pages active. */
+ xe_pm_runtime_get_noresume(xe);
+ err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
+ start, end, timeslice_ms,
+ xe_svm_devm_owner(xe));
+ if (err)
+ xe_svm_devmem_release(&bo->devmem_allocation);
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
+ }
xe_pm_runtime_put(xe);
drm_dev_exit(idx);
@@ -827,17 +931,17 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm
struct xe_vm *vm = range_to_vm(&range->base);
u64 range_size = xe_svm_range_size(range);
- if (!range->base.flags.migrate_devmem || !preferred_region_is_vram)
+ if (!range->base.pages.flags.migrate_devmem || !preferred_region_is_vram)
return false;
xe_assert(vm->xe, IS_DGFX(vm->xe));
- if (preferred_region_is_vram && xe_svm_range_in_vram(range)) {
+ if (xe_svm_range_in_vram(range)) {
drm_info(&vm->xe->drm, "Range is already in VRAM\n");
return false;
}
- if (preferred_region_is_vram && range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
+ if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
return false;
}
@@ -845,27 +949,78 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm
return true;
}
+#define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \
+static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \
+ struct xe_svm_range *range) \
+{ \
+ switch (xe_svm_range_size(range)) { \
+ case SZ_4K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \
+ break; \
+ case SZ_64K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \
+ break; \
+ case SZ_2M: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \
+ break; \
+ } \
+} \
+
+DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT)
+DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT)
+DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE)
+
+#define DECL_SVM_RANGE_US_STATS(elem, stat) \
+static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \
+ struct xe_svm_range *range, \
+ ktime_t start) \
+{ \
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start); \
+\
+ switch (xe_svm_range_size(range)) { \
+ case SZ_4K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \
+ us_delta); \
+ break; \
+ case SZ_64K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \
+ us_delta); \
+ break; \
+ case SZ_2M: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \
+ us_delta); \
+ break; \
+ } \
+} \
+
+DECL_SVM_RANGE_US_STATS(migrate, MIGRATE)
+DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES)
+DECL_SVM_RANGE_US_STATS(bind, BIND)
+DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT)
+
static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
struct xe_gt *gt, u64 fault_addr,
bool need_vram)
{
+ int devmem_possible = IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
struct drm_gpusvm_ctx ctx = {
.read_only = xe_vma_read_only(vma),
- .devmem_possible = IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
- .check_pages_threshold = IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0,
- .devmem_only = need_vram && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
- .timeslice_ms = need_vram && IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
+ .devmem_possible = devmem_possible,
+ .check_pages_threshold = devmem_possible ? SZ_64K : 0,
+ .devmem_only = need_vram && devmem_possible,
+ .timeslice_ms = need_vram && devmem_possible ?
vm->xe->atomic_svm_timeslice_ms : 0,
+ .device_private_page_owner = xe_svm_devm_owner(vm->xe),
};
+ struct xe_validation_ctx vctx;
+ struct drm_exec exec;
struct xe_svm_range *range;
struct dma_fence *fence;
struct drm_pagemap *dpagemap;
struct xe_tile *tile = gt_to_tile(gt);
int migrate_try_count = ctx.devmem_only ? 3 : 1;
- ktime_t end = 0;
+ ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start;
int err;
lockdep_assert_held_write(&vm->lock);
@@ -884,23 +1039,34 @@ retry:
if (IS_ERR(range))
return PTR_ERR(range);
- if (ctx.devmem_only && !range->base.flags.migrate_devmem)
- return -EACCES;
+ xe_svm_range_fault_count_stats_incr(gt, range);
- if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
- return 0;
+ if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) {
+ err = -EACCES;
+ goto out;
+ }
+
+ if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) {
+ xe_svm_range_valid_fault_count_stats_incr(gt, range);
+ range_debug(range, "PAGE FAULT - VALID");
+ goto out;
+ }
range_debug(range, "PAGE FAULT");
dpagemap = xe_vma_resolve_pagemap(vma, tile);
if (--migrate_try_count >= 0 &&
xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
+ ktime_t migrate_start = xe_svm_stats_ktime_get();
+
/* TODO : For multi-device dpagemap will be used to find the
* remote tile and remote device. Will need to modify
* xe_svm_alloc_vram to use dpagemap for future multi-device
* support.
*/
+ xe_svm_range_migrate_count_stats_incr(gt, range);
err = xe_svm_alloc_vram(tile, range, &ctx);
+ xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
if (migrate_try_count || !ctx.devmem_only) {
@@ -917,6 +1083,8 @@ retry:
}
}
+ get_pages_start = xe_svm_stats_ktime_get();
+
range_debug(range, "GET PAGES");
err = xe_svm_range_get_pages(vm, range, &ctx);
/* Corner where CPU mappings have changed */
@@ -936,32 +1104,45 @@ retry:
}
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
- goto err_out;
+ goto out;
}
+ xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start);
range_debug(range, "PAGE FAULT - BIND");
-retry_bind:
- xe_vm_lock(vm, false);
- fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
- if (IS_ERR(fence)) {
- xe_vm_unlock(vm);
- err = PTR_ERR(fence);
- if (err == -EAGAIN) {
- ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
- range_debug(range, "PAGE FAULT - RETRY BIND");
- goto retry;
+ bind_start = xe_svm_stats_ktime_get();
+ xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+
+ xe_vm_set_validation_exec(vm, &exec);
+ fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
+ xe_vm_set_validation_exec(vm, NULL);
+ if (IS_ERR(fence)) {
+ drm_exec_retry_on_contention(&exec);
+ err = PTR_ERR(fence);
+ xe_validation_retry_on_oom(&vctx, &err);
+ xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
+ break;
}
- if (xe_vm_validate_should_retry(NULL, err, &end))
- goto retry_bind;
- goto err_out;
}
- xe_vm_unlock(vm);
+ if (err)
+ goto err_out;
dma_fence_wait(fence, false);
dma_fence_put(fence);
+ xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
+
+out:
+ xe_svm_range_fault_us_stats_incr(gt, range, start);
+ return 0;
err_out:
+ if (err == -EAGAIN) {
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
+ range_debug(range, "PAGE FAULT - RETRY BIND");
+ goto retry;
+ }
return err;
}
@@ -1089,7 +1270,7 @@ struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)),
xe_vma_start(vma), xe_vma_end(vma), ctx);
if (IS_ERR(r))
- return ERR_PTR(PTR_ERR(r));
+ return ERR_CAST(r);
return to_xe_range(r);
}
@@ -1221,7 +1402,7 @@ int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
{
struct drm_pagemap *dpagemap;
- xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem);
+ xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem);
range_debug(range, "ALLOCATE VRAM");
dpagemap = tile_local_pagemap(tile);
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 9d6a8840a8b7..0955d2ac8d74 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -6,6 +6,20 @@
#ifndef _XE_SVM_H_
#define _XE_SVM_H_
+struct xe_device;
+
+/**
+ * xe_svm_devm_owner() - Return the owner of device private memory
+ * @xe: The xe device.
+ *
+ * Return: The owner of this device's device private memory to use in
+ * hmm_range_fault()-
+ */
+static inline void *xe_svm_devm_owner(struct xe_device *xe)
+{
+ return xe;
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
#include <drm/drm_pagemap.h>
@@ -105,7 +119,7 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range)
{
lockdep_assert_held(&range->base.gpusvm->notifier_lock);
- return range->base.flags.has_dma_mapping;
+ return range->base.pages.flags.has_dma_mapping;
}
/**
@@ -155,19 +169,11 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range)
return drm_gpusvm_range_size(&range->base);
}
-#define xe_svm_assert_in_notifier(vm__) \
- lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
-
-#define xe_svm_notifier_lock(vm__) \
- drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
-
-#define xe_svm_notifier_unlock(vm__) \
- drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
-
void xe_svm_flush(struct xe_vm *vm);
#else
#include <linux/interval_tree.h>
+#include "xe_vm.h"
struct drm_pagemap_addr;
struct drm_gpusvm_ctx;
@@ -184,7 +190,9 @@ struct xe_vram_region;
struct xe_svm_range {
struct {
struct interval_tree_node itree;
- const struct drm_pagemap_addr *dma_addr;
+ struct {
+ const struct drm_pagemap_addr *dma_addr;
+ } pages;
} base;
u32 tile_present;
u32 tile_invalidated;
@@ -204,12 +212,21 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
static inline
int xe_svm_init(struct xe_vm *vm)
{
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+ return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm,
+ NULL, NULL, 0, 0, 0, NULL, NULL, 0);
+#else
return 0;
+#endif
}
static inline
void xe_svm_fini(struct xe_vm *vm)
{
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+ xe_assert(vm->xe, xe_vm_is_closed(vm));
+ drm_gpusvm_fini(&vm->svm.gpusvm);
+#endif
}
static inline
@@ -326,19 +343,47 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
return NULL;
}
-#define xe_svm_assert_in_notifier(...) do {} while (0)
+static inline void xe_svm_flush(struct xe_vm *vm)
+{
+}
#define xe_svm_range_has_dma_mapping(...) false
+#endif /* CONFIG_DRM_XE_GPUSVM */
+
+#if IS_ENABLED(CONFIG_DRM_GPUSVM) /* Need to support userptr without XE_GPUSVM */
+#define xe_svm_assert_in_notifier(vm__) \
+ lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_assert_held_read(vm__) \
+ lockdep_assert_held_read(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_notifier_lock(vm__) \
+ drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
+
+#define xe_svm_notifier_lock_interruptible(vm__) \
+ down_read_interruptible(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_notifier_unlock(vm__) \
+ drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
+
+#else
+#define xe_svm_assert_in_notifier(...) do {} while (0)
+
+static inline void xe_svm_assert_held_read(struct xe_vm *vm)
+{
+}
static inline void xe_svm_notifier_lock(struct xe_vm *vm)
{
}
-static inline void xe_svm_notifier_unlock(struct xe_vm *vm)
+static inline int xe_svm_notifier_lock_interruptible(struct xe_vm *vm)
{
+ return 0;
}
-static inline void xe_svm_flush(struct xe_vm *vm)
+static inline void xe_svm_notifier_unlock(struct xe_vm *vm)
{
}
-#endif
+#endif /* CONFIG_DRM_GPUSVM */
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c
new file mode 100644
index 000000000000..5523874cba7b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+#include <drm/drm_debugfs.h>
+
+#include "xe_pm.h"
+#include "xe_sa.h"
+#include "xe_tile_debugfs.h"
+
+static struct xe_tile *node_to_tile(struct drm_info_node *node)
+{
+ return node->dent->d_parent->d_inode->i_private;
+}
+
+/**
+ * tile_debugfs_simple_show - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * This callback can be used in struct drm_info_list to describe debugfs
+ * files that are &xe_tile specific.
+ *
+ * It is assumed that those debugfs files will be created on directory entry
+ * which struct dentry d_inode->i_private points to &xe_tile.
+ *
+ * /sys/kernel/debug/dri/0/
+ * ├── tile0/ # tile = dentry->d_inode->i_private
+ * │ │ ├── id # tile = dentry->d_parent->d_inode->i_private
+ *
+ * This function assumes that &m->private will be set to the &struct
+ * drm_info_node corresponding to the instance of the info on a given &struct
+ * drm_minor (see struct drm_info_list.show for details).
+ *
+ * This function also assumes that struct drm_info_list.data will point to the
+ * function code that will actually print a file content::
+ *
+ * int (*print)(struct xe_tile *, struct drm_printer *)
+ *
+ * Example::
+ *
+ * int tile_id(struct xe_tile *tile, struct drm_printer *p)
+ * {
+ * drm_printf(p, "%u\n", tile->id);
+ * return 0;
+ * }
+ *
+ * static const struct drm_info_list info[] = {
+ * { name = "id", .show = tile_debugfs_simple_show, .data = tile_id },
+ * };
+ *
+ * dir = debugfs_create_dir("tile0", parent);
+ * dir->d_inode->i_private = tile;
+ * drm_debugfs_create_files(info, ARRAY_SIZE(info), dir, minor);
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int tile_debugfs_simple_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct drm_info_node *node = m->private;
+ struct xe_tile *tile = node_to_tile(node);
+ int (*print)(struct xe_tile *, struct drm_printer *) = node->info_ent->data;
+
+ return print(tile, &p);
+}
+
+/**
+ * tile_debugfs_show_with_rpm - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * Similar to tile_debugfs_simple_show() but implicitly takes a RPM ref.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int tile_debugfs_show_with_rpm(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = m->private;
+ struct xe_tile *tile = node_to_tile(node);
+ struct xe_device *xe = tile_to_xe(tile);
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = tile_debugfs_simple_show(m, data);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static int sa_info(struct xe_tile *tile, struct drm_printer *p)
+{
+ drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
+ xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool));
+
+ return 0;
+}
+
+/* only for debugfs files which can be safely used on the VF */
+static const struct drm_info_list vf_safe_debugfs_list[] = {
+ { "sa_info", .show = tile_debugfs_show_with_rpm, .data = sa_info },
+};
+
+/**
+ * xe_tile_debugfs_register - Register tile's debugfs attributes
+ * @tile: the &xe_tile to register
+ *
+ * Create debugfs sub-directory with a name that includes a tile ID and
+ * then creates set of debugfs files (attributes) specific to this tile.
+ */
+void xe_tile_debugfs_register(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_minor *minor = xe->drm.primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[8];
+
+ snprintf(name, sizeof(name), "tile%u", tile->id);
+ tile->debugfs = debugfs_create_dir(name, root);
+ if (IS_ERR(tile->debugfs))
+ return;
+
+ /*
+ * Store the xe_tile pointer as private data of the tile/ directory
+ * node so other tile specific attributes under that directory may
+ * refer to it by looking at its parent node private data.
+ */
+ tile->debugfs->d_inode->i_private = tile;
+
+ drm_debugfs_create_files(vf_safe_debugfs_list,
+ ARRAY_SIZE(vf_safe_debugfs_list),
+ tile->debugfs, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.h b/drivers/gpu/drm/xe/xe_tile_debugfs.h
new file mode 100644
index 000000000000..0e5f724de37f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TILE_DEBUGFS_H_
+#define _XE_TILE_DEBUGFS_H_
+
+struct xe_tile;
+
+void xe_tile_debugfs_register(struct xe_tile *tile);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_printk.h b/drivers/gpu/drm/xe/xe_tile_printk.h
new file mode 100644
index 000000000000..63640a42685d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_printk.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _xe_tile_printk_H_
+#define _xe_tile_printk_H_
+
+#include "xe_printk.h"
+
+#define __XE_TILE_PRINTK_FMT(_tile, _fmt, _args...) "Tile%u: " _fmt, (_tile)->id, ##_args
+
+#define xe_tile_printk(_tile, _level, _fmt, ...) \
+ xe_printk((_tile)->xe, _level, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+#define xe_tile_err(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), err, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_err_once(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), err_once, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_err_ratelimited(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_warn(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_notice(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_info(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), info, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_dbg(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_WARN_type(_tile, _type, _condition, _fmt, ...) \
+ xe_WARN##_type((_tile)->xe, _condition, _fmt, ## __VA_ARGS__)
+
+#define xe_tile_WARN(_tile, _condition, _fmt, ...) \
+ xe_tile_WARN_type((_tile),, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+#define xe_tile_WARN_ONCE(_tile, _condition, _fmt, ...) \
+ xe_tile_WARN_type((_tile), _ONCE, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+#define xe_tile_WARN_ON(_tile, _condition) \
+ xe_tile_WARN((_tile), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
+
+#define xe_tile_WARN_ON_ONCE(_tile, _condition) \
+ xe_tile_WARN_ONCE((_tile), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
+
+static inline void __xe_tile_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_tile *tile = p->arg;
+
+ xe_tile_err(tile, "%pV", vaf);
+}
+
+static inline void __xe_tile_printfn_info(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_tile *tile = p->arg;
+
+ xe_tile_info(tile, "%pV", vaf);
+}
+
+static inline void __xe_tile_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_tile *tile = p->arg;
+ struct drm_printer dbg;
+
+ /*
+ * The original xe_tile_dbg() callsite annotations are useless here,
+ * redirect to the tweaked xe_dbg_printer() instead.
+ */
+ dbg = xe_dbg_printer(tile->xe);
+ dbg.origin = p->origin;
+
+ drm_printf(&dbg, __XE_TILE_PRINTK_FMT(tile, "%pV", vaf));
+}
+
+/**
+ * xe_tile_err_printer - Construct a &drm_printer that outputs to xe_tile_err()
+ * @tile: the &xe_tile pointer to use in xe_tile_err()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_tile_err_printer(struct xe_tile *tile)
+{
+ struct drm_printer p = {
+ .printfn = __xe_tile_printfn_err,
+ .arg = tile,
+ };
+ return p;
+}
+
+/**
+ * xe_tile_info_printer - Construct a &drm_printer that outputs to xe_tile_info()
+ * @tile: the &xe_tile pointer to use in xe_tile_info()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_tile_info_printer(struct xe_tile *tile)
+{
+ struct drm_printer p = {
+ .printfn = __xe_tile_printfn_info,
+ .arg = tile,
+ };
+ return p;
+}
+
+/**
+ * xe_tile_dbg_printer - Construct a &drm_printer that outputs like xe_tile_dbg()
+ * @tile: the &xe_tile pointer to use in xe_tile_dbg()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_tile_dbg_printer(struct xe_tile *tile)
+{
+ struct drm_printer p = {
+ .printfn = __xe_tile_printfn_dbg,
+ .arg = tile,
+ .origin = (const void *)_THIS_IP_,
+ };
+ return p;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
index b804234a6551..9e1236a9ec67 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -44,16 +44,18 @@ int xe_tile_sysfs_init(struct xe_tile *tile)
kt->tile = tile;
err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id);
- if (err) {
- kobject_put(&kt->base);
- return err;
- }
+ if (err)
+ goto err_object;
tile->sysfs = &kt->base;
err = xe_vram_freq_sysfs_init(tile);
if (err)
- return err;
+ goto err_object;
return devm_add_action_or_reset(xe->drm.dev, tile_sysfs_fini, tile);
+
+err_object:
+ kobject_put(&kt->base);
+ return err;
}
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c
index e6e97b5a7b5c..918a59e686ea 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.c
@@ -10,11 +10,10 @@
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
+#include "xe_gt_stats.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_tlb_inval.h"
-#include "xe_gt_stats.h"
-#include "xe_tlb_inval.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_tlb_inval.h"
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 9bbdde604923..622b76078567 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -115,8 +115,8 @@ struct fw_blobs_by_type {
#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 47, 0)) \
- fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \
+ fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 49, 4)) \
+ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 49, 4)) \
fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \
fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \
fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \
@@ -328,7 +328,7 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
}
-static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
+static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_guc_info *guc_info)
{
struct xe_gt *gt = uc_fw_to_gt(uc_fw);
struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
@@ -343,11 +343,12 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
return -EINVAL;
}
- compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version);
- compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version);
- compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version);
+ compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, guc_info->submission_version);
+ compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, guc_info->submission_version);
+ compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, guc_info->submission_version);
- uc_fw->private_data_size = css->private_data_size;
+ uc_fw->build_type = FIELD_GET(CSS_UKERNEL_INFO_BUILDTYPE, guc_info->ukernel_info);
+ uc_fw->private_data_size = guc_info->private_data_size;
return 0;
}
@@ -416,8 +417,8 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
css = (struct uc_css_header *)fw_data;
/* Check integrity of size values inside CSS header */
- size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
- css->exponent_size_dw) * sizeof(u32);
+ size = (css->header_size_dw - css->rsa_info.key_size_dw - css->rsa_info.modulus_size_dw -
+ css->rsa_info.exponent_size_dw) * sizeof(u32);
if (unlikely(size != sizeof(struct uc_css_header))) {
drm_warn(&xe->drm,
"%s firmware %s: unexpected header size: %zu != %zu\n",
@@ -430,7 +431,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
/* now RSA */
- uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+ uc_fw->rsa_size = css->rsa_info.key_size_dw * sizeof(u32);
/* At least, it should have header, uCode and RSA. Size of all three. */
size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
@@ -443,12 +444,12 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
}
/* Get version numbers from the CSS header */
- release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version);
- release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version);
- release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
+ release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->guc_info.sw_version);
+ release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->guc_info.sw_version);
+ release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->guc_info.sw_version);
if (uc_fw->type == XE_UC_FW_TYPE_GUC)
- return guc_read_css_info(uc_fw, css);
+ return guc_read_css_info(uc_fw, &css->guc_info);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
index 87ade41209d0..3c9a63d13032 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -44,6 +44,39 @@
* in fw. So driver will load a truncated firmware in this case.
*/
+struct uc_css_rsa_info {
+ u32 key_size_dw;
+ u32 modulus_size_dw;
+ u32 exponent_size_dw;
+} __packed;
+
+struct uc_css_guc_info {
+ u32 time;
+#define CSS_TIME_HOUR (0xFF << 0)
+#define CSS_TIME_MIN (0xFF << 8)
+#define CSS_TIME_SEC (0xFFFF << 16)
+ u32 reserved0[5];
+ u32 sw_version;
+#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
+ u32 submission_version;
+ u32 reserved1[11];
+ u32 header_info;
+#define CSS_HEADER_INFO_SVN (0xFF)
+#define CSS_HEADER_INFO_COPY_VALID (0x1 << 31)
+ u32 private_data_size;
+ u32 ukernel_info;
+#define CSS_UKERNEL_INFO_DEVICEID (0xFFFF << 16)
+#define CSS_UKERNEL_INFO_PRODKEY (0xFF << 8)
+#define CSS_UKERNEL_INFO_BUILDTYPE (0x3 << 2)
+#define CSS_UKERNEL_INFO_BUILDTYPE_PROD 0
+#define CSS_UKERNEL_INFO_BUILDTYPE_PREPROD 1
+#define CSS_UKERNEL_INFO_BUILDTYPE_DEBUG 2
+#define CSS_UKERNEL_INFO_ENCSTATUS (0x1 << 1)
+#define CSS_UKERNEL_INFO_COPY_VALID (0x1 << 0)
+} __packed;
+
struct uc_css_header {
u32 module_type;
/*
@@ -52,36 +85,21 @@ struct uc_css_header {
*/
u32 header_size_dw;
u32 header_version;
- u32 module_id;
+ u32 reserved0;
u32 module_vendor;
u32 date;
-#define CSS_DATE_DAY (0xFF << 0)
-#define CSS_DATE_MONTH (0xFF << 8)
-#define CSS_DATE_YEAR (0xFFFF << 16)
+#define CSS_DATE_DAY (0xFF << 0)
+#define CSS_DATE_MONTH (0xFF << 8)
+#define CSS_DATE_YEAR (0xFFFF << 16)
u32 size_dw; /* uCode plus header_size_dw */
- u32 key_size_dw;
- u32 modulus_size_dw;
- u32 exponent_size_dw;
- u32 time;
-#define CSS_TIME_HOUR (0xFF << 0)
-#define CSS_DATE_MIN (0xFF << 8)
-#define CSS_DATE_SEC (0xFFFF << 16)
- char username[8];
- char buildnumber[12];
- u32 sw_version;
-#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
-#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
-#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
union {
- u32 submission_version; /* only applies to GuC */
- u32 reserved2;
+ u32 reserved1[3];
+ struct uc_css_rsa_info rsa_info;
};
- u32 reserved0[12];
union {
- u32 private_data_size; /* only applies to GuC */
- u32 reserved1;
+ u32 reserved2[22];
+ struct uc_css_guc_info guc_info;
};
- u32 header_info;
} __packed;
static_assert(sizeof(struct uc_css_header) == 128);
@@ -318,4 +336,70 @@ struct gsc_manifest_header {
u32 exponent_size; /* in dwords */
} __packed;
+/**
+ * DOC: Late binding Firmware Layout
+ *
+ * The Late binding binary starts with FPT header, which contains locations
+ * of various partitions of the binary. Here we're interested in finding out
+ * manifest version. To the manifest version, we need to locate CPD header
+ * one of the entry in CPD header points to manifest header. Manifest header
+ * contains the version.
+ *
+ * +================================================+
+ * | FPT Header |
+ * +================================================+
+ * | FPT entries[] |
+ * | entry1 |
+ * | ... |
+ * | entryX |
+ * | "LTES" |
+ * | ... |
+ * | offset >-----------------------------|------o
+ * +================================================+ |
+ * |
+ * +================================================+ |
+ * | CPD Header |<-----o
+ * +================================================+
+ * | CPD entries[] |
+ * | entry1 |
+ * | ... |
+ * | entryX |
+ * | "LTES.man" |
+ * | ... |
+ * | offset >----------------------------|------o
+ * +================================================+ |
+ * |
+ * +================================================+ |
+ * | Manifest Header |<-----o
+ * | ... |
+ * | FW version |
+ * | ... |
+ * +================================================+
+ */
+
+/* FPT Headers */
+struct csc_fpt_header {
+ u32 header_marker;
+#define CSC_FPT_HEADER_MARKER 0x54504624
+ u32 num_of_entries;
+ u8 header_version;
+ u8 entry_version;
+ u8 header_length; /* in bytes */
+ u8 flags;
+ u16 ticks_to_add;
+ u16 tokens_to_add;
+ u32 uma_size;
+ u32 crc32;
+ struct gsc_version fitc_version;
+} __packed;
+
+struct csc_fpt_entry {
+ u8 name[4]; /* partition name */
+ u32 reserved1;
+ u32 offset; /* offset from beginning of CSE region */
+ u32 length; /* partition length in bytes */
+ u32 reserved2[3];
+ u32 partition_flags;
+} __packed;
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index 914026015019..77a1dcf8b4ed 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -147,6 +147,9 @@ struct xe_uc_fw {
/** @private_data_size: size of private data found in uC css header */
u32 private_data_size;
+
+ /** @build_type: Firmware build type (see CSS_UKERNEL_INFO_BUILDTYPE for definitions) */
+ u32 build_type;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
new file mode 100644
index 000000000000..f16e92cd8090
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_userptr.h"
+
+#include <linux/mm.h>
+
+#include "xe_trace_bo.h"
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @uvma: The userptr vma
+ *
+ * Check if the userptr vma has been invalidated since last successful
+ * repin. The check is advisory only and can the function can be called
+ * without the vm->svm.gpusvm.notifier_lock held. There is no guarantee that the
+ * vma userptr will remain valid after a lockless check, so typically
+ * the call needs to be followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
+{
+ return mmu_interval_check_retry(&uvma->userptr.notifier,
+ uvma->userptr.pages.notifier_seq) ?
+ -EAGAIN : 0;
+}
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function checks for whether the VM has userptrs that need repinning,
+ * and provides a release-type barrier on the svm.gpusvm.notifier_lock after
+ * checking.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+ lockdep_assert_held_read(&vm->svm.gpusvm.notifier_lock);
+
+ return (list_empty(&vm->userptr.repin_list) &&
+ list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
+{
+ struct xe_vma *vma = &uvma->vma;
+ struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_device *xe = vm->xe;
+ struct drm_gpusvm_ctx ctx = {
+ .read_only = xe_vma_read_only(vma),
+ .device_private_page_owner = NULL,
+ };
+
+ lockdep_assert_held(&vm->lock);
+ xe_assert(xe, xe_vma_is_userptr(vma));
+
+ if (vma->gpuva.flags & XE_VMA_DESTROYED)
+ return 0;
+
+ return drm_gpusvm_get_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ uvma->userptr.notifier.mm,
+ &uvma->userptr.notifier,
+ xe_vma_userptr(vma),
+ xe_vma_userptr(vma) + xe_vma_size(vma),
+ &ctx);
+}
+
+static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
+{
+ struct xe_userptr *userptr = &uvma->userptr;
+ struct xe_vma *vma = &uvma->vma;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ struct drm_gpusvm_ctx ctx = {
+ .in_notifier = true,
+ .read_only = xe_vma_read_only(vma),
+ };
+ long err;
+
+ /*
+ * Tell exec and rebind worker they need to repin and rebind this
+ * userptr.
+ */
+ if (!xe_vm_in_fault_mode(vm) &&
+ !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_move_tail(&userptr->invalidate_link,
+ &vm->userptr.invalidated);
+ spin_unlock(&vm->userptr.invalidated_lock);
+ }
+
+ /*
+ * Preempt fences turn into schedule disables, pipeline these.
+ * Note that even in fault mode, we need to wait for binds and
+ * unbinds to complete, and those are attached as BOOKMARK fences
+ * to the vm.
+ */
+ dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ dma_fence_enable_sw_signaling(fence);
+ dma_resv_iter_end(&cursor);
+
+ err = dma_resv_wait_timeout(xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ XE_WARN_ON(err <= 0);
+
+ if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
+ err = xe_vm_invalidate_vma(vma);
+ XE_WARN_ON(err);
+ }
+
+ drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
+}
+
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
+ struct xe_vma *vma = &uvma->vma;
+ struct xe_vm *vm = xe_vma_vm(vma);
+
+ xe_assert(vm->xe, xe_vma_is_userptr(vma));
+ trace_xe_vma_userptr_invalidate(vma);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+ xe_vma_start(vma), xe_vma_size(vma));
+
+ down_write(&vm->svm.gpusvm.notifier_lock);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ __vma_userptr_invalidate(vm, uvma);
+ up_write(&vm->svm.gpusvm.notifier_lock);
+ trace_xe_vma_userptr_invalidate_complete(vma);
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
+ .invalidate = vma_userptr_invalidate,
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+/**
+ * xe_vma_userptr_force_invalidate() - force invalidate a userptr
+ * @uvma: The userptr vma to invalidate
+ *
+ * Perform a forced userptr invalidation for testing purposes.
+ */
+void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+{
+ struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+ /* Protect against concurrent userptr pinning */
+ lockdep_assert_held(&vm->lock);
+ /* Protect against concurrent notifiers */
+ lockdep_assert_held(&vm->svm.gpusvm.notifier_lock);
+ /*
+ * Protect against concurrent instances of this function and
+ * the critical exec sections
+ */
+ xe_vm_assert_held(vm);
+
+ if (!mmu_interval_read_retry(&uvma->userptr.notifier,
+ uvma->userptr.pages.notifier_seq))
+ uvma->userptr.pages.notifier_seq -= 2;
+ __vma_userptr_invalidate(vm, uvma);
+}
+#endif
+
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+ struct xe_userptr_vma *uvma, *next;
+ int err = 0;
+
+ xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
+ lockdep_assert_held_write(&vm->lock);
+
+ /* Collect invalidated userptrs */
+ spin_lock(&vm->userptr.invalidated_lock);
+ xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
+ list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
+ userptr.invalidate_link) {
+ list_del_init(&uvma->userptr.invalidate_link);
+ list_add_tail(&uvma->userptr.repin_link,
+ &vm->userptr.repin_list);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+
+ /* Pin and move to bind list */
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ err = xe_vma_userptr_pin_pages(uvma);
+ if (err == -EFAULT) {
+ list_del_init(&uvma->userptr.repin_link);
+ /*
+ * We might have already done the pin once already, but
+ * then had to retry before the re-bind happened, due
+ * some other condition in the caller, but in the
+ * meantime the userptr got dinged by the notifier such
+ * that we need to revalidate here, but this time we hit
+ * the EFAULT. In such a case make sure we remove
+ * ourselves from the rebind list to avoid going down in
+ * flames.
+ */
+ if (!list_empty(&uvma->vma.combined_links.rebind))
+ list_del_init(&uvma->vma.combined_links.rebind);
+
+ /* Wait for pending binds */
+ xe_vm_lock(vm, false);
+ dma_resv_wait_timeout(xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+
+ down_read(&vm->svm.gpusvm.notifier_lock);
+ err = xe_vm_invalidate_vma(&uvma->vma);
+ up_read(&vm->svm.gpusvm.notifier_lock);
+ xe_vm_unlock(vm);
+ if (err)
+ break;
+ } else {
+ if (err)
+ break;
+
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->vma.combined_links.rebind,
+ &vm->rebind_list);
+ }
+ }
+
+ if (err) {
+ down_write(&vm->svm.gpusvm.notifier_lock);
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->userptr.invalidate_link,
+ &vm->userptr.invalidated);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+ up_write(&vm->svm.gpusvm.notifier_lock);
+ }
+ return err;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function does an advisory check for whether the VM has userptrs that
+ * need repinning.
+ *
+ * Return: 0 if there are no indications of userptrs needing repinning,
+ * -EAGAIN if there are.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+ return (list_empty_careful(&vm->userptr.repin_list) &&
+ list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
+int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start,
+ unsigned long range)
+{
+ struct xe_userptr *userptr = &uvma->userptr;
+ int err;
+
+ INIT_LIST_HEAD(&userptr->invalidate_link);
+ INIT_LIST_HEAD(&userptr->repin_link);
+
+ err = mmu_interval_notifier_insert(&userptr->notifier, current->mm,
+ start, range,
+ &vma_userptr_notifier_ops);
+ if (err)
+ return err;
+
+ userptr->pages.notifier_seq = LONG_MAX;
+
+ return 0;
+}
+
+void xe_userptr_remove(struct xe_userptr_vma *uvma)
+{
+ struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+ struct xe_userptr *userptr = &uvma->userptr;
+
+ drm_gpusvm_free_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ xe_vma_size(&uvma->vma) >> PAGE_SHIFT);
+
+ /*
+ * Since userptr pages are not pinned, we can't remove
+ * the notifier until we're sure the GPU is not accessing
+ * them anymore
+ */
+ mmu_interval_notifier_remove(&userptr->notifier);
+}
+
+void xe_userptr_destroy(struct xe_userptr_vma *uvma)
+{
+ struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+ spin_lock(&vm->userptr.invalidated_lock);
+ xe_assert(vm->xe, list_empty(&uvma->userptr.repin_link));
+ list_del(&uvma->userptr.invalidate_link);
+ spin_unlock(&vm->userptr.invalidated_lock);
+}
diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h
new file mode 100644
index 000000000000..ef801234991e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_userptr.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_USERPTR_H_
+#define _XE_USERPTR_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock.h>
+
+#include <drm/drm_gpusvm.h>
+
+struct xe_vm;
+struct xe_vma;
+struct xe_userptr_vma;
+
+/** struct xe_userptr_vm - User pointer VM level state */
+struct xe_userptr_vm {
+ /**
+ * @userptr.repin_list: list of VMAs which are user pointers,
+ * and needs repinning. Protected by @lock.
+ */
+ struct list_head repin_list;
+ /**
+ * @userptr.invalidated_lock: Protects the
+ * @userptr.invalidated list.
+ */
+ spinlock_t invalidated_lock;
+ /**
+ * @userptr.invalidated: List of invalidated userptrs, not yet
+ * picked
+ * up for revalidation. Protected from access with the
+ * @invalidated_lock. Removing items from the list
+ * additionally requires @lock in write mode, and adding
+ * items to the list requires either the @svm.gpusvm.notifier_lock in
+ * write mode, OR @lock in write mode.
+ */
+ struct list_head invalidated;
+};
+
+/** struct xe_userptr - User pointer */
+struct xe_userptr {
+ /** @invalidate_link: Link for the vm::userptr.invalidated list */
+ struct list_head invalidate_link;
+ /** @userptr: link into VM repin list if userptr. */
+ struct list_head repin_link;
+ /**
+ * @pages: gpusvm pages for this user pointer.
+ */
+ struct drm_gpusvm_pages pages;
+ /**
+ * @notifier: MMU notifier for user pointer (invalidation call back)
+ */
+ struct mmu_interval_notifier notifier;
+
+ /**
+ * @initial_bind: user pointer has been bound at least once.
+ * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held.
+ * read: vm->svm.gpusvm.notifier_lock in write mode or vm->resv held.
+ */
+ bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+ u32 divisor;
+#endif
+};
+
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+void xe_userptr_remove(struct xe_userptr_vma *uvma);
+int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start,
+ unsigned long range);
+void xe_userptr_destroy(struct xe_userptr_vma *uvma);
+
+int xe_vm_userptr_pin(struct xe_vm *vm);
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
+int xe_vm_userptr_check_repin(struct xe_vm *vm);
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
+#else
+static inline void xe_userptr_remove(struct xe_userptr_vma *uvma) {}
+
+static inline int xe_userptr_setup(struct xe_userptr_vma *uvma,
+ unsigned long start, unsigned long range)
+{
+ return -ENODEV;
+}
+
+static inline void xe_userptr_destroy(struct xe_userptr_vma *uvma) {}
+
+static inline int xe_vm_userptr_pin(struct xe_vm *vm) { return 0; }
+static inline int __xe_vm_userptr_needs_repin(struct xe_vm *vm) { return 0; }
+static inline int xe_vm_userptr_check_repin(struct xe_vm *vm) { return 0; }
+static inline int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { return -ENODEV; }
+static inline int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { return -ENODEV; };
+#endif
+
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
+#else
+static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+{
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_validation.c b/drivers/gpu/drm/xe/xe_validation.c
new file mode 100644
index 000000000000..826cd09966ef
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_validation.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#include "xe_bo.h"
+#include <drm/drm_exec.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gpuvm.h>
+
+#include "xe_assert.h"
+#include "xe_validation.h"
+
+#ifdef CONFIG_DRM_XE_DEBUG
+/**
+ * xe_validation_assert_exec() - Assert that the drm_exec pointer is suitable
+ * for validation.
+ * @xe: Pointer to the xe device.
+ * @exec: The drm_exec pointer to check.
+ * @obj: Pointer to the object subject to validation.
+ *
+ * NULL exec pointers are not allowed.
+ * For XE_VALIDATION_UNIMPLEMENTED, no checking.
+ * For XE_VLIDATION_OPT_OUT, check that the caller is a kunit test
+ * For XE_VALIDATION_UNSUPPORTED, check that the object subject to
+ * validation is a dma-buf, for which support for ww locking is
+ * not in place in the dma-buf layer.
+ */
+void xe_validation_assert_exec(const struct xe_device *xe,
+ const struct drm_exec *exec,
+ const struct drm_gem_object *obj)
+{
+ xe_assert(xe, exec);
+ if (IS_ERR(exec)) {
+ switch (PTR_ERR(exec)) {
+ case __XE_VAL_UNIMPLEMENTED:
+ break;
+ case __XE_VAL_UNSUPPORTED:
+ xe_assert(xe, !!obj->dma_buf);
+ break;
+#if IS_ENABLED(CONFIG_KUNIT)
+ case __XE_VAL_OPT_OUT:
+ xe_assert(xe, current->kunit_test);
+ break;
+#endif
+ default:
+ xe_assert(xe, false);
+ }
+ }
+}
+#endif
+
+static int xe_validation_lock(struct xe_validation_ctx *ctx)
+{
+ struct xe_validation_device *val = ctx->val;
+ int ret = 0;
+
+ if (ctx->val_flags.interruptible) {
+ if (ctx->request_exclusive)
+ ret = down_write_killable(&val->lock);
+ else
+ ret = down_read_interruptible(&val->lock);
+ } else {
+ if (ctx->request_exclusive)
+ down_write(&val->lock);
+ else
+ down_read(&val->lock);
+ }
+
+ if (!ret) {
+ ctx->lock_held = true;
+ ctx->lock_held_exclusive = ctx->request_exclusive;
+ }
+
+ return ret;
+}
+
+static int xe_validation_trylock(struct xe_validation_ctx *ctx)
+{
+ struct xe_validation_device *val = ctx->val;
+ bool locked;
+
+ if (ctx->request_exclusive)
+ locked = down_write_trylock(&val->lock);
+ else
+ locked = down_read_trylock(&val->lock);
+
+ if (locked) {
+ ctx->lock_held = true;
+ ctx->lock_held_exclusive = ctx->request_exclusive;
+ }
+
+ return locked ? 0 : -EWOULDBLOCK;
+}
+
+static void xe_validation_unlock(struct xe_validation_ctx *ctx)
+{
+ if (!ctx->lock_held)
+ return;
+
+ if (ctx->lock_held_exclusive)
+ up_write(&ctx->val->lock);
+ else
+ up_read(&ctx->val->lock);
+
+ ctx->lock_held = false;
+}
+
+/**
+ * xe_validation_ctx_init() - Initialize an xe_validation_ctx
+ * @ctx: The xe_validation_ctx to initialize.
+ * @val: The xe_validation_device representing the validation domain.
+ * @exec: The struct drm_exec to use for the transaction. May be NULL.
+ * @flags: The flags to use for initialization.
+ *
+ * Initialize and lock a an xe_validation transaction using the validation domain
+ * represented by @val. Also initialize the drm_exec object forwarding parts of
+ * @flags to the drm_exec initialization. The @flags.exclusive flag should
+ * typically be set to false to avoid locking out other validators from the
+ * domain until an OOM is hit. For testing- or final attempt purposes it can,
+ * however, be set to true.
+ *
+ * Return: %0 on success, %-EINTR if interruptible initial locking failed with a
+ * signal pending. If @flags.no_block is set to true, a failed trylock
+ * returns %-EWOULDBLOCK.
+ */
+int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val,
+ struct drm_exec *exec, const struct xe_val_flags flags)
+{
+ int ret;
+
+ ctx->exec = exec;
+ ctx->val = val;
+ ctx->lock_held = false;
+ ctx->lock_held_exclusive = false;
+ ctx->request_exclusive = flags.exclusive;
+ ctx->val_flags = flags;
+ ctx->exec_flags = 0;
+ ctx->nr = 0;
+
+ if (flags.no_block)
+ ret = xe_validation_trylock(ctx);
+ else
+ ret = xe_validation_lock(ctx);
+ if (ret)
+ return ret;
+
+ if (exec) {
+ if (flags.interruptible)
+ ctx->exec_flags |= DRM_EXEC_INTERRUPTIBLE_WAIT;
+ if (flags.exec_ignore_duplicates)
+ ctx->exec_flags |= DRM_EXEC_IGNORE_DUPLICATES;
+ drm_exec_init(exec, ctx->exec_flags, ctx->nr);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+/*
+ * This abuses both drm_exec and ww_mutex internals and should be
+ * replaced by checking for -EDEADLK when we can make TTM
+ * stop converting -EDEADLK to -ENOMEM.
+ * An alternative is to not have exhaustive eviction with
+ * CONFIG_DEBUG_WW_MUTEX_SLOWPATH until that happens.
+ */
+static bool xe_validation_contention_injected(struct drm_exec *exec)
+{
+ return !!exec->ticket.contending_lock;
+}
+
+#else
+
+static bool xe_validation_contention_injected(struct drm_exec *exec)
+{
+ return false;
+}
+
+#endif
+
+static bool __xe_validation_should_retry(struct xe_validation_ctx *ctx, int ret)
+{
+ if (ret == -ENOMEM &&
+ ((ctx->request_exclusive &&
+ xe_validation_contention_injected(ctx->exec)) ||
+ !ctx->request_exclusive)) {
+ ctx->request_exclusive = true;
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * xe_validation_exec_lock() - Perform drm_gpuvm_exec_lock within a validation
+ * transaction.
+ * @ctx: An uninitialized xe_validation_ctx.
+ * @vm_exec: An initialized struct vm_exec.
+ * @val: The validation domain.
+ *
+ * The drm_gpuvm_exec_lock() function internally initializes its drm_exec
+ * transaction and therefore doesn't lend itself very well to be using
+ * xe_validation_ctx_init(). Provide a helper that takes an uninitialized
+ * xe_validation_ctx and calls drm_gpuvm_exec_lock() with OOM retry.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+int xe_validation_exec_lock(struct xe_validation_ctx *ctx,
+ struct drm_gpuvm_exec *vm_exec,
+ struct xe_validation_device *val)
+{
+ int ret;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->exec = &vm_exec->exec;
+ ctx->exec_flags = vm_exec->flags;
+ ctx->val = val;
+ if (ctx->exec_flags & DRM_EXEC_INTERRUPTIBLE_WAIT)
+ ctx->val_flags.interruptible = 1;
+ if (ctx->exec_flags & DRM_EXEC_IGNORE_DUPLICATES)
+ ctx->val_flags.exec_ignore_duplicates = 1;
+retry:
+ ret = xe_validation_lock(ctx);
+ if (ret)
+ return ret;
+
+ ret = drm_gpuvm_exec_lock(vm_exec);
+ if (ret) {
+ xe_validation_unlock(ctx);
+ if (__xe_validation_should_retry(ctx, ret))
+ goto retry;
+ }
+
+ return ret;
+}
+
+/**
+ * xe_validation_ctx_fini() - Finalize a validation transaction
+ * @ctx: The Validation transaction to finalize.
+ *
+ * Finalize a validation transaction and its related drm_exec transaction.
+ */
+void xe_validation_ctx_fini(struct xe_validation_ctx *ctx)
+{
+ if (ctx->exec)
+ drm_exec_fini(ctx->exec);
+ xe_validation_unlock(ctx);
+}
+
+/**
+ * xe_validation_should_retry() - Determine if a validation transaction should retry
+ * @ctx: The validation transaction.
+ * @ret: Pointer to a return value variable.
+ *
+ * Determines whether a validation transaction should retry based on the
+ * internal transaction state and the return value pointed to by @ret.
+ * If a validation should be retried, the transaction is prepared for that,
+ * and the validation locked might be re-locked in exclusive mode, and *@ret
+ * is set to %0. If the re-locking errors, typically due to interruptible
+ * locking with signal pending, *@ret is instead set to -EINTR and the
+ * function returns %false.
+ *
+ * Return: %true if validation should be retried, %false otherwise.
+ */
+bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret)
+{
+ if (__xe_validation_should_retry(ctx, *ret)) {
+ drm_exec_fini(ctx->exec);
+ *ret = 0;
+ if (ctx->request_exclusive != ctx->lock_held_exclusive) {
+ xe_validation_unlock(ctx);
+ *ret = xe_validation_lock(ctx);
+ }
+ drm_exec_init(ctx->exec, ctx->exec_flags, ctx->nr);
+ return !*ret;
+ }
+
+ return false;
+}
diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h
new file mode 100644
index 000000000000..fec331d791e7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_validation.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#ifndef _XE_VALIDATION_H_
+#define _XE_VALIDATION_H_
+
+#include <linux/dma-resv.h>
+#include <linux/types.h>
+#include <linux/rwsem.h>
+
+struct drm_exec;
+struct drm_gem_object;
+struct drm_gpuvm_exec;
+struct xe_device;
+
+#ifdef CONFIG_PROVE_LOCKING
+/**
+ * xe_validation_lockdep() - Assert that a drm_exec locking transaction can
+ * be initialized at this point.
+ */
+static inline void xe_validation_lockdep(void)
+{
+ struct ww_acquire_ctx ticket;
+
+ ww_acquire_init(&ticket, &reservation_ww_class);
+ ww_acquire_fini(&ticket);
+}
+#else
+static inline void xe_validation_lockdep(void)
+{
+}
+#endif
+
+/*
+ * Various values of the drm_exec pointer where we've not (yet)
+ * implemented full ww locking.
+ *
+ * XE_VALIDATION_UNIMPLEMENTED means implementation is pending.
+ * A lockdep check is made to assure that a drm_exec locking
+ * transaction can actually take place where the macro is
+ * used. If this asserts, the exec pointer needs to be assigned
+ * higher up in the callchain and passed down.
+ *
+ * XE_VALIDATION_UNSUPPORTED is for dma-buf code only where
+ * the dma-buf layer doesn't support WW locking.
+ *
+ * XE_VALIDATION_OPT_OUT is for simplification of kunit tests where
+ * exhaustive eviction isn't necessary.
+ */
+#define __XE_VAL_UNIMPLEMENTED -EINVAL
+#define XE_VALIDATION_UNIMPLEMENTED (xe_validation_lockdep(), \
+ (struct drm_exec *)ERR_PTR(__XE_VAL_UNIMPLEMENTED))
+
+#define __XE_VAL_UNSUPPORTED -EOPNOTSUPP
+#define XE_VALIDATION_UNSUPPORTED ((struct drm_exec *)ERR_PTR(__XE_VAL_UNSUPPORTED))
+
+#define __XE_VAL_OPT_OUT -ENOMEM
+#define XE_VALIDATION_OPT_OUT (xe_validation_lockdep(), \
+ (struct drm_exec *)ERR_PTR(__XE_VAL_OPT_OUT))
+#ifdef CONFIG_DRM_XE_DEBUG
+void xe_validation_assert_exec(const struct xe_device *xe, const struct drm_exec *exec,
+ const struct drm_gem_object *obj);
+#else
+#define xe_validation_assert_exec(_xe, _exec, _obj) \
+ do { \
+ (void)_xe; (void)_exec; (void)_obj; \
+ } while (0)
+#endif
+
+/**
+ * struct xe_validation_device - The domain for exhaustive eviction
+ * @lock: The lock used to exclude other processes from allocating graphics memory
+ *
+ * The struct xe_validation_device represents the domain for which we want to use
+ * exhaustive eviction. The @lock is typically grabbed in read mode for allocations
+ * but when graphics memory allocation fails, it is retried with the write mode held.
+ */
+struct xe_validation_device {
+ struct rw_semaphore lock;
+};
+
+/**
+ * struct xe_val_flags - Flags for xe_validation_ctx_init().
+ * @exclusive: Start the validation transaction by locking out all other validators.
+ * @no_block: Don't block on initialization.
+ * @interruptible: Block interruptible if blocking. Implies initializing the drm_exec
+ * context with the DRM_EXEC_INTERRUPTIBLE_WAIT flag.
+ * @exec_ignore_duplicates: Initialize the drm_exec context with the
+ * DRM_EXEC_IGNORE_DUPLICATES flag.
+ */
+struct xe_val_flags {
+ u32 exclusive :1;
+ u32 no_block :1;
+ u32 interruptible :1;
+ u32 exec_ignore_duplicates :1;
+};
+
+/**
+ * struct xe_validation_ctx - A struct drm_exec subclass with support for
+ * exhaustive eviction
+ * @exec: The drm_exec object base class. Note that we use a pointer instead of
+ * embedding to avoid diamond inheritance.
+ * @val: The exhaustive eviction domain.
+ * @val_flags: Copy of the struct xe_val_flags passed to xe_validation_ctx_init.
+ * @lock_held: Whether The domain lock is currently held.
+ * @lock_held_exclusive: Whether the domain lock is held in exclusive mode.
+ * @request_exclusive: Whether to lock exclusively (write mode) the next time
+ * the domain lock is locked.
+ * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization.
+ * @nr: The drm_exec nr parameter used for drm_exec (re-)initializaiton.
+ */
+struct xe_validation_ctx {
+ struct drm_exec *exec;
+ struct xe_validation_device *val;
+ struct xe_val_flags val_flags;
+ bool lock_held;
+ bool lock_held_exclusive;
+ bool request_exclusive;
+ u32 exec_flags;
+ unsigned int nr;
+};
+
+int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val,
+ struct drm_exec *exec, const struct xe_val_flags flags);
+
+int xe_validation_exec_lock(struct xe_validation_ctx *ctx, struct drm_gpuvm_exec *vm_exec,
+ struct xe_validation_device *val);
+
+void xe_validation_ctx_fini(struct xe_validation_ctx *ctx);
+
+bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret);
+
+/**
+ * xe_validation_retry_on_oom() - Retry on oom in an xe_validaton transaction
+ * @_ctx: Pointer to the xe_validation_ctx
+ * @_ret: The current error value possibly holding -ENOMEM
+ *
+ * Use this in way similar to drm_exec_retry_on_contention().
+ * If @_ret contains -ENOMEM the tranaction is restarted once in a way that
+ * blocks other transactions and allows exhastive eviction. If the transaction
+ * was already restarted once, Just return the -ENOMEM. May also set
+ * _ret to -EINTR if not retrying and waits are interruptible.
+ * May only be used within a drm_exec_until_all_locked() loop.
+ */
+#define xe_validation_retry_on_oom(_ctx, _ret) \
+ do { \
+ if (xe_validation_should_retry(_ctx, _ret)) \
+ goto *__drm_exec_retry_ptr; \
+ } while (0)
+
+/**
+ * xe_validation_device_init - Initialize a struct xe_validation_device
+ * @val: The xe_validation_device to init.
+ */
+static inline void
+xe_validation_device_init(struct xe_validation_device *val)
+{
+ init_rwsem(&val->lock);
+}
+
+/*
+ * Make guard() and scoped_guard() work with xe_validation_ctx
+ * so that we can exit transactions without caring about the
+ * cleanup.
+ */
+DEFINE_CLASS(xe_validation, struct xe_validation_ctx *,
+ if (_T) xe_validation_ctx_fini(_T);,
+ ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags);
+ _ret ? NULL : _ctx; }),
+ struct xe_validation_ctx *_ctx, struct xe_validation_device *_val,
+ struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret);
+static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T)
+{return *_T; }
+#define class_xe_validation_is_conditional true
+
+/**
+ * xe_validation_guard() - An auto-cleanup xe_validation_ctx transaction
+ * @_ctx: The xe_validation_ctx.
+ * @_val: The xe_validation_device.
+ * @_exec: The struct drm_exec object
+ * @_flags: Flags for the xe_validation_ctx initialization.
+ * @_ret: Return in / out parameter. May be set by this macro. Typicall 0 when called.
+ *
+ * This macro is will initiate a drm_exec transaction with additional support for
+ * exhaustive eviction.
+ */
+#define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \
+ scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \
+ drm_exec_until_all_locked(_exec)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c00a5ff31817..027e6ce648c5 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -41,7 +41,6 @@
#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
-#include "xe_hmm.h"
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
@@ -49,34 +48,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
}
/**
- * xe_vma_userptr_check_repin() - Advisory check for repin needed
- * @uvma: The userptr vma
+ * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
+ * @vm: The vm whose resv is to be locked.
+ * @exec: The drm_exec transaction.
*
- * Check if the userptr vma has been invalidated since last successful
- * repin. The check is advisory only and can the function can be called
- * without the vm->userptr.notifier_lock held. There is no guarantee that the
- * vma userptr will remain valid after a lockless check, so typically
- * the call needs to be followed by a proper check under the notifier_lock.
+ * Helper to lock the vm's resv as part of a drm_exec transaction.
*
- * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ * Return: %0 on success. See drm_exec_lock_obj() for error codes.
*/
-int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
+int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
{
- return mmu_interval_check_retry(&uvma->userptr.notifier,
- uvma->userptr.notifier_seq) ?
- -EAGAIN : 0;
-}
-
-int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
-{
- struct xe_vma *vma = &uvma->vma;
- struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_device *xe = vm->xe;
-
- lockdep_assert_held(&vm->lock);
- xe_assert(xe, xe_vma_is_userptr(vma));
-
- return xe_hmm_userptr_populate_range(uvma, false);
+ return drm_exec_lock_obj(exec, xe_vm_obj(vm));
}
static bool preempt_fences_waiting(struct xe_vm *vm)
@@ -228,6 +210,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
.num_fences = 1,
};
struct drm_exec *exec = &vm_exec.exec;
+ struct xe_validation_ctx ctx;
struct dma_fence *pfence;
int err;
bool wait;
@@ -235,14 +218,14 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
down_write(&vm->lock);
- err = drm_gpuvm_exec_lock(&vm_exec);
+ err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
if (err)
goto out_up_write;
pfence = xe_preempt_fence_create(q, q->lr.context,
++q->lr.seqno);
- if (!pfence) {
- err = -ENOMEM;
+ if (IS_ERR(pfence)) {
+ err = PTR_ERR(pfence);
goto out_fini;
}
@@ -250,7 +233,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
++vm->preempt.num_exec_queues;
q->lr.pfence = pfence;
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
@@ -264,10 +247,10 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
if (wait)
dma_fence_enable_sw_signaling(pfence);
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
out_fini:
- drm_exec_fini(exec);
+ xe_validation_ctx_fini(&ctx);
out_up_write:
up_write(&vm->lock);
@@ -300,25 +283,6 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
up_write(&vm->lock);
}
-/**
- * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
- * that need repinning.
- * @vm: The VM.
- *
- * This function checks for whether the VM has userptrs that need repinning,
- * and provides a release-type barrier on the userptr.notifier_lock after
- * checking.
- *
- * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
- */
-int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
-{
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
-
- return (list_empty(&vm->userptr.repin_list) &&
- list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
-}
-
#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
/**
@@ -350,39 +314,6 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked)
/* TODO: Inform user the VM is banned */
}
-/**
- * xe_vm_validate_should_retry() - Whether to retry after a validate error.
- * @exec: The drm_exec object used for locking before validation.
- * @err: The error returned from ttm_bo_validate().
- * @end: A ktime_t cookie that should be set to 0 before first use and
- * that should be reused on subsequent calls.
- *
- * With multiple active VMs, under memory pressure, it is possible that
- * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
- * Until ttm properly handles locking in such scenarios, best thing the
- * driver can do is retry with a timeout. Check if that is necessary, and
- * if so unlock the drm_exec's objects while keeping the ticket to prepare
- * for a rerun.
- *
- * Return: true if a retry after drm_exec_init() is recommended;
- * false otherwise.
- */
-bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
-{
- ktime_t cur;
-
- if (err != -ENOMEM)
- return false;
-
- cur = ktime_get();
- *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
- if (!ktime_before(cur, *end))
- return false;
-
- msleep(20);
- return true;
-}
-
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
@@ -397,7 +328,7 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
if (!try_wait_for_completion(&vm->xe->pm_block))
return -EAGAIN;
- ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
+ ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
if (ret)
return ret;
@@ -513,10 +444,10 @@ void xe_vm_resume_rebind_worker(struct xe_vm *vm)
static void preempt_rebind_work_func(struct work_struct *w)
{
struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
unsigned int fence_count = 0;
LIST_HEAD(preempt_fences);
- ktime_t end = 0;
int err = 0;
long wait;
int __maybe_unused tries = 0;
@@ -544,18 +475,19 @@ retry:
goto out_unlock_outer;
}
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
+ (struct xe_val_flags) {.interruptible = true});
+ if (err)
+ goto out_unlock_outer;
drm_exec_until_all_locked(&exec) {
bool done = false;
err = xe_preempt_work_begin(&exec, vm, &done);
drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
if (err || done) {
- drm_exec_fini(&exec);
- if (err && xe_vm_validate_should_retry(&exec, err, &end))
- err = -EAGAIN;
-
+ xe_validation_ctx_fini(&ctx);
goto out_unlock_outer;
}
}
@@ -564,7 +496,9 @@ retry:
if (err)
goto out_unlock;
+ xe_vm_set_validation_exec(vm, &exec);
err = xe_vm_rebind(vm, true);
+ xe_vm_set_validation_exec(vm, NULL);
if (err)
goto out_unlock;
@@ -582,9 +516,9 @@ retry:
(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
__xe_vm_userptr_needs_repin(__vm))
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
if (retry_required(tries, vm)) {
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
err = -EAGAIN;
goto out_unlock;
}
@@ -598,10 +532,10 @@ retry:
/* Point of no return. */
arm_preempt_fences(vm, &preempt_fences);
resume_and_reinstall_preempt_fences(vm, &exec);
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
out_unlock:
- drm_exec_fini(&exec);
+ xe_validation_ctx_fini(&ctx);
out_unlock_outer:
if (err == -EAGAIN) {
trace_xe_vm_rebind_worker_retry(vm);
@@ -619,203 +553,6 @@ out_unlock_outer:
trace_xe_vm_rebind_worker_exit(vm);
}
-static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
- struct xe_vma *vma = &uvma->vma;
- struct dma_resv_iter cursor;
- struct dma_fence *fence;
- long err;
-
- /*
- * Tell exec and rebind worker they need to repin and rebind this
- * userptr.
- */
- if (!xe_vm_in_fault_mode(vm) &&
- !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
- spin_lock(&vm->userptr.invalidated_lock);
- list_move_tail(&userptr->invalidate_link,
- &vm->userptr.invalidated);
- spin_unlock(&vm->userptr.invalidated_lock);
- }
-
- /*
- * Preempt fences turn into schedule disables, pipeline these.
- * Note that even in fault mode, we need to wait for binds and
- * unbinds to complete, and those are attached as BOOKMARK fences
- * to the vm.
- */
- dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP);
- dma_resv_for_each_fence_unlocked(&cursor, fence)
- dma_fence_enable_sw_signaling(fence);
- dma_resv_iter_end(&cursor);
-
- err = dma_resv_wait_timeout(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP,
- false, MAX_SCHEDULE_TIMEOUT);
- XE_WARN_ON(err <= 0);
-
- if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
- err = xe_vm_invalidate_vma(vma);
- XE_WARN_ON(err);
- }
-
- xe_hmm_userptr_unmap(uvma);
-}
-
-static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
- const struct mmu_notifier_range *range,
- unsigned long cur_seq)
-{
- struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
- struct xe_vma *vma = &uvma->vma;
- struct xe_vm *vm = xe_vma_vm(vma);
-
- xe_assert(vm->xe, xe_vma_is_userptr(vma));
- trace_xe_vma_userptr_invalidate(vma);
-
- if (!mmu_notifier_range_blockable(range))
- return false;
-
- vm_dbg(&xe_vma_vm(vma)->xe->drm,
- "NOTIFIER: addr=0x%016llx, range=0x%016llx",
- xe_vma_start(vma), xe_vma_size(vma));
-
- down_write(&vm->userptr.notifier_lock);
- mmu_interval_set_seq(mni, cur_seq);
-
- __vma_userptr_invalidate(vm, uvma);
- up_write(&vm->userptr.notifier_lock);
- trace_xe_vma_userptr_invalidate_complete(vma);
-
- return true;
-}
-
-static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
- .invalidate = vma_userptr_invalidate,
-};
-
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
-/**
- * xe_vma_userptr_force_invalidate() - force invalidate a userptr
- * @uvma: The userptr vma to invalidate
- *
- * Perform a forced userptr invalidation for testing purposes.
- */
-void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
-{
- struct xe_vm *vm = xe_vma_vm(&uvma->vma);
-
- /* Protect against concurrent userptr pinning */
- lockdep_assert_held(&vm->lock);
- /* Protect against concurrent notifiers */
- lockdep_assert_held(&vm->userptr.notifier_lock);
- /*
- * Protect against concurrent instances of this function and
- * the critical exec sections
- */
- xe_vm_assert_held(vm);
-
- if (!mmu_interval_read_retry(&uvma->userptr.notifier,
- uvma->userptr.notifier_seq))
- uvma->userptr.notifier_seq -= 2;
- __vma_userptr_invalidate(vm, uvma);
-}
-#endif
-
-int xe_vm_userptr_pin(struct xe_vm *vm)
-{
- struct xe_userptr_vma *uvma, *next;
- int err = 0;
-
- xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
- lockdep_assert_held_write(&vm->lock);
-
- /* Collect invalidated userptrs */
- spin_lock(&vm->userptr.invalidated_lock);
- xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
- list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
- userptr.invalidate_link) {
- list_del_init(&uvma->userptr.invalidate_link);
- list_add_tail(&uvma->userptr.repin_link,
- &vm->userptr.repin_list);
- }
- spin_unlock(&vm->userptr.invalidated_lock);
-
- /* Pin and move to bind list */
- list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
- userptr.repin_link) {
- err = xe_vma_userptr_pin_pages(uvma);
- if (err == -EFAULT) {
- list_del_init(&uvma->userptr.repin_link);
- /*
- * We might have already done the pin once already, but
- * then had to retry before the re-bind happened, due
- * some other condition in the caller, but in the
- * meantime the userptr got dinged by the notifier such
- * that we need to revalidate here, but this time we hit
- * the EFAULT. In such a case make sure we remove
- * ourselves from the rebind list to avoid going down in
- * flames.
- */
- if (!list_empty(&uvma->vma.combined_links.rebind))
- list_del_init(&uvma->vma.combined_links.rebind);
-
- /* Wait for pending binds */
- xe_vm_lock(vm, false);
- dma_resv_wait_timeout(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP,
- false, MAX_SCHEDULE_TIMEOUT);
-
- down_read(&vm->userptr.notifier_lock);
- err = xe_vm_invalidate_vma(&uvma->vma);
- up_read(&vm->userptr.notifier_lock);
- xe_vm_unlock(vm);
- if (err)
- break;
- } else {
- if (err)
- break;
-
- list_del_init(&uvma->userptr.repin_link);
- list_move_tail(&uvma->vma.combined_links.rebind,
- &vm->rebind_list);
- }
- }
-
- if (err) {
- down_write(&vm->userptr.notifier_lock);
- spin_lock(&vm->userptr.invalidated_lock);
- list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
- userptr.repin_link) {
- list_del_init(&uvma->userptr.repin_link);
- list_move_tail(&uvma->userptr.invalidate_link,
- &vm->userptr.invalidated);
- }
- spin_unlock(&vm->userptr.invalidated_lock);
- up_write(&vm->userptr.notifier_lock);
- }
- return err;
-}
-
-/**
- * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
- * that need repinning.
- * @vm: The VM.
- *
- * This function does an advisory check for whether the VM has userptrs that
- * need repinning.
- *
- * Return: 0 if there are no indications of userptrs needing repinning,
- * -EAGAIN if there are.
- */
-int xe_vm_userptr_check_repin(struct xe_vm *vm)
-{
- return (list_empty_careful(&vm->userptr.repin_list) &&
- list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
-}
-
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
int i;
@@ -1280,25 +1017,17 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
drm_gpuvm_bo_put(vm_bo);
} else /* userptr or null */ {
if (!is_null && !is_cpu_addr_mirror) {
- struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+ struct xe_userptr_vma *uvma = to_userptr_vma(vma);
u64 size = end - start + 1;
int err;
- INIT_LIST_HEAD(&userptr->invalidate_link);
- INIT_LIST_HEAD(&userptr->repin_link);
vma->gpuva.gem.offset = bo_offset_or_userptr;
- mutex_init(&userptr->unmap_mutex);
- err = mmu_interval_notifier_insert(&userptr->notifier,
- current->mm,
- xe_vma_userptr(vma), size,
- &vma_userptr_notifier_ops);
+ err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
if (err) {
xe_vma_free(vma);
return ERR_PTR(err);
}
-
- userptr->notifier_seq = LONG_MAX;
}
xe_vm_get(vm);
@@ -1318,18 +1047,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
if (xe_vma_is_userptr(vma)) {
struct xe_userptr_vma *uvma = to_userptr_vma(vma);
- struct xe_userptr *userptr = &uvma->userptr;
-
- if (userptr->sg)
- xe_hmm_userptr_free_sg(uvma);
- /*
- * Since userptr pages are not pinned, we can't remove
- * the notifier until we're sure the GPU is not accessing
- * them anymore
- */
- mmu_interval_notifier_remove(&userptr->notifier);
- mutex_destroy(&userptr->unmap_mutex);
+ xe_userptr_remove(uvma);
xe_vm_put(vm);
} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
xe_vm_put(vm);
@@ -1366,11 +1085,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
if (xe_vma_is_userptr(vma)) {
xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
-
- spin_lock(&vm->userptr.invalidated_lock);
- xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
- list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
- spin_unlock(&vm->userptr.invalidated_lock);
+ xe_userptr_destroy(to_userptr_vma(vma));
} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
xe_bo_assert_held(xe_vma_bo(vma));
@@ -1418,20 +1133,19 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
+ struct xe_device *xe = xe_vma_vm(vma)->xe;
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
- int err;
+ int err = 0;
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
err = xe_vm_lock_vma(&exec, vma);
drm_exec_retry_on_contention(&exec);
if (XE_WARN_ON(err))
break;
+ xe_vma_destroy(vma, NULL);
}
-
- xe_vma_destroy(vma, NULL);
-
- drm_exec_fini(&exec);
+ xe_assert(xe, !err);
}
struct xe_vma *
@@ -1656,6 +1370,7 @@ static void vm_destroy_work_func(struct work_struct *w);
* @xe: xe device.
* @tile: tile to set up for.
* @vm: vm to set up for.
+ * @exec: The struct drm_exec object used to lock the vm resv.
*
* Sets up a pagetable tree with one page-table per level and a single
* leaf PTE. All pagetable entries point to the single page-table or,
@@ -1665,20 +1380,19 @@ static void vm_destroy_work_func(struct work_struct *w);
* Return: 0 on success, negative error code on error.
*/
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm)
+ struct xe_vm *vm, struct drm_exec *exec)
{
u8 id = tile->id;
int i;
for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
- vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
+ vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
if (IS_ERR(vm->scratch_pt[id][i])) {
int err = PTR_ERR(vm->scratch_pt[id][i]);
vm->scratch_pt[id][i] = NULL;
return err;
}
-
xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
}
@@ -1706,9 +1420,26 @@ static void xe_vm_free_scratch(struct xe_vm *vm)
}
}
+static void xe_vm_pt_destroy(struct xe_vm *vm)
+{
+ struct xe_tile *tile;
+ u8 id;
+
+ xe_vm_assert_held(vm);
+
+ for_each_tile(tile, vm->xe, id) {
+ if (vm->pt_root[id]) {
+ xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+ vm->pt_root[id] = NULL;
+ }
+ }
+}
+
struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
struct drm_gem_object *vm_resv_obj;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
struct xe_vm *vm;
int err, number_tiles = 0;
struct xe_tile *tile;
@@ -1752,7 +1483,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
INIT_LIST_HEAD(&vm->userptr.repin_list);
INIT_LIST_HEAD(&vm->userptr.invalidated);
- init_rwsem(&vm->userptr.notifier_lock);
spin_lock_init(&vm->userptr.invalidated_lock);
ttm_lru_bulk_move_init(&vm->lru_bulk_move);
@@ -1779,11 +1509,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
}
- if (flags & XE_VM_FLAG_FAULT_MODE) {
- err = xe_svm_init(vm);
- if (err)
- goto err_no_resv;
- }
+ err = xe_svm_init(vm);
+ if (err)
+ goto err_no_resv;
vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
if (!vm_resv_obj) {
@@ -1796,49 +1524,68 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
drm_gem_object_put(vm_resv_obj);
- err = xe_vm_lock(vm, true);
- if (err)
- goto err_close;
-
- if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
- vm->flags |= XE_VM_FLAG_64K;
-
- for_each_tile(tile, xe, id) {
- if (flags & XE_VM_FLAG_MIGRATION &&
- tile->id != XE_VM_FLAG_TILE_ID(flags))
- continue;
+ err = 0;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
- vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
- if (IS_ERR(vm->pt_root[id])) {
- err = PTR_ERR(vm->pt_root[id]);
- vm->pt_root[id] = NULL;
- goto err_unlock_close;
- }
- }
+ if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+ vm->flags |= XE_VM_FLAG_64K;
- if (xe_vm_has_scratch(vm)) {
for_each_tile(tile, xe, id) {
- if (!vm->pt_root[id])
+ if (flags & XE_VM_FLAG_MIGRATION &&
+ tile->id != XE_VM_FLAG_TILE_ID(flags))
continue;
- err = xe_vm_create_scratch(xe, tile, vm);
+ vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
+ &exec);
+ if (IS_ERR(vm->pt_root[id])) {
+ err = PTR_ERR(vm->pt_root[id]);
+ vm->pt_root[id] = NULL;
+ xe_vm_pt_destroy(vm);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+ }
+ if (err)
+ break;
+
+ if (xe_vm_has_scratch(vm)) {
+ for_each_tile(tile, xe, id) {
+ if (!vm->pt_root[id])
+ continue;
+
+ err = xe_vm_create_scratch(xe, tile, vm, &exec);
+ if (err) {
+ xe_vm_free_scratch(vm);
+ xe_vm_pt_destroy(vm);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+ }
if (err)
- goto err_unlock_close;
+ break;
+ vm->batch_invalidate_tlb = true;
}
- vm->batch_invalidate_tlb = true;
- }
- if (vm->flags & XE_VM_FLAG_LR_MODE)
- vm->batch_invalidate_tlb = false;
+ if (vm->flags & XE_VM_FLAG_LR_MODE) {
+ INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+ vm->batch_invalidate_tlb = false;
+ }
- /* Fill pt_root after allocating scratch tables */
- for_each_tile(tile, xe, id) {
- if (!vm->pt_root[id])
- continue;
+ /* Fill pt_root after allocating scratch tables */
+ for_each_tile(tile, xe, id) {
+ if (!vm->pt_root[id])
+ continue;
- xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+ xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+ }
}
- xe_vm_unlock(vm);
+ if (err)
+ goto err_close;
/* Kernel migration VM shouldn't have a circular loop.. */
if (!(flags & XE_VM_FLAG_MIGRATION)) {
@@ -1871,7 +1618,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
&xe->usm.next_asid, GFP_KERNEL);
up_write(&xe->usm.lock);
if (err < 0)
- goto err_unlock_close;
+ goto err_close;
vm->usm.asid = asid;
}
@@ -1880,8 +1627,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
return vm;
-err_unlock_close:
- xe_vm_unlock(vm);
err_close:
xe_vm_close_and_put(vm);
return ERR_PTR(err);
@@ -1988,9 +1733,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
vma = gpuva_to_vma(gpuva);
if (xe_vma_has_no_bo(vma)) {
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags |= XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
}
xe_vm_remove_vma(vm, vma);
@@ -2014,13 +1759,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
* destroy the pagetables immediately.
*/
xe_vm_free_scratch(vm);
-
- for_each_tile(tile, xe, id) {
- if (vm->pt_root[id]) {
- xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
- vm->pt_root[id] = NULL;
- }
- }
+ xe_vm_pt_destroy(vm);
xe_vm_unlock(vm);
/*
@@ -2034,8 +1773,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe_vma_destroy_unlocked(vma);
}
- if (xe_vm_in_fault_mode(vm))
- xe_svm_fini(vm);
+ xe_svm_fini(vm);
up_write(&vm->lock);
@@ -2328,6 +2066,8 @@ int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_
err = copy_to_user(attrs_user, mem_attrs,
args->sizeof_mem_range_attr * args->num_mem_ranges);
+ if (err)
+ err = -EFAULT;
free_mem_attrs:
kvfree(mem_attrs);
@@ -2376,9 +2116,9 @@ static const u32 region_to_mem_type[] = {
static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
bool post_commit)
{
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags |= XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
if (post_commit)
xe_vm_remove_vma(vm, vma);
}
@@ -2639,6 +2379,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
struct xe_vma_mem_attr *attr, unsigned int flags)
{
struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct xe_vma *vma;
int err = 0;
@@ -2646,9 +2387,9 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
lockdep_assert_held_write(&vm->lock);
if (bo) {
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
- drm_exec_until_all_locked(&exec) {
- err = 0;
+ err = 0;
+ xe_validation_guard(&ctx, &vm->xe->val, &exec,
+ (struct xe_val_flags) {.interruptible = true}, err) {
if (!bo->vm) {
err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
drm_exec_retry_on_contention(&exec);
@@ -2657,27 +2398,35 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
err = drm_exec_lock_obj(&exec, &bo->ttm.base);
drm_exec_retry_on_contention(&exec);
}
- if (err) {
- drm_exec_fini(&exec);
+ if (err)
return ERR_PTR(err);
- }
- }
- }
- vma = xe_vma_create(vm, bo, op->gem.offset,
- op->va.addr, op->va.addr +
- op->va.range - 1, attr, flags);
- if (IS_ERR(vma))
- goto err_unlock;
- if (xe_vma_is_userptr(vma))
- err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
- else if (!xe_vma_has_no_bo(vma) && !bo->vm)
- err = add_preempt_fences(vm, bo);
+ vma = xe_vma_create(vm, bo, op->gem.offset,
+ op->va.addr, op->va.addr +
+ op->va.range - 1, attr, flags);
+ if (IS_ERR(vma))
+ return vma;
-err_unlock:
- if (bo)
- drm_exec_fini(&exec);
+ if (!bo->vm) {
+ err = add_preempt_fences(vm, bo);
+ if (err) {
+ prep_vma_destroy(vm, vma, false);
+ xe_vma_destroy(vma, NULL);
+ }
+ }
+ }
+ if (err)
+ return ERR_PTR(err);
+ } else {
+ vma = xe_vma_create(vm, NULL, op->gem.offset,
+ op->va.addr, op->va.addr +
+ op->va.range - 1, attr, flags);
+ if (IS_ERR(vma))
+ return vma;
+ if (xe_vma_is_userptr(vma))
+ err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+ }
if (err) {
prep_vma_destroy(vm, vma, false);
xe_vma_destroy_unlocked(vma);
@@ -3021,9 +2770,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
if (vma) {
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags &= ~XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
if (post_commit)
xe_vm_insert_vma(vm, vma);
}
@@ -3042,9 +2791,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
xe_vma_destroy_unlocked(op->remap.next);
}
if (vma) {
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags &= ~XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
if (post_commit)
xe_vm_insert_vma(vm, vma);
}
@@ -3094,7 +2843,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
err = drm_exec_lock_obj(exec, &bo->ttm.base);
if (!err && validate)
err = xe_bo_validate(bo, vm,
- !xe_vm_in_preempt_fence_mode(vm));
+ !xe_vm_in_preempt_fence_mode(vm), exec);
}
return err;
@@ -3132,6 +2881,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
ctx.read_only = xe_vma_read_only(vma);
ctx.devmem_possible = devmem_possible;
ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
+ ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
/* TODO: Threading the migration */
xa_for_each(&op->prefetch_range.range, i, svm_range) {
@@ -3212,7 +2962,9 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
false);
if (!err && !xe_vma_has_no_bo(vma))
err = xe_bo_migrate(xe_vma_bo(vma),
- region_to_mem_type[region]);
+ region_to_mem_type[region],
+ NULL,
+ exec);
break;
}
default:
@@ -3475,35 +3227,37 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
struct xe_vma_ops *vops)
{
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct dma_fence *fence;
- int err;
+ int err = 0;
lockdep_assert_held_write(&vm->lock);
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES, 0);
- drm_exec_until_all_locked(&exec) {
+ xe_validation_guard(&ctx, &vm->xe->val, &exec,
+ ((struct xe_val_flags) {
+ .interruptible = true,
+ .exec_ignore_duplicates = true,
+ }), err) {
err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
drm_exec_retry_on_contention(&exec);
- if (err) {
- fence = ERR_PTR(err);
- goto unlock;
- }
+ xe_validation_retry_on_oom(&ctx, &err);
+ if (err)
+ return ERR_PTR(err);
+ xe_vm_set_validation_exec(vm, &exec);
fence = ops_execute(vm, vops);
+ xe_vm_set_validation_exec(vm, NULL);
if (IS_ERR(fence)) {
if (PTR_ERR(fence) == -ENODATA)
vm_bind_ioctl_ops_fini(vm, vops, NULL);
- goto unlock;
+ return fence;
}
vm_bind_ioctl_ops_fini(vm, vops, fence);
}
-unlock:
- drm_exec_fini(&exec);
- return fence;
+ return err ? ERR_PTR(err) : fence;
}
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
@@ -3619,6 +3373,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+ XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
+ !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_PREFETCH) ||
XE_IOCTL_DBG(xe, prefetch_region &&
@@ -4054,10 +3810,14 @@ release_vm_lock:
*/
int xe_vm_lock(struct xe_vm *vm, bool intr)
{
+ int ret;
+
if (intr)
- return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+ ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+ else
+ ret = dma_resv_lock(xe_vm_resv(vm), NULL);
- return dma_resv_lock(xe_vm_resv(vm), NULL);
+ return ret;
}
/**
@@ -4164,13 +3924,13 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
*/
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
if (xe_vma_is_userptr(vma)) {
- lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) ||
- (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) &&
+ lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
+ (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
WARN_ON_ONCE(!mmu_interval_check_retry
(&to_userptr_vma(vma)->userptr.notifier,
- to_userptr_vma(vma)->userptr.notifier_seq));
+ to_userptr_vma(vma)->userptr.pages.notifier_seq));
WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
DMA_RESV_USAGE_BOOKKEEP));
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index d631c4b25c51..ef8a5019574e 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -220,12 +220,6 @@ static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm)
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
-int xe_vm_userptr_pin(struct xe_vm *vm);
-
-int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
-
-int xe_vm_userptr_check_repin(struct xe_vm *vm);
-
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
u8 tile_mask);
@@ -266,12 +260,6 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
}
}
-int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
-
-int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
-
-bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
-
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
@@ -302,6 +290,8 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked);
*/
#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec);
+
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
#define vm_dbg drm_dbg
#else
@@ -331,7 +321,7 @@ static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict)
if (vm && !allow_res_evict) {
xe_vm_assert_held(vm);
/* Pairs with READ_ONCE in xe_vm_is_validating() */
- WRITE_ONCE(vm->validating, current);
+ WRITE_ONCE(vm->validation.validating, current);
}
}
@@ -349,7 +339,7 @@ static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict
{
if (vm && !allow_res_evict) {
/* Pairs with READ_ONCE in xe_vm_is_validating() */
- WRITE_ONCE(vm->validating, NULL);
+ WRITE_ONCE(vm->validation.validating, NULL);
}
}
@@ -367,7 +357,7 @@ static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict
static inline bool xe_vm_is_validating(struct xe_vm *vm)
{
/* Pairs with WRITE_ONCE in xe_vm_is_validating() */
- if (READ_ONCE(vm->validating) == current) {
+ if (READ_ONCE(vm->validation.validating) == current) {
xe_vm_assert_held(vm);
return true;
}
@@ -375,6 +365,34 @@ static inline bool xe_vm_is_validating(struct xe_vm *vm)
}
/**
+ * xe_vm_set_validation_exec() - Accessor to set the drm_exec object
+ * @vm: The vm we want to register a drm_exec object with.
+ * @exec: The exec object we want to register.
+ *
+ * Set the drm_exec object used to lock the vm's resv.
+ */
+static inline void xe_vm_set_validation_exec(struct xe_vm *vm, struct drm_exec *exec)
+{
+ xe_vm_assert_held(vm);
+ xe_assert(vm->xe, !!exec ^ !!vm->validation._exec);
+ vm->validation._exec = exec;
+}
+
+/**
+ * xe_vm_set_validation_exec() - Accessor to read the drm_exec object
+ * @vm: The vm we want to register a drm_exec object with.
+ *
+ * Return: The drm_exec object used to lock the vm's resv. The value
+ * is a valid pointer, %NULL, or one of the special values defined in
+ * xe_validation.h.
+ */
+static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm)
+{
+ xe_vm_assert_held(vm);
+ return vm->validation._exec;
+}
+
+/**
* xe_vm_has_valid_gpu_mapping() - Advisory helper to check if VMA or SVM range has
* a valid GPU mapping
* @tile: The tile which the GPU mapping belongs to
@@ -393,11 +411,4 @@ static inline bool xe_vm_is_validating(struct xe_vm *vm)
#define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \
((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id))
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
-void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
-#else
-static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
-{
-}
-#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 09c5783ee523..cad3cf627c3f 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -18,9 +18,8 @@ struct xe_vmas_in_madvise_range {
u64 range;
struct xe_vma **vmas;
int num_vmas;
- bool has_svm_vmas;
bool has_bo_vmas;
- bool has_userptr_vmas;
+ bool has_svm_userptr_vmas;
};
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
@@ -46,10 +45,8 @@ static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_r
if (xe_vma_bo(vma))
madvise_range->has_bo_vmas = true;
- else if (xe_vma_is_cpu_addr_mirror(vma))
- madvise_range->has_svm_vmas = true;
- else if (xe_vma_is_userptr(vma))
- madvise_range->has_userptr_vmas = true;
+ else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
+ madvise_range->has_svm_userptr_vmas = true;
if (madvise_range->num_vmas == max_vmas) {
max_vmas <<= 1;
@@ -127,8 +124,6 @@ static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
vmas[i]->attr.atomic_access = op->atomic.val;
}
- vmas[i]->attr.atomic_access = op->atomic.val;
-
bo = xe_vma_bo(vmas[i]);
if (!bo || bo->attr.atomic_access == op->atomic.val)
continue;
@@ -201,12 +196,12 @@ static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
if (xe_pt_zap_ptes(tile, vma)) {
tile_mask |= BIT(id);
- /*
- * WRITE_ONCE pairs with READ_ONCE
- * in xe_vm_has_valid_gpu_mapping()
- */
- WRITE_ONCE(vma->tile_invalidated,
- vma->tile_invalidated | BIT(id));
+ /*
+ * WRITE_ONCE pairs with READ_ONCE
+ * in xe_vm_has_valid_gpu_mapping()
+ */
+ WRITE_ONCE(vma->tile_invalidated,
+ vma->tile_invalidated | BIT(id));
}
}
}
@@ -256,7 +251,7 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
return false;
- if (XE_IOCTL_DBG(xe, args->atomic.reserved))
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
return false;
break;
}
@@ -409,29 +404,20 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
}
}
- if (madvise_range.has_userptr_vmas) {
- err = down_read_interruptible(&vm->userptr.notifier_lock);
+ if (madvise_range.has_svm_userptr_vmas) {
+ err = xe_svm_notifier_lock_interruptible(vm);
if (err)
goto err_fini;
}
- if (madvise_range.has_svm_vmas) {
- err = down_read_interruptible(&vm->svm.gpusvm.notifier_lock);
- if (err)
- goto unlock_userptr;
- }
-
attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);
err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
- if (madvise_range.has_svm_vmas)
+ if (madvise_range.has_svm_userptr_vmas)
xe_svm_notifier_unlock(vm);
-unlock_userptr:
- if (madvise_range.has_userptr_vmas)
- up_read(&vm->userptr.notifier_lock);
err_fini:
if (madvise_range.has_bo_vmas)
drm_exec_fini(&exec);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index e1a786db5f89..da39940501d8 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -17,6 +17,7 @@
#include "xe_device_types.h"
#include "xe_pt_types.h"
#include "xe_range_fence.h"
+#include "xe_userptr.h"
struct xe_bo;
struct xe_svm_range;
@@ -46,37 +47,6 @@ struct xe_vm_pgtable_update_op;
#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8)
#define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9)
-/** struct xe_userptr - User pointer */
-struct xe_userptr {
- /** @invalidate_link: Link for the vm::userptr.invalidated list */
- struct list_head invalidate_link;
- /** @userptr: link into VM repin list if userptr. */
- struct list_head repin_link;
- /**
- * @notifier: MMU notifier for user pointer (invalidation call back)
- */
- struct mmu_interval_notifier notifier;
- /** @sgt: storage for a scatter gather table */
- struct sg_table sgt;
- /** @sg: allocated scatter gather table */
- struct sg_table *sg;
- /** @notifier_seq: notifier sequence number */
- unsigned long notifier_seq;
- /** @unmap_mutex: Mutex protecting dma-unmapping */
- struct mutex unmap_mutex;
- /**
- * @initial_bind: user pointer has been bound at least once.
- * write: vm->userptr.notifier_lock in read mode and vm->resv held.
- * read: vm->userptr.notifier_lock in write mode or vm->resv held.
- */
- bool initial_bind;
- /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */
- bool mapped;
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
- u32 divisor;
-#endif
-};
-
/**
* struct xe_vma_mem_attr - memory attributes associated with vma
*/
@@ -140,10 +110,10 @@ struct xe_vma {
/**
* @tile_invalidated: Tile mask of binding are invalidated for this VMA.
- * protected by BO's resv and for userptrs, vm->userptr.notifier_lock in
- * write mode for writing or vm->userptr.notifier_lock in read mode and
+ * protected by BO's resv and for userptrs, vm->svm.gpusvm.notifier_lock in
+ * write mode for writing or vm->svm.gpusvm.notifier_lock in read mode and
* the vm->resv. For stable reading, BO's resv or userptr
- * vm->userptr.notifier_lock in read mode is required. Can be
+ * vm->svm.gpusvm.notifier_lock in read mode is required. Can be
* opportunistically read with READ_ONCE outside of locks.
*/
u8 tile_invalidated;
@@ -154,7 +124,7 @@ struct xe_vma {
/**
* @tile_present: Tile mask of binding are present for this VMA.
* protected by vm->lock, vm->resv and for userptrs,
- * vm->userptr.notifier_lock for writing. Needs either for reading,
+ * vm->svm.gpusvm.notifier_lock for writing. Needs either for reading,
* but if reading is done under the vm->lock only, it needs to be held
* in write mode.
*/
@@ -289,33 +259,7 @@ struct xe_vm {
const struct xe_pt_ops *pt_ops;
/** @userptr: user pointer state */
- struct {
- /**
- * @userptr.repin_list: list of VMAs which are user pointers,
- * and needs repinning. Protected by @lock.
- */
- struct list_head repin_list;
- /**
- * @notifier_lock: protects notifier in write mode and
- * submission in read mode.
- */
- struct rw_semaphore notifier_lock;
- /**
- * @userptr.invalidated_lock: Protects the
- * @userptr.invalidated list.
- */
- spinlock_t invalidated_lock;
- /**
- * @userptr.invalidated: List of invalidated userptrs, not yet
- * picked
- * up for revalidation. Protected from access with the
- * @invalidated_lock. Removing items from the list
- * additionally requires @lock in write mode, and adding
- * items to the list requires either the @userptr.notifier_lock in
- * write mode, OR @lock in write mode.
- */
- struct list_head invalidated;
- } userptr;
+ struct xe_userptr_vm userptr;
/** @preempt: preempt state */
struct {
@@ -363,18 +307,34 @@ struct xe_vm {
} error_capture;
/**
+ * @validation: Validation data only valid with the vm resv held.
+ * Note: This is really task state of the task holding the vm resv,
+ * and moving forward we should
+ * come up with a better way of passing this down the call-
+ * chain.
+ */
+ struct {
+ /**
+ * @validation.validating: The task that is currently making bos resident.
+ * for this vm.
+ * Protected by the VM's resv for writing. Opportunistic reading can be done
+ * using READ_ONCE. Note: This is a workaround for the
+ * TTM eviction_valuable() callback not being passed a struct
+ * ttm_operation_context(). Future work might want to address this.
+ */
+ struct task_struct *validating;
+ /**
+ * @validation.exec The drm_exec context used when locking the vm resv.
+ * Protected by the vm's resv.
+ */
+ struct drm_exec *_exec;
+ } validation;
+
+ /**
* @tlb_flush_seqno: Required TLB flush seqno for the next exec.
* protected by the vm resv.
*/
u64 tlb_flush_seqno;
- /**
- * @validating: The task that is currently making bos resident for this vm.
- * Protected by the VM's resv for writing. Opportunistic reading can be done
- * using READ_ONCE. Note: This is a workaround for the
- * TTM eviction_valuable() callback not being passed a struct
- * ttm_operation_context(). Future work might want to address this.
- */
- struct task_struct *validating;
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 52c7df4c3afd..cd03891654a1 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -39,7 +39,8 @@
* Register Immediate commands) once when initializing the device and saved in
* the default context. That default context is then used on every context
* creation to have a "primed golden context", i.e. a context image that
- * already contains the changes needed to all the registers.
+ * already contains the changes needed to all the registers. See
+ * drivers/gpu/drm/xe/xe_lrc.c for default context handling.
*
* - Engine workarounds: the list of these WAs is applied whenever the specific
* engine is reset. It's also possible that a set of engine classes share a
@@ -48,10 +49,10 @@
* them need to keeep the workaround programming: the approach taken in the
* driver is to tie those workarounds to the first compute/render engine that
* is registered. When executing with GuC submission, engine resets are
- * outside of kernel driver control, hence the list of registers involved in
+ * outside of kernel driver control, hence the list of registers involved is
* written once, on engine initialization, and then passed to GuC, that
* saves/restores their values before/after the reset takes place. See
- * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ * drivers/gpu/drm/xe/xe_guc_ads.c for reference.
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -66,21 +67,39 @@
* hardware on every HW context restore. These buffers are created and
* programmed in the default context so the hardware always go through those
* programming sequences when switching contexts. The support for workaround
- * batchbuffers is enabled these hardware mechanisms:
+ * batchbuffers is enabled via these hardware mechanisms:
*
- * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
- * context, pointing the hardware to jump to that location when that offset
- * is reached in the context restore. Workaround batchbuffer in the driver
- * currently uses this mechanism for all platforms.
+ * #. INDIRECT_CTX (also known as **mid context restore bb**): A batchbuffer
+ * and an offset are provided in the default context, pointing the hardware
+ * to jump to that location when that offset is reached in the context
+ * restore. When a context is being restored, this is executed after the
+ * ring context, in the middle (or beginning) of the engine context image.
*
- * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
- * pointing the hardware to a buffer to continue executing after the
- * engine registers are restored in a context restore sequence. This is
- * currently not used in the driver.
+ * #. BB_PER_CTX_PTR (also known as **post context restore bb**): A
+ * batchbuffer is provided in the default context, pointing the hardware to
+ * a buffer to continue executing after the engine registers are restored
+ * in a context restore sequence.
+ *
+ * Below is the timeline for a context restore sequence:
+ *
+ * .. code::
+ *
+ * INDIRECT_CTX_OFFSET
+ * |----------->|
+ * .------------.------------.-------------.------------.--------------.-----------.
+ * |Ring | Engine | Mid-context | Engine | Post-context | Ring |
+ * |Restore | Restore (1)| BB Restore | Restore (2)| BB Restore | Execution |
+ * `------------'------------'-------------'------------'--------------'-----------'
*
* - Other/OOB: There are WAs that, due to their nature, cannot be applied from
* a central place. Those are peppered around the rest of the code, as needed.
- * Workarounds related to the display IP are the main example.
+ * There's a central place to control which workarounds are enabled:
+ * drivers/gpu/drm/xe/xe_wa_oob.rules for GT workarounds and
+ * drivers/gpu/drm/xe/xe_device_wa_oob.rules for device/SoC workarounds.
+ * These files only record which workarounds are enabled: during early device
+ * initialization those rules are evaluated and recorded by the driver. Then
+ * later the driver checks with ``XE_GT_WA()`` and ``XE_DEVICE_WA()`` to
+ * implement them.
*
* .. [1] Technically, some registers are powercontext saved & restored, so they
* survive a suspend/resume. In practice, writing them again is not too
@@ -612,6 +631,13 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
},
+ { XE_RTP_NAME("18041344222"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute),
+ FUNC(xe_rtp_match_not_sriov_vf),
+ FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)),
+ XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE))
+ },
/* Xe2_LPM */
@@ -672,6 +698,13 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
+ { XE_RTP_NAME("18041344222"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+ FUNC(xe_rtp_match_first_render_or_compute),
+ FUNC(xe_rtp_match_not_sriov_vf),
+ FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)),
+ XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE))
+ },
};
static const struct xe_rtp_entry_sr lrc_was[] = {
@@ -879,6 +912,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
DIS_PARTIAL_AUTOSTRIP |
DIS_AUTOSTRIP))
},
+ { XE_RTP_NAME("22021007897"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3003), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
+ },
};
static __maybe_unused const struct xe_rtp_entry oob_was[] = {
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 338c344dcd7d..f3a6d5d239ce 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -49,7 +49,6 @@
16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
-no_media_l3 MEDIA_VERSION_RANGE(3000, 3002)
14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
16021333562 GRAPHICS_VERSION_RANGE(1200, 1274)
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index 274989ea1fb4..edc72052e27a 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -1,6 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
-use kernel::{auxiliary, bindings, c_str, device::Core, pci, prelude::*, sizes::SZ_16M, sync::Arc};
+use kernel::{
+ auxiliary, c_str,
+ device::Core,
+ pci,
+ pci::{Class, ClassMask, Vendor},
+ prelude::*,
+ sizes::SZ_16M,
+ sync::Arc,
+};
use crate::gpu::Gpu;
@@ -18,10 +26,25 @@ kernel::pci_device_table!(
PCI_TABLE,
MODULE_PCI_TABLE,
<NovaCore as pci::Driver>::IdInfo,
- [(
- pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_NVIDIA, bindings::PCI_ANY_ID as u32),
- ()
- )]
+ [
+ // Modern NVIDIA GPUs will show up as either VGA or 3D controllers.
+ (
+ pci::DeviceId::from_class_and_vendor(
+ Class::DISPLAY_VGA,
+ ClassMask::ClassSubclass,
+ Vendor::NVIDIA
+ ),
+ ()
+ ),
+ (
+ pci::DeviceId::from_class_and_vendor(
+ Class::DISPLAY_3D,
+ ClassMask::ClassSubclass,
+ Vendor::NVIDIA
+ ),
+ ()
+ ),
+ ]
);
impl pci::Driver for NovaCore {
@@ -34,14 +57,19 @@ impl pci::Driver for NovaCore {
pdev.enable_device_mem()?;
pdev.set_master();
- let bar = Arc::pin_init(
+ let devres_bar = Arc::pin_init(
pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")),
GFP_KERNEL,
)?;
+ // Used to provided a `&Bar0` to `Gpu::new` without tying it to the lifetime of
+ // `devres_bar`.
+ let bar_clone = Arc::clone(&devres_bar);
+ let bar = bar_clone.access(pdev.as_ref())?;
+
let this = KBox::pin_init(
try_pin_init!(Self {
- gpu <- Gpu::new(pdev, bar)?,
+ gpu <- Gpu::new(pdev, devres_bar, bar),
_reg: auxiliary::Registration::new(
pdev.as_ref(),
c_str!("nova-drm"),
@@ -54,4 +82,8 @@ impl pci::Driver for NovaCore {
Ok(this)
}
+
+ fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) {
+ this.gpu.unbind(pdev.as_ref());
+ }
}
diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index 50437c67c14a..37e6298195e4 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -4,16 +4,17 @@
use core::ops::Deref;
use hal::FalconHal;
-use kernel::bindings;
use kernel::device;
+use kernel::dma::DmaAddress;
use kernel::prelude::*;
+use kernel::sync::aref::ARef;
use kernel::time::Delta;
-use kernel::types::ARef;
use crate::dma::DmaObject;
use crate::driver::Bar0;
use crate::gpu::Chipset;
use crate::regs;
+use crate::regs::macros::RegisterBase;
use crate::util;
pub(crate) mod gsp;
@@ -274,14 +275,25 @@ impl From<bool> for FalconFbifMemType {
}
}
-/// Trait defining the parameters of a given Falcon instance.
-pub(crate) trait FalconEngine: Sync {
- /// Base I/O address for the falcon, relative from which its registers are accessed.
- const BASE: usize;
+/// Type used to represent the `PFALCON` registers address base for a given falcon engine.
+pub(crate) struct PFalconBase(());
+
+/// Type used to represent the `PFALCON2` registers address base for a given falcon engine.
+pub(crate) struct PFalcon2Base(());
+
+/// Trait defining the parameters of a given Falcon engine.
+///
+/// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used
+/// to identify a given Falcon instance with register I/O methods.
+pub(crate) trait FalconEngine:
+ Send + Sync + RegisterBase<PFalconBase> + RegisterBase<PFalcon2Base> + Sized
+{
+ /// Singleton of the engine, used to identify it with register I/O methods.
+ const ID: Self;
}
/// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM).
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub(crate) struct FalconLoadTarget {
/// Offset from the start of the source object to copy from.
pub(crate) src_start: u32,
@@ -292,7 +304,7 @@ pub(crate) struct FalconLoadTarget {
}
/// Parameters for the falcon boot ROM.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub(crate) struct FalconBromParams {
/// Offset in `DMEM`` of the firmware's signature.
pub(crate) pkc_data_offset: u32,
@@ -343,13 +355,13 @@ impl<E: FalconEngine + 'static> Falcon<E> {
bar: &Bar0,
need_riscv: bool,
) -> Result<Self> {
- let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, E::BASE);
+ let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, &E::ID);
// Check that the revision and security model contain valid values.
let _ = hwcfg1.core_rev()?;
let _ = hwcfg1.security_model()?;
if need_riscv {
- let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE);
+ let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID);
if !hwcfg2.riscv() {
dev_err!(
dev,
@@ -369,7 +381,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result {
// TIMEOUT: memory scrubbing should complete in less than 20ms.
util::wait_on(Delta::from_millis(20), || {
- if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE).mem_scrubbing_done() {
+ if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID).mem_scrubbing_done() {
Some(())
} else {
None
@@ -379,12 +391,12 @@ impl<E: FalconEngine + 'static> Falcon<E> {
/// Reset the falcon engine.
fn reset_eng(&self, bar: &Bar0) -> Result {
- let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE);
+ let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID);
// According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set
// RESET_READY so a non-failing timeout is used.
let _ = util::wait_on(Delta::from_micros(150), || {
- let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE);
+ let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID);
if r.reset_ready() {
Some(())
} else {
@@ -392,13 +404,13 @@ impl<E: FalconEngine + 'static> Falcon<E> {
}
});
- regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(true));
+ regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(true));
// TODO[DLAY]: replace with udelay() or equivalent once available.
// TIMEOUT: falcon engine should not take more than 10us to reset.
let _: Result = util::wait_on(Delta::from_micros(10), || None);
- regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(false));
+ regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(false));
self.reset_wait_mem_scrubbing(bar)?;
@@ -413,7 +425,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
regs::NV_PFALCON_FALCON_RM::default()
.set_value(regs::NV_PMC_BOOT_0::read(bar).into())
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
Ok(())
}
@@ -443,7 +455,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
fw.dma_handle_with_offset(load_offsets.src_start as usize)?,
),
};
- if dma_start % bindings::dma_addr_t::from(DMA_LEN) > 0 {
+ if dma_start % DmaAddress::from(DMA_LEN) > 0 {
dev_err!(
self.dev,
"DMA transfer start addresses must be a multiple of {}",
@@ -451,44 +463,57 @@ impl<E: FalconEngine + 'static> Falcon<E> {
);
return Err(EINVAL);
}
- if load_offsets.len % DMA_LEN > 0 {
- dev_err!(
- self.dev,
- "DMA transfer length must be a multiple of {}",
- DMA_LEN
- );
- return Err(EINVAL);
- }
+
+ // DMA transfers can only be done in units of 256 bytes. Compute how many such transfers we
+ // need to perform.
+ let num_transfers = load_offsets.len.div_ceil(DMA_LEN);
+
+ // Check that the area we are about to transfer is within the bounds of the DMA object.
+ // Upper limit of transfer is `(num_transfers * DMA_LEN) + load_offsets.src_start`.
+ match num_transfers
+ .checked_mul(DMA_LEN)
+ .and_then(|size| size.checked_add(load_offsets.src_start))
+ {
+ None => {
+ dev_err!(self.dev, "DMA transfer length overflow");
+ return Err(EOVERFLOW);
+ }
+ Some(upper_bound) if upper_bound as usize > fw.size() => {
+ dev_err!(self.dev, "DMA transfer goes beyond range of DMA object");
+ return Err(EINVAL);
+ }
+ Some(_) => (),
+ };
// Set up the base source DMA address.
regs::NV_PFALCON_FALCON_DMATRFBASE::default()
.set_base((dma_start >> 8) as u32)
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
regs::NV_PFALCON_FALCON_DMATRFBASE1::default()
.set_base((dma_start >> 40) as u16)
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::default()
.set_size(DmaTrfCmdSize::Size256B)
.set_imem(target_mem == FalconMem::Imem)
.set_sec(if sec { 1 } else { 0 });
- for pos in (0..load_offsets.len).step_by(DMA_LEN as usize) {
+ for pos in (0..num_transfers).map(|i| i * DMA_LEN) {
// Perform a transfer of size `DMA_LEN`.
regs::NV_PFALCON_FALCON_DMATRFMOFFS::default()
.set_offs(load_offsets.dst_start + pos)
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
regs::NV_PFALCON_FALCON_DMATRFFBOFFS::default()
.set_offs(src_start + pos)
- .write(bar, E::BASE);
- cmd.write(bar, E::BASE);
+ .write(bar, &E::ID);
+ cmd.write(bar, &E::ID);
// Wait for the transfer to complete.
// TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories
// should ever take that long.
util::wait_on(Delta::from_secs(2), || {
- let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, E::BASE);
+ let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID);
if r.idle() {
Some(())
} else {
@@ -502,9 +527,9 @@ impl<E: FalconEngine + 'static> Falcon<E> {
/// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
pub(crate) fn dma_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result {
- regs::NV_PFALCON_FBIF_CTL::alter(bar, E::BASE, |v| v.set_allow_phys_no_ctx(true));
- regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, E::BASE);
- regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, E::BASE, |v| {
+ regs::NV_PFALCON_FBIF_CTL::alter(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true));
+ regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID);
+ regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, 0, |v| {
v.set_target(FalconFbifTarget::CoherentSysmem)
.set_mem_type(FalconFbifMemType::Physical)
});
@@ -517,7 +542,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
// Set `BootVec` to start of non-secure code.
regs::NV_PFALCON_FALCON_BOOTVEC::default()
.set_value(fw.boot_addr())
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
Ok(())
}
@@ -538,27 +563,27 @@ impl<E: FalconEngine + 'static> Falcon<E> {
if let Some(mbox0) = mbox0 {
regs::NV_PFALCON_FALCON_MAILBOX0::default()
.set_value(mbox0)
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
}
if let Some(mbox1) = mbox1 {
regs::NV_PFALCON_FALCON_MAILBOX1::default()
.set_value(mbox1)
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
}
- match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE).alias_en() {
+ match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() {
true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default()
.set_startcpu(true)
- .write(bar, E::BASE),
+ .write(bar, &E::ID),
false => regs::NV_PFALCON_FALCON_CPUCTL::default()
.set_startcpu(true)
- .write(bar, E::BASE),
+ .write(bar, &E::ID),
}
// TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds.
util::wait_on(Delta::from_secs(2), || {
- let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE);
+ let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID);
if r.halted() {
Some(())
} else {
@@ -567,8 +592,8 @@ impl<E: FalconEngine + 'static> Falcon<E> {
})?;
let (mbox0, mbox1) = (
- regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, E::BASE).value(),
- regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, E::BASE).value(),
+ regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(),
+ regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value(),
);
Ok((mbox0, mbox1))
diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs
index d622e9a64470..f17599cb49fa 100644
--- a/drivers/gpu/nova-core/falcon/gsp.rs
+++ b/drivers/gpu/nova-core/falcon/gsp.rs
@@ -2,23 +2,31 @@
use crate::{
driver::Bar0,
- falcon::{Falcon, FalconEngine},
- regs,
+ falcon::{Falcon, FalconEngine, PFalcon2Base, PFalconBase},
+ regs::{self, macros::RegisterBase},
};
/// Type specifying the `Gsp` falcon engine. Cannot be instantiated.
pub(crate) struct Gsp(());
-impl FalconEngine for Gsp {
+impl RegisterBase<PFalconBase> for Gsp {
const BASE: usize = 0x00110000;
}
+impl RegisterBase<PFalcon2Base> for Gsp {
+ const BASE: usize = 0x00111000;
+}
+
+impl FalconEngine for Gsp {
+ const ID: Self = Gsp(());
+}
+
impl Falcon<Gsp> {
/// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to
/// allow GSP to signal CPU for processing new messages in message queue.
pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) {
regs::NV_PFALCON_FALCON_IRQSCLR::default()
.set_swgen0(true)
- .write(bar, Gsp::BASE);
+ .write(bar, &Gsp::ID);
}
}
diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs
index b233bc365882..bba288455617 100644
--- a/drivers/gpu/nova-core/falcon/hal.rs
+++ b/drivers/gpu/nova-core/falcon/hal.rs
@@ -13,7 +13,7 @@ mod ga102;
/// Implements chipset-specific low-level operations. The trait is generic against [`FalconEngine`]
/// so its `BASE` parameter can be used in order to avoid runtime bound checks when accessing
/// registers.
-pub(crate) trait FalconHal<E: FalconEngine>: Sync {
+pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync {
/// Activates the Falcon core if the engine is a risvc/falcon dual engine.
fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result {
Ok(())
diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs
index 52c33d3f22a8..0b1cbe7853b3 100644
--- a/drivers/gpu/nova-core/falcon/hal/ga102.rs
+++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs
@@ -16,15 +16,15 @@ use crate::util;
use super::FalconHal;
fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result {
- let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE);
+ let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID);
if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon {
regs::NV_PRISCV_RISCV_BCR_CTRL::default()
.set_core_select(PeregrineCoreSelect::Falcon)
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
// TIMEOUT: falcon core should take less than 10ms to report being enabled.
util::wait_on(Delta::from_millis(10), || {
- let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE);
+ let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID);
if r.valid() {
Some(())
} else {
@@ -42,50 +42,47 @@ fn signature_reg_fuse_version_ga102(
engine_id_mask: u16,
ucode_id: u8,
) -> Result<u32> {
- // TODO[REGA]: The ucode fuse versions are contained in the
- // FUSE_OPT_FPF_<ENGINE>_UCODE<X>_VERSION registers, which are an array. Our register
- // definition macros do not allow us to manage them properly, so we need to hardcode their
- // addresses for now. Clean this up once we support register arrays.
+ const NV_FUSE_OPT_FPF_SIZE: u8 = regs::NV_FUSE_OPT_FPF_SIZE as u8;
// Each engine has 16 ucode version registers numbered from 1 to 16.
- if ucode_id == 0 || ucode_id > 16 {
- dev_err!(dev, "invalid ucode id {:#x}", ucode_id);
- return Err(EINVAL);
- }
+ let ucode_idx = match ucode_id {
+ 1..=NV_FUSE_OPT_FPF_SIZE => (ucode_id - 1) as usize,
+ _ => {
+ dev_err!(dev, "invalid ucode id {:#x}", ucode_id);
+ return Err(EINVAL);
+ }
+ };
- // Base address of the FUSE registers array corresponding to the engine.
- let reg_fuse_base = if engine_id_mask & 0x0001 != 0 {
- regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::OFFSET
+ // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provable valid
+ // at build-time.
+ let reg_fuse_version = if engine_id_mask & 0x0001 != 0 {
+ regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::read(bar, ucode_idx).data()
} else if engine_id_mask & 0x0004 != 0 {
- regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::OFFSET
+ regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::read(bar, ucode_idx).data()
} else if engine_id_mask & 0x0400 != 0 {
- regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::OFFSET
+ regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::read(bar, ucode_idx).data()
} else {
dev_err!(dev, "unexpected engine_id_mask {:#x}", engine_id_mask);
return Err(EINVAL);
};
- // Read `reg_fuse_base[ucode_id - 1]`.
- let reg_fuse_version =
- bar.read32(reg_fuse_base + ((ucode_id - 1) as usize * core::mem::size_of::<u32>()));
-
// TODO[NUMM]: replace with `last_set_bit` once it lands.
- Ok(u32::BITS - reg_fuse_version.leading_zeros())
+ Ok(u16::BITS - reg_fuse_version.leading_zeros())
}
fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result {
regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default()
.set_value(params.pkc_data_offset)
- .write(bar, E::BASE);
+ .write(bar, &E::ID, 0);
regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default()
.set_value(u32::from(params.engine_id_mask))
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default()
.set_ucode_id(params.ucode_id)
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
regs::NV_PFALCON2_FALCON_MOD_SEL::default()
.set_algo(FalconModSelAlgo::Rsa3k)
- .write(bar, E::BASE);
+ .write(bar, &E::ID);
Ok(())
}
diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs
index 5147d9e2a7fe..815786c8480d 100644
--- a/drivers/gpu/nova-core/falcon/sec2.rs
+++ b/drivers/gpu/nova-core/falcon/sec2.rs
@@ -1,10 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
-use crate::falcon::FalconEngine;
+use crate::falcon::{FalconEngine, PFalcon2Base, PFalconBase};
+use crate::regs::macros::RegisterBase;
/// Type specifying the `Sec2` falcon engine. Cannot be instantiated.
pub(crate) struct Sec2(());
-impl FalconEngine for Sec2 {
+impl RegisterBase<PFalconBase> for Sec2 {
const BASE: usize = 0x00840000;
}
+
+impl RegisterBase<PFalcon2Base> for Sec2 {
+ const BASE: usize = 0x00841000;
+}
+
+impl FalconEngine for Sec2 {
+ const ID: Self = Sec2(());
+}
diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
index 4a702525fff4..27d9edab8347 100644
--- a/drivers/gpu/nova-core/fb.rs
+++ b/drivers/gpu/nova-core/fb.rs
@@ -3,8 +3,9 @@
use core::ops::Range;
use kernel::prelude::*;
+use kernel::ptr::{Alignable, Alignment};
use kernel::sizes::*;
-use kernel::types::ARef;
+use kernel::sync::aref::ARef;
use kernel::{dev_warn, device};
use crate::dma::DmaObject;
@@ -130,10 +131,9 @@ impl FbLayout {
};
let frts = {
- const FRTS_DOWN_ALIGN: u64 = SZ_128K as u64;
+ const FRTS_DOWN_ALIGN: Alignment = Alignment::new::<SZ_128K>();
const FRTS_SIZE: u64 = SZ_1M as u64;
- // TODO[NUMM]: replace with `align_down` once it lands.
- let frts_base = (vga_workspace.start & !(FRTS_DOWN_ALIGN - 1)) - FRTS_SIZE;
+ let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE;
frts_base..frts_base + FRTS_SIZE
};
diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs
index 2931912ddba0..4179a74a2342 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -4,48 +4,36 @@
//! to be loaded into a given execution unit.
use core::marker::PhantomData;
+use core::mem::size_of;
use kernel::device;
use kernel::firmware;
use kernel::prelude::*;
use kernel::str::CString;
+use kernel::transmute::FromBytes;
use crate::dma::DmaObject;
use crate::falcon::FalconFirmware;
use crate::gpu;
-use crate::gpu::Chipset;
+pub(crate) mod booter;
pub(crate) mod fwsec;
-
-pub(crate) const FIRMWARE_VERSION: &str = "535.113.01";
-
-/// Structure encapsulating the firmware blobs required for the GPU to operate.
-#[expect(dead_code)]
-pub(crate) struct Firmware {
- booter_load: firmware::Firmware,
- booter_unload: firmware::Firmware,
- bootloader: firmware::Firmware,
- gsp: firmware::Firmware,
-}
-
-impl Firmware {
- pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result<Firmware> {
- let mut chip_name = CString::try_from_fmt(fmt!("{chipset}"))?;
- chip_name.make_ascii_lowercase();
- let chip_name = &*chip_name;
-
- let request = |name_| {
- CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name_}-{ver}.bin"))
- .and_then(|path| firmware::Firmware::request(&path, dev))
- };
-
- Ok(Firmware {
- booter_load: request("booter_load")?,
- booter_unload: request("booter_unload")?,
- bootloader: request("bootloader")?,
- gsp: request("gsp")?,
- })
- }
+pub(crate) mod gsp;
+pub(crate) mod riscv;
+
+pub(crate) const FIRMWARE_VERSION: &str = "570.144";
+
+/// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`.
+fn request_firmware(
+ dev: &device::Device,
+ chipset: gpu::Chipset,
+ name: &str,
+ ver: &str,
+) -> Result<firmware::Firmware> {
+ let chip_name = chipset.name();
+
+ CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name}-{ver}.bin"))
+ .and_then(|path| firmware::Firmware::request(&path, dev))
}
/// Structure used to describe some firmwares, notably FWSEC-FRTS.
@@ -150,6 +138,65 @@ impl<F: FalconFirmware> FirmwareDmaObject<F, Unsigned> {
}
}
+/// Header common to most firmware files.
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct BinHdr {
+ /// Magic number, must be `0x10de`.
+ bin_magic: u32,
+ /// Version of the header.
+ bin_ver: u32,
+ /// Size in bytes of the binary (to be ignored).
+ bin_size: u32,
+ /// Offset of the start of the application-specific header.
+ header_offset: u32,
+ /// Offset of the start of the data payload.
+ data_offset: u32,
+ /// Size in bytes of the data payload.
+ data_size: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for BinHdr {}
+
+// A firmware blob starting with a `BinHdr`.
+struct BinFirmware<'a> {
+ hdr: BinHdr,
+ fw: &'a [u8],
+}
+
+impl<'a> BinFirmware<'a> {
+ /// Interpret `fw` as a firmware image starting with a [`BinHdr`], and returns the
+ /// corresponding [`BinFirmware`] that can be used to extract its payload.
+ fn new(fw: &'a firmware::Firmware) -> Result<Self> {
+ const BIN_MAGIC: u32 = 0x10de;
+ let fw = fw.data();
+
+ fw.get(0..size_of::<BinHdr>())
+ // Extract header.
+ .and_then(BinHdr::from_bytes_copy)
+ // Validate header.
+ .and_then(|hdr| {
+ if hdr.bin_magic == BIN_MAGIC {
+ Some(hdr)
+ } else {
+ None
+ }
+ })
+ .map(|hdr| Self { hdr, fw })
+ .ok_or(EINVAL)
+ }
+
+ /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of
+ /// the firmware image.
+ fn data(&self) -> Option<&[u8]> {
+ let fw_start = self.hdr.data_offset as usize;
+ let fw_size = self.hdr.data_size as usize;
+
+ self.fw.get(fw_start..fw_start + fw_size)
+ }
+}
+
pub(crate) struct ModInfoBuilder<const N: usize>(firmware::ModInfoBuilder<N>);
impl<const N: usize> ModInfoBuilder<N> {
@@ -180,8 +227,8 @@ impl<const N: usize> ModInfoBuilder<N> {
let mut this = Self(firmware::ModInfoBuilder::new(module_name));
let mut i = 0;
- while i < gpu::Chipset::NAMES.len() {
- this = this.make_entry_chipset(gpu::Chipset::NAMES[i]);
+ while i < gpu::Chipset::ALL.len() {
+ this = this.make_entry_chipset(gpu::Chipset::ALL[i].name());
i += 1;
}
diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs
new file mode 100644
index 000000000000..b4ff1b17e4a0
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/booter.rs
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Support for loading and patching the `Booter` firmware. `Booter` is a Heavy Secured firmware
+//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon
+//! (and optionally unload it through a separate firmware image).
+
+use core::marker::PhantomData;
+use core::mem::size_of;
+use core::ops::Deref;
+
+use kernel::device;
+use kernel::prelude::*;
+use kernel::transmute::FromBytes;
+
+use crate::dma::DmaObject;
+use crate::driver::Bar0;
+use crate::falcon::sec2::Sec2;
+use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget};
+use crate::firmware::{BinFirmware, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned};
+use crate::gpu::Chipset;
+
+/// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at
+/// `offset` in `slice`.
+fn frombytes_at<S: FromBytes + Sized>(slice: &[u8], offset: usize) -> Result<S> {
+ slice
+ .get(offset..offset + size_of::<S>())
+ .and_then(S::from_bytes_copy)
+ .ok_or(EINVAL)
+}
+
+/// Heavy-Secured firmware header.
+///
+/// Such firmwares have an application-specific payload that needs to be patched with a given
+/// signature.
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct HsHeaderV2 {
+ /// Offset to the start of the signatures.
+ sig_prod_offset: u32,
+ /// Size in bytes of the signatures.
+ sig_prod_size: u32,
+ /// Offset to a `u32` containing the location at which to patch the signature in the microcode
+ /// image.
+ patch_loc_offset: u32,
+ /// Offset to a `u32` containing the index of the signature to patch.
+ patch_sig_offset: u32,
+ /// Start offset to the signature metadata.
+ meta_data_offset: u32,
+ /// Size in bytes of the signature metadata.
+ meta_data_size: u32,
+ /// Offset to a `u32` containing the number of signatures in the signatures section.
+ num_sig_offset: u32,
+ /// Offset of the application-specific header.
+ header_offset: u32,
+ /// Size in bytes of the application-specific header.
+ header_size: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for HsHeaderV2 {}
+
+/// Heavy-Secured Firmware image container.
+///
+/// This provides convenient access to the fields of [`HsHeaderV2`] that are actually indices to
+/// read from in the firmware data.
+struct HsFirmwareV2<'a> {
+ hdr: HsHeaderV2,
+ fw: &'a [u8],
+}
+
+impl<'a> HsFirmwareV2<'a> {
+ /// Interprets the header of `bin_fw` as a [`HsHeaderV2`] and returns an instance of
+ /// `HsFirmwareV2` for further parsing.
+ ///
+ /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image.
+ fn new(bin_fw: &BinFirmware<'a>) -> Result<Self> {
+ frombytes_at::<HsHeaderV2>(bin_fw.fw, bin_fw.hdr.header_offset as usize)
+ .map(|hdr| Self { hdr, fw: bin_fw.fw })
+ }
+
+ /// Returns the location at which the signatures should be patched in the microcode image.
+ ///
+ /// Fails if the offset of the patch location is outside the bounds of the firmware
+ /// image.
+ fn patch_location(&self) -> Result<u32> {
+ frombytes_at::<u32>(self.fw, self.hdr.patch_loc_offset as usize)
+ }
+
+ /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the
+ /// firmware is unsigned.
+ ///
+ /// Fails if the pointed signatures are outside the bounds of the firmware image.
+ fn signatures_iter(&'a self) -> Result<impl Iterator<Item = BooterSignature<'a>>> {
+ let num_sig = frombytes_at::<u32>(self.fw, self.hdr.num_sig_offset as usize)?;
+ let iter = match self.hdr.sig_prod_size.checked_div(num_sig) {
+ // If there are no signatures, return an iterator that will yield zero elements.
+ None => (&[] as &[u8]).chunks_exact(1),
+ Some(sig_size) => {
+ let patch_sig = frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset as usize)?;
+ let signatures_start = (self.hdr.sig_prod_offset + patch_sig) as usize;
+
+ self.fw
+ // Get signatures range.
+ .get(signatures_start..signatures_start + self.hdr.sig_prod_size as usize)
+ .ok_or(EINVAL)?
+ .chunks_exact(sig_size as usize)
+ }
+ };
+
+ // Map the byte slices into signatures.
+ Ok(iter.map(BooterSignature))
+ }
+}
+
+/// Signature parameters, as defined in the firmware.
+#[repr(C)]
+struct HsSignatureParams {
+ /// Fuse version to use.
+ fuse_ver: u32,
+ /// Mask of engine IDs this firmware applies to.
+ engine_id_mask: u32,
+ /// ID of the microcode.
+ ucode_id: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for HsSignatureParams {}
+
+impl HsSignatureParams {
+ /// Returns the signature parameters contained in `hs_fw`.
+ ///
+ /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or
+ /// if its size doesn't match that of [`HsSignatureParams`].
+ fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> {
+ let start = hs_fw.hdr.meta_data_offset as usize;
+ let end = start
+ .checked_add(hs_fw.hdr.meta_data_size as usize)
+ .ok_or(EINVAL)?;
+
+ hs_fw
+ .fw
+ .get(start..end)
+ .and_then(Self::from_bytes_copy)
+ .ok_or(EINVAL)
+ }
+}
+
+/// Header for code and data load offsets.
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct HsLoadHeaderV2 {
+ // Offset at which the code starts.
+ os_code_offset: u32,
+ // Total size of the code, for all apps.
+ os_code_size: u32,
+ // Offset at which the data starts.
+ os_data_offset: u32,
+ // Size of the data.
+ os_data_size: u32,
+ // Number of apps following this header. Each app is described by a [`HsLoadHeaderV2App`].
+ num_apps: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for HsLoadHeaderV2 {}
+
+impl HsLoadHeaderV2 {
+ /// Returns the load header contained in `hs_fw`.
+ ///
+ /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image.
+ fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> {
+ frombytes_at::<Self>(hs_fw.fw, hs_fw.hdr.header_offset as usize)
+ }
+}
+
+/// Header for app code loader.
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct HsLoadHeaderV2App {
+ /// Offset at which to load the app code.
+ offset: u32,
+ /// Length in bytes of the app code.
+ len: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for HsLoadHeaderV2App {}
+
+impl HsLoadHeaderV2App {
+ /// Returns the [`HsLoadHeaderV2App`] for app `idx` of `hs_fw`.
+ ///
+ /// Fails if `idx` is larger than the number of apps declared in `hs_fw`, or if the header is
+ /// not within the bounds of the firmware image.
+ fn new(hs_fw: &HsFirmwareV2<'_>, idx: u32) -> Result<Self> {
+ let load_hdr = HsLoadHeaderV2::new(hs_fw)?;
+ if idx >= load_hdr.num_apps {
+ Err(EINVAL)
+ } else {
+ frombytes_at::<Self>(
+ hs_fw.fw,
+ (hs_fw.hdr.header_offset as usize)
+ // Skip the load header...
+ .checked_add(size_of::<HsLoadHeaderV2>())
+ // ... and jump to app header `idx`.
+ .and_then(|offset| {
+ offset.checked_add((idx as usize).checked_mul(size_of::<Self>())?)
+ })
+ .ok_or(EINVAL)?,
+ )
+ }
+ }
+}
+
+/// Signature for Booter firmware. Their size is encoded into the header and not known a compile
+/// time, so we just wrap a byte slices on which we can implement [`FirmwareSignature`].
+struct BooterSignature<'a>(&'a [u8]);
+
+impl<'a> AsRef<[u8]> for BooterSignature<'a> {
+ fn as_ref(&self) -> &[u8] {
+ self.0
+ }
+}
+
+impl<'a> FirmwareSignature<BooterFirmware> for BooterSignature<'a> {}
+
+/// The `Booter` loader firmware, responsible for loading the GSP.
+pub(crate) struct BooterFirmware {
+ // Load parameters for `IMEM` falcon memory.
+ imem_load_target: FalconLoadTarget,
+ // Load parameters for `DMEM` falcon memory.
+ dmem_load_target: FalconLoadTarget,
+ // BROM falcon parameters.
+ brom_params: FalconBromParams,
+ // Device-mapped firmware image.
+ ucode: FirmwareDmaObject<Self, Signed>,
+}
+
+impl FirmwareDmaObject<BooterFirmware, Unsigned> {
+ fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> {
+ DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData))
+ }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub(crate) enum BooterKind {
+ Loader,
+ #[expect(unused)]
+ Unloader,
+}
+
+impl BooterFirmware {
+ /// Parses the Booter firmware contained in `fw`, and patches the correct signature so it is
+ /// ready to be loaded and run on `falcon`.
+ pub(crate) fn new(
+ dev: &device::Device<device::Bound>,
+ kind: BooterKind,
+ chipset: Chipset,
+ ver: &str,
+ falcon: &Falcon<<Self as FalconFirmware>::Target>,
+ bar: &Bar0,
+ ) -> Result<Self> {
+ let fw_name = match kind {
+ BooterKind::Loader => "booter_load",
+ BooterKind::Unloader => "booter_unload",
+ };
+ let fw = super::request_firmware(dev, chipset, fw_name, ver)?;
+ let bin_fw = BinFirmware::new(&fw)?;
+
+ // The binary firmware embeds a Heavy-Secured firmware.
+ let hs_fw = HsFirmwareV2::new(&bin_fw)?;
+
+ // The Heavy-Secured firmware embeds a firmware load descriptor.
+ let load_hdr = HsLoadHeaderV2::new(&hs_fw)?;
+
+ // Offset in `ucode` where to patch the signature.
+ let patch_loc = hs_fw.patch_location()?;
+
+ let sig_params = HsSignatureParams::new(&hs_fw)?;
+ let brom_params = FalconBromParams {
+ // `load_hdr.os_data_offset` is an absolute index, but `pkc_data_offset` is from the
+ // signature patch location.
+ pkc_data_offset: patch_loc
+ .checked_sub(load_hdr.os_data_offset)
+ .ok_or(EINVAL)?,
+ engine_id_mask: u16::try_from(sig_params.engine_id_mask).map_err(|_| EINVAL)?,
+ ucode_id: u8::try_from(sig_params.ucode_id).map_err(|_| EINVAL)?,
+ };
+ let app0 = HsLoadHeaderV2App::new(&hs_fw, 0)?;
+
+ // Object containing the firmware microcode to be signature-patched.
+ let ucode = bin_fw
+ .data()
+ .ok_or(EINVAL)
+ .and_then(|data| FirmwareDmaObject::<Self, _>::new_booter(dev, data))?;
+
+ let ucode_signed = {
+ let mut signatures = hs_fw.signatures_iter()?.peekable();
+
+ if signatures.peek().is_none() {
+ // If there are no signatures, then the firmware is unsigned.
+ ucode.no_patch_signature()
+ } else {
+ // Obtain the version from the fuse register, and extract the corresponding
+ // signature.
+ let reg_fuse_version = falcon.signature_reg_fuse_version(
+ bar,
+ brom_params.engine_id_mask,
+ brom_params.ucode_id,
+ )?;
+
+ // `0` means the last signature should be used.
+ const FUSE_VERSION_USE_LAST_SIG: u32 = 0;
+ let signature = match reg_fuse_version {
+ FUSE_VERSION_USE_LAST_SIG => signatures.last(),
+ // Otherwise hardware fuse version needs to be subtracted to obtain the index.
+ reg_fuse_version => {
+ let Some(idx) = sig_params.fuse_ver.checked_sub(reg_fuse_version) else {
+ dev_err!(dev, "invalid fuse version for Booter firmware\n");
+ return Err(EINVAL);
+ };
+ signatures.nth(idx as usize)
+ }
+ }
+ .ok_or(EINVAL)?;
+
+ ucode.patch_signature(&signature, patch_loc as usize)?
+ }
+ };
+
+ Ok(Self {
+ imem_load_target: FalconLoadTarget {
+ src_start: app0.offset,
+ dst_start: 0,
+ len: app0.len,
+ },
+ dmem_load_target: FalconLoadTarget {
+ src_start: load_hdr.os_data_offset,
+ dst_start: 0,
+ len: load_hdr.os_data_size,
+ },
+ brom_params,
+ ucode: ucode_signed,
+ })
+ }
+}
+
+impl FalconLoadParams for BooterFirmware {
+ fn imem_load_params(&self) -> FalconLoadTarget {
+ self.imem_load_target.clone()
+ }
+
+ fn dmem_load_params(&self) -> FalconLoadTarget {
+ self.dmem_load_target.clone()
+ }
+
+ fn brom_params(&self) -> FalconBromParams {
+ self.brom_params.clone()
+ }
+
+ fn boot_addr(&self) -> u32 {
+ self.imem_load_target.src_start
+ }
+}
+
+impl Deref for BooterFirmware {
+ type Target = DmaObject;
+
+ fn deref(&self) -> &Self::Target {
+ &self.ucode.0
+ }
+}
+
+impl FalconFirmware for BooterFirmware {
+ type Target = Sec2;
+}
diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs
index 0dff3cfa90af..8edbb5c0572c 100644
--- a/drivers/gpu/nova-core/firmware/fwsec.rs
+++ b/drivers/gpu/nova-core/firmware/fwsec.rs
@@ -202,9 +202,6 @@ pub(crate) struct FwsecFirmware {
ucode: FirmwareDmaObject<Self, Signed>,
}
-// We need to load full DMEM pages.
-const DMEM_LOAD_SIZE_ALIGN: u32 = 256;
-
impl FalconLoadParams for FwsecFirmware {
fn imem_load_params(&self) -> FalconLoadTarget {
FalconLoadTarget {
@@ -218,11 +215,7 @@ impl FalconLoadParams for FwsecFirmware {
FalconLoadTarget {
src_start: self.desc.imem_load_size,
dst_start: self.desc.dmem_phys_base,
- // TODO[NUMM]: replace with `align_up` once it lands.
- len: self
- .desc
- .dmem_load_size
- .next_multiple_of(DMEM_LOAD_SIZE_ALIGN),
+ len: self.desc.dmem_load_size,
}
}
@@ -253,8 +246,8 @@ impl FalconFirmware for FwsecFirmware {
impl FirmwareDmaObject<FwsecFirmware, Unsigned> {
fn new_fwsec(dev: &Device<device::Bound>, bios: &Vbios, cmd: FwsecCommand) -> Result<Self> {
- let desc = bios.fwsec_image().header(dev)?;
- let ucode = bios.fwsec_image().ucode(dev, desc)?;
+ let desc = bios.fwsec_image().header()?;
+ let ucode = bios.fwsec_image().ucode(desc)?;
let mut dma_object = DmaObject::from_data(dev, ucode)?;
let hdr_offset = (desc.imem_load_size + desc.interface_offset) as usize;
@@ -343,7 +336,7 @@ impl FwsecFirmware {
let ucode_dma = FirmwareDmaObject::<Self, _>::new_fwsec(dev, bios, cmd)?;
// Patch signature if needed.
- let desc = bios.fwsec_image().header(dev)?;
+ let desc = bios.fwsec_image().header()?;
let ucode_signed = if desc.signature_count != 0 {
let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize;
let desc_sig_versions = u32::from(desc.signature_versions);
@@ -382,7 +375,7 @@ impl FwsecFirmware {
dev_dbg!(dev, "patching signature with index {}\n", signature_idx);
let signature = bios
.fwsec_image()
- .sigs(dev, desc)
+ .sigs(desc)
.and_then(|sigs| sigs.get(signature_idx).ok_or(EINVAL))?;
ucode_dma.patch_signature(signature, sig_base_img)?
diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs
new file mode 100644
index 000000000000..9b70095434c6
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/gsp.rs
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::mem::size_of_val;
+
+use kernel::device;
+use kernel::dma::{DataDirection, DmaAddress};
+use kernel::kvec;
+use kernel::prelude::*;
+use kernel::scatterlist::{Owned, SGTable};
+
+use crate::dma::DmaObject;
+use crate::firmware::riscv::RiscvFirmware;
+use crate::gpu::{Architecture, Chipset};
+use crate::gsp::GSP_PAGE_SIZE;
+
+/// Ad-hoc and temporary module to extract sections from ELF images.
+///
+/// Some firmware images are currently packaged as ELF files, where sections names are used as keys
+/// to specific and related bits of data. Future firmware versions are scheduled to move away from
+/// that scheme before nova-core becomes stable, which means this module will eventually be
+/// removed.
+mod elf {
+ use core::mem::size_of;
+
+ use kernel::bindings;
+ use kernel::str::CStr;
+ use kernel::transmute::FromBytes;
+
+ /// Newtype to provide a [`FromBytes`] implementation.
+ #[repr(transparent)]
+ struct Elf64Hdr(bindings::elf64_hdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+ unsafe impl FromBytes for Elf64Hdr {}
+
+ #[repr(transparent)]
+ struct Elf64SHdr(bindings::elf64_shdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+ unsafe impl FromBytes for Elf64SHdr {}
+
+ /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it.
+ pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> {
+ let hdr = &elf
+ .get(0..size_of::<bindings::elf64_hdr>())
+ .and_then(Elf64Hdr::from_bytes)?
+ .0;
+
+ // Get all the section headers.
+ let mut shdr = {
+ let shdr_num = usize::from(hdr.e_shnum);
+ let shdr_start = usize::try_from(hdr.e_shoff).ok()?;
+ let shdr_end = shdr_num
+ .checked_mul(size_of::<Elf64SHdr>())
+ .and_then(|v| v.checked_add(shdr_start))?;
+
+ elf.get(shdr_start..shdr_end)
+ .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))?
+ };
+
+ // Get the strings table.
+ let strhdr = shdr
+ .clone()
+ .nth(usize::from(hdr.e_shstrndx))
+ .and_then(Elf64SHdr::from_bytes)?;
+
+ // Find the section which name matches `name` and return it.
+ shdr.find(|&sh| {
+ let Some(hdr) = Elf64SHdr::from_bytes(sh) else {
+ return false;
+ };
+
+ let Some(name_idx) = strhdr
+ .0
+ .sh_offset
+ .checked_add(u64::from(hdr.0.sh_name))
+ .and_then(|idx| usize::try_from(idx).ok())
+ else {
+ return false;
+ };
+
+ // Get the start of the name.
+ elf.get(name_idx..)
+ // Stop at the first `0`.
+ .and_then(|nstr| nstr.get(0..=nstr.iter().position(|b| *b == 0)?))
+ // Convert into CStr. This should never fail because of the line above.
+ .and_then(|nstr| CStr::from_bytes_with_nul(nstr).ok())
+ // Convert into str.
+ .and_then(|c_str| c_str.to_str().ok())
+ // Check that the name matches.
+ .map(|str| str == name)
+ .unwrap_or(false)
+ })
+ // Return the slice containing the section.
+ .and_then(|sh| {
+ let hdr = Elf64SHdr::from_bytes(sh)?;
+ let start = usize::try_from(hdr.0.sh_offset).ok()?;
+ let end = usize::try_from(hdr.0.sh_size)
+ .ok()
+ .and_then(|sh_size| start.checked_add(sh_size))?;
+
+ elf.get(start..end)
+ })
+ }
+}
+
+/// GSP firmware with 3-level radix page tables for the GSP bootloader.
+///
+/// The bootloader expects firmware to be mapped starting at address 0 in GSP's virtual address
+/// space:
+///
+/// ```text
+/// Level 0: 1 page, 1 entry -> points to first level 1 page
+/// Level 1: Multiple pages/entries -> each entry points to a level 2 page
+/// Level 2: Multiple pages/entries -> each entry points to a firmware page
+/// ```
+///
+/// Each page is 4KB, each entry is 8 bytes (64-bit DMA address).
+/// Also known as "Radix3" firmware.
+#[pin_data]
+pub(crate) struct GspFirmware {
+ /// The GSP firmware inside a [`VVec`], device-mapped via a SG table.
+ #[pin]
+ fw: SGTable<Owned<VVec<u8>>>,
+ /// Level 2 page table whose entries contain DMA addresses of firmware pages.
+ #[pin]
+ level2: SGTable<Owned<VVec<u8>>>,
+ /// Level 1 page table whose entries contain DMA addresses of level 2 pages.
+ #[pin]
+ level1: SGTable<Owned<VVec<u8>>>,
+ /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page.
+ level0: DmaObject,
+ /// Size in bytes of the firmware contained in [`Self::fw`].
+ size: usize,
+ /// Device-mapped GSP signatures matching the GPU's [`Chipset`].
+ signatures: DmaObject,
+ /// GSP bootloader, verifies the GSP firmware before loading and running it.
+ bootloader: RiscvFirmware,
+}
+
+impl GspFirmware {
+ /// Loads the GSP firmware binaries, map them into `dev`'s address-space, and creates the page
+ /// tables expected by the GSP bootloader to load it.
+ pub(crate) fn new<'a, 'b>(
+ dev: &'a device::Device<device::Bound>,
+ chipset: Chipset,
+ ver: &'b str,
+ ) -> Result<impl PinInit<Self, Error> + 'a> {
+ let fw = super::request_firmware(dev, chipset, "gsp", ver)?;
+
+ let fw_section = elf::elf64_section(fw.data(), ".fwimage").ok_or(EINVAL)?;
+
+ let sigs_section = match chipset.arch() {
+ Architecture::Ampere => ".fwsignature_ga10x",
+ _ => return Err(ENOTSUPP),
+ };
+ let signatures = elf::elf64_section(fw.data(), sigs_section)
+ .ok_or(EINVAL)
+ .and_then(|data| DmaObject::from_data(dev, data))?;
+
+ let size = fw_section.len();
+
+ // Move the firmware into a vmalloc'd vector and map it into the device address
+ // space.
+ let fw_vvec = VVec::with_capacity(fw_section.len(), GFP_KERNEL)
+ .and_then(|mut v| {
+ v.extend_from_slice(fw_section, GFP_KERNEL)?;
+ Ok(v)
+ })
+ .map_err(|_| ENOMEM)?;
+
+ let bl = super::request_firmware(dev, chipset, "bootloader", ver)?;
+ let bootloader = RiscvFirmware::new(dev, &bl)?;
+
+ Ok(try_pin_init!(Self {
+ fw <- SGTable::new(dev, fw_vvec, DataDirection::ToDevice, GFP_KERNEL),
+ level2 <- {
+ // Allocate the level 2 page table, map the firmware onto it, and map it into the
+ // device address space.
+ VVec::<u8>::with_capacity(
+ fw.iter().count() * core::mem::size_of::<u64>(),
+ GFP_KERNEL,
+ )
+ .map_err(|_| ENOMEM)
+ .and_then(|level2| map_into_lvl(&fw, level2))
+ .map(|level2| SGTable::new(dev, level2, DataDirection::ToDevice, GFP_KERNEL))?
+ },
+ level1 <- {
+ // Allocate the level 1 page table, map the level 2 page table onto it, and map it
+ // into the device address space.
+ VVec::<u8>::with_capacity(
+ level2.iter().count() * core::mem::size_of::<u64>(),
+ GFP_KERNEL,
+ )
+ .map_err(|_| ENOMEM)
+ .and_then(|level1| map_into_lvl(&level2, level1))
+ .map(|level1| SGTable::new(dev, level1, DataDirection::ToDevice, GFP_KERNEL))?
+ },
+ level0: {
+ // Allocate the level 0 page table as a device-visible DMA object, and map the
+ // level 1 page table onto it.
+
+ // Level 0 page table data.
+ let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?;
+
+ // Fill level 1 page entry.
+ #[allow(clippy::useless_conversion)]
+ let level1_entry = u64::from(level1.iter().next().unwrap().dma_address());
+ let dst = &mut level0_data[..size_of_val(&level1_entry)];
+ dst.copy_from_slice(&level1_entry.to_le_bytes());
+
+ // Turn the level0 page table into a [`DmaObject`].
+ DmaObject::from_data(dev, &level0_data)?
+ },
+ size,
+ signatures,
+ bootloader,
+ }))
+ }
+
+ #[expect(unused)]
+ /// Returns the DMA handle of the radix3 level 0 page table.
+ pub(crate) fn radix3_dma_handle(&self) -> DmaAddress {
+ self.level0.dma_handle()
+ }
+}
+
+/// Build a page table from a scatter-gather list.
+///
+/// Takes each DMA-mapped region from `sg_table` and writes page table entries
+/// for all 4KB pages within that region. For example, a 16KB SG entry becomes
+/// 4 consecutive page table entries.
+fn map_into_lvl(sg_table: &SGTable<Owned<VVec<u8>>>, mut dst: VVec<u8>) -> Result<VVec<u8>> {
+ for sg_entry in sg_table.iter() {
+ // Number of pages we need to map.
+ let num_pages = (sg_entry.dma_len() as usize).div_ceil(GSP_PAGE_SIZE);
+
+ for i in 0..num_pages {
+ let entry = sg_entry.dma_address() + (i as u64 * GSP_PAGE_SIZE as u64);
+ dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?;
+ }
+ }
+
+ Ok(dst)
+}
diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs
new file mode 100644
index 000000000000..afb08f5bc4ba
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/riscv.rs
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Support for firmware binaries designed to run on a RISC-V core. Such firmwares files have a
+//! dedicated header.
+
+use core::mem::size_of;
+
+use kernel::device;
+use kernel::firmware::Firmware;
+use kernel::prelude::*;
+use kernel::transmute::FromBytes;
+
+use crate::dma::DmaObject;
+use crate::firmware::BinFirmware;
+
+/// Descriptor for microcode running on a RISC-V core.
+#[repr(C)]
+#[derive(Debug)]
+struct RmRiscvUCodeDesc {
+ version: u32,
+ bootloader_offset: u32,
+ bootloader_size: u32,
+ bootloader_param_offset: u32,
+ bootloader_param_size: u32,
+ riscv_elf_offset: u32,
+ riscv_elf_size: u32,
+ app_version: u32,
+ manifest_offset: u32,
+ manifest_size: u32,
+ monitor_data_offset: u32,
+ monitor_data_size: u32,
+ monitor_code_offset: u32,
+ monitor_code_size: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for RmRiscvUCodeDesc {}
+
+impl RmRiscvUCodeDesc {
+ /// Interprets the header of `bin_fw` as a [`RmRiscvUCodeDesc`] and returns it.
+ ///
+ /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image.
+ fn new(bin_fw: &BinFirmware<'_>) -> Result<Self> {
+ let offset = bin_fw.hdr.header_offset as usize;
+
+ bin_fw
+ .fw
+ .get(offset..offset + size_of::<Self>())
+ .and_then(Self::from_bytes_copy)
+ .ok_or(EINVAL)
+ }
+}
+
+/// A parsed firmware for a RISC-V core, ready to be loaded and run.
+#[expect(unused)]
+pub(crate) struct RiscvFirmware {
+ /// Offset at which the code starts in the firmware image.
+ code_offset: u32,
+ /// Offset at which the data starts in the firmware image.
+ data_offset: u32,
+ /// Offset at which the manifest starts in the firmware image.
+ manifest_offset: u32,
+ /// Application version.
+ app_version: u32,
+ /// Device-mapped firmware image.
+ ucode: DmaObject,
+}
+
+impl RiscvFirmware {
+ /// Parses the RISC-V firmware image contained in `fw`.
+ pub(crate) fn new(dev: &device::Device<device::Bound>, fw: &Firmware) -> Result<Self> {
+ let bin_fw = BinFirmware::new(fw)?;
+
+ let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?;
+
+ let ucode = {
+ let start = bin_fw.hdr.data_offset as usize;
+ let len = bin_fw.hdr.data_size as usize;
+
+ DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)?
+ };
+
+ Ok(Self {
+ ucode,
+ code_offset: riscv_desc.monitor_code_offset,
+ data_offset: riscv_desc.monitor_data_offset,
+ manifest_offset: riscv_desc.manifest_offset,
+ app_version: riscv_desc.app_version,
+ })
+ }
+}
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index b5c9786619a9..af20e2daea24 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -1,18 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
-use kernel::{device, devres::Devres, error::code::*, pci, prelude::*, sync::Arc};
+use kernel::{device, devres::Devres, error::code::*, fmt, pci, prelude::*, sync::Arc};
use crate::driver::Bar0;
-use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon};
-use crate::fb::FbLayout;
+use crate::falcon::{gsp::Gsp as GspFalcon, sec2::Sec2 as Sec2Falcon, Falcon};
use crate::fb::SysmemFlush;
-use crate::firmware::fwsec::{FwsecCommand, FwsecFirmware};
-use crate::firmware::{Firmware, FIRMWARE_VERSION};
use crate::gfw;
+use crate::gsp::Gsp;
use crate::regs;
-use crate::util;
-use crate::vbios::Vbios;
-use core::fmt;
macro_rules! define_chipset {
({ $($variant:ident = $value:expr),* $(,)* }) =>
@@ -28,13 +23,23 @@ macro_rules! define_chipset {
$( Chipset::$variant, )*
];
- pub(crate) const NAMES: [&'static str; Self::ALL.len()] = [
- $( util::const_bytes_to_str(
- util::to_lowercase_bytes::<{ stringify!($variant).len() }>(
- stringify!($variant)
- ).as_slice()
- ), )*
- ];
+ ::kernel::macros::paste!(
+ /// Returns the name of this chipset, in lowercase.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let chipset = Chipset::GA102;
+ /// assert_eq!(chipset.name(), "ga102");
+ /// ```
+ pub(crate) const fn name(&self) -> &'static str {
+ match *self {
+ $(
+ Chipset::$variant => stringify!([<$variant:lower>]),
+ )*
+ }
+ }
+ );
}
// TODO[FPRI]: replace with something like derive(FromPrimitive)
@@ -163,150 +168,74 @@ impl Spec {
}
/// Structure holding the resources required to operate the GPU.
-#[pin_data(PinnedDrop)]
+#[pin_data]
pub(crate) struct Gpu {
spec: Spec,
/// MMIO mapping of PCI BAR 0
bar: Arc<Devres<Bar0>>,
- fw: Firmware,
/// System memory page required for flushing all pending GPU-side memory writes done through
/// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation).
sysmem_flush: SysmemFlush,
-}
-
-#[pinned_drop]
-impl PinnedDrop for Gpu {
- fn drop(self: Pin<&mut Self>) {
- // Unregister the sysmem flush page before we release it.
- self.bar
- .try_access_with(|b| self.sysmem_flush.unregister(b));
- }
+ /// GSP falcon instance, used for GSP boot up and cleanup.
+ gsp_falcon: Falcon<GspFalcon>,
+ /// SEC2 falcon instance, used for GSP boot up and cleanup.
+ sec2_falcon: Falcon<Sec2Falcon>,
+ /// GSP runtime data. Temporarily an empty placeholder.
+ #[pin]
+ gsp: Gsp,
}
impl Gpu {
- /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly
- /// created the WPR2 region.
- ///
- /// TODO: this needs to be moved into a larger type responsible for booting the whole GSP
- /// (`GspBooter`?).
- fn run_fwsec_frts(
- dev: &device::Device<device::Bound>,
- falcon: &Falcon<Gsp>,
- bar: &Bar0,
- bios: &Vbios,
- fb_layout: &FbLayout,
- ) -> Result<()> {
- // Check that the WPR2 region does not already exists - if it does, we cannot run
- // FWSEC-FRTS until the GPU is reset.
- if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 {
- dev_err!(
- dev,
- "WPR2 region already exists - GPU needs to be reset to proceed\n"
- );
- return Err(EBUSY);
- }
+ pub(crate) fn new<'a>(
+ pdev: &'a pci::Device<device::Bound>,
+ devres_bar: Arc<Devres<Bar0>>,
+ bar: &'a Bar0,
+ ) -> impl PinInit<Self, Error> + 'a {
+ try_pin_init!(Self {
+ spec: Spec::new(bar).inspect(|spec| {
+ dev_info!(
+ pdev.as_ref(),
+ "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n",
+ spec.chipset,
+ spec.chipset.arch(),
+ spec.revision
+ );
+ })?,
- let fwsec_frts = FwsecFirmware::new(
- dev,
- falcon,
- bar,
- bios,
- FwsecCommand::Frts {
- frts_addr: fb_layout.frts.start,
- frts_size: fb_layout.frts.end - fb_layout.frts.start,
+ // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
+ _: {
+ gfw::wait_gfw_boot_completion(bar)
+ .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?;
},
- )?;
- // Run FWSEC-FRTS to create the WPR2 region.
- fwsec_frts.run(dev, falcon, bar)?;
+ sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?,
- // SCRATCH_E contains the error code for FWSEC-FRTS.
- let frts_status = regs::NV_PBUS_SW_SCRATCH_0E::read(bar).frts_err_code();
- if frts_status != 0 {
- dev_err!(
- dev,
- "FWSEC-FRTS returned with error code {:#x}",
- frts_status
- );
+ gsp_falcon: Falcon::new(
+ pdev.as_ref(),
+ spec.chipset,
+ bar,
+ spec.chipset > Chipset::GA100,
+ )
+ .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
- return Err(EIO);
- }
+ sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?,
- // Check that the WPR2 region has been created as we requested.
- let (wpr2_lo, wpr2_hi) = (
- regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(),
- regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(),
- );
+ gsp <- Gsp::new(),
- match (wpr2_lo, wpr2_hi) {
- (_, 0) => {
- dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n");
+ _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
- Err(EIO)
- }
- (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => {
- dev_err!(
- dev,
- "WPR2 region created at unexpected address {:#x}; expected {:#x}\n",
- wpr2_lo,
- fb_layout.frts.start,
- );
-
- Err(EIO)
- }
- (wpr2_lo, wpr2_hi) => {
- dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi);
- dev_dbg!(dev, "GPU instance built\n");
-
- Ok(())
- }
- }
+ bar: devres_bar,
+ })
}
- pub(crate) fn new(
- pdev: &pci::Device<device::Bound>,
- devres_bar: Arc<Devres<Bar0>>,
- ) -> Result<impl PinInit<Self>> {
- let bar = devres_bar.access(pdev.as_ref())?;
- let spec = Spec::new(bar)?;
- let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?;
-
- dev_info!(
- pdev.as_ref(),
- "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n",
- spec.chipset,
- spec.chipset.arch(),
- spec.revision
- );
-
- // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
- gfw::wait_gfw_boot_completion(bar)
- .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?;
-
- let sysmem_flush = SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?;
-
- let gsp_falcon = Falcon::<Gsp>::new(
- pdev.as_ref(),
- spec.chipset,
- bar,
- spec.chipset > Chipset::GA100,
- )?;
- gsp_falcon.clear_swgen0_intr(bar);
-
- let _sec2_falcon = Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?;
-
- let fb_layout = FbLayout::new(spec.chipset, bar)?;
- dev_dbg!(pdev.as_ref(), "{:#x?}\n", fb_layout);
-
- let bios = Vbios::new(pdev, bar)?;
-
- Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?;
-
- Ok(pin_init!(Self {
- spec,
- bar: devres_bar,
- fw,
- sysmem_flush,
- }))
+ /// Called when the corresponding [`Device`](device::Device) is unbound.
+ ///
+ /// Note: This method must only be called from `Driver::unbind`.
+ pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) {
+ kernel::warn_on!(self
+ .bar
+ .access(dev)
+ .inspect(|bar| self.sysmem_flush.unregister(bar))
+ .is_err());
}
}
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
new file mode 100644
index 000000000000..64e472e7a9d3
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+
+mod boot;
+
+use kernel::prelude::*;
+
+mod fw;
+
+pub(crate) const GSP_PAGE_SHIFT: usize = 12;
+pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT;
+
+/// GSP runtime data.
+///
+/// This is an empty pinned placeholder for now.
+#[pin_data]
+pub(crate) struct Gsp {}
+
+impl Gsp {
+ pub(crate) fn new() -> impl PinInit<Self> {
+ pin_init!(Self {})
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
new file mode 100644
index 000000000000..2800f3aee37d
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::device;
+use kernel::pci;
+use kernel::prelude::*;
+
+use crate::driver::Bar0;
+use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon};
+use crate::fb::FbLayout;
+use crate::firmware::{
+ booter::{BooterFirmware, BooterKind},
+ fwsec::{FwsecCommand, FwsecFirmware},
+ gsp::GspFirmware,
+ FIRMWARE_VERSION,
+};
+use crate::gpu::Chipset;
+use crate::regs;
+use crate::vbios::Vbios;
+
+impl super::Gsp {
+ /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly
+ /// created the WPR2 region.
+ fn run_fwsec_frts(
+ dev: &device::Device<device::Bound>,
+ falcon: &Falcon<Gsp>,
+ bar: &Bar0,
+ bios: &Vbios,
+ fb_layout: &FbLayout,
+ ) -> Result<()> {
+ // Check that the WPR2 region does not already exists - if it does, we cannot run
+ // FWSEC-FRTS until the GPU is reset.
+ if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 {
+ dev_err!(
+ dev,
+ "WPR2 region already exists - GPU needs to be reset to proceed\n"
+ );
+ return Err(EBUSY);
+ }
+
+ let fwsec_frts = FwsecFirmware::new(
+ dev,
+ falcon,
+ bar,
+ bios,
+ FwsecCommand::Frts {
+ frts_addr: fb_layout.frts.start,
+ frts_size: fb_layout.frts.end - fb_layout.frts.start,
+ },
+ )?;
+
+ // Run FWSEC-FRTS to create the WPR2 region.
+ fwsec_frts.run(dev, falcon, bar)?;
+
+ // SCRATCH_E contains the error code for FWSEC-FRTS.
+ let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code();
+ if frts_status != 0 {
+ dev_err!(
+ dev,
+ "FWSEC-FRTS returned with error code {:#x}",
+ frts_status
+ );
+
+ return Err(EIO);
+ }
+
+ // Check that the WPR2 region has been created as we requested.
+ let (wpr2_lo, wpr2_hi) = (
+ regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(),
+ regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(),
+ );
+
+ match (wpr2_lo, wpr2_hi) {
+ (_, 0) => {
+ dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n");
+
+ Err(EIO)
+ }
+ (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => {
+ dev_err!(
+ dev,
+ "WPR2 region created at unexpected address {:#x}; expected {:#x}\n",
+ wpr2_lo,
+ fb_layout.frts.start,
+ );
+
+ Err(EIO)
+ }
+ (wpr2_lo, wpr2_hi) => {
+ dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi);
+ dev_dbg!(dev, "GPU instance built\n");
+
+ Ok(())
+ }
+ }
+ }
+
+ /// Attempt to boot the GSP.
+ ///
+ /// This is a GPU-dependent and complex procedure that involves loading firmware files from
+ /// user-space, patching them with signatures, and building firmware-specific intricate data
+ /// structures that the GSP will use at runtime.
+ ///
+ /// Upon return, the GSP is up and running, and its runtime object given as return value.
+ pub(crate) fn boot(
+ self: Pin<&mut Self>,
+ pdev: &pci::Device<device::Bound>,
+ bar: &Bar0,
+ chipset: Chipset,
+ gsp_falcon: &Falcon<Gsp>,
+ sec2_falcon: &Falcon<Sec2>,
+ ) -> Result {
+ let dev = pdev.as_ref();
+
+ let bios = Vbios::new(dev, bar)?;
+
+ let _gsp_fw = KBox::pin_init(
+ GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?,
+ GFP_KERNEL,
+ )?;
+
+ let fb_layout = FbLayout::new(chipset, bar)?;
+ dev_dbg!(dev, "{:#x?}\n", fb_layout);
+
+ Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
+
+ let _booter_loader = BooterFirmware::new(
+ dev,
+ BooterKind::Loader,
+ chipset,
+ FIRMWARE_VERSION,
+ sec2_falcon,
+ bar,
+ )?;
+
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
new file mode 100644
index 000000000000..34226dd00982
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+
+mod r570_144;
+
+// Alias to avoid repeating the version number with every use.
+#[expect(unused)]
+use r570_144 as bindings;
diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs
new file mode 100644
index 000000000000..35cb0370a7c9
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Firmware bindings.
+//!
+//! Imports the generated bindings by `bindgen`.
+//!
+//! This module may not be directly used. Please abstract or re-export the needed symbols in the
+//! parent module instead.
+
+#![cfg_attr(test, allow(deref_nullptr))]
+#![cfg_attr(test, allow(unaligned_references))]
+#![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))]
+#![allow(
+ dead_code,
+ unused_imports,
+ clippy::all,
+ clippy::undocumented_unsafe_blocks,
+ clippy::ptr_as_ptr,
+ clippy::ref_as_ptr,
+ missing_docs,
+ non_camel_case_types,
+ non_upper_case_globals,
+ non_snake_case,
+ improper_ctypes,
+ unreachable_pub,
+ unsafe_op_in_unsafe_fn
+)]
+use kernel::ffi;
+include!("r570_144/bindings.rs");
diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
new file mode 100644
index 000000000000..cec594032515
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
@@ -0,0 +1 @@
+// SPDX-License-Identifier: GPL-2.0
diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs
index cb2bbb30cba1..fffcaee2249f 100644
--- a/drivers/gpu/nova-core/nova_core.rs
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -9,6 +9,7 @@ mod fb;
mod firmware;
mod gfw;
mod gpu;
+mod gsp;
mod regs;
mod util;
mod vbios;
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index d49fddf6a3c6..206dab2e1335 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -5,11 +5,11 @@
#![allow(non_camel_case_types)]
#[macro_use]
-mod macros;
+pub(crate) mod macros;
use crate::falcon::{
DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget,
- FalconModSelAlgo, FalconSecurityModel, PeregrineCoreSelect,
+ FalconModSelAlgo, FalconSecurityModel, PFalcon2Base, PFalconBase, PeregrineCoreSelect,
};
use crate::gpu::{Architecture, Chipset};
use kernel::prelude::*;
@@ -28,7 +28,7 @@ impl NV_PMC_BOOT_0 {
/// Combines `architecture_0` and `architecture_1` to obtain the architecture of the chip.
pub(crate) fn architecture(self) -> Result<Architecture> {
Architecture::try_from(
- self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0.len()),
+ self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0_RANGE.len()),
)
}
@@ -36,7 +36,8 @@ impl NV_PMC_BOOT_0 {
pub(crate) fn chipset(self) -> Result<Chipset> {
self.architecture()
.map(|arch| {
- ((arch as u32) << Self::IMPLEMENTATION.len()) | u32::from(self.implementation())
+ ((arch as u32) << Self::IMPLEMENTATION_RANGE.len())
+ | u32::from(self.implementation())
})
.and_then(Chipset::try_from)
}
@@ -44,8 +45,10 @@ impl NV_PMC_BOOT_0 {
// PBUS
-// TODO[REGA]: this is an array of registers.
-register!(NV_PBUS_SW_SCRATCH_0E@0x00001438 {
+register!(NV_PBUS_SW_SCRATCH @ 0x00001400[64] {});
+
+register!(NV_PBUS_SW_SCRATCH_0E_FRTS_ERR => NV_PBUS_SW_SCRATCH[0xe],
+ "scratch register 0xe used as FRTS firmware error code" {
31:16 frts_err_code as u16;
});
@@ -123,13 +126,12 @@ register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128,
0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level";
});
-// TODO[REGA]: This is an array of registers.
-register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234 {
- 31:0 value as u32;
-});
+// OpenRM defines this as a register array, but doesn't specify its size and only uses its first
+// element. Be conservative until we know the actual size or need to use more registers.
+register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234[1] {});
register!(
- NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05,
+ NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0],
"Scratch group 05 register 0 used as GFW boot progress indicator" {
7:0 progress as u8, "Progress of GFW boot (0xff means completed)";
}
@@ -180,38 +182,40 @@ impl NV_PDISP_VGA_WORKSPACE_BASE {
// FUSE
-register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100 {
+pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16;
+
+register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100[NV_FUSE_OPT_FPF_SIZE] {
15:0 data as u16;
});
-register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140 {
+register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140[NV_FUSE_OPT_FPF_SIZE] {
15:0 data as u16;
});
-register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0 {
+register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0[NV_FUSE_OPT_FPF_SIZE] {
15:0 data as u16;
});
// PFALCON
-register!(NV_PFALCON_FALCON_IRQSCLR @ +0x00000004 {
+register!(NV_PFALCON_FALCON_IRQSCLR @ PFalconBase[0x00000004] {
4:4 halt as bool;
6:6 swgen0 as bool;
});
-register!(NV_PFALCON_FALCON_MAILBOX0 @ +0x00000040 {
+register!(NV_PFALCON_FALCON_MAILBOX0 @ PFalconBase[0x00000040] {
31:0 value as u32;
});
-register!(NV_PFALCON_FALCON_MAILBOX1 @ +0x00000044 {
+register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] {
31:0 value as u32;
});
-register!(NV_PFALCON_FALCON_RM @ +0x00000084 {
+register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] {
31:0 value as u32;
});
-register!(NV_PFALCON_FALCON_HWCFG2 @ +0x000000f4 {
+register!(NV_PFALCON_FALCON_HWCFG2 @ PFalconBase[0x000000f4] {
10:10 riscv as bool;
12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is completed";
31:31 reset_ready as bool, "Signal indicating that reset is completed (GA102+)";
@@ -224,17 +228,17 @@ impl NV_PFALCON_FALCON_HWCFG2 {
}
}
-register!(NV_PFALCON_FALCON_CPUCTL @ +0x00000100 {
+register!(NV_PFALCON_FALCON_CPUCTL @ PFalconBase[0x00000100] {
1:1 startcpu as bool;
4:4 halted as bool;
6:6 alias_en as bool;
});
-register!(NV_PFALCON_FALCON_BOOTVEC @ +0x00000104 {
+register!(NV_PFALCON_FALCON_BOOTVEC @ PFalconBase[0x00000104] {
31:0 value as u32;
});
-register!(NV_PFALCON_FALCON_DMACTL @ +0x0000010c {
+register!(NV_PFALCON_FALCON_DMACTL @ PFalconBase[0x0000010c] {
0:0 require_ctx as bool;
1:1 dmem_scrubbing as bool;
2:2 imem_scrubbing as bool;
@@ -242,15 +246,15 @@ register!(NV_PFALCON_FALCON_DMACTL @ +0x0000010c {
7:7 secure_stat as bool;
});
-register!(NV_PFALCON_FALCON_DMATRFBASE @ +0x00000110 {
+register!(NV_PFALCON_FALCON_DMATRFBASE @ PFalconBase[0x00000110] {
31:0 base as u32;
});
-register!(NV_PFALCON_FALCON_DMATRFMOFFS @ +0x00000114 {
+register!(NV_PFALCON_FALCON_DMATRFMOFFS @ PFalconBase[0x00000114] {
23:0 offs as u32;
});
-register!(NV_PFALCON_FALCON_DMATRFCMD @ +0x00000118 {
+register!(NV_PFALCON_FALCON_DMATRFCMD @ PFalconBase[0x00000118] {
0:0 full as bool;
1:1 idle as bool;
3:2 sec as u8;
@@ -261,60 +265,62 @@ register!(NV_PFALCON_FALCON_DMATRFCMD @ +0x00000118 {
16:16 set_dmtag as u8;
});
-register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ +0x0000011c {
+register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ PFalconBase[0x0000011c] {
31:0 offs as u32;
});
-register!(NV_PFALCON_FALCON_DMATRFBASE1 @ +0x00000128 {
+register!(NV_PFALCON_FALCON_DMATRFBASE1 @ PFalconBase[0x00000128] {
8:0 base as u16;
});
-register!(NV_PFALCON_FALCON_HWCFG1 @ +0x0000012c {
+register!(NV_PFALCON_FALCON_HWCFG1 @ PFalconBase[0x0000012c] {
3:0 core_rev as u8 ?=> FalconCoreRev, "Core revision";
5:4 security_model as u8 ?=> FalconSecurityModel, "Security model";
7:6 core_rev_subversion as u8 ?=> FalconCoreRevSubversion, "Core revision subversion";
});
-register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ +0x00000130 {
+register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] {
1:1 startcpu as bool;
});
// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon
// instance.
-register!(NV_PFALCON_FALCON_ENGINE @ +0x000003c0 {
+register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] {
0:0 reset as bool;
});
-// TODO[REGA]: this is an array of registers.
-register!(NV_PFALCON_FBIF_TRANSCFG @ +0x00000600 {
+register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600[8]] {
1:0 target as u8 ?=> FalconFbifTarget;
2:2 mem_type as bool => FalconFbifMemType;
});
-register!(NV_PFALCON_FBIF_CTL @ +0x00000624 {
+register!(NV_PFALCON_FBIF_CTL @ PFalconBase[0x00000624] {
7:7 allow_phys_no_ctx as bool;
});
-register!(NV_PFALCON2_FALCON_MOD_SEL @ +0x00001180 {
+/* PFALCON2 */
+
+register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalcon2Base[0x00000180] {
7:0 algo as u8 ?=> FalconModSelAlgo;
});
-register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ +0x00001198 {
+register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalcon2Base[0x00000198] {
7:0 ucode_id as u8;
});
-register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ +0x0000119c {
+register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalcon2Base[0x0000019c] {
31:0 value as u32;
});
-// TODO[REGA]: this is an array of registers.
-register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ +0x00001210 {
+// OpenRM defines this as a register array, but doesn't specify its size and only uses its first
+// element. Be conservative until we know the actual size or need to use more registers.
+register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] {
31:0 value as u32;
});
// PRISCV
-register!(NV_PRISCV_RISCV_BCR_CTRL @ +0x00001668 {
+register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalconBase[0x00001668] {
0:0 valid as bool;
4:4 core_select as bool => PeregrineCoreSelect;
8:8 br_fetch as bool;
diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs
index a3e6de1779d4..8058e1696df9 100644
--- a/drivers/gpu/nova-core/regs/macros.rs
+++ b/drivers/gpu/nova-core/regs/macros.rs
@@ -1,17 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
-//! Macro to define register layout and accessors.
+//! `register!` macro to define register layout and accessors.
//!
//! A single register typically includes several fields, which are accessed through a combination
//! of bit-shift and mask operations that introduce a class of potential mistakes, notably because
//! not all possible field values are necessarily valid.
//!
-//! The macro in this module allow to define, using an intruitive and readable syntax, a dedicated
-//! type for each register with its own field accessors that can return an error is a field's value
-//! is invalid.
+//! The `register!` macro in this module provides an intuitive and readable syntax for defining a
+//! dedicated type for each register. Each such type comes with its own field accessors that can
+//! return an error if a field's value is invalid.
-/// Defines a dedicated type for a register with an absolute offset, alongside with getter and
-/// setter methods for its fields and methods to read and write it from an `Io` region.
+/// Trait providing a base address to be added to the offset of a relative register to obtain
+/// its actual offset.
+///
+/// The `T` generic argument is used to distinguish which base to use, in case a type provides
+/// several bases. It is given to the `register!` macro to restrict the use of the register to
+/// implementors of this particular variant.
+pub(crate) trait RegisterBase<T> {
+ const BASE: usize;
+}
+
+/// Defines a dedicated type for a register with an absolute offset, including getter and setter
+/// methods for its fields and methods to read and write it from an `Io` region.
///
/// Example:
///
@@ -24,7 +34,7 @@
/// ```
///
/// This defines a `BOOT_0` type which can be read or written from offset `0x100` of an `Io`
-/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 less
+/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 least
/// significant bits of the register. Each field can be accessed and modified using accessor
/// methods:
///
@@ -33,130 +43,344 @@
/// let boot0 = BOOT_0::read(&bar);
/// pr_info!("chip revision: {}.{}", boot0.major_revision(), boot0.minor_revision());
///
-/// // `Chipset::try_from` will be called with the value of the field and returns an error if the
-/// // value is invalid.
+/// // `Chipset::try_from` is called with the value of the `chipset` field and returns an
+/// // error if it is invalid.
/// let chipset = boot0.chipset()?;
///
/// // Update some fields and write the value back.
/// boot0.set_major_revision(3).set_minor_revision(10).write(&bar);
///
-/// // Or just read and update the register in a single step:
+/// // Or, just read and update the register in a single step:
/// BOOT_0::alter(&bar, |r| r.set_major_revision(3).set_minor_revision(10));
/// ```
///
-/// Fields can be defined as follows:
+/// Fields are defined as follows:
///
-/// - `as <type>` simply returns the field value casted as the requested integer type, typically
-/// `u32`, `u16`, `u8` or `bool`. Note that `bool` fields must have a range of 1 bit.
+/// - `as <type>` simply returns the field value casted to <type>, typically `u32`, `u16`, `u8` or
+/// `bool`. Note that `bool` fields must have a range of 1 bit.
/// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns
/// the result.
/// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation
-/// and returns the result. This is useful on fields for which not all values are value.
+/// and returns the result. This is useful with fields for which not all values are valid.
///
/// The documentation strings are optional. If present, they will be added to the type's
/// definition, or the field getter and setter methods they are attached to.
///
-/// Putting a `+` before the address of the register makes it relative to a base: the `read` and
-/// `write` methods take a `base` argument that is added to the specified address before access,
-/// and `try_read` and `try_write` methods are also created, allowing access with offsets unknown
-/// at compile-time:
+/// It is also possible to create a alias register by using the `=> ALIAS` syntax. This is useful
+/// for cases where a register's interpretation depends on the context:
///
/// ```no_run
-/// register!(CPU_CTL @ +0x0000010, "CPU core control" {
-/// 0:0 start as bool, "Start the CPU core";
+/// register!(SCRATCH @ 0x00000200, "Scratch register" {
+/// 31:0 value as u32, "Raw value";
/// });
///
-/// // Flip the `start` switch for the CPU core which base address is at `CPU_BASE`.
-/// let cpuctl = CPU_CTL::read(&bar, CPU_BASE);
-/// pr_info!("CPU CTL: {:#x}", cpuctl);
-/// cpuctl.set_start(true).write(&bar, CPU_BASE);
+/// register!(SCRATCH_BOOT_STATUS => SCRATCH, "Boot status of the firmware" {
+/// 0:0 completed as bool, "Whether the firmware has completed booting";
+/// });
/// ```
///
-/// It is also possible to create a alias register by using the `=> ALIAS` syntax. This is useful
-/// for cases where a register's interpretation depends on the context:
+/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while also
+/// providing its own `completed` field.
+///
+/// ## Relative registers
+///
+/// A register can be defined as being accessible from a fixed offset of a provided base. For
+/// instance, imagine the following I/O space:
+///
+/// ```text
+/// +-----------------------------+
+/// | ... |
+/// | |
+/// 0x100--->+------------CPU0-------------+
+/// | |
+/// 0x110--->+-----------------------------+
+/// | CPU_CTL |
+/// +-----------------------------+
+/// | ... |
+/// | |
+/// | |
+/// 0x200--->+------------CPU1-------------+
+/// | |
+/// 0x210--->+-----------------------------+
+/// | CPU_CTL |
+/// +-----------------------------+
+/// | ... |
+/// +-----------------------------+
+/// ```
+///
+/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O
+/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define
+/// them twice and would prefer a way to select which one to use from a single definition
+///
+/// This can be done using the `Base[Offset]` syntax when specifying the register's address.
+///
+/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the
+/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for
+/// this register needs to implement `RegisterBase<Base>`. Here is the above example translated
+/// into code:
///
/// ```no_run
-/// register!(SCRATCH_0 @ 0x0000100, "Scratch register 0" {
-/// 31:0 value as u32, "Raw value";
+/// // Type used to identify the base.
+/// pub(crate) struct CpuCtlBase;
///
-/// register!(SCRATCH_0_BOOT_STATUS => SCRATCH_0, "Boot status of the firmware" {
-/// 0:0 completed as bool, "Whether the firmware has completed booting";
+/// // ZST describing `CPU0`.
+/// struct Cpu0;
+/// impl RegisterBase<CpuCtlBase> for Cpu0 {
+/// const BASE: usize = 0x100;
+/// }
+/// // Singleton of `CPU0` used to identify it.
+/// const CPU0: Cpu0 = Cpu0;
+///
+/// // ZST describing `CPU1`.
+/// struct Cpu1;
+/// impl RegisterBase<CpuCtlBase> for Cpu1 {
+/// const BASE: usize = 0x200;
+/// }
+/// // Singleton of `CPU1` used to identify it.
+/// const CPU1: Cpu1 = Cpu1;
+///
+/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase<CpuCtlBase>`.
+/// register!(CPU_CTL @ CpuCtlBase[0x10], "CPU core control" {
+/// 0:0 start as bool, "Start the CPU core";
+/// });
+///
+/// // The `read`, `write` and `alter` methods of relative registers take an extra `base` argument
+/// // that is used to resolve its final address by adding its `BASE` to the offset of the
+/// // register.
+///
+/// // Start `CPU0`.
+/// CPU_CTL::alter(bar, &CPU0, |r| r.set_start(true));
+///
+/// // Start `CPU1`.
+/// CPU_CTL::alter(bar, &CPU1, |r| r.set_start(true));
+///
+/// // Aliases can also be defined for relative register.
+/// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" {
+/// 1:1 alias_start as bool, "Start the aliased CPU core";
+/// });
+///
+/// // Start the aliased `CPU0`.
+/// CPU_CTL_ALIAS::alter(bar, &CPU0, |r| r.set_alias_start(true));
/// ```
///
-/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH_0`, while also
-/// providing its own `completed` method.
+/// ## Arrays of registers
+///
+/// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas
+/// can be defined as an array of identical registers, allowing them to be accessed by index with
+/// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add
+/// an `idx` parameter to their `read`, `write` and `alter` methods:
+///
+/// ```no_run
+/// # fn no_run() -> Result<(), Error> {
+/// # fn get_scratch_idx() -> usize {
+/// # 0x15
+/// # }
+/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`.
+/// register!(SCRATCH @ 0x00000080[64], "Scratch registers" {
+/// 31:0 value as u32;
+/// });
+///
+/// // Read scratch register 0, i.e. I/O address `0x80`.
+/// let scratch_0 = SCRATCH::read(bar, 0).value();
+/// // Read scratch register 15, i.e. I/O address `0x80 + (15 * 4)`.
+/// let scratch_15 = SCRATCH::read(bar, 15).value();
+///
+/// // This is out of bounds and won't build.
+/// // let scratch_128 = SCRATCH::read(bar, 128).value();
+///
+/// // Runtime-obtained array index.
+/// let scratch_idx = get_scratch_idx();
+/// // Access on a runtime index returns an error if it is out-of-bounds.
+/// let some_scratch = SCRATCH::try_read(bar, scratch_idx)?.value();
+///
+/// // Alias to a particular register in an array.
+/// // Here `SCRATCH[8]` is used to convey the firmware exit code.
+/// register!(FIRMWARE_STATUS => SCRATCH[8], "Firmware exit status code" {
+/// 7:0 status as u8;
+/// });
+///
+/// let status = FIRMWARE_STATUS::read(bar).status();
+///
+/// // Non-contiguous register arrays can be defined by adding a stride parameter.
+/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the
+/// // registers of the two declarations below are interleaved.
+/// register!(SCRATCH_INTERLEAVED_0 @ 0x000000c0[16 ; 8], "Scratch registers bank 0" {
+/// 31:0 value as u32;
+/// });
+/// register!(SCRATCH_INTERLEAVED_1 @ 0x000000c4[16 ; 8], "Scratch registers bank 1" {
+/// 31:0 value as u32;
+/// });
+/// # Ok(())
+/// # }
+/// ```
+///
+/// ## Relative arrays of registers
+///
+/// Combining the two features described in the sections above, arrays of registers accessible from
+/// a base can also be defined:
+///
+/// ```no_run
+/// # fn no_run() -> Result<(), Error> {
+/// # fn get_scratch_idx() -> usize {
+/// # 0x15
+/// # }
+/// // Type used as parameter of `RegisterBase` to specify the base.
+/// pub(crate) struct CpuCtlBase;
+///
+/// // ZST describing `CPU0`.
+/// struct Cpu0;
+/// impl RegisterBase<CpuCtlBase> for Cpu0 {
+/// const BASE: usize = 0x100;
+/// }
+/// // Singleton of `CPU0` used to identify it.
+/// const CPU0: Cpu0 = Cpu0;
+///
+/// // ZST describing `CPU1`.
+/// struct Cpu1;
+/// impl RegisterBase<CpuCtlBase> for Cpu1 {
+/// const BASE: usize = 0x200;
+/// }
+/// // Singleton of `CPU1` used to identify it.
+/// const CPU1: Cpu1 = Cpu1;
+///
+/// // 64 per-cpu scratch registers, arranged as an contiguous array.
+/// register!(CPU_SCRATCH @ CpuCtlBase[0x00000080[64]], "Per-CPU scratch registers" {
+/// 31:0 value as u32;
+/// });
+///
+/// let cpu0_scratch_0 = CPU_SCRATCH::read(bar, &Cpu0, 0).value();
+/// let cpu1_scratch_15 = CPU_SCRATCH::read(bar, &Cpu1, 15).value();
+///
+/// // This won't build.
+/// // let cpu0_scratch_128 = CPU_SCRATCH::read(bar, &Cpu0, 128).value();
+///
+/// // Runtime-obtained array index.
+/// let scratch_idx = get_scratch_idx();
+/// // Access on a runtime value returns an error if it is out-of-bounds.
+/// let cpu0_some_scratch = CPU_SCRATCH::try_read(bar, &Cpu0, scratch_idx)?.value();
+///
+/// // `SCRATCH[8]` is used to convey the firmware exit code.
+/// register!(CPU_FIRMWARE_STATUS => CpuCtlBase[CPU_SCRATCH[8]],
+/// "Per-CPU firmware exit status code" {
+/// 7:0 status as u8;
+/// });
+///
+/// let cpu0_status = CPU_FIRMWARE_STATUS::read(bar, &Cpu0).status();
+///
+/// // Non-contiguous register arrays can be defined by adding a stride parameter.
+/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the
+/// // registers of the two declarations below are interleaved.
+/// register!(CPU_SCRATCH_INTERLEAVED_0 @ CpuCtlBase[0x00000d00[16 ; 8]],
+/// "Scratch registers bank 0" {
+/// 31:0 value as u32;
+/// });
+/// register!(CPU_SCRATCH_INTERLEAVED_1 @ CpuCtlBase[0x00000d04[16 ; 8]],
+/// "Scratch registers bank 1" {
+/// 31:0 value as u32;
+/// });
+/// # Ok(())
+/// # }
+/// ```
macro_rules! register {
// Creates a register at a fixed offset of the MMIO space.
+ ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => {
+ register!(@core $name $(, $comment)? { $($fields)* } );
+ register!(@io_fixed $name @ $offset);
+ };
+
+ // Creates an alias register of fixed offset register `alias` with its own fields.
+ ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => {
+ register!(@core $name $(, $comment)? { $($fields)* } );
+ register!(@io_fixed $name @ $alias::OFFSET);
+ };
+
+ // Creates a register at a relative offset from a base address provider.
+ ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => {
+ register!(@core $name $(, $comment)? { $($fields)* } );
+ register!(@io_relative $name @ $base [ $offset ]);
+ };
+
+ // Creates an alias register of relative offset register `alias` with its own fields.
+ ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => {
+ register!(@core $name $(, $comment)? { $($fields)* } );
+ register!(@io_relative $name @ $base [ $alias::OFFSET ]);
+ };
+
+ // Creates an array of registers at a fixed offset of the MMIO space.
(
- $name:ident @ $offset:literal $(, $comment:literal)? {
+ $name:ident @ $offset:literal [ $size:expr ; $stride:expr ] $(, $comment:literal)? {
$($fields:tt)*
}
) => {
- register!(@common $name @ $offset $(, $comment)?);
- register!(@field_accessors $name { $($fields)* });
- register!(@io $name @ $offset);
+ static_assert!(::core::mem::size_of::<u32>() <= $stride);
+ register!(@core $name $(, $comment)? { $($fields)* } );
+ register!(@io_array $name @ $offset [ $size ; $stride ]);
};
- // Creates a alias register of fixed offset register `alias` with its own fields.
+ // Shortcut for contiguous array of registers (stride == size of element).
(
- $name:ident => $alias:ident $(, $comment:literal)? {
+ $name:ident @ $offset:literal [ $size:expr ] $(, $comment:literal)? {
$($fields:tt)*
}
) => {
- register!(@common $name @ $alias::OFFSET $(, $comment)?);
- register!(@field_accessors $name { $($fields)* });
- register!(@io $name @ $alias::OFFSET);
+ register!($name @ $offset [ $size ; ::core::mem::size_of::<u32>() ] $(, $comment)? {
+ $($fields)*
+ } );
+ };
+
+ // Creates an array of registers at a relative offset from a base address provider.
+ (
+ $name:ident @ $base:ty [ $offset:literal [ $size:expr ; $stride:expr ] ]
+ $(, $comment:literal)? { $($fields:tt)* }
+ ) => {
+ static_assert!(::core::mem::size_of::<u32>() <= $stride);
+ register!(@core $name $(, $comment)? { $($fields)* } );
+ register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]);
};
- // Creates a register at a relative offset from a base address.
+ // Shortcut for contiguous array of relative registers (stride == size of element).
(
- $name:ident @ + $offset:literal $(, $comment:literal)? {
+ $name:ident @ $base:ty [ $offset:literal [ $size:expr ] ] $(, $comment:literal)? {
$($fields:tt)*
}
) => {
- register!(@common $name @ $offset $(, $comment)?);
- register!(@field_accessors $name { $($fields)* });
- register!(@io$name @ + $offset);
+ register!($name @ $base [ $offset [ $size ; ::core::mem::size_of::<u32>() ] ]
+ $(, $comment)? { $($fields)* } );
};
- // Creates a alias register of relative offset register `alias` with its own fields.
+ // Creates an alias of register `idx` of relative array of registers `alias` with its own
+ // fields.
(
- $name:ident => + $alias:ident $(, $comment:literal)? {
+ $name:ident => $base:ty [ $alias:ident [ $idx:expr ] ] $(, $comment:literal)? {
$($fields:tt)*
}
) => {
- register!(@common $name @ $alias::OFFSET $(, $comment)?);
- register!(@field_accessors $name { $($fields)* });
- register!(@io $name @ + $alias::OFFSET);
+ static_assert!($idx < $alias::SIZE);
+ register!(@core $name $(, $comment)? { $($fields)* } );
+ register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] );
+ };
+
+ // Creates an alias of register `idx` of array of registers `alias` with its own fields.
+ // This rule belongs to the (non-relative) register arrays set, but needs to be put last
+ // to avoid it being interpreted in place of the relative register array alias rule.
+ ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => {
+ static_assert!($idx < $alias::SIZE);
+ register!(@core $name $(, $comment)? { $($fields)* } );
+ register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE );
};
// All rules below are helpers.
- // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`,
- // and conversion to regular `u32`).
- (@common $name:ident @ $offset:expr $(, $comment:literal)?) => {
+ // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`,
+ // `Default`, `BitOr`, and conversion to the value type) and field accessor methods.
+ (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => {
$(
#[doc=$comment]
)?
#[repr(transparent)]
- #[derive(Clone, Copy, Default)]
+ #[derive(Clone, Copy)]
pub(crate) struct $name(u32);
- #[allow(dead_code)]
- impl $name {
- pub(crate) const OFFSET: usize = $offset;
- }
-
- // TODO[REGA]: display the raw hex value, then the value of all the fields. This requires
- // matching the fields, which will complexify the syntax considerably...
- impl ::core::fmt::Debug for $name {
- fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
- f.debug_tuple(stringify!($name))
- .field(&format_args!("0x{0:x}", &self.0))
- .finish()
- }
- }
-
impl ::core::ops::BitOr for $name {
type Output = Self;
@@ -170,6 +394,34 @@ macro_rules! register {
reg.0
}
}
+
+ register!(@fields_dispatcher $name { $($fields)* });
+ };
+
+ // Captures the fields and passes them to all the implementers that require field information.
+ //
+ // Used to simplify the matching rules for implementers, so they don't need to match the entire
+ // complex fields rule even though they only make use of part of it.
+ (@fields_dispatcher $name:ident {
+ $($hi:tt:$lo:tt $field:ident as $type:tt
+ $(?=> $try_into_type:ty)?
+ $(=> $into_type:ty)?
+ $(, $comment:literal)?
+ ;
+ )*
+ }
+ ) => {
+ register!(@field_accessors $name {
+ $(
+ $hi:$lo $field as $type
+ $(?=> $try_into_type)?
+ $(=> $into_type)?
+ $(, $comment)?
+ ;
+ )*
+ });
+ register!(@debug $name { $($field;)* });
+ register!(@default $name { $($field;)* });
};
// Defines all the field getter/methods methods for `$name`.
@@ -228,7 +480,7 @@ macro_rules! register {
$(, $comment:literal)?;
) => {
register!(
- @leaf_accessor $name $hi:$lo $field as bool
+ @leaf_accessor $name $hi:$lo $field
{ |f| <$into_type>::from(if f != 0 { true } else { false }) }
$into_type => $into_type $(, $comment)?;
);
@@ -246,7 +498,7 @@ macro_rules! register {
@field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty
$(, $comment:literal)?;
) => {
- register!(@leaf_accessor $name $hi:$lo $field as $type
+ register!(@leaf_accessor $name $hi:$lo $field
{ |f| <$try_into_type>::try_from(f as $type) } $try_into_type =>
::core::result::Result<
$try_into_type,
@@ -260,11 +512,11 @@ macro_rules! register {
@field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty
$(, $comment:literal)?;
) => {
- register!(@leaf_accessor $name $hi:$lo $field as $type
+ register!(@leaf_accessor $name $hi:$lo $field
{ |f| <$into_type>::from(f as $type) } $into_type => $into_type $(, $comment)?;);
};
- // Shortcut for fields defined as non-`bool` without the `=>` or `?=>` syntax.
+ // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax.
(
@field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt
$(, $comment:literal)?;
@@ -274,11 +526,11 @@ macro_rules! register {
// Generates the accessor methods for a single field.
(
- @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:ty
+ @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident
{ $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?;
) => {
::kernel::macros::paste!(
- const [<$field:upper>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi;
+ const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi;
const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1);
const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros();
);
@@ -287,7 +539,7 @@ macro_rules! register {
#[doc="Returns the value of this field:"]
#[doc=$comment]
)?
- #[inline]
+ #[inline(always)]
pub(crate) fn $field(self) -> $res_type {
::kernel::macros::paste!(
const MASK: u32 = $name::[<$field:upper _MASK>];
@@ -303,7 +555,7 @@ macro_rules! register {
#[doc="Sets the value of this field:"]
#[doc=$comment]
)?
- #[inline]
+ #[inline(always)]
pub(crate) fn [<set_ $field>](mut self, value: $to_type) -> Self {
const MASK: u32 = $name::[<$field:upper _MASK>];
const SHIFT: u32 = $name::[<$field:upper _SHIFT>];
@@ -315,25 +567,64 @@ macro_rules! register {
);
};
- // Creates the IO accessors for a fixed offset register.
- (@io $name:ident @ $offset:expr) => {
+ // Generates the `Debug` implementation for `$name`.
+ (@debug $name:ident { $($field:ident;)* }) => {
+ impl ::kernel::fmt::Debug for $name {
+ fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result {
+ f.debug_struct(stringify!($name))
+ .field("<raw>", &::kernel::prelude::fmt!("{:#x}", &self.0))
+ $(
+ .field(stringify!($field), &self.$field())
+ )*
+ .finish()
+ }
+ }
+ };
+
+ // Generates the `Default` implementation for `$name`.
+ (@default $name:ident { $($field:ident;)* }) => {
+ /// Returns a value for the register where all fields are set to their default value.
+ impl ::core::default::Default for $name {
+ fn default() -> Self {
+ #[allow(unused_mut)]
+ let mut value = Self(Default::default());
+
+ ::kernel::macros::paste!(
+ $(
+ value.[<set_ $field>](Default::default());
+ )*
+ );
+
+ value
+ }
+ }
+ };
+
+ // Generates the IO accessors for a fixed offset register.
+ (@io_fixed $name:ident @ $offset:expr) => {
#[allow(dead_code)]
impl $name {
- #[inline]
+ pub(crate) const OFFSET: usize = $offset;
+
+ /// Read the register from its address in `io`.
+ #[inline(always)]
pub(crate) fn read<const SIZE: usize, T>(io: &T) -> Self where
T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
{
Self(io.read32($offset))
}
- #[inline]
+ /// Write the value contained in `self` to the register address in `io`.
+ #[inline(always)]
pub(crate) fn write<const SIZE: usize, T>(self, io: &T) where
T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
{
io.write32(self.0, $offset)
}
- #[inline]
+ /// Read the register from its address in `io` and run `f` on its value to obtain a new
+ /// value to write back.
+ #[inline(always)]
pub(crate) fn alter<const SIZE: usize, T, F>(
io: &T,
f: F,
@@ -347,76 +638,322 @@ macro_rules! register {
}
};
- // Create the IO accessors for a relative offset register.
- (@io $name:ident @ + $offset:literal) => {
+ // Generates the IO accessors for a relative offset register.
+ (@io_relative $name:ident @ $base:ty [ $offset:expr ]) => {
+ #[allow(dead_code)]
+ impl $name {
+ pub(crate) const OFFSET: usize = $offset;
+
+ /// Read the register from `io`, using the base address provided by `base` and adding
+ /// the register's offset to it.
+ #[inline(always)]
+ pub(crate) fn read<const SIZE: usize, T, B>(
+ io: &T,
+ #[allow(unused_variables)]
+ base: &B,
+ ) -> Self where
+ T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ B: crate::regs::macros::RegisterBase<$base>,
+ {
+ const OFFSET: usize = $name::OFFSET;
+
+ let value = io.read32(
+ <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET
+ );
+
+ Self(value)
+ }
+
+ /// Write the value contained in `self` to `io`, using the base address provided by
+ /// `base` and adding the register's offset to it.
+ #[inline(always)]
+ pub(crate) fn write<const SIZE: usize, T, B>(
+ self,
+ io: &T,
+ #[allow(unused_variables)]
+ base: &B,
+ ) where
+ T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ B: crate::regs::macros::RegisterBase<$base>,
+ {
+ const OFFSET: usize = $name::OFFSET;
+
+ io.write32(
+ self.0,
+ <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET
+ );
+ }
+
+ /// Read the register from `io`, using the base address provided by `base` and adding
+ /// the register's offset to it, then run `f` on its value to obtain a new value to
+ /// write back.
+ #[inline(always)]
+ pub(crate) fn alter<const SIZE: usize, T, B, F>(
+ io: &T,
+ base: &B,
+ f: F,
+ ) where
+ T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ B: crate::regs::macros::RegisterBase<$base>,
+ F: ::core::ops::FnOnce(Self) -> Self,
+ {
+ let reg = f(Self::read(io, base));
+ reg.write(io, base);
+ }
+ }
+ };
+
+ // Generates the IO accessors for an array of registers.
+ (@io_array $name:ident @ $offset:literal [ $size:expr ; $stride:expr ]) => {
#[allow(dead_code)]
impl $name {
- #[inline]
+ pub(crate) const OFFSET: usize = $offset;
+ pub(crate) const SIZE: usize = $size;
+ pub(crate) const STRIDE: usize = $stride;
+
+ /// Read the array register at index `idx` from its address in `io`.
+ #[inline(always)]
pub(crate) fn read<const SIZE: usize, T>(
io: &T,
- base: usize,
+ idx: usize,
) -> Self where
T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
{
- Self(io.read32(base + $offset))
+ build_assert!(idx < Self::SIZE);
+
+ let offset = Self::OFFSET + (idx * Self::STRIDE);
+ let value = io.read32(offset);
+
+ Self(value)
}
- #[inline]
+ /// Write the value contained in `self` to the array register with index `idx` in `io`.
+ #[inline(always)]
pub(crate) fn write<const SIZE: usize, T>(
self,
io: &T,
- base: usize,
+ idx: usize
) where
T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
{
- io.write32(self.0, base + $offset)
+ build_assert!(idx < Self::SIZE);
+
+ let offset = Self::OFFSET + (idx * Self::STRIDE);
+
+ io.write32(self.0, offset);
}
- #[inline]
+ /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a
+ /// new value to write back.
+ #[inline(always)]
pub(crate) fn alter<const SIZE: usize, T, F>(
io: &T,
- base: usize,
+ idx: usize,
f: F,
) where
T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
F: ::core::ops::FnOnce(Self) -> Self,
{
- let reg = f(Self::read(io, base));
- reg.write(io, base);
+ let reg = f(Self::read(io, idx));
+ reg.write(io, idx);
}
- #[inline]
+ /// Read the array register at index `idx` from its address in `io`.
+ ///
+ /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the
+ /// access was out-of-bounds.
+ #[inline(always)]
pub(crate) fn try_read<const SIZE: usize, T>(
io: &T,
- base: usize,
+ idx: usize,
) -> ::kernel::error::Result<Self> where
T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
{
- io.try_read32(base + $offset).map(Self)
+ if idx < Self::SIZE {
+ Ok(Self::read(io, idx))
+ } else {
+ Err(EINVAL)
+ }
}
- #[inline]
+ /// Write the value contained in `self` to the array register with index `idx` in `io`.
+ ///
+ /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the
+ /// access was out-of-bounds.
+ #[inline(always)]
pub(crate) fn try_write<const SIZE: usize, T>(
self,
io: &T,
- base: usize,
- ) -> ::kernel::error::Result<()> where
+ idx: usize,
+ ) -> ::kernel::error::Result where
T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
{
- io.try_write32(self.0, base + $offset)
+ if idx < Self::SIZE {
+ Ok(self.write(io, idx))
+ } else {
+ Err(EINVAL)
+ }
}
- #[inline]
+ /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a
+ /// new value to write back.
+ ///
+ /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the
+ /// access was out-of-bounds.
+ #[inline(always)]
pub(crate) fn try_alter<const SIZE: usize, T, F>(
io: &T,
- base: usize,
+ idx: usize,
+ f: F,
+ ) -> ::kernel::error::Result where
+ T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ F: ::core::ops::FnOnce(Self) -> Self,
+ {
+ if idx < Self::SIZE {
+ Ok(Self::alter(io, idx, f))
+ } else {
+ Err(EINVAL)
+ }
+ }
+ }
+ };
+
+ // Generates the IO accessors for an array of relative registers.
+ (
+ @io_relative_array $name:ident @ $base:ty
+ [ $offset:literal [ $size:expr ; $stride:expr ] ]
+ ) => {
+ #[allow(dead_code)]
+ impl $name {
+ pub(crate) const OFFSET: usize = $offset;
+ pub(crate) const SIZE: usize = $size;
+ pub(crate) const STRIDE: usize = $stride;
+
+ /// Read the array register at index `idx` from `io`, using the base address provided
+ /// by `base` and adding the register's offset to it.
+ #[inline(always)]
+ pub(crate) fn read<const SIZE: usize, T, B>(
+ io: &T,
+ #[allow(unused_variables)]
+ base: &B,
+ idx: usize,
+ ) -> Self where
+ T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ B: crate::regs::macros::RegisterBase<$base>,
+ {
+ build_assert!(idx < Self::SIZE);
+
+ let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE +
+ Self::OFFSET + (idx * Self::STRIDE);
+ let value = io.read32(offset);
+
+ Self(value)
+ }
+
+ /// Write the value contained in `self` to `io`, using the base address provided by
+ /// `base` and adding the offset of array register `idx` to it.
+ #[inline(always)]
+ pub(crate) fn write<const SIZE: usize, T, B>(
+ self,
+ io: &T,
+ #[allow(unused_variables)]
+ base: &B,
+ idx: usize
+ ) where
+ T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ B: crate::regs::macros::RegisterBase<$base>,
+ {
+ build_assert!(idx < Self::SIZE);
+
+ let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE +
+ Self::OFFSET + (idx * Self::STRIDE);
+
+ io.write32(self.0, offset);
+ }
+
+ /// Read the array register at index `idx` from `io`, using the base address provided
+ /// by `base` and adding the register's offset to it, then run `f` on its value to
+ /// obtain a new value to write back.
+ #[inline(always)]
+ pub(crate) fn alter<const SIZE: usize, T, B, F>(
+ io: &T,
+ base: &B,
+ idx: usize,
+ f: F,
+ ) where
+ T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ B: crate::regs::macros::RegisterBase<$base>,
+ F: ::core::ops::FnOnce(Self) -> Self,
+ {
+ let reg = f(Self::read(io, base, idx));
+ reg.write(io, base, idx);
+ }
+
+ /// Read the array register at index `idx` from `io`, using the base address provided
+ /// by `base` and adding the register's offset to it.
+ ///
+ /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the
+ /// access was out-of-bounds.
+ #[inline(always)]
+ pub(crate) fn try_read<const SIZE: usize, T, B>(
+ io: &T,
+ base: &B,
+ idx: usize,
+ ) -> ::kernel::error::Result<Self> where
+ T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ B: crate::regs::macros::RegisterBase<$base>,
+ {
+ if idx < Self::SIZE {
+ Ok(Self::read(io, base, idx))
+ } else {
+ Err(EINVAL)
+ }
+ }
+
+ /// Write the value contained in `self` to `io`, using the base address provided by
+ /// `base` and adding the offset of array register `idx` to it.
+ ///
+ /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the
+ /// access was out-of-bounds.
+ #[inline(always)]
+ pub(crate) fn try_write<const SIZE: usize, T, B>(
+ self,
+ io: &T,
+ base: &B,
+ idx: usize,
+ ) -> ::kernel::error::Result where
+ T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ B: crate::regs::macros::RegisterBase<$base>,
+ {
+ if idx < Self::SIZE {
+ Ok(self.write(io, base, idx))
+ } else {
+ Err(EINVAL)
+ }
+ }
+
+ /// Read the array register at index `idx` from `io`, using the base address provided
+ /// by `base` and adding the register's offset to it, then run `f` on its value to
+ /// obtain a new value to write back.
+ ///
+ /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the
+ /// access was out-of-bounds.
+ #[inline(always)]
+ pub(crate) fn try_alter<const SIZE: usize, T, B, F>(
+ io: &T,
+ base: &B,
+ idx: usize,
f: F,
- ) -> ::kernel::error::Result<()> where
+ ) -> ::kernel::error::Result where
T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>,
+ B: crate::regs::macros::RegisterBase<$base>,
F: ::core::ops::FnOnce(Self) -> Self,
{
- let reg = f(Self::try_read(io, base)?);
- reg.try_write(io, base)
+ if idx < Self::SIZE {
+ Ok(Self::alter(io, base, idx, f))
+ } else {
+ Err(EINVAL)
+ }
}
}
};
diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs
index 76cedf3710d7..bf35f00cb732 100644
--- a/drivers/gpu/nova-core/util.rs
+++ b/drivers/gpu/nova-core/util.rs
@@ -3,26 +3,6 @@
use kernel::prelude::*;
use kernel::time::{Delta, Instant, Monotonic};
-pub(crate) const fn to_lowercase_bytes<const N: usize>(s: &str) -> [u8; N] {
- let src = s.as_bytes();
- let mut dst = [0; N];
- let mut i = 0;
-
- while i < src.len() && i < N {
- dst[i] = (src[i] as char).to_ascii_lowercase() as u8;
- i += 1;
- }
-
- dst
-}
-
-pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str {
- match core::str::from_utf8(bytes) {
- Ok(string) => string,
- Err(_) => kernel::build_error!("Bytes are not valid UTF-8."),
- }
-}
-
/// Wait until `cond` is true or `timeout` elapsed.
///
/// When `cond` evaluates to `Some`, its return value is returned.
diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs
index 5b5d9f38cbb3..71fbe71b84db 100644
--- a/drivers/gpu/nova-core/vbios.rs
+++ b/drivers/gpu/nova-core/vbios.rs
@@ -8,8 +8,9 @@ use crate::firmware::FalconUCodeDescV3;
use core::convert::TryFrom;
use kernel::device;
use kernel::error::Result;
-use kernel::pci;
use kernel::prelude::*;
+use kernel::ptr::{Alignable, Alignment};
+use kernel::types::ARef;
/// The offset of the VBIOS ROM in the BAR0 space.
const ROM_OFFSET: usize = 0x300000;
@@ -31,7 +32,7 @@ const FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: u8 = 0x85;
/// Vbios Reader for constructing the VBIOS data.
struct VbiosIterator<'a> {
- pdev: &'a pci::Device,
+ dev: &'a device::Device,
bar0: &'a Bar0,
/// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference
/// or copying into other data structures. It is the entire scanned contents of the VBIOS which
@@ -46,9 +47,9 @@ struct VbiosIterator<'a> {
}
impl<'a> VbiosIterator<'a> {
- fn new(pdev: &'a pci::Device, bar0: &'a Bar0) -> Result<Self> {
+ fn new(dev: &'a device::Device, bar0: &'a Bar0) -> Result<Self> {
Ok(Self {
- pdev,
+ dev,
bar0,
data: KVec::new(),
current_offset: 0,
@@ -64,7 +65,7 @@ impl<'a> VbiosIterator<'a> {
// Ensure length is a multiple of 4 for 32-bit reads
if len % core::mem::size_of::<u32>() != 0 {
dev_err!(
- self.pdev.as_ref(),
+ self.dev,
"VBIOS read length {} is not a multiple of 4\n",
len
);
@@ -89,7 +90,7 @@ impl<'a> VbiosIterator<'a> {
/// Read bytes at a specific offset, filling any gap.
fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result {
if offset > BIOS_MAX_SCAN_LEN {
- dev_err!(self.pdev.as_ref(), "Error: exceeded BIOS scan limit.\n");
+ dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n");
return Err(EINVAL);
}
@@ -115,7 +116,7 @@ impl<'a> VbiosIterator<'a> {
if offset + len > data_len {
self.read_more_at_offset(offset, len).inspect_err(|e| {
dev_err!(
- self.pdev.as_ref(),
+ self.dev,
"Failed to read more at offset {:#x}: {:?}\n",
offset,
e
@@ -123,9 +124,9 @@ impl<'a> VbiosIterator<'a> {
})?;
}
- BiosImage::new(self.pdev, &self.data[offset..offset + len]).inspect_err(|err| {
+ BiosImage::new(self.dev, &self.data[offset..offset + len]).inspect_err(|err| {
dev_err!(
- self.pdev.as_ref(),
+ self.dev,
"Failed to {} at offset {:#x}: {:?}\n",
context,
offset,
@@ -146,10 +147,7 @@ impl<'a> Iterator for VbiosIterator<'a> {
}
if self.current_offset > BIOS_MAX_SCAN_LEN {
- dev_err!(
- self.pdev.as_ref(),
- "Error: exceeded BIOS scan limit, stopping scan\n"
- );
+ dev_err!(self.dev, "Error: exceeded BIOS scan limit, stopping scan\n");
return None;
}
@@ -177,8 +175,7 @@ impl<'a> Iterator for VbiosIterator<'a> {
// Advance to next image (aligned to 512 bytes).
self.current_offset += image_size;
- // TODO[NUMM]: replace with `align_up` once it lands.
- self.current_offset = self.current_offset.next_multiple_of(512);
+ self.current_offset = self.current_offset.align_up(Alignment::new::<512>())?;
Some(Ok(full_image))
}
@@ -192,18 +189,18 @@ impl Vbios {
/// Probe for VBIOS extraction.
///
/// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore.
- pub(crate) fn new(pdev: &pci::Device, bar0: &Bar0) -> Result<Vbios> {
+ pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result<Vbios> {
// Images to extract from iteration
let mut pci_at_image: Option<PciAtBiosImage> = None;
let mut first_fwsec_image: Option<FwSecBiosBuilder> = None;
let mut second_fwsec_image: Option<FwSecBiosBuilder> = None;
// Parse all VBIOS images in the ROM
- for image_result in VbiosIterator::new(pdev, bar0)? {
+ for image_result in VbiosIterator::new(dev, bar0)? {
let full_image = image_result?;
dev_dbg!(
- pdev.as_ref(),
+ dev,
"Found BIOS image: size: {:#x}, type: {}, last: {}\n",
full_image.image_size_bytes(),
full_image.image_type_str(),
@@ -234,14 +231,14 @@ impl Vbios {
(second_fwsec_image, first_fwsec_image, pci_at_image)
{
second
- .setup_falcon_data(pdev, &pci_at, &first)
- .inspect_err(|e| dev_err!(pdev.as_ref(), "Falcon data setup failed: {:?}\n", e))?;
+ .setup_falcon_data(&pci_at, &first)
+ .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?;
Ok(Vbios {
- fwsec_image: second.build(pdev)?,
+ fwsec_image: second.build()?,
})
} else {
dev_err!(
- pdev.as_ref(),
+ dev,
"Missing required images for falcon data setup, skipping\n"
);
Err(EINVAL)
@@ -284,9 +281,9 @@ struct PcirStruct {
}
impl PcirStruct {
- fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> {
+ fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
if data.len() < core::mem::size_of::<PcirStruct>() {
- dev_err!(pdev.as_ref(), "Not enough data for PcirStruct\n");
+ dev_err!(dev, "Not enough data for PcirStruct\n");
return Err(EINVAL);
}
@@ -295,11 +292,7 @@ impl PcirStruct {
// Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e).
if &signature != b"PCIR" && &signature != b"NPDS" {
- dev_err!(
- pdev.as_ref(),
- "Invalid signature for PcirStruct: {:?}\n",
- signature
- );
+ dev_err!(dev, "Invalid signature for PcirStruct: {:?}\n", signature);
return Err(EINVAL);
}
@@ -308,7 +301,7 @@ impl PcirStruct {
let image_len = u16::from_le_bytes([data[16], data[17]]);
if image_len == 0 {
- dev_err!(pdev.as_ref(), "Invalid image length: 0\n");
+ dev_err!(dev, "Invalid image length: 0\n");
return Err(EINVAL);
}
@@ -345,7 +338,7 @@ impl PcirStruct {
/// its header) is in the [`PciAtBiosImage`] and the falcon data it is pointing to is in the
/// [`FwSecBiosImage`].
#[derive(Debug, Clone, Copy)]
-#[expect(dead_code)]
+#[repr(C)]
struct BitHeader {
/// 0h: BIT Header Identifier (BMP=0x7FFF/BIT=0xB8FF)
id: u16,
@@ -365,7 +358,7 @@ struct BitHeader {
impl BitHeader {
fn new(data: &[u8]) -> Result<Self> {
- if data.len() < 12 {
+ if data.len() < core::mem::size_of::<Self>() {
return Err(EINVAL);
}
@@ -467,7 +460,7 @@ struct PciRomHeader {
}
impl PciRomHeader {
- fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> {
+ fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
if data.len() < 26 {
// Need at least 26 bytes to read pciDataStrucPtr and sizeOfBlock.
return Err(EINVAL);
@@ -479,7 +472,7 @@ impl PciRomHeader {
match signature {
0xAA55 | 0xBB77 | 0x4E56 => {}
_ => {
- dev_err!(pdev.as_ref(), "ROM signature unknown {:#x}\n", signature);
+ dev_err!(dev, "ROM signature unknown {:#x}\n", signature);
return Err(EINVAL);
}
}
@@ -538,9 +531,9 @@ struct NpdeStruct {
}
impl NpdeStruct {
- fn new(pdev: &pci::Device, data: &[u8]) -> Option<Self> {
+ fn new(dev: &device::Device, data: &[u8]) -> Option<Self> {
if data.len() < core::mem::size_of::<Self>() {
- dev_dbg!(pdev.as_ref(), "Not enough data for NpdeStruct\n");
+ dev_dbg!(dev, "Not enough data for NpdeStruct\n");
return None;
}
@@ -549,17 +542,13 @@ impl NpdeStruct {
// Signature should be "NPDE" (0x4544504E).
if &signature != b"NPDE" {
- dev_dbg!(
- pdev.as_ref(),
- "Invalid signature for NpdeStruct: {:?}\n",
- signature
- );
+ dev_dbg!(dev, "Invalid signature for NpdeStruct: {:?}\n", signature);
return None;
}
let subimage_len = u16::from_le_bytes([data[8], data[9]]);
if subimage_len == 0 {
- dev_dbg!(pdev.as_ref(), "Invalid subimage length: 0\n");
+ dev_dbg!(dev, "Invalid subimage length: 0\n");
return None;
}
@@ -584,7 +573,7 @@ impl NpdeStruct {
/// Try to find NPDE in the data, the NPDE is right after the PCIR.
fn find_in_data(
- pdev: &pci::Device,
+ dev: &device::Device,
data: &[u8],
rom_header: &PciRomHeader,
pcir: &PcirStruct,
@@ -596,12 +585,12 @@ impl NpdeStruct {
// Check if we have enough data
if npde_start + core::mem::size_of::<Self>() > data.len() {
- dev_dbg!(pdev.as_ref(), "Not enough data for NPDE\n");
+ dev_dbg!(dev, "Not enough data for NPDE\n");
return None;
}
// Try to create NPDE from the data
- NpdeStruct::new(pdev, &data[npde_start..])
+ NpdeStruct::new(dev, &data[npde_start..])
}
}
@@ -669,10 +658,10 @@ impl BiosImage {
/// Create a [`BiosImageBase`] from a byte slice and convert it to a [`BiosImage`] which
/// triggers the constructor of the specific BiosImage enum variant.
- fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> {
- let base = BiosImageBase::new(pdev, data)?;
+ fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
+ let base = BiosImageBase::new(dev, data)?;
let image = base.into_image().inspect_err(|e| {
- dev_err!(pdev.as_ref(), "Failed to create BiosImage: {:?}\n", e);
+ dev_err!(dev, "Failed to create BiosImage: {:?}\n", e);
})?;
Ok(image)
@@ -754,9 +743,10 @@ impl TryFrom<BiosImageBase> for BiosImage {
///
/// Each BiosImage type has a BiosImageBase type along with other image-specific fields. Note that
/// Rust favors composition of types over inheritance.
-#[derive(Debug)]
#[expect(dead_code)]
struct BiosImageBase {
+ /// Used for logging.
+ dev: ARef<device::Device>,
/// PCI ROM Expansion Header
rom_header: PciRomHeader,
/// PCI Data Structure
@@ -773,16 +763,16 @@ impl BiosImageBase {
}
/// Creates a new BiosImageBase from raw byte data.
- fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> {
+ fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
// Ensure we have enough data for the ROM header.
if data.len() < 26 {
- dev_err!(pdev.as_ref(), "Not enough data for ROM header\n");
+ dev_err!(dev, "Not enough data for ROM header\n");
return Err(EINVAL);
}
// Parse the ROM header.
- let rom_header = PciRomHeader::new(pdev, &data[0..26])
- .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PciRomHeader: {:?}\n", e))?;
+ let rom_header = PciRomHeader::new(dev, &data[0..26])
+ .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?;
// Get the PCI Data Structure using the pointer from the ROM header.
let pcir_offset = rom_header.pci_data_struct_offset as usize;
@@ -791,28 +781,29 @@ impl BiosImageBase {
.ok_or(EINVAL)
.inspect_err(|_| {
dev_err!(
- pdev.as_ref(),
+ dev,
"PCIR offset {:#x} out of bounds (data length: {})\n",
pcir_offset,
data.len()
);
dev_err!(
- pdev.as_ref(),
+ dev,
"Consider reading more data for construction of BiosImage\n"
);
})?;
- let pcir = PcirStruct::new(pdev, pcir_data)
- .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PcirStruct: {:?}\n", e))?;
+ let pcir = PcirStruct::new(dev, pcir_data)
+ .inspect_err(|e| dev_err!(dev, "Failed to create PcirStruct: {:?}\n", e))?;
// Look for NPDE structure if this is not an NBSI image (type != 0x70).
- let npde = NpdeStruct::find_in_data(pdev, data, &rom_header, &pcir);
+ let npde = NpdeStruct::find_in_data(dev, data, &rom_header, &pcir);
// Create a copy of the data.
let mut data_copy = KVec::new();
data_copy.extend_from_slice(data, GFP_KERNEL)?;
Ok(BiosImageBase {
+ dev: dev.into(),
rom_header,
pcir,
npde,
@@ -848,7 +839,7 @@ impl PciAtBiosImage {
///
/// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC
/// image.
- fn falcon_data_ptr(&self, pdev: &pci::Device) -> Result<u32> {
+ fn falcon_data_ptr(&self) -> Result<u32> {
let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?;
// Make sure we don't go out of bounds
@@ -859,14 +850,14 @@ impl PciAtBiosImage {
// read the 4 bytes at the offset specified in the token
let offset = token.data_offset as usize;
let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| {
- dev_err!(pdev.as_ref(), "Failed to convert data slice to array");
+ dev_err!(self.base.dev, "Failed to convert data slice to array");
EINVAL
})?;
let data_ptr = u32::from_le_bytes(bytes);
if (data_ptr as usize) < self.base.data.len() {
- dev_err!(pdev.as_ref(), "Falcon data pointer out of bounds\n");
+ dev_err!(self.base.dev, "Falcon data pointer out of bounds\n");
return Err(EINVAL);
}
@@ -892,7 +883,7 @@ impl TryFrom<BiosImageBase> for PciAtBiosImage {
/// The [`PmuLookupTableEntry`] structure is a single entry in the [`PmuLookupTable`].
///
/// See the [`PmuLookupTable`] description for more information.
-#[expect(dead_code)]
+#[repr(C, packed)]
struct PmuLookupTableEntry {
application_id: u8,
target_id: u8,
@@ -901,7 +892,7 @@ struct PmuLookupTableEntry {
impl PmuLookupTableEntry {
fn new(data: &[u8]) -> Result<Self> {
- if data.len() < 6 {
+ if data.len() < core::mem::size_of::<Self>() {
return Err(EINVAL);
}
@@ -928,7 +919,7 @@ struct PmuLookupTable {
}
impl PmuLookupTable {
- fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> {
+ fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
if data.len() < 4 {
return Err(EINVAL);
}
@@ -940,10 +931,7 @@ impl PmuLookupTable {
let required_bytes = header_len + (entry_count * entry_len);
if data.len() < required_bytes {
- dev_err!(
- pdev.as_ref(),
- "PmuLookupTable data length less than required\n"
- );
+ dev_err!(dev, "PmuLookupTable data length less than required\n");
return Err(EINVAL);
}
@@ -956,11 +944,7 @@ impl PmuLookupTable {
// Debug logging of entries (dumps the table data to dmesg)
for i in (header_len..required_bytes).step_by(entry_len) {
- dev_dbg!(
- pdev.as_ref(),
- "PMU entry: {:02x?}\n",
- &data[i..][..entry_len]
- );
+ dev_dbg!(dev, "PMU entry: {:02x?}\n", &data[i..][..entry_len]);
}
Ok(PmuLookupTable {
@@ -997,11 +981,10 @@ impl PmuLookupTable {
impl FwSecBiosBuilder {
fn setup_falcon_data(
&mut self,
- pdev: &pci::Device,
pci_at_image: &PciAtBiosImage,
first_fwsec: &FwSecBiosBuilder,
) -> Result {
- let mut offset = pci_at_image.falcon_data_ptr(pdev)? as usize;
+ let mut offset = pci_at_image.falcon_data_ptr()? as usize;
let mut pmu_in_first_fwsec = false;
// The falcon data pointer assumes that the PciAt and FWSEC images
@@ -1024,10 +1007,15 @@ impl FwSecBiosBuilder {
self.falcon_data_offset = Some(offset);
if pmu_in_first_fwsec {
- self.pmu_lookup_table =
- Some(PmuLookupTable::new(pdev, &first_fwsec.base.data[offset..])?);
+ self.pmu_lookup_table = Some(PmuLookupTable::new(
+ &self.base.dev,
+ &first_fwsec.base.data[offset..],
+ )?);
} else {
- self.pmu_lookup_table = Some(PmuLookupTable::new(pdev, &self.base.data[offset..])?);
+ self.pmu_lookup_table = Some(PmuLookupTable::new(
+ &self.base.dev,
+ &self.base.data[offset..],
+ )?);
}
match self
@@ -1040,7 +1028,7 @@ impl FwSecBiosBuilder {
let mut ucode_offset = entry.data as usize;
ucode_offset -= pci_at_image.base.data.len();
if ucode_offset < first_fwsec.base.data.len() {
- dev_err!(pdev.as_ref(), "Falcon Ucode offset not in second Fwsec.\n");
+ dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n");
return Err(EINVAL);
}
ucode_offset -= first_fwsec.base.data.len();
@@ -1048,7 +1036,7 @@ impl FwSecBiosBuilder {
}
Err(e) => {
dev_err!(
- pdev.as_ref(),
+ self.base.dev,
"PmuLookupTableEntry not found, error: {:?}\n",
e
);
@@ -1059,7 +1047,7 @@ impl FwSecBiosBuilder {
}
/// Build the final FwSecBiosImage from this builder
- fn build(self, pdev: &pci::Device) -> Result<FwSecBiosImage> {
+ fn build(self) -> Result<FwSecBiosImage> {
let ret = FwSecBiosImage {
base: self.base,
falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?,
@@ -1067,8 +1055,8 @@ impl FwSecBiosBuilder {
if cfg!(debug_assertions) {
// Print the desc header for debugging
- let desc = ret.header(pdev.as_ref())?;
- dev_dbg!(pdev.as_ref(), "PmuLookupTableEntry desc: {:#?}\n", desc);
+ let desc = ret.header()?;
+ dev_dbg!(ret.base.dev, "PmuLookupTableEntry desc: {:#?}\n", desc);
}
Ok(ret)
@@ -1077,13 +1065,16 @@ impl FwSecBiosBuilder {
impl FwSecBiosImage {
/// Get the FwSec header ([`FalconUCodeDescV3`]).
- pub(crate) fn header(&self, dev: &device::Device) -> Result<&FalconUCodeDescV3> {
+ pub(crate) fn header(&self) -> Result<&FalconUCodeDescV3> {
// Get the falcon ucode offset that was found in setup_falcon_data.
let falcon_ucode_offset = self.falcon_ucode_offset;
// Make sure the offset is within the data bounds.
if falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>() > self.base.data.len() {
- dev_err!(dev, "fwsec-frts header not contained within BIOS bounds\n");
+ dev_err!(
+ self.base.dev,
+ "fwsec-frts header not contained within BIOS bounds\n"
+ );
return Err(ERANGE);
}
@@ -1095,7 +1086,7 @@ impl FwSecBiosImage {
let ver = (hdr & 0xff00) >> 8;
if ver != 3 {
- dev_err!(dev, "invalid fwsec firmware version: {:?}\n", ver);
+ dev_err!(self.base.dev, "invalid fwsec firmware version: {:?}\n", ver);
return Err(EINVAL);
}
@@ -1115,7 +1106,7 @@ impl FwSecBiosImage {
}
/// Get the ucode data as a byte slice
- pub(crate) fn ucode(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Result<&[u8]> {
+ pub(crate) fn ucode(&self, desc: &FalconUCodeDescV3) -> Result<&[u8]> {
let falcon_ucode_offset = self.falcon_ucode_offset;
// The ucode data follows the descriptor.
@@ -1127,15 +1118,16 @@ impl FwSecBiosImage {
.data
.get(ucode_data_offset..ucode_data_offset + size)
.ok_or(ERANGE)
- .inspect_err(|_| dev_err!(dev, "fwsec ucode data not contained within BIOS bounds\n"))
+ .inspect_err(|_| {
+ dev_err!(
+ self.base.dev,
+ "fwsec ucode data not contained within BIOS bounds\n"
+ )
+ })
}
/// Get the signatures as a byte slice
- pub(crate) fn sigs(
- &self,
- dev: &device::Device,
- desc: &FalconUCodeDescV3,
- ) -> Result<&[Bcrt30Rsa3kSignature]> {
+ pub(crate) fn sigs(&self, desc: &FalconUCodeDescV3) -> Result<&[Bcrt30Rsa3kSignature]> {
// The signatures data follows the descriptor.
let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>();
let sigs_size =
@@ -1144,7 +1136,7 @@ impl FwSecBiosImage {
// Make sure the data is within bounds.
if sigs_data_offset + sigs_size > self.base.data.len() {
dev_err!(
- dev,
+ self.base.dev,
"fwsec signatures data not contained within BIOS bounds\n"
);
return Err(ERANGE);