diff options
author | Alex Deucher <alexander.deucher@amd.com> | 2025-04-08 10:39:06 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2025-04-11 16:58:15 -0400 |
commit | 2e0454b730648e9349ca54eb4a8142d77e8e7008 (patch) | |
tree | e7b54ac2a6a3fb7e3305feadf6d19627bbe3fc14 | |
parent | b86fd212f37631551f9620e28062dd2b0c53ccdd (diff) |
drm/amdgpu: adjust enforce_isolation handling
Switch from a bool to an enum and allow more options
for enforce isolation. There are now 3 modes of operation:
- Disabled (0)
- Enabled (serialization and cleaner shader) (1)
- Enabled in legacy mode (no serialization or cleaner shader) (2)
This provides better flexibility for more use cases.
Acked-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 11 |
12 files changed, 93 insertions, 30 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bb5df7831308..b156e31ac86a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -230,7 +230,7 @@ extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; extern int amdgpu_mtype_local; -extern bool enforce_isolation; +extern int amdgpu_enforce_isolation; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; @@ -873,6 +873,13 @@ struct amdgpu_init_level { struct amdgpu_reset_domain; struct amdgpu_fru_info; +enum amdgpu_enforce_isolation_mode { + AMDGPU_ENFORCE_ISOLATION_DISABLE = 0, + AMDGPU_ENFORCE_ISOLATION_ENABLE = 1, + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2, +}; + + /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -1225,7 +1232,7 @@ struct amdgpu_device { /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; - bool enforce_isolation[MAX_XCP]; + enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP]; struct amdgpu_isolation { void *owner; struct dma_fence *spearhead; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ea047305eb64..0941b3495b2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -296,7 +296,21 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; - p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; + switch (p->adev->enforce_isolation[fpriv->xcp_id]) { + case AMDGPU_ENFORCE_ISOLATION_DISABLE: + default: + p->jobs[i]->enforce_isolation = false; + p->jobs[i]->run_cleaner_shader = false; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = true; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = false; + break; + } } p->gang_leader = p->jobs[p->gang_leader_idx]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8e150e9393c7..475bcd2a8a31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2145,8 +2145,26 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - for (i = 0; i < MAX_XCP; i++) - adev->enforce_isolation[i] = !!enforce_isolation; + for (i = 0; i < MAX_XCP; i++) { + switch (amdgpu_enforce_isolation) { + case -1: + case 0: + default: + /* disable */ + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + /* enable */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + /* enable legacy mode */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + } + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f5e83acb6169..a117cd95b9dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -179,7 +179,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; -bool enforce_isolation; +int amdgpu_enforce_isolation = -1; int amdgpu_modeset = -1; /* Specifies the default granularity for SVM, used in buffer @@ -1038,11 +1038,13 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); /** - * DOC: enforce_isolation (bool) - * enforce process isolation between graphics and compute via using the same reserved vmid. + * DOC: enforce_isolation (int) + * enforce process isolation between graphics and compute. + * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode) */ -module_param(enforce_isolation, bool, 0444); -MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); +module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444); +MODULE_PARM_DESC(enforce_isolation, +"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode)"); /** * DOC: modeset (int) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 663830c6c73b..2c933d436e56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1468,6 +1468,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) goto err; job->enforce_isolation = true; + /* always run the cleaner shader */ + job->run_cleaner_shader = true; ib = &job->ibs[0]; for (i = 0; i <= ring->funcs->align_mask; ++i) @@ -1599,7 +1601,7 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' * feature for each GPU partition. Reading from the 'enforce_isolation' * sysfs file returns the isolation settings for all partitions, where '0' - * indicates disabled and '1' indicates enabled. + * indicates disabled, '1' indicates enabled, and '2' indicates enabled in legacy mode. * * Return: The number of bytes read from the sysfs file. */ @@ -1634,9 +1636,10 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, * @count: The size of the input data * * This function allows control over the 'enforce_isolation' feature, which - * serializes access to the graphics engine. Writing '1' or '0' to the - * 'enforce_isolation' sysfs file enables or disables process isolation for - * each partition. The input should specify the setting for all partitions. + * serializes access to the graphics engine. Writing '1', '2', or '0' to the + * 'enforce_isolation' sysfs file enables (full or legacy) or disables process + * isolation for each partition. The input should specify the setting for all + * partitions. * * Return: The number of bytes written to the sysfs file. */ @@ -1673,13 +1676,29 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, return -EINVAL; for (i = 0; i < num_partitions; i++) { - if (partition_values[i] != 0 && partition_values[i] != 1) + if (partition_values[i] != 0 && + partition_values[i] != 1 && + partition_values[i] != 2) return -EINVAL; } mutex_lock(&adev->enforce_isolation_mutex); - for (i = 0; i < num_partitions; i++) - adev->enforce_isolation[i] = partition_values[i]; + for (i = 0; i < num_partitions; i++) { + switch (partition_values[i]) { + case 0: + default: + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + } + } mutex_unlock(&adev->enforce_isolation_mutex); amdgpu_mes_update_enforce_isolation(adev); @@ -2034,7 +2053,7 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, bool wait = false; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { /* set the initial values if nothing is set */ if (!adev->gfx.enforce_isolation_jiffies[idx]) { adev->gfx.enforce_isolation_jiffies[idx] = jiffies; @@ -2101,7 +2120,7 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } @@ -2138,7 +2157,7 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) return; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 4c4e087230ac..359c19de9a5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -588,7 +588,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } /* alloc a default reserved vmid to enforce isolation */ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE) amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ce6b9ba967ff..f2c049129661 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -78,6 +78,7 @@ struct amdgpu_job { /* enforce isolation */ bool enforce_isolation; + bool run_cleaner_shader; uint32_t num_ibs; struct amdgpu_ib ibs[]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 36f2e8716126..38ea64d87a0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -768,7 +768,7 @@ int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { mutex_lock(&adev->enforce_isolation_mutex); for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE) r |= amdgpu_mes_set_enforce_isolation(adev, i, true); else r |= amdgpu_mes_set_enforce_isolation(adev, i, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index aa65d64fb15c..3911c78f8282 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -787,7 +787,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - cleaner_shader_needed = adev->gfx.enable_cleaner_shader && + cleaner_shader_needed = job->run_cleaner_shader && + adev->gfx.enable_cleaner_shader && ring->funcs->emit_cleaner_shader && job->base.s_fence && &job->base.s_fence->scheduled == isolation->spearhead; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 344d32268c3c..f7aa45775ead 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -724,7 +724,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->event_log_gpu_addr; } - if (adev->enforce_isolation[0]) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index be43e19b7b7f..b0e042a4cea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -762,7 +762,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); } - if (adev->enforce_isolation[0]) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 2893fd5e5d00..fa28c57692b8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm, memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); - if (adev->enforce_isolation[kfd->node_id]) + if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE) packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; @@ -102,7 +102,8 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process_aldebaran)); - if (adev->enforce_isolation[knode->node_id]) + if (adev->enforce_isolation[knode->node_id] == + AMDGPU_ENFORCE_ISOLATION_ENABLE) packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; @@ -165,9 +166,9 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? - 1 : min(pm->dqm->processes_count, - kfd->max_proc_per_quantum); + concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] == + AMDGPU_ENFORCE_ISOLATION_ENABLE) ? + 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); packet = (struct pm4_mes_runlist *)buffer; |