summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 15:04:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 15:04:15 -0700
commitfaa392181a0bd42c5478175cef601adeecdc91b6 (patch)
treee020e1142e34786676d0cd40f539bccdbb66099e /drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
parentcfa3b8068b09f25037146bfd5eed041b78878bee (diff)
parent9ca1f474cea0edc14a1d7ec933e5472c0ff115d3 (diff)
Merge tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Highlights: - Core DRM had a lot of refactoring around managed drm resources to make drivers simpler. - Intel Tigerlake support is on by default - amdgpu now support p2p PCI buffer sharing and encrypted GPU memory Details: core: - uapi: error out EBUSY when existing master - uapi: rework SET/DROP MASTER permission handling - remove drm_pci.h - drm_pci* are now legacy - introduced managed DRM resources - subclassing support for drm_framebuffer - simple encoder helper - edid improvements - vblank + writeback documentation improved - drm/mm - optimise tree searches - port drivers to use devm_drm_dev_alloc dma-buf: - add flag for p2p buffer support mst: - ACT timeout improvements - remove drm_dp_mst_has_audio - don't use 2nd TX slot - spec recommends against it bridge: - dw-hdmi various improvements - chrontel ch7033 support - fix stack issues with old gcc hdmi: - add unpack function for drm infoframe fbdev: - misc fbdev driver fixes i915: - uapi: global sseu pinning - uapi: OA buffer polling - uapi: remove generated perf code - uapi: per-engine default property values in sysfs - Tigerlake GEN12 enabled. - Lots of gem refactoring - Tigerlake enablement patches - move to drm_device logging - Icelake gamma HW readout - push MST link retrain to hotplug work - bandwidth atomic helpers - ICL fixes - RPS/GT refactoring - Cherryview full-ppgtt support - i915 locking guidelines documented - require linear fb stride to be 512 multiple on gen9 - Tigerlake SAGV support amdgpu: - uapi: encrypted GPU memory handling - uapi: add MEM_SYNC IB flag - p2p dma-buf support - export VRAM dma-bufs - FRU chip access support - RAS/SR-IOV updates - Powerplay locking fixes - VCN DPG (powergating) enablement - GFX10 clockgating fixes - DC fixes - GPU reset fixes - navi SDMA fix - expose FP16 for modesetting - DP 1.4 compliance fixes - gfx10 soft recovery - Improved Critical Thermal Faults handling - resizable BAR on gmc10 amdkfd: - uapi: GWS resource management - track GPU memory per process - report PCI domain in topology radeon: - safe reg list generator fixes nouveau: - HD audio fixes on recent systems - vGPU detection (fail probe if we're on one, for now) - Interlaced mode fixes (mostly avoidance on Turing, which doesn't support it) - SVM improvements/fixes - NVIDIA format modifier support - Misc other fixes. adv7511: - HDMI SPDIF support ast: - allocate crtc state size - fix double assignment - fix suspend bochs: - drop connector register cirrus: - move to tiny drivers. exynos: - fix imported dma-buf mapping - enable runtime PM - fixes and cleanups mediatek: - DPI pin mode swap - config mipi_tx current/impedance lima: - devfreq + cooling device support - task handling improvements - runtime PM support pl111: - vexpress init improvements - fix module auto-load rcar-du: - DT bindings conversion to YAML - Planes zpos sanity check and fix - MAINTAINERS entry for LVDS panel driver mcde: - fix return value mgag200: - use managed config init stm: - read endpoints from DT vboxvideo: - use PCI managed functions - drop WC mtrr vkms: - enable cursor by default rockchip: - afbc support virtio: - various cleanups qxl: - fix cursor notify port hisilicon: - 128-byte stride alignment fix sun4i: - improved format handling" * tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm: (1401 commits) drm/amd/display: Fix potential integer wraparound resulting in a hang drm/amd/display: drop cursor position check in atomic test drm/amdgpu: fix device attribute node create failed with multi gpu drm/nouveau: use correct conflicting framebuffer API drm/vblank: Fix -Wformat compile warnings on some arches drm/amdgpu: Sync with VM root BO when switching VM to CPU update mode drm/amd/display: Handle GPU reset for DC block drm/amdgpu: add apu flags (v2) drm/amd/powerpay: Disable gfxoff when setting manual mode on picasso and raven drm/amdgpu: fix pm sysfs node handling (v2) drm/amdgpu: move gpu_info parsing after common early init drm/amdgpu: move discovery gfx config fetching drm/nouveau/dispnv50: fix runtime pm imbalance on error drm/nouveau: fix runtime pm imbalance on error drm/nouveau: fix runtime pm imbalance on error drm/nouveau/debugfs: fix runtime pm imbalance on error drm/nouveau/nouveau/hmm: fix migrate zero page to GPU drm/nouveau/nouveau/hmm: fix nouveau_dmem_chunk allocations drm/nouveau/kms/nv50-: Share DP SST mode_valid() handling with MST drm/nouveau/kms/nv50-: Move 8BPC limit for MST into nv50_mstc_get_modes() ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c107
1 files changed, 82 insertions, 25 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272fbcba2..2badbc0355f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -56,19 +56,23 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
- unsigned long bo_size;
+ unsigned long bo_size, fw_shared_bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned char fw_check;
int i, r;
INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
+ mutex_init(&adev->vcn.vcn_pg_lock);
+ atomic_set(&adev->vcn.total_submission_cnt, 0);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
switch (adev->asic_type) {
case CHIP_RAVEN:
- if (adev->rev_id >= 8)
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
fw_name = FIRMWARE_RAVEN2;
- else if (adev->pdev->device == 0x15d8)
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
fw_name = FIRMWARE_PICASSO;
else
fw_name = FIRMWARE_RAVEN;
@@ -178,6 +182,17 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
return r;
}
}
+
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)),
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].fw_shared_bo,
+ &adev->vcn.inst[i].fw_shared_gpu_addr, &adev->vcn.inst[i].fw_shared_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate firmware shared bo\n", i, r);
+ return r;
+ }
+
+ fw_shared_bo_size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+ adev->vcn.inst[i].saved_shm_bo = kvmalloc(fw_shared_bo_size, GFP_KERNEL);
}
return 0;
@@ -192,6 +207,12 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
+
+ kvfree(adev->vcn.inst[j].saved_shm_bo);
+ amdgpu_bo_free_kernel(&adev->vcn.inst[j].fw_shared_bo,
+ &adev->vcn.inst[j].fw_shared_gpu_addr,
+ (void **)&adev->vcn.inst[j].fw_shared_cpu_addr);
+
if (adev->vcn.indirect_sram) {
amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
&adev->vcn.inst[j].dpg_sram_gpu_addr,
@@ -210,6 +231,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
}
release_firmware(adev->vcn.fw);
+ mutex_destroy(&adev->vcn.vcn_pg_lock);
return 0;
}
@@ -236,6 +258,17 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
return -ENOMEM;
memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+
+ if (adev->vcn.inst[i].fw_shared_bo == NULL)
+ return 0;
+
+ if (!adev->vcn.inst[i].saved_shm_bo)
+ return -ENOMEM;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+ ptr = adev->vcn.inst[i].fw_shared_cpu_addr;
+
+ memcpy_fromio(adev->vcn.inst[i].saved_shm_bo, ptr, size);
}
return 0;
}
@@ -273,6 +306,17 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
}
memset_io(ptr, 0, size);
}
+
+ if (adev->vcn.inst[i].fw_shared_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+ ptr = adev->vcn.inst[i].fw_shared_cpu_addr;
+
+ if (adev->vcn.inst[i].saved_shm_bo != NULL)
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_shm_bo, size);
+ else
+ memset_io(ptr, 0, size);
}
return 0;
}
@@ -295,7 +339,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state;
- if (fence[j])
+ if (fence[j] ||
+ unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
@@ -307,8 +352,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
fences += fence[j];
}
- if (fences == 0) {
- amdgpu_gfx_off_ctrl(adev, true);
+ if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
} else {
@@ -319,36 +363,46 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
- if (set_clocks) {
- amdgpu_gfx_off_ctrl(adev, false);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
- }
+ atomic_inc(&adev->vcn.total_submission_cnt);
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ mutex_lock(&adev->vcn.vcn_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
struct dpg_pause_state new_state;
- unsigned int fences = 0;
- unsigned int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
- }
- if (fences)
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+ atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ } else {
+ unsigned int fences = 0;
+ unsigned int i;
- if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+
+ if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ }
adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
}
+ mutex_unlock(&adev->vcn.vcn_pg_lock);
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
+ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
+ atomic_dec(&ring->adev->vcn.total_submission_cnt);
+
schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@@ -390,7 +444,8 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+ r = amdgpu_job_alloc_with_ib(adev, 64,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
goto err;
@@ -557,7 +612,8 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
@@ -610,7 +666,8 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;