summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMikko Perttunen <mperttunen@nvidia.com>2025-02-05 06:10:27 +0000
committerThierry Reding <treding@nvidia.com>2025-05-07 17:47:36 +0200
commit408ec8e406d980b073f7c2d6144fdddbb093d4fd (patch)
tree7acc297c53f73c8fd552e7b894afc8fcaeda13f4
parent61a85887a9fc198b710a3b576db006c78855d1c0 (diff)
drm/tegra: falcon: Pipeline firmware copy
The Falcon DMA engine allows queueing multiple operations for improved performance. Do this to optimize firmware loading. A performance improvement of 4x to 6x is observed. Co-developed-by: Ivan Raul Guadarrama <iguadarrama@nvidia.com> Signed-off-by: Ivan Raul Guadarrama <iguadarrama@nvidia.com> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Link: https://lore.kernel.org/r/20250205061027.1205748-1-mperttunen@nvidia.com
-rw-r--r--drivers/gpu/drm/tegra/falcon.c20
-rw-r--r--drivers/gpu/drm/tegra/falcon.h1
2 files changed, 20 insertions, 1 deletions
diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c
index c0d85463eb1a..17f616bbcb45 100644
--- a/drivers/gpu/drm/tegra/falcon.c
+++ b/drivers/gpu/drm/tegra/falcon.c
@@ -30,6 +30,14 @@ int falcon_wait_idle(struct falcon *falcon)
(value == 0), 10, 100000);
}
+static int falcon_dma_wait_not_full(struct falcon *falcon)
+{
+ u32 value;
+
+ return readl_poll_timeout(falcon->regs + FALCON_DMATRFCMD, value,
+ !(value & FALCON_DMATRFCMD_FULL), 10, 100000);
+}
+
static int falcon_dma_wait_idle(struct falcon *falcon)
{
u32 value;
@@ -44,6 +52,7 @@ static int falcon_copy_chunk(struct falcon *falcon,
enum falcon_memory target)
{
u32 cmd = FALCON_DMATRFCMD_SIZE_256B;
+ int err;
if (target == FALCON_MEMORY_IMEM)
cmd |= FALCON_DMATRFCMD_IMEM;
@@ -56,11 +65,15 @@ static int falcon_copy_chunk(struct falcon *falcon,
*/
cmd |= FALCON_DMATRFCMD_DMACTX(1);
+ err = falcon_dma_wait_not_full(falcon);
+ if (err < 0)
+ return err;
+
falcon_writel(falcon, offset, FALCON_DMATRFMOFFS);
falcon_writel(falcon, base, FALCON_DMATRFFBOFFS);
falcon_writel(falcon, cmd, FALCON_DMATRFCMD);
- return falcon_dma_wait_idle(falcon);
+ return 0;
}
static void falcon_copy_firmware_image(struct falcon *falcon,
@@ -191,6 +204,11 @@ int falcon_boot(struct falcon *falcon)
falcon_copy_chunk(falcon, falcon->firmware.code.offset + offset,
offset, FALCON_MEMORY_IMEM);
+ /* wait for DMA to complete */
+ err = falcon_dma_wait_idle(falcon);
+ if (err < 0)
+ return err;
+
/* setup falcon interrupts */
falcon_writel(falcon, FALCON_IRQMSET_EXT(0xff) |
FALCON_IRQMSET_SWGEN1 |
diff --git a/drivers/gpu/drm/tegra/falcon.h b/drivers/gpu/drm/tegra/falcon.h
index 1955cf11a8a6..902bb7e4fd0f 100644
--- a/drivers/gpu/drm/tegra/falcon.h
+++ b/drivers/gpu/drm/tegra/falcon.h
@@ -47,6 +47,7 @@
#define FALCON_DMATRFMOFFS 0x00001114
#define FALCON_DMATRFCMD 0x00001118
+#define FALCON_DMATRFCMD_FULL (1 << 0)
#define FALCON_DMATRFCMD_IDLE (1 << 1)
#define FALCON_DMATRFCMD_IMEM (1 << 4)
#define FALCON_DMATRFCMD_SIZE_256B (6 << 8)