summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/tegra/Kconfig3
-rw-r--r--drivers/gpu/drm/tegra/Makefile3
-rw-r--r--drivers/gpu/drm/tegra/dc.c194
-rw-r--r--drivers/gpu/drm/tegra/dc.h3
-rw-r--r--drivers/gpu/drm/tegra/drm.c30
-rw-r--r--drivers/gpu/drm/tegra/drm.h1
-rw-r--r--drivers/gpu/drm/tegra/gem.c171
-rw-r--r--drivers/gpu/drm/tegra/gr2d.c151
-rw-r--r--drivers/gpu/drm/tegra/gr3d.c353
-rw-r--r--drivers/gpu/drm/tegra/hdmi.c183
-rw-r--r--drivers/gpu/drm/tegra/hub.h1
-rw-r--r--drivers/gpu/drm/tegra/nvdec.c466
-rw-r--r--drivers/gpu/drm/tegra/plane.c65
-rw-r--r--drivers/gpu/drm/tegra/plane.h2
-rw-r--r--drivers/gpu/drm/tegra/rgb.c53
-rw-r--r--drivers/gpu/drm/tegra/submit.c77
-rw-r--r--drivers/gpu/drm/tegra/uapi.c68
-rw-r--r--drivers/gpu/drm/tegra/uapi.h5
-rw-r--r--drivers/gpu/drm/tegra/vic.c61
-rw-r--r--drivers/gpu/host1x/Kconfig1
-rw-r--r--drivers/gpu/host1x/bus.c80
-rw-r--r--drivers/gpu/host1x/channel.c8
-rw-r--r--drivers/gpu/host1x/debug.c15
-rw-r--r--drivers/gpu/host1x/dev.c185
-rw-r--r--drivers/gpu/host1x/dev.h5
-rw-r--r--drivers/gpu/host1x/hw/channel_hw.c44
-rw-r--r--drivers/gpu/host1x/intr.c3
-rw-r--r--drivers/gpu/host1x/job.c160
-rw-r--r--drivers/gpu/host1x/job.h6
-rw-r--r--drivers/gpu/host1x/syncpt.c5
-rw-r--r--drivers/soc/tegra/common.c25
-rw-r--r--include/linux/host1x.h76
-rw-r--r--include/soc/tegra/common.h15
33 files changed, 2020 insertions, 498 deletions
diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
index 1650a448eabd..8cf5aeb9db6c 100644
--- a/drivers/gpu/drm/tegra/Kconfig
+++ b/drivers/gpu/drm/tegra/Kconfig
@@ -12,6 +12,9 @@ config DRM_TEGRA
select INTERCONNECT
select IOMMU_IOVA
select CEC_CORE if CEC_NOTIFIER
+ select SND_SIMPLE_CARD if SND_SOC_TEGRA20_SPDIF
+ select SND_SOC_HDMI_CODEC if SND_SOC_TEGRA20_SPDIF
+ select SND_AUDIO_GRAPH_CARD if SND_SOC_TEGRA20_SPDIF
help
Choose this option if you have an NVIDIA Tegra SoC.
diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index d801909182cf..df6cc986aeba 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -23,7 +23,8 @@ tegra-drm-y := \
gr2d.o \
gr3d.o \
falcon.o \
- vic.o
+ vic.o \
+ nvdec.o
tegra-drm-y += trace.o
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index a29d64f87563..eb70eee8992a 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -11,9 +11,12 @@
#include <linux/interconnect.h>
#include <linux/module.h>
#include <linux/of_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_opp.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
+#include <soc/tegra/common.h>
#include <soc/tegra/pmc.h>
#include <drm/drm_atomic.h>
@@ -890,11 +893,9 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane,
return 0;
}
-static void tegra_cursor_atomic_update(struct drm_plane *plane,
- struct drm_atomic_state *state)
+static void __tegra_cursor_atomic_update(struct drm_plane *plane,
+ struct drm_plane_state *new_state)
{
- struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
- plane);
struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state);
struct tegra_dc *dc = to_tegra_dc(new_state->crtc);
struct tegra_drm *tegra = plane->dev->dev_private;
@@ -990,6 +991,14 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane,
tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION);
}
+static void tegra_cursor_atomic_update(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+
+ __tegra_cursor_atomic_update(plane, new_state);
+}
+
static void tegra_cursor_atomic_disable(struct drm_plane *plane,
struct drm_atomic_state *state)
{
@@ -1009,12 +1018,78 @@ static void tegra_cursor_atomic_disable(struct drm_plane *plane,
tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
}
+static int tegra_cursor_atomic_async_check(struct drm_plane *plane, struct drm_atomic_state *state)
+{
+ struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+ struct drm_crtc_state *crtc_state;
+ int min_scale, max_scale;
+ int err;
+
+ crtc_state = drm_atomic_get_existing_crtc_state(state, new_state->crtc);
+ if (WARN_ON(!crtc_state))
+ return -EINVAL;
+
+ if (!crtc_state->active)
+ return -EINVAL;
+
+ if (plane->state->crtc != new_state->crtc ||
+ plane->state->src_w != new_state->src_w ||
+ plane->state->src_h != new_state->src_h ||
+ plane->state->crtc_w != new_state->crtc_w ||
+ plane->state->crtc_h != new_state->crtc_h ||
+ plane->state->fb != new_state->fb ||
+ plane->state->fb == NULL)
+ return -EINVAL;
+
+ min_scale = (1 << 16) / 8;
+ max_scale = (8 << 16) / 1;
+
+ err = drm_atomic_helper_check_plane_state(new_state, crtc_state, min_scale, max_scale,
+ true, true);
+ if (err < 0)
+ return err;
+
+ if (new_state->visible != plane->state->visible)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void tegra_cursor_atomic_async_update(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+ struct tegra_dc *dc = to_tegra_dc(new_state->crtc);
+
+ plane->state->src_x = new_state->src_x;
+ plane->state->src_y = new_state->src_y;
+ plane->state->crtc_x = new_state->crtc_x;
+ plane->state->crtc_y = new_state->crtc_y;
+
+ if (new_state->visible) {
+ struct tegra_plane *p = to_tegra_plane(plane);
+ u32 value;
+
+ __tegra_cursor_atomic_update(plane, new_state);
+
+ value = (WIN_A_ACT_REQ << p->index) << 8 | GENERAL_UPDATE;
+ tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+ (void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+
+ value = (WIN_A_ACT_REQ << p->index) | GENERAL_ACT_REQ;
+ tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+ (void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+ }
+}
+
static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = {
.prepare_fb = tegra_plane_prepare_fb,
.cleanup_fb = tegra_plane_cleanup_fb,
.atomic_check = tegra_cursor_atomic_check,
.atomic_update = tegra_cursor_atomic_update,
.atomic_disable = tegra_cursor_atomic_disable,
+ .atomic_async_check = tegra_cursor_atomic_async_check,
+ .atomic_async_update = tegra_cursor_atomic_async_update,
};
static const uint64_t linear_modifiers[] = {
@@ -1267,9 +1342,9 @@ static struct drm_plane *tegra_dc_add_planes(struct drm_device *drm,
err = PTR_ERR(planes[i]);
while (i--)
- tegra_plane_funcs.destroy(planes[i]);
+ planes[i]->funcs->destroy(planes[i]);
- tegra_plane_funcs.destroy(primary);
+ primary->funcs->destroy(primary);
return ERR_PTR(err);
}
}
@@ -1762,10 +1837,55 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc,
return 0;
}
-static void tegra_dc_commit_state(struct tegra_dc *dc,
- struct tegra_dc_state *state)
+static void tegra_dc_update_voltage_state(struct tegra_dc *dc,
+ struct tegra_dc_state *state)
+{
+ unsigned long rate, pstate;
+ struct dev_pm_opp *opp;
+ int err;
+
+ if (!dc->has_opp_table)
+ return;
+
+ /* calculate actual pixel clock rate which depends on internal divider */
+ rate = DIV_ROUND_UP(clk_get_rate(dc->clk) * 2, state->div + 2);
+
+ /* find suitable OPP for the rate */
+ opp = dev_pm_opp_find_freq_ceil(dc->dev, &rate);
+
+ /*
+ * Very high resolution modes may results in a clock rate that is
+ * above the characterized maximum. In this case it's okay to fall
+ * back to the characterized maximum.
+ */
+ if (opp == ERR_PTR(-ERANGE))
+ opp = dev_pm_opp_find_freq_floor(dc->dev, &rate);
+
+ if (IS_ERR(opp)) {
+ dev_err(dc->dev, "failed to find OPP for %luHz: %pe\n",
+ rate, opp);
+ return;
+ }
+
+ pstate = dev_pm_opp_get_required_pstate(opp, 0);
+ dev_pm_opp_put(opp);
+
+ /*
+ * The minimum core voltage depends on the pixel clock rate (which
+ * depends on internal clock divider of the CRTC) and not on the
+ * rate of the display controller clock. This is why we're not using
+ * dev_pm_opp_set_rate() API and instead controlling the power domain
+ * directly.
+ */
+ err = dev_pm_genpd_set_performance_state(dc->dev, pstate);
+ if (err)
+ dev_err(dc->dev, "failed to set power domain state to %lu: %d\n",
+ pstate, err);
+}
+
+static void tegra_dc_set_clock_rate(struct tegra_dc *dc,
+ struct tegra_dc_state *state)
{
- u32 value;
int err;
err = clk_set_parent(dc->clk, state->clk);
@@ -1797,10 +1917,7 @@ static void tegra_dc_commit_state(struct tegra_dc *dc,
state->div);
DRM_DEBUG_KMS("pclk: %lu\n", state->pclk);
- if (!dc->soc->has_nvdisplay) {
- value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1;
- tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
- }
+ tegra_dc_update_voltage_state(dc, state);
}
static void tegra_dc_stop(struct tegra_dc *dc)
@@ -1991,6 +2108,13 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
err = host1x_client_suspend(&dc->client);
if (err < 0)
dev_err(dc->dev, "failed to suspend: %d\n", err);
+
+ if (dc->has_opp_table) {
+ err = dev_pm_genpd_set_performance_state(dc->dev, 0);
+ if (err)
+ dev_err(dc->dev,
+ "failed to clear power domain state: %d\n", err);
+ }
}
static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
@@ -2002,6 +2126,9 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
u32 value;
int err;
+ /* apply PLL changes */
+ tegra_dc_set_clock_rate(dc, crtc_state);
+
err = host1x_client_resume(&dc->client);
if (err < 0) {
dev_err(dc->dev, "failed to resume: %d\n", err);
@@ -2076,8 +2203,11 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
else
tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR);
- /* apply PLL and pixel clock changes */
- tegra_dc_commit_state(dc, crtc_state);
+ /* apply pixel clock changes */
+ if (!dc->soc->has_nvdisplay) {
+ value = SHIFT_CLK_DIVIDER(crtc_state->div) | PIXEL_CLK_DIVIDER_PCD1;
+ tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
+ }
/* program display mode */
tegra_dc_set_timings(dc, mode);
@@ -2107,6 +2237,12 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
tegra_dc_writel(dc, value, DC_COM_RG_UNDERFLOW);
}
+ if (dc->rgb) {
+ /* XXX: parameterize? */
+ value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE;
+ tegra_dc_writel(dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS);
+ }
+
tegra_dc_commit(dc);
drm_crtc_vblank_on(crtc);
@@ -2685,6 +2821,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
.has_win_b_vfilter_mem_client = true,
.has_win_c_without_vert_filter = true,
.plane_tiled_memory_bandwidth_x2 = false,
+ .has_pll_d2_out0 = false,
};
static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -2707,6 +2844,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
.has_win_b_vfilter_mem_client = true,
.has_win_c_without_vert_filter = false,
.plane_tiled_memory_bandwidth_x2 = true,
+ .has_pll_d2_out0 = true,
};
static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -2729,6 +2867,7 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
.has_win_b_vfilter_mem_client = false,
.has_win_c_without_vert_filter = false,
.plane_tiled_memory_bandwidth_x2 = true,
+ .has_pll_d2_out0 = true,
};
static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -2751,6 +2890,7 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
.has_win_b_vfilter_mem_client = false,
.has_win_c_without_vert_filter = false,
.plane_tiled_memory_bandwidth_x2 = false,
+ .has_pll_d2_out0 = true,
};
static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
@@ -2773,6 +2913,7 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
.has_win_b_vfilter_mem_client = false,
.has_win_c_without_vert_filter = false,
.plane_tiled_memory_bandwidth_x2 = false,
+ .has_pll_d2_out0 = true,
};
static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
@@ -2823,6 +2964,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = {
.wgrps = tegra186_dc_wgrps,
.num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
.plane_tiled_memory_bandwidth_x2 = false,
+ .has_pll_d2_out0 = false,
};
static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = {
@@ -2873,6 +3015,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = {
.wgrps = tegra194_dc_wgrps,
.num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps),
.plane_tiled_memory_bandwidth_x2 = false,
+ .has_pll_d2_out0 = false,
};
static const struct of_device_id tegra_dc_of_match[] = {
@@ -2973,6 +3116,23 @@ static int tegra_dc_couple(struct tegra_dc *dc)
return 0;
}
+static int tegra_dc_init_opp_table(struct tegra_dc *dc)
+{
+ struct tegra_core_opp_params opp_params = {};
+ int err;
+
+ err = devm_tegra_core_dev_init_opp_table(dc->dev, &opp_params);
+ if (err && err != -ENODEV)
+ return err;
+
+ if (err)
+ dc->has_opp_table = false;
+ else
+ dc->has_opp_table = true;
+
+ return 0;
+}
+
static int tegra_dc_probe(struct platform_device *pdev)
{
u64 dma_mask = dma_get_mask(pdev->dev.parent);
@@ -3038,6 +3198,10 @@ static int tegra_dc_probe(struct platform_device *pdev)
tegra_powergate_power_off(dc->powergate);
}
+ err = tegra_dc_init_opp_table(dc);
+ if (err < 0)
+ return err;
+
dc->regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(dc->regs))
return PTR_ERR(dc->regs);
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 40378308d527..3f91a10ea6c7 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -76,6 +76,7 @@ struct tegra_dc_soc_info {
bool has_win_b_vfilter_mem_client;
bool has_win_c_without_vert_filter;
bool plane_tiled_memory_bandwidth_x2;
+ bool has_pll_d2_out0;
};
struct tegra_dc {
@@ -100,6 +101,8 @@ struct tegra_dc {
struct drm_info_list *debugfs_files;
const struct tegra_dc_soc_info *soc;
+
+ bool has_opp_table;
};
static inline struct tegra_dc *
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 8d37d6b00562..e9de91a4e7e8 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -10,6 +10,7 @@
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <drm/drm_aperture.h>
#include <drm/drm_atomic.h>
@@ -21,6 +22,10 @@
#include <drm/drm_prime.h>
#include <drm/drm_vblank.h>
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#endif
+
#include "dc.h"
#include "drm.h"
#include "gem.h"
@@ -116,6 +121,7 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
static void tegra_drm_context_free(struct tegra_drm_context *context)
{
context->client->ops->close_channel(context);
+ pm_runtime_put(context->client->base.dev);
kfree(context);
}
@@ -427,13 +433,20 @@ static int tegra_client_open(struct tegra_drm_file *fpriv,
{
int err;
+ err = pm_runtime_resume_and_get(client->base.dev);
+ if (err)
+ return err;
+
err = client->ops->open_channel(client, context);
- if (err < 0)
+ if (err < 0) {
+ pm_runtime_put(client->base.dev);
return err;
+ }
err = idr_alloc(&fpriv->legacy_contexts, context, 1, 0, GFP_KERNEL);
if (err < 0) {
client->ops->close_channel(context);
+ pm_runtime_put(client->base.dev);
return err;
}
@@ -936,6 +949,17 @@ int host1x_client_iommu_attach(struct host1x_client *client)
struct iommu_group *group = NULL;
int err;
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+ if (client->dev->archdata.mapping) {
+ struct dma_iommu_mapping *mapping =
+ to_dma_iommu_mapping(client->dev);
+ arm_iommu_detach_device(client->dev);
+ arm_iommu_release_mapping(mapping);
+
+ domain = iommu_get_domain_for_dev(client->dev);
+ }
+#endif
+
/*
* If the host1x client is already attached to an IOMMU domain that is
* not the shared IOMMU domain, don't try to attach it to a different
@@ -1344,15 +1368,18 @@ static const struct of_device_id host1x_drm_subdevs[] = {
{ .compatible = "nvidia,tegra210-sor", },
{ .compatible = "nvidia,tegra210-sor1", },
{ .compatible = "nvidia,tegra210-vic", },
+ { .compatible = "nvidia,tegra210-nvdec", },
{ .compatible = "nvidia,tegra186-display", },
{ .compatible = "nvidia,tegra186-dc", },
{ .compatible = "nvidia,tegra186-sor", },
{ .compatible = "nvidia,tegra186-sor1", },
{ .compatible = "nvidia,tegra186-vic", },
+ { .compatible = "nvidia,tegra186-nvdec", },
{ .compatible = "nvidia,tegra194-display", },
{ .compatible = "nvidia,tegra194-dc", },
{ .compatible = "nvidia,tegra194-sor", },
{ .compatible = "nvidia,tegra194-vic", },
+ { .compatible = "nvidia,tegra194-nvdec", },
{ /* sentinel */ }
};
@@ -1376,6 +1403,7 @@ static struct platform_driver * const drivers[] = {
&tegra_gr2d_driver,
&tegra_gr3d_driver,
&tegra_vic_driver,
+ &tegra_nvdec_driver,
};
static int __init host1x_drm_init(void)
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 8b28327c931c..fc0a19554eac 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -202,5 +202,6 @@ extern struct platform_driver tegra_sor_driver;
extern struct platform_driver tegra_gr2d_driver;
extern struct platform_driver tegra_gr3d_driver;
extern struct platform_driver tegra_vic_driver;
+extern struct platform_driver tegra_nvdec_driver;
#endif /* HOST1X_DRM_H */
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index d38fd7e12b57..fce0e52973c2 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -23,86 +23,97 @@
MODULE_IMPORT_NS(DMA_BUF);
-static void tegra_bo_put(struct host1x_bo *bo)
+static unsigned int sg_dma_count_chunks(struct scatterlist *sgl, unsigned int nents)
{
- struct tegra_bo *obj = host1x_to_tegra_bo(bo);
+ dma_addr_t next = ~(dma_addr_t)0;
+ unsigned int count = 0, i;
+ struct scatterlist *s;
- drm_gem_object_put(&obj->gem);
-}
+ for_each_sg(sgl, s, nents, i) {
+ /* sg_dma_address(s) is only valid for entries that have sg_dma_len(s) != 0. */
+ if (!sg_dma_len(s))
+ continue;
-/* XXX move this into lib/scatterlist.c? */
-static int sg_alloc_table_from_sg(struct sg_table *sgt, struct scatterlist *sg,
- unsigned int nents, gfp_t gfp_mask)
-{
- struct scatterlist *dst;
- unsigned int i;
- int err;
+ if (sg_dma_address(s) != next) {
+ next = sg_dma_address(s) + sg_dma_len(s);
+ count++;
+ }
+ }
- err = sg_alloc_table(sgt, nents, gfp_mask);
- if (err < 0)
- return err;
+ return count;
+}
- dst = sgt->sgl;
+static inline unsigned int sgt_dma_count_chunks(struct sg_table *sgt)
+{
+ return sg_dma_count_chunks(sgt->sgl, sgt->nents);
+}
- for (i = 0; i < nents; i++) {
- sg_set_page(dst, sg_page(sg), sg->length, 0);
- dst = sg_next(dst);
- sg = sg_next(sg);
- }
+static void tegra_bo_put(struct host1x_bo *bo)
+{
+ struct tegra_bo *obj = host1x_to_tegra_bo(bo);
- return 0;
+ drm_gem_object_put(&obj->gem);
}
-static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
- dma_addr_t *phys)
+static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
+ enum dma_data_direction direction)
{
struct tegra_bo *obj = host1x_to_tegra_bo(bo);
- struct sg_table *sgt;
+ struct drm_gem_object *gem = &obj->gem;
+ struct host1x_bo_mapping *map;
int err;
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&map->ref);
+ map->bo = host1x_bo_get(bo);
+ map->direction = direction;
+ map->dev = dev;
+
/*
- * If we've manually mapped the buffer object through the IOMMU, make
- * sure to return the IOVA address of our mapping.
- *
- * Similarly, for buffers that have been allocated by the DMA API the
- * physical address can be used for devices that are not attached to
- * an IOMMU. For these devices, callers must pass a valid pointer via
- * the @phys argument.
- *
- * Imported buffers were also already mapped at import time, so the
- * existing mapping can be reused.
+ * Imported buffers need special treatment to satisfy the semantics of DMA-BUF.
*/
- if (phys) {
- *phys = obj->iova;
- return NULL;
+ if (gem->import_attach) {
+ struct dma_buf *buf = gem->import_attach->dmabuf;
+
+ map->attach = dma_buf_attach(buf, dev);
+ if (IS_ERR(map->attach)) {
+ err = PTR_ERR(map->attach);
+ goto free;
+ }
+
+ map->sgt = dma_buf_map_attachment(map->attach, direction);
+ if (IS_ERR(map->sgt)) {
+ dma_buf_detach(buf, map->attach);
+ err = PTR_ERR(map->sgt);
+ goto free;
+ }
+
+ err = sgt_dma_count_chunks(map->sgt);
+ map->size = gem->size;
+
+ goto out;
}
/*
* If we don't have a mapping for this buffer yet, return an SG table
* so that host1x can do the mapping for us via the DMA API.
*/
- sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
- if (!sgt)
- return ERR_PTR(-ENOMEM);
+ map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
+ if (!map->sgt) {
+ err = -ENOMEM;
+ goto free;
+ }
if (obj->pages) {
/*
* If the buffer object was allocated from the explicit IOMMU
* API code paths, construct an SG table from the pages.
*/
- err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages,
- 0, obj->gem.size, GFP_KERNEL);
- if (err < 0)
- goto free;
- } else if (obj->sgt) {
- /*
- * If the buffer object already has an SG table but no pages
- * were allocated for it, it means the buffer was imported and
- * the SG table needs to be copied to avoid overwriting any
- * other potential users of the original SG table.
- */
- err = sg_alloc_table_from_sg(sgt, obj->sgt->sgl,
- obj->sgt->orig_nents, GFP_KERNEL);
+ err = sg_alloc_table_from_pages(map->sgt, obj->pages, obj->num_pages, 0, gem->size,
+ GFP_KERNEL);
if (err < 0)
goto free;
} else {
@@ -111,25 +122,53 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
* not imported, it had to be allocated with the DMA API, so
* the DMA API helper can be used.
*/
- err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova,
- obj->gem.size);
+ err = dma_get_sgtable(dev, map->sgt, obj->vaddr, obj->iova, gem->size);
if (err < 0)
goto free;
}
- return sgt;
+ err = dma_map_sgtable(dev, map->sgt, direction, 0);
+ if (err)
+ goto free_sgt;
+
+out:
+ /*
+ * If we've manually mapped the buffer object through the IOMMU, make sure to return the
+ * existing IOVA address of our mapping.
+ */
+ if (!obj->mm) {
+ map->phys = sg_dma_address(map->sgt->sgl);
+ map->chunks = err;
+ } else {
+ map->phys = obj->iova;
+ map->chunks = 1;
+ }
+
+ map->size = gem->size;
+ return map;
+
+free_sgt:
+ sg_free_table(map->sgt);
free:
- kfree(sgt);
+ kfree(map->sgt);
+ kfree(map);
return ERR_PTR(err);
}
-static void tegra_bo_unpin(struct device *dev, struct sg_table *sgt)
+static void tegra_bo_unpin(struct host1x_bo_mapping *map)
{
- if (sgt) {
- sg_free_table(sgt);
- kfree(sgt);
+ if (map->attach) {
+ dma_buf_unmap_attachment(map->attach, map->sgt, map->direction);
+ dma_buf_detach(map->attach->dmabuf, map->attach);
+ } else {
+ dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0);
+ sg_free_table(map->sgt);
+ kfree(map->sgt);
}
+
+ host1x_bo_put(map->bo);
+ kfree(map);
}
static void *tegra_bo_mmap(struct host1x_bo *bo)
@@ -452,8 +491,18 @@ free:
void tegra_bo_free_object(struct drm_gem_object *gem)
{
struct tegra_drm *tegra = gem->dev->dev_private;
+ struct host1x_bo_mapping *mapping, *tmp;
struct tegra_bo *bo = to_tegra_bo(gem);
+ /* remove all mappings of this buffer object from any caches */
+ list_for_each_entry_safe(mapping, tmp, &bo->base.mappings, list) {
+ if (mapping->cache)
+ host1x_bo_unpin(mapping);
+ else
+ dev_err(gem->dev->dev, "mapping %p stale for device %s\n", mapping,
+ dev_name(mapping->dev));
+ }
+
if (tegra->domain)
tegra_bo_iommu_unmap(tegra, bo);
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index de288cba3905..e3bb4c99ed39 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -4,14 +4,25 @@
*/
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include <soc/tegra/common.h>
#include "drm.h"
#include "gem.h"
#include "gr2d.h"
+enum {
+ RST_MC,
+ RST_GR2D,
+ RST_GR2D_MAX,
+};
+
struct gr2d_soc {
unsigned int version;
};
@@ -21,6 +32,9 @@ struct gr2d {
struct host1x_channel *channel;
struct clk *clk;
+ struct reset_control_bulk_data resets[RST_GR2D_MAX];
+ unsigned int nresets;
+
const struct gr2d_soc *soc;
DECLARE_BITMAP(addr_regs, GR2D_NUM_REGS);
@@ -56,15 +70,22 @@ static int gr2d_init(struct host1x_client *client)
goto free;
}
+ pm_runtime_enable(client->dev);
+ pm_runtime_use_autosuspend(client->dev);
+ pm_runtime_set_autosuspend_delay(client->dev, 200);
+
err = tegra_drm_register_client(dev->dev_private, drm);
if (err < 0) {
dev_err(client->dev, "failed to register client: %d\n", err);
- goto detach;
+ goto disable_rpm;
}
return 0;
-detach:
+disable_rpm:
+ pm_runtime_dont_use_autosuspend(client->dev);
+ pm_runtime_force_suspend(client->dev);
+
host1x_client_iommu_detach(client);
free:
host1x_syncpt_put(client->syncpts[0]);
@@ -85,10 +106,15 @@ static int gr2d_exit(struct host1x_client *client)
if (err < 0)
return err;
+ pm_runtime_dont_use_autosuspend(client->dev);
+ pm_runtime_force_suspend(client->dev);
+
host1x_client_iommu_detach(client);
host1x_syncpt_put(client->syncpts[0]);
host1x_channel_put(gr2d->channel);
+ gr2d->channel = NULL;
+
return 0;
}
@@ -190,6 +216,27 @@ static const u32 gr2d_addr_regs[] = {
GR2D_VA_BASE_ADDR_SB,
};
+static int gr2d_get_resets(struct device *dev, struct gr2d *gr2d)
+{
+ int err;
+
+ gr2d->resets[RST_MC].id = "mc";
+ gr2d->resets[RST_GR2D].id = "2d";
+ gr2d->nresets = RST_GR2D_MAX;
+
+ err = devm_reset_control_bulk_get_optional_exclusive_released(
+ dev, gr2d->nresets, gr2d->resets);
+ if (err) {
+ dev_err(dev, "failed to get reset: %d\n", err);
+ return err;
+ }
+
+ if (WARN_ON(!gr2d->resets[RST_GR2D].rstc))
+ return -ENOENT;
+
+ return 0;
+}
+
static int gr2d_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -202,6 +249,8 @@ static int gr2d_probe(struct platform_device *pdev)
if (!gr2d)
return -ENOMEM;
+ platform_set_drvdata(pdev, gr2d);
+
gr2d->soc = of_device_get_match_data(dev);
syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
@@ -214,11 +263,9 @@ static int gr2d_probe(struct platform_device *pdev)
return PTR_ERR(gr2d->clk);
}
- err = clk_prepare_enable(gr2d->clk);
- if (err) {
- dev_err(dev, "cannot turn on clock\n");
+ err = gr2d_get_resets(dev, gr2d);
+ if (err)
return err;
- }
INIT_LIST_HEAD(&gr2d->client.base.list);
gr2d->client.base.ops = &gr2d_client_ops;
@@ -231,10 +278,13 @@ static int gr2d_probe(struct platform_device *pdev)
gr2d->client.version = gr2d->soc->version;
gr2d->client.ops = &gr2d_ops;
+ err = devm_tegra_core_dev_init_opp_table_common(dev);
+ if (err)
+ return err;
+
err = host1x_client_register(&gr2d->client.base);
if (err < 0) {
dev_err(dev, "failed to register host1x client: %d\n", err);
- clk_disable_unprepare(gr2d->clk);
return err;
}
@@ -242,8 +292,6 @@ static int gr2d_probe(struct platform_device *pdev)
for (i = 0; i < ARRAY_SIZE(gr2d_addr_regs); i++)
set_bit(gr2d_addr_regs[i], gr2d->addr_regs);
- platform_set_drvdata(pdev, gr2d);
-
return 0;
}
@@ -259,15 +307,100 @@ static int gr2d_remove(struct platform_device *pdev)
return err;
}
+ return 0;
+}
+
+static int __maybe_unused gr2d_runtime_suspend(struct device *dev)
+{
+ struct gr2d *gr2d = dev_get_drvdata(dev);
+ int err;
+
+ host1x_channel_stop(gr2d->channel);
+ reset_control_bulk_release(gr2d->nresets, gr2d->resets);
+
+ /*
+ * GR2D module shouldn't be reset while hardware is idling, otherwise
+ * host1x's cmdproc will stuck on trying to access any G2 register
+ * after reset. GR2D module could be either hot-reset or reset after
+ * power-gating of the HEG partition. Hence we will put in reset only
+ * the memory client part of the module, the HEG GENPD will take care
+ * of resetting GR2D module across power-gating.
+ *
+ * On Tegra20 there is no HEG partition, but it's okay to have
+ * undetermined h/w state since userspace is expected to reprogram
+ * the state on each job submission anyways.
+ */
+ err = reset_control_acquire(gr2d->resets[RST_MC].rstc);
+ if (err) {
+ dev_err(dev, "failed to acquire MC reset: %d\n", err);
+ goto acquire_reset;
+ }
+
+ err = reset_control_assert(gr2d->resets[RST_MC].rstc);
+ reset_control_release(gr2d->resets[RST_MC].rstc);
+ if (err) {
+ dev_err(dev, "failed to assert MC reset: %d\n", err);
+ goto acquire_reset;
+ }
+
clk_disable_unprepare(gr2d->clk);
return 0;
+
+acquire_reset:
+ reset_control_bulk_acquire(gr2d->nresets, gr2d->resets);
+ reset_control_bulk_deassert(gr2d->nresets, gr2d->resets);
+
+ return err;
}
+static int __maybe_unused gr2d_runtime_resume(struct device *dev)
+{
+ struct gr2d *gr2d = dev_get_drvdata(dev);
+ int err;
+
+ err = reset_control_bulk_acquire(gr2d->nresets, gr2d->resets);
+ if (err) {
+ dev_err(dev, "failed to acquire reset: %d\n", err);
+ return err;
+ }
+
+ err = clk_prepare_enable(gr2d->clk);
+ if (err) {
+ dev_err(dev, "failed to enable clock: %d\n", err);
+ goto release_reset;
+ }
+
+ usleep_range(2000, 4000);
+
+ /* this is a reset array which deasserts both 2D MC and 2D itself */
+ err = reset_control_bulk_deassert(gr2d->nresets, gr2d->resets);
+ if (err) {
+ dev_err(dev, "failed to deassert reset: %d\n", err);
+ goto disable_clk;
+ }
+
+ return 0;
+
+disable_clk:
+ clk_disable_unprepare(gr2d->clk);
+release_reset:
+ reset_control_bulk_release(gr2d->nresets, gr2d->resets);
+
+ return err;
+}
+
+static const struct dev_pm_ops tegra_gr2d_pm = {
+ SET_RUNTIME_PM_OPS(gr2d_runtime_suspend, gr2d_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+};
+
struct platform_driver tegra_gr2d_driver = {
.driver = {
.name = "tegra-gr2d",
.of_match_table = gr2d_match,
+ .pm = &tegra_gr2d_pm,
},
.probe = gr2d_probe,
.remove = gr2d_remove,
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 24442ade0da3..a1fd3113ea96 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -5,32 +5,47 @@
*/
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_opp.h>
+#include <linux/pm_runtime.h>
#include <linux/reset.h>
+#include <soc/tegra/common.h>
#include <soc/tegra/pmc.h>
#include "drm.h"
#include "gem.h"
#include "gr3d.h"
+enum {
+ RST_MC,
+ RST_GR3D,
+ RST_MC2,
+ RST_GR3D2,
+ RST_GR3D_MAX,
+};
+
struct gr3d_soc {
unsigned int version;
+ unsigned int num_clocks;
+ unsigned int num_resets;
};
struct gr3d {
struct tegra_drm_client client;
struct host1x_channel *channel;
- struct clk *clk_secondary;
- struct clk *clk;
- struct reset_control *rst_secondary;
- struct reset_control *rst;
const struct gr3d_soc *soc;
+ struct clk_bulk_data *clocks;
+ unsigned int nclocks;
+ struct reset_control_bulk_data resets[RST_GR3D_MAX];
+ unsigned int nresets;
DECLARE_BITMAP(addr_regs, GR3D_NUM_REGS);
};
@@ -65,15 +80,22 @@ static int gr3d_init(struct host1x_client *client)
goto free;
}
+ pm_runtime_enable(client->dev);
+ pm_runtime_use_autosuspend(client->dev);
+ pm_runtime_set_autosuspend_delay(client->dev, 200);
+
err = tegra_drm_register_client(dev->dev_private, drm);
if (err < 0) {
dev_err(client->dev, "failed to register client: %d\n", err);
- goto detach;
+ goto disable_rpm;
}
return 0;
-detach:
+disable_rpm:
+ pm_runtime_dont_use_autosuspend(client->dev);
+ pm_runtime_force_suspend(client->dev);
+
host1x_client_iommu_detach(client);
free:
host1x_syncpt_put(client->syncpts[0]);
@@ -93,10 +115,15 @@ static int gr3d_exit(struct host1x_client *client)
if (err < 0)
return err;
+ pm_runtime_dont_use_autosuspend(client->dev);
+ pm_runtime_force_suspend(client->dev);
+
host1x_client_iommu_detach(client);
host1x_syncpt_put(client->syncpts[0]);
host1x_channel_put(gr3d->channel);
+ gr3d->channel = NULL;
+
return 0;
}
@@ -155,14 +182,20 @@ static const struct tegra_drm_client_ops gr3d_ops = {
static const struct gr3d_soc tegra20_gr3d_soc = {
.version = 0x20,
+ .num_clocks = 1,
+ .num_resets = 2,
};
static const struct gr3d_soc tegra30_gr3d_soc = {
.version = 0x30,
+ .num_clocks = 2,
+ .num_resets = 4,
};
static const struct gr3d_soc tegra114_gr3d_soc = {
.version = 0x35,
+ .num_clocks = 1,
+ .num_resets = 2,
};
static const struct of_device_id tegra_gr3d_match[] = {
@@ -278,69 +311,216 @@ static const u32 gr3d_addr_regs[] = {
GR3D_GLOBAL_SAMP23SURFADDR(15),
};
-static int gr3d_probe(struct platform_device *pdev)
+static int gr3d_power_up_legacy_domain(struct device *dev, const char *name,
+ unsigned int id)
{
- struct device_node *np = pdev->dev.of_node;
- struct host1x_syncpt **syncpts;
- struct gr3d *gr3d;
+ struct gr3d *gr3d = dev_get_drvdata(dev);
+ struct reset_control *reset;
+ struct clk *clk;
unsigned int i;
int err;
- gr3d = devm_kzalloc(&pdev->dev, sizeof(*gr3d), GFP_KERNEL);
- if (!gr3d)
- return -ENOMEM;
-
- gr3d->soc = of_device_get_match_data(&pdev->dev);
+ /*
+ * Tegra20 device-tree doesn't specify 3d clock name and there is only
+ * one clock for Tegra20. Tegra30+ device-trees always specified names
+ * for the clocks.
+ */
+ if (gr3d->nclocks == 1) {
+ if (id == TEGRA_POWERGATE_3D1)
+ return 0;
+
+ clk = gr3d->clocks[0].clk;
+ } else {
+ for (i = 0; i < gr3d->nclocks; i++) {
+ if (WARN_ON(!gr3d->clocks[i].id))
+ continue;
+
+ if (!strcmp(gr3d->clocks[i].id, name)) {
+ clk = gr3d->clocks[i].clk;
+ break;
+ }
+ }
- syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL);
- if (!syncpts)
- return -ENOMEM;
+ if (WARN_ON(i == gr3d->nclocks))
+ return -EINVAL;
+ }
- gr3d->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(gr3d->clk)) {
- dev_err(&pdev->dev, "cannot get clock\n");
- return PTR_ERR(gr3d->clk);
+ /*
+ * We use array of resets, which includes MC resets, and MC
+ * reset shouldn't be asserted while hardware is gated because
+ * MC flushing will fail for gated hardware. Hence for legacy
+ * PD we request the individual reset separately.
+ */
+ reset = reset_control_get_exclusive_released(dev, name);
+ if (IS_ERR(reset))
+ return PTR_ERR(reset);
+
+ err = reset_control_acquire(reset);
+ if (err) {
+ dev_err(dev, "failed to acquire %s reset: %d\n", name, err);
+ } else {
+ err = tegra_powergate_sequence_power_up(id, clk, reset);
+ reset_control_release(reset);
}
- gr3d->rst = devm_reset_control_get(&pdev->dev, "3d");
- if (IS_ERR(gr3d->rst)) {
- dev_err(&pdev->dev, "cannot get reset\n");
- return PTR_ERR(gr3d->rst);
+ reset_control_put(reset);
+ if (err)
+ return err;
+
+ /*
+ * tegra_powergate_sequence_power_up() leaves clocks enabled,
+ * while GENPD not. Hence keep clock-enable balanced.
+ */
+ clk_disable_unprepare(clk);
+
+ return 0;
+}
+
+static void gr3d_del_link(void *link)
+{
+ device_link_del(link);
+}
+
+static int gr3d_init_power(struct device *dev, struct gr3d *gr3d)
+{
+ static const char * const opp_genpd_names[] = { "3d0", "3d1", NULL };
+ const u32 link_flags = DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME;
+ struct device **opp_virt_devs, *pd_dev;
+ struct device_link *link;
+ unsigned int i;
+ int err;
+
+ err = of_count_phandle_with_args(dev->of_node, "power-domains",
+ "#power-domain-cells");
+ if (err < 0) {
+ if (err != -ENOENT)
+ return err;
+
+ /*
+ * Older device-trees don't use GENPD. In this case we should
+ * toggle power domain manually.
+ */
+ err = gr3d_power_up_legacy_domain(dev, "3d",
+ TEGRA_POWERGATE_3D);
+ if (err)
+ return err;
+
+ err = gr3d_power_up_legacy_domain(dev, "3d2",
+ TEGRA_POWERGATE_3D1);
+ if (err)
+ return err;
+
+ return 0;
}
- if (of_device_is_compatible(np, "nvidia,tegra30-gr3d")) {
- gr3d->clk_secondary = devm_clk_get(&pdev->dev, "3d2");
- if (IS_ERR(gr3d->clk_secondary)) {
- dev_err(&pdev->dev, "cannot get secondary clock\n");
- return PTR_ERR(gr3d->clk_secondary);
+ /*
+ * The PM domain core automatically attaches a single power domain,
+ * otherwise it skips attaching completely. We have a single domain
+ * on Tegra20 and two domains on Tegra30+.
+ */
+ if (dev->pm_domain)
+ return 0;
+
+ err = devm_pm_opp_attach_genpd(dev, opp_genpd_names, &opp_virt_devs);
+ if (err)
+ return err;
+
+ for (i = 0; opp_genpd_names[i]; i++) {
+ pd_dev = opp_virt_devs[i];
+ if (!pd_dev) {
+ dev_err(dev, "failed to get %s power domain\n",
+ opp_genpd_names[i]);
+ return -EINVAL;
}
- gr3d->rst_secondary = devm_reset_control_get(&pdev->dev,
- "3d2");
- if (IS_ERR(gr3d->rst_secondary)) {
- dev_err(&pdev->dev, "cannot get secondary reset\n");
- return PTR_ERR(gr3d->rst_secondary);
+ link = device_link_add(dev, pd_dev, link_flags);
+ if (!link) {
+ dev_err(dev, "failed to link to %s\n", dev_name(pd_dev));
+ return -EINVAL;
}
+
+ err = devm_add_action_or_reset(dev, gr3d_del_link, link);
+ if (err)
+ return err;
}
- err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D, gr3d->clk,
- gr3d->rst);
+ return 0;
+}
+
+static int gr3d_get_clocks(struct device *dev, struct gr3d *gr3d)
+{
+ int err;
+
+ err = devm_clk_bulk_get_all(dev, &gr3d->clocks);
if (err < 0) {
- dev_err(&pdev->dev, "failed to power up 3D unit\n");
+ dev_err(dev, "failed to get clock: %d\n", err);
return err;
}
+ gr3d->nclocks = err;
- if (gr3d->clk_secondary) {
- err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D1,
- gr3d->clk_secondary,
- gr3d->rst_secondary);
- if (err < 0) {
- dev_err(&pdev->dev,
- "failed to power up secondary 3D unit\n");
- return err;
- }
+ if (gr3d->nclocks != gr3d->soc->num_clocks) {
+ dev_err(dev, "invalid number of clocks: %u\n", gr3d->nclocks);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int gr3d_get_resets(struct device *dev, struct gr3d *gr3d)
+{
+ int err;
+
+ gr3d->resets[RST_MC].id = "mc";
+ gr3d->resets[RST_MC2].id = "mc2";
+ gr3d->resets[RST_GR3D].id = "3d";
+ gr3d->resets[RST_GR3D2].id = "3d2";
+ gr3d->nresets = gr3d->soc->num_resets;
+
+ err = devm_reset_control_bulk_get_optional_exclusive_released(
+ dev, gr3d->nresets, gr3d->resets);
+ if (err) {
+ dev_err(dev, "failed to get reset: %d\n", err);
+ return err;
}
+ if (WARN_ON(!gr3d->resets[RST_GR3D].rstc) ||
+ WARN_ON(!gr3d->resets[RST_GR3D2].rstc && gr3d->nresets == 4))
+ return -ENOENT;
+
+ return 0;
+}
+
+static int gr3d_probe(struct platform_device *pdev)
+{
+ struct host1x_syncpt **syncpts;
+ struct gr3d *gr3d;
+ unsigned int i;
+ int err;
+
+ gr3d = devm_kzalloc(&pdev->dev, sizeof(*gr3d), GFP_KERNEL);
+ if (!gr3d)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, gr3d);
+
+ gr3d->soc = of_device_get_match_data(&pdev->dev);
+
+ syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL);
+ if (!syncpts)
+ return -ENOMEM;
+
+ err = gr3d_get_clocks(&pdev->dev, gr3d);
+ if (err)
+ return err;
+
+ err = gr3d_get_resets(&pdev->dev, gr3d);
+ if (err)
+ return err;
+
+ err = gr3d_init_power(&pdev->dev, gr3d);
+ if (err)
+ return err;
+
INIT_LIST_HEAD(&gr3d->client.base.list);
gr3d->client.base.ops = &gr3d_client_ops;
gr3d->client.base.dev = &pdev->dev;
@@ -352,6 +532,10 @@ static int gr3d_probe(struct platform_device *pdev)
gr3d->client.version = gr3d->soc->version;
gr3d->client.ops = &gr3d_ops;
+ err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
+ if (err)
+ return err;
+
err = host1x_client_register(&gr3d->client.base);
if (err < 0) {
dev_err(&pdev->dev, "failed to register host1x client: %d\n",
@@ -363,8 +547,6 @@ static int gr3d_probe(struct platform_device *pdev)
for (i = 0; i < ARRAY_SIZE(gr3d_addr_regs); i++)
set_bit(gr3d_addr_regs[i], gr3d->addr_regs);
- platform_set_drvdata(pdev, gr3d);
-
return 0;
}
@@ -380,23 +562,80 @@ static int gr3d_remove(struct platform_device *pdev)
return err;
}
- if (gr3d->clk_secondary) {
- reset_control_assert(gr3d->rst_secondary);
- tegra_powergate_power_off(TEGRA_POWERGATE_3D1);
- clk_disable_unprepare(gr3d->clk_secondary);
+ return 0;
+}
+
+static int __maybe_unused gr3d_runtime_suspend(struct device *dev)
+{
+ struct gr3d *gr3d = dev_get_drvdata(dev);
+ int err;
+
+ host1x_channel_stop(gr3d->channel);
+
+ err = reset_control_bulk_assert(gr3d->nresets, gr3d->resets);
+ if (err) {
+ dev_err(dev, "failed to assert reset: %d\n", err);
+ return err;
+ }
+
+ usleep_range(10, 20);
+
+ /*
+ * Older device-trees don't specify MC resets and power-gating can't
+ * be done safely in that case. Hence we will keep the power ungated
+ * for older DTBs. For newer DTBs, GENPD will perform the power-gating.
+ */
+
+ clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks);
+ reset_control_bulk_release(gr3d->nresets, gr3d->resets);
+
+ return 0;
+}
+
+static int __maybe_unused gr3d_runtime_resume(struct device *dev)
+{
+ struct gr3d *gr3d = dev_get_drvdata(dev);
+ int err;
+
+ err = reset_control_bulk_acquire(gr3d->nresets, gr3d->resets);
+ if (err) {
+ dev_err(dev, "failed to acquire reset: %d\n", err);
+ return err;
+ }
+
+ err = clk_bulk_prepare_enable(gr3d->nclocks, gr3d->clocks);
+ if (err) {
+ dev_err(dev, "failed to enable clock: %d\n", err);
+ goto release_reset;
}
- reset_control_assert(gr3d->rst);
- tegra_powergate_power_off(TEGRA_POWERGATE_3D);
- clk_disable_unprepare(gr3d->clk);
+ err = reset_control_bulk_deassert(gr3d->nresets, gr3d->resets);
+ if (err) {
+ dev_err(dev, "failed to deassert reset: %d\n", err);
+ goto disable_clk;
+ }
return 0;
+
+disable_clk:
+ clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks);
+release_reset:
+ reset_control_bulk_release(gr3d->nresets, gr3d->resets);
+
+ return err;
}
+static const struct dev_pm_ops tegra_gr3d_pm = {
+ SET_RUNTIME_PM_OPS(gr3d_runtime_suspend, gr3d_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+};
+
struct platform_driver tegra_gr3d_driver = {
.driver = {
.name = "tegra-gr3d",
.of_match_table = tegra_gr3d_match,
+ .pm = &tegra_gr3d_pm,
},
.probe = gr3d_probe,
.remove = gr3d_remove,
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index e5d2a4026028..8845af5d325f 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -11,10 +11,14 @@
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/of_device.h>
+#include <linux/pm_opp.h>
#include <linux/pm_runtime.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>
+#include <soc/tegra/common.h>
+#include <sound/hdmi-codec.h>
+
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc.h>
#include <drm/drm_debugfs.h>
@@ -78,6 +82,9 @@ struct tegra_hdmi {
bool dvi;
struct drm_info_list *debugfs_files;
+
+ struct platform_device *audio_pdev;
+ struct mutex audio_lock;
};
static inline struct tegra_hdmi *
@@ -360,6 +367,18 @@ static const struct tmds_config tegra124_tmds_config[] = {
},
};
+static void tegra_hdmi_audio_lock(struct tegra_hdmi *hdmi)
+{
+ mutex_lock(&hdmi->audio_lock);
+ disable_irq(hdmi->irq);
+}
+
+static void tegra_hdmi_audio_unlock(struct tegra_hdmi *hdmi)
+{
+ enable_irq(hdmi->irq);
+ mutex_unlock(&hdmi->audio_lock);
+}
+
static int
tegra_hdmi_get_audio_config(unsigned int audio_freq, unsigned int pix_clock,
struct tegra_hdmi_audio_config *config)
@@ -829,6 +848,23 @@ static void tegra_hdmi_setup_tmds(struct tegra_hdmi *hdmi,
HDMI_NV_PDISP_SOR_IO_PEAK_CURRENT);
}
+static int tegra_hdmi_reconfigure_audio(struct tegra_hdmi *hdmi)
+{
+ int err;
+
+ err = tegra_hdmi_setup_audio(hdmi);
+ if (err < 0) {
+ tegra_hdmi_disable_audio_infoframe(hdmi);
+ tegra_hdmi_disable_audio(hdmi);
+ } else {
+ tegra_hdmi_setup_audio_infoframe(hdmi);
+ tegra_hdmi_enable_audio_infoframe(hdmi);
+ tegra_hdmi_enable_audio(hdmi);
+ }
+
+ return err;
+}
+
static bool tegra_output_is_hdmi(struct tegra_output *output)
{
struct edid *edid;
@@ -1135,6 +1171,8 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
u32 value;
int err;
+ tegra_hdmi_audio_lock(hdmi);
+
/*
* The following accesses registers of the display controller, so make
* sure it's only executed when the output is attached to one.
@@ -1159,6 +1197,10 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_ENABLE);
tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_MASK);
+ hdmi->pixel_clock = 0;
+
+ tegra_hdmi_audio_unlock(hdmi);
+
err = host1x_client_suspend(&hdmi->client);
if (err < 0)
dev_err(hdmi->dev, "failed to suspend: %d\n", err);
@@ -1182,6 +1224,8 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
return;
}
+ tegra_hdmi_audio_lock(hdmi);
+
/*
* Enable and unmask the HDA codec SCRATCH0 register interrupt. This
* is used for interoperability between the HDA codec driver and the
@@ -1195,7 +1239,7 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
h_back_porch = mode->htotal - mode->hsync_end;
h_front_porch = mode->hsync_start - mode->hdisplay;
- err = clk_set_rate(hdmi->clk, hdmi->pixel_clock);
+ err = dev_pm_opp_set_rate(hdmi->dev, hdmi->pixel_clock);
if (err < 0) {
dev_err(hdmi->dev, "failed to set HDMI clock frequency: %d\n",
err);
@@ -1387,6 +1431,8 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
}
/* TODO: add HDCP support */
+
+ tegra_hdmi_audio_unlock(hdmi);
}
static int
@@ -1416,6 +1462,91 @@ static const struct drm_encoder_helper_funcs tegra_hdmi_encoder_helper_funcs = {
.atomic_check = tegra_hdmi_encoder_atomic_check,
};
+static int tegra_hdmi_hw_params(struct device *dev, void *data,
+ struct hdmi_codec_daifmt *fmt,
+ struct hdmi_codec_params *hparms)
+{
+ struct tegra_hdmi *hdmi = data;
+ int ret = 0;
+
+ tegra_hdmi_audio_lock(hdmi);
+
+ hdmi->format.sample_rate = hparms->sample_rate;
+ hdmi->format.channels = hparms->channels;
+
+ if (hdmi->pixel_clock && !hdmi->dvi)
+ ret = tegra_hdmi_reconfigure_audio(hdmi);
+
+ tegra_hdmi_audio_unlock(hdmi);
+
+ return ret;
+}
+
+static int tegra_hdmi_audio_startup(struct device *dev, void *data)
+{
+ struct tegra_hdmi *hdmi = data;
+ int ret;
+
+ ret = host1x_client_resume(&hdmi->client);
+ if (ret < 0)
+ dev_err(hdmi->dev, "failed to resume: %d\n", ret);
+
+ return ret;
+}
+
+static void tegra_hdmi_audio_shutdown(struct device *dev, void *data)
+{
+ struct tegra_hdmi *hdmi = data;
+ int ret;
+
+ tegra_hdmi_audio_lock(hdmi);
+
+ hdmi->format.sample_rate = 0;
+ hdmi->format.channels = 0;
+
+ tegra_hdmi_audio_unlock(hdmi);
+
+ ret = host1x_client_suspend(&hdmi->client);
+ if (ret < 0)
+ dev_err(hdmi->dev, "failed to suspend: %d\n", ret);
+}
+
+static const struct hdmi_codec_ops tegra_hdmi_codec_ops = {
+ .hw_params = tegra_hdmi_hw_params,
+ .audio_startup = tegra_hdmi_audio_startup,
+ .audio_shutdown = tegra_hdmi_audio_shutdown,
+};
+
+static int tegra_hdmi_codec_register(struct tegra_hdmi *hdmi)
+{
+ struct hdmi_codec_pdata codec_data = {};
+
+ if (hdmi->config->has_hda)
+ return 0;
+
+ codec_data.ops = &tegra_hdmi_codec_ops;
+ codec_data.data = hdmi;
+ codec_data.spdif = 1;
+
+ hdmi->audio_pdev = platform_device_register_data(hdmi->dev,
+ HDMI_CODEC_DRV_NAME,
+ PLATFORM_DEVID_AUTO,
+ &codec_data,
+ sizeof(codec_data));
+ if (IS_ERR(hdmi->audio_pdev))
+ return PTR_ERR(hdmi->audio_pdev);
+
+ hdmi->format.channels = 2;
+
+ return 0;
+}
+
+static void tegra_hdmi_codec_unregister(struct tegra_hdmi *hdmi)
+{
+ if (hdmi->audio_pdev)
+ platform_device_unregister(hdmi->audio_pdev);
+}
+
static int tegra_hdmi_init(struct host1x_client *client)
{
struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
@@ -1453,28 +1584,47 @@ static int tegra_hdmi_init(struct host1x_client *client)
if (err < 0) {
dev_err(client->dev, "failed to enable HDMI regulator: %d\n",
err);
- return err;
+ goto output_exit;
}
err = regulator_enable(hdmi->pll);
if (err < 0) {
dev_err(hdmi->dev, "failed to enable PLL regulator: %d\n", err);
- return err;
+ goto disable_hdmi;
}
err = regulator_enable(hdmi->vdd);
if (err < 0) {
dev_err(hdmi->dev, "failed to enable VDD regulator: %d\n", err);
- return err;
+ goto disable_pll;
+ }
+
+ err = tegra_hdmi_codec_register(hdmi);
+ if (err < 0) {
+ dev_err(hdmi->dev, "failed to register audio codec: %d\n", err);
+ goto disable_vdd;
}
return 0;
+
+disable_vdd:
+ regulator_disable(hdmi->vdd);
+disable_pll:
+ regulator_disable(hdmi->pll);
+disable_hdmi:
+ regulator_disable(hdmi->hdmi);
+output_exit:
+ tegra_output_exit(&hdmi->output);
+
+ return err;
}
static int tegra_hdmi_exit(struct host1x_client *client)
{
struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+ tegra_hdmi_codec_unregister(hdmi);
+
tegra_output_exit(&hdmi->output);
regulator_disable(hdmi->vdd);
@@ -1599,7 +1749,6 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data)
{
struct tegra_hdmi *hdmi = data;
u32 value;
- int err;
value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_INT_STATUS);
tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_INT_STATUS);
@@ -1614,16 +1763,7 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data)
format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK;
tegra_hda_parse_format(format, &hdmi->format);
-
- err = tegra_hdmi_setup_audio(hdmi);
- if (err < 0) {
- tegra_hdmi_disable_audio_infoframe(hdmi);
- tegra_hdmi_disable_audio(hdmi);
- } else {
- tegra_hdmi_setup_audio_infoframe(hdmi);
- tegra_hdmi_enable_audio_infoframe(hdmi);
- tegra_hdmi_enable_audio(hdmi);
- }
+ tegra_hdmi_reconfigure_audio(hdmi);
} else {
tegra_hdmi_disable_audio_infoframe(hdmi);
tegra_hdmi_disable_audio(hdmi);
@@ -1651,6 +1791,8 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
hdmi->stereo = false;
hdmi->dvi = false;
+ mutex_init(&hdmi->audio_lock);
+
hdmi->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(hdmi->clk)) {
dev_err(&pdev->dev, "failed to get clock\n");
@@ -1732,7 +1874,14 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
}
platform_set_drvdata(pdev, hdmi);
- pm_runtime_enable(&pdev->dev);
+
+ err = devm_pm_runtime_enable(&pdev->dev);
+ if (err)
+ return err;
+
+ err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
+ if (err)
+ return err;
INIT_LIST_HEAD(&hdmi->client.list);
hdmi->client.ops = &hdmi_client_ops;
@@ -1753,8 +1902,6 @@ static int tegra_hdmi_remove(struct platform_device *pdev)
struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
int err;
- pm_runtime_disable(&pdev->dev);
-
err = host1x_client_unregister(&hdmi->client);
if (err < 0) {
dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
diff --git a/drivers/gpu/drm/tegra/hub.h b/drivers/gpu/drm/tegra/hub.h
index 3efa1be07ff8..23c4b2115ed1 100644
--- a/drivers/gpu/drm/tegra/hub.h
+++ b/drivers/gpu/drm/tegra/hub.h
@@ -72,7 +72,6 @@ to_tegra_display_hub_state(struct drm_private_state *priv)
return container_of(priv, struct tegra_display_hub_state, base);
}
-struct tegra_dc;
struct tegra_plane;
int tegra_display_hub_prepare(struct tegra_display_hub *hub);
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
new file mode 100644
index 000000000000..79e1e88203cf
--- /dev/null
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2015-2021, NVIDIA Corporation.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/host1x.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include <soc/tegra/pmc.h>
+
+#include "drm.h"
+#include "falcon.h"
+#include "vic.h"
+
+struct nvdec_config {
+ const char *firmware;
+ unsigned int version;
+ bool supports_sid;
+};
+
+struct nvdec {
+ struct falcon falcon;
+
+ void __iomem *regs;
+ struct tegra_drm_client client;
+ struct host1x_channel *channel;
+ struct device *dev;
+ struct clk *clk;
+
+ /* Platform configuration */
+ const struct nvdec_config *config;
+};
+
+static inline struct nvdec *to_nvdec(struct tegra_drm_client *client)
+{
+ return container_of(client, struct nvdec, client);
+}
+
+static inline void nvdec_writel(struct nvdec *nvdec, u32 value,
+ unsigned int offset)
+{
+ writel(value, nvdec->regs + offset);
+}
+
+static int nvdec_boot(struct nvdec *nvdec)
+{
+#ifdef CONFIG_IOMMU_API
+ struct iommu_fwspec *spec = dev_iommu_fwspec_get(nvdec->dev);
+#endif
+ int err;
+
+#ifdef CONFIG_IOMMU_API
+ if (nvdec->config->supports_sid && spec) {
+ u32 value;
+
+ value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) | TRANSCFG_ATT(0, TRANSCFG_SID_HW);
+ nvdec_writel(nvdec, value, VIC_TFBIF_TRANSCFG);
+
+ if (spec->num_ids > 0) {
+ value = spec->ids[0] & 0xffff;
+
+ nvdec_writel(nvdec, value, VIC_THI_STREAMID0);
+ nvdec_writel(nvdec, value, VIC_THI_STREAMID1);
+ }
+ }
+#endif
+
+ err = falcon_boot(&nvdec->falcon);
+ if (err < 0)
+ return err;
+
+ err = falcon_wait_idle(&nvdec->falcon);
+ if (err < 0) {
+ dev_err(nvdec->dev, "falcon boot timed out\n");
+ return err;
+ }
+
+ return 0;
+}
+
+static int nvdec_init(struct host1x_client *client)
+{
+ struct tegra_drm_client *drm = host1x_to_drm_client(client);
+ struct drm_device *dev = dev_get_drvdata(client->host);
+ struct tegra_drm *tegra = dev->dev_private;
+ struct nvdec *nvdec = to_nvdec(drm);
+ int err;
+
+ err = host1x_client_iommu_attach(client);
+ if (err < 0 && err != -ENODEV) {
+ dev_err(nvdec->dev, "failed to attach to domain: %d\n", err);
+ return err;
+ }
+
+ nvdec->channel = host1x_channel_request(client);
+ if (!nvdec->channel) {
+ err = -ENOMEM;
+ goto detach;
+ }
+
+ client->syncpts[0] = host1x_syncpt_request(client, 0);
+ if (!client->syncpts[0]) {
+ err = -ENOMEM;
+ goto free_channel;
+ }
+
+ pm_runtime_enable(client->dev);
+ pm_runtime_use_autosuspend(client->dev);
+ pm_runtime_set_autosuspend_delay(client->dev, 500);
+
+ err = tegra_drm_register_client(tegra, drm);
+ if (err < 0)
+ goto disable_rpm;
+
+ /*
+ * Inherit the DMA parameters (such as maximum segment size) from the
+ * parent host1x device.
+ */
+ client->dev->dma_parms = client->host->dma_parms;
+
+ return 0;
+
+disable_rpm:
+ pm_runtime_dont_use_autosuspend(client->dev);
+ pm_runtime_force_suspend(client->dev);
+
+ host1x_syncpt_put(client->syncpts[0]);
+free_channel:
+ host1x_channel_put(nvdec->channel);
+detach:
+ host1x_client_iommu_detach(client);
+
+ return err;
+}
+
+static int nvdec_exit(struct host1x_client *client)
+{
+ struct tegra_drm_client *drm = host1x_to_drm_client(client);
+ struct drm_device *dev = dev_get_drvdata(client->host);
+ struct tegra_drm *tegra = dev->dev_private;
+ struct nvdec *nvdec = to_nvdec(drm);
+ int err;
+
+ /* avoid a dangling pointer just in case this disappears */
+ client->dev->dma_parms = NULL;
+
+ err = tegra_drm_unregister_client(tegra, drm);
+ if (err < 0)
+ return err;
+
+ pm_runtime_dont_use_autosuspend(client->dev);
+ pm_runtime_force_suspend(client->dev);
+
+ host1x_syncpt_put(client->syncpts[0]);
+ host1x_channel_put(nvdec->channel);
+ host1x_client_iommu_detach(client);
+
+ nvdec->channel = NULL;
+
+ if (client->group) {
+ dma_unmap_single(nvdec->dev, nvdec->falcon.firmware.phys,
+ nvdec->falcon.firmware.size, DMA_TO_DEVICE);
+ tegra_drm_free(tegra, nvdec->falcon.firmware.size,
+ nvdec->falcon.firmware.virt,
+ nvdec->falcon.firmware.iova);
+ } else {
+ dma_free_coherent(nvdec->dev, nvdec->falcon.firmware.size,
+ nvdec->falcon.firmware.virt,
+ nvdec->falcon.firmware.iova);
+ }
+
+ return 0;
+}
+
+static const struct host1x_client_ops nvdec_client_ops = {
+ .init = nvdec_init,
+ .exit = nvdec_exit,
+};
+
+static int nvdec_load_firmware(struct nvdec *nvdec)
+{
+ struct host1x_client *client = &nvdec->client.base;
+ struct tegra_drm *tegra = nvdec->client.drm;
+ dma_addr_t iova;
+ size_t size;
+ void *virt;
+ int err;
+
+ if (nvdec->falcon.firmware.virt)
+ return 0;
+
+ err = falcon_read_firmware(&nvdec->falcon, nvdec->config->firmware);
+ if (err < 0)
+ return err;
+
+ size = nvdec->falcon.firmware.size;
+
+ if (!client->group) {
+ virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL);
+
+ err = dma_mapping_error(nvdec->dev, iova);
+ if (err < 0)
+ return err;
+ } else {
+ virt = tegra_drm_alloc(tegra, size, &iova);
+ }
+
+ nvdec->falcon.firmware.virt = virt;
+ nvdec->falcon.firmware.iova = iova;
+
+ err = falcon_load_firmware(&nvdec->falcon);
+ if (err < 0)
+ goto cleanup;
+
+ /*
+ * In this case we have received an IOVA from the shared domain, so we
+ * need to make sure to get the physical address so that the DMA API
+ * knows what memory pages to flush the cache for.
+ */
+ if (client->group) {
+ dma_addr_t phys;
+
+ phys = dma_map_single(nvdec->dev, virt, size, DMA_TO_DEVICE);
+
+ err = dma_mapping_error(nvdec->dev, phys);
+ if (err < 0)
+ goto cleanup;
+
+ nvdec->falcon.firmware.phys = phys;
+ }
+
+ return 0;
+
+cleanup:
+ if (!client->group)
+ dma_free_coherent(nvdec->dev, size, virt, iova);
+ else
+ tegra_drm_free(tegra, size, virt, iova);
+
+ return err;
+}
+
+
+static __maybe_unused int nvdec_runtime_resume(struct device *dev)
+{
+ struct nvdec *nvdec = dev_get_drvdata(dev);
+ int err;
+
+ err = clk_prepare_enable(nvdec->clk);
+ if (err < 0)
+ return err;
+
+ usleep_range(10, 20);
+
+ err = nvdec_load_firmware(nvdec);
+ if (err < 0)
+ goto disable;
+
+ err = nvdec_boot(nvdec);
+ if (err < 0)
+ goto disable;
+
+ return 0;
+
+disable:
+ clk_disable_unprepare(nvdec->clk);
+ return err;
+}
+
+static __maybe_unused int nvdec_runtime_suspend(struct device *dev)
+{
+ struct nvdec *nvdec = dev_get_drvdata(dev);
+
+ host1x_channel_stop(nvdec->channel);
+
+ clk_disable_unprepare(nvdec->clk);
+
+ return 0;
+}
+
+static int nvdec_open_channel(struct tegra_drm_client *client,
+ struct tegra_drm_context *context)
+{
+ struct nvdec *nvdec = to_nvdec(client);
+
+ context->channel = host1x_channel_get(nvdec->channel);
+ if (!context->channel)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void nvdec_close_channel(struct tegra_drm_context *context)
+{
+ host1x_channel_put(context->channel);
+}
+
+static const struct tegra_drm_client_ops nvdec_ops = {
+ .open_channel = nvdec_open_channel,
+ .close_channel = nvdec_close_channel,
+ .submit = tegra_drm_submit,
+};
+
+#define NVIDIA_TEGRA_210_NVDEC_FIRMWARE "nvidia/tegra210/nvdec.bin"
+
+static const struct nvdec_config nvdec_t210_config = {
+ .firmware = NVIDIA_TEGRA_210_NVDEC_FIRMWARE,
+ .version = 0x21,
+ .supports_sid = false,
+};
+
+#define NVIDIA_TEGRA_186_NVDEC_FIRMWARE "nvidia/tegra186/nvdec.bin"
+
+static const struct nvdec_config nvdec_t186_config = {
+ .firmware = NVIDIA_TEGRA_186_NVDEC_FIRMWARE,
+ .version = 0x18,
+ .supports_sid = true,
+};
+
+#define NVIDIA_TEGRA_194_NVDEC_FIRMWARE "nvidia/tegra194/nvdec.bin"
+
+static const struct nvdec_config nvdec_t194_config = {
+ .firmware = NVIDIA_TEGRA_194_NVDEC_FIRMWARE,
+ .version = 0x19,
+ .supports_sid = true,
+};
+
+static const struct of_device_id tegra_nvdec_of_match[] = {
+ { .compatible = "nvidia,tegra210-nvdec", .data = &nvdec_t210_config },
+ { .compatible = "nvidia,tegra186-nvdec", .data = &nvdec_t186_config },
+ { .compatible = "nvidia,tegra194-nvdec", .data = &nvdec_t194_config },
+ { },
+};
+MODULE_DEVICE_TABLE(of, tegra_nvdec_of_match);
+
+static int nvdec_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct host1x_syncpt **syncpts;
+ struct nvdec *nvdec;
+ u32 host_class;
+ int err;
+
+ /* inherit DMA mask from host1x parent */
+ err = dma_coerce_mask_and_coherent(dev, *dev->parent->dma_mask);
+ if (err < 0) {
+ dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err);
+ return err;
+ }
+
+ nvdec = devm_kzalloc(dev, sizeof(*nvdec), GFP_KERNEL);
+ if (!nvdec)
+ return -ENOMEM;
+
+ nvdec->config = of_device_get_match_data(dev);
+
+ syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
+ if (!syncpts)
+ return -ENOMEM;
+
+ nvdec->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+ if (IS_ERR(nvdec->regs))
+ return PTR_ERR(nvdec->regs);
+
+ nvdec->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(nvdec->clk)) {
+ dev_err(&pdev->dev, "failed to get clock\n");
+ return PTR_ERR(nvdec->clk);
+ }
+
+ err = clk_set_rate(nvdec->clk, ULONG_MAX);
+ if (err < 0) {
+ dev_err(&pdev->dev, "failed to set clock rate\n");
+ return err;
+ }
+
+ err = of_property_read_u32(dev->of_node, "nvidia,host1x-class", &host_class);
+ if (err < 0)
+ host_class = HOST1X_CLASS_NVDEC;
+
+ nvdec->falcon.dev = dev;
+ nvdec->falcon.regs = nvdec->regs;
+
+ err = falcon_init(&nvdec->falcon);
+ if (err < 0)
+ return err;
+
+ platform_set_drvdata(pdev, nvdec);
+
+ INIT_LIST_HEAD(&nvdec->client.base.list);
+ nvdec->client.base.ops = &nvdec_client_ops;
+ nvdec->client.base.dev = dev;
+ nvdec->client.base.class = host_class;
+ nvdec->client.base.syncpts = syncpts;
+ nvdec->client.base.num_syncpts = 1;
+ nvdec->dev = dev;
+
+ INIT_LIST_HEAD(&nvdec->client.list);
+ nvdec->client.version = nvdec->config->version;
+ nvdec->client.ops = &nvdec_ops;
+
+ err = host1x_client_register(&nvdec->client.base);
+ if (err < 0) {
+ dev_err(dev, "failed to register host1x client: %d\n", err);
+ goto exit_falcon;
+ }
+
+ return 0;
+
+exit_falcon:
+ falcon_exit(&nvdec->falcon);
+
+ return err;
+}
+
+static int nvdec_remove(struct platform_device *pdev)
+{
+ struct nvdec *nvdec = platform_get_drvdata(pdev);
+ int err;
+
+ err = host1x_client_unregister(&nvdec->client.base);
+ if (err < 0) {
+ dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
+ err);
+ return err;
+ }
+
+ falcon_exit(&nvdec->falcon);
+
+ return 0;
+}
+
+static const struct dev_pm_ops nvdec_pm_ops = {
+ SET_RUNTIME_PM_OPS(nvdec_runtime_suspend, nvdec_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+};
+
+struct platform_driver tegra_nvdec_driver = {
+ .driver = {
+ .name = "tegra-nvdec",
+ .of_match_table = tegra_nvdec_of_match,
+ .pm = &nvdec_pm_ops
+ },
+ .probe = nvdec_probe,
+ .remove = nvdec_remove,
+};
+
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_210_NVDEC_FIRMWARE);
+#endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_186_NVDEC_FIRMWARE);
+#endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_194_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_194_NVDEC_FIRMWARE);
+#endif
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 16a1cdc28657..321cb1f13da6 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -74,7 +74,7 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
for (i = 0; i < 3; i++) {
copy->iova[i] = DMA_MAPPING_ERROR;
- copy->sgt[i] = NULL;
+ copy->map[i] = NULL;
}
return &copy->base;
@@ -138,55 +138,37 @@ const struct drm_plane_funcs tegra_plane_funcs = {
static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dc->dev);
unsigned int i;
int err;
for (i = 0; i < state->base.fb->format->num_planes; i++) {
struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
- dma_addr_t phys_addr, *phys;
- struct sg_table *sgt;
+ struct host1x_bo_mapping *map;
- /*
- * If we're not attached to a domain, we already stored the
- * physical address when the buffer was allocated. If we're
- * part of a group that's shared between all display
- * controllers, we've also already mapped the framebuffer
- * through the SMMU. In both cases we can short-circuit the
- * code below and retrieve the stored IOV address.
- */
- if (!domain || dc->client.group)
- phys = &phys_addr;
- else
- phys = NULL;
-
- sgt = host1x_bo_pin(dc->dev, &bo->base, phys);
- if (IS_ERR(sgt)) {
- err = PTR_ERR(sgt);
+ map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE, &dc->client.cache);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
goto unpin;
}
- if (sgt) {
- err = dma_map_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0);
- if (err)
- goto unpin;
-
+ if (!dc->client.group) {
/*
* The display controller needs contiguous memory, so
* fail if the buffer is discontiguous and we fail to
* map its SG table to a single contiguous chunk of
* I/O virtual memory.
*/
- if (sgt->nents > 1) {
+ if (map->chunks > 1) {
err = -EINVAL;
goto unpin;
}
- state->iova[i] = sg_dma_address(sgt->sgl);
- state->sgt[i] = sgt;
+ state->iova[i] = map->phys;
} else {
- state->iova[i] = phys_addr;
+ state->iova[i] = bo->iova;
}
+
+ state->map[i] = map;
}
return 0;
@@ -195,15 +177,9 @@ unpin:
dev_err(dc->dev, "failed to map plane %u: %d\n", i, err);
while (i--) {
- struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
- struct sg_table *sgt = state->sgt[i];
-
- if (sgt)
- dma_unmap_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0);
-
- host1x_bo_unpin(dc->dev, &bo->base, sgt);
+ host1x_bo_unpin(state->map[i]);
state->iova[i] = DMA_MAPPING_ERROR;
- state->sgt[i] = NULL;
+ state->map[i] = NULL;
}
return err;
@@ -214,15 +190,9 @@ static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state)
unsigned int i;
for (i = 0; i < state->base.fb->format->num_planes; i++) {
- struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
- struct sg_table *sgt = state->sgt[i];
-
- if (sgt)
- dma_unmap_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0);
-
- host1x_bo_unpin(dc->dev, &bo->base, sgt);
+ host1x_bo_unpin(state->map[i]);
state->iova[i] = DMA_MAPPING_ERROR;
- state->sgt[i] = NULL;
+ state->map[i] = NULL;
}
}
@@ -230,11 +200,14 @@ int tegra_plane_prepare_fb(struct drm_plane *plane,
struct drm_plane_state *state)
{
struct tegra_dc *dc = to_tegra_dc(state->crtc);
+ int err;
if (!state->fb)
return 0;
- drm_gem_plane_helper_prepare_fb(plane, state);
+ err = drm_gem_plane_helper_prepare_fb(plane, state);
+ if (err < 0)
+ return err;
return tegra_dc_pin(dc, to_tegra_plane_state(state));
}
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
index d9470780c803..dfb20712fbd7 100644
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -43,7 +43,7 @@ struct tegra_plane_legacy_blending_state {
struct tegra_plane_state {
struct drm_plane_state base;
- struct sg_table *sgt[3];
+ struct host1x_bo_mapping *map[3];
dma_addr_t iova[3];
struct tegra_bo_tiling tiling;
diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c
index 606c78a2b988..ff8fce36d2aa 100644
--- a/drivers/gpu/drm/tegra/rgb.c
+++ b/drivers/gpu/drm/tegra/rgb.c
@@ -17,6 +17,8 @@ struct tegra_rgb {
struct tegra_output output;
struct tegra_dc *dc;
+ struct clk *pll_d_out0;
+ struct clk *pll_d2_out0;
struct clk *clk_parent;
struct clk *clk;
};
@@ -116,13 +118,21 @@ static void tegra_rgb_encoder_enable(struct drm_encoder *encoder)
DISP_ORDER_RED_BLUE;
tegra_dc_writel(rgb->dc, value, DC_DISP_DISP_INTERFACE_CONTROL);
- /* XXX: parameterize? */
- value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE;
- tegra_dc_writel(rgb->dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS);
-
tegra_dc_commit(rgb->dc);
}
+static bool tegra_rgb_pll_rate_change_allowed(struct tegra_rgb *rgb)
+{
+ if (!rgb->pll_d2_out0)
+ return false;
+
+ if (!clk_is_match(rgb->clk_parent, rgb->pll_d_out0) &&
+ !clk_is_match(rgb->clk_parent, rgb->pll_d2_out0))
+ return false;
+
+ return true;
+}
+
static int
tegra_rgb_encoder_atomic_check(struct drm_encoder *encoder,
struct drm_crtc_state *crtc_state,
@@ -151,8 +161,17 @@ tegra_rgb_encoder_atomic_check(struct drm_encoder *encoder,
* and hope that the desired frequency can be matched (or at least
* matched sufficiently close that the panel will still work).
*/
- div = ((clk_get_rate(rgb->clk) * 2) / pclk) - 2;
- pclk = 0;
+ if (tegra_rgb_pll_rate_change_allowed(rgb)) {
+ /*
+ * Set display controller clock to x2 of PCLK in order to
+ * produce higher resolution pulse positions.
+ */
+ div = 2;
+ pclk *= 2;
+ } else {
+ div = ((clk_get_rate(rgb->clk) * 2) / pclk) - 2;
+ pclk = 0;
+ }
err = tegra_dc_state_setup_clock(dc, crtc_state, rgb->clk_parent,
pclk, div);
@@ -210,6 +229,22 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc)
return err;
}
+ rgb->pll_d_out0 = clk_get_sys(NULL, "pll_d_out0");
+ if (IS_ERR(rgb->pll_d_out0)) {
+ err = PTR_ERR(rgb->pll_d_out0);
+ dev_err(dc->dev, "failed to get pll_d_out0: %d\n", err);
+ return err;
+ }
+
+ if (dc->soc->has_pll_d2_out0) {
+ rgb->pll_d2_out0 = clk_get_sys(NULL, "pll_d2_out0");
+ if (IS_ERR(rgb->pll_d2_out0)) {
+ err = PTR_ERR(rgb->pll_d2_out0);
+ dev_err(dc->dev, "failed to get pll_d2_out0: %d\n", err);
+ return err;
+ }
+ }
+
dc->rgb = &rgb->output;
return 0;
@@ -217,9 +252,15 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc)
int tegra_dc_rgb_remove(struct tegra_dc *dc)
{
+ struct tegra_rgb *rgb;
+
if (!dc->rgb)
return 0;
+ rgb = to_rgb(dc->rgb);
+ clk_put(rgb->pll_d2_out0);
+ clk_put(rgb->pll_d_out0);
+
tegra_output_remove(dc->rgb);
dc->rgb = NULL;
diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
index 776f825df52f..6d6dd8c35475 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -64,33 +64,62 @@ static void gather_bo_put(struct host1x_bo *host_bo)
kref_put(&bo->ref, gather_bo_release);
}
-static struct sg_table *
-gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys)
+static struct host1x_bo_mapping *
+gather_bo_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction)
{
- struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
- struct sg_table *sgt;
+ struct gather_bo *gather = container_of(bo, struct gather_bo, base);
+ struct host1x_bo_mapping *map;
int err;
- sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
- if (!sgt)
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map)
return ERR_PTR(-ENOMEM);
- err = dma_get_sgtable(bo->dev, sgt, bo->gather_data, bo->gather_data_dma,
- bo->gather_data_words * 4);
- if (err) {
- kfree(sgt);
- return ERR_PTR(err);
+ kref_init(&map->ref);
+ map->bo = host1x_bo_get(bo);
+ map->direction = direction;
+ map->dev = dev;
+
+ map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
+ if (!map->sgt) {
+ err = -ENOMEM;
+ goto free;
}
- return sgt;
+ err = dma_get_sgtable(gather->dev, map->sgt, gather->gather_data, gather->gather_data_dma,
+ gather->gather_data_words * 4);
+ if (err)
+ goto free_sgt;
+
+ err = dma_map_sgtable(dev, map->sgt, direction, 0);
+ if (err)
+ goto free_sgt;
+
+ map->phys = sg_dma_address(map->sgt->sgl);
+ map->size = gather->gather_data_words * 4;
+ map->chunks = err;
+
+ return map;
+
+free_sgt:
+ sg_free_table(map->sgt);
+ kfree(map->sgt);
+free:
+ kfree(map);
+ return ERR_PTR(err);
}
-static void gather_bo_unpin(struct device *dev, struct sg_table *sgt)
+static void gather_bo_unpin(struct host1x_bo_mapping *map)
{
- if (sgt) {
- sg_free_table(sgt);
- kfree(sgt);
- }
+ if (!map)
+ return;
+
+ dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0);
+ sg_free_table(map->sgt);
+ kfree(map->sgt);
+ host1x_bo_put(map->bo);
+
+ kfree(map);
}
static void *gather_bo_mmap(struct host1x_bo *host_bo)
@@ -475,8 +504,8 @@ static void release_job(struct host1x_job *job)
kfree(job_data->used_mappings);
kfree(job_data);
- if (pm_runtime_enabled(client->base.dev))
- pm_runtime_put_autosuspend(client->base.dev);
+ pm_runtime_mark_last_busy(client->base.dev);
+ pm_runtime_put_autosuspend(client->base.dev);
}
int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
@@ -560,12 +589,10 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
}
/* Boot engine. */
- if (pm_runtime_enabled(context->client->base.dev)) {
- err = pm_runtime_resume_and_get(context->client->base.dev);
- if (err < 0) {
- SUBMIT_ERR(context, "could not power up engine: %d", err);
- goto unpin_job;
- }
+ err = pm_runtime_resume_and_get(context->client->base.dev);
+ if (err < 0) {
+ SUBMIT_ERR(context, "could not power up engine: %d", err);
+ goto unpin_job;
}
job->user_data = job_data;
diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c
index 690a339c52ec..9ab9179d2026 100644
--- a/drivers/gpu/drm/tegra/uapi.c
+++ b/drivers/gpu/drm/tegra/uapi.c
@@ -17,11 +17,7 @@ static void tegra_drm_mapping_release(struct kref *ref)
struct tegra_drm_mapping *mapping =
container_of(ref, struct tegra_drm_mapping, ref);
- if (mapping->sgt)
- dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction,
- DMA_ATTR_SKIP_CPU_SYNC);
-
- host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt);
+ host1x_bo_unpin(mapping->map);
host1x_bo_put(mapping->bo);
kfree(mapping);
@@ -159,6 +155,7 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f
struct drm_tegra_channel_map *args = data;
struct tegra_drm_mapping *mapping;
struct tegra_drm_context *context;
+ enum dma_data_direction direction;
int err = 0;
if (args->flags & ~DRM_TEGRA_CHANNEL_MAP_READ_WRITE)
@@ -180,68 +177,53 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f
kref_init(&mapping->ref);
- mapping->dev = context->client->base.dev;
mapping->bo = tegra_gem_lookup(file, args->handle);
if (!mapping->bo) {
err = -EINVAL;
- goto unlock;
+ goto free;
}
- if (context->client->base.group) {
- /* IOMMU domain managed directly using IOMMU API */
- host1x_bo_pin(mapping->dev, mapping->bo, &mapping->iova);
- } else {
- switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) {
- case DRM_TEGRA_CHANNEL_MAP_READ_WRITE:
- mapping->direction = DMA_BIDIRECTIONAL;
- break;
-
- case DRM_TEGRA_CHANNEL_MAP_WRITE:
- mapping->direction = DMA_FROM_DEVICE;
- break;
-
- case DRM_TEGRA_CHANNEL_MAP_READ:
- mapping->direction = DMA_TO_DEVICE;
- break;
+ switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) {
+ case DRM_TEGRA_CHANNEL_MAP_READ_WRITE:
+ direction = DMA_BIDIRECTIONAL;
+ break;
- default:
- return -EINVAL;
- }
+ case DRM_TEGRA_CHANNEL_MAP_WRITE:
+ direction = DMA_FROM_DEVICE;
+ break;
- mapping->sgt = host1x_bo_pin(mapping->dev, mapping->bo, NULL);
- if (IS_ERR(mapping->sgt)) {
- err = PTR_ERR(mapping->sgt);
- goto put_gem;
- }
+ case DRM_TEGRA_CHANNEL_MAP_READ:
+ direction = DMA_TO_DEVICE;
+ break;
- err = dma_map_sgtable(mapping->dev, mapping->sgt, mapping->direction,
- DMA_ATTR_SKIP_CPU_SYNC);
- if (err)
- goto unpin;
+ default:
+ err = -EINVAL;
+ goto put_gem;
+ }
- mapping->iova = sg_dma_address(mapping->sgt->sgl);
+ mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction, NULL);
+ if (IS_ERR(mapping->map)) {
+ err = PTR_ERR(mapping->map);
+ goto put_gem;
}
+ mapping->iova = mapping->map->phys;
mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->gem.size;
err = xa_alloc(&context->mappings, &args->mapping, mapping, XA_LIMIT(1, U32_MAX),
GFP_KERNEL);
if (err < 0)
- goto unmap;
+ goto unpin;
mutex_unlock(&fpriv->lock);
return 0;
-unmap:
- if (mapping->sgt) {
- dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction,
- DMA_ATTR_SKIP_CPU_SYNC);
- }
unpin:
- host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt);
+ host1x_bo_unpin(mapping->map);
put_gem:
host1x_bo_put(mapping->bo);
+free:
kfree(mapping);
unlock:
mutex_unlock(&fpriv->lock);
diff --git a/drivers/gpu/drm/tegra/uapi.h b/drivers/gpu/drm/tegra/uapi.h
index 12adad770ad3..92ff1e44ff15 100644
--- a/drivers/gpu/drm/tegra/uapi.h
+++ b/drivers/gpu/drm/tegra/uapi.h
@@ -27,10 +27,9 @@ struct tegra_drm_file {
struct tegra_drm_mapping {
struct kref ref;
- struct device *dev;
+ struct host1x_bo_mapping *map;
struct host1x_bo *bo;
- struct sg_table *sgt;
- enum dma_data_direction direction;
+
dma_addr_t iova;
dma_addr_t iova_end;
};
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index c02010ff2b7f..1e342fa3d27b 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -5,6 +5,7 @@
#include <linux/clk.h>
#include <linux/delay.h>
+#include <linux/dma-mapping.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/module.h>
@@ -151,9 +152,13 @@ static int vic_init(struct host1x_client *client)
goto free_channel;
}
+ pm_runtime_enable(client->dev);
+ pm_runtime_use_autosuspend(client->dev);
+ pm_runtime_set_autosuspend_delay(client->dev, 500);
+
err = tegra_drm_register_client(tegra, drm);
if (err < 0)
- goto free_syncpt;
+ goto disable_rpm;
/*
* Inherit the DMA parameters (such as maximum segment size) from the
@@ -163,7 +168,10 @@ static int vic_init(struct host1x_client *client)
return 0;
-free_syncpt:
+disable_rpm:
+ pm_runtime_dont_use_autosuspend(client->dev);
+ pm_runtime_force_suspend(client->dev);
+
host1x_syncpt_put(client->syncpts[0]);
free_channel:
host1x_channel_put(vic->channel);
@@ -188,10 +196,15 @@ static int vic_exit(struct host1x_client *client)
if (err < 0)
return err;
+ pm_runtime_dont_use_autosuspend(client->dev);
+ pm_runtime_force_suspend(client->dev);
+
host1x_syncpt_put(client->syncpts[0]);
host1x_channel_put(vic->channel);
host1x_client_iommu_detach(client);
+ vic->channel = NULL;
+
if (client->group) {
dma_unmap_single(vic->dev, vic->falcon.firmware.phys,
vic->falcon.firmware.size, DMA_TO_DEVICE);
@@ -232,12 +245,12 @@ static int vic_load_firmware(struct vic *vic)
if (!client->group) {
virt = dma_alloc_coherent(vic->dev, size, &iova, GFP_KERNEL);
-
- err = dma_mapping_error(vic->dev, iova);
- if (err < 0)
- return err;
+ if (!virt)
+ return -ENOMEM;
} else {
virt = tegra_drm_alloc(tegra, size, &iova);
+ if (IS_ERR(virt))
+ return PTR_ERR(virt);
}
vic->falcon.firmware.virt = virt;
@@ -315,6 +328,8 @@ static int vic_runtime_suspend(struct device *dev)
struct vic *vic = dev_get_drvdata(dev);
int err;
+ host1x_channel_stop(vic->channel);
+
err = reset_control_assert(vic->rst);
if (err < 0)
return err;
@@ -330,27 +345,17 @@ static int vic_open_channel(struct tegra_drm_client *client,
struct tegra_drm_context *context)
{
struct vic *vic = to_vic(client);
- int err;
-
- err = pm_runtime_resume_and_get(vic->dev);
- if (err < 0)
- return err;
context->channel = host1x_channel_get(vic->channel);
- if (!context->channel) {
- pm_runtime_put(vic->dev);
+ if (!context->channel)
return -ENOMEM;
- }
return 0;
}
static void vic_close_channel(struct tegra_drm_context *context)
{
- struct vic *vic = to_vic(context->client);
-
host1x_channel_put(context->channel);
- pm_runtime_put(vic->dev);
}
static const struct tegra_drm_client_ops vic_ops = {
@@ -441,6 +446,12 @@ static int vic_probe(struct platform_device *pdev)
return PTR_ERR(vic->clk);
}
+ err = clk_set_rate(vic->clk, ULONG_MAX);
+ if (err < 0) {
+ dev_err(&pdev->dev, "failed to set clock rate\n");
+ return err;
+ }
+
if (!dev->pm_domain) {
vic->rst = devm_reset_control_get(dev, "vic");
if (IS_ERR(vic->rst)) {
@@ -476,17 +487,8 @@ static int vic_probe(struct platform_device *pdev)
goto exit_falcon;
}
- pm_runtime_enable(&pdev->dev);
- if (!pm_runtime_enabled(&pdev->dev)) {
- err = vic_runtime_resume(&pdev->dev);
- if (err < 0)
- goto unregister_client;
- }
-
return 0;
-unregister_client:
- host1x_client_unregister(&vic->client.base);
exit_falcon:
falcon_exit(&vic->falcon);
@@ -505,11 +507,6 @@ static int vic_remove(struct platform_device *pdev)
return err;
}
- if (pm_runtime_enabled(&pdev->dev))
- pm_runtime_disable(&pdev->dev);
- else
- vic_runtime_suspend(&pdev->dev);
-
falcon_exit(&vic->falcon);
return 0;
@@ -517,6 +514,8 @@ static int vic_remove(struct platform_device *pdev)
static const struct dev_pm_ops vic_pm_ops = {
SET_RUNTIME_PM_OPS(vic_runtime_suspend, vic_runtime_resume, NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
};
struct platform_driver tegra_vic_driver = {
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
index 6dab94adf25e..6815b4db17c1 100644
--- a/drivers/gpu/host1x/Kconfig
+++ b/drivers/gpu/host1x/Kconfig
@@ -2,6 +2,7 @@
config TEGRA_HOST1X
tristate "NVIDIA Tegra host1x driver"
depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
+ select DMA_SHARED_BUFFER
select IOMMU_IOVA
help
Driver for the NVIDIA Tegra host1x hardware.
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 218e3718fd68..bdee16a0bb8e 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -5,6 +5,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
#include <linux/host1x.h>
#include <linux/of.h>
#include <linux/seq_file.h>
@@ -742,6 +743,7 @@ EXPORT_SYMBOL(host1x_driver_unregister);
*/
void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key)
{
+ host1x_bo_cache_init(&client->cache);
INIT_LIST_HEAD(&client->list);
__mutex_init(&client->lock, "host1x client lock", key);
client->usecount = 0;
@@ -761,7 +763,6 @@ EXPORT_SYMBOL(host1x_client_exit);
/**
* __host1x_client_register() - register a host1x client
* @client: host1x client
- * @key: lock class key for the client-specific mutex
*
* Registers a host1x client with each host1x controller instance. Note that
* each client will only match their parent host1x controller and will only be
@@ -830,6 +831,8 @@ int host1x_client_unregister(struct host1x_client *client)
mutex_unlock(&clients_lock);
+ host1x_bo_cache_destroy(&client->cache);
+
return 0;
}
EXPORT_SYMBOL(host1x_client_unregister);
@@ -904,3 +907,78 @@ unlock:
return err;
}
EXPORT_SYMBOL(host1x_client_resume);
+
+struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+ enum dma_data_direction dir,
+ struct host1x_bo_cache *cache)
+{
+ struct host1x_bo_mapping *mapping;
+
+ if (cache) {
+ mutex_lock(&cache->lock);
+
+ list_for_each_entry(mapping, &cache->mappings, entry) {
+ if (mapping->bo == bo && mapping->direction == dir) {
+ kref_get(&mapping->ref);
+ goto unlock;
+ }
+ }
+ }
+
+ mapping = bo->ops->pin(dev, bo, dir);
+ if (IS_ERR(mapping))
+ goto unlock;
+
+ spin_lock(&mapping->bo->lock);
+ list_add_tail(&mapping->list, &bo->mappings);
+ spin_unlock(&mapping->bo->lock);
+
+ if (cache) {
+ INIT_LIST_HEAD(&mapping->entry);
+ mapping->cache = cache;
+
+ list_add_tail(&mapping->entry, &cache->mappings);
+
+ /* bump reference count to track the copy in the cache */
+ kref_get(&mapping->ref);
+ }
+
+unlock:
+ if (cache)
+ mutex_unlock(&cache->lock);
+
+ return mapping;
+}
+EXPORT_SYMBOL(host1x_bo_pin);
+
+static void __host1x_bo_unpin(struct kref *ref)
+{
+ struct host1x_bo_mapping *mapping = to_host1x_bo_mapping(ref);
+
+ /*
+ * When the last reference of the mapping goes away, make sure to remove the mapping from
+ * the cache.
+ */
+ if (mapping->cache)
+ list_del(&mapping->entry);
+
+ spin_lock(&mapping->bo->lock);
+ list_del(&mapping->list);
+ spin_unlock(&mapping->bo->lock);
+
+ mapping->bo->ops->unpin(mapping);
+}
+
+void host1x_bo_unpin(struct host1x_bo_mapping *mapping)
+{
+ struct host1x_bo_cache *cache = mapping->cache;
+
+ if (cache)
+ mutex_lock(&cache->lock);
+
+ kref_put(&mapping->ref, __host1x_bo_unpin);
+
+ if (cache)
+ mutex_unlock(&cache->lock);
+}
+EXPORT_SYMBOL(host1x_bo_unpin);
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 4cd212bb570d..2a9a3a8d5931 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -75,6 +75,14 @@ struct host1x_channel *host1x_channel_get_index(struct host1x *host,
return ch;
}
+void host1x_channel_stop(struct host1x_channel *channel)
+{
+ struct host1x *host = dev_get_drvdata(channel->dev->parent);
+
+ host1x_hw_cdma_stop(host, &channel->cdma);
+}
+EXPORT_SYMBOL(host1x_channel_stop);
+
static void release_channel(struct kref *kref)
{
struct host1x_channel *channel =
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 8a14880c61bb..18d9c8d206e3 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -7,6 +7,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
@@ -52,6 +53,11 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
{
struct host1x *m = dev_get_drvdata(ch->dev->parent);
struct output *o = data;
+ int err;
+
+ err = pm_runtime_resume_and_get(m->dev);
+ if (err < 0)
+ return err;
mutex_lock(&ch->cdma.lock);
mutex_lock(&debug_lock);
@@ -64,6 +70,8 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
mutex_unlock(&debug_lock);
mutex_unlock(&ch->cdma.lock);
+ pm_runtime_put(m->dev);
+
return 0;
}
@@ -71,9 +79,14 @@ static void show_syncpts(struct host1x *m, struct output *o)
{
struct list_head *pos;
unsigned int i;
+ int err;
host1x_debug_output(o, "---- syncpts ----\n");
+ err = pm_runtime_resume_and_get(m->dev);
+ if (err < 0)
+ return;
+
for (i = 0; i < host1x_syncpt_nb_pts(m); i++) {
u32 max = host1x_syncpt_read_max(m->syncpt + i);
u32 min = host1x_syncpt_load(m->syncpt + i);
@@ -101,6 +114,8 @@ static void show_syncpts(struct host1x *m, struct output *o)
base_val);
}
+ pm_runtime_put(m->dev);
+
host1x_debug_output(o, "\n");
}
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index fbb6447b8659..6994f8c0e02e 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -6,18 +6,26 @@
*/
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of.h>
+#include <linux/pm_runtime.h>
#include <linux/slab.h>
+#include <soc/tegra/common.h>
+
#define CREATE_TRACE_POINTS
#include <trace/events/host1x.h>
#undef CREATE_TRACE_POINTS
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#endif
+
#include "bus.h"
#include "channel.h"
#include "debug.h"
@@ -132,6 +140,12 @@ static const struct host1x_sid_entry tegra186_sid_table[] = {
.offset = 0x30,
.limit = 0x34
},
+ {
+ /* NVDEC */
+ .base = 0x1b00,
+ .offset = 0x30,
+ .limit = 0x34
+ },
};
static const struct host1x_info host1x06_info = {
@@ -156,6 +170,18 @@ static const struct host1x_sid_entry tegra194_sid_table[] = {
.offset = 0x30,
.limit = 0x34
},
+ {
+ /* NVDEC */
+ .base = 0x1b00,
+ .offset = 0x30,
+ .limit = 0x34
+ },
+ {
+ /* NVDEC1 */
+ .base = 0x1bc0,
+ .offset = 0x30,
+ .limit = 0x34
+ },
};
static const struct host1x_info host1x07_info = {
@@ -190,6 +216,9 @@ static void host1x_setup_sid_table(struct host1x *host)
const struct host1x_info *info = host->info;
unsigned int i;
+ if (!info->has_hypervisor)
+ return;
+
for (i = 0; i < info->num_sid_entries; i++) {
const struct host1x_sid_entry *entry = &info->sid_table[i];
@@ -238,6 +267,17 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
int err;
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+ if (host->dev->archdata.mapping) {
+ struct dma_iommu_mapping *mapping =
+ to_dma_iommu_mapping(host->dev);
+ arm_iommu_detach_device(host->dev);
+ arm_iommu_release_mapping(mapping);
+
+ domain = iommu_get_domain_for_dev(host->dev);
+ }
+#endif
+
/*
* We may not always want to enable IOMMU support (for example if the
* host1x firewall is already enabled and we don't support addressing
@@ -347,6 +387,27 @@ static void host1x_iommu_exit(struct host1x *host)
}
}
+static int host1x_get_resets(struct host1x *host)
+{
+ int err;
+
+ host->resets[0].id = "mc";
+ host->resets[1].id = "host1x";
+ host->nresets = ARRAY_SIZE(host->resets);
+
+ err = devm_reset_control_bulk_get_optional_exclusive_released(
+ host->dev, host->nresets, host->resets);
+ if (err) {
+ dev_err(host->dev, "failed to get reset: %d\n", err);
+ return err;
+ }
+
+ if (WARN_ON(!host->resets[1].rstc))
+ return -ENOENT;
+
+ return 0;
+}
+
static int host1x_probe(struct platform_device *pdev)
{
struct host1x *host;
@@ -386,6 +447,7 @@ static int host1x_probe(struct platform_device *pdev)
if (syncpt_irq < 0)
return syncpt_irq;
+ host1x_bo_cache_init(&host->cache);
mutex_init(&host->devices_lock);
INIT_LIST_HEAD(&host->devices);
INIT_LIST_HEAD(&host->list);
@@ -423,12 +485,9 @@ static int host1x_probe(struct platform_device *pdev)
return err;
}
- host->rst = devm_reset_control_get(&pdev->dev, "host1x");
- if (IS_ERR(host->rst)) {
- err = PTR_ERR(host->rst);
- dev_err(&pdev->dev, "failed to get reset: %d\n", err);
+ err = host1x_get_resets(host);
+ if (err)
return err;
- }
err = host1x_iommu_init(host);
if (err < 0) {
@@ -443,22 +502,10 @@ static int host1x_probe(struct platform_device *pdev)
goto iommu_exit;
}
- err = clk_prepare_enable(host->clk);
- if (err < 0) {
- dev_err(&pdev->dev, "failed to enable clock\n");
- goto free_channels;
- }
-
- err = reset_control_deassert(host->rst);
- if (err < 0) {
- dev_err(&pdev->dev, "failed to deassert reset: %d\n", err);
- goto unprepare_disable;
- }
-
err = host1x_syncpt_init(host);
if (err) {
dev_err(&pdev->dev, "failed to initialize syncpts\n");
- goto reset_assert;
+ goto free_channels;
}
err = host1x_intr_init(host, syncpt_irq);
@@ -467,10 +514,18 @@ static int host1x_probe(struct platform_device *pdev)
goto deinit_syncpt;
}
- host1x_debug_init(host);
+ pm_runtime_enable(&pdev->dev);
+
+ err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
+ if (err)
+ goto pm_disable;
- if (host->info->has_hypervisor)
- host1x_setup_sid_table(host);
+ /* the driver's code isn't ready yet for the dynamic RPM */
+ err = pm_runtime_resume_and_get(&pdev->dev);
+ if (err)
+ goto pm_disable;
+
+ host1x_debug_init(host);
err = host1x_register(host);
if (err < 0)
@@ -486,13 +541,14 @@ unregister:
host1x_unregister(host);
deinit_debugfs:
host1x_debug_deinit(host);
+
+ pm_runtime_put_sync_suspend(&pdev->dev);
+pm_disable:
+ pm_runtime_disable(&pdev->dev);
+
host1x_intr_deinit(host);
deinit_syncpt:
host1x_syncpt_deinit(host);
-reset_assert:
- reset_control_assert(host->rst);
-unprepare_disable:
- clk_disable_unprepare(host->clk);
free_channels:
host1x_channel_list_free(&host->channel_list);
iommu_exit:
@@ -507,19 +563,94 @@ static int host1x_remove(struct platform_device *pdev)
host1x_unregister(host);
host1x_debug_deinit(host);
+
+ pm_runtime_force_suspend(&pdev->dev);
+
host1x_intr_deinit(host);
host1x_syncpt_deinit(host);
- reset_control_assert(host->rst);
- clk_disable_unprepare(host->clk);
host1x_iommu_exit(host);
+ host1x_bo_cache_destroy(&host->cache);
return 0;
}
+static int __maybe_unused host1x_runtime_suspend(struct device *dev)
+{
+ struct host1x *host = dev_get_drvdata(dev);
+ int err;
+
+ host1x_intr_stop(host);
+ host1x_syncpt_save(host);
+
+ err = reset_control_bulk_assert(host->nresets, host->resets);
+ if (err) {
+ dev_err(dev, "failed to assert reset: %d\n", err);
+ goto resume_host1x;
+ }
+
+ usleep_range(1000, 2000);
+
+ clk_disable_unprepare(host->clk);
+ reset_control_bulk_release(host->nresets, host->resets);
+
+ return 0;
+
+resume_host1x:
+ host1x_setup_sid_table(host);
+ host1x_syncpt_restore(host);
+ host1x_intr_start(host);
+
+ return err;
+}
+
+static int __maybe_unused host1x_runtime_resume(struct device *dev)
+{
+ struct host1x *host = dev_get_drvdata(dev);
+ int err;
+
+ err = reset_control_bulk_acquire(host->nresets, host->resets);
+ if (err) {
+ dev_err(dev, "failed to acquire reset: %d\n", err);
+ return err;
+ }
+
+ err = clk_prepare_enable(host->clk);
+ if (err) {
+ dev_err(dev, "failed to enable clock: %d\n", err);
+ goto release_reset;
+ }
+
+ err = reset_control_bulk_deassert(host->nresets, host->resets);
+ if (err < 0) {
+ dev_err(dev, "failed to deassert reset: %d\n", err);
+ goto disable_clk;
+ }
+
+ host1x_setup_sid_table(host);
+ host1x_syncpt_restore(host);
+ host1x_intr_start(host);
+
+ return 0;
+
+disable_clk:
+ clk_disable_unprepare(host->clk);
+release_reset:
+ reset_control_bulk_release(host->nresets, host->resets);
+
+ return err;
+}
+
+static const struct dev_pm_ops host1x_pm_ops = {
+ SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
+ NULL)
+ /* TODO: add system suspend-resume once driver will be ready for that */
+};
+
static struct platform_driver tegra_host1x_driver = {
.driver = {
.name = "tegra-host1x",
.of_match_table = host1x_of_match,
+ .pm = &host1x_pm_ops,
},
.probe = host1x_probe,
.remove = host1x_remove,
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index fa6d4bc46e98..ca4b082f0cd4 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -118,7 +118,8 @@ struct host1x {
struct host1x_syncpt_base *bases;
struct device *dev;
struct clk *clk;
- struct reset_control *rst;
+ struct reset_control_bulk_data resets[2];
+ unsigned int nresets;
struct iommu_group *group;
struct iommu_domain *domain;
@@ -149,6 +150,8 @@ struct host1x {
struct list_head list;
struct device_dma_parameters dma_parms;
+
+ struct host1x_bo_cache cache;
};
void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 1999780a7203..6b40e9af1e88 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -159,6 +159,27 @@ static void host1x_channel_set_streamid(struct host1x_channel *channel)
#endif
}
+static void host1x_enable_gather_filter(struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+ struct host1x *host = dev_get_drvdata(ch->dev->parent);
+ u32 val;
+
+ if (!host->hv_regs)
+ return;
+
+ val = host1x_hypervisor_readl(
+ host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+ val |= BIT(ch->id % 32);
+ host1x_hypervisor_writel(
+ host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+ host1x_ch_writel(ch,
+ HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+ HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
+
static int channel_submit(struct host1x_job *job)
{
struct host1x_channel *ch = job->channel;
@@ -190,6 +211,7 @@ static int channel_submit(struct host1x_job *job)
}
host1x_channel_set_streamid(ch);
+ host1x_enable_gather_filter(ch);
/* begin a CDMA submit */
err = host1x_cdma_begin(&ch->cdma, job);
@@ -249,27 +271,6 @@ error:
return err;
}
-static void enable_gather_filter(struct host1x *host,
- struct host1x_channel *ch)
-{
-#if HOST1X_HW >= 6
- u32 val;
-
- if (!host->hv_regs)
- return;
-
- val = host1x_hypervisor_readl(
- host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
- val |= BIT(ch->id % 32);
- host1x_hypervisor_writel(
- host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
-#elif HOST1X_HW >= 4
- host1x_ch_writel(ch,
- HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
- HOST1X_CHANNEL_CHANNELCTRL);
-#endif
-}
-
static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
unsigned int index)
{
@@ -278,7 +279,6 @@ static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
#else
ch->regs = dev->regs + index * 0x100;
#endif
- enable_gather_filter(dev, ch);
return 0;
}
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 45b6be927ec4..965ba21818b1 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -297,14 +297,11 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
"host1x_sp_%02u", id);
}
- host1x_intr_start(host);
-
return 0;
}
void host1x_intr_deinit(struct host1x *host)
{
- host1x_intr_stop(host);
}
void host1x_intr_start(struct host1x *host)
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 0eef6df7c89e..5e8c183167b7 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -134,20 +134,20 @@ EXPORT_SYMBOL(host1x_job_add_wait);
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
+ unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
struct host1x_client *client = job->client;
struct device *dev = client->dev;
struct host1x_job_gather *g;
- struct iommu_domain *domain;
- struct sg_table *sgt;
unsigned int i;
int err;
- domain = iommu_get_domain_for_dev(dev);
job->num_unpins = 0;
for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocs[i];
- dma_addr_t phys_addr, *phys;
+ enum dma_data_direction direction;
+ struct host1x_bo_mapping *map;
+ struct host1x_bo *bo;
reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) {
@@ -155,64 +155,44 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto unpin;
}
- /*
- * If the client device is not attached to an IOMMU, the
- * physical address of the buffer object can be used.
- *
- * Similarly, when an IOMMU domain is shared between all
- * host1x clients, the IOVA is already available, so no
- * need to map the buffer object again.
- *
- * XXX Note that this isn't always safe to do because it
- * relies on an assumption that no cache maintenance is
- * needed on the buffer objects.
- */
- if (!domain || client->group)
- phys = &phys_addr;
- else
- phys = NULL;
-
- sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
- if (IS_ERR(sgt)) {
- err = PTR_ERR(sgt);
- goto unpin;
- }
+ bo = reloc->target.bo;
- if (sgt) {
- unsigned long mask = HOST1X_RELOC_READ |
- HOST1X_RELOC_WRITE;
- enum dma_data_direction dir;
-
- switch (reloc->flags & mask) {
- case HOST1X_RELOC_READ:
- dir = DMA_TO_DEVICE;
- break;
+ switch (reloc->flags & mask) {
+ case HOST1X_RELOC_READ:
+ direction = DMA_TO_DEVICE;
+ break;
- case HOST1X_RELOC_WRITE:
- dir = DMA_FROM_DEVICE;
- break;
+ case HOST1X_RELOC_WRITE:
+ direction = DMA_FROM_DEVICE;
+ break;
- case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
- dir = DMA_BIDIRECTIONAL;
- break;
+ case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+ direction = DMA_BIDIRECTIONAL;
+ break;
- default:
- err = -EINVAL;
- goto unpin;
- }
+ default:
+ err = -EINVAL;
+ goto unpin;
+ }
- err = dma_map_sgtable(dev, sgt, dir, 0);
- if (err)
- goto unpin;
+ map = host1x_bo_pin(dev, bo, direction, &client->cache);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto unpin;
+ }
- job->unpins[job->num_unpins].dev = dev;
- job->unpins[job->num_unpins].dir = dir;
- phys_addr = sg_dma_address(sgt->sgl);
+ /*
+ * host1x clients are generally not able to do scatter-gather themselves, so fail
+ * if the buffer is discontiguous and we fail to map its SG table to a single
+ * contiguous chunk of I/O virtual memory.
+ */
+ if (map->chunks > 1) {
+ err = -EINVAL;
+ goto unpin;
}
- job->addr_phys[job->num_unpins] = phys_addr;
- job->unpins[job->num_unpins].bo = reloc->target.bo;
- job->unpins[job->num_unpins].sgt = sgt;
+ job->addr_phys[job->num_unpins] = map->phys;
+ job->unpins[job->num_unpins].map = map;
job->num_unpins++;
}
@@ -224,12 +204,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
return 0;
for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_bo_mapping *map;
size_t gather_size = 0;
struct scatterlist *sg;
- dma_addr_t phys_addr;
unsigned long shift;
struct iova *alloc;
- dma_addr_t *phys;
unsigned int j;
if (job->cmds[i].is_wait)
@@ -243,25 +222,16 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto unpin;
}
- /**
- * If the host1x is not attached to an IOMMU, there is no need
- * to map the buffer object for the host1x, since the physical
- * address can simply be used.
- */
- if (!iommu_get_domain_for_dev(host->dev))
- phys = &phys_addr;
- else
- phys = NULL;
-
- sgt = host1x_bo_pin(host->dev, g->bo, phys);
- if (IS_ERR(sgt)) {
- err = PTR_ERR(sgt);
- goto put;
+ map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, &host->cache);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto unpin;
}
if (host->domain) {
- for_each_sgtable_sg(sgt, sg, j)
+ for_each_sgtable_sg(map->sgt, sg, j)
gather_size += sg->length;
+
gather_size = iova_align(&host->iova, gather_size);
shift = iova_shift(&host->iova);
@@ -272,33 +242,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto put;
}
- err = iommu_map_sgtable(host->domain,
- iova_dma_addr(&host->iova, alloc),
- sgt, IOMMU_READ);
+ err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
+ map->sgt, IOMMU_READ);
if (err == 0) {
__free_iova(&host->iova, alloc);
err = -EINVAL;
goto put;
}
- job->unpins[job->num_unpins].size = gather_size;
- phys_addr = iova_dma_addr(&host->iova, alloc);
- } else if (sgt) {
- err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
- if (err)
- goto put;
-
- job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
- job->unpins[job->num_unpins].dev = host->dev;
- phys_addr = sg_dma_address(sgt->sgl);
+ map->phys = iova_dma_addr(&host->iova, alloc);
+ map->size = gather_size;
}
- job->addr_phys[job->num_unpins] = phys_addr;
- job->gather_addr_phys[i] = phys_addr;
-
- job->unpins[job->num_unpins].bo = g->bo;
- job->unpins[job->num_unpins].sgt = sgt;
+ job->addr_phys[job->num_unpins] = map->phys;
+ job->unpins[job->num_unpins].map = map;
job->num_unpins++;
+
+ job->gather_addr_phys[i] = map->phys;
}
return 0;
@@ -690,22 +650,16 @@ void host1x_job_unpin(struct host1x_job *job)
unsigned int i;
for (i = 0; i < job->num_unpins; i++) {
- struct host1x_job_unpin_data *unpin = &job->unpins[i];
- struct device *dev = unpin->dev ?: host->dev;
- struct sg_table *sgt = unpin->sgt;
-
- if (!job->enable_firewall && unpin->size && host->domain) {
- iommu_unmap(host->domain, job->addr_phys[i],
- unpin->size);
- free_iova(&host->iova,
- iova_pfn(&host->iova, job->addr_phys[i]));
- }
+ struct host1x_bo_mapping *map = job->unpins[i].map;
+ struct host1x_bo *bo = map->bo;
- if (unpin->dev && sgt)
- dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);
+ if (!job->enable_firewall && map->size && host->domain) {
+ iommu_unmap(host->domain, job->addr_phys[i], map->size);
+ free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
+ }
- host1x_bo_unpin(dev, unpin->bo, sgt);
- host1x_bo_put(unpin->bo);
+ host1x_bo_unpin(map);
+ host1x_bo_put(bo);
}
job->num_unpins = 0;
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index b4428c5495c9..dad5a1946693 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -35,11 +35,7 @@ struct host1x_job_cmd {
};
struct host1x_job_unpin_data {
- struct host1x_bo *bo;
- struct sg_table *sgt;
- struct device *dev;
- size_t size;
- enum dma_data_direction dir;
+ struct host1x_bo_mapping *map;
};
/*
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index d198a10848c6..e08e331e46ae 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -143,6 +143,8 @@ void host1x_syncpt_restore(struct host1x *host)
for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
host1x_hw_syncpt_restore_wait_base(host, sp_base + i);
+ host1x_hw_syncpt_enable_protection(host);
+
wmb();
}
@@ -366,9 +368,6 @@ int host1x_syncpt_init(struct host1x *host)
host->syncpt = syncpt;
host->bases = bases;
- host1x_syncpt_restore(host);
- host1x_hw_syncpt_enable_protection(host);
-
/* Allocate sync point to use for clearing waits for expired fences */
host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
if (!host->nop_sp)
diff --git a/drivers/soc/tegra/common.c b/drivers/soc/tegra/common.c
index cd33e99249c3..35c882da55fc 100644
--- a/drivers/soc/tegra/common.c
+++ b/drivers/soc/tegra/common.c
@@ -10,6 +10,7 @@
#include <linux/export.h>
#include <linux/of.h>
#include <linux/pm_opp.h>
+#include <linux/pm_runtime.h>
#include <soc/tegra/common.h>
#include <soc/tegra/fuse.h>
@@ -43,6 +44,7 @@ static int tegra_core_dev_init_opp_state(struct device *dev)
{
unsigned long rate;
struct clk *clk;
+ bool rpm_enabled;
int err;
clk = devm_clk_get(dev, NULL);
@@ -57,8 +59,31 @@ static int tegra_core_dev_init_opp_state(struct device *dev)
return -EINVAL;
}
+ /*
+ * Runtime PM of the device must be enabled in order to set up
+ * GENPD's performance properly because GENPD core checks whether
+ * device is suspended and this check doesn't work while RPM is
+ * disabled. This makes sure the OPP vote below gets cached in
+ * GENPD for the device. Instead, the vote is done the next time
+ * the device gets runtime resumed.
+ */
+ rpm_enabled = pm_runtime_enabled(dev);
+ if (!rpm_enabled)
+ pm_runtime_enable(dev);
+
+ /* should never happen in practice */
+ if (!pm_runtime_enabled(dev)) {
+ dev_WARN(dev, "failed to enable runtime PM\n");
+ pm_runtime_disable(dev);
+ return -EINVAL;
+ }
+
/* first dummy rate-setting initializes voltage vote */
err = dev_pm_opp_set_rate(dev, rate);
+
+ if (!rpm_enabled)
+ pm_runtime_disable(dev);
+
if (err) {
dev_err(dev, "failed to initialize OPP clock: %d\n", err);
return err;
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 7bccf589aba7..e8dc5bc41f79 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -7,6 +7,8 @@
#define __LINUX_HOST1X_H
#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/spinlock.h>
#include <linux/types.h>
enum host1x_class {
@@ -15,6 +17,8 @@ enum host1x_class {
HOST1X_CLASS_GR2D_SB = 0x52,
HOST1X_CLASS_VIC = 0x5D,
HOST1X_CLASS_GR3D = 0x60,
+ HOST1X_CLASS_NVDEC = 0xF0,
+ HOST1X_CLASS_NVDEC1 = 0xF5,
};
struct host1x;
@@ -24,6 +28,28 @@ struct iommu_group;
u64 host1x_get_dma_mask(struct host1x *host1x);
/**
+ * struct host1x_bo_cache - host1x buffer object cache
+ * @mappings: list of mappings
+ * @lock: synchronizes accesses to the list of mappings
+ */
+struct host1x_bo_cache {
+ struct list_head mappings;
+ struct mutex lock;
+};
+
+static inline void host1x_bo_cache_init(struct host1x_bo_cache *cache)
+{
+ INIT_LIST_HEAD(&cache->mappings);
+ mutex_init(&cache->lock);
+}
+
+static inline void host1x_bo_cache_destroy(struct host1x_bo_cache *cache)
+{
+ /* XXX warn if not empty? */
+ mutex_destroy(&cache->lock);
+}
+
+/**
* struct host1x_client_ops - host1x client operations
* @early_init: host1x client early initialization code
* @init: host1x client initialization code
@@ -73,6 +99,8 @@ struct host1x_client {
struct host1x_client *parent;
unsigned int usecount;
struct mutex lock;
+
+ struct host1x_bo_cache cache;
};
/*
@@ -82,23 +110,48 @@ struct host1x_client {
struct host1x_bo;
struct sg_table;
+struct host1x_bo_mapping {
+ struct kref ref;
+ struct dma_buf_attachment *attach;
+ enum dma_data_direction direction;
+ struct list_head list;
+ struct host1x_bo *bo;
+ struct sg_table *sgt;
+ unsigned int chunks;
+ struct device *dev;
+ dma_addr_t phys;
+ size_t size;
+
+ struct host1x_bo_cache *cache;
+ struct list_head entry;
+};
+
+static inline struct host1x_bo_mapping *to_host1x_bo_mapping(struct kref *ref)
+{
+ return container_of(ref, struct host1x_bo_mapping, ref);
+}
+
struct host1x_bo_ops {
struct host1x_bo *(*get)(struct host1x_bo *bo);
void (*put)(struct host1x_bo *bo);
- struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo,
- dma_addr_t *phys);
- void (*unpin)(struct device *dev, struct sg_table *sgt);
+ struct host1x_bo_mapping *(*pin)(struct device *dev, struct host1x_bo *bo,
+ enum dma_data_direction dir);
+ void (*unpin)(struct host1x_bo_mapping *map);
void *(*mmap)(struct host1x_bo *bo);
void (*munmap)(struct host1x_bo *bo, void *addr);
};
struct host1x_bo {
const struct host1x_bo_ops *ops;
+ struct list_head mappings;
+ spinlock_t lock;
};
static inline void host1x_bo_init(struct host1x_bo *bo,
const struct host1x_bo_ops *ops)
{
+ INIT_LIST_HEAD(&bo->mappings);
+ spin_lock_init(&bo->lock);
bo->ops = ops;
}
@@ -112,18 +165,10 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
bo->ops->put(bo);
}
-static inline struct sg_table *host1x_bo_pin(struct device *dev,
- struct host1x_bo *bo,
- dma_addr_t *phys)
-{
- return bo->ops->pin(dev, bo, phys);
-}
-
-static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo,
- struct sg_table *sgt)
-{
- bo->ops->unpin(dev, sgt);
-}
+struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+ enum dma_data_direction dir,
+ struct host1x_bo_cache *cache);
+void host1x_bo_unpin(struct host1x_bo_mapping *map);
static inline void *host1x_bo_mmap(struct host1x_bo *bo)
{
@@ -181,6 +226,7 @@ struct host1x_job;
struct host1x_channel *host1x_channel_request(struct host1x_client *client);
struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
+void host1x_channel_stop(struct host1x_channel *channel);
void host1x_channel_put(struct host1x_channel *channel);
int host1x_job_submit(struct host1x_job *job);
diff --git a/include/soc/tegra/common.h b/include/soc/tegra/common.h
index af41ad80ec21..8ec1ac07fc85 100644
--- a/include/soc/tegra/common.h
+++ b/include/soc/tegra/common.h
@@ -39,4 +39,19 @@ devm_tegra_core_dev_init_opp_table(struct device *dev,
}
#endif
+static inline int
+devm_tegra_core_dev_init_opp_table_common(struct device *dev)
+{
+ struct tegra_core_opp_params opp_params = {};
+ int err;
+
+ opp_params.init_state = true;
+
+ err = devm_tegra_core_dev_init_opp_table(dev, &opp_params);
+ if (err != -ENODEV)
+ return err;
+
+ return 0;
+}
+
#endif /* __SOC_TEGRA_COMMON_H__ */