linux/drivers/gpu/drm/vc4/vc4_drv.c

408 lines
10 KiB
C
Raw Normal View History

/*
* Copyright (C) 2014-2015 Broadcom
* Copyright (C) 2013 Red Hat
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
/**
* DOC: Broadcom VC4 Graphics Driver
*
* The Broadcom VideoCore 4 (present in the Raspberry Pi) contains a
* OpenGL ES 2.0-compatible 3D engine called V3D, and a highly
* configurable display output pipeline that supports HDMI, DSI, DPI,
* and Composite TV output.
*
* The 3D engine also has an interface for submitting arbitrary
* compute shader-style jobs using the same shader processor as is
* used for vertex and fragment shaders in GLES 2.0. However, given
* that the hardware isn't able to expose any standard interfaces like
* OpenGL compute shaders or OpenCL, it isn't supported by this
* driver.
*/
#include <linux/clk.h>
#include <linux/component.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <drm/drm_fb_cma_helper.h>
#include <drm/drm_fb_helper.h>
#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#define DRIVER_NAME "vc4"
#define DRIVER_DESC "Broadcom VC4 graphics"
#define DRIVER_DATE "20140616"
#define DRIVER_MAJOR 0
#define DRIVER_MINOR 0
#define DRIVER_PATCHLEVEL 0
/* Helper function for mapping the regs on a platform device. */
void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index)
{
struct resource *res;
void __iomem *map;
res = platform_get_resource(dev, IORESOURCE_MEM, index);
map = devm_ioremap_resource(&dev->dev, res);
if (IS_ERR(map)) {
DRM_ERROR("Failed to map registers: %ld\n", PTR_ERR(map));
return map;
}
return map;
}
static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_vc4_get_param *args = data;
int ret;
if (args->pad != 0)
return -EINVAL;
switch (args->param) {
case DRM_VC4_PARAM_V3D_IDENT0:
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
if (ret < 0)
return ret;
args->value = V3D_READ(V3D_IDENT0);
pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
break;
case DRM_VC4_PARAM_V3D_IDENT1:
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
if (ret < 0)
return ret;
args->value = V3D_READ(V3D_IDENT1);
pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
break;
case DRM_VC4_PARAM_V3D_IDENT2:
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
if (ret < 0)
return ret;
args->value = V3D_READ(V3D_IDENT2);
pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
break;
case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
case DRM_VC4_PARAM_SUPPORTS_ETC1:
case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
case DRM_VC4_PARAM_SUPPORTS_MADVISE:
case DRM_VC4_PARAM_SUPPORTS_PERFMON:
args->value = true;
break;
default:
DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
}
return 0;
}
static int vc4_open(struct drm_device *dev, struct drm_file *file)
{
struct vc4_file *vc4file;
vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL);
if (!vc4file)
return -ENOMEM;
vc4_perfmon_open_file(vc4file);
file->driver_priv = vc4file;
return 0;
}
static void vc4_close(struct drm_device *dev, struct drm_file *file)
{
struct vc4_file *vc4file = file->driver_priv;
vc4_perfmon_close_file(vc4file);
}
static const struct vm_operations_struct vc4_vm_ops = {
.fault = vc4_fault,
.open = drm_gem_vm_open,
.close = drm_gem_vm_close,
};
static const struct file_operations vc4_drm_fops = {
.owner = THIS_MODULE,
.open = drm_open,
.release = drm_release,
.unlocked_ioctl = drm_ioctl,
.mmap = vc4_mmap,
.poll = drm_poll,
.read = drm_read,
.compat_ioctl = drm_compat_ioctl,
.llseek = noop_llseek,
};
static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
drm/vc4: Add an ioctl for labeling GEM BOs for summary stats This has proven immensely useful for debugging memory leaks and overallocation (which is a rather serious concern on the platform, given that we typically run at about 256MB of CMA out of up to 1GB total memory, with framebuffers that are about 8MB ecah). The state of the art without this is to dump debug logs from every GL application, guess as to kernel allocations based on bo_stats, and try to merge that all together into a global picture of memory allocation state. With this, you can add a couple of calls to the debug build of the 3D driver and get a pretty detailed view of GPU memory usage from /debug/dri/0/bo_stats (or when we debug print to dmesg on allocation failure). The Mesa side currently labels at the gallium resource level (so you see that a 1920x20 pixmap has been created, presumably for the window system panel), but we could extend that to be even more useful with glObjectLabel() names being sent all the way down to the kernel. (partial) example of sorted debugfs output with Mesa labeling all resources: kernel BO cache: 16392kb BOs (3) tiling shadow 1920x1080: 8160kb BOs (1) resource 1920x1080@32/0: 8160kb BOs (1) scanout resource 1920x1080@32/0: 8100kb BOs (1) kernel: 8100kb BOs (1) v2: Use strndup_user(), use lockdep assertion instead of just a comment, fix an array[-1] reference, extend comment about name freeing. Signed-off-by: Eric Anholt <eric@anholt.net> Link: https://patchwork.freedesktop.org/patch/msgid/20170725182718.31468-2-eric@anholt.net Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
2017-07-25 18:27:17 +00:00
DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_PERFMON_CREATE, vc4_perfmon_create_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_PERFMON_DESTROY, vc4_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
};
static struct drm_driver vc4_drm_driver = {
.driver_features = (DRIVER_MODESET |
DRIVER_ATOMIC |
DRIVER_GEM |
DRIVER_HAVE_IRQ |
DRIVER_RENDER |
DRIVER_PRIME),
.lastclose = drm_fb_helper_lastclose,
.open = vc4_open,
.postclose = vc4_close,
.irq_handler = vc4_irq,
.irq_preinstall = vc4_irq_preinstall,
.irq_postinstall = vc4_irq_postinstall,
.irq_uninstall = vc4_irq_uninstall,
drm/vc4: Implement precise vblank timestamping. Precise vblank timestamping is implemented via the usual scanout position based method. On VC4 the pixelvalves PV do not have a scanout position register. Only the hardware video scaler HVS has a similar register which describes which scanline for the output is currently composited and stored in the HVS fifo for later consumption by the PV. This causes a problem in that the HVS runs at a much faster clock (system clock / audio gate) than the PV which runs at video mode dot clock, so the unless the fifo between HVS and PV is full, the HVS will progress faster in its observable read line position than video scan rate, so the HVS position reading can't be directly translated into a scanout position for timestamp correction. Additionally when the PV is in vblank, it doesn't consume from the fifo, so the fifo gets full very quickly and then the HVS stops compositing until the PV enters active scanout and starts consuming scanlines from the fifo again, making new space for the HVS to composite. Therefore a simple translation of HVS read position into elapsed time since (or to) start of active scanout does not work, but for the most interesting cases we can still get useful and sufficiently accurate results: 1. The PV enters active scanout of a new frame with the fifo of the HVS completely full, and the HVS can refill any fifo line which gets consumed and thereby freed up by the PV during active scanout very quickly. Therefore the PV and HVS work effectively in lock-step during active scanout with the fifo never having more than 1 scanline freed up by the PV before it gets refilled. The PV's real scanout position is therefore trailing the HVS compositing position as scanoutpos = hvspos - fifosize and we can get the true scanoutpos as HVS readpos minus fifo size, so precise timestamping works while in active scanout, except for the last few scanlines of the frame, when the HVS reaches end of frame, stops compositing and the PV catches up and drains the fifo. This special case would only introduce minor errors though. 2. If we are in vblank, then we can only guess something reasonable. If called from vblank irq, we assume the irq is usually dispatched with minimum delay, so we can take a timestamp taken at entry into the vblank irq handler as a baseline and then add a full vblank duration until the guessed start of active scanout. As irq dispatch is usually pretty low latency this works with relatively low jitter and good results. If we aren't called from vblank then we could be anywhere within the vblank interval, so we return a neutral result, simply the current system timestamp, and hope for the best. Measurement shows the generated timestamps to be rather precise, and at least never off more than 1 vblank duration worst-case. Limitations: Doesn't work well yet for interlaced video modes, therefore disabled in interlaced mode for now. v2: Use the DISPBASE registers to determine the FIFO size (changes by anholt) Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com> Signed-off-by: Eric Anholt <eric@anholt.net> Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com> (v2)
2016-06-23 06:17:50 +00:00
.get_scanout_position = vc4_crtc_get_scanoutpos,
drm/vblank: drop the mode argument from drm_calc_vbltimestamp_from_scanoutpos If we restrict this helper to only kms drivers (which is the case) we can look up the correct mode easily ourselves. But it's a bit tricky: - All legacy drivers look at crtc->hwmode. But that is updated already at the beginning of the modeset helper, which means when we disable a pipe. Hence the final timestamps might be a bit off. But since this is an existing bug I'm not going to change it, but just try to be bug-for-bug compatible with the current code. This only applies to radeon&amdgpu. - i915 tries to get it perfect by updating crtc->hwmode when the pipe is off (i.e. vblank->enabled = false). - All other atomic drivers look at crtc->state->adjusted_mode. Those that look at state->requested_mode simply don't adjust their mode, so it's the same. That has two problems: Accessing crtc->state from interrupt handling code is unsafe, and it's updated before we shut down the pipe. For nonblocking modesets it's even worse. For atomic drivers try to implement what i915 does. To do that we add a new hwmode field to the vblank structure, and update it from drm_calc_timestamping_constants(). For atomic drivers that's called from the right spot by the helper library already, so all fine. But for safety let's enforce that. For legacy driver this function is only called at the end (oh the fun), which is broken, so again let's not bother and just stay bug-for-bug compatible. The benefit is that we can use drm_calc_vbltimestamp_from_scanoutpos directly to implement ->get_vblank_timestamp in every driver, deleting a lot of code. v2: Completely new approach, trying to mimick the i915 solution. v3: Fixup kerneldoc. v4: Drop the WARN_ON to check that the vblank is off, atomic helpers currently unconditionally call this. Recomputing the same stuff should be harmless. v5: Fix typos and move misplaced hunks to the right patches (Neil). v6: Undo hunk movement (kbuild). Cc: Mario Kleiner <mario.kleiner@tuebingen.mpg.de> Cc: Eric Anholt <eric@anholt.net> Cc: Rob Clark <robdclark@gmail.com> Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: Alex Deucher <alexander.deucher@amd.com> Cc: Christian König <christian.koenig@amd.com> Cc: Ben Skeggs <bskeggs@redhat.com> Reviewed-by: Neil Armstrong <narmstrong@baylibre.com> Acked-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170509140329.24114-4-daniel.vetter@ffwll.ch
2017-05-09 14:03:28 +00:00
.get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
#if defined(CONFIG_DEBUG_FS)
.debugfs_init = vc4_debugfs_init,
#endif
.gem_create_object = vc4_create_object,
.gem_free_object_unlocked = vc4_free_object,
.gem_vm_ops = &vc4_vm_ops,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = drm_gem_prime_import,
.gem_prime_export = vc4_prime_export,
.gem_prime_res_obj = vc4_prime_res_obj,
.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
.gem_prime_import_sg_table = vc4_prime_import_sg_table,
.gem_prime_vmap = vc4_prime_vmap,
.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
.gem_prime_mmap = vc4_prime_mmap,
.dumb_create = vc4_dumb_create,
.ioctls = vc4_drm_ioctls,
.num_ioctls = ARRAY_SIZE(vc4_drm_ioctls),
.fops = &vc4_drm_fops,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
.date = DRIVER_DATE,
.major = DRIVER_MAJOR,
.minor = DRIVER_MINOR,
.patchlevel = DRIVER_PATCHLEVEL,
};
static int compare_dev(struct device *dev, void *data)
{
return dev == data;
}
static void vc4_match_add_drivers(struct device *dev,
struct component_match **match,
struct platform_driver *const *drivers,
int count)
{
int i;
for (i = 0; i < count; i++) {
struct device_driver *drv = &drivers[i]->driver;
struct device *p = NULL, *d;
while ((d = bus_find_device(&platform_bus_type, p, drv,
(void *)platform_bus_type.match))) {
put_device(p);
component_match_add(dev, match, compare_dev, d);
p = d;
}
put_device(p);
}
}
static void vc4_kick_out_firmware_fb(void)
{
struct apertures_struct *ap;
ap = alloc_apertures(1);
if (!ap)
return;
/* Since VC4 is a UMA device, the simplefb node may have been
* located anywhere in memory.
*/
ap->ranges[0].base = 0;
ap->ranges[0].size = ~0;
drm_fb_helper_remove_conflicting_framebuffers(ap, "vc4drmfb", false);
kfree(ap);
}
static int vc4_drm_bind(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct drm_device *drm;
struct vc4_dev *vc4;
int ret = 0;
dev->coherent_dma_mask = DMA_BIT_MASK(32);
vc4 = devm_kzalloc(dev, sizeof(*vc4), GFP_KERNEL);
if (!vc4)
return -ENOMEM;
drm = drm_dev_alloc(&vc4_drm_driver, dev);
if (IS_ERR(drm))
return PTR_ERR(drm);
platform_set_drvdata(pdev, drm);
vc4->dev = drm;
drm->dev_private = vc4;
drm/vc4: Add an ioctl for labeling GEM BOs for summary stats This has proven immensely useful for debugging memory leaks and overallocation (which is a rather serious concern on the platform, given that we typically run at about 256MB of CMA out of up to 1GB total memory, with framebuffers that are about 8MB ecah). The state of the art without this is to dump debug logs from every GL application, guess as to kernel allocations based on bo_stats, and try to merge that all together into a global picture of memory allocation state. With this, you can add a couple of calls to the debug build of the 3D driver and get a pretty detailed view of GPU memory usage from /debug/dri/0/bo_stats (or when we debug print to dmesg on allocation failure). The Mesa side currently labels at the gallium resource level (so you see that a 1920x20 pixmap has been created, presumably for the window system panel), but we could extend that to be even more useful with glObjectLabel() names being sent all the way down to the kernel. (partial) example of sorted debugfs output with Mesa labeling all resources: kernel BO cache: 16392kb BOs (3) tiling shadow 1920x1080: 8160kb BOs (1) resource 1920x1080@32/0: 8160kb BOs (1) scanout resource 1920x1080@32/0: 8100kb BOs (1) kernel: 8100kb BOs (1) v2: Use strndup_user(), use lockdep assertion instead of just a comment, fix an array[-1] reference, extend comment about name freeing. Signed-off-by: Eric Anholt <eric@anholt.net> Link: https://patchwork.freedesktop.org/patch/msgid/20170725182718.31468-2-eric@anholt.net Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
2017-07-25 18:27:17 +00:00
ret = vc4_bo_cache_init(drm);
if (ret)
goto dev_unref;
drm_mode_config_init(drm);
vc4_gem_init(drm);
ret = component_bind_all(dev, drm);
if (ret)
goto gem_destroy;
vc4_kick_out_firmware_fb();
ret = drm_dev_register(drm, 0);
if (ret < 0)
goto unbind_all;
vc4_kms_load(drm);
return 0;
unbind_all:
component_unbind_all(dev, drm);
gem_destroy:
vc4_gem_destroy(drm);
vc4_bo_cache_destroy(drm);
drm/vc4: Add an ioctl for labeling GEM BOs for summary stats This has proven immensely useful for debugging memory leaks and overallocation (which is a rather serious concern on the platform, given that we typically run at about 256MB of CMA out of up to 1GB total memory, with framebuffers that are about 8MB ecah). The state of the art without this is to dump debug logs from every GL application, guess as to kernel allocations based on bo_stats, and try to merge that all together into a global picture of memory allocation state. With this, you can add a couple of calls to the debug build of the 3D driver and get a pretty detailed view of GPU memory usage from /debug/dri/0/bo_stats (or when we debug print to dmesg on allocation failure). The Mesa side currently labels at the gallium resource level (so you see that a 1920x20 pixmap has been created, presumably for the window system panel), but we could extend that to be even more useful with glObjectLabel() names being sent all the way down to the kernel. (partial) example of sorted debugfs output with Mesa labeling all resources: kernel BO cache: 16392kb BOs (3) tiling shadow 1920x1080: 8160kb BOs (1) resource 1920x1080@32/0: 8160kb BOs (1) scanout resource 1920x1080@32/0: 8100kb BOs (1) kernel: 8100kb BOs (1) v2: Use strndup_user(), use lockdep assertion instead of just a comment, fix an array[-1] reference, extend comment about name freeing. Signed-off-by: Eric Anholt <eric@anholt.net> Link: https://patchwork.freedesktop.org/patch/msgid/20170725182718.31468-2-eric@anholt.net Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
2017-07-25 18:27:17 +00:00
dev_unref:
drm_dev_unref(drm);
return ret;
}
static void vc4_drm_unbind(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct drm_device *drm = platform_get_drvdata(pdev);
drm_dev_unregister(drm);
drm_fb_cma_fbdev_fini(drm);
drm_mode_config_cleanup(drm);
drm_dev_unref(drm);
}
static const struct component_master_ops vc4_drm_ops = {
.bind = vc4_drm_bind,
.unbind = vc4_drm_unbind,
};
static struct platform_driver *const component_drivers[] = {
&vc4_hdmi_driver,
&vc4_vec_driver,
&vc4_dpi_driver,
&vc4_dsi_driver,
&vc4_hvs_driver,
&vc4_crtc_driver,
&vc4_v3d_driver,
};
static int vc4_platform_drm_probe(struct platform_device *pdev)
{
struct component_match *match = NULL;
struct device *dev = &pdev->dev;
vc4_match_add_drivers(dev, &match,
component_drivers, ARRAY_SIZE(component_drivers));
return component_master_add_with_match(dev, &vc4_drm_ops, match);
}
static int vc4_platform_drm_remove(struct platform_device *pdev)
{
component_master_del(&pdev->dev, &vc4_drm_ops);
return 0;
}
static const struct of_device_id vc4_of_match[] = {
{ .compatible = "brcm,bcm2835-vc4", },
{ .compatible = "brcm,cygnus-vc4", },
{},
};
MODULE_DEVICE_TABLE(of, vc4_of_match);
static struct platform_driver vc4_platform_driver = {
.probe = vc4_platform_drm_probe,
.remove = vc4_platform_drm_remove,
.driver = {
.name = "vc4-drm",
.of_match_table = vc4_of_match,
},
};
static int __init vc4_drm_register(void)
{
int ret;
ret = platform_register_drivers(component_drivers,
ARRAY_SIZE(component_drivers));
if (ret)
return ret;
return platform_driver_register(&vc4_platform_driver);
}
static void __exit vc4_drm_unregister(void)
{
platform_unregister_drivers(component_drivers,
ARRAY_SIZE(component_drivers));
platform_driver_unregister(&vc4_platform_driver);
}
module_init(vc4_drm_register);
module_exit(vc4_drm_unregister);
MODULE_ALIAS("platform:vc4-drm");
MODULE_DESCRIPTION("Broadcom VC4 DRM Driver");
MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
MODULE_LICENSE("GPL v2");