2006-03-10 15:40:10 +00:00
|
|
|
/*
|
|
|
|
* Virtual Video driver - This code emulates a real video device with v4l2 api
|
|
|
|
*
|
|
|
|
* Copyright (c) 2006 by:
|
|
|
|
* Mauro Carvalho Chehab <mchehab--a.t--infradead.org>
|
|
|
|
* Ted Walther <ted--a.t--enumera.com>
|
|
|
|
* John Sokol <sokol--a.t--videotechnology.com>
|
|
|
|
* http://v4l.videotechnology.com/
|
|
|
|
*
|
2011-01-19 15:02:29 +00:00
|
|
|
* Conversion to videobuf2 by Pawel Osciak & Marek Szyprowski
|
|
|
|
* Copyright (c) 2010 Samsung Electronics
|
|
|
|
*
|
2006-03-10 15:40:10 +00:00
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the BSD Licence, GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version 2 of the
|
|
|
|
* License, or (at your option) any later version
|
|
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/sched.h>
|
2010-05-07 18:22:26 +00:00
|
|
|
#include <linux/slab.h>
|
2010-04-10 07:13:53 +00:00
|
|
|
#include <linux/font.h>
|
2007-07-02 13:19:38 +00:00
|
|
|
#include <linux/mutex.h>
|
2006-03-10 15:40:10 +00:00
|
|
|
#include <linux/videodev2.h>
|
|
|
|
#include <linux/kthread.h>
|
2006-12-07 04:34:23 +00:00
|
|
|
#include <linux/freezer.h>
|
2011-01-19 15:02:29 +00:00
|
|
|
#include <media/videobuf2-vmalloc.h>
|
2009-02-14 16:23:12 +00:00
|
|
|
#include <media/v4l2-device.h>
|
|
|
|
#include <media/v4l2-ioctl.h>
|
2011-01-23 14:33:16 +00:00
|
|
|
#include <media/v4l2-ctrls.h>
|
2011-03-11 23:01:54 +00:00
|
|
|
#include <media/v4l2-fh.h>
|
2011-06-07 13:20:23 +00:00
|
|
|
#include <media/v4l2-event.h>
|
2010-04-10 07:13:53 +00:00
|
|
|
#include <media/v4l2-common.h>
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2008-06-10 18:21:49 +00:00
|
|
|
#define VIVI_MODULE_NAME "vivi"
|
2008-06-10 03:02:32 +00:00
|
|
|
|
2012-10-23 12:56:59 +00:00
|
|
|
/* Maximum allowed frame rate
|
|
|
|
*
|
|
|
|
* Vivi will allow setting timeperframe in [1/FPS_MAX - FPS_MAX/1] range.
|
|
|
|
*
|
|
|
|
* Ideally FPS_MAX should be infinity, i.e. practically UINT_MAX, but that
|
|
|
|
* might hit application errors when they manipulate these values.
|
|
|
|
*
|
|
|
|
* Besides, for tpf < 1ms image-generation logic should be changed, to avoid
|
|
|
|
* producing frames with equal content.
|
|
|
|
*/
|
|
|
|
#define FPS_MAX 1000
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
#define MAX_WIDTH 1920
|
|
|
|
#define MAX_HEIGHT 1200
|
|
|
|
|
[media] Stop using linux/version.h on most video drivers
All the modified drivers didn't have any version increment since
Jan, 1 2011. Several of them didn't have any version increment
for a long time, even having new features and important bug fixes
happening.
As we're now filling the QUERYCAP version with the current Kernel
Release, we don't need to maintain a per-driver version control
anymore. So, let's just use the default.
In order to preserve the Kernel module version history, a
KERNEL_VERSION() macro were added to all modified drivers, and
the extraver number were incremented.
I opted to preserve the per-driver version control to a few
pwc, pvrusb2, s2255, s5p-fimc and sh_vou.
A few drivers are still using the legacy way to handle ioctl's.
So, we can't do such change on them, otherwise, they'll break.
Those are: uvc, et61x251 and sn9c102.
The rationale is that the per-driver version control seems to be
actively maintained on those.
Yet, I think that the better for them would be to just use the
default version numbering, instead of doing that by themselves.
While here, removed a few unneeded include linux/version.h
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2011-06-24 17:45:49 +00:00
|
|
|
#define VIVI_VERSION "0.8.1"
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2009-02-14 16:23:12 +00:00
|
|
|
MODULE_DESCRIPTION("Video Technology Magazine Virtual Video Capture Board");
|
|
|
|
MODULE_AUTHOR("Mauro Carvalho Chehab, Ted Walther and John Sokol");
|
|
|
|
MODULE_LICENSE("Dual BSD/GPL");
|
[media] Stop using linux/version.h on most video drivers
All the modified drivers didn't have any version increment since
Jan, 1 2011. Several of them didn't have any version increment
for a long time, even having new features and important bug fixes
happening.
As we're now filling the QUERYCAP version with the current Kernel
Release, we don't need to maintain a per-driver version control
anymore. So, let's just use the default.
In order to preserve the Kernel module version history, a
KERNEL_VERSION() macro were added to all modified drivers, and
the extraver number were incremented.
I opted to preserve the per-driver version control to a few
pwc, pvrusb2, s2255, s5p-fimc and sh_vou.
A few drivers are still using the legacy way to handle ioctl's.
So, we can't do such change on them, otherwise, they'll break.
Those are: uvc, et61x251 and sn9c102.
The rationale is that the per-driver version control seems to be
actively maintained on those.
Yet, I think that the better for them would be to just use the
default version numbering, instead of doing that by themselves.
While here, removed a few unneeded include linux/version.h
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2011-06-24 17:45:49 +00:00
|
|
|
MODULE_VERSION(VIVI_VERSION);
|
2009-02-14 16:23:12 +00:00
|
|
|
|
|
|
|
static unsigned video_nr = -1;
|
|
|
|
module_param(video_nr, uint, 0644);
|
|
|
|
MODULE_PARM_DESC(video_nr, "videoX start number, -1 is autodetect");
|
|
|
|
|
|
|
|
static unsigned n_devs = 1;
|
|
|
|
module_param(n_devs, uint, 0644);
|
|
|
|
MODULE_PARM_DESC(n_devs, "number of video devices to create");
|
|
|
|
|
|
|
|
static unsigned debug;
|
|
|
|
module_param(debug, uint, 0644);
|
|
|
|
MODULE_PARM_DESC(debug, "activates debug info");
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
/* Global font descriptor */
|
|
|
|
static const u8 *font8x16;	/* read-only byte data; presumably the 8x16 glyph bitmaps used for text rendering - assigned outside this view, TODO confirm */
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2012-10-23 12:56:59 +00:00
|
|
|
/* timeperframe: min/max and default */
|
|
|
|
static const struct v4l2_fract
|
|
|
|
tpf_min = {.numerator = 1, .denominator = FPS_MAX},
|
|
|
|
tpf_max = {.numerator = FPS_MAX, .denominator = 1},
|
|
|
|
tpf_default = {.numerator = 1001, .denominator = 30000}; /* NTSC */
|
|
|
|
|
2009-02-14 16:23:12 +00:00
|
|
|
/*
 * Debug print helper: forwards to v4l2_dbg(), using the module-wide "debug"
 * parameter as the current level and the v4l2_device embedded in @dev as
 * context.  @dev is parenthesized so that any pointer expression can be
 * passed, not just a plain identifier.
 */
#define dprintk(dev, level, fmt, arg...) \
	v4l2_dbg(level, debug, &(dev)->v4l2_dev, fmt, ## arg)
|
2006-03-10 15:40:10 +00:00
|
|
|
|
|
|
|
/* ------------------------------------------------------------------
|
|
|
|
Basic structures
|
|
|
|
------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
struct vivi_fmt {
|
2012-12-26 15:23:26 +00:00
|
|
|
const char *name;
|
2006-03-10 15:40:10 +00:00
|
|
|
u32 fourcc; /* v4l2 format id */
|
2012-08-06 13:36:18 +00:00
|
|
|
u8 depth;
|
|
|
|
bool is_yuv;
|
2006-03-10 15:40:10 +00:00
|
|
|
};
|
|
|
|
|
2012-12-26 15:23:26 +00:00
|
|
|
static const struct vivi_fmt formats[] = {
|
2008-10-14 15:47:09 +00:00
|
|
|
{
|
|
|
|
.name = "4:2:2, packed, YUYV",
|
|
|
|
.fourcc = V4L2_PIX_FMT_YUYV,
|
|
|
|
.depth = 16,
|
2012-08-06 13:36:18 +00:00
|
|
|
.is_yuv = true,
|
2008-10-14 15:47:09 +00:00
|
|
|
},
|
2008-10-14 15:47:25 +00:00
|
|
|
{
|
|
|
|
.name = "4:2:2, packed, UYVY",
|
|
|
|
.fourcc = V4L2_PIX_FMT_UYVY,
|
|
|
|
.depth = 16,
|
2012-08-06 13:36:18 +00:00
|
|
|
.is_yuv = true,
|
2008-10-14 15:47:25 +00:00
|
|
|
},
|
2012-05-02 06:15:11 +00:00
|
|
|
{
|
|
|
|
.name = "4:2:2, packed, YVYU",
|
|
|
|
.fourcc = V4L2_PIX_FMT_YVYU,
|
|
|
|
.depth = 16,
|
2012-08-06 13:36:18 +00:00
|
|
|
.is_yuv = true,
|
2012-05-02 06:15:11 +00:00
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "4:2:2, packed, VYUY",
|
|
|
|
.fourcc = V4L2_PIX_FMT_VYUY,
|
|
|
|
.depth = 16,
|
2012-08-06 13:36:18 +00:00
|
|
|
.is_yuv = true,
|
2012-05-02 06:15:11 +00:00
|
|
|
},
|
2008-10-14 15:47:35 +00:00
|
|
|
{
|
|
|
|
.name = "RGB565 (LE)",
|
|
|
|
.fourcc = V4L2_PIX_FMT_RGB565, /* gggbbbbb rrrrrggg */
|
|
|
|
.depth = 16,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "RGB565 (BE)",
|
|
|
|
.fourcc = V4L2_PIX_FMT_RGB565X, /* rrrrrggg gggbbbbb */
|
|
|
|
.depth = 16,
|
|
|
|
},
|
2008-10-14 15:47:43 +00:00
|
|
|
{
|
|
|
|
.name = "RGB555 (LE)",
|
|
|
|
.fourcc = V4L2_PIX_FMT_RGB555, /* gggbbbbb arrrrrgg */
|
|
|
|
.depth = 16,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "RGB555 (BE)",
|
|
|
|
.fourcc = V4L2_PIX_FMT_RGB555X, /* arrrrrgg gggbbbbb */
|
|
|
|
.depth = 16,
|
|
|
|
},
|
2012-05-02 06:15:11 +00:00
|
|
|
{
|
|
|
|
.name = "RGB24 (LE)",
|
|
|
|
.fourcc = V4L2_PIX_FMT_RGB24, /* rgb */
|
|
|
|
.depth = 24,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "RGB24 (BE)",
|
|
|
|
.fourcc = V4L2_PIX_FMT_BGR24, /* bgr */
|
|
|
|
.depth = 24,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "RGB32 (LE)",
|
|
|
|
.fourcc = V4L2_PIX_FMT_RGB32, /* argb */
|
|
|
|
.depth = 32,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "RGB32 (BE)",
|
|
|
|
.fourcc = V4L2_PIX_FMT_BGR32, /* bgra */
|
|
|
|
.depth = 32,
|
|
|
|
},
|
2006-03-10 15:40:10 +00:00
|
|
|
};
|
|
|
|
|
2012-12-26 15:23:26 +00:00
|
|
|
/*
 * Find the formats[] entry whose fourcc equals @pixelformat.
 *
 * Returns a pointer to the matching table entry, or NULL when no entry
 * has that fourcc.
 */
static const struct vivi_fmt *__get_format(u32 pixelformat)
{
	unsigned int k;

	for (k = 0; k < ARRAY_SIZE(formats); k++) {
		if (formats[k].fourcc == pixelformat)
			return &formats[k];
	}

	return NULL;
}
|
|
|
|
|
2012-12-26 15:23:26 +00:00
|
|
|
static const struct vivi_fmt *get_format(struct v4l2_format *f)
|
2012-10-23 12:56:59 +00:00
|
|
|
{
|
|
|
|
return __get_format(f->fmt.pix.pixelformat);
|
|
|
|
}
|
|
|
|
|
2006-03-10 15:40:10 +00:00
|
|
|
/* buffer for one video frame */
|
|
|
|
struct vivi_buffer {
|
|
|
|
/* common v4l buffer stuff -- must be first */
|
2011-01-19 15:02:29 +00:00
|
|
|
struct vb2_buffer vb;
|
|
|
|
struct list_head list;
|
2006-03-10 15:40:10 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct vivi_dmaqueue {
|
|
|
|
struct list_head active;
|
|
|
|
|
|
|
|
/* thread for generating video stream*/
|
|
|
|
struct task_struct *kthread;
|
|
|
|
wait_queue_head_t wq;
|
|
|
|
/* Counters to control fps rate */
|
|
|
|
int frame;
|
|
|
|
int ini_jiffies;
|
|
|
|
};
|
|
|
|
|
|
|
|
static LIST_HEAD(vivi_devlist);	/* global list head; struct vivi_dev has a vivi_devlist member to link into it */
|
|
|
|
|
|
|
|
struct vivi_dev {
|
|
|
|
struct list_head vivi_devlist;
|
2009-02-14 16:23:12 +00:00
|
|
|
struct v4l2_device v4l2_dev;
|
2011-01-23 14:33:16 +00:00
|
|
|
struct v4l2_ctrl_handler ctrl_handler;
|
2012-06-09 14:27:43 +00:00
|
|
|
struct video_device vdev;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
/* controls */
|
2011-01-23 14:33:16 +00:00
|
|
|
struct v4l2_ctrl *brightness;
|
|
|
|
struct v4l2_ctrl *contrast;
|
|
|
|
struct v4l2_ctrl *saturation;
|
|
|
|
struct v4l2_ctrl *hue;
|
2011-06-07 09:34:41 +00:00
|
|
|
struct {
|
|
|
|
/* autogain/gain cluster */
|
|
|
|
struct v4l2_ctrl *autogain;
|
|
|
|
struct v4l2_ctrl *gain;
|
|
|
|
};
|
2011-01-23 14:33:16 +00:00
|
|
|
struct v4l2_ctrl *volume;
|
2012-05-02 06:33:52 +00:00
|
|
|
struct v4l2_ctrl *alpha;
|
2011-01-23 14:33:16 +00:00
|
|
|
struct v4l2_ctrl *button;
|
|
|
|
struct v4l2_ctrl *boolean;
|
|
|
|
struct v4l2_ctrl *int32;
|
|
|
|
struct v4l2_ctrl *int64;
|
|
|
|
struct v4l2_ctrl *menu;
|
|
|
|
struct v4l2_ctrl *string;
|
2011-03-29 19:33:11 +00:00
|
|
|
struct v4l2_ctrl *bitmask;
|
2011-08-05 09:38:05 +00:00
|
|
|
struct v4l2_ctrl *int_menu;
|
2010-04-10 07:13:53 +00:00
|
|
|
|
2007-12-13 19:13:37 +00:00
|
|
|
spinlock_t slock;
|
2008-04-02 21:10:59 +00:00
|
|
|
struct mutex mutex;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
|
|
|
struct vivi_dmaqueue vidq;
|
|
|
|
|
|
|
|
/* Several counters */
|
2010-04-10 07:13:53 +00:00
|
|
|
unsigned ms;
|
2008-01-13 22:36:11 +00:00
|
|
|
unsigned long jiffies;
|
2011-01-23 14:33:16 +00:00
|
|
|
unsigned button_pressed;
|
2007-12-10 07:43:38 +00:00
|
|
|
|
|
|
|
int mv_count; /* Controls bars movement */
|
2009-01-11 13:29:43 +00:00
|
|
|
|
|
|
|
/* Input Number */
|
|
|
|
int input;
|
2009-02-14 16:43:44 +00:00
|
|
|
|
2006-03-10 15:40:10 +00:00
|
|
|
/* video capture */
|
2012-12-26 15:23:26 +00:00
|
|
|
const struct vivi_fmt *fmt;
|
2012-10-23 12:56:59 +00:00
|
|
|
struct v4l2_fract timeperframe;
|
2007-12-10 12:33:52 +00:00
|
|
|
unsigned int width, height;
|
2011-01-19 15:02:29 +00:00
|
|
|
struct vb2_queue vb_vidq;
|
2014-02-10 11:08:45 +00:00
|
|
|
unsigned int seq_count;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2012-05-02 06:15:11 +00:00
|
|
|
u8 bars[9][3];
|
2012-11-02 12:10:31 +00:00
|
|
|
u8 line[MAX_WIDTH * 8] __attribute__((__aligned__(4)));
|
2012-05-02 06:15:11 +00:00
|
|
|
unsigned int pixelsize;
|
2012-05-02 06:33:52 +00:00
|
|
|
u8 alpha_component;
|
[media] vivi: Optimize gen_text()
I've noticed that vivi takes a lot of CPU to produce its frames.
For example for 8 devices and 8 simple programs running, where each
captures YUY2 640x480 and displays it to X via SDL, profile timing is as
follows:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 82K of event 'cycles'
# Event count (approx.): 31551930117
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
49.48% vivi-* [vivi] [k] gen_twopix
10.79% vivi-* [kernel.kallsyms] [k] memcpy
10.02% rawv libc-2.13.so [.] __memcpy_ssse3
8.35% vivi-* [vivi] [k] gen_text.constprop.6
5.06% Xorg [unknown] [.] 0xa73015f8
2.32% rawv [vivi] [k] gen_twopix
1.22% rawv [vivi] [k] precalculate_line
1.20% vivi-* [vivi] [k] vivi_fillbuff
(rawv is display program, vivi-* is a combination of vivi-000 through vivi-007)
so a lot of time is spent in gen_twopix() which as the follwing
call-graph profile shows ...
49.48% vivi-* [vivi] [k] gen_twopix
|
--- gen_twopix
|
|--96.30%-- gen_text.constprop.6
| vivi_fillbuff
| vivi_thread
| kthread
| ret_from_kernel_thread
|
--3.70%-- vivi_fillbuff
vivi_thread
kthread
ret_from_kernel_thread
... is called mostly from gen_text().
If we'll look at gen_text(), in the inner loop, we'll see
if (chr & (1 << (7 - i)))
gen_twopix(dev, pos + j * dev->pixelsize, WHITE, (x+y) & 1);
else
gen_twopix(dev, pos + j * dev->pixelsize, TEXT_BLACK, (x+y) & 1);
which calls gen_twopix() for every character pixel, and that is very
expensive, because gen_twopix() branches several times.
Now, let's note, that we operate on only two colors - WHITE and
TEXT_BLACK, and that pixel for that colors could be precomputed and
gen_twopix() moved out of the inner loop. Also note, that for black
and white colors even/odd does not make a difference for all supported
pixel formats, so we could stop doing that `odd` gen_twopix() parameter
game.
So the first thing we are doing here is
1) moving gen_twopix() calls out of gen_text() into vivi_fillbuff(),
to pregenerate black and white colors, just before printing
starts.
what we have next is that gen_text's font rendering loop, even with
gen_twopix() calls moved out, was inefficient and branchy, so let's
2) rewrite gen_text() loop so it uses less variables + unroll char
horizontal-rendering loop + instantiate 3 code paths for pixelsizes 2,3
and 4 so that in all inner loops we don't have to branch or make
indirections (*).
Done all above reworks, for gen_text() we get nice, non-branchy
streamlined code (showing loop for pixelsize=2):
? cmp $0x2,%eax
? ? jne 26
? mov -0x18(%ebp),%eax
? mov -0x20(%ebp),%edi
? imul -0x20(%ebp),%eax
? movzwl 0x3ffc(%ebx),%esi
0,08 ? movzwl 0x4000(%ebx),%ecx
0,04 ? add %edi,%edi
? mov 0x0,%ebx
0,51 ? mov %edi,-0x1c(%ebp)
? mov %ebx,-0x14(%ebp)
? movl $0x0,-0x10(%ebp)
? lea 0x20(%edx,%eax,2),%eax
? mov %eax,-0x18(%ebp)
? xchg %ax,%ax
0,04 ? a0: mov 0x8(%ebp),%ebx
? mov -0x18(%ebp),%eax
0,04 ? movzbl (%ebx),%edx
0,16 ? test %dl,%dl
0,04 ? ? je 128
0,08 ? lea 0x0(%esi),%esi
1,61 ? b0:???shl $0x4,%edx
1,02 ? ? mov -0x14(%ebp),%edi
2,04 ? ? add -0x10(%ebp),%edx
2,24 ? ? lea 0x1(%ebx),%ebx
0,27 ? ? movzbl (%edi,%edx,1),%edx
9,92 ? ? mov %esi,%edi
0,39 ? ? test %dl,%dl
2,04 ? ? cmovns %ecx,%edi
4,63 ? ? test $0x40,%dl
0,55 ? ? mov %di,(%eax)
3,76 ? ? mov %esi,%edi
0,71 ? ? cmove %ecx,%edi
3,41 ? ? test $0x20,%dl
0,75 ? ? mov %di,0x2(%eax)
2,43 ? ? mov %esi,%edi
0,59 ? ? cmove %ecx,%edi
4,59 ? ? test $0x10,%dl
0,67 ? ? mov %di,0x4(%eax)
2,55 ? ? mov %esi,%edi
0,78 ? ? cmove %ecx,%edi
4,31 ? ? test $0x8,%dl
0,67 ? ? mov %di,0x6(%eax)
5,76 ? ? mov %esi,%edi
1,80 ? ? cmove %ecx,%edi
4,20 ? ? test $0x4,%dl
0,86 ? ? mov %di,0x8(%eax)
2,98 ? ? mov %esi,%edi
1,37 ? ? cmove %ecx,%edi
4,67 ? ? test $0x2,%dl
0,20 ? ? mov %di,0xa(%eax)
2,78 ? ? mov %esi,%edi
0,75 ? ? cmove %ecx,%edi
3,92 ? ? and $0x1,%edx
0,75 ? ? mov %esi,%edx
2,59 ? ? mov %di,0xc(%eax)
0,59 ? ? cmove %ecx,%edx
3,10 ? ? mov %dx,0xe(%eax)
2,39 ? ? add $0x10,%eax
0,51 ? ? movzbl (%ebx),%edx
2,86 ? ? test %dl,%dl
2,31 ? ???jne b0
0,04 ?128: addl $0x1,-0x10(%ebp)
4,00 ? mov -0x1c(%ebp),%eax
0,04 ? add %eax,-0x18(%ebp)
0,08 ? cmpl $0x10,-0x10(%ebp)
? ? jne a0
which almost goes away from the profile:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 49K of event 'cycles'
# Event count (approx.): 16799780016
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
27.51% rawv libc-2.13.so [.] __memcpy_ssse3
23.77% vivi-* [kernel.kallsyms] [k] memcpy
9.96% Xorg [unknown] [.] 0xa76f5e12
4.94% vivi-* [vivi] [k] gen_text.constprop.6
4.44% rawv [vivi] [k] gen_twopix
3.17% vivi-* [vivi] [k] vivi_fillbuff
2.45% rawv [vivi] [k] precalculate_line
1.20% swapper [kernel.kallsyms] [k] read_hpet
i.e. gen_twopix() overhead dropped from 49% to 4% and gen_text() loops
from ~8% to ~4%, and overal cycles count dropped from 31551930117 to
16799780016 which is ~1.9x whole workload speedup.
(*) for RGB24 rendering I've introduced x24, which could be thought as
synthetic u24 for simplifying the code. That's done because for
memcpy used for conditional assignment, gcc generates suboptimal code
with more indirections.
Fortunately, in C struct assignment is builtin and that's all we
need from pixeltype for font rendering.
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:30 +00:00
|
|
|
u32 textfg, textbg;
|
2006-03-10 15:40:10 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/* ------------------------------------------------------------------
|
|
|
|
DMA and thread functions
|
|
|
|
------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
/*
 * Bars and Colors should match positions: each enumerator is the index of
 * that color inside a bar table row (see gen_twopix(), which uses the value
 * to index dev->bars[]).
 */
enum colors {
	WHITE = 0,
	AMBER,
	CYAN,
	GREEN,
	MAGENTA,
	RED,
	BLUE,
	BLACK,
	TEXT_BLACK,
};
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
/* Bar colors as brace-enclosed initializers:  R    G    B */
#define COLOR_WHITE	{204, 204, 204}
#define COLOR_AMBER	{208, 208,   0}
#define COLOR_CYAN	{  0, 206, 206}
#define COLOR_GREEN	{  0, 239,   0}
#define COLOR_MAGENTA	{239,   0, 239}
#define COLOR_RED	{205,   0,   0}
#define COLOR_BLUE	{  0,   0, 255}
#define COLOR_BLACK	{  0,   0,   0}
|
|
|
|
|
|
|
|
struct bar_std {
|
2010-04-10 07:13:53 +00:00
|
|
|
u8 bar[9][3];
|
2009-01-11 13:29:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Maximum number of bars are 10 - otherwise, the input print code
|
|
|
|
should be modified */
|
2012-12-26 15:23:26 +00:00
|
|
|
static const struct bar_std bars[] = {
|
2009-01-11 13:29:43 +00:00
|
|
|
{ /* Standard ITU-R color bar sequence */
|
2010-04-10 07:13:53 +00:00
|
|
|
{ COLOR_WHITE, COLOR_AMBER, COLOR_CYAN, COLOR_GREEN,
|
|
|
|
COLOR_MAGENTA, COLOR_RED, COLOR_BLUE, COLOR_BLACK, COLOR_BLACK }
|
2009-01-11 13:29:43 +00:00
|
|
|
}, {
|
2010-04-10 07:13:53 +00:00
|
|
|
{ COLOR_WHITE, COLOR_AMBER, COLOR_BLACK, COLOR_WHITE,
|
|
|
|
COLOR_AMBER, COLOR_BLACK, COLOR_WHITE, COLOR_AMBER, COLOR_BLACK }
|
2009-01-11 13:29:43 +00:00
|
|
|
}, {
|
2010-04-10 07:13:53 +00:00
|
|
|
{ COLOR_WHITE, COLOR_CYAN, COLOR_BLACK, COLOR_WHITE,
|
|
|
|
COLOR_CYAN, COLOR_BLACK, COLOR_WHITE, COLOR_CYAN, COLOR_BLACK }
|
2009-01-11 13:29:43 +00:00
|
|
|
}, {
|
2010-04-10 07:13:53 +00:00
|
|
|
{ COLOR_WHITE, COLOR_GREEN, COLOR_BLACK, COLOR_WHITE,
|
|
|
|
COLOR_GREEN, COLOR_BLACK, COLOR_WHITE, COLOR_GREEN, COLOR_BLACK }
|
2009-01-11 13:29:43 +00:00
|
|
|
},
|
2006-03-10 15:40:10 +00:00
|
|
|
};
|
|
|
|
|
2009-01-11 13:29:43 +00:00
|
|
|
#define NUM_INPUTS ARRAY_SIZE(bars)	/* number of entries in the bars[] table */
|
|
|
|
|
2007-12-10 12:33:52 +00:00
|
|
|
/*
 * RGB to Y/Cb/Cr conversion in integer arithmetic.  The coefficients are
 * scaled by 65536; adding 32768 rounds to nearest before the ">> 16".
 *
 * Every argument is parenthesized so that expressions such as "r + 1" expand
 * correctly.  Each argument is evaluated exactly once.
 */

/* RGB to Y (Luma) Color transform */
#define TO_Y(r, g, b) \
	(((16829 * (r) + 33039 * (g) + 6416 * (b) + 32768) >> 16) + 16)
/* RGB to V(Cr) Color transform */
#define TO_V(r, g, b) \
	(((28784 * (r) - 24103 * (g) - 4681 * (b) + 32768) >> 16) + 128)
/* RGB to U(Cb) Color transform */
#define TO_U(r, g, b) \
	(((-9714 * (r) - 19070 * (g) + 28784 * (b) + 32768) >> 16) + 128)
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2009-06-25 19:28:23 +00:00
|
|
|
/* precalculate color bar values to speed up rendering */
|
2010-04-10 07:13:53 +00:00
|
|
|
static void precalculate_bars(struct vivi_dev *dev)
|
2009-06-25 19:28:23 +00:00
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
u8 r, g, b;
|
2009-06-25 19:28:23 +00:00
|
|
|
int k, is_yuv;
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
for (k = 0; k < 9; k++) {
|
|
|
|
r = bars[dev->input].bar[k][0];
|
|
|
|
g = bars[dev->input].bar[k][1];
|
|
|
|
b = bars[dev->input].bar[k][2];
|
2012-08-06 13:36:18 +00:00
|
|
|
is_yuv = dev->fmt->is_yuv;
|
2009-06-25 19:28:23 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
switch (dev->fmt->fourcc) {
|
2009-06-25 19:28:23 +00:00
|
|
|
case V4L2_PIX_FMT_RGB565:
|
|
|
|
case V4L2_PIX_FMT_RGB565X:
|
|
|
|
r >>= 3;
|
|
|
|
g >>= 2;
|
|
|
|
b >>= 3;
|
|
|
|
break;
|
|
|
|
case V4L2_PIX_FMT_RGB555:
|
|
|
|
case V4L2_PIX_FMT_RGB555X:
|
|
|
|
r >>= 3;
|
|
|
|
g >>= 3;
|
|
|
|
b >>= 3;
|
|
|
|
break;
|
2012-08-06 13:36:18 +00:00
|
|
|
case V4L2_PIX_FMT_YUYV:
|
|
|
|
case V4L2_PIX_FMT_UYVY:
|
|
|
|
case V4L2_PIX_FMT_YVYU:
|
|
|
|
case V4L2_PIX_FMT_VYUY:
|
2012-05-02 06:15:11 +00:00
|
|
|
case V4L2_PIX_FMT_RGB24:
|
|
|
|
case V4L2_PIX_FMT_BGR24:
|
|
|
|
case V4L2_PIX_FMT_RGB32:
|
|
|
|
case V4L2_PIX_FMT_BGR32:
|
|
|
|
break;
|
2009-06-25 19:28:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (is_yuv) {
|
2010-04-10 07:13:53 +00:00
|
|
|
dev->bars[k][0] = TO_Y(r, g, b); /* Luma */
|
|
|
|
dev->bars[k][1] = TO_U(r, g, b); /* Cb */
|
|
|
|
dev->bars[k][2] = TO_V(r, g, b); /* Cr */
|
2009-06-25 19:28:23 +00:00
|
|
|
} else {
|
2010-04-10 07:13:53 +00:00
|
|
|
dev->bars[k][0] = r;
|
|
|
|
dev->bars[k][1] = g;
|
|
|
|
dev->bars[k][2] = b;
|
2009-06-25 19:28:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-05-02 06:15:11 +00:00
|
|
|
/* 'odd' is true for pixels 1, 3, 5, etc. and false for pixels 0, 2, 4, etc. */
|
|
|
|
static void gen_twopix(struct vivi_dev *dev, u8 *buf, int colorpos, bool odd)
|
2008-10-14 15:46:59 +00:00
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
u8 r_y, g_u, b_v;
|
2012-05-02 06:33:52 +00:00
|
|
|
u8 alpha = dev->alpha_component;
|
2008-10-14 15:46:59 +00:00
|
|
|
int color;
|
2010-04-10 07:13:53 +00:00
|
|
|
u8 *p;
|
2008-10-14 15:46:59 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
r_y = dev->bars[colorpos][0]; /* R or precalculated Y */
|
|
|
|
g_u = dev->bars[colorpos][1]; /* G or precalculated U */
|
|
|
|
b_v = dev->bars[colorpos][2]; /* B or precalculated V */
|
2008-10-14 15:46:59 +00:00
|
|
|
|
2012-05-02 06:15:11 +00:00
|
|
|
for (color = 0; color < dev->pixelsize; color++) {
|
2008-10-14 15:46:59 +00:00
|
|
|
p = buf + color;
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
switch (dev->fmt->fourcc) {
|
2008-10-14 15:47:09 +00:00
|
|
|
case V4L2_PIX_FMT_YUYV:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
|
|
|
*p = r_y;
|
|
|
|
break;
|
|
|
|
case 1:
|
2012-05-02 06:15:11 +00:00
|
|
|
*p = odd ? b_v : g_u;
|
2008-10-14 15:47:09 +00:00
|
|
|
break;
|
|
|
|
}
|
2008-10-14 15:46:59 +00:00
|
|
|
break;
|
2008-10-14 15:47:25 +00:00
|
|
|
case V4L2_PIX_FMT_UYVY:
|
|
|
|
switch (color) {
|
2012-05-02 06:15:11 +00:00
|
|
|
case 0:
|
|
|
|
*p = odd ? b_v : g_u;
|
|
|
|
break;
|
2008-10-14 15:47:25 +00:00
|
|
|
case 1:
|
|
|
|
*p = r_y;
|
|
|
|
break;
|
2012-05-02 06:15:11 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case V4L2_PIX_FMT_YVYU:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
|
|
|
*p = r_y;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
*p = odd ? g_u : b_v;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case V4L2_PIX_FMT_VYUY:
|
|
|
|
switch (color) {
|
2008-10-14 15:47:25 +00:00
|
|
|
case 0:
|
2012-05-02 06:15:11 +00:00
|
|
|
*p = odd ? g_u : b_v;
|
2008-10-14 15:47:25 +00:00
|
|
|
break;
|
2012-05-02 06:15:11 +00:00
|
|
|
case 1:
|
|
|
|
*p = r_y;
|
2008-10-14 15:47:25 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2008-10-14 15:47:35 +00:00
|
|
|
case V4L2_PIX_FMT_RGB565:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
|
|
|
*p = (g_u << 5) | b_v;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
*p = (r_y << 3) | (g_u >> 3);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case V4L2_PIX_FMT_RGB565X:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
|
|
|
*p = (r_y << 3) | (g_u >> 3);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
*p = (g_u << 5) | b_v;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2008-10-14 15:47:43 +00:00
|
|
|
case V4L2_PIX_FMT_RGB555:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
|
|
|
*p = (g_u << 5) | b_v;
|
|
|
|
break;
|
|
|
|
case 1:
|
2012-05-02 06:33:52 +00:00
|
|
|
*p = (alpha & 0x80) | (r_y << 2) | (g_u >> 3);
|
2008-10-14 15:47:43 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case V4L2_PIX_FMT_RGB555X:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
2012-05-02 06:33:52 +00:00
|
|
|
*p = (alpha & 0x80) | (r_y << 2) | (g_u >> 3);
|
2008-10-14 15:47:43 +00:00
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
*p = (g_u << 5) | b_v;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2012-05-02 06:15:11 +00:00
|
|
|
case V4L2_PIX_FMT_RGB24:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
|
|
|
*p = r_y;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
*p = g_u;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
*p = b_v;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case V4L2_PIX_FMT_BGR24:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
|
|
|
*p = b_v;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
*p = g_u;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
*p = r_y;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case V4L2_PIX_FMT_RGB32:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
2012-05-02 06:33:52 +00:00
|
|
|
*p = alpha;
|
2012-05-02 06:15:11 +00:00
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
*p = r_y;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
*p = g_u;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
*p = b_v;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case V4L2_PIX_FMT_BGR32:
|
|
|
|
switch (color) {
|
|
|
|
case 0:
|
|
|
|
*p = b_v;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
*p = g_u;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
*p = r_y;
|
|
|
|
break;
|
|
|
|
case 3:
|
2012-05-02 06:33:52 +00:00
|
|
|
*p = alpha;
|
2012-05-02 06:15:11 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2008-10-14 15:46:59 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
static void precalculate_line(struct vivi_dev *dev)
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
2012-11-02 12:10:33 +00:00
|
|
|
unsigned pixsize = dev->pixelsize;
|
|
|
|
unsigned pixsize2 = 2*pixsize;
|
|
|
|
int colorpos;
|
|
|
|
u8 *pos;
|
|
|
|
|
|
|
|
for (colorpos = 0; colorpos < 16; ++colorpos) {
|
|
|
|
u8 pix[8];
|
|
|
|
int wstart = colorpos * dev->width / 8;
|
|
|
|
int wend = (colorpos+1) * dev->width / 8;
|
|
|
|
int w;
|
|
|
|
|
|
|
|
gen_twopix(dev, &pix[0], colorpos % 8, 0);
|
|
|
|
gen_twopix(dev, &pix[pixsize], colorpos % 8, 1);
|
|
|
|
|
|
|
|
for (w = wstart/2*2, pos = dev->line + w*pixsize; w < wend; w += 2, pos += pixsize2)
|
|
|
|
memcpy(pos, pix, pixsize2);
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
2010-04-10 07:13:53 +00:00
|
|
|
}
|
2006-03-10 15:40:10 +00:00
|
|
|
|
[media] vivi: Optimize gen_text()
I've noticed that vivi takes a lot of CPU to produce its frames.
For example for 8 devices and 8 simple programs running, where each
captures YUY2 640x480 and displays it to X via SDL, profile timing is as
follows:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 82K of event 'cycles'
# Event count (approx.): 31551930117
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
49.48% vivi-* [vivi] [k] gen_twopix
10.79% vivi-* [kernel.kallsyms] [k] memcpy
10.02% rawv libc-2.13.so [.] __memcpy_ssse3
8.35% vivi-* [vivi] [k] gen_text.constprop.6
5.06% Xorg [unknown] [.] 0xa73015f8
2.32% rawv [vivi] [k] gen_twopix
1.22% rawv [vivi] [k] precalculate_line
1.20% vivi-* [vivi] [k] vivi_fillbuff
(rawv is display program, vivi-* is a combination of vivi-000 through vivi-007)
so a lot of time is spent in gen_twopix() which as the following
call-graph profile shows ...
49.48% vivi-* [vivi] [k] gen_twopix
|
--- gen_twopix
|
|--96.30%-- gen_text.constprop.6
| vivi_fillbuff
| vivi_thread
| kthread
| ret_from_kernel_thread
|
--3.70%-- vivi_fillbuff
vivi_thread
kthread
ret_from_kernel_thread
... is called mostly from gen_text().
If we'll look at gen_text(), in the inner loop, we'll see
if (chr & (1 << (7 - i)))
gen_twopix(dev, pos + j * dev->pixelsize, WHITE, (x+y) & 1);
else
gen_twopix(dev, pos + j * dev->pixelsize, TEXT_BLACK, (x+y) & 1);
which calls gen_twopix() for every character pixel, and that is very
expensive, because gen_twopix() branches several times.
Now, let's note, that we operate on only two colors - WHITE and
TEXT_BLACK, and that pixel for that colors could be precomputed and
gen_twopix() moved out of the inner loop. Also note, that for black
and white colors even/odd does not make a difference for all supported
pixel formats, so we could stop doing that `odd` gen_twopix() parameter
game.
So the first thing we are doing here is
1) moving gen_twopix() calls out of gen_text() into vivi_fillbuff(),
to pregenerate black and white colors, just before printing
starts.
what we have next is that gen_text's font rendering loop, even with
gen_twopix() calls moved out, was inefficient and branchy, so let's
2) rewrite gen_text() loop so it uses less variables + unroll char
horizontal-rendering loop + instantiate 3 code paths for pixelsizes 2,3
and 4 so that in all inner loops we don't have to branch or make
indirections (*).
Done all above reworks, for gen_text() we get nice, non-branchy
streamlined code (showing loop for pixelsize=2):
? cmp $0x2,%eax
? ? jne 26
? mov -0x18(%ebp),%eax
? mov -0x20(%ebp),%edi
? imul -0x20(%ebp),%eax
? movzwl 0x3ffc(%ebx),%esi
0,08 ? movzwl 0x4000(%ebx),%ecx
0,04 ? add %edi,%edi
? mov 0x0,%ebx
0,51 ? mov %edi,-0x1c(%ebp)
? mov %ebx,-0x14(%ebp)
? movl $0x0,-0x10(%ebp)
? lea 0x20(%edx,%eax,2),%eax
? mov %eax,-0x18(%ebp)
? xchg %ax,%ax
0,04 ? a0: mov 0x8(%ebp),%ebx
? mov -0x18(%ebp),%eax
0,04 ? movzbl (%ebx),%edx
0,16 ? test %dl,%dl
0,04 ? ? je 128
0,08 ? lea 0x0(%esi),%esi
1,61 ? b0:???shl $0x4,%edx
1,02 ? ? mov -0x14(%ebp),%edi
2,04 ? ? add -0x10(%ebp),%edx
2,24 ? ? lea 0x1(%ebx),%ebx
0,27 ? ? movzbl (%edi,%edx,1),%edx
9,92 ? ? mov %esi,%edi
0,39 ? ? test %dl,%dl
2,04 ? ? cmovns %ecx,%edi
4,63 ? ? test $0x40,%dl
0,55 ? ? mov %di,(%eax)
3,76 ? ? mov %esi,%edi
0,71 ? ? cmove %ecx,%edi
3,41 ? ? test $0x20,%dl
0,75 ? ? mov %di,0x2(%eax)
2,43 ? ? mov %esi,%edi
0,59 ? ? cmove %ecx,%edi
4,59 ? ? test $0x10,%dl
0,67 ? ? mov %di,0x4(%eax)
2,55 ? ? mov %esi,%edi
0,78 ? ? cmove %ecx,%edi
4,31 ? ? test $0x8,%dl
0,67 ? ? mov %di,0x6(%eax)
5,76 ? ? mov %esi,%edi
1,80 ? ? cmove %ecx,%edi
4,20 ? ? test $0x4,%dl
0,86 ? ? mov %di,0x8(%eax)
2,98 ? ? mov %esi,%edi
1,37 ? ? cmove %ecx,%edi
4,67 ? ? test $0x2,%dl
0,20 ? ? mov %di,0xa(%eax)
2,78 ? ? mov %esi,%edi
0,75 ? ? cmove %ecx,%edi
3,92 ? ? and $0x1,%edx
0,75 ? ? mov %esi,%edx
2,59 ? ? mov %di,0xc(%eax)
0,59 ? ? cmove %ecx,%edx
3,10 ? ? mov %dx,0xe(%eax)
2,39 ? ? add $0x10,%eax
0,51 ? ? movzbl (%ebx),%edx
2,86 ? ? test %dl,%dl
2,31 ? ???jne b0
0,04 ?128: addl $0x1,-0x10(%ebp)
4,00 ? mov -0x1c(%ebp),%eax
0,04 ? add %eax,-0x18(%ebp)
0,08 ? cmpl $0x10,-0x10(%ebp)
? ? jne a0
which almost goes away from the profile:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 49K of event 'cycles'
# Event count (approx.): 16799780016
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
27.51% rawv libc-2.13.so [.] __memcpy_ssse3
23.77% vivi-* [kernel.kallsyms] [k] memcpy
9.96% Xorg [unknown] [.] 0xa76f5e12
4.94% vivi-* [vivi] [k] gen_text.constprop.6
4.44% rawv [vivi] [k] gen_twopix
3.17% vivi-* [vivi] [k] vivi_fillbuff
2.45% rawv [vivi] [k] precalculate_line
1.20% swapper [kernel.kallsyms] [k] read_hpet
i.e. gen_twopix() overhead dropped from 49% to 4% and gen_text() loops
from ~8% to ~4%, and overall cycle count dropped from 31551930117 to
16799780016 which is ~1.9x whole workload speedup.
(*) for RGB24 rendering I've introduced x24, which could be thought as
synthetic u24 for simplifying the code. That's done because for
memcpy used for conditional assignment, gcc generates suboptimal code
with more indirections.
Fortunately, in C struct assignment is builtin and that's all we
need from pixeltype for font rendering.
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:30 +00:00
|
|
|
/*
 * x24: packed struct of a u16 followed by a u8, used as the pixel type
 * for rgb24 rendering (gen_text() instantiates PRINTSTR(x24) when
 * dev->pixelsize is 3). gen_text() never accesses the members by name;
 * values are only copied with memcpy() and struct assignment.
 */
|
|
|
|
typedef struct { u16 __; u8 _; } __attribute__((packed)) x24;
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
static void gen_text(struct vivi_dev *dev, char *basep,
|
|
|
|
int y, int x, char *text)
|
|
|
|
{
|
|
|
|
int line;
|
[media] vivi: Optimize gen_text()
I've noticed that vivi takes a lot of CPU to produce its frames.
For example for 8 devices and 8 simple programs running, where each
captures YUY2 640x480 and displays it to X via SDL, profile timing is as
follows:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 82K of event 'cycles'
# Event count (approx.): 31551930117
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
49.48% vivi-* [vivi] [k] gen_twopix
10.79% vivi-* [kernel.kallsyms] [k] memcpy
10.02% rawv libc-2.13.so [.] __memcpy_ssse3
8.35% vivi-* [vivi] [k] gen_text.constprop.6
5.06% Xorg [unknown] [.] 0xa73015f8
2.32% rawv [vivi] [k] gen_twopix
1.22% rawv [vivi] [k] precalculate_line
1.20% vivi-* [vivi] [k] vivi_fillbuff
(rawv is display program, vivi-* is a combination of vivi-000 through vivi-007)
so a lot of time is spent in gen_twopix() which as the following
call-graph profile shows ...
49.48% vivi-* [vivi] [k] gen_twopix
|
--- gen_twopix
|
|--96.30%-- gen_text.constprop.6
| vivi_fillbuff
| vivi_thread
| kthread
| ret_from_kernel_thread
|
--3.70%-- vivi_fillbuff
vivi_thread
kthread
ret_from_kernel_thread
... is called mostly from gen_text().
If we'll look at gen_text(), in the inner loop, we'll see
if (chr & (1 << (7 - i)))
gen_twopix(dev, pos + j * dev->pixelsize, WHITE, (x+y) & 1);
else
gen_twopix(dev, pos + j * dev->pixelsize, TEXT_BLACK, (x+y) & 1);
which calls gen_twopix() for every character pixel, and that is very
expensive, because gen_twopix() branches several times.
Now, let's note, that we operate on only two colors - WHITE and
TEXT_BLACK, and that pixel for that colors could be precomputed and
gen_twopix() moved out of the inner loop. Also note, that for black
and white colors even/odd does not make a difference for all supported
pixel formats, so we could stop doing that `odd` gen_twopix() parameter
game.
So the first thing we are doing here is
1) moving gen_twopix() calls out of gen_text() into vivi_fillbuff(),
to pregenerate black and white colors, just before printing
starts.
what we have next is that gen_text's font rendering loop, even with
gen_twopix() calls moved out, was inefficient and branchy, so let's
2) rewrite gen_text() loop so it uses less variables + unroll char
horizontal-rendering loop + instantiate 3 code paths for pixelsizes 2,3
and 4 so that in all inner loops we don't have to branch or make
indirections (*).
Done all above reworks, for gen_text() we get nice, non-branchy
streamlined code (showing loop for pixelsize=2):
? cmp $0x2,%eax
? ? jne 26
? mov -0x18(%ebp),%eax
? mov -0x20(%ebp),%edi
? imul -0x20(%ebp),%eax
? movzwl 0x3ffc(%ebx),%esi
0,08 ? movzwl 0x4000(%ebx),%ecx
0,04 ? add %edi,%edi
? mov 0x0,%ebx
0,51 ? mov %edi,-0x1c(%ebp)
? mov %ebx,-0x14(%ebp)
? movl $0x0,-0x10(%ebp)
? lea 0x20(%edx,%eax,2),%eax
? mov %eax,-0x18(%ebp)
? xchg %ax,%ax
0,04 ? a0: mov 0x8(%ebp),%ebx
? mov -0x18(%ebp),%eax
0,04 ? movzbl (%ebx),%edx
0,16 ? test %dl,%dl
0,04 ? ? je 128
0,08 ? lea 0x0(%esi),%esi
1,61 ? b0:???shl $0x4,%edx
1,02 ? ? mov -0x14(%ebp),%edi
2,04 ? ? add -0x10(%ebp),%edx
2,24 ? ? lea 0x1(%ebx),%ebx
0,27 ? ? movzbl (%edi,%edx,1),%edx
9,92 ? ? mov %esi,%edi
0,39 ? ? test %dl,%dl
2,04 ? ? cmovns %ecx,%edi
4,63 ? ? test $0x40,%dl
0,55 ? ? mov %di,(%eax)
3,76 ? ? mov %esi,%edi
0,71 ? ? cmove %ecx,%edi
3,41 ? ? test $0x20,%dl
0,75 ? ? mov %di,0x2(%eax)
2,43 ? ? mov %esi,%edi
0,59 ? ? cmove %ecx,%edi
4,59 ? ? test $0x10,%dl
0,67 ? ? mov %di,0x4(%eax)
2,55 ? ? mov %esi,%edi
0,78 ? ? cmove %ecx,%edi
4,31 ? ? test $0x8,%dl
0,67 ? ? mov %di,0x6(%eax)
5,76 ? ? mov %esi,%edi
1,80 ? ? cmove %ecx,%edi
4,20 ? ? test $0x4,%dl
0,86 ? ? mov %di,0x8(%eax)
2,98 ? ? mov %esi,%edi
1,37 ? ? cmove %ecx,%edi
4,67 ? ? test $0x2,%dl
0,20 ? ? mov %di,0xa(%eax)
2,78 ? ? mov %esi,%edi
0,75 ? ? cmove %ecx,%edi
3,92 ? ? and $0x1,%edx
0,75 ? ? mov %esi,%edx
2,59 ? ? mov %di,0xc(%eax)
0,59 ? ? cmove %ecx,%edx
3,10 ? ? mov %dx,0xe(%eax)
2,39 ? ? add $0x10,%eax
0,51 ? ? movzbl (%ebx),%edx
2,86 ? ? test %dl,%dl
2,31 ? ???jne b0
0,04 ?128: addl $0x1,-0x10(%ebp)
4,00 ? mov -0x1c(%ebp),%eax
0,04 ? add %eax,-0x18(%ebp)
0,08 ? cmpl $0x10,-0x10(%ebp)
? ? jne a0
which almost goes away from the profile:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 49K of event 'cycles'
# Event count (approx.): 16799780016
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
27.51% rawv libc-2.13.so [.] __memcpy_ssse3
23.77% vivi-* [kernel.kallsyms] [k] memcpy
9.96% Xorg [unknown] [.] 0xa76f5e12
4.94% vivi-* [vivi] [k] gen_text.constprop.6
4.44% rawv [vivi] [k] gen_twopix
3.17% vivi-* [vivi] [k] vivi_fillbuff
2.45% rawv [vivi] [k] precalculate_line
1.20% swapper [kernel.kallsyms] [k] read_hpet
i.e. gen_twopix() overhead dropped from 49% to 4% and gen_text() loops
from ~8% to ~4%, and overall cycle count dropped from 31551930117 to
16799780016 which is ~1.9x whole workload speedup.
(*) for RGB24 rendering I've introduced x24, which could be thought as
synthetic u24 for simplifying the code. That's done because for
memcpy used for conditional assignment, gcc generates suboptimal code
with more indirections.
Fortunately, in C struct assignment is builtin and that's all we
need from pixeltype for font rendering.
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:30 +00:00
|
|
|
unsigned int width = dev->width;
|
2009-01-11 13:29:43 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
/* Checks if it is possible to show string */
|
[media] vivi: Optimize gen_text()
I've noticed that vivi takes a lot of CPU to produce its frames.
For example for 8 devices and 8 simple programs running, where each
captures YUY2 640x480 and displays it to X via SDL, profile timing is as
follows:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 82K of event 'cycles'
# Event count (approx.): 31551930117
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
49.48% vivi-* [vivi] [k] gen_twopix
10.79% vivi-* [kernel.kallsyms] [k] memcpy
10.02% rawv libc-2.13.so [.] __memcpy_ssse3
8.35% vivi-* [vivi] [k] gen_text.constprop.6
5.06% Xorg [unknown] [.] 0xa73015f8
2.32% rawv [vivi] [k] gen_twopix
1.22% rawv [vivi] [k] precalculate_line
1.20% vivi-* [vivi] [k] vivi_fillbuff
(rawv is display program, vivi-* is a combination of vivi-000 through vivi-007)
so a lot of time is spent in gen_twopix() which as the following
call-graph profile shows ...
49.48% vivi-* [vivi] [k] gen_twopix
|
--- gen_twopix
|
|--96.30%-- gen_text.constprop.6
| vivi_fillbuff
| vivi_thread
| kthread
| ret_from_kernel_thread
|
--3.70%-- vivi_fillbuff
vivi_thread
kthread
ret_from_kernel_thread
... is called mostly from gen_text().
If we'll look at gen_text(), in the inner loop, we'll see
if (chr & (1 << (7 - i)))
gen_twopix(dev, pos + j * dev->pixelsize, WHITE, (x+y) & 1);
else
gen_twopix(dev, pos + j * dev->pixelsize, TEXT_BLACK, (x+y) & 1);
which calls gen_twopix() for every character pixel, and that is very
expensive, because gen_twopix() branches several times.
Now, let's note, that we operate on only two colors - WHITE and
TEXT_BLACK, and that pixel for that colors could be precomputed and
gen_twopix() moved out of the inner loop. Also note, that for black
and white colors even/odd does not make a difference for all supported
pixel formats, so we could stop doing that `odd` gen_twopix() parameter
game.
So the first thing we are doing here is
1) moving gen_twopix() calls out of gen_text() into vivi_fillbuff(),
to pregenerate black and white colors, just before printing
starts.
what we have next is that gen_text's font rendering loop, even with
gen_twopix() calls moved out, was inefficient and branchy, so let's
2) rewrite gen_text() loop so it uses less variables + unroll char
horizontal-rendering loop + instantiate 3 code paths for pixelsizes 2,3
and 4 so that in all inner loops we don't have to branch or make
indirections (*).
Done all above reworks, for gen_text() we get nice, non-branchy
streamlined code (showing loop for pixelsize=2):
? cmp $0x2,%eax
? ? jne 26
? mov -0x18(%ebp),%eax
? mov -0x20(%ebp),%edi
? imul -0x20(%ebp),%eax
? movzwl 0x3ffc(%ebx),%esi
0,08 ? movzwl 0x4000(%ebx),%ecx
0,04 ? add %edi,%edi
? mov 0x0,%ebx
0,51 ? mov %edi,-0x1c(%ebp)
? mov %ebx,-0x14(%ebp)
? movl $0x0,-0x10(%ebp)
? lea 0x20(%edx,%eax,2),%eax
? mov %eax,-0x18(%ebp)
? xchg %ax,%ax
0,04 ? a0: mov 0x8(%ebp),%ebx
? mov -0x18(%ebp),%eax
0,04 ? movzbl (%ebx),%edx
0,16 ? test %dl,%dl
0,04 ? ? je 128
0,08 ? lea 0x0(%esi),%esi
1,61 ? b0:???shl $0x4,%edx
1,02 ? ? mov -0x14(%ebp),%edi
2,04 ? ? add -0x10(%ebp),%edx
2,24 ? ? lea 0x1(%ebx),%ebx
0,27 ? ? movzbl (%edi,%edx,1),%edx
9,92 ? ? mov %esi,%edi
0,39 ? ? test %dl,%dl
2,04 ? ? cmovns %ecx,%edi
4,63 ? ? test $0x40,%dl
0,55 ? ? mov %di,(%eax)
3,76 ? ? mov %esi,%edi
0,71 ? ? cmove %ecx,%edi
3,41 ? ? test $0x20,%dl
0,75 ? ? mov %di,0x2(%eax)
2,43 ? ? mov %esi,%edi
0,59 ? ? cmove %ecx,%edi
4,59 ? ? test $0x10,%dl
0,67 ? ? mov %di,0x4(%eax)
2,55 ? ? mov %esi,%edi
0,78 ? ? cmove %ecx,%edi
4,31 ? ? test $0x8,%dl
0,67 ? ? mov %di,0x6(%eax)
5,76 ? ? mov %esi,%edi
1,80 ? ? cmove %ecx,%edi
4,20 ? ? test $0x4,%dl
0,86 ? ? mov %di,0x8(%eax)
2,98 ? ? mov %esi,%edi
1,37 ? ? cmove %ecx,%edi
4,67 ? ? test $0x2,%dl
0,20 ? ? mov %di,0xa(%eax)
2,78 ? ? mov %esi,%edi
0,75 ? ? cmove %ecx,%edi
3,92 ? ? and $0x1,%edx
0,75 ? ? mov %esi,%edx
2,59 ? ? mov %di,0xc(%eax)
0,59 ? ? cmove %ecx,%edx
3,10 ? ? mov %dx,0xe(%eax)
2,39 ? ? add $0x10,%eax
0,51 ? ? movzbl (%ebx),%edx
2,86 ? ? test %dl,%dl
2,31 ? ???jne b0
0,04 ?128: addl $0x1,-0x10(%ebp)
4,00 ? mov -0x1c(%ebp),%eax
0,04 ? add %eax,-0x18(%ebp)
0,08 ? cmpl $0x10,-0x10(%ebp)
? ? jne a0
which almost goes away from the profile:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 49K of event 'cycles'
# Event count (approx.): 16799780016
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
27.51% rawv libc-2.13.so [.] __memcpy_ssse3
23.77% vivi-* [kernel.kallsyms] [k] memcpy
9.96% Xorg [unknown] [.] 0xa76f5e12
4.94% vivi-* [vivi] [k] gen_text.constprop.6
4.44% rawv [vivi] [k] gen_twopix
3.17% vivi-* [vivi] [k] vivi_fillbuff
2.45% rawv [vivi] [k] precalculate_line
1.20% swapper [kernel.kallsyms] [k] read_hpet
i.e. gen_twopix() overhead dropped from 49% to 4% and gen_text() loops
from ~8% to ~4%, and overall cycle count dropped from 31551930117 to
16799780016 which is ~1.9x whole workload speedup.
(*) for RGB24 rendering I've introduced x24, which could be thought as
synthetic u24 for simplifying the code. That's done because for
memcpy used for conditional assignment, gcc generates suboptimal code
with more indirections.
Fortunately, in C struct assignment is builtin and that's all we
need from pixeltype for font rendering.
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:30 +00:00
|
|
|
if (y + 16 >= dev->height || x + strlen(text) * 8 >= width)
|
2010-04-10 07:13:53 +00:00
|
|
|
return;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
|
|
|
/* Print stream time */
|
[media] vivi: Optimize gen_text()
I've noticed that vivi takes a lot of CPU to produce its frames.
For example for 8 devices and 8 simple programs running, where each
captures YUY2 640x480 and displays it to X via SDL, profile timing is as
follows:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 82K of event 'cycles'
# Event count (approx.): 31551930117
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
49.48% vivi-* [vivi] [k] gen_twopix
10.79% vivi-* [kernel.kallsyms] [k] memcpy
10.02% rawv libc-2.13.so [.] __memcpy_ssse3
8.35% vivi-* [vivi] [k] gen_text.constprop.6
5.06% Xorg [unknown] [.] 0xa73015f8
2.32% rawv [vivi] [k] gen_twopix
1.22% rawv [vivi] [k] precalculate_line
1.20% vivi-* [vivi] [k] vivi_fillbuff
(rawv is display program, vivi-* is a combination of vivi-000 through vivi-007)
so a lot of time is spent in gen_twopix() which as the following
call-graph profile shows ...
49.48% vivi-* [vivi] [k] gen_twopix
|
--- gen_twopix
|
|--96.30%-- gen_text.constprop.6
| vivi_fillbuff
| vivi_thread
| kthread
| ret_from_kernel_thread
|
--3.70%-- vivi_fillbuff
vivi_thread
kthread
ret_from_kernel_thread
... is called mostly from gen_text().
If we'll look at gen_text(), in the inner loop, we'll see
if (chr & (1 << (7 - i)))
gen_twopix(dev, pos + j * dev->pixelsize, WHITE, (x+y) & 1);
else
gen_twopix(dev, pos + j * dev->pixelsize, TEXT_BLACK, (x+y) & 1);
which calls gen_twopix() for every character pixel, and that is very
expensive, because gen_twopix() branches several times.
Now, let's note, that we operate on only two colors - WHITE and
TEXT_BLACK, and that pixel for that colors could be precomputed and
gen_twopix() moved out of the inner loop. Also note, that for black
and white colors even/odd does not make a difference for all supported
pixel formats, so we could stop doing that `odd` gen_twopix() parameter
game.
So the first thing we are doing here is
1) moving gen_twopix() calls out of gen_text() into vivi_fillbuff(),
to pregenerate black and white colors, just before printing
starts.
what we have next is that gen_text's font rendering loop, even with
gen_twopix() calls moved out, was inefficient and branchy, so let's
2) rewrite gen_text() loop so it uses less variables + unroll char
horizontal-rendering loop + instantiate 3 code paths for pixelsizes 2,3
and 4 so that in all inner loops we don't have to branch or make
indirections (*).
Done all above reworks, for gen_text() we get nice, non-branchy
streamlined code (showing loop for pixelsize=2):
? cmp $0x2,%eax
? ? jne 26
? mov -0x18(%ebp),%eax
? mov -0x20(%ebp),%edi
? imul -0x20(%ebp),%eax
? movzwl 0x3ffc(%ebx),%esi
0,08 ? movzwl 0x4000(%ebx),%ecx
0,04 ? add %edi,%edi
? mov 0x0,%ebx
0,51 ? mov %edi,-0x1c(%ebp)
? mov %ebx,-0x14(%ebp)
? movl $0x0,-0x10(%ebp)
? lea 0x20(%edx,%eax,2),%eax
? mov %eax,-0x18(%ebp)
? xchg %ax,%ax
0,04 ? a0: mov 0x8(%ebp),%ebx
? mov -0x18(%ebp),%eax
0,04 ? movzbl (%ebx),%edx
0,16 ? test %dl,%dl
0,04 ? ? je 128
0,08 ? lea 0x0(%esi),%esi
1,61 ? b0:???shl $0x4,%edx
1,02 ? ? mov -0x14(%ebp),%edi
2,04 ? ? add -0x10(%ebp),%edx
2,24 ? ? lea 0x1(%ebx),%ebx
0,27 ? ? movzbl (%edi,%edx,1),%edx
9,92 ? ? mov %esi,%edi
0,39 ? ? test %dl,%dl
2,04 ? ? cmovns %ecx,%edi
4,63 ? ? test $0x40,%dl
0,55 ? ? mov %di,(%eax)
3,76 ? ? mov %esi,%edi
0,71 ? ? cmove %ecx,%edi
3,41 ? ? test $0x20,%dl
0,75 ? ? mov %di,0x2(%eax)
2,43 ? ? mov %esi,%edi
0,59 ? ? cmove %ecx,%edi
4,59 ? ? test $0x10,%dl
0,67 ? ? mov %di,0x4(%eax)
2,55 ? ? mov %esi,%edi
0,78 ? ? cmove %ecx,%edi
4,31 ? ? test $0x8,%dl
0,67 ? ? mov %di,0x6(%eax)
5,76 ? ? mov %esi,%edi
1,80 ? ? cmove %ecx,%edi
4,20 ? ? test $0x4,%dl
0,86 ? ? mov %di,0x8(%eax)
2,98 ? ? mov %esi,%edi
1,37 ? ? cmove %ecx,%edi
4,67 ? ? test $0x2,%dl
0,20 ? ? mov %di,0xa(%eax)
2,78 ? ? mov %esi,%edi
0,75 ? ? cmove %ecx,%edi
3,92 ? ? and $0x1,%edx
0,75 ? ? mov %esi,%edx
2,59 ? ? mov %di,0xc(%eax)
0,59 ? ? cmove %ecx,%edx
3,10 ? ? mov %dx,0xe(%eax)
2,39 ? ? add $0x10,%eax
0,51 ? ? movzbl (%ebx),%edx
2,86 ? ? test %dl,%dl
2,31 ? ???jne b0
0,04 ?128: addl $0x1,-0x10(%ebp)
4,00 ? mov -0x1c(%ebp),%eax
0,04 ? add %eax,-0x18(%ebp)
0,08 ? cmpl $0x10,-0x10(%ebp)
? ? jne a0
which almost goes away from the profile:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 49K of event 'cycles'
# Event count (approx.): 16799780016
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
27.51% rawv libc-2.13.so [.] __memcpy_ssse3
23.77% vivi-* [kernel.kallsyms] [k] memcpy
9.96% Xorg [unknown] [.] 0xa76f5e12
4.94% vivi-* [vivi] [k] gen_text.constprop.6
4.44% rawv [vivi] [k] gen_twopix
3.17% vivi-* [vivi] [k] vivi_fillbuff
2.45% rawv [vivi] [k] precalculate_line
1.20% swapper [kernel.kallsyms] [k] read_hpet
i.e. gen_twopix() overhead dropped from 49% to 4% and gen_text() loops
from ~8% to ~4%, and overall cycle count dropped from 31551930117 to
16799780016 which is ~1.9x whole workload speedup.
(*) for RGB24 rendering I've introduced x24, which could be thought as
synthetic u24 for simplifying the code. That's done because for
memcpy used for conditional assignment, gcc generates suboptimal code
with more indirections.
Fortunately, in C struct assignment is builtin and that's all we
need from pixeltype for font rendering.
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:30 +00:00
|
|
|
/*
 * PRINTSTR(PIXTYPE): draw `text` into the frame at `basep`, treating the
 * frame as an array of PIXTYPE pixels that is `width` pixels wide.
 *
 * Relies on names from the enclosing function: dev, basep, y, x, text,
 * width and line, plus the file-level font8x16 table.
 *
 * fg and bg are local copies of the colors stored in dev->textfg and
 * dev->textbg. For each of the 16 glyph rows, and for each character of
 * `text` up to its NUL terminator, the row byte is read from
 * font8x16[character * 16 + row] and eight pixels are written: bit 7 goes
 * to pos[0] and bit 0 to pos[7], fg where the bit is set and bg where it
 * is clear. The eight stores are written out by hand rather than looped.
 *
 * No bounds checking is done here; the caller has to make sure the text
 * fits before expanding the macro.
 */
#define PRINTSTR(PIXTYPE) do { \
|
|
|
|
PIXTYPE fg; \
|
|
|
|
PIXTYPE bg; \
|
|
|
|
memcpy(&fg, &dev->textfg, sizeof(PIXTYPE)); \
|
|
|
|
memcpy(&bg, &dev->textbg, sizeof(PIXTYPE)); \
|
|
|
|
\
|
|
|
|
for (line = 0; line < 16; line++) { \
|
|
|
|
PIXTYPE *pos = (PIXTYPE *)( basep + ((y + line) * width + x) * sizeof(PIXTYPE) ); \
|
|
|
|
u8 *s; \
|
|
|
|
\
|
|
|
|
for (s = text; *s; s++) { \
|
|
|
|
u8 chr = font8x16[*s * 16 + line]; \
|
|
|
|
\
|
|
|
|
pos[0] = (chr & (0x01 << 7) ? fg : bg); \
|
|
|
|
pos[1] = (chr & (0x01 << 6) ? fg : bg); \
|
|
|
|
pos[2] = (chr & (0x01 << 5) ? fg : bg); \
|
|
|
|
pos[3] = (chr & (0x01 << 4) ? fg : bg); \
|
|
|
|
pos[4] = (chr & (0x01 << 3) ? fg : bg); \
|
|
|
|
pos[5] = (chr & (0x01 << 2) ? fg : bg); \
|
|
|
|
pos[6] = (chr & (0x01 << 1) ? fg : bg); \
|
|
|
|
pos[7] = (chr & (0x01 << 0) ? fg : bg); \
|
|
|
|
\
|
|
|
|
pos += 8; \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
switch (dev->pixelsize) {
|
|
|
|
case 2:
|
|
|
|
PRINTSTR(u16); break;
|
|
|
|
case 4:
|
|
|
|
PRINTSTR(u32); break;
|
|
|
|
case 3:
|
|
|
|
PRINTSTR(x24); break;
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
}
|
2008-04-02 21:10:59 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
static void vivi_fillbuff(struct vivi_dev *dev, struct vivi_buffer *buf)
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
[media] vivi: Move computations out of vivi_fillbuf linecopy loop
The "dev->mvcount % wmax" thing was showing high in profiles (we do it
for each line which ~ 500 per frame)
? 000010c0 <vivi_fillbuff>:
...
0,39 ? 70:???mov 0x3ff4(%edi),%esi
0,22 ? 76:? mov 0x2a0(%edi),%eax
0,30 ? ? mov -0x84(%ebp),%ebx
0,35 ? ? mov %eax,%edx
0,04 ? ? mov -0x7c(%ebp),%ecx
0,35 ? ? sar $0x1f,%edx
0,44 ? ? idivl -0x7c(%ebp)
21,68 ? ? imul %esi,%ecx
0,70 ? ? imul %esi,%ebx
0,52 ? ? add -0x88(%ebp),%ebx
1,65 ? ? mov %ebx,%eax
0,22 ? ? imul %edx,%esi
0,04 ? ? lea 0x3f4(%edi,%esi,1),%edx
2,18 ? ?? call vivi_fillbuff+0xa6
0,74 ? ? addl $0x1,-0x80(%ebp)
62,69 ? ? mov -0x7c(%ebp),%edx
1,18 ? ? mov -0x80(%ebp),%ecx
0,35 ? ? add %edx,-0x84(%ebp)
0,61 ? ? cmp %ecx,-0x8c(%ebp)
0,22 ? ???jne 70
so since all variables stay the same for all iterations let's move
computations out of the loop: the abovementioned division and
"width*pixelsize" too
before:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
#
# Samples: 49K of event 'cycles'
# Event count (approx.): 16475832370
#
# Overhead Command Shared Object
# ........ ............... ......................
#
29.07% rawv libc-2.13.so [.] __memcpy_ssse3
20.57% vivi-* [kernel.kallsyms] [k] memcpy
10.20% Xorg [unknown] [.] 0xa7301494
5.16% vivi-* [vivi] [k] gen_text.constprop.6
4.43% rawv [vivi] [k] gen_twopix
4.36% vivi-* [vivi] [k] vivi_fillbuff
2.42% rawv [vivi] [k] precalculate_line
1.33% swapper [kernel.kallsyms] [k] read_hpet
after:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
#
# Samples: 46K of event 'cycles'
# Event count (approx.): 15574200568
#
# Overhead Command Shared Object
# ........ ............... ....................
#
27.99% rawv libc-2.13.so [.] __memcpy_ssse3
23.29% vivi-* [kernel.kallsyms] [k] memcpy
10.30% Xorg [unknown] [.] 0xa75c98f8
5.34% vivi-* [vivi] [k] gen_text.constprop.6
4.61% rawv [vivi] [k] gen_twopix
2.64% rawv [vivi] [k] precalculate_line
1.37% swapper [kernel.kallsyms] [k] read_hpet
0.79% Xorg [kernel.kallsyms] [k] read_hpet
0.64% Xorg [kernel.kallsyms] [k] unix_poll
0.45% Xorg [kernel.kallsyms] [k] fget_light
0.43% rawv libxcb.so.1.1.0 [.] 0x0000aae9
0.40% runsv [kernel.kallsyms] [k] ext2_try_to_allocate
0.36% Xorg [kernel.kallsyms] [k] _raw_spin_lock_irqsave
0.31% vivi-* [vivi] [k] vivi_fillbuff
(i.e. vivi_fillbuff own overhead is almost gone)
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:32 +00:00
|
|
|
int stride = dev->width * dev->pixelsize;
|
2011-01-19 15:02:29 +00:00
|
|
|
int hmax = dev->height;
|
|
|
|
void *vbuf = vb2_plane_vaddr(&buf->vb, 0);
|
2010-04-10 07:13:53 +00:00
|
|
|
unsigned ms;
|
|
|
|
char str[100];
|
|
|
|
int h, line = 1;
|
[media] vivi: Move computations out of vivi_fillbuf linecopy loop
The "dev->mvcount % wmax" thing was showing high in profiles (we do it
for each line which ~ 500 per frame)
? 000010c0 <vivi_fillbuff>:
...
0,39 ? 70:???mov 0x3ff4(%edi),%esi
0,22 ? 76:? mov 0x2a0(%edi),%eax
0,30 ? ? mov -0x84(%ebp),%ebx
0,35 ? ? mov %eax,%edx
0,04 ? ? mov -0x7c(%ebp),%ecx
0,35 ? ? sar $0x1f,%edx
0,44 ? ? idivl -0x7c(%ebp)
21,68 ? ? imul %esi,%ecx
0,70 ? ? imul %esi,%ebx
0,52 ? ? add -0x88(%ebp),%ebx
1,65 ? ? mov %ebx,%eax
0,22 ? ? imul %edx,%esi
0,04 ? ? lea 0x3f4(%edi,%esi,1),%edx
2,18 ? ?? call vivi_fillbuff+0xa6
0,74 ? ? addl $0x1,-0x80(%ebp)
62,69 ? ? mov -0x7c(%ebp),%edx
1,18 ? ? mov -0x80(%ebp),%ecx
0,35 ? ? add %edx,-0x84(%ebp)
0,61 ? ? cmp %ecx,-0x8c(%ebp)
0,22 ? ???jne 70
so since all variables stay the same for all iterations let's move
computations out of the loop: the abovementioned division and
"width*pixelsize" too
before:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
#
# Samples: 49K of event 'cycles'
# Event count (approx.): 16475832370
#
# Overhead Command Shared Object
# ........ ............... ......................
#
29.07% rawv libc-2.13.so [.] __memcpy_ssse3
20.57% vivi-* [kernel.kallsyms] [k] memcpy
10.20% Xorg [unknown] [.] 0xa7301494
5.16% vivi-* [vivi] [k] gen_text.constprop.6
4.43% rawv [vivi] [k] gen_twopix
4.36% vivi-* [vivi] [k] vivi_fillbuff
2.42% rawv [vivi] [k] precalculate_line
1.33% swapper [kernel.kallsyms] [k] read_hpet
after:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
#
# Samples: 46K of event 'cycles'
# Event count (approx.): 15574200568
#
# Overhead Command Shared Object
# ........ ............... ....................
#
27.99% rawv libc-2.13.so [.] __memcpy_ssse3
23.29% vivi-* [kernel.kallsyms] [k] memcpy
10.30% Xorg [unknown] [.] 0xa75c98f8
5.34% vivi-* [vivi] [k] gen_text.constprop.6
4.61% rawv [vivi] [k] gen_twopix
2.64% rawv [vivi] [k] precalculate_line
1.37% swapper [kernel.kallsyms] [k] read_hpet
0.79% Xorg [kernel.kallsyms] [k] read_hpet
0.64% Xorg [kernel.kallsyms] [k] unix_poll
0.45% Xorg [kernel.kallsyms] [k] fget_light
0.43% rawv libxcb.so.1.1.0 [.] 0x0000aae9
0.40% runsv [kernel.kallsyms] [k] ext2_try_to_allocate
0.36% Xorg [kernel.kallsyms] [k] _raw_spin_lock_irqsave
0.31% vivi-* [vivi] [k] vivi_fillbuff
(i.e. vivi_fillbuff own overhead is almost gone)
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:32 +00:00
|
|
|
u8 *linestart;
|
2011-06-07 09:34:41 +00:00
|
|
|
s32 gain;
|
2007-01-25 08:00:01 +00:00
|
|
|
|
2008-06-22 12:11:40 +00:00
|
|
|
if (!vbuf)
|
2007-08-03 02:31:54 +00:00
|
|
|
return;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
[media] vivi: Move computations out of vivi_fillbuf linecopy loop
The "dev->mvcount % wmax" thing was showing high in profiles (we do it
for each line which ~ 500 per frame)
? 000010c0 <vivi_fillbuff>:
...
0,39 ? 70:???mov 0x3ff4(%edi),%esi
0,22 ? 76:? mov 0x2a0(%edi),%eax
0,30 ? ? mov -0x84(%ebp),%ebx
0,35 ? ? mov %eax,%edx
0,04 ? ? mov -0x7c(%ebp),%ecx
0,35 ? ? sar $0x1f,%edx
0,44 ? ? idivl -0x7c(%ebp)
21,68 ? ? imul %esi,%ecx
0,70 ? ? imul %esi,%ebx
0,52 ? ? add -0x88(%ebp),%ebx
1,65 ? ? mov %ebx,%eax
0,22 ? ? imul %edx,%esi
0,04 ? ? lea 0x3f4(%edi,%esi,1),%edx
2,18 ? ?? call vivi_fillbuff+0xa6
0,74 ? ? addl $0x1,-0x80(%ebp)
62,69 ? ? mov -0x7c(%ebp),%edx
1,18 ? ? mov -0x80(%ebp),%ecx
0,35 ? ? add %edx,-0x84(%ebp)
0,61 ? ? cmp %ecx,-0x8c(%ebp)
0,22 ? ???jne 70
so since all variables stay the same for all iterations let's move
computations out of the loop: the abovementioned division and
"width*pixelsize" too
before:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
#
# Samples: 49K of event 'cycles'
# Event count (approx.): 16475832370
#
# Overhead Command Shared Object
# ........ ............... ......................
#
29.07% rawv libc-2.13.so [.] __memcpy_ssse3
20.57% vivi-* [kernel.kallsyms] [k] memcpy
10.20% Xorg [unknown] [.] 0xa7301494
5.16% vivi-* [vivi] [k] gen_text.constprop.6
4.43% rawv [vivi] [k] gen_twopix
4.36% vivi-* [vivi] [k] vivi_fillbuff
2.42% rawv [vivi] [k] precalculate_line
1.33% swapper [kernel.kallsyms] [k] read_hpet
after:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
#
# Samples: 46K of event 'cycles'
# Event count (approx.): 15574200568
#
# Overhead Command Shared Object
# ........ ............... ....................
#
27.99% rawv libc-2.13.so [.] __memcpy_ssse3
23.29% vivi-* [kernel.kallsyms] [k] memcpy
10.30% Xorg [unknown] [.] 0xa75c98f8
5.34% vivi-* [vivi] [k] gen_text.constprop.6
4.61% rawv [vivi] [k] gen_twopix
2.64% rawv [vivi] [k] precalculate_line
1.37% swapper [kernel.kallsyms] [k] read_hpet
0.79% Xorg [kernel.kallsyms] [k] read_hpet
0.64% Xorg [kernel.kallsyms] [k] unix_poll
0.45% Xorg [kernel.kallsyms] [k] fget_light
0.43% rawv libxcb.so.1.1.0 [.] 0x0000aae9
0.40% runsv [kernel.kallsyms] [k] ext2_try_to_allocate
0.36% Xorg [kernel.kallsyms] [k] _raw_spin_lock_irqsave
0.31% vivi-* [vivi] [k] vivi_fillbuff
(i.e. vivi_fillbuff own overhead is almost gone)
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:32 +00:00
|
|
|
linestart = dev->line + (dev->mv_count % dev->width) * dev->pixelsize;
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
for (h = 0; h < hmax; h++)
|
[media] vivi: Move computations out of vivi_fillbuf linecopy loop
The "dev->mvcount % wmax" thing was showing high in profiles (we do it
for each line which ~ 500 per frame)
? 000010c0 <vivi_fillbuff>:
...
0,39 ? 70:???mov 0x3ff4(%edi),%esi
0,22 ? 76:? mov 0x2a0(%edi),%eax
0,30 ? ? mov -0x84(%ebp),%ebx
0,35 ? ? mov %eax,%edx
0,04 ? ? mov -0x7c(%ebp),%ecx
0,35 ? ? sar $0x1f,%edx
0,44 ? ? idivl -0x7c(%ebp)
21,68 ? ? imul %esi,%ecx
0,70 ? ? imul %esi,%ebx
0,52 ? ? add -0x88(%ebp),%ebx
1,65 ? ? mov %ebx,%eax
0,22 ? ? imul %edx,%esi
0,04 ? ? lea 0x3f4(%edi,%esi,1),%edx
2,18 ? ?? call vivi_fillbuff+0xa6
0,74 ? ? addl $0x1,-0x80(%ebp)
62,69 ? ? mov -0x7c(%ebp),%edx
1,18 ? ? mov -0x80(%ebp),%ecx
0,35 ? ? add %edx,-0x84(%ebp)
0,61 ? ? cmp %ecx,-0x8c(%ebp)
0,22 ? ???jne 70
so since all variables stay the same for all iterations let's move
computations out of the loop: the abovementioned division and
"width*pixelsize" too
before:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
#
# Samples: 49K of event 'cycles'
# Event count (approx.): 16475832370
#
# Overhead Command Shared Object
# ........ ............... ......................
#
29.07% rawv libc-2.13.so [.] __memcpy_ssse3
20.57% vivi-* [kernel.kallsyms] [k] memcpy
10.20% Xorg [unknown] [.] 0xa7301494
5.16% vivi-* [vivi] [k] gen_text.constprop.6
4.43% rawv [vivi] [k] gen_twopix
4.36% vivi-* [vivi] [k] vivi_fillbuff
2.42% rawv [vivi] [k] precalculate_line
1.33% swapper [kernel.kallsyms] [k] read_hpet
after:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
#
# Samples: 46K of event 'cycles'
# Event count (approx.): 15574200568
#
# Overhead Command Shared Object
# ........ ............... ....................
#
27.99% rawv libc-2.13.so [.] __memcpy_ssse3
23.29% vivi-* [kernel.kallsyms] [k] memcpy
10.30% Xorg [unknown] [.] 0xa75c98f8
5.34% vivi-* [vivi] [k] gen_text.constprop.6
4.61% rawv [vivi] [k] gen_twopix
2.64% rawv [vivi] [k] precalculate_line
1.37% swapper [kernel.kallsyms] [k] read_hpet
0.79% Xorg [kernel.kallsyms] [k] read_hpet
0.64% Xorg [kernel.kallsyms] [k] unix_poll
0.45% Xorg [kernel.kallsyms] [k] fget_light
0.43% rawv libxcb.so.1.1.0 [.] 0x0000aae9
0.40% runsv [kernel.kallsyms] [k] ext2_try_to_allocate
0.36% Xorg [kernel.kallsyms] [k] _raw_spin_lock_irqsave
0.31% vivi-* [vivi] [k] vivi_fillbuff
(i.e. vivi_fillbuff own overhead is almost gone)
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:32 +00:00
|
|
|
memcpy(vbuf + h * stride, linestart, stride);
|
2007-08-03 02:31:54 +00:00
|
|
|
|
2006-03-10 15:40:10 +00:00
|
|
|
/* Updates stream time */
|
|
|
|
|
[media] vivi: Optimize gen_text()
I've noticed that vivi takes a lot of CPU to produce its frames.
For example for 8 devices and 8 simple programs running, where each
captures YUY2 640x480 and displays it to X via SDL, profile timing is as
follows:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 82K of event 'cycles'
# Event count (approx.): 31551930117
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
49.48% vivi-* [vivi] [k] gen_twopix
10.79% vivi-* [kernel.kallsyms] [k] memcpy
10.02% rawv libc-2.13.so [.] __memcpy_ssse3
8.35% vivi-* [vivi] [k] gen_text.constprop.6
5.06% Xorg [unknown] [.] 0xa73015f8
2.32% rawv [vivi] [k] gen_twopix
1.22% rawv [vivi] [k] precalculate_line
1.20% vivi-* [vivi] [k] vivi_fillbuff
(rawv is display program, vivi-* is a combination of vivi-000 through vivi-007)
so a lot of time is spent in gen_twopix() which as the following
call-graph profile shows ...
49.48% vivi-* [vivi] [k] gen_twopix
|
--- gen_twopix
|
|--96.30%-- gen_text.constprop.6
| vivi_fillbuff
| vivi_thread
| kthread
| ret_from_kernel_thread
|
--3.70%-- vivi_fillbuff
vivi_thread
kthread
ret_from_kernel_thread
... is called mostly from gen_text().
If we'll look at gen_text(), in the inner loop, we'll see
if (chr & (1 << (7 - i)))
gen_twopix(dev, pos + j * dev->pixelsize, WHITE, (x+y) & 1);
else
gen_twopix(dev, pos + j * dev->pixelsize, TEXT_BLACK, (x+y) & 1);
which calls gen_twopix() for every character pixel, and that is very
expensive, because gen_twopix() branches several times.
Now, let's note, that we operate on only two colors - WHITE and
TEXT_BLACK, and that pixel for that colors could be precomputed and
gen_twopix() moved out of the inner loop. Also note, that for black
and white colors even/odd does not make a difference for all supported
pixel formats, so we could stop doing that `odd` gen_twopix() parameter
game.
So the first thing we are doing here is
1) moving gen_twopix() calls out of gen_text() into vivi_fillbuff(),
to pregenerate black and white colors, just before printing
starts.
what we have next is that gen_text's font rendering loop, even with
gen_twopix() calls moved out, was inefficient and branchy, so let's
2) rewrite gen_text() loop so it uses less variables + unroll char
horizontal-rendering loop + instantiate 3 code paths for pixelsizes 2,3
and 4 so that in all inner loops we don't have to branch or make
indirections (*).
Done all above reworks, for gen_text() we get nice, non-branchy
streamlined code (showing loop for pixelsize=2):
? cmp $0x2,%eax
? ? jne 26
? mov -0x18(%ebp),%eax
? mov -0x20(%ebp),%edi
? imul -0x20(%ebp),%eax
? movzwl 0x3ffc(%ebx),%esi
0,08 ? movzwl 0x4000(%ebx),%ecx
0,04 ? add %edi,%edi
? mov 0x0,%ebx
0,51 ? mov %edi,-0x1c(%ebp)
? mov %ebx,-0x14(%ebp)
? movl $0x0,-0x10(%ebp)
? lea 0x20(%edx,%eax,2),%eax
? mov %eax,-0x18(%ebp)
? xchg %ax,%ax
0,04 ? a0: mov 0x8(%ebp),%ebx
? mov -0x18(%ebp),%eax
0,04 ? movzbl (%ebx),%edx
0,16 ? test %dl,%dl
0,04 ? ? je 128
0,08 ? lea 0x0(%esi),%esi
1,61 ? b0:???shl $0x4,%edx
1,02 ? ? mov -0x14(%ebp),%edi
2,04 ? ? add -0x10(%ebp),%edx
2,24 ? ? lea 0x1(%ebx),%ebx
0,27 ? ? movzbl (%edi,%edx,1),%edx
9,92 ? ? mov %esi,%edi
0,39 ? ? test %dl,%dl
2,04 ? ? cmovns %ecx,%edi
4,63 ? ? test $0x40,%dl
0,55 ? ? mov %di,(%eax)
3,76 ? ? mov %esi,%edi
0,71 ? ? cmove %ecx,%edi
3,41 ? ? test $0x20,%dl
0,75 ? ? mov %di,0x2(%eax)
2,43 ? ? mov %esi,%edi
0,59 ? ? cmove %ecx,%edi
4,59 ? ? test $0x10,%dl
0,67 ? ? mov %di,0x4(%eax)
2,55 ? ? mov %esi,%edi
0,78 ? ? cmove %ecx,%edi
4,31 ? ? test $0x8,%dl
0,67 ? ? mov %di,0x6(%eax)
5,76 ? ? mov %esi,%edi
1,80 ? ? cmove %ecx,%edi
4,20 ? ? test $0x4,%dl
0,86 ? ? mov %di,0x8(%eax)
2,98 ? ? mov %esi,%edi
1,37 ? ? cmove %ecx,%edi
4,67 ? ? test $0x2,%dl
0,20 ? ? mov %di,0xa(%eax)
2,78 ? ? mov %esi,%edi
0,75 ? ? cmove %ecx,%edi
3,92 ? ? and $0x1,%edx
0,75 ? ? mov %esi,%edx
2,59 ? ? mov %di,0xc(%eax)
0,59 ? ? cmove %ecx,%edx
3,10 ? ? mov %dx,0xe(%eax)
2,39 ? ? add $0x10,%eax
0,51 ? ? movzbl (%ebx),%edx
2,86 ? ? test %dl,%dl
2,31 ? ???jne b0
0,04 ?128: addl $0x1,-0x10(%ebp)
4,00 ? mov -0x1c(%ebp),%eax
0,04 ? add %eax,-0x18(%ebp)
0,08 ? cmpl $0x10,-0x10(%ebp)
? ? jne a0
which almost goes away from the profile:
# cmdline : /home/kirr/local/perf/bin/perf record -g -a sleep 20
# Samples: 49K of event 'cycles'
# Event count (approx.): 16799780016
#
# Overhead Command Shared Object Symbol
# ........ ............... ....................
#
27.51% rawv libc-2.13.so [.] __memcpy_ssse3
23.77% vivi-* [kernel.kallsyms] [k] memcpy
9.96% Xorg [unknown] [.] 0xa76f5e12
4.94% vivi-* [vivi] [k] gen_text.constprop.6
4.44% rawv [vivi] [k] gen_twopix
3.17% vivi-* [vivi] [k] vivi_fillbuff
2.45% rawv [vivi] [k] precalculate_line
1.20% swapper [kernel.kallsyms] [k] read_hpet
i.e. gen_twopix() overhead dropped from 49% to 4% and gen_text() loops
from ~8% to ~4%, and the overall cycle count dropped from 31551930117 to
16799780016, which is a ~1.9x speedup of the whole workload.
(*) for RGB24 rendering I've introduced x24, which could be thought of as
a synthetic u24 for simplifying the code. That's done because, when
memcpy is used for conditional assignment, gcc generates suboptimal code
with more indirections.
Fortunately, in C struct assignment is builtin and that's all we
need from pixeltype for font rendering.
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2012-11-02 12:10:30 +00:00
|
|
|
gen_twopix(dev, (u8 *)&dev->textbg, TEXT_BLACK, /*odd=*/ 0);
|
|
|
|
gen_twopix(dev, (u8 *)&dev->textfg, WHITE, /*odd=*/ 0);
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
dev->ms += jiffies_to_msecs(jiffies - dev->jiffies);
|
2007-12-10 12:33:52 +00:00
|
|
|
dev->jiffies = jiffies;
|
2010-04-10 07:13:53 +00:00
|
|
|
ms = dev->ms;
|
|
|
|
snprintf(str, sizeof(str), " %02d:%02d:%02d:%03d ",
|
|
|
|
(ms / (60 * 60 * 1000)) % 24,
|
|
|
|
(ms / (60 * 1000)) % 60,
|
|
|
|
(ms / 1000) % 60,
|
|
|
|
ms % 1000);
|
|
|
|
gen_text(dev, vbuf, line++ * 16, 16, str);
|
|
|
|
snprintf(str, sizeof(str), " %dx%d, input %d ",
|
|
|
|
dev->width, dev->height, dev->input);
|
|
|
|
gen_text(dev, vbuf, line++ * 16, 16, str);
|
|
|
|
|
2011-06-07 09:34:41 +00:00
|
|
|
gain = v4l2_ctrl_g_ctrl(dev->gain);
|
2012-01-25 00:05:34 +00:00
|
|
|
mutex_lock(dev->ctrl_handler.lock);
|
2010-04-10 07:13:53 +00:00
|
|
|
snprintf(str, sizeof(str), " brightness %3d, contrast %3d, saturation %3d, hue %d ",
|
2011-01-23 14:33:16 +00:00
|
|
|
dev->brightness->cur.val,
|
|
|
|
dev->contrast->cur.val,
|
|
|
|
dev->saturation->cur.val,
|
|
|
|
dev->hue->cur.val);
|
2010-04-10 07:13:53 +00:00
|
|
|
gen_text(dev, vbuf, line++ * 16, 16, str);
|
2012-05-02 06:33:52 +00:00
|
|
|
snprintf(str, sizeof(str), " autogain %d, gain %3d, volume %3d, alpha 0x%02x ",
|
|
|
|
dev->autogain->cur.val, gain, dev->volume->cur.val,
|
|
|
|
dev->alpha->cur.val);
|
2010-04-10 07:13:53 +00:00
|
|
|
gen_text(dev, vbuf, line++ * 16, 16, str);
|
2011-03-29 19:33:11 +00:00
|
|
|
snprintf(str, sizeof(str), " int32 %d, int64 %lld, bitmask %08x ",
|
2011-01-23 14:33:16 +00:00
|
|
|
dev->int32->cur.val,
|
2011-03-29 19:33:11 +00:00
|
|
|
dev->int64->cur.val64,
|
|
|
|
dev->bitmask->cur.val);
|
2011-01-23 14:33:16 +00:00
|
|
|
gen_text(dev, vbuf, line++ * 16, 16, str);
|
|
|
|
snprintf(str, sizeof(str), " boolean %d, menu %s, string \"%s\" ",
|
|
|
|
dev->boolean->cur.val,
|
|
|
|
dev->menu->qmenu[dev->menu->cur.val],
|
|
|
|
dev->string->cur.string);
|
2012-04-19 14:44:18 +00:00
|
|
|
gen_text(dev, vbuf, line++ * 16, 16, str);
|
2011-08-05 09:38:05 +00:00
|
|
|
snprintf(str, sizeof(str), " integer_menu %lld, value %d ",
|
|
|
|
dev->int_menu->qmenu_int[dev->int_menu->cur.val],
|
|
|
|
dev->int_menu->cur.val);
|
|
|
|
gen_text(dev, vbuf, line++ * 16, 16, str);
|
2012-01-25 00:05:34 +00:00
|
|
|
mutex_unlock(dev->ctrl_handler.lock);
|
2011-01-23 14:33:16 +00:00
|
|
|
if (dev->button_pressed) {
|
|
|
|
dev->button_pressed--;
|
|
|
|
snprintf(str, sizeof(str), " button pressed!");
|
|
|
|
gen_text(dev, vbuf, line++ * 16, 16, str);
|
|
|
|
}
|
2010-04-10 07:13:53 +00:00
|
|
|
|
|
|
|
dev->mv_count += 2;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2012-07-25 14:48:53 +00:00
|
|
|
buf->vb.v4l2_buf.field = V4L2_FIELD_INTERLACED;
|
2014-02-10 11:08:45 +00:00
|
|
|
buf->vb.v4l2_buf.sequence = dev->seq_count++;
|
2012-09-15 18:14:42 +00:00
|
|
|
v4l2_get_timestamp(&buf->vb.v4l2_buf.timestamp);
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
static void vivi_thread_tick(struct vivi_dev *dev)
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
2008-04-02 21:10:59 +00:00
|
|
|
struct vivi_dmaqueue *dma_q = &dev->vidq;
|
2010-04-10 07:13:53 +00:00
|
|
|
struct vivi_buffer *buf;
|
2008-04-02 21:10:59 +00:00
|
|
|
unsigned long flags = 0;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2008-04-02 21:10:59 +00:00
|
|
|
dprintk(dev, 1, "Thread tick\n");
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2008-04-02 21:10:59 +00:00
|
|
|
spin_lock_irqsave(&dev->slock, flags);
|
|
|
|
if (list_empty(&dma_q->active)) {
|
|
|
|
dprintk(dev, 1, "No active queue to serve\n");
|
2011-07-05 10:19:23 +00:00
|
|
|
spin_unlock_irqrestore(&dev->slock, flags);
|
|
|
|
return;
|
2008-04-02 21:10:59 +00:00
|
|
|
}
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
buf = list_entry(dma_q->active.next, struct vivi_buffer, list);
|
|
|
|
list_del(&buf->list);
|
2011-07-05 10:19:23 +00:00
|
|
|
spin_unlock_irqrestore(&dev->slock, flags);
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2012-09-15 18:14:42 +00:00
|
|
|
v4l2_get_timestamp(&buf->vb.v4l2_buf.timestamp);
|
2008-04-02 21:10:59 +00:00
|
|
|
|
|
|
|
/* Fill buffer */
|
2010-04-10 07:13:53 +00:00
|
|
|
vivi_fillbuff(dev, buf);
|
2008-04-02 21:10:59 +00:00
|
|
|
dprintk(dev, 1, "filled buffer %p\n", buf);
|
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
vb2_buffer_done(&buf->vb, VB2_BUF_STATE_DONE);
|
|
|
|
dprintk(dev, 2, "[%p/%d] done\n", buf, buf->vb.v4l2_buf.index);
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
|
2012-10-23 12:56:59 +00:00
|
|
|
/*
 * Convert a number of frames into milliseconds using the device's current
 * time-per-frame fraction (numerator / denominator, in seconds).
 *
 * Both arguments are parenthesized so that expression arguments such as
 * "a + b" or "&some_dev" expand with the intended precedence.
 */
#define frames_to_ms(dev, frames)				\
	(((frames) * (dev)->timeperframe.numerator * 1000) /	\
	 (dev)->timeperframe.denominator)
|
2007-12-13 19:15:41 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
static void vivi_sleep(struct vivi_dev *dev)
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
2008-04-02 21:10:59 +00:00
|
|
|
struct vivi_dmaqueue *dma_q = &dev->vidq;
|
|
|
|
int timeout;
|
2006-03-10 15:40:10 +00:00
|
|
|
DECLARE_WAITQUEUE(wait, current);
|
|
|
|
|
2008-04-09 02:20:00 +00:00
|
|
|
dprintk(dev, 1, "%s dma_q=0x%08lx\n", __func__,
|
2007-12-13 16:30:14 +00:00
|
|
|
(unsigned long)dma_q);
|
2006-03-10 15:40:10 +00:00
|
|
|
|
|
|
|
add_wait_queue(&dma_q->wq, &wait);
|
2007-12-13 19:15:41 +00:00
|
|
|
if (kthread_should_stop())
|
|
|
|
goto stop_task;
|
|
|
|
|
|
|
|
/* Calculate time to wake up */
|
2012-10-23 12:56:59 +00:00
|
|
|
timeout = msecs_to_jiffies(frames_to_ms(dev, 1));
|
2007-12-13 19:15:41 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
vivi_thread_tick(dev);
|
2007-12-13 19:15:41 +00:00
|
|
|
|
|
|
|
schedule_timeout_interruptible(timeout);
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2007-12-13 19:15:41 +00:00
|
|
|
stop_task:
|
2006-03-10 15:40:10 +00:00
|
|
|
remove_wait_queue(&dma_q->wq, &wait);
|
|
|
|
try_to_freeze();
|
|
|
|
}
|
|
|
|
|
2006-04-28 00:06:50 +00:00
|
|
|
/*
 * Entry point of the frame generator kthread.  @data is the vivi_dev the
 * thread serves.  Runs vivi_sleep() repeatedly until kthread_should_stop()
 * reports true, then returns 0.
 */
static int vivi_thread(void *data)
{
	struct vivi_dev *dev = data;

	dprintk(dev, 1, "thread started\n");

	set_freezable();

	/* vivi_sleep() always runs at least once before the stop check. */
	do {
		vivi_sleep(dev);
	} while (!kthread_should_stop());

	dprintk(dev, 1, "thread: exit\n");
	return 0;
}
|
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
static int vivi_start_generating(struct vivi_dev *dev)
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
2008-04-02 21:10:59 +00:00
|
|
|
struct vivi_dmaqueue *dma_q = &dev->vidq;
|
2007-12-13 16:30:14 +00:00
|
|
|
|
2008-04-09 02:20:00 +00:00
|
|
|
dprintk(dev, 1, "%s\n", __func__);
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
/* Resets frame counters */
|
|
|
|
dev->ms = 0;
|
|
|
|
dev->mv_count = 0;
|
|
|
|
dev->jiffies = jiffies;
|
|
|
|
|
|
|
|
dma_q->frame = 0;
|
|
|
|
dma_q->ini_jiffies = jiffies;
|
2013-07-03 22:04:58 +00:00
|
|
|
dma_q->kthread = kthread_run(vivi_thread, dev, "%s",
|
|
|
|
dev->v4l2_dev.name);
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2006-12-20 13:04:00 +00:00
|
|
|
if (IS_ERR(dma_q->kthread)) {
|
2009-02-14 16:23:12 +00:00
|
|
|
v4l2_err(&dev->v4l2_dev, "kernel_thread() failed\n");
|
2011-01-19 15:02:29 +00:00
|
|
|
return PTR_ERR(dma_q->kthread);
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
2007-01-14 11:33:24 +00:00
|
|
|
/* Wakes thread */
|
|
|
|
wake_up_interruptible(&dma_q->wq);
|
|
|
|
|
2008-04-09 02:20:00 +00:00
|
|
|
dprintk(dev, 1, "returning from %s\n", __func__);
|
2011-01-19 15:02:29 +00:00
|
|
|
return 0;
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
static void vivi_stop_generating(struct vivi_dev *dev)
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
struct vivi_dmaqueue *dma_q = &dev->vidq;
|
2007-12-13 16:30:14 +00:00
|
|
|
|
2008-04-09 02:20:00 +00:00
|
|
|
dprintk(dev, 1, "%s\n", __func__);
|
2010-04-10 07:13:53 +00:00
|
|
|
|
2006-03-10 15:40:10 +00:00
|
|
|
/* shutdown control thread */
|
|
|
|
if (dma_q->kthread) {
|
|
|
|
kthread_stop(dma_q->kthread);
|
2007-12-10 12:33:52 +00:00
|
|
|
dma_q->kthread = NULL;
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
2010-04-10 07:13:53 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
/*
|
|
|
|
* Typical driver might need to wait here until dma engine stops.
|
|
|
|
* In this case we can abort imiedetly, so it's just a noop.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Release all active buffers */
|
|
|
|
while (!list_empty(&dma_q->active)) {
|
|
|
|
struct vivi_buffer *buf;
|
|
|
|
buf = list_entry(dma_q->active.next, struct vivi_buffer, list);
|
|
|
|
list_del(&buf->list);
|
|
|
|
vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR);
|
|
|
|
dprintk(dev, 2, "[%p/%d] done\n", buf, buf->vb.v4l2_buf.index);
|
|
|
|
}
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
/* ------------------------------------------------------------------
|
|
|
|
Videobuf operations
|
|
|
|
------------------------------------------------------------------*/
|
2011-08-24 13:30:21 +00:00
|
|
|
static int queue_setup(struct vb2_queue *vq, const struct v4l2_format *fmt,
|
|
|
|
unsigned int *nbuffers, unsigned int *nplanes,
|
|
|
|
unsigned int sizes[], void *alloc_ctxs[])
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
2011-01-19 15:02:29 +00:00
|
|
|
struct vivi_dev *dev = vb2_get_drv_priv(vq);
|
|
|
|
unsigned long size;
|
|
|
|
|
2014-02-10 11:08:47 +00:00
|
|
|
size = dev->width * dev->height * dev->pixelsize;
|
|
|
|
if (fmt) {
|
|
|
|
if (fmt->fmt.pix.sizeimage < size)
|
|
|
|
return -EINVAL;
|
2012-06-22 08:53:31 +00:00
|
|
|
size = fmt->fmt.pix.sizeimage;
|
2014-02-10 11:08:47 +00:00
|
|
|
/* check against insane over 8K resolution buffers */
|
|
|
|
if (size > 7680 * 4320 * dev->pixelsize)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2007-08-23 19:41:14 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
*nplanes = 1;
|
2007-08-23 19:41:14 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
sizes[0] = size;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* videobuf2-vmalloc allocator is context-less so no need to set
|
|
|
|
* alloc_ctxs array.
|
|
|
|
*/
|
|
|
|
|
|
|
|
dprintk(dev, 1, "%s, count=%d, size=%ld\n", __func__,
|
|
|
|
*nbuffers, size);
|
2007-08-23 19:41:14 +00:00
|
|
|
|
2006-03-10 15:40:10 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
static int buffer_prepare(struct vb2_buffer *vb)
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
2011-01-19 15:02:29 +00:00
|
|
|
struct vivi_dev *dev = vb2_get_drv_priv(vb->vb2_queue);
|
2007-12-10 12:33:52 +00:00
|
|
|
struct vivi_buffer *buf = container_of(vb, struct vivi_buffer, vb);
|
2011-01-19 15:02:29 +00:00
|
|
|
unsigned long size;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
dprintk(dev, 1, "%s, field=%d\n", __func__, vb->v4l2_buf.field);
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
BUG_ON(NULL == dev->fmt);
|
2008-04-02 21:10:59 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
/*
|
|
|
|
* Theses properties only change when queue is idle, see s_fmt.
|
|
|
|
* The below checks should not be performed here, on each
|
|
|
|
* buffer_prepare (i.e. on each qbuf). Most of the code in this function
|
|
|
|
* should thus be moved to buffer_init and s_fmt.
|
|
|
|
*/
|
2010-04-10 07:13:53 +00:00
|
|
|
if (dev->width < 48 || dev->width > MAX_WIDTH ||
|
|
|
|
dev->height < 32 || dev->height > MAX_HEIGHT)
|
2006-03-10 15:40:10 +00:00
|
|
|
return -EINVAL;
|
2008-04-02 21:10:59 +00:00
|
|
|
|
2012-05-02 06:15:11 +00:00
|
|
|
size = dev->width * dev->height * dev->pixelsize;
|
2011-01-19 15:02:29 +00:00
|
|
|
if (vb2_plane_size(vb, 0) < size) {
|
|
|
|
dprintk(dev, 1, "%s data will not fit into plane (%lu < %lu)\n",
|
|
|
|
__func__, vb2_plane_size(vb, 0), size);
|
2006-03-10 15:40:10 +00:00
|
|
|
return -EINVAL;
|
2011-01-19 15:02:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
vb2_set_plane_payload(&buf->vb, 0, size);
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
precalculate_bars(dev);
|
|
|
|
precalculate_line(dev);
|
2009-06-25 19:28:23 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
return 0;
|
|
|
|
}
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
static void buffer_queue(struct vb2_buffer *vb)
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
2011-01-19 15:02:29 +00:00
|
|
|
struct vivi_dev *dev = vb2_get_drv_priv(vb->vb2_queue);
|
2010-04-10 07:13:53 +00:00
|
|
|
struct vivi_buffer *buf = container_of(vb, struct vivi_buffer, vb);
|
2008-04-02 21:10:59 +00:00
|
|
|
struct vivi_dmaqueue *vidq = &dev->vidq;
|
2011-01-19 15:02:29 +00:00
|
|
|
unsigned long flags = 0;
|
2008-04-02 21:10:59 +00:00
|
|
|
|
2008-04-09 02:20:00 +00:00
|
|
|
dprintk(dev, 1, "%s\n", __func__);
|
2008-04-02 21:10:59 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
spin_lock_irqsave(&dev->slock, flags);
|
|
|
|
list_add_tail(&buf->list, &vidq->active);
|
|
|
|
spin_unlock_irqrestore(&dev->slock, flags);
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
|
2011-08-29 11:51:49 +00:00
|
|
|
static int start_streaming(struct vb2_queue *vq, unsigned int count)
|
2006-03-10 15:40:10 +00:00
|
|
|
{
|
2011-01-19 15:02:29 +00:00
|
|
|
struct vivi_dev *dev = vb2_get_drv_priv(vq);
|
2014-02-21 08:34:49 +00:00
|
|
|
int err;
|
2014-02-10 11:08:45 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
dprintk(dev, 1, "%s\n", __func__);
|
2014-02-10 11:08:45 +00:00
|
|
|
dev->seq_count = 0;
|
2014-02-21 08:34:49 +00:00
|
|
|
err = vivi_start_generating(dev);
|
|
|
|
if (err) {
|
|
|
|
struct vivi_buffer *buf, *tmp;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(buf, tmp, &dev->vidq.active, list) {
|
|
|
|
list_del(&buf->list);
|
|
|
|
vb2_buffer_done(&buf->vb, VB2_BUF_STATE_QUEUED);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return err;
|
2011-01-19 15:02:29 +00:00
|
|
|
}
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
/* abort streaming and wait for last buffer */
|
2014-04-17 05:47:21 +00:00
|
|
|
/* vb2 stop_streaming callback: log the call and stop the generator. */
static void stop_streaming(struct vb2_queue *vq)
{
	struct vivi_dev *vivi = vb2_get_drv_priv(vq);

	dprintk(vivi, 1, "%s\n", __func__);
	vivi_stop_generating(vivi);
}
|
|
|
|
|
|
|
|
static void vivi_lock(struct vb2_queue *vq)
|
|
|
|
{
|
|
|
|
struct vivi_dev *dev = vb2_get_drv_priv(vq);
|
|
|
|
mutex_lock(&dev->mutex);
|
|
|
|
}
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
static void vivi_unlock(struct vb2_queue *vq)
|
|
|
|
{
|
|
|
|
struct vivi_dev *dev = vb2_get_drv_priv(vq);
|
|
|
|
mutex_unlock(&dev->mutex);
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
|
2011-01-19 15:02:29 +00:00
|
|
|
|
2012-12-26 15:23:26 +00:00
|
|
|
static const struct vb2_ops vivi_video_qops = {
|
2011-01-19 15:02:29 +00:00
|
|
|
.queue_setup = queue_setup,
|
|
|
|
.buf_prepare = buffer_prepare,
|
|
|
|
.buf_queue = buffer_queue,
|
|
|
|
.start_streaming = start_streaming,
|
|
|
|
.stop_streaming = stop_streaming,
|
|
|
|
.wait_prepare = vivi_unlock,
|
|
|
|
.wait_finish = vivi_lock,
|
2006-03-10 15:40:10 +00:00
|
|
|
};
|
|
|
|
|
2006-06-04 13:34:12 +00:00
|
|
|
/* ------------------------------------------------------------------
|
|
|
|
IOCTL vidioc handling
|
|
|
|
------------------------------------------------------------------*/
|
2007-12-10 12:33:52 +00:00
|
|
|
static int vidioc_querycap(struct file *file, void *priv,
|
2006-06-04 13:34:12 +00:00
|
|
|
struct v4l2_capability *cap)
|
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
struct vivi_dev *dev = video_drvdata(file);
|
2009-02-14 16:23:12 +00:00
|
|
|
|
2006-06-04 13:34:12 +00:00
|
|
|
strcpy(cap->driver, "vivi");
|
|
|
|
strcpy(cap->card, "vivi");
|
2012-09-14 09:23:12 +00:00
|
|
|
snprintf(cap->bus_info, sizeof(cap->bus_info),
|
|
|
|
"platform:%s", dev->v4l2_dev.name);
|
2012-01-24 08:24:36 +00:00
|
|
|
cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING |
|
|
|
|
V4L2_CAP_READWRITE;
|
|
|
|
cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
|
2006-06-04 13:34:12 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-05-28 15:16:41 +00:00
|
|
|
static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
|
2006-06-04 13:34:12 +00:00
|
|
|
struct v4l2_fmtdesc *f)
|
|
|
|
{
|
2012-12-26 15:23:26 +00:00
|
|
|
const struct vivi_fmt *fmt;
|
2008-10-14 15:47:09 +00:00
|
|
|
|
|
|
|
if (f->index >= ARRAY_SIZE(formats))
|
2006-06-04 13:34:12 +00:00
|
|
|
return -EINVAL;
|
|
|
|
|
2008-10-14 15:47:09 +00:00
|
|
|
fmt = &formats[f->index];
|
|
|
|
|
|
|
|
strlcpy(f->description, fmt->name, sizeof(f->description));
|
|
|
|
f->pixelformat = fmt->fourcc;
|
2006-06-04 13:34:12 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-05-28 15:16:41 +00:00
|
|
|
static int vidioc_g_fmt_vid_cap(struct file *file, void *priv,
|
2006-06-04 13:34:12 +00:00
|
|
|
struct v4l2_format *f)
|
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
struct vivi_dev *dev = video_drvdata(file);
|
2006-06-04 13:34:12 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
f->fmt.pix.width = dev->width;
|
|
|
|
f->fmt.pix.height = dev->height;
|
2012-07-25 14:48:53 +00:00
|
|
|
f->fmt.pix.field = V4L2_FIELD_INTERLACED;
|
2010-04-10 07:13:53 +00:00
|
|
|
f->fmt.pix.pixelformat = dev->fmt->fourcc;
|
2006-06-04 13:34:12 +00:00
|
|
|
f->fmt.pix.bytesperline =
|
2010-04-10 07:13:53 +00:00
|
|
|
(f->fmt.pix.width * dev->fmt->depth) >> 3;
|
2006-06-04 13:34:12 +00:00
|
|
|
f->fmt.pix.sizeimage =
|
|
|
|
f->fmt.pix.height * f->fmt.pix.bytesperline;
|
2012-08-06 13:36:18 +00:00
|
|
|
if (dev->fmt->is_yuv)
|
2011-07-29 10:19:46 +00:00
|
|
|
f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
|
|
|
|
else
|
|
|
|
f->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB;
|
2010-04-10 07:13:53 +00:00
|
|
|
return 0;
|
2006-06-04 13:34:12 +00:00
|
|
|
}
|
|
|
|
|
2008-05-28 15:16:41 +00:00
|
|
|
static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
|
2006-03-10 15:40:10 +00:00
|
|
|
struct v4l2_format *f)
|
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
struct vivi_dev *dev = video_drvdata(file);
|
2012-12-26 15:23:26 +00:00
|
|
|
const struct vivi_fmt *fmt;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2008-10-14 15:47:09 +00:00
|
|
|
fmt = get_format(f);
|
|
|
|
if (!fmt) {
|
2012-07-25 14:48:53 +00:00
|
|
|
dprintk(dev, 1, "Fourcc format (0x%08x) unknown.\n",
|
2008-10-14 15:47:09 +00:00
|
|
|
f->fmt.pix.pixelformat);
|
2012-07-25 14:48:53 +00:00
|
|
|
f->fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
|
|
|
|
fmt = get_format(f);
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
|
2012-07-25 14:48:53 +00:00
|
|
|
f->fmt.pix.field = V4L2_FIELD_INTERLACED;
|
2010-04-10 07:13:53 +00:00
|
|
|
v4l_bound_align_image(&f->fmt.pix.width, 48, MAX_WIDTH, 2,
|
|
|
|
&f->fmt.pix.height, 32, MAX_HEIGHT, 0, 0);
|
2006-03-10 15:40:10 +00:00
|
|
|
f->fmt.pix.bytesperline =
|
|
|
|
(f->fmt.pix.width * fmt->depth) >> 3;
|
|
|
|
f->fmt.pix.sizeimage =
|
|
|
|
f->fmt.pix.height * f->fmt.pix.bytesperline;
|
2012-08-06 13:36:18 +00:00
|
|
|
if (fmt->is_yuv)
|
2011-07-29 10:19:46 +00:00
|
|
|
f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
|
|
|
|
else
|
|
|
|
f->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB;
|
2012-08-06 13:43:13 +00:00
|
|
|
f->fmt.pix.priv = 0;
|
2006-03-10 15:40:10 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-01-11 13:29:43 +00:00
|
|
|
static int vidioc_s_fmt_vid_cap(struct file *file, void *priv,
|
|
|
|
struct v4l2_format *f)
|
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
struct vivi_dev *dev = video_drvdata(file);
|
2011-01-19 15:02:29 +00:00
|
|
|
struct vb2_queue *q = &dev->vb_vidq;
|
2009-01-11 13:29:43 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
int ret = vidioc_try_fmt_vid_cap(file, priv, f);
|
2009-01-11 13:29:43 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
2012-06-22 08:53:02 +00:00
|
|
|
if (vb2_is_busy(q)) {
|
2010-04-10 07:13:53 +00:00
|
|
|
dprintk(dev, 1, "%s device busy\n", __func__);
|
2011-01-19 15:02:29 +00:00
|
|
|
return -EBUSY;
|
2009-01-11 13:29:43 +00:00
|
|
|
}
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
dev->fmt = get_format(f);
|
2012-05-02 06:15:11 +00:00
|
|
|
dev->pixelsize = dev->fmt->depth / 8;
|
2010-04-10 07:13:53 +00:00
|
|
|
dev->width = f->fmt.pix.width;
|
|
|
|
dev->height = f->fmt.pix.height;
|
2011-01-19 15:02:29 +00:00
|
|
|
|
|
|
|
return 0;
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
|
2012-08-06 13:37:18 +00:00
|
|
|
static int vidioc_enum_framesizes(struct file *file, void *fh,
|
|
|
|
struct v4l2_frmsizeenum *fsize)
|
|
|
|
{
|
|
|
|
static const struct v4l2_frmsize_stepwise sizes = {
|
|
|
|
48, MAX_WIDTH, 4,
|
|
|
|
32, MAX_HEIGHT, 1
|
|
|
|
};
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (fsize->index)
|
|
|
|
return -EINVAL;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(formats); i++)
|
|
|
|
if (formats[i].fourcc == fsize->pixel_format)
|
|
|
|
break;
|
|
|
|
if (i == ARRAY_SIZE(formats))
|
|
|
|
return -EINVAL;
|
|
|
|
fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE;
|
|
|
|
fsize->stepwise = sizes;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-06-04 13:34:12 +00:00
|
|
|
/* this sample driver exposes NUM_INPUTS camera inputs */
|
2007-12-10 12:33:52 +00:00
|
|
|
static int vidioc_enum_input(struct file *file, void *priv,
|
2006-06-04 13:34:12 +00:00
|
|
|
struct v4l2_input *inp)
|
|
|
|
{
|
2009-01-11 13:29:43 +00:00
|
|
|
if (inp->index >= NUM_INPUTS)
|
2006-06-04 13:34:12 +00:00
|
|
|
return -EINVAL;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2006-06-04 13:34:12 +00:00
|
|
|
inp->type = V4L2_INPUT_TYPE_CAMERA;
|
2009-01-11 13:29:43 +00:00
|
|
|
sprintf(inp->name, "Camera %u", inp->index);
|
2010-04-10 07:13:53 +00:00
|
|
|
return 0;
|
2006-06-04 13:34:12 +00:00
|
|
|
}
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2007-12-10 12:33:52 +00:00
|
|
|
static int vidioc_g_input(struct file *file, void *priv, unsigned int *i)
|
2006-06-04 13:34:12 +00:00
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
struct vivi_dev *dev = video_drvdata(file);
|
2009-01-11 13:29:43 +00:00
|
|
|
|
|
|
|
*i = dev->input;
|
2010-04-10 07:13:53 +00:00
|
|
|
return 0;
|
2006-06-04 13:34:12 +00:00
|
|
|
}
|
2010-04-10 07:13:53 +00:00
|
|
|
|
2007-12-10 12:33:52 +00:00
|
|
|
static int vidioc_s_input(struct file *file, void *priv, unsigned int i)
|
2006-06-04 13:34:12 +00:00
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
struct vivi_dev *dev = video_drvdata(file);
|
2009-01-11 13:29:43 +00:00
|
|
|
|
|
|
|
if (i >= NUM_INPUTS)
|
2006-06-04 13:34:12 +00:00
|
|
|
return -EINVAL;
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2011-06-07 13:20:23 +00:00
|
|
|
if (i == dev->input)
|
|
|
|
return 0;
|
|
|
|
|
2009-01-11 13:29:43 +00:00
|
|
|
dev->input = i;
|
2013-03-12 06:56:55 +00:00
|
|
|
/*
|
|
|
|
* Modify the brightness range depending on the input.
|
|
|
|
* This makes it easy to use vivi to test if applications can
|
|
|
|
* handle control range modifications and is also how this is
|
|
|
|
* typically used in practice as different inputs may be hooked
|
|
|
|
* up to different receivers with different control ranges.
|
|
|
|
*/
|
|
|
|
v4l2_ctrl_modify_range(dev->brightness,
|
|
|
|
128 * i, 255 + 128 * i, 1, 127 + 128 * i);
|
2010-04-10 07:13:53 +00:00
|
|
|
precalculate_bars(dev);
|
|
|
|
precalculate_line(dev);
|
|
|
|
return 0;
|
2006-06-04 13:34:12 +00:00
|
|
|
}
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2013-10-21 00:34:01 +00:00
|
|
|
/* timeperframe is arbitrary and continuous */
|
2012-10-23 12:56:59 +00:00
|
|
|
static int vidioc_enum_frameintervals(struct file *file, void *priv,
|
|
|
|
struct v4l2_frmivalenum *fival)
|
|
|
|
{
|
2012-12-26 15:23:26 +00:00
|
|
|
const struct vivi_fmt *fmt;
|
2012-10-23 12:56:59 +00:00
|
|
|
|
|
|
|
if (fival->index)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
fmt = __get_format(fival->pixel_format);
|
|
|
|
if (!fmt)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2014-02-25 10:15:54 +00:00
|
|
|
/* check for valid width/height */
|
|
|
|
if (fival->width < 48 || fival->width > MAX_WIDTH || (fival->width & 3))
|
|
|
|
return -EINVAL;
|
|
|
|
if (fival->height < 32 || fival->height > MAX_HEIGHT)
|
|
|
|
return -EINVAL;
|
2012-10-23 12:56:59 +00:00
|
|
|
|
|
|
|
fival->type = V4L2_FRMIVAL_TYPE_CONTINUOUS;
|
|
|
|
|
2013-10-21 00:34:01 +00:00
|
|
|
/* fill in stepwise (step=1.0 is required by V4L2 spec) */
|
2012-10-23 12:56:59 +00:00
|
|
|
fival->stepwise.min = tpf_min;
|
|
|
|
fival->stepwise.max = tpf_max;
|
|
|
|
fival->stepwise.step = (struct v4l2_fract) {1, 1};
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int vidioc_g_parm(struct file *file, void *priv,
|
|
|
|
struct v4l2_streamparm *parm)
|
|
|
|
{
|
|
|
|
struct vivi_dev *dev = video_drvdata(file);
|
|
|
|
|
|
|
|
if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME;
|
|
|
|
parm->parm.capture.timeperframe = dev->timeperframe;
|
|
|
|
parm->parm.capture.readbuffers = 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Compare two fractions a and b (anything with .numerator/.denominator)
 * using relational operator OP, by cross-multiplying in 64 bits:
 *   a.num / a.den  OP  b.num / b.den   <=>   a.num * b.den  OP  b.num * a.den
 */
#define FRACT_CMP(a, OP, b)					\
	(((u64)(a).numerator * (b).denominator) OP		\
	 ((u64)(b).numerator * (a).denominator))
|
|
|
|
|
|
|
|
static int vidioc_s_parm(struct file *file, void *priv,
|
|
|
|
struct v4l2_streamparm *parm)
|
|
|
|
{
|
|
|
|
struct vivi_dev *dev = video_drvdata(file);
|
|
|
|
struct v4l2_fract tpf;
|
|
|
|
|
|
|
|
if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
tpf = parm->parm.capture.timeperframe;
|
|
|
|
|
|
|
|
/* tpf: {*, 0} resets timing; clip to [min, max]*/
|
|
|
|
tpf = tpf.denominator ? tpf : tpf_default;
|
|
|
|
tpf = FRACT_CMP(tpf, <, tpf_min) ? tpf_min : tpf;
|
|
|
|
tpf = FRACT_CMP(tpf, >, tpf_max) ? tpf_max : tpf;
|
|
|
|
|
|
|
|
dev->timeperframe = tpf;
|
|
|
|
parm->parm.capture.timeperframe = tpf;
|
|
|
|
parm->parm.capture.readbuffers = 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
/* --- controls ---------------------------------------------- */
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2011-06-07 09:34:41 +00:00
|
|
|
static int vivi_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
struct vivi_dev *dev = container_of(ctrl->handler, struct vivi_dev, ctrl_handler);
|
|
|
|
|
|
|
|
if (ctrl == dev->autogain)
|
|
|
|
dev->gain->val = jiffies & 0xff;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-01-23 14:33:16 +00:00
|
|
|
static int vivi_s_ctrl(struct v4l2_ctrl *ctrl)
|
2006-06-04 13:34:12 +00:00
|
|
|
{
|
2011-01-23 14:33:16 +00:00
|
|
|
struct vivi_dev *dev = container_of(ctrl->handler, struct vivi_dev, ctrl_handler);
|
2006-03-10 15:40:10 +00:00
|
|
|
|
2012-05-02 06:33:52 +00:00
|
|
|
switch (ctrl->id) {
|
|
|
|
case V4L2_CID_ALPHA_COMPONENT:
|
|
|
|
dev->alpha_component = ctrl->val;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
if (ctrl == dev->button)
|
|
|
|
dev->button_pressed = 30;
|
|
|
|
break;
|
|
|
|
}
|
2011-01-23 14:33:16 +00:00
|
|
|
return 0;
|
2006-03-10 15:40:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ------------------------------------------------------------------
|
|
|
|
File operations for the device
|
|
|
|
------------------------------------------------------------------*/
|
|
|
|
|
2011-01-23 14:33:16 +00:00
|
|
|
static const struct v4l2_ctrl_ops vivi_ctrl_ops = {
|
2011-06-07 09:34:41 +00:00
|
|
|
.g_volatile_ctrl = vivi_g_volatile_ctrl,
|
2011-01-23 14:33:16 +00:00
|
|
|
.s_ctrl = vivi_s_ctrl,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Base ID for the driver's custom controls; each config below adds a small offset. */
#define VIVI_CID_CUSTOM_BASE (V4L2_CID_USER_BASE | 0xf000)
|
|
|
|
|
|
|
|
static const struct v4l2_ctrl_config vivi_ctrl_button = {
|
|
|
|
.ops = &vivi_ctrl_ops,
|
|
|
|
.id = VIVI_CID_CUSTOM_BASE + 0,
|
|
|
|
.name = "Button",
|
|
|
|
.type = V4L2_CTRL_TYPE_BUTTON,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct v4l2_ctrl_config vivi_ctrl_boolean = {
|
|
|
|
.ops = &vivi_ctrl_ops,
|
|
|
|
.id = VIVI_CID_CUSTOM_BASE + 1,
|
|
|
|
.name = "Boolean",
|
|
|
|
.type = V4L2_CTRL_TYPE_BOOLEAN,
|
|
|
|
.min = 0,
|
|
|
|
.max = 1,
|
|
|
|
.step = 1,
|
|
|
|
.def = 1,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct v4l2_ctrl_config vivi_ctrl_int32 = {
|
|
|
|
.ops = &vivi_ctrl_ops,
|
|
|
|
.id = VIVI_CID_CUSTOM_BASE + 2,
|
|
|
|
.name = "Integer 32 Bits",
|
|
|
|
.type = V4L2_CTRL_TYPE_INTEGER,
|
2011-01-11 20:32:28 +00:00
|
|
|
.min = 0x80000000,
|
|
|
|
.max = 0x7fffffff,
|
2011-01-23 14:33:16 +00:00
|
|
|
.step = 1,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct v4l2_ctrl_config vivi_ctrl_int64 = {
|
|
|
|
.ops = &vivi_ctrl_ops,
|
|
|
|
.id = VIVI_CID_CUSTOM_BASE + 3,
|
|
|
|
.name = "Integer 64 Bits",
|
|
|
|
.type = V4L2_CTRL_TYPE_INTEGER64,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* NULL-terminated item names for the custom "Menu" control. */
static const char * const vivi_ctrl_menu_strings[] = {
	[0] = "Menu Item 0 (Skipped)",
	[1] = "Menu Item 1",
	[2] = "Menu Item 2 (Skipped)",
	[3] = "Menu Item 3",
	[4] = "Menu Item 4",
	[5] = "Menu Item 5 (Skipped)",
	[6] = NULL,
};
|
|
|
|
|
|
|
|
static const struct v4l2_ctrl_config vivi_ctrl_menu = {
|
|
|
|
.ops = &vivi_ctrl_ops,
|
|
|
|
.id = VIVI_CID_CUSTOM_BASE + 4,
|
|
|
|
.name = "Menu",
|
|
|
|
.type = V4L2_CTRL_TYPE_MENU,
|
|
|
|
.min = 1,
|
|
|
|
.max = 4,
|
|
|
|
.def = 3,
|
|
|
|
.menu_skip_mask = 0x04,
|
|
|
|
.qmenu = vivi_ctrl_menu_strings,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct v4l2_ctrl_config vivi_ctrl_string = {
|
|
|
|
.ops = &vivi_ctrl_ops,
|
|
|
|
.id = VIVI_CID_CUSTOM_BASE + 5,
|
|
|
|
.name = "String",
|
|
|
|
.type = V4L2_CTRL_TYPE_STRING,
|
|
|
|
.min = 2,
|
|
|
|
.max = 4,
|
|
|
|
.step = 1,
|
|
|
|
};
|
|
|
|
|
2011-03-29 19:33:11 +00:00
|
|
|
static const struct v4l2_ctrl_config vivi_ctrl_bitmask = {
|
|
|
|
.ops = &vivi_ctrl_ops,
|
|
|
|
.id = VIVI_CID_CUSTOM_BASE + 6,
|
|
|
|
.name = "Bitmask",
|
|
|
|
.type = V4L2_CTRL_TYPE_BITMASK,
|
|
|
|
.def = 0x80002000,
|
|
|
|
.min = 0,
|
|
|
|
.max = 0x80402010,
|
|
|
|
.step = 0,
|
|
|
|
};
|
|
|
|
|
2011-08-05 09:38:05 +00:00
|
|
|
/* Item values for the custom "Integer menu" control (indices 0..8). */
static const s64 vivi_ctrl_int_menu_values[] = {
	[0] = 1,
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 5,
	[5] = 8,
	[6] = 13,
	[7] = 21,
	[8] = 42,
};
|
|
|
|
|
|
|
|
static const struct v4l2_ctrl_config vivi_ctrl_int_menu = {
|
|
|
|
.ops = &vivi_ctrl_ops,
|
|
|
|
.id = VIVI_CID_CUSTOM_BASE + 7,
|
|
|
|
.name = "Integer menu",
|
|
|
|
.type = V4L2_CTRL_TYPE_INTEGER_MENU,
|
|
|
|
.min = 1,
|
|
|
|
.max = 8,
|
|
|
|
.def = 4,
|
|
|
|
.menu_skip_mask = 0x02,
|
|
|
|
.qmenu_int = vivi_ctrl_int_menu_values,
|
|
|
|
};
|
|
|
|
|
2008-12-30 09:58:20 +00:00
|
|
|
static const struct v4l2_file_operations vivi_fops = {
|
2006-03-10 15:40:10 +00:00
|
|
|
.owner = THIS_MODULE,
|
2011-06-07 13:20:23 +00:00
|
|
|
.open = v4l2_fh_open,
|
2012-06-22 08:53:02 +00:00
|
|
|
.release = vb2_fop_release,
|
|
|
|
.read = vb2_fop_read,
|
|
|
|
.poll = vb2_fop_poll,
|
2010-09-20 21:25:55 +00:00
|
|
|
.unlocked_ioctl = video_ioctl2, /* V4L2 ioctl handler */
|
2012-06-22 08:53:02 +00:00
|
|
|
.mmap = vb2_fop_mmap,
|
2006-03-10 15:40:10 +00:00
|
|
|
};
|
|
|
|
|
2008-07-21 05:57:38 +00:00
|
|
|
static const struct v4l2_ioctl_ops vivi_ioctl_ops = {
|
2006-06-04 13:34:12 +00:00
|
|
|
.vidioc_querycap = vidioc_querycap,
|
2008-05-28 15:16:41 +00:00
|
|
|
.vidioc_enum_fmt_vid_cap = vidioc_enum_fmt_vid_cap,
|
|
|
|
.vidioc_g_fmt_vid_cap = vidioc_g_fmt_vid_cap,
|
|
|
|
.vidioc_try_fmt_vid_cap = vidioc_try_fmt_vid_cap,
|
|
|
|
.vidioc_s_fmt_vid_cap = vidioc_s_fmt_vid_cap,
|
2012-08-06 13:37:18 +00:00
|
|
|
.vidioc_enum_framesizes = vidioc_enum_framesizes,
|
2012-06-22 08:53:02 +00:00
|
|
|
.vidioc_reqbufs = vb2_ioctl_reqbufs,
|
2012-06-22 08:53:31 +00:00
|
|
|
.vidioc_create_bufs = vb2_ioctl_create_bufs,
|
|
|
|
.vidioc_prepare_buf = vb2_ioctl_prepare_buf,
|
2012-06-22 08:53:02 +00:00
|
|
|
.vidioc_querybuf = vb2_ioctl_querybuf,
|
|
|
|
.vidioc_qbuf = vb2_ioctl_qbuf,
|
|
|
|
.vidioc_dqbuf = vb2_ioctl_dqbuf,
|
2006-06-04 13:34:12 +00:00
|
|
|
.vidioc_enum_input = vidioc_enum_input,
|
|
|
|
.vidioc_g_input = vidioc_g_input,
|
|
|
|
.vidioc_s_input = vidioc_s_input,
|
2012-10-23 12:56:59 +00:00
|
|
|
.vidioc_enum_frameintervals = vidioc_enum_frameintervals,
|
|
|
|
.vidioc_g_parm = vidioc_g_parm,
|
|
|
|
.vidioc_s_parm = vidioc_s_parm,
|
2012-06-22 08:53:02 +00:00
|
|
|
.vidioc_streamon = vb2_ioctl_streamon,
|
|
|
|
.vidioc_streamoff = vb2_ioctl_streamoff,
|
2012-02-02 11:20:53 +00:00
|
|
|
.vidioc_log_status = v4l2_ctrl_log_status,
|
2012-01-27 19:21:10 +00:00
|
|
|
.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
|
2011-06-07 13:20:23 +00:00
|
|
|
.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
|
2008-07-21 05:57:38 +00:00
|
|
|
};
|
|
|
|
|
2012-12-26 15:23:26 +00:00
|
|
|
static const struct video_device vivi_template = {
|
2008-07-21 05:57:38 +00:00
|
|
|
.name = "vivi",
|
|
|
|
.fops = &vivi_fops,
|
|
|
|
.ioctl_ops = &vivi_ioctl_ops,
|
2012-06-09 14:27:43 +00:00
|
|
|
.release = video_device_release_empty,
|
2006-03-10 15:40:10 +00:00
|
|
|
};
|
2009-02-14 16:23:12 +00:00
|
|
|
|
2006-06-04 13:34:12 +00:00
|
|
|
/* -----------------------------------------------------------------
	Initialization and module stuff
   ------------------------------------------------------------------*/
|
|
|
|
|
2009-02-14 16:23:12 +00:00
|
|
|
static int vivi_release(void)
|
|
|
|
{
|
|
|
|
struct vivi_dev *dev;
|
|
|
|
struct list_head *list;
|
2008-09-03 20:11:53 +00:00
|
|
|
|
2009-02-14 16:23:12 +00:00
|
|
|
while (!list_empty(&vivi_devlist)) {
|
|
|
|
list = vivi_devlist.next;
|
|
|
|
list_del(list);
|
|
|
|
dev = list_entry(list, struct vivi_dev, vivi_devlist);
|
|
|
|
|
2009-11-27 16:57:15 +00:00
|
|
|
v4l2_info(&dev->v4l2_dev, "unregistering %s\n",
|
2012-06-09 14:27:43 +00:00
|
|
|
video_device_node_name(&dev->vdev));
|
|
|
|
video_unregister_device(&dev->vdev);
|
2009-02-14 16:23:12 +00:00
|
|
|
v4l2_device_unregister(&dev->v4l2_dev);
|
2011-01-23 14:33:16 +00:00
|
|
|
v4l2_ctrl_handler_free(&dev->ctrl_handler);
|
2009-02-14 16:23:12 +00:00
|
|
|
kfree(dev);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-02-14 16:43:44 +00:00
|
|
|
/*
 * Allocate, set up and register one virtual video device.
 *
 * @inst: instance number; it is used to build the v4l2_device name
 *        "<VIVI_MODULE_NAME>-NNN".
 *
 * On success the new device is appended to vivi_devlist and 0 is
 * returned. Otherwise everything set up so far is undone and the error
 * is returned: -ENOMEM when the allocation fails, or the code reported
 * by v4l2_device_register(), the control handler, vb2_queue_init() or
 * video_register_device().
 */
static int __init vivi_create_instance(int inst)
{
	struct vivi_dev *vivi;
	struct v4l2_ctrl_handler *ctrls;
	struct vb2_queue *queue;
	struct video_device *vdev;
	int err;

	vivi = kzalloc(sizeof(*vivi), GFP_KERNEL);
	if (vivi == NULL)
		return -ENOMEM;

	snprintf(vivi->v4l2_dev.name, sizeof(vivi->v4l2_dev.name),
		 "%s-%03d", VIVI_MODULE_NAME, inst);
	err = v4l2_device_register(NULL, &vivi->v4l2_dev);
	if (err)
		goto err_free;

	/* default capture settings: first format, 640x480 */
	vivi->fmt = &formats[0];
	vivi->timeperframe = tpf_default;
	vivi->width = 640;
	vivi->height = 480;
	vivi->pixelsize = vivi->fmt->depth / 8;

	/* standard controls */
	ctrls = &vivi->ctrl_handler;
	v4l2_ctrl_handler_init(ctrls, 11);
	vivi->volume = v4l2_ctrl_new_std(ctrls, &vivi_ctrl_ops,
					 V4L2_CID_AUDIO_VOLUME,
					 0, 255, 1, 200);
	vivi->brightness = v4l2_ctrl_new_std(ctrls, &vivi_ctrl_ops,
					     V4L2_CID_BRIGHTNESS,
					     0, 255, 1, 127);
	vivi->contrast = v4l2_ctrl_new_std(ctrls, &vivi_ctrl_ops,
					   V4L2_CID_CONTRAST,
					   0, 255, 1, 16);
	vivi->saturation = v4l2_ctrl_new_std(ctrls, &vivi_ctrl_ops,
					     V4L2_CID_SATURATION,
					     0, 255, 1, 127);
	vivi->hue = v4l2_ctrl_new_std(ctrls, &vivi_ctrl_ops,
				      V4L2_CID_HUE,
				      -128, 127, 1, 0);
	vivi->autogain = v4l2_ctrl_new_std(ctrls, &vivi_ctrl_ops,
					   V4L2_CID_AUTOGAIN,
					   0, 1, 1, 1);
	vivi->gain = v4l2_ctrl_new_std(ctrls, &vivi_ctrl_ops,
				       V4L2_CID_GAIN,
				       0, 255, 1, 100);
	vivi->alpha = v4l2_ctrl_new_std(ctrls, &vivi_ctrl_ops,
					V4L2_CID_ALPHA_COMPONENT,
					0, 255, 1, 0);

	/* driver-specific test controls */
	vivi->button = v4l2_ctrl_new_custom(ctrls, &vivi_ctrl_button, NULL);
	vivi->int32 = v4l2_ctrl_new_custom(ctrls, &vivi_ctrl_int32, NULL);
	vivi->int64 = v4l2_ctrl_new_custom(ctrls, &vivi_ctrl_int64, NULL);
	vivi->boolean = v4l2_ctrl_new_custom(ctrls, &vivi_ctrl_boolean, NULL);
	vivi->menu = v4l2_ctrl_new_custom(ctrls, &vivi_ctrl_menu, NULL);
	vivi->string = v4l2_ctrl_new_custom(ctrls, &vivi_ctrl_string, NULL);
	vivi->bitmask = v4l2_ctrl_new_custom(ctrls, &vivi_ctrl_bitmask, NULL);
	vivi->int_menu = v4l2_ctrl_new_custom(ctrls, &vivi_ctrl_int_menu,
					      NULL);

	/* a failure of any call above is recorded in the handler */
	err = ctrls->error;
	if (err)
		goto err_unreg;

	v4l2_ctrl_auto_cluster(2, &vivi->autogain, 0, true);
	vivi->v4l2_dev.ctrl_handler = ctrls;

	/* set up the spinlock */
	spin_lock_init(&vivi->slock);

	/* set up the videobuf2 queue */
	queue = &vivi->vb_vidq;
	queue->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
	queue->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ;
	queue->drv_priv = vivi;
	queue->buf_struct_size = sizeof(struct vivi_buffer);
	queue->ops = &vivi_video_qops;
	queue->mem_ops = &vb2_vmalloc_memops;
	queue->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;

	err = vb2_queue_init(queue);
	if (err)
		goto err_unreg;

	mutex_init(&vivi->mutex);

	/* set up the list of active buffers and its wait queue */
	INIT_LIST_HEAD(&vivi->vidq.active);
	init_waitqueue_head(&vivi->vidq.wq);

	/* fill in the embedded video_device from the template */
	vdev = &vivi->vdev;
	*vdev = vivi_template;
	vdev->debug = debug;
	vdev->v4l2_dev = &vivi->v4l2_dev;
	vdev->queue = queue;
	set_bit(V4L2_FL_USE_FH_PRIO, &vdev->flags);

	/*
	 * Hand the device mutex to the v4l2 core, which uses it to
	 * protect all fops and v4l2 ioctls.
	 */
	vdev->lock = &vivi->mutex;
	video_set_drvdata(vdev, vivi);

	err = video_register_device(vdev, VFL_TYPE_GRABBER, video_nr);
	if (err < 0)
		goto err_unreg;

	/* Registration succeeded: make the device known to the driver. */
	list_add_tail(&vivi->vivi_devlist, &vivi_devlist);

	v4l2_info(&vivi->v4l2_dev, "V4L2 device registered as %s\n",
		  video_device_node_name(vdev));
	return 0;

err_unreg:
	v4l2_ctrl_handler_free(ctrls);
	v4l2_device_unregister(&vivi->v4l2_dev);
err_free:
	kfree(vivi);
	return err;
}
|
2007-12-10 07:07:03 +00:00
|
|
|
|
2009-02-14 16:23:12 +00:00
|
|
|
/* This routine allocates from 1 to n_devs virtual drivers.
|
|
|
|
|
|
|
|
The real maximum number of virtual drivers will depend on how many drivers
|
|
|
|
will succeed. This is limited to the maximum number of devices that
|
|
|
|
videodev supports, which is equal to VIDEO_NUM_DEVICES.
|
|
|
|
*/
|
|
|
|
static int __init vivi_init(void)
|
|
|
|
{
|
2010-04-10 07:13:53 +00:00
|
|
|
const struct font_desc *font = find_font("VGA8x16");
|
2009-03-06 12:58:12 +00:00
|
|
|
int ret = 0, i;
|
2009-02-14 16:23:12 +00:00
|
|
|
|
2010-04-10 07:13:53 +00:00
|
|
|
if (font == NULL) {
|
|
|
|
printk(KERN_ERR "vivi: could not find font\n");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
font8x16 = font->data;
|
|
|
|
|
2009-02-14 16:23:12 +00:00
|
|
|
if (n_devs <= 0)
|
|
|
|
n_devs = 1;
|
|
|
|
|
|
|
|
for (i = 0; i < n_devs; i++) {
|
|
|
|
ret = vivi_create_instance(i);
|
|
|
|
if (ret) {
|
|
|
|
/* If some instantiations succeeded, keep driver */
|
|
|
|
if (i)
|
|
|
|
ret = 0;
|
|
|
|
break;
|
|
|
|
}
|
2007-12-10 07:38:11 +00:00
|
|
|
}
|
2007-12-10 07:07:03 +00:00
|
|
|
|
2007-12-10 07:38:11 +00:00
|
|
|
if (ret < 0) {
|
2010-04-10 07:13:53 +00:00
|
|
|
printk(KERN_ERR "vivi: error %d while loading driver\n", ret);
|
2009-02-14 16:23:12 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
printk(KERN_INFO "Video Technology Magazine Virtual Video "
|
[media] Stop using linux/version.h on most video drivers
All the modified drivers didn't have any version increment since
Jan, 1 2011. Several of them didn't have any version increment
for a long time, even having new features and important bug fixes
happening.
As we're now filling the QUERYCAP version with the current Kernel
Release, we don't need to maintain a per-driver version control
anymore. So, let's just use the default.
In order to preserve the Kernel module version history, a
KERNEL_VERSION() macro were added to all modified drivers, and
the extraver number were incremented.
I opted to preserve the per-driver version control to a few
pwc, pvrusb2, s2255, s5p-fimc and sh_vou.
A few drivers are still using the legacy way to handle ioctl's.
So, we can't do such change on them, otherwise, they'll break.
Those are: uvc, et61x251 and sn9c102.
The rationale is that the per-driver version control seems to be
actively maintained on those.
Yet, I think that the better for them would be to just use the
default version numbering, instead of doing that by themselves.
While here, removed a few uneeded include linux/version.h
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
2011-06-24 17:45:49 +00:00
|
|
|
"Capture Board ver %s successfully loaded.\n",
|
|
|
|
VIVI_VERSION);
|
2008-09-03 20:11:53 +00:00
|
|
|
|
2009-02-14 16:23:12 +00:00
|
|
|
/* n_devs will reflect the actual number of allocated devices */
|
|
|
|
n_devs = i;
|
2008-09-03 20:11:53 +00:00
|
|
|
|
2006-03-10 15:40:10 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Module exit: releases every device created at load time.
 */
static void __exit vivi_exit(void)
{
	/* vivi_release() always returns 0, so there is nothing to check. */
	vivi_release();
}
|
|
|
|
|
|
|
|
/* Hook the load and unload handlers into the module framework. */
module_init(vivi_init);
module_exit(vivi_exit);
|