Skip to content

Commit

Permalink
udl-kms: avoid division
Browse files Browse the repository at this point in the history
commit 91ba11f upstream.

Division is slow, so it shouldn't be done by the pixel-generating code.
The driver supports only 2 or 4 bytes per pixel, both powers of two, so we
can replace each division (and multiplication) by the pixel size with a
shift by its base-2 logarithm.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
Mikulas Patocka authored and Greg Kroah-Hartman committed Sep 5, 2018
1 parent c0357c1 commit f337a54
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 26 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/udl/udl_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ udl_fb_user_fb_create(struct drm_device *dev,
struct drm_file *file,
const struct drm_mode_fb_cmd2 *mode_cmd);

int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr,
int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr,
const char *front, char **urb_buf_ptr,
u32 byte_offset, u32 device_byte_offset, u32 byte_width,
int *ident_ptr, int *sent_ptr);
Expand Down
15 changes: 9 additions & 6 deletions drivers/gpu/drm/udl/udl_fb.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
int bytes_identical = 0;
struct urb *urb;
int aligned_x;
int bpp = fb->base.format->cpp[0];
int log_bpp;

BUG_ON(!is_power_of_2(fb->base.format->cpp[0]));
log_bpp = __ffs(fb->base.format->cpp[0]);

if (!fb->active_16)
return 0;
Expand Down Expand Up @@ -125,12 +128,12 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,

for (i = y; i < y + height ; i++) {
const int line_offset = fb->base.pitches[0] * i;
const int byte_offset = line_offset + (x * bpp);
const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp);
if (udl_render_hline(dev, bpp, &urb,
const int byte_offset = line_offset + (x << log_bpp);
const int dev_byte_offset = (fb->base.width * i + x) << log_bpp;
if (udl_render_hline(dev, log_bpp, &urb,
(char *) fb->obj->vmapping,
&cmd, byte_offset, dev_byte_offset,
width * bpp,
width << log_bpp,
&bytes_identical, &bytes_sent))
goto error;
}
Expand All @@ -149,7 +152,7 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
error:
atomic_add(bytes_sent, &udl->bytes_sent);
atomic_add(bytes_identical, &udl->bytes_identical);
atomic_add(width*height*bpp, &udl->bytes_rendered);
atomic_add((width * height) << log_bpp, &udl->bytes_rendered);
end_cycles = get_cycles();
atomic_add(((unsigned int) ((end_cycles - start_cycles)
>> 10)), /* Kcycles */
Expand Down
39 changes: 20 additions & 19 deletions drivers/gpu/drm/udl/udl_transfer.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,12 @@ static inline u16 pixel32_to_be16(const uint32_t pixel)
((pixel >> 8) & 0xf800));
}

static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp)
static inline u16 get_pixel_val16(const uint8_t *pixel, int log_bpp)
{
u16 pixel_val16 = 0;
if (bpp == 2)
u16 pixel_val16;
if (log_bpp == 1)
pixel_val16 = *(const uint16_t *)pixel;
else if (bpp == 4)
else
pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel);
return pixel_val16;
}
Expand Down Expand Up @@ -125,8 +125,9 @@ static void udl_compress_hline16(
const u8 *const pixel_end,
uint32_t *device_address_ptr,
uint8_t **command_buffer_ptr,
const uint8_t *const cmd_buffer_end, int bpp)
const uint8_t *const cmd_buffer_end, int log_bpp)
{
const int bpp = 1 << log_bpp;
const u8 *pixel = *pixel_start_ptr;
uint32_t dev_addr = *device_address_ptr;
uint8_t *cmd = *command_buffer_ptr;
Expand All @@ -153,12 +154,12 @@ static void udl_compress_hline16(
raw_pixels_count_byte = cmd++; /* we'll know this later */
raw_pixel_start = pixel;

cmd_pixel_end = pixel + min3(MAX_CMD_PIXELS + 1UL,
(unsigned long)(pixel_end - pixel) / bpp,
(unsigned long)(cmd_buffer_end - 1 - cmd) / 2) * bpp;
cmd_pixel_end = pixel + (min3(MAX_CMD_PIXELS + 1UL,
(unsigned long)(pixel_end - pixel) >> log_bpp,
(unsigned long)(cmd_buffer_end - 1 - cmd) / 2) << log_bpp);

prefetch_range((void *) pixel, cmd_pixel_end - pixel);
pixel_val16 = get_pixel_val16(pixel, bpp);
pixel_val16 = get_pixel_val16(pixel, log_bpp);

while (pixel < cmd_pixel_end) {
const u8 *const start = pixel;
Expand All @@ -170,7 +171,7 @@ static void udl_compress_hline16(
pixel += bpp;

while (pixel < cmd_pixel_end) {
pixel_val16 = get_pixel_val16(pixel, bpp);
pixel_val16 = get_pixel_val16(pixel, log_bpp);
if (pixel_val16 != repeating_pixel_val16)
break;
pixel += bpp;
Expand All @@ -179,10 +180,10 @@ static void udl_compress_hline16(
if (unlikely(pixel > start + bpp)) {
/* go back and fill in raw pixel count */
*raw_pixels_count_byte = (((start -
raw_pixel_start) / bpp) + 1) & 0xFF;
raw_pixel_start) >> log_bpp) + 1) & 0xFF;

/* immediately after raw data is repeat byte */
*cmd++ = (((pixel - start) / bpp) - 1) & 0xFF;
*cmd++ = (((pixel - start) >> log_bpp) - 1) & 0xFF;

/* Then start another raw pixel span */
raw_pixel_start = pixel;
Expand All @@ -192,14 +193,14 @@ static void udl_compress_hline16(

if (pixel > raw_pixel_start) {
/* finalize last RAW span */
*raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF;
*raw_pixels_count_byte = ((pixel - raw_pixel_start) >> log_bpp) & 0xFF;
} else {
/* undo unused byte */
cmd--;
}

*cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF;
dev_addr += ((pixel - cmd_pixel_start) / bpp) * 2;
*cmd_pixels_count_byte = ((pixel - cmd_pixel_start) >> log_bpp) & 0xFF;
dev_addr += ((pixel - cmd_pixel_start) >> log_bpp) * 2;
}

if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) {
Expand All @@ -222,19 +223,19 @@ static void udl_compress_hline16(
* (that we can only write to, slowly, and can never read), and (optionally)
* our shadow copy that tracks what's been sent to that hardware buffer.
*/
int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr,
int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr,
const char *front, char **urb_buf_ptr,
u32 byte_offset, u32 device_byte_offset,
u32 byte_width,
int *ident_ptr, int *sent_ptr)
{
const u8 *line_start, *line_end, *next_pixel;
u32 base16 = 0 + (device_byte_offset / bpp) * 2;
u32 base16 = 0 + (device_byte_offset >> log_bpp) * 2;
struct urb *urb = *urb_ptr;
u8 *cmd = *urb_buf_ptr;
u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length;

BUG_ON(!(bpp == 2 || bpp == 4));
BUG_ON(!(log_bpp == 1 || log_bpp == 2));

line_start = (u8 *) (front + byte_offset);
next_pixel = line_start;
Expand All @@ -244,7 +245,7 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr,

udl_compress_hline16(&next_pixel,
line_end, &base16,
(u8 **) &cmd, (u8 *) cmd_end, bpp);
(u8 **) &cmd, (u8 *) cmd_end, log_bpp);

if (cmd >= cmd_end) {
int len = cmd - (u8 *) urb->transfer_buffer;
Expand Down

0 comments on commit f337a54

Please sign in to comment.