diff --git a/Makefile.am b/Makefile.am index 418fdc92..de5fbe12 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,14 +18,16 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4 +#Having problems passing through user flags as libtool complains +#ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4 +ACLOCAL_AMFLAGS = -I m4 SUBDIRS = man libobj xvmc src tools MAINTAINERCLEANFILES = ChangeLog INSTALL if HAVE_X11 -SUBDIRS += test +SUBDIRS += test benchmarks endif .PHONY: ChangeLog INSTALL diff --git a/NEWS b/NEWS index 604b9cce..0e200332 100644 --- a/NEWS +++ b/NEWS @@ -21,7 +21,7 @@ should make one more snapshot before an imminent release. Before kernel 3.19, O_NONBLOCK support is broken and so we must avoid reading if we are not expecting an event. - * Backwards compatibilty fix for fake triple buffering with PRIME and + * Backwards compatibility fix for fake triple buffering with PRIME and Xorg-1.15 https://bugs.freedesktop.org/show_bug.cgi?id=85144#c12 @@ -51,7 +51,7 @@ should make one more snapshot before an imminent release. Snapshot 2.99.916 (2014-09-08) ============================== Quick update for MST in UXA - we need to hook up the RandR outputs for -dynamicaly added connectors. +dynamically added connectors. Snapshot 2.99.915 (2014-09-08) @@ -503,7 +503,7 @@ release. backlight property is queried whilst the connector is disabled https://bugs.freedesktop.org/show_bug.cgi?id=70406 - * Pad GETCONNECTOR ioctl for compatability between 32/64-bit userspace + * Pad GETCONNECTOR ioctl for compatibility between 32/64-bit userspace and kernel * Handle long glyph runs correctly @@ -523,7 +523,7 @@ snapshot beforehand to push out the bug fixes from the last week. 
* Fix video output using sprites when changing the image size - * Apply more restrictive tile constaints for 915g class devices + * Apply more restrictive tile constraints for 915g class devices https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1232546 * Ensure all overlapping rectangles are drawn for XRenderFillRectangles @@ -1132,7 +1132,7 @@ operation. * Explicitly prevent ring-switching for synchronized rendering to scanouts (for vsync). - * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusuable) + * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusable) https://bugs.freedesktop.org/show_bug.cgi?id=59539 @@ -1226,7 +1226,7 @@ Release 2.20.15 (2012-12-03) ============================ And lo, enabling more of the common acceleration paths for gen4 revealed another lurking bug - something is wrong with how we prepare Y-tiling -surfaces for rendering. For the time being, we can surreptiously disable +surfaces for rendering. For the time being, we can surreptitiously disable them for gen4 and avoid hitting GPU hangs. * Avoid clobbering the render state after failing to convert the @@ -1515,7 +1515,7 @@ Release 2.20.5 (2012-08-26) Another silly bug found, another small bugfix release. The goal was for the driver to bind to all Intel devices supported by the kernel. Unfortunately we were too successful and started claiming Pouslbo, -Medfield and Cedarview devices which are still encumbered by propietary +Medfield and Cedarview devices which are still encumbered by proprietary IP and not supported by this driver. Bugs fixed since 2.20.4: diff --git a/README b/README index cf4d88d8..348983b4 100644 --- a/README +++ b/README @@ -15,9 +15,9 @@ Intel graphics chipsets including: G/Q33,G/Q35,G41,G/Q43,G/GM/Q45 PineView-M (Atom N400 series) PineView-D (Atom D400/D500 series) - Intel(R) HD Graphics: 2000-6000, - Intel(R) Iris(TM) Graphics: 5100/6100, and - Intel(R) Iris(TM) Pro Graphics: 5200/6200/P6300. 
+ Intel(R) HD Graphics, + Intel(R) Iris(TM) Graphics, + Intel(R) Iris(TM) Pro Graphics. Where to get more information about the driver ---------------------------------------------- diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore new file mode 100644 index 00000000..301c0129 --- /dev/null +++ b/benchmarks/.gitignore @@ -0,0 +1,2 @@ +dri2-swap +dri3-swap diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am new file mode 100644 index 00000000..4976e8a3 --- /dev/null +++ b/benchmarks/Makefile.am @@ -0,0 +1,14 @@ +AM_CFLAGS = @CWARNFLAGS@ $(X11_CFLAGS) $(DRM_CFLAGS) +LDADD = $(X11_LIBS) $(DRM_LIBS) $(CLOCK_GETTIME_LIBS) + +check_PROGRAMS = + +if DRI2 +check_PROGRAMS += dri2-swap +endif + +if DRI3 +check_PROGRAMS += dri3-swap +AM_CFLAGS += $(X11_DRI3_CFLAGS) +LDADD += $(X11_DRI3_LIBS) +endif diff --git a/benchmarks/dri2-swap.c b/benchmarks/dri2-swap.c new file mode 100644 index 00000000..3d9d30aa --- /dev/null +++ b/benchmarks/dri2-swap.c @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static char dri2ExtensionName[] = DRI2_NAME; +static XExtensionInfo *dri2Info; +static XEXT_GENERATE_CLOSE_DISPLAY (DRI2CloseDisplay, dri2Info) + +static Bool +DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire); +static Status +DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire); +static int +DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code); + +static /* const */ XExtensionHooks dri2ExtensionHooks = { + NULL, /* create_gc */ + NULL, /* copy_gc */ + NULL, /* flush_gc */ + NULL, /* free_gc */ + NULL, /* create_font */ + NULL, /* free_font */ + DRI2CloseDisplay, /* close_display */ + DRI2WireToEvent, /* wire_to_event */ + DRI2EventToWire, /* event_to_wire */ + DRI2Error, /* error */ + NULL, /* error_string */ +}; + +static XEXT_GENERATE_FIND_DISPLAY (DRI2FindDisplay, + dri2Info, + dri2ExtensionName, + &dri2ExtensionHooks, + 0, NULL) + +static Bool +DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) +{ + XExtDisplayInfo *info = DRI2FindDisplay(dpy); + + XextCheckExtension(dpy, info, dri2ExtensionName, False); + + switch ((wire->u.u.type & 0x7f) - info->codes->first_event) { +#ifdef X_DRI2SwapBuffers + case DRI2_BufferSwapComplete: + return False; +#endif +#ifdef DRI2_InvalidateBuffers + case DRI2_InvalidateBuffers: + return False; +#endif + default: + /* client doesn't support server event */ + break; + } + + return 
False; +} + +/* We don't actually support this. It doesn't make sense for clients to + * send each other DRI2 events. + */ +static Status +DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire) +{ + XExtDisplayInfo *info = DRI2FindDisplay(dpy); + + XextCheckExtension(dpy, info, dri2ExtensionName, False); + + switch (event->type) { + default: + /* client doesn't support server event */ + break; + } + + return Success; +} + +static int +DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code) +{ + if (err->majorCode == codes->major_opcode && + err->errorCode == BadDrawable && + err->minorCode == X_DRI2CopyRegion) + return True; + + /* If the X drawable was destroyed before the GLX drawable, the + * DRI2 drawble will be gone by the time we call + * DRI2DestroyDrawable. So just ignore BadDrawable here. */ + if (err->majorCode == codes->major_opcode && + err->errorCode == BadDrawable && + err->minorCode == X_DRI2DestroyDrawable) + return True; + + /* If the server is non-local DRI2Connect will raise BadRequest. 
+ * Swallow this so that DRI2Connect can signal this in its return code */ + if (err->majorCode == codes->major_opcode && + err->minorCode == X_DRI2Connect && + err->errorCode == BadRequest) { + *ret_code = False; + return True; + } + + return False; +} + +static Bool +DRI2QueryExtension(Display * dpy, int *eventBase, int *errorBase) +{ + XExtDisplayInfo *info = DRI2FindDisplay(dpy); + + if (XextHasExtension(info)) { + *eventBase = info->codes->first_event; + *errorBase = info->codes->first_error; + return True; + } + + return False; +} + +static Bool +DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName) +{ + XExtDisplayInfo *info = DRI2FindDisplay(dpy); + xDRI2ConnectReply rep; + xDRI2ConnectReq *req; + + XextCheckExtension(dpy, info, dri2ExtensionName, False); + + LockDisplay(dpy); + GetReq(DRI2Connect, req); + req->reqType = info->codes->major_opcode; + req->dri2ReqType = X_DRI2Connect; + req->window = window; + req->driverType = DRI2DriverDRI; + if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) { + UnlockDisplay(dpy); + SyncHandle(); + return False; + } + + if (rep.driverNameLength == 0 && rep.deviceNameLength == 0) { + UnlockDisplay(dpy); + SyncHandle(); + return False; + } + + *driverName = Xmalloc(rep.driverNameLength + 1); + if (*driverName == NULL) { + _XEatData(dpy, + ((rep.driverNameLength + 3) & ~3) + + ((rep.deviceNameLength + 3) & ~3)); + UnlockDisplay(dpy); + SyncHandle(); + return False; + } + _XReadPad(dpy, *driverName, rep.driverNameLength); + (*driverName)[rep.driverNameLength] = '\0'; + + *deviceName = Xmalloc(rep.deviceNameLength + 1); + if (*deviceName == NULL) { + Xfree(*driverName); + _XEatData(dpy, ((rep.deviceNameLength + 3) & ~3)); + UnlockDisplay(dpy); + SyncHandle(); + return False; + } + _XReadPad(dpy, *deviceName, rep.deviceNameLength); + (*deviceName)[rep.deviceNameLength] = '\0'; + + UnlockDisplay(dpy); + SyncHandle(); + + return True; +} + +static Bool +DRI2Authenticate(Display * dpy, XID window, unsigned int 
magic) +{ + XExtDisplayInfo *info = DRI2FindDisplay(dpy); + xDRI2AuthenticateReq *req; + xDRI2AuthenticateReply rep; + + XextCheckExtension(dpy, info, dri2ExtensionName, False); + + LockDisplay(dpy); + GetReq(DRI2Authenticate, req); + req->reqType = info->codes->major_opcode; + req->dri2ReqType = X_DRI2Authenticate; + req->window = window; + req->magic = magic; + + if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) { + UnlockDisplay(dpy); + SyncHandle(); + return False; + } + + UnlockDisplay(dpy); + SyncHandle(); + + return rep.authenticated; +} + +static void +DRI2CreateDrawable(Display * dpy, XID drawable) +{ + XExtDisplayInfo *info = DRI2FindDisplay(dpy); + xDRI2CreateDrawableReq *req; + + XextSimpleCheckExtension(dpy, info, dri2ExtensionName); + + LockDisplay(dpy); + GetReq(DRI2CreateDrawable, req); + req->reqType = info->codes->major_opcode; + req->dri2ReqType = X_DRI2CreateDrawable; + req->drawable = drawable; + UnlockDisplay(dpy); + SyncHandle(); +} + +static void DRI2SwapInterval(Display *dpy, XID drawable, int interval) +{ + XExtDisplayInfo *info = DRI2FindDisplay(dpy); + xDRI2SwapIntervalReq *req; + + XextSimpleCheckExtension (dpy, info, dri2ExtensionName); + + LockDisplay(dpy); + GetReq(DRI2SwapInterval, req); + req->reqType = info->codes->major_opcode; + req->dri2ReqType = X_DRI2SwapInterval; + req->drawable = drawable; + req->interval = interval; + UnlockDisplay(dpy); + SyncHandle(); +} + +static int _x_error_occurred; + +static int +_check_error_handler(Display *display, + XErrorEvent *event) +{ + fprintf(stderr, + "X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", + DisplayString(display), + event->serial, + event->error_code, + event->request_code, + event->minor_code); + _x_error_occurred++; + return False; /* ignored */ +} + +static double elapsed(const struct timespec *start, + const struct timespec *end) +{ + return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; +} + +static void run(Display *dpy, Window win) 
+{ + xcb_connection_t *c = XGetXCBConnection(dpy); + struct timespec start, end; + int n, completed = 0; + + clock_gettime(CLOCK_MONOTONIC, &start); + do { + for (n = 0; n < 1000; n++) { + unsigned int attachments[] = { DRI2BufferBackLeft }; + unsigned int seq[2]; + + seq[0] = xcb_dri2_swap_buffers_unchecked(c, win, + 0, 0, 0, 0, 0, 0).sequence; + + + seq[1] = xcb_dri2_get_buffers_unchecked(c, win, + 1, 1, attachments).sequence; + + xcb_flush(c); + xcb_discard_reply(c, seq[0]); + xcb_discard_reply(c, seq[1]); + completed++; + } + clock_gettime(CLOCK_MONOTONIC, &end); + } while (end.tv_sec < start.tv_sec + 10); + + printf("%f\n", completed / (elapsed(&start, &end) / 1000000)); +} + +static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) +{ + XRRScreenResources *res; + + res = XRRGetScreenResourcesCurrent(dpy, window); + if (res == NULL) + res = XRRGetScreenResources(dpy, window); + + return res; +} + +static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) +{ + int i; + + for (i = 0; i < res->nmode; i++) { + if (res->modes[i].id == id) + return &res->modes[i]; + } + + return NULL; +} + +static int dri2_open(Display *dpy) +{ + drm_auth_t auth; + char *driver, *device; + int fd; + + if (!DRI2QueryExtension(dpy, &fd, &fd)) + return -1; + + if (!DRI2Connect(dpy, DefaultRootWindow(dpy), &driver, &device)) + return -1; + + fd = open(device, O_RDWR); + if (fd < 0) + return -1; + + if (drmIoctl(fd, DRM_IOCTL_GET_MAGIC, &auth)) + return -1; + + if (!DRI2Authenticate(dpy, DefaultRootWindow(dpy), auth.magic)) + return -1; + + return fd; +} + +static void fullscreen(Display *dpy, Window win) +{ + Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); + XChangeProperty(dpy, win, + XInternAtom(dpy, "_NET_WM_STATE", False), + XA_ATOM, 32, PropModeReplace, + (unsigned char *)&atom, 1); +} + +static int has_composite(Display *dpy) +{ + int event, error; + int major, minor; + + if (!XDamageQueryExtension (dpy, &event, &error)) 
+ return 0; + + if (!XCompositeQueryExtension(dpy, &event, &error)) + return 0; + + XCompositeQueryVersion(dpy, &major, &minor); + + return major > 0 || minor >= 4; +} + +int main(int argc, char **argv) +{ + Display *dpy; + Window root, win; + XRRScreenResources *res; + XRRCrtcInfo **original_crtc; + XSetWindowAttributes attr; + enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN; + enum visible {REDIRECTED, NORMAL } v = NORMAL; + enum display { OFF, ON } d = OFF; + int width, height; + int i, fd; + int c; + + while ((c = getopt(argc, argv, "d:v:w:")) != -1) { + switch (c) { + case 'd': + if (strcmp(optarg, "off") == 0) + d = OFF; + else if (strcmp(optarg, "on") == 0) + d = ON; + else + abort(); + break; + + case 'v': + if (strcmp(optarg, "redirected") == 0) + v = REDIRECTED; + else if (strcmp(optarg, "normal") == 0) + v = NORMAL; + else + abort(); + break; + + case 'w': + if (strcmp(optarg, "fullscreen") == 0) + w = FULLSCREEN; + else if (strcmp(optarg, "window") == 0) + w = WINDOW; + else if (strcmp(optarg, "root") == 0) + w = ROOT; + else + abort(); + break; + } + } + + attr.override_redirect = 1; + + dpy = XOpenDisplay(NULL); + if (dpy == NULL) + return 77; + + width = DisplayWidth(dpy, DefaultScreen(dpy)); + height = DisplayHeight(dpy, DefaultScreen(dpy)); + + fd = dri2_open(dpy); + if (fd < 0) + return 77; + + if (DPMSQueryExtension(dpy, &i, &i)) + DPMSDisable(dpy); + + root = DefaultRootWindow(dpy); + + signal(SIGALRM, SIG_IGN); + XSetErrorHandler(_check_error_handler); + + res = NULL; + if (XRRQueryVersion(dpy, &i, &i)) + res = _XRRGetScreenResourcesCurrent(dpy, root); + if (res == NULL) + return 77; + + if (v == REDIRECTED && !has_composite(dpy)) + return 77; + + original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); + for (i = 0; i < res->ncrtc; i++) + original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); + + for (i = 0; i < res->ncrtc; i++) + XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, + 0, 0, None, RR_Rotate_0, NULL, 0); + + 
DRI2CreateDrawable(dpy, root); + DRI2SwapInterval(dpy, root, 0); + + if (d != OFF) { + for (i = 0; i < res->noutput; i++) { + XRROutputInfo *output; + XRRModeInfo *mode; + + output = XRRGetOutputInfo(dpy, res, res->outputs[i]); + if (output == NULL) + continue; + + mode = NULL; + if (res->nmode) + mode = lookup_mode(res, output->modes[0]); + if (mode == NULL) + continue; + + XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime, + 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); + width = mode->width; + height = mode->height; + break; + } + if (i == res->noutput) { + _x_error_occurred = 77; + goto restore; + } + } + + if (w == ROOT) { + run(dpy, root); + } else if (w == FULLSCREEN) { + win = XCreateWindow(dpy, root, + 0, 0, width, height, 0, + DefaultDepth(dpy, DefaultScreen(dpy)), + InputOutput, + DefaultVisual(dpy, DefaultScreen(dpy)), + CWOverrideRedirect, &attr); + DRI2CreateDrawable(dpy, win); + DRI2SwapInterval(dpy, win, 0); + if (v == REDIRECTED) { + XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); + XDamageCreate(dpy, win, XDamageReportRawRectangles); + } else + fullscreen(dpy, win); + XMapWindow(dpy, win); + run(dpy, win); + } else if (w == WINDOW) { + win = XCreateWindow(dpy, root, + 0, 0, width/2, height/2, 0, + DefaultDepth(dpy, DefaultScreen(dpy)), + InputOutput, + DefaultVisual(dpy, DefaultScreen(dpy)), + CWOverrideRedirect, &attr); + DRI2CreateDrawable(dpy, win); + DRI2SwapInterval(dpy, win, 0); + if (v == REDIRECTED) { + XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); + XDamageCreate(dpy, win, XDamageReportRawRectangles); + } + XMapWindow(dpy, win); + run(dpy, win); + } + +restore: + for (i = 0; i < res->ncrtc; i++) + XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, + 0, 0, None, RR_Rotate_0, NULL, 0); + + for (i = 0; i < res->ncrtc; i++) + XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, + original_crtc[i]->x, + original_crtc[i]->y, + original_crtc[i]->mode, + original_crtc[i]->rotation, + 
original_crtc[i]->outputs, + original_crtc[i]->noutput); + + if (DPMSQueryExtension(dpy, &i, &i)) + DPMSEnable(dpy); + + XSync(dpy, True); + return _x_error_occurred; +} diff --git a/benchmarks/dri3-swap.c b/benchmarks/dri3-swap.c new file mode 100644 index 00000000..4dd423b3 --- /dev/null +++ b/benchmarks/dri3-swap.c @@ -0,0 +1,595 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +struct dri3_fence { + XID xid; + void *addr; +}; + +static int _x_error_occurred; +static uint32_t stamp; + +struct list { + struct list *next, *prev; +}; + +static void +list_init(struct list *list) +{ + list->next = list->prev = list; +} + +static inline void +__list_add(struct list *entry, + struct list *prev, + struct list *next) +{ + next->prev = entry; + entry->next = next; + entry->prev = prev; + prev->next = entry; +} + +static inline void +list_add(struct list *entry, struct list *head) +{ + __list_add(entry, head, head->next); +} + +static inline void +__list_del(struct list *prev, struct list *next) +{ + next->prev = prev; + prev->next = next; +} + +static inline void +_list_del(struct list *entry) +{ + __list_del(entry->prev, entry->next); +} + +static inline void +list_move(struct list *list, struct list *head) +{ + if (list->prev != head) { + _list_del(list); + list_add(list, head); + } +} + +#define __container_of(ptr, sample, member) \ + (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample))) + +#define list_for_each_entry(pos, head, member) \ + for (pos = __container_of((head)->next, pos, member); \ + &pos->member != (head); \ + pos = __container_of(pos->member.next, pos, member)) + +static int +_check_error_handler(Display *display, + XErrorEvent *event) +{ + printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", + DisplayString(display), + event->serial, + event->error_code, + event->request_code, + event->minor_code); + _x_error_occurred++; + return False; /* ignored */ +} + +static int dri3_create_fence(Display *dpy, + Pixmap pixmap, + struct dri3_fence *fence) +{ + xcb_connection_t *c = 
XGetXCBConnection(dpy); + struct dri3_fence f; + int fd; + + fd = xshmfence_alloc_shm(); + if (fd < 0) + return -1; + + f.addr = xshmfence_map_shm(fd); + if (f.addr == NULL) { + close(fd); + return -1; + } + + f.xid = xcb_generate_id(c); + xcb_dri3_fence_from_fd(c, pixmap, f.xid, 0, fd); + + *fence = f; + return 0; +} + +static double elapsed(const struct timespec *start, + const struct timespec *end) +{ + return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; +} + +struct buffer { + struct list link; + Pixmap pixmap; + struct dri3_fence fence; + int fd; + int busy; +}; + +static void run(Display *dpy, Window win) +{ + xcb_connection_t *c = XGetXCBConnection(dpy); + struct timespec start, end; +#define N_BACK 8 + struct buffer buffer[N_BACK]; + struct list mru; + Window root; + unsigned int width, height; + unsigned border, depth; + unsigned present_flags = XCB_PRESENT_OPTION_ASYNC; + xcb_xfixes_region_t update = 0; + int completed = 0; + int queued = 0; + uint32_t eid; + void *Q; + int i, n; + + list_init(&mru); + + XGetGeometry(dpy, win, + &root, &i, &n, &width, &height, &border, &depth); + + _x_error_occurred = 0; + + for (n = 0; n < N_BACK; n++) { + xcb_dri3_buffer_from_pixmap_reply_t *reply; + int *fds; + + buffer[n].pixmap = + XCreatePixmap(dpy, win, width, height, depth); + buffer[n].fence.xid = 0; + buffer[n].fd = -1; + + if (dri3_create_fence(dpy, win, &buffer[n].fence)) + return; + + reply = xcb_dri3_buffer_from_pixmap_reply (c, + xcb_dri3_buffer_from_pixmap(c, buffer[n].pixmap), + NULL); + if (reply == NULL) + return; + + fds = xcb_dri3_buffer_from_pixmap_reply_fds (c, reply); + buffer[n].fd = fds[0]; + free(reply); + + /* start idle */ + xshmfence_trigger(buffer[n].fence.addr); + buffer[n].busy = 0; + list_add(&buffer[n].link, &mru); + } + + eid = xcb_generate_id(c); + xcb_present_select_input(c, eid, win, + XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY | + XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY); + Q = xcb_register_for_special_xge(c, 
&xcb_present_id, eid, &stamp); + + clock_gettime(CLOCK_MONOTONIC, &start); + do { + for (n = 0; n < 1000; n++) { + struct buffer *tmp, *b = NULL; + list_for_each_entry(tmp, &mru, link) { + if (!tmp->busy) { + b = tmp; + break; + } + } + while (b == NULL) { + xcb_present_generic_event_t *ev; + + ev = (xcb_present_generic_event_t *) + xcb_wait_for_special_event(c, Q); + if (ev == NULL) + abort(); + + do { + switch (ev->evtype) { + case XCB_PRESENT_COMPLETE_NOTIFY: + completed++; + queued--; + break; + + case XCB_PRESENT_EVENT_IDLE_NOTIFY: + { + xcb_present_idle_notify_event_t *ie = (xcb_present_idle_notify_event_t *)ev; + assert(ie->serial < N_BACK); + buffer[ie->serial].busy = 0; + if (b == NULL) + b = &buffer[ie->serial]; + break; + } + } + free(ev); + } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); + } + + b->busy = 1; + if (b->fence.xid) { + xshmfence_await(b->fence.addr); + xshmfence_reset(b->fence.addr); + } + xcb_present_pixmap(c, win, b->pixmap, b - buffer, + 0, /* valid */ + update, /* update */ + 0, /* x_off */ + 0, /* y_off */ + None, + None, /* wait fence */ + b->fence.xid, + present_flags, + 0, /* target msc */ + 0, /* divisor */ + 0, /* remainder */ + 0, NULL); + list_move(&b->link, &mru); + queued++; + xcb_flush(c); + } + clock_gettime(CLOCK_MONOTONIC, &end); + } while (end.tv_sec < start.tv_sec + 10); + + while (queued) { + xcb_present_generic_event_t *ev; + + ev = (xcb_present_generic_event_t *) + xcb_wait_for_special_event(c, Q); + if (ev == NULL) + abort(); + + do { + switch (ev->evtype) { + case XCB_PRESENT_COMPLETE_NOTIFY: + completed++; + queued--; + break; + + case XCB_PRESENT_EVENT_IDLE_NOTIFY: + break; + } + free(ev); + } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); + } + clock_gettime(CLOCK_MONOTONIC, &end); + + printf("%f\n", completed / (elapsed(&start, &end) / 1000000)); +} + +static int has_present(Display *dpy) +{ + xcb_connection_t *c = XGetXCBConnection(dpy); + 
xcb_generic_error_t *error = NULL; + void *reply; + + reply = xcb_present_query_version_reply(c, + xcb_present_query_version(c, + XCB_PRESENT_MAJOR_VERSION, + XCB_PRESENT_MINOR_VERSION), + &error); + + free(reply); + free(error); + if (reply == NULL) { + fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy)); + return 0; + } + + return 1; +} + +static int has_composite(Display *dpy) +{ + int event, error; + int major, minor; + + if (!XDamageQueryExtension (dpy, &event, &error)) + return 0; + + if (!XCompositeQueryExtension(dpy, &event, &error)) + return 0; + + XCompositeQueryVersion(dpy, &major, &minor); + + return major > 0 || minor >= 4; +} + +static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) +{ + XRRScreenResources *res; + + res = XRRGetScreenResourcesCurrent(dpy, window); + if (res == NULL) + res = XRRGetScreenResources(dpy, window); + + return res; +} + +static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) +{ + int i; + + for (i = 0; i < res->nmode; i++) { + if (res->modes[i].id == id) + return &res->modes[i]; + } + + return NULL; +} + +static void fullscreen(Display *dpy, Window win) +{ + Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); + XChangeProperty(dpy, win, + XInternAtom(dpy, "_NET_WM_STATE", False), + XA_ATOM, 32, PropModeReplace, + (unsigned char *)&atom, 1); +} + +static int dri3_query_version(Display *dpy, int *major, int *minor) +{ + xcb_connection_t *c = XGetXCBConnection(dpy); + xcb_dri3_query_version_reply_t *reply; + xcb_generic_error_t *error; + + *major = *minor = -1; + + reply = xcb_dri3_query_version_reply(c, + xcb_dri3_query_version(c, + XCB_DRI3_MAJOR_VERSION, + XCB_DRI3_MINOR_VERSION), + &error); + free(error); + if (reply == NULL) + return -1; + + *major = reply->major_version; + *minor = reply->minor_version; + free(reply); + + return 0; +} + +static int has_dri3(Display *dpy) +{ + const xcb_query_extension_reply_t *ext; + int major, minor; + + ext = 
xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id); + if (ext == NULL || !ext->present) + return 0; + + if (dri3_query_version(dpy, &major, &minor) < 0) + return 0; + + return major >= 0; +} + +int main(int argc, char **argv) +{ + Display *dpy; + Window root, win; + XRRScreenResources *res; + XRRCrtcInfo **original_crtc; + XSetWindowAttributes attr; + enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN; + enum visible {REDIRECTED, NORMAL } v = NORMAL; + enum display { OFF, ON } d = OFF; + int width, height; + int i; + + while ((i = getopt(argc, argv, "d:v:w:")) != -1) { + switch (i) { + case 'd': + if (strcmp(optarg, "off") == 0) + d = OFF; + else if (strcmp(optarg, "on") == 0) + d = ON; + else + abort(); + break; + + case 'v': + if (strcmp(optarg, "redirected") == 0) + v = REDIRECTED; + else if (strcmp(optarg, "normal") == 0) + v = NORMAL; + else + abort(); + break; + + case 'w': + if (strcmp(optarg, "fullscreen") == 0) + w = FULLSCREEN; + else if (strcmp(optarg, "window") == 0) + w = WINDOW; + else if (strcmp(optarg, "root") == 0) + w = ROOT; + else + abort(); + break; + } + } + + attr.override_redirect = 1; + + dpy = XOpenDisplay(NULL); + if (dpy == NULL) + return 77; + + width = DisplayWidth(dpy, DefaultScreen(dpy)); + height = DisplayHeight(dpy, DefaultScreen(dpy)); + + if (!has_present(dpy)) + return 77; + + if (!has_dri3(dpy)) + return 77; + + if (DPMSQueryExtension(dpy, &i, &i)) + DPMSDisable(dpy); + + root = DefaultRootWindow(dpy); + + signal(SIGALRM, SIG_IGN); + XSetErrorHandler(_check_error_handler); + + res = NULL; + if (XRRQueryVersion(dpy, &i, &i)) + res = _XRRGetScreenResourcesCurrent(dpy, root); + if (res == NULL) + return 77; + + if (v == REDIRECTED && !has_composite(dpy)) + return 77; + + original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); + for (i = 0; i < res->ncrtc; i++) + original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); + + for (i = 0; i < res->ncrtc; i++) + XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, 
+ 0, 0, None, RR_Rotate_0, NULL, 0); + + if (d != OFF) { + for (i = 0; i < res->noutput; i++) { + XRROutputInfo *output; + XRRModeInfo *mode; + + output = XRRGetOutputInfo(dpy, res, res->outputs[i]); + if (output == NULL) + continue; + + mode = NULL; + if (res->nmode) + mode = lookup_mode(res, output->modes[0]); + if (mode == NULL) + continue; + + XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime, + 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); + width = mode->width; + height = mode->height; + break; + } + if (i == res->noutput) { + _x_error_occurred = 77; + goto restore; + } + } + + if (w == ROOT) { + run(dpy, root); + } else if (w == FULLSCREEN) { + win = XCreateWindow(dpy, root, + 0, 0, width, height, 0, + DefaultDepth(dpy, DefaultScreen(dpy)), + InputOutput, + DefaultVisual(dpy, DefaultScreen(dpy)), + CWOverrideRedirect, &attr); + if (v == REDIRECTED) { + XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); + XDamageCreate(dpy, win, XDamageReportRawRectangles); + } else + fullscreen(dpy, win); + XMapWindow(dpy, win); + run(dpy, win); + } else if (w == WINDOW) { + win = XCreateWindow(dpy, root, + 0, 0, width/2, height/2, 0, + DefaultDepth(dpy, DefaultScreen(dpy)), + InputOutput, + DefaultVisual(dpy, DefaultScreen(dpy)), + CWOverrideRedirect, &attr); + if (v == REDIRECTED) { + XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); + XDamageCreate(dpy, win, XDamageReportRawRectangles); + } + XMapWindow(dpy, win); + run(dpy, win); + } + +restore: + for (i = 0; i < res->ncrtc; i++) + XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, + 0, 0, None, RR_Rotate_0, NULL, 0); + + for (i = 0; i < res->ncrtc; i++) + XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, + original_crtc[i]->x, + original_crtc[i]->y, + original_crtc[i]->mode, + original_crtc[i]->rotation, + original_crtc[i]->outputs, + original_crtc[i]->noutput); + + if (DPMSQueryExtension(dpy, &i, &i)) + DPMSEnable(dpy); + + XSync(dpy, True); + return _x_error_occurred; 
+} diff --git a/configure.ac b/configure.ac index 61bea435..d13917ec 100644 --- a/configure.ac +++ b/configure.ac @@ -195,18 +195,24 @@ AC_ARG_ENABLE(udev, [UDEV="$enableval"], [UDEV=auto]) +udev_msg=" disabled" if test "x$UDEV" != "xno"; then PKG_CHECK_MODULES(UDEV, [libudev], [udev="yes"], [udev="no"]) + AC_CHECK_HEADERS([sys/stat.h], [], [udev="no"]) if test "x$UDEV" = "xyes" -a "x$udev" != "xyes"; then AC_MSG_ERROR([udev support requested but not found (libudev)]) fi if test "x$udev" = "xyes"; then AC_DEFINE(HAVE_UDEV,1,[Enable udev-based monitor hotplug detection]) + udev_msg=" yes" + else + udev_msg=" no" fi fi -PKG_CHECK_MODULES(X11, [x11 xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], [x11="yes"], [x11="no"]) +PKG_CHECK_MODULES(X11, [x11 x11-xcb xcb-dri2 xcomposite xdamage xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], [x11="yes"], [x11="no"]) AM_CONDITIONAL(HAVE_X11, test "x$x11" = "xyes") +echo X11_CLFAGS="$X11_CLFAGS" X11_LIBS="$X11_LIBS" cpuid="yes" AC_TRY_LINK([ @@ -270,10 +276,13 @@ if test "x$shm" = "xyes"; then AC_DEFINE([HAVE_MIT_SHM], 1, [Define to 1 if MIT-SHM is available]) fi -PKG_CHECK_MODULES(X11_DRI3, [xcb-dri3 xcb-sync xcb-present x11-xcb xshmfence x11 xrender xext libdrm], [x11_dri3="yes"], [x11_dri3="no"]) +PKG_CHECK_MODULES(X11_DRI3, [xcb-dri3 xcb-sync xcb-xfixes xcb-present x11-xcb xshmfence x11 xcomposite xdamage xrender xrandr xxf86vm xext libdrm], [x11_dri3="yes"], [x11_dri3="no"]) AM_CONDITIONAL(X11_DRI3, test "x$x11_dri3" = "xyes" -a "x$shm" = "xyes") AM_CONDITIONAL(X11_SHM, test "x$shm" = "xyes") +PKG_CHECK_MODULES(X11_VM, [xxf86vm], [x11_vm="yes"], [x11_vm="no"]) +AM_CONDITIONAL(X11_VM, test "x$x11_vm" = "xyes") + AC_ARG_ENABLE(tools, AS_HELP_STRING([--disable-tools], [Enable building and installing the miscellaneous tools [default=auto]]), @@ -285,7 +294,7 @@ if test "x$shm" != "xyes"; then tools="no" fi if test "x$tools" != "xno"; then - ivo_requires="xrandr xdamage xfixes xcursor 
xtst xrender xext x11 pixman-1" + ivo_requires="xrandr xdamage xfixes xcursor xtst xrender xscrnsaver xext x11 pixman-1" extra_cflags="" ignore="xinerama" @@ -307,6 +316,8 @@ if test "x$tools" != "xno"; then tools="no" fi + PKG_CHECK_MODULES(TOOL_CURSOR, [xfixes x11 libpng], [cursor="yes"], [ivo="no"]) + IVO_CFLAGS="$IVO_CFLAGS $extra_cflags" fi if test "x$tools" != "xno"; then @@ -315,6 +326,7 @@ fi AC_MSG_CHECKING([whether to build additional tools]) AC_MSG_RESULT([$tools]) AM_CONDITIONAL(BUILD_TOOLS, test "x$tools" != "xno") +AM_CONDITIONAL(BUILD_TOOL_CURSOR, test "x$cursor" = "xyes") # Define a configure option for an alternate module directory AC_ARG_WITH(xorg-module-dir, @@ -339,10 +351,20 @@ AC_ARG_ENABLE(dri2, [DRI2=$enableval], [DRI2=yes]) AC_ARG_ENABLE(dri3, - AS_HELP_STRING([--enable-dri3], - [Enable DRI3 support [[default=no]]]), + AS_HELP_STRING([--disable-dri3], + [Disable DRI3 support [[default=yes]]]), [DRI3=$enableval], - [DRI3=no]) + [DRI3=yes]) +AC_ARG_WITH(default-dri, + AS_HELP_STRING([--with-default-dri], + [Select the default maximum DRI level [default 2]]), + [DRI_DEFAULT=$withval], + [DRI_DEFAULT=2]) +if test "x$DRI_DEFAULT" = "x0"; then + AC_DEFINE(DEFAULT_DRI_LEVEL, 0,[Default DRI level]) +else + AC_DEFINE(DEFAULT_DRI_LEVEL, ~0, [Default DRI level]) +fi AC_ARG_ENABLE(xvmc, AS_HELP_STRING([--disable-xvmc], [Disable XvMC support [[default=yes]]]), @@ -375,14 +397,12 @@ AC_ARG_ENABLE(ums-only, required_xorg_server_version=1.6 required_pixman_version=0.16 -if pkg-config --exists 'pixman-1 >= 0.27.1'; then - AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache]) -fi - -if pkg-config --exists 'pixman-1 >= 0.24.0'; then - AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation]) -fi - +PKG_CHECK_EXISTS([pixman-1 >= 0.24.0], + AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation]) + []) +PKG_CHECK_EXISTS([pixman-1 >= 0.27.1], + [AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache])], + []) 
# Store the list of server defined optional extensions in REQUIRED_MODULES XORG_DRIVER_CHECK_EXT(RANDR, randrproto) XORG_DRIVER_CHECK_EXT(RENDER, renderproto) @@ -398,24 +418,25 @@ AC_ARG_ENABLE(sna, [SNA="$enableval"], [SNA=auto]) +AC_CHECK_HEADERS([dev/wscons/wsconsio.h]) +AC_FUNC_ALLOCA +AC_HEADER_MAJOR + if test "x$SNA" != "xno"; then AC_DEFINE(USE_SNA, 1, [Enable SNA support]) AC_CHECK_HEADERS([sys/sysinfo.h], AC_CHECK_MEMBERS([struct sysinfo.totalram], [], [], [[#include ]])) fi uxa_requires_libdrm=2.4.52 +uxa_requires_pixman=0.24.0 + AC_ARG_ENABLE(uxa, AS_HELP_STRING([--enable-uxa], [Enable Unified Acceleration Architecture (UXA) [default=auto]]), [UXA="$enableval"], [UXA=auto]) if test "x$UXA" = "xauto"; then - if ! pkg-config --exists "libdrm_intel >= $uxa_requires_libdrm"; then - UXA=no - fi - if ! pkg-config --exists 'pixman-1 >= 0.24.0'; then - UXA=no - fi + PKG_CHECK_EXISTS([libdrm_intel >= $uxa_requires_libdrm pixman-1 >= $uxa_requires_pixman], [], [UXA=no]) fi if test "x$UXA" != "xno"; then AC_DEFINE(USE_UXA, 1, [Enable UXA support]) @@ -424,8 +445,10 @@ if test "x$UXA" != "xno"; then UXA=yes fi -PKG_CHECK_MODULES(XORG, [xorg-server >= $required_xorg_server_version xproto fontsproto pixman-1 >= $required_pixman_version $REQUIRED_MODULES]) +PKG_CHECK_MODULES(XORG, [xorg-server >= $required_xorg_server_version xproto fontsproto damageproto pixman-1 >= $required_pixman_version $REQUIRED_MODULES]) ABI_VERSION=`$PKG_CONFIG --variable=abi_videodrv xorg-server` +XSERVER_VERSION=`$PKG_CONFIG --modversion xorg-server` +PIXMAN_VERSION=`$PKG_CONFIG --modversion pixman-1` if test "x$ONLY_UMS" = "xyes"; then UMS="yes" @@ -519,7 +542,12 @@ AC_MSG_RESULT([$have_dri1]) AM_CONDITIONAL(DRI1, test "x$have_dri1" != "xno") if test "x$have_dri1" != "xno"; then AC_DEFINE(HAVE_DRI1,1,[Enable DRI1 driver support]) - dri_msg="$dri_msg DRI1" + str="DRI1" + if test "x$DRI_DEFAULT" = "x1"; then + AC_DEFINE(DEFAULT_DRI_LEVEL,1,[Default DRI level]) + str="*$str" + fi + 
dri_msg="$dri_msg $str" else DRI1_CFLAGS="" DRI1_LIBS="" @@ -576,7 +604,12 @@ AM_CONDITIONAL(DRI2, test "x$have_dri2" != "xno") AC_MSG_RESULT([$have_dri2]) if test "x$have_dri2" != "xno"; then AC_DEFINE(HAVE_DRI2,1,[Enable DRI2 driver support]) - dri_msg="$dri_msg DRI2" + str="DRI2" + if test "x$DRI_DEFAULT" = "x2"; then + AC_DEFINE(DEFAULT_DRI_LEVEL,2,[Default DRI level]) + str="*$str" + fi + dri_msg="$dri_msg $str" else if test "x$DRI" = "xyes" -a "x$DRI2" != "xno" -a "x$KMS" = "xyes"; then AC_MSG_ERROR([DRI2 requested but prerequisites not found]) @@ -591,13 +624,21 @@ AM_CONDITIONAL(DRI3, test "x$have_dri3" != "xno") AC_MSG_RESULT([$have_dri3]) if test "x$have_dri3" != "xno"; then AC_DEFINE(HAVE_DRI3,1,[Enable DRI3 driver support]) - dri_msg="$dri_msg DRI3" + str="DRI3" + if test "x$DRI_DEFAULT" = "x3"; then + AC_DEFINE(DEFAULT_DRI_LEVEL,3,[Default DRI level]) + str="*$str" + fi + dri_msg="$dri_msg $str" else if test "x$DRI" = "xyes" -a "x$DRI3" != "xno" -a "x$KMS" = "xyes"; then AC_MSG_ERROR([DRI3 requested but prerequisites not found]) fi fi +AC_MSG_CHECKING([default DRI support]) +AC_MSG_RESULT([$DEFAULT_DRI_DEFAULT]) + AC_CHECK_HEADERS([X11/extensions/dpmsconst.h]) PRESENT="no" @@ -711,27 +752,6 @@ if test "x$TEARFREE" = "xyes"; then xp_msg="$xp_msg TearFree" fi -AC_ARG_ENABLE(rendernode, - AS_HELP_STRING([--enable-rendernode], - [Enable use of render nodes (experimental) [default=no]]), - [RENDERNODE="$enableval"], - [RENDERNODE="no"]) -AM_CONDITIONAL(USE_RENDERNODE, test "x$RENDERNODE" = "xyes") -if test "x$RENDERNODE" = "xyes"; then - AC_DEFINE(USE_RENDERNODE,1,[Assume "rendernode" support]) - xp_msg="$xp_msg rendernode" -fi - -AC_ARG_ENABLE(wc-mmap, - AS_HELP_STRING([--enable-wc-mmap], - [Enable use of WriteCombining mmaps [default=no]]), - [WC_MMAP="$enableval"], - [WC_MMAP="no"]) -if test "x$WC_MMAP" = "xyes"; then - AC_DEFINE(USE_WC_MMAP,1,[Enable use of WriteCombining mmaps]) - xp_msg="$xp_msg mmap(wc)" -fi - AC_ARG_ENABLE(create2, 
AS_HELP_STRING([--enable-create2], [Enable use of create2 ioctl (experimental) [default=no]]), @@ -848,6 +868,7 @@ AC_CONFIG_FILES([ xvmc/shader/mc/Makefile xvmc/shader/vld/Makefile test/Makefile + benchmarks/Makefile tools/Makefile tools/org.x.xf86-video-intel.backlight-helper.policy ]) @@ -855,7 +876,7 @@ AC_OUTPUT echo "" echo "" -test -e `pwd $0`/README && cat `pwd $0`/README +cat $srcdir/README accel_msg="" if test "x$SNA" != "xno"; then @@ -895,13 +916,15 @@ fi echo "" echo "AC_PACKAGE_STRING will be compiled with:" -echo " Xorg Video ABI version: $ABI_VERSION" +echo " Xorg Video ABI version: $ABI_VERSION (xorg-server-$XSERVER_VERSION)" +echo " pixman version: pixman-1-$PIXMAN_VERSION" echo " Acceleration backends:$accel_msg" echo " Additional debugging support?$debug_msg" echo " Support for Kernel Mode Setting? $KMS" echo " Support for legacy User Mode Setting (for i810)? $UMS" echo " Support for Direct Rendering Infrastructure:$dri_msg" echo " Support for Xv motion compensation (XvMC and libXvMC):$xvmc_msg" +echo " Support for display hotplug notifications (udev):$udev_msg" echo " Build additional tools and utilities?$tools_msg" if test -n "$xp_msg"; then echo " Experimental support:$xp_msg" diff --git a/libobj/alloca.c b/libobj/alloca.c new file mode 100644 index 00000000..883e1e9f --- /dev/null +++ b/libobj/alloca.c @@ -0,0 +1,4 @@ +void *alloca(size_t sz) +{ + return NULL; +} diff --git a/man/intel.man b/man/intel.man index 17515206..be398fbe 100644 --- a/man/intel.man +++ b/man/intel.man @@ -27,9 +27,9 @@ supports the i810, i810-DC100, i810e, i815, i830M, 845G, 852GM, 855GM, 865G, 915G, 915GM, 945G, 945GM, 965G, 965Q, 946GZ, 965GM, 945GME, G33, Q33, Q35, G35, GM45, G45, Q45, G43, G41 chipsets, Pineview-M in Atom N400 series, Pineview-D in Atom D400/D500 series, -Intel(R) HD Graphics: 2000-6000, -Intel(R) Iris(TM) Graphics: 5100/6100, and -Intel(R) Iris(TM) Pro Graphics: 5200/6200/P6300. 
+Intel(R) HD Graphics, +Intel(R) Iris(TM) Graphics, +Intel(R) Iris(TM) Pro Graphics. .SH CONFIGURATION DETAILS Please refer to __xconfigfile__(__filemansuffix__) for general configuration @@ -112,8 +112,8 @@ The default is 8192 if AGP allocable memory is < 128 MB, 16384 if < 192 MB, 24576 if higher. DRI require at least a value of 16384. Higher values may give better 3D performance, at expense of available system memory. .TP -.BI "Option \*qNoAccel\*q \*q" boolean \*q -Disable or enable acceleration. +.BI "Option \*qAccel\*q \*q" boolean \*q +Enable or disable acceleration. .IP Default: acceleration is enabled. @@ -122,8 +122,8 @@ The following driver .B Options are supported for the 830M and later chipsets: .TP -.BI "Option \*qNoAccel\*q \*q" boolean \*q -Disable or enable acceleration. +.BI "Option \*qAccel\*q \*q" boolean \*q +Enable or disable acceleration. .IP Default: acceleration is enabled. .TP @@ -201,6 +201,16 @@ that choice by specifying the entry under /sys/class/backlight to use. .IP Default: Automatic selection. .TP +.BI "Option \*qCustomEDID\*q \*q" string \*q +Override the probed EDID on particular outputs. Sometimes the manufacturer +supplied EDID is corrupt or lacking a few usable modes and supplying a +corrected EDID may be easier than specifying every modeline. This option +allows to pass the path to load an EDID from per output. The format is a +comma separated string of output:path pairs, e.g. +DP1:/path/to/dp1.edid,DP2:/path/to/dp2.edid +.IP +Default: No override, use manufacturer supplied EDIDs. +.TP .BI "Option \*qFallbackDebug\*q \*q" boolean \*q Enable printing of debugging information on acceleration fallbacks to the server log. @@ -225,6 +235,15 @@ i.e. perform synchronous rendering. .IP Default: Disabled .TP +.BI "Option \*qHWRotation\*q \*q" boolean \*q +Override the use of native hardware rotation and force the use of software, +but GPU accelerated where possible, rotation. 
On some platforms the hardware +can scanout directly into a rotated output bypassing the intermediate rendering +and extra allocations required for software implemented rotation (i.e. native +rotation uses less resources, is quicker and uses less power). This allows you +to disable the native rotation in case of errors. +.IP +Default: Enabled (use hardware rotation) .TP .BI "Option \*qVSync\*q \*q" boolean \*q This option controls the use of commands to synchronise rendering with the @@ -324,13 +343,29 @@ Default: 0 .BI "Option \*qZaphodHeads\*q \*q" string \*q .IP Specify the randr output(s) to use with zaphod mode for a particular driver -instance. If you this option you must use it with all instances of the -driver +instance. If you set this option you must use it with all instances of the +driver. By default, each head is assigned only one CRTC (which limits +using multiple outputs with that head to cloned mode). CRTC can be manually +assigned to individual heads by preceding the output names with a comma +delimited list of pipe numbers followed by a colon. Note that different pipes +may be limited in their functionality and some outputs may only work with +different pipes. .br For example: + +.RS .B Option \*qZaphodHeads\*q \*qLVDS1,VGA1\*q -will assign xrandr outputs LVDS1 and VGA0 to this instance of the driver. + +will assign xrandr outputs LVDS1 and VGA1 to this instance of the driver. +.RE + +.RS +.B +Option \*qZaphodHeads\*q \*q0,2:HDMI1,DP2\*q + +will assign xrandr outputs HDMI1 and DP2 and CRTCs 0 and 2 to this instance of the driver. +.RE .SH OUTPUT CONFIGURATION On 830M and better chipsets, the driver supports runtime configuration of @@ -431,11 +466,11 @@ First DVI SDVO output Second DVI SDVO output .SS "TMDS-1", "TMDS-2", "HDMI-1", "HDMI-2" -DVI/HDMI outputs. Avaliable common properties include: +DVI/HDMI outputs. 
Available common properties include: .TP \fBBROADCAST_RGB\fP - method used to set RGB color range Adjusting this property allows you to set RGB color range on each -channel in order to match HDTV requirment(default 0 for full +channel in order to match HDTV requirement(default 0 for full range). Setting 1 means RGB color range is 16-235, 0 means RGB color range is 0-255 on each channel. (Full range is 0-255, not 16-235) diff --git a/src/backlight.c b/src/backlight.c index 9f239867..fcbb279f 100644 --- a/src/backlight.c +++ b/src/backlight.c @@ -34,6 +34,12 @@ #include #include +#if MAJOR_IN_MKDEV +#include +#elif MAJOR_IN_SYSMACROS +#include +#endif + #include #include #include @@ -42,6 +48,7 @@ #include #include #include +#include #include #include @@ -84,7 +91,7 @@ void backlight_init(struct backlight *b) b->has_power = 0; } -#ifdef __OpenBSD__ +#ifdef HAVE_DEV_WSCONS_WSCONSIO_H #include #include @@ -122,6 +129,11 @@ int backlight_get(struct backlight *b) return param.curval; } +char *backlight_find_for_device(struct pci_device *pci) +{ + return NULL; +} + int backlight_open(struct backlight *b, char *iface) { struct wsdisplay_param param; @@ -146,12 +158,9 @@ int backlight_open(struct backlight *b, char *iface) return param.curval; } -enum backlight_type backlight_exists(const char *iface) +int backlight_exists(const char *iface) { - if (iface != NULL) - return BL_NONE; - - return BL_PLATFORM; + return iface == NULL; } int backlight_on(struct backlight *b) @@ -163,6 +172,7 @@ int backlight_off(struct backlight *b) { return 0; } + #else static int @@ -213,6 +223,24 @@ __backlight_read(const char *iface, const char *file) } static int +writen(int fd, const char *value, int len) +{ + int ret; + + do { + ret = write(fd, value, len); + if (ret < 0) { + if (errno == EAGAIN || errno == EINTR) + continue; + + return ret; + } + } while (value += ret, len -= ret); + + return 0; +} + +static int __backlight_write(const char *iface, const char *file, const char *value) { 
int fd, ret; @@ -221,7 +249,7 @@ __backlight_write(const char *iface, const char *file, const char *value) if (fd < 0) return -1; - ret = write(fd, value, strlen(value)+1); + ret = writen(fd, value, strlen(value)+1); close(fd); return ret; @@ -244,10 +272,10 @@ static const char *known_interfaces[] = { "intel_backlight", }; -static enum backlight_type __backlight_type(const char *iface) +static int __backlight_type(const char *iface) { char buf[1024]; - int fd, v; + int fd, v, i; v = -1; fd = __backlight_open(iface, "type", O_RDONLY); @@ -261,39 +289,41 @@ static enum backlight_type __backlight_type(const char *iface) buf[v] = '\0'; if (strcmp(buf, "raw") == 0) - v = BL_RAW; + v = BL_RAW << 8; else if (strcmp(buf, "platform") == 0) - v = BL_PLATFORM; + v = BL_PLATFORM << 8; else if (strcmp(buf, "firmware") == 0) - v = BL_FIRMWARE; + v = BL_FIRMWARE << 8; else - v = BL_NAMED; + v = BL_NAMED << 8; } else - v = BL_NAMED; + v = BL_NAMED << 8; - if (v == BL_NAMED) { - int i; - for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) { - if (strcmp(iface, known_interfaces[i]) == 0) - break; - } - v += i; + for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) { + if (strcmp(iface, known_interfaces[i]) == 0) + break; } + v += i; return v; } -enum backlight_type backlight_exists(const char *iface) +static int __backlight_exists(const char *iface) { if (__backlight_read(iface, "brightness") < 0) - return BL_NONE; + return -1; if (__backlight_read(iface, "max_brightness") <= 0) - return BL_NONE; + return -1; return __backlight_type(iface); } +int backlight_exists(const char *iface) +{ + return __backlight_exists(iface) != -1; +} + static int __backlight_init(struct backlight *b, char *iface, int fd) { b->fd = fd_move_cloexec(fd_set_nonblock(fd)); @@ -399,7 +429,50 @@ __backlight_find(void) continue; /* Fallback to priority list of known iface for old kernels */ - v = backlight_exists(de->d_name); + v = __backlight_exists(de->d_name); + if (v < 0) + continue; + + if (v < best_type) { 
+ char *copy = strdup(de->d_name); + if (copy) { + free(best_iface); + best_iface = copy; + best_type = v; + } + } + } + closedir(dir); + + return best_iface; +} + +char *backlight_find_for_device(struct pci_device *pci) +{ + char path[200]; + unsigned best_type = INT_MAX; + char *best_iface = NULL; + DIR *dir; + struct dirent *de; + + snprintf(path, sizeof(path), + "/sys/bus/pci/devices/%04x:%02x:%02x.%d/backlight", + pci->domain, pci->bus, pci->dev, pci->func); + + dir = opendir(path); + if (dir == NULL) + return NULL; + + while ((de = readdir(dir))) { + int v; + + if (*de->d_name == '.') + continue; + + v = __backlight_exists(de->d_name); + if (v < 0) + continue; + if (v < best_type) { char *copy = strdup(de->d_name); if (copy) { @@ -416,14 +489,17 @@ __backlight_find(void) int backlight_open(struct backlight *b, char *iface) { - int level; + int level, type; if (iface == NULL) iface = __backlight_find(); if (iface == NULL) goto err; - b->type = __backlight_type(iface); + type = __backlight_type(iface); + if (type < 0) + goto err; + b->type = type >> 8; b->max = __backlight_read(iface, "max_brightness"); if (b->max <= 0) @@ -447,7 +523,7 @@ err: int backlight_set(struct backlight *b, int level) { char val[BACKLIGHT_VALUE_LEN]; - int len, ret = 0; + int len; if (b->iface == NULL) return 0; @@ -456,10 +532,7 @@ int backlight_set(struct backlight *b, int level) level = b->max; len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level); - if (write(b->fd, val, len) != len) - ret = -1; - - return ret; + return writen(b->fd, val, len); } int backlight_get(struct backlight *b) @@ -517,43 +590,6 @@ void backlight_disable(struct backlight *b) void backlight_close(struct backlight *b) { backlight_disable(b); - if (b->pid) + if (b->pid > 0) waitpid(b->pid, NULL, 0); } - -char *backlight_find_for_device(struct pci_device *pci) -{ - char path[200]; - unsigned best_type = INT_MAX; - char *best_iface = NULL; - DIR *dir; - struct dirent *de; - - snprintf(path, sizeof(path), - 
"/sys/bus/pci/devices/%04x:%02x:%02x.%d/backlight", - pci->domain, pci->bus, pci->dev, pci->func); - - dir = opendir(path); - if (dir == NULL) - return NULL; - - while ((de = readdir(dir))) { - int v; - - if (*de->d_name == '.') - continue; - - v = backlight_exists(de->d_name); - if (v < best_type) { - char *copy = strdup(de->d_name); - if (copy) { - free(best_iface); - best_iface = copy; - best_type = v; - } - } - } - closedir(dir); - - return best_iface; -} diff --git a/src/backlight.h b/src/backlight.h index bb0e28bc..ba17755b 100644 --- a/src/backlight.h +++ b/src/backlight.h @@ -43,7 +43,7 @@ struct backlight { int pid, fd; }; -enum backlight_type backlight_exists(const char *iface); +int backlight_exists(const char *iface); void backlight_init(struct backlight *backlight); int backlight_open(struct backlight *backlight, char *iface); diff --git a/src/compat-api.h b/src/compat-api.h index d09e1fb3..05797a08 100644 --- a/src/compat-api.h +++ b/src/compat-api.h @@ -30,6 +30,7 @@ #include #include +#include #include #ifndef GLYPH_HAS_GLYPH_PICTURE_ACCESSOR @@ -39,7 +40,17 @@ #ifndef XF86_HAS_SCRN_CONV #define xf86ScreenToScrn(s) xf86Screens[(s)->myNum] +#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,1,0,0,0) #define xf86ScrnToScreen(s) screenInfo.screens[(s)->scrnIndex] +#else +#define xf86ScrnToScreen(s) ((s)->pScreen) +#endif +#else +#define xf86ScrnToScreen(s) ((s)->pScreen) +#endif + +#if GET_ABI_MAJOR(ABI_VIDEODRV_VERSION) >= 22 +#define HAVE_NOTIFY_FD 1 #endif #ifndef XF86_SCRN_INTERFACE @@ -131,6 +142,17 @@ region_rects(const RegionRec *r) return r->data ? 
(const BoxRec *)(r->data + 1) : &r->extents; } +inline static void +region_get_boxes(const RegionRec *r, const BoxRec **s, const BoxRec **e) +{ + int n; + if (r->data) + *s = region_boxptr(r), n = r->data->numRects; + else + *s = &r->extents, n = 1; + *e = *s + n; +} + #ifndef INCLUDE_LEGACY_REGION_DEFINES #define RegionCreate(r, s) REGION_CREATE(NULL, r, s) #define RegionBreak(r) REGION_BREAK(NULL, r) @@ -223,4 +245,19 @@ static inline void FreePixmap(PixmapPtr pixmap) dstx, dsty) #endif +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0) +#define isGPU(S) (S)->is_gpu +#else +#define isGPU(S) 0 +#endif + +#if HAS_DIRTYTRACKING_ROTATION +#define PixmapSyncDirtyHelper(d, dd) PixmapSyncDirtyHelper(d) +#endif + +#if !HAVE_NOTIFY_FD +#define SetNotifyFd(fd, cb, mode, data) AddGeneralSocket(fd); +#define RemoveNotifyFd(fd) RemoveGeneralSocket(fd) +#endif + #endif diff --git a/src/i915_pciids.h b/src/i915_pciids.h index 180ad0e6..466c7159 100644 --- a/src/i915_pciids.h +++ b/src/i915_pciids.h @@ -134,7 +134,7 @@ #define INTEL_IVB_Q_IDS(info) \ INTEL_QUANTA_VGA_DEVICE(info) /* Quanta transcode */ -#define INTEL_HSW_D_IDS(info) \ +#define INTEL_HSW_IDS(info) \ INTEL_VGA_DEVICE(0x0402, info), /* GT1 desktop */ \ INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \ INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \ @@ -179,9 +179,7 @@ INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \ INTEL_VGA_DEVICE(0x0D0E, info), /* CRW GT1 reserved */ \ INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0D2E, info) /* CRW GT3 reserved */ \ - -#define INTEL_HSW_M_IDS(info) \ + INTEL_VGA_DEVICE(0x0D2E, info), /* CRW GT3 reserved */ \ INTEL_VGA_DEVICE(0x0406, info), /* GT1 mobile */ \ INTEL_VGA_DEVICE(0x0416, info), /* GT2 mobile */ \ INTEL_VGA_DEVICE(0x0426, info), /* GT2 mobile */ \ @@ -198,60 +196,48 @@ INTEL_VGA_DEVICE(0x0D16, info), /* CRW GT2 mobile */ \ INTEL_VGA_DEVICE(0x0D26, info) /* CRW GT3 mobile */ -#define INTEL_VLV_M_IDS(info) \ 
+#define INTEL_VLV_IDS(info) \ INTEL_VGA_DEVICE(0x0f30, info), \ INTEL_VGA_DEVICE(0x0f31, info), \ INTEL_VGA_DEVICE(0x0f32, info), \ INTEL_VGA_DEVICE(0x0f33, info), \ - INTEL_VGA_DEVICE(0x0157, info) - -#define INTEL_VLV_D_IDS(info) \ + INTEL_VGA_DEVICE(0x0157, info), \ INTEL_VGA_DEVICE(0x0155, info) -#define _INTEL_BDW_M(gt, id, info) \ - INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info) -#define _INTEL_BDW_D(gt, id, info) \ - INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info) - -#define _INTEL_BDW_M_IDS(gt, info) \ - _INTEL_BDW_M(gt, 0x1602, info), /* ULT */ \ - _INTEL_BDW_M(gt, 0x1606, info), /* ULT */ \ - _INTEL_BDW_M(gt, 0x160B, info), /* Iris */ \ - _INTEL_BDW_M(gt, 0x160E, info) /* ULX */ - -#define _INTEL_BDW_D_IDS(gt, info) \ - _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ - _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ - -#define INTEL_BDW_GT12M_IDS(info) \ - _INTEL_BDW_M_IDS(1, info), \ - _INTEL_BDW_M_IDS(2, info) - -#define INTEL_BDW_GT12D_IDS(info) \ - _INTEL_BDW_D_IDS(1, info), \ - _INTEL_BDW_D_IDS(2, info) - -#define INTEL_BDW_GT3M_IDS(info) \ - _INTEL_BDW_M_IDS(3, info) - -#define INTEL_BDW_GT3D_IDS(info) \ - _INTEL_BDW_D_IDS(3, info) - -#define INTEL_BDW_RSVDM_IDS(info) \ - _INTEL_BDW_M_IDS(4, info) - -#define INTEL_BDW_RSVDD_IDS(info) \ - _INTEL_BDW_D_IDS(4, info) - -#define INTEL_BDW_M_IDS(info) \ - INTEL_BDW_GT12M_IDS(info), \ - INTEL_BDW_GT3M_IDS(info), \ - INTEL_BDW_RSVDM_IDS(info) - -#define INTEL_BDW_D_IDS(info) \ - INTEL_BDW_GT12D_IDS(info), \ - INTEL_BDW_GT3D_IDS(info), \ - INTEL_BDW_RSVDD_IDS(info) +#define INTEL_BDW_GT12_IDS(info) \ + INTEL_VGA_DEVICE(0x1602, info), /* GT1 ULT */ \ + INTEL_VGA_DEVICE(0x1606, info), /* GT1 ULT */ \ + INTEL_VGA_DEVICE(0x160B, info), /* GT1 Iris */ \ + INTEL_VGA_DEVICE(0x160E, info), /* GT1 ULX */ \ + INTEL_VGA_DEVICE(0x1612, info), /* GT2 Halo */ \ + INTEL_VGA_DEVICE(0x1616, info), /* GT2 ULT */ \ + INTEL_VGA_DEVICE(0x161B, info), /* GT2 ULT */ \ + INTEL_VGA_DEVICE(0x161E, info), /* GT2 ULX */ \ + 
INTEL_VGA_DEVICE(0x160A, info), /* GT1 Server */ \ + INTEL_VGA_DEVICE(0x160D, info), /* GT1 Workstation */ \ + INTEL_VGA_DEVICE(0x161A, info), /* GT2 Server */ \ + INTEL_VGA_DEVICE(0x161D, info) /* GT2 Workstation */ + +#define INTEL_BDW_GT3_IDS(info) \ + INTEL_VGA_DEVICE(0x1622, info), /* ULT */ \ + INTEL_VGA_DEVICE(0x1626, info), /* ULT */ \ + INTEL_VGA_DEVICE(0x162B, info), /* Iris */ \ + INTEL_VGA_DEVICE(0x162E, info), /* ULX */\ + INTEL_VGA_DEVICE(0x162A, info), /* Server */ \ + INTEL_VGA_DEVICE(0x162D, info) /* Workstation */ + +#define INTEL_BDW_RSVD_IDS(info) \ + INTEL_VGA_DEVICE(0x1632, info), /* ULT */ \ + INTEL_VGA_DEVICE(0x1636, info), /* ULT */ \ + INTEL_VGA_DEVICE(0x163B, info), /* Iris */ \ + INTEL_VGA_DEVICE(0x163E, info), /* ULX */ \ + INTEL_VGA_DEVICE(0x163A, info), /* Server */ \ + INTEL_VGA_DEVICE(0x163D, info) /* Workstation */ + +#define INTEL_BDW_IDS(info) \ + INTEL_BDW_GT12_IDS(info), \ + INTEL_BDW_GT3_IDS(info), \ + INTEL_BDW_RSVD_IDS(info) #define INTEL_CHV_IDS(info) \ INTEL_VGA_DEVICE(0x22b0, info), \ @@ -259,21 +245,85 @@ INTEL_VGA_DEVICE(0x22b2, info), \ INTEL_VGA_DEVICE(0x22b3, info) -#define INTEL_SKL_IDS(info) \ - INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \ +#define INTEL_SKL_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x1906, info), /* ULT GT1 */ \ - INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \ INTEL_VGA_DEVICE(0x190E, info), /* ULX GT1 */ \ + INTEL_VGA_DEVICE(0x1902, info), /* DT GT1 */ \ + INTEL_VGA_DEVICE(0x190B, info), /* Halo GT1 */ \ + INTEL_VGA_DEVICE(0x190A, info) /* SRV GT1 */ + +#define INTEL_SKL_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \ + INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \ INTEL_VGA_DEVICE(0x191E, info), /* ULX GT2 */ \ INTEL_VGA_DEVICE(0x1912, info), /* DT GT2 */ \ - INTEL_VGA_DEVICE(0x1902, info), /* DT GT1 */ \ INTEL_VGA_DEVICE(0x191B, info), /* Halo GT2 */ \ - INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \ - INTEL_VGA_DEVICE(0x190B, 
info), /* Halo GT1 */ \ INTEL_VGA_DEVICE(0x191A, info), /* SRV GT2 */ \ - INTEL_VGA_DEVICE(0x192A, info), /* SRV GT3 */ \ - INTEL_VGA_DEVICE(0x190A, info), /* SRV GT1 */ \ INTEL_VGA_DEVICE(0x191D, info) /* WKS GT2 */ +#define INTEL_SKL_GT3_IDS(info) \ + INTEL_VGA_DEVICE(0x1923, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x1927, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \ + INTEL_VGA_DEVICE(0x192D, info) /* SRV GT3 */ + +#define INTEL_SKL_GT4_IDS(info) \ + INTEL_VGA_DEVICE(0x1932, info), /* DT GT4 */ \ + INTEL_VGA_DEVICE(0x193B, info), /* Halo GT4 */ \ + INTEL_VGA_DEVICE(0x193D, info), /* WKS GT4 */ \ + INTEL_VGA_DEVICE(0x192A, info), /* SRV GT4 */ \ + INTEL_VGA_DEVICE(0x193A, info) /* SRV GT4e */ + +#define INTEL_SKL_IDS(info) \ + INTEL_SKL_GT1_IDS(info), \ + INTEL_SKL_GT2_IDS(info), \ + INTEL_SKL_GT3_IDS(info), \ + INTEL_SKL_GT4_IDS(info) + +#define INTEL_BXT_IDS(info) \ + INTEL_VGA_DEVICE(0x0A84, info), \ + INTEL_VGA_DEVICE(0x1A84, info), \ + INTEL_VGA_DEVICE(0x1A85, info), \ + INTEL_VGA_DEVICE(0x5A84, info), /* APL HD Graphics 505 */ \ + INTEL_VGA_DEVICE(0x5A85, info) /* APL HD Graphics 500 */ + +#define INTEL_GLK_IDS(info) \ + INTEL_VGA_DEVICE(0x3184, info), \ + INTEL_VGA_DEVICE(0x3185, info) + +#define INTEL_KBL_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x5913, info), /* ULT GT1.5 */ \ + INTEL_VGA_DEVICE(0x5915, info), /* ULX GT1.5 */ \ + INTEL_VGA_DEVICE(0x5917, info), /* DT GT1.5 */ \ + INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \ + INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \ + INTEL_VGA_DEVICE(0x5902, info), /* DT GT1 */ \ + INTEL_VGA_DEVICE(0x5908, info), /* Halo GT1 */ \ + INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \ + INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */ + +#define INTEL_KBL_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \ + INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \ + INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \ + INTEL_VGA_DEVICE(0x5912, 
info), /* DT GT2 */ \ + INTEL_VGA_DEVICE(0x591B, info), /* Halo GT2 */ \ + INTEL_VGA_DEVICE(0x591A, info), /* SRV GT2 */ \ + INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */ + +#define INTEL_KBL_GT3_IDS(info) \ + INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x5927, info) /* ULT GT3 */ + +#define INTEL_KBL_GT4_IDS(info) \ + INTEL_VGA_DEVICE(0x593B, info) /* Halo GT4 */ + +#define INTEL_KBL_IDS(info) \ + INTEL_KBL_GT1_IDS(info), \ + INTEL_KBL_GT2_IDS(info), \ + INTEL_KBL_GT3_IDS(info), \ + INTEL_KBL_GT4_IDS(info) + #endif /* _I915_PCIIDS_H */ diff --git a/src/intel_device.c b/src/intel_device.c index 140e1536..c4910cd8 100644 --- a/src/intel_device.c +++ b/src/intel_device.c @@ -38,6 +38,12 @@ #include #include +#if MAJOR_IN_MKDEV +#include +#elif MAJOR_IN_SYSMACROS +#include +#endif + #include #include @@ -197,9 +203,15 @@ static inline struct intel_device *intel_device(ScrnInfoPtr scrn) return xf86GetEntityPrivate(scrn->entityList[0], intel_device_key)->ptr; } +static const char *kernel_module_names[] ={ + "i915", + NULL, +}; + static int is_i915_device(int fd) { drm_version_t version; + const char **kn; char name[5] = ""; memset(&version, 0, sizeof(version)); @@ -209,7 +221,22 @@ static int is_i915_device(int fd) if (drmIoctl(fd, DRM_IOCTL_VERSION, &version)) return 0; - return strcmp("i915", name) == 0; + for (kn = kernel_module_names; *kn; kn++) + if (strcmp(*kn, name) == 0) + return 1; + + return 0; +} + +static int load_i915_kernel_module(void) +{ + const char **kn; + + for (kn = kernel_module_names; *kn; kn++) + if (xf86LoadKernelModule(*kn)) + return 0; + + return -1; } static int is_i915_gem(int fd) @@ -336,7 +363,7 @@ static int __intel_open_device__pci(const struct pci_device *pci) sprintf(path + base, "driver"); if (stat(path, &st)) { - if (xf86LoadKernelModule("i915")) + if (load_i915_kernel_module()) return -1; (void)xf86LoadKernelModule("fbcon"); } @@ -399,7 +426,7 @@ static int 
__intel_open_device__legacy(const struct pci_device *pci) ret = drmCheckModesettingSupported(id); if (ret) { - if (xf86LoadKernelModule("i915")) + if (load_i915_kernel_module() == 0) ret = drmCheckModesettingSupported(id); if (ret) return -1; @@ -461,9 +488,9 @@ static int is_render_node(int fd, struct stat *st) static char *find_render_node(int fd) { -#if defined(USE_RENDERNODE) struct stat master, render; char buf[128]; + int i; /* Are we a render-node ourselves? */ if (is_render_node(fd, &master)) @@ -472,9 +499,17 @@ static char *find_render_node(int fd) sprintf(buf, "/dev/dri/renderD%d", (int)((master.st_rdev | 0x80) & 0xbf)); if (stat(buf, &render) == 0 && master.st_mode == render.st_mode && - render.st_rdev == ((master.st_rdev | 0x80) & 0xbf)) + render.st_rdev == (master.st_rdev | 0x80)) return strdup(buf); -#endif + + /* Misaligned card <-> renderD, do a full search */ + for (i = 0; i < 16; i++) { + sprintf(buf, "/dev/dri/renderD%d", i + 128); + if (stat(buf, &render) == 0 && + master.st_mode == render.st_mode && + render.st_rdev == (master.st_rdev | 0x80)) + return strdup(buf); + } return NULL; } @@ -608,6 +643,27 @@ err_path: return -1; } +void intel_close_device(int entity_num) +{ + struct intel_device *dev; + + if (intel_device_key == -1) + return; + + dev = xf86GetEntityPrivate(entity_num, intel_device_key)->ptr; + xf86GetEntityPrivate(entity_num, intel_device_key)->ptr = NULL; + if (!dev) + return; + + if (dev->master_count == 0) /* Don't close server-fds */ + close(dev->fd); + + if (dev->render_node != dev->master_node) + free(dev->render_node); + free(dev->master_node); + free(dev); +} + int __intel_peek_fd(ScrnInfoPtr scrn) { struct intel_device *dev; @@ -672,6 +728,12 @@ struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd) return dev; } +const char *intel_get_master_name(struct intel_device *dev) +{ + assert(dev && dev->master_node); + return dev->master_node; +} + const char *intel_get_client_name(struct intel_device *dev) { 
assert(dev && dev->render_node); diff --git a/src/intel_driver.h b/src/intel_driver.h index 28ed1a0e..bece88a0 100644 --- a/src/intel_driver.h +++ b/src/intel_driver.h @@ -124,9 +124,11 @@ int intel_entity_get_devid(int index); int intel_open_device(int entity_num, const struct pci_device *pci, struct xf86_platform_device *dev); +void intel_close_device(int entity_num); int __intel_peek_fd(ScrnInfoPtr scrn); struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd); int intel_has_render_node(struct intel_device *dev); +const char *intel_get_master_name(struct intel_device *dev); const char *intel_get_client_name(struct intel_device *dev); int intel_get_client_fd(struct intel_device *dev); int intel_get_device_id(struct intel_device *dev); diff --git a/src/intel_list.h b/src/intel_list.h index 51af825d..c8a3187a 100644 --- a/src/intel_list.h +++ b/src/intel_list.h @@ -306,8 +306,7 @@ list_is_empty(const struct list *head) list_entry((ptr)->prev, type, member) #define __container_of(ptr, sample, member) \ - (void *)((char *)(ptr) \ - - ((char *)&(sample)->member - (char *)(sample))) + (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample))) /** * Loop through the list given by head and set pos to struct in the list. 
* @@ -392,17 +391,50 @@ static inline void list_move_tail(struct list *list, struct list *head) #define list_last_entry(ptr, type, member) \ list_entry((ptr)->prev, type, member) -#define list_for_each_entry_reverse(pos, head, member) \ +#define list_for_each_entry_reverse(pos, head, member) \ for (pos = __container_of((head)->prev, pos, member); \ &pos->member != (head); \ pos = __container_of(pos->member.prev, pos, member)) #endif +#define list_for_each_entry_safe_from(pos, tmp, head, member) \ + for (tmp = __container_of(pos->member.next, pos, member); \ + &pos->member != (head); \ + pos = tmp, tmp = __container_of(tmp->member.next, tmp, member)) + #undef container_of #define container_of(ptr, type, member) \ ((type *)((char *)(ptr) - (char *) &((type *)0)->member)) +static inline void __list_splice(const struct list *list, + struct list *prev, + struct list *next) +{ + struct list *first = list->next; + struct list *last = list->prev; + + first->prev = prev; + prev->next = first; + + last->next = next; + next->prev = last; +} + +static inline void list_splice(const struct list *list, + struct list *head) +{ + if (!list_is_empty(list)) + __list_splice(list, head, head->next); +} + +static inline void list_splice_tail(const struct list *list, + struct list *head) +{ + if (!list_is_empty(list)) + __list_splice(list, head->prev, head); +} + static inline int list_is_singular(const struct list *list) { return list->next == list->prev; diff --git a/src/intel_module.c b/src/intel_module.c index 102d52aa..2e97b5ea 100644 --- a/src/intel_module.c +++ b/src/intel_module.c @@ -126,6 +126,17 @@ static const struct intel_device_info intel_skylake_info = { .gen = 0110, }; +static const struct intel_device_info intel_broxton_info = { + .gen = 0111, +}; + +static const struct intel_device_info intel_kabylake_info = { + .gen = 0112, +}; + +static const struct intel_device_info intel_geminilake_info = { + .gen = 0113, +}; static const SymTabRec intel_chipsets[] = { 
{PCI_CHIP_I810, "i810"}, @@ -234,30 +245,63 @@ static const SymTabRec intel_chipsets[] = { {0x0157, "HD Graphics"}, /* Broadwell Marketing names */ - {0x1602, "HD graphics"}, - {0x1606, "HD graphics"}, - {0x160B, "HD graphics"}, - {0x160A, "HD graphics"}, - {0x160D, "HD graphics"}, - {0x160E, "HD graphics"}, - {0x1612, "HD graphics 5600"}, - {0x1616, "HD graphics 5500"}, - {0x161B, "HD graphics"}, - {0x161A, "HD graphics"}, - {0x161D, "HD graphics"}, - {0x161E, "HD graphics 5300"}, - {0x1622, "Iris Pro graphics 6200"}, - {0x1626, "HD graphics 6000"}, - {0x162B, "Iris graphics 6100"}, - {0x162A, "Iris Pro graphics P6300"}, - {0x162D, "HD graphics"}, - {0x162E, "HD graphics"}, - {0x1632, "HD graphics"}, - {0x1636, "HD graphics"}, - {0x163B, "HD graphics"}, - {0x163A, "HD graphics"}, - {0x163D, "HD graphics"}, - {0x163E, "HD graphics"}, + {0x1602, "HD Graphics"}, + {0x1606, "HD Graphics"}, + {0x160B, "HD Graphics"}, + {0x160A, "HD Graphics"}, + {0x160D, "HD Graphics"}, + {0x160E, "HD Graphics"}, + {0x1612, "HD Graphics 5600"}, + {0x1616, "HD Graphics 5500"}, + {0x161B, "HD Graphics"}, + {0x161A, "HD Graphics"}, + {0x161D, "HD Graphics"}, + {0x161E, "HD Graphics 5300"}, + {0x1622, "Iris Pro Graphics 6200"}, + {0x1626, "HD Graphics 6000"}, + {0x162B, "Iris Graphics 6100"}, + {0x162A, "Iris Pro Graphics P6300"}, + {0x162D, "HD Graphics"}, + {0x162E, "HD Graphics"}, + {0x1632, "HD Graphics"}, + {0x1636, "HD Graphics"}, + {0x163B, "HD Graphics"}, + {0x163A, "HD Graphics"}, + {0x163D, "HD Graphics"}, + {0x163E, "HD Graphics"}, + + /* Cherryview (Cherrytrail/Braswell) */ + {0x22b0, "HD Graphics"}, + {0x22b1, "HD Graphics"}, + {0x22b2, "HD Graphics"}, + {0x22b3, "HD Graphics"}, + + /* Skylake */ + {0x1902, "HD Graphics 510"}, + {0x1906, "HD Graphics 510"}, + {0x190B, "HD Graphics 510"}, + {0x1912, "HD Graphics 530"}, + {0x1916, "HD Graphics 520"}, + {0x191B, "HD Graphics 530"}, + {0x191D, "HD Graphics P530"}, + {0x191E, "HD Graphics 515"}, + {0x1921, "HD Graphics 520"}, + 
{0x1926, "Iris Graphics 540"}, + {0x1927, "Iris Graphics 550"}, + {0x192B, "Iris Graphics 555"}, + {0x192D, "Iris Graphics P555"}, + {0x1932, "Iris Pro Graphics 580"}, + {0x193A, "Iris Pro Graphics P580"}, + {0x193B, "Iris Pro Graphics 580"}, + {0x193D, "Iris Pro Graphics P580"}, + + /* Broxton (Apollolake) */ + {0x5A84, "HD Graphics 505"}, + {0x5A85, "HD Graphics 500"}, + + /* Kabylake */ + {0x5916, "HD Graphics 620"}, + {0x591E, "HD Graphics 615"}, /* When adding new identifiers, also update: * 1. intel_identify() @@ -305,18 +349,14 @@ static const struct pci_id_match intel_device_match[] = { INTEL_IVB_D_IDS(&intel_ivybridge_info), INTEL_IVB_M_IDS(&intel_ivybridge_info), - INTEL_HSW_D_IDS(&intel_haswell_info), - INTEL_HSW_M_IDS(&intel_haswell_info), - - INTEL_VLV_D_IDS(&intel_valleyview_info), - INTEL_VLV_M_IDS(&intel_valleyview_info), - - INTEL_BDW_D_IDS(&intel_broadwell_info), - INTEL_BDW_M_IDS(&intel_broadwell_info), - + INTEL_HSW_IDS(&intel_haswell_info), + INTEL_VLV_IDS(&intel_valleyview_info), + INTEL_BDW_IDS(&intel_broadwell_info), INTEL_CHV_IDS(&intel_cherryview_info), - INTEL_SKL_IDS(&intel_skylake_info), + INTEL_BXT_IDS(&intel_broxton_info), + INTEL_KBL_IDS(&intel_kabylake_info), + INTEL_GLK_IDS(&intel_geminilake_info), INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info), #endif @@ -448,9 +488,9 @@ static void intel_identify(int flags) if (unique != stack) free(unique); - xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) HD Graphics: 2000-6000\n"); - xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Graphics: 5100, 6100\n"); - xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Pro Graphics: 5200, 6200, P6300\n"); + xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) HD Graphics\n"); + xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Graphics\n"); + xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Pro Graphics\n"); } static Bool intel_driver_func(ScrnInfoPtr pScrn, @@ -508,6 +548,9 @@ static enum accel_method { 
NOACCEL, SNA, UXA } get_accel_method(void) if (hosted()) return SNA; + if (xf86configptr == NULL) /* X -configure */ + return SNA; + dev = _xf86findDriver("intel", xf86configptr->conf_device_lst); if (dev && dev->dev_option_lst) { const char *s; @@ -582,10 +625,17 @@ intel_scrn_create(DriverPtr driver, case NOACCEL: #endif case UXA: - return intel_init_scrn(scrn); + return intel_init_scrn(scrn); #endif - default: break; + default: +#if USE_SNA + return sna_init_scrn(scrn, entity_num); +#elif USE_UXA + return intel_init_scrn(scrn); +#else + break; +#endif } #endif @@ -604,6 +654,8 @@ static Bool intel_pci_probe(DriverPtr driver, struct pci_device *pci, intptr_t match_data) { + Bool ret; + if (intel_open_device(entity_num, pci, NULL) == -1) { #if UMS switch (pci->device_id) { @@ -621,7 +673,11 @@ static Bool intel_pci_probe(DriverPtr driver, #endif } - return intel_scrn_create(driver, entity_num, match_data, 0); + ret = intel_scrn_create(driver, entity_num, match_data, 0); + if (!ret) + intel_close_device(entity_num); + + return ret; } #ifdef XSERVER_PLATFORM_BUS @@ -644,9 +700,16 @@ intel_platform_probe(DriverPtr driver, /* if we get any flags we don't understand fail to probe for now */ if (flags) - return FALSE; + goto err; + + if (!intel_scrn_create(driver, entity_num, match_data, scrn_flags)) + goto err; - return intel_scrn_create(driver, entity_num, match_data, scrn_flags); + return TRUE; + +err: + intel_close_device(entity_num); + return FALSE; } #endif diff --git a/src/intel_options.c b/src/intel_options.c index ff8541a4..7f253ac1 100644 --- a/src/intel_options.c +++ b/src/intel_options.c @@ -2,18 +2,24 @@ #include "config.h" #endif +#include +#include +#include + #include "intel_options.h" const OptionInfoRec intel_options[] = { - {OPTION_ACCEL_DISABLE, "NoAccel", OPTV_BOOLEAN, {0}, 0}, + {OPTION_ACCEL_ENABLE, "Accel", OPTV_BOOLEAN, {0}, 0}, {OPTION_ACCEL_METHOD, "AccelMethod", OPTV_STRING, {0}, 0}, {OPTION_BACKLIGHT, "Backlight", OPTV_STRING, {0}, 0}, + 
{OPTION_EDID, "CustomEDID", OPTV_STRING, {0}, 0}, {OPTION_DRI, "DRI", OPTV_STRING, {0}, 0}, {OPTION_PRESENT, "Present", OPTV_BOOLEAN, {0}, 1}, {OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, 0}, {OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, 0}, {OPTION_TILING_2D, "Tiling", OPTV_BOOLEAN, {0}, 1}, {OPTION_TILING_FB, "LinearFramebuffer", OPTV_BOOLEAN, {0}, 0}, + {OPTION_ROTATION, "HWRotation", OPTV_BOOLEAN, {0}, 1}, {OPTION_VSYNC, "VSync", OPTV_BOOLEAN, {0}, 1}, {OPTION_PAGEFLIP, "PageFlip", OPTV_BOOLEAN, {0}, 1}, {OPTION_SWAPBUFFERS_WAIT, "SwapbuffersWait", OPTV_BOOLEAN, {0}, 1}, @@ -21,7 +27,6 @@ const OptionInfoRec intel_options[] = { {OPTION_PREFER_OVERLAY, "XvPreferOverlay", OPTV_BOOLEAN, {0}, 0}, {OPTION_HOTPLUG, "HotPlug", OPTV_BOOLEAN, {0}, 1}, {OPTION_REPROBE, "ReprobeOutputs", OPTV_BOOLEAN, {0}, 0}, - {OPTION_DELETE_DP12, "DeleteUnusedDP12Displays", OPTV_BOOLEAN, {0}, 0}, #ifdef INTEL_XVMC {OPTION_XVMC, "XvMC", OPTV_BOOLEAN, {0}, 1}, #endif @@ -54,3 +59,85 @@ OptionInfoPtr intel_options_get(ScrnInfoPtr scrn) return options; } + +Bool intel_option_cast_to_bool(OptionInfoPtr options, int id, Bool val) +{ +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) + xf86getBoolValue(&val, xf86GetOptValString(options, id)); +#endif + return val; +} + +static int +namecmp(const char *s1, const char *s2) +{ + char c1, c2; + + if (!s1 || *s1 == 0) { + if (!s2 || *s2 == 0) + return 0; + else + return 1; + } + + while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') + s1++; + + while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') + s2++; + + c1 = isupper(*s1) ? tolower(*s1) : *s1; + c2 = isupper(*s2) ? tolower(*s2) : *s2; + while (c1 == c2) { + if (c1 == '\0') + return 0; + + s1++; + while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') + s1++; + + s2++; + while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') + s2++; + + c1 = isupper(*s1) ? tolower(*s1) : *s1; + c2 = isupper(*s2) ? 
tolower(*s2) : *s2; + } + + return c1 - c2; +} + +unsigned intel_option_cast_to_unsigned(OptionInfoPtr options, int id, unsigned val) +{ +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) + const char *str = xf86GetOptValString(options, id); +#else + const char *str = NULL; +#endif + unsigned v; + + if (str == NULL || *str == '\0') + return val; + + if (namecmp(str, "on") == 0) + return val; + if (namecmp(str, "true") == 0) + return val; + if (namecmp(str, "yes") == 0) + return val; + + if (namecmp(str, "0") == 0) + return 0; + if (namecmp(str, "off") == 0) + return 0; + if (namecmp(str, "false") == 0) + return 0; + if (namecmp(str, "no") == 0) + return 0; + + v = atoi(str); + if (v) + return v; + + return val; +} diff --git a/src/intel_options.h b/src/intel_options.h index 7e2cbd9b..43635f1f 100644 --- a/src/intel_options.h +++ b/src/intel_options.h @@ -12,15 +12,17 @@ */ enum intel_options { - OPTION_ACCEL_DISABLE, + OPTION_ACCEL_ENABLE, OPTION_ACCEL_METHOD, OPTION_BACKLIGHT, + OPTION_EDID, OPTION_DRI, OPTION_PRESENT, OPTION_VIDEO_KEY, OPTION_COLOR_KEY, OPTION_TILING_2D, OPTION_TILING_FB, + OPTION_ROTATION, OPTION_VSYNC, OPTION_PAGEFLIP, OPTION_SWAPBUFFERS_WAIT, @@ -28,7 +30,6 @@ enum intel_options { OPTION_PREFER_OVERLAY, OPTION_HOTPLUG, OPTION_REPROBE, - OPTION_DELETE_DP12, #if defined(XvMCExtension) && defined(ENABLE_XVMC) OPTION_XVMC, #define INTEL_XVMC 1 @@ -51,5 +52,7 @@ enum intel_options { extern const OptionInfoRec intel_options[]; OptionInfoPtr intel_options_get(ScrnInfoPtr scrn); +unsigned intel_option_cast_to_unsigned(OptionInfoPtr, int id, unsigned val); +Bool intel_option_cast_to_bool(OptionInfoPtr, int id, Bool val); #endif /* INTEL_OPTIONS_H */ diff --git a/src/legacy/i810/i810_common.h b/src/legacy/i810/i810_common.h index 4cc10e8b..8355708c 100644 --- a/src/legacy/i810/i810_common.h +++ b/src/legacy/i810/i810_common.h @@ -52,7 +52,7 @@ #define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1)) -/* Using usleep() makes things noticably slow. 
*/ +/* Using usleep() makes things noticeably slow. */ #if 0 #define DELAY(x) usleep(x) #else @@ -185,7 +185,7 @@ enum { * - zbuffer linear offset and pitch -- also invarient * - drawing origin in back and depth buffers. * - * Keep the depth/back buffer state here to acommodate private buffers + * Keep the depth/back buffer state here to accommodate private buffers * in the future. */ #define I810_DESTREG_DI0 0 /* CMD_OP_DESTBUFFER_INFO (2 dwords) */ diff --git a/src/legacy/i810/i810_hwmc.c b/src/legacy/i810/i810_hwmc.c index 7cb9c1ab..58661b0a 100644 --- a/src/legacy/i810/i810_hwmc.c +++ b/src/legacy/i810/i810_hwmc.c @@ -171,7 +171,7 @@ static XF86MCAdaptorPtr ppAdapt[1] = * * I810InitMC * - * Initialize the hardware motion compenstation extention for this + * Initialize the hardware motion compensation extension for this * hardware. The initialization routines want the address of the pointers * to the structures, not the address of the structures. This means we * allocate (or create static?) the pointer memory and pass that diff --git a/src/legacy/i810/i810_memory.c b/src/legacy/i810/i810_memory.c index c3de2777..6f274836 100644 --- a/src/legacy/i810/i810_memory.c +++ b/src/legacy/i810/i810_memory.c @@ -76,7 +76,7 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn) unsigned long size = pScrn->videoRam * 1024UL; I810Ptr pI810 = I810PTR(pScrn); int key; - long tom = 0; + unsigned long tom = 0; unsigned long physical; if (!xf86AgpGARTSupported() || !xf86AcquireGART(pScrn->scrnIndex)) { @@ -132,8 +132,8 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn) * Keep it 512K aligned for the sake of tiled regions. 
*/ - tom += 0x7ffff; - tom &= ~0x7ffff; + tom += 0x7ffffUL; + tom &= ~0x7ffffUL; if ((key = xf86AllocateGARTMemory(pScrn->scrnIndex, size, 1, NULL)) != -1) { pI810->DcacheOffset = tom; diff --git a/src/legacy/i810/i810_reg.h b/src/legacy/i810/i810_reg.h index 54faeb3d..fa091c5b 100644 --- a/src/legacy/i810/i810_reg.h +++ b/src/legacy/i810/i810_reg.h @@ -245,7 +245,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * not sure they refer to local (graphics) memory. * * These details are for the local memory control registers, - * (pp301-310). The test machines are not equiped with local memory, + * (pp301-310). The test machines are not equipped with local memory, * so nothing is tested. Only a single row seems to be supported. */ #define DRAM_ROW_TYPE 0x3000 diff --git a/src/legacy/i810/i810_video.c b/src/legacy/i810/i810_video.c index be49b91d..af683c81 100644 --- a/src/legacy/i810/i810_video.c +++ b/src/legacy/i810/i810_video.c @@ -77,7 +77,11 @@ static int I810PutImage( ScrnInfoPtr, static int I810QueryImageAttributes(ScrnInfoPtr, int, unsigned short *, unsigned short *, int *, int *); +#if !HAVE_NOTIFY_FD static void I810BlockHandler(BLOCKHANDLER_ARGS_DECL); +#else +static void I810BlockHandler(void *data, void *_timeout); +#endif #define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE) @@ -418,8 +422,14 @@ I810SetupImageVideo(ScreenPtr screen) pI810->adaptor = adapt; +#if !HAVE_NOTIFY_FD pI810->BlockHandler = screen->BlockHandler; screen->BlockHandler = I810BlockHandler; +#else + RegisterBlockAndWakeupHandlers(I810BlockHandler, + (ServerWakeupHandlerProcPtr)NoopDDA, + pScrn); +#endif xvBrightness = MAKE_ATOM("XV_BRIGHTNESS"); xvContrast = MAKE_ATOM("XV_CONTRAST"); @@ -1135,6 +1145,7 @@ I810QueryImageAttributes( return size; } +#if !HAVE_NOTIFY_FD static void I810BlockHandler (BLOCKHANDLER_ARGS_DECL) { @@ -1172,6 +1183,38 @@ I810BlockHandler (BLOCKHANDLER_ARGS_DECL) } } } +#else +static void +I810BlockHandler(void *data, void *_timeout) +{ + ScrnInfoPtr 
pScrn = data; + I810Ptr pI810 = I810PTR(pScrn); + I810PortPrivPtr pPriv = GET_PORT_PRIVATE(pScrn); + I810OverlayRegPtr overlay = (I810OverlayRegPtr) (pI810->FbBase + pI810->OverlayStart); + + if(pPriv->videoStatus & TIMER_MASK) { + UpdateCurrentTime(); + if(pPriv->videoStatus & OFF_TIMER) { + if(pPriv->offTime < currentTime.milliseconds) { + /* Turn off the overlay */ + overlay->OV0CMD &= 0xFFFFFFFE; + OVERLAY_UPDATE(pI810->OverlayPhysical); + + pPriv->videoStatus = FREE_TIMER; + pPriv->freeTime = currentTime.milliseconds + FREE_DELAY; + } + } else { /* FREE_TIMER */ + if(pPriv->freeTime < currentTime.milliseconds) { + if(pPriv->linear) { + xf86FreeOffscreenLinear(pPriv->linear); + pPriv->linear = NULL; + } + pPriv->videoStatus = 0; + } + } + } +} +#endif /*************************************************************************** @@ -1373,7 +1416,6 @@ I810DisplaySurface( UpdateCurrentTime(); pI810Priv->videoStatus = FREE_TIMER; pI810Priv->freeTime = currentTime.milliseconds + FREE_DELAY; - pScrn->pScreen->BlockHandler = I810BlockHandler; } return Success; diff --git a/src/legacy/i810/xvmc/I810XvMC.c b/src/legacy/i810/xvmc/I810XvMC.c index e6b63d30..a538e999 100644 --- a/src/legacy/i810/xvmc/I810XvMC.c +++ b/src/legacy/i810/xvmc/I810XvMC.c @@ -61,7 +61,7 @@ static int event_base; // Arguments: pI810XvMC private data structure from the current context. // Notes: We faked the drmMapBufs for the i810's security so now we have // to insert an allocated page into the correct spot in the faked -// list to keep up appearences. +// list to keep up appearances. // Concept for this function was taken from Mesa sources. // Returns: drmBufPtr containing the information about the allocated page. ***************************************************************************/ @@ -188,7 +188,7 @@ _X_EXPORT Status XvMCCreateContext(Display *display, XvPortID port, /* Check for drm */ if(! 
drmAvailable()) { - printf("Direct Rendering is not avilable on this system!\n"); + printf("Direct Rendering is not available on this system!\n"); return BadAlloc; } @@ -3279,7 +3279,7 @@ _X_EXPORT Status XvMCSyncSurface(Display *display,XvMCSurface *surface) { // display - Connection to X server // surface - Surface to flush // Info: -// This command is a noop for i810 becuase we always dispatch buffers in +// This command is a noop for i810 because we always dispatch buffers in // render. There is little gain to be had with 4k buffers. // Returns: Status ***************************************************************************/ diff --git a/src/render_program/exa_wm.g4i b/src/render_program/exa_wm.g4i index 5d3d45b1..587b581c 100644 --- a/src/render_program/exa_wm.g4i +++ b/src/render_program/exa_wm.g4i @@ -57,7 +57,7 @@ define(`mask_dw_dy', `g6.4<0,1,0>F') define(`mask_wo', `g6.12<0,1,0>F') /* - * Local variables. Pairs must be aligned on even reg boundry + * Local variables. Pairs must be aligned on even reg boundary */ /* this holds the X dest coordinates */ diff --git a/src/render_program/exa_wm_yuv_rgb.g8a b/src/render_program/exa_wm_yuv_rgb.g8a index 7def0930..34973ba8 100644 --- a/src/render_program/exa_wm_yuv_rgb.g8a +++ b/src/render_program/exa_wm_yuv_rgb.g8a @@ -76,7 +76,7 @@ add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; /* * R = Y + Cr * 1.596 */ -mov (8) acc0<1>F Yn<8,8,1>F { compr align1 }; +mov (8) acc0<1>F Yn_01<8,8,1>F { compr align1 }; mac.sat(8) src_sample_r_01<1>F Crn_01<8,8,1>F 1.596F { compr align1 }; mov (8) acc0<1>F Yn_23<8,8,1>F { compr align1 }; @@ -84,7 +84,7 @@ mac.sat(8) src_sample_r_23<1>F Crn_23<8,8,1>F 1.596F { compr align1 }; /* * G = Crn * -0.813 + Cbn * -0.392 + Y */ -mov (8) acc0<1>F Yn_23<8,8,1>F { compr align1 }; +mov (8) acc0<1>F Yn_01<8,8,1>F { compr align1 }; mac (8) acc0<1>F Crn_01<8,8,1>F -0.813F { compr align1 }; mac.sat(8) src_sample_g_01<1>F Cbn_01<8,8,1>F -0.392F { compr align1 }; diff --git 
a/src/render_program/exa_wm_yuv_rgb.g8b b/src/render_program/exa_wm_yuv_rgb.g8b index 44949538..2cd6fc44 100644 --- a/src/render_program/exa_wm_yuv_rgb.g8b +++ b/src/render_program/exa_wm_yuv_rgb.g8b @@ -6,7 +6,7 @@ { 0x80600048, 0x21c03ae8, 0x3e8d02c0, 0x3fcc49ba }, { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 }, { 0x80600048, 0x21e03ae8, 0x3e8d02e0, 0x3fcc49ba }, - { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 }, + { 0x00600001, 0x24003ae0, 0x008d0300, 0x00000000 }, { 0x00600048, 0x24003ae0, 0x3e8d02c0, 0xbf5020c5 }, { 0x80600048, 0x22003ae8, 0x3e8d0340, 0xbec8b439 }, { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 }, diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am index e09a8d49..adf13963 100644 --- a/src/sna/Makefile.am +++ b/src/sna/Makefile.am @@ -107,6 +107,8 @@ libsna_la_SOURCES = \ gen8_render.h \ gen8_vertex.c \ gen8_vertex.h \ + gen9_render.c \ + gen9_render.h \ xassert.h \ $(NULL) diff --git a/src/sna/blt.c b/src/sna/blt.c index b5bfee69..cb90437a 100644 --- a/src/sna/blt.c +++ b/src/sna/blt.c @@ -30,112 +30,608 @@ #endif #include "sna.h" +#include -#if __x86_64__ -#define USE_SSE2 1 -#endif - -#if USE_SSE2 +#if defined(sse2) +#pragma GCC push_options +#pragma GCC target("sse2,inline-all-stringops,fpmath=sse") +#pragma GCC optimize("Ofast") #include #if __x86_64__ #define have_sse2() 1 #else -enum { - MMX = 0x1, - MMX_EXTENSIONS = 0x2, - SSE = 0x6, - SSE2 = 0x8, - CMOV = 0x10 -}; - -#ifdef __GNUC__ -static unsigned int -detect_cpu_features(void) -{ - unsigned int features; - unsigned int result = 0; - - char vendor[13]; - vendor[0] = 0; - vendor[12] = 0; - - asm ( - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ecx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "mov $0x0, %%edx\n" - "xor %%ecx, %%eax\n" - "jz 1f\n" - - "mov $0x00000000, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "mov %%ebx, %%eax\n" - "pop %%ebx\n" - "mov %%eax, %1\n" - "mov %%edx, %2\n" - "mov %%ecx, %3\n" - "mov $0x00000001, %%eax\n" 
- "push %%ebx\n" - "cpuid\n" - "pop %%ebx\n" - "1:\n" - "mov %%edx, %0\n" - : "=r" (result), "=m" (vendor[0]), "=m" (vendor[4]), "=m" (vendor[8]) - :: "%eax", "%ecx", "%edx"); - - features = 0; - if (result) { - /* result now contains the standard feature bits */ - if (result & (1 << 15)) - features |= CMOV; - if (result & (1 << 23)) - features |= MMX; - if (result & (1 << 25)) - features |= SSE; - if (result & (1 << 26)) - features |= SSE2; - } - return features; -} -#else -static unsigned int detect_cpu_features(void) { return 0; } -#endif - static bool have_sse2(void) { static int sse2_present = -1; if (sse2_present == -1) - sse2_present = detect_cpu_features() & SSE2; + sse2_present = sna_cpu_detect() & SSE2; return sse2_present; } #endif -static inline __m128i +static force_inline __m128i xmm_create_mask_32(uint32_t mask) { return _mm_set_epi32(mask, mask, mask, mask); } -static inline __m128i +static force_inline __m128i +xmm_load_128(const __m128i *src) +{ + return _mm_load_si128(src); +} + +static force_inline __m128i xmm_load_128u(const __m128i *src) { return _mm_loadu_si128(src); } -static inline void +static force_inline void xmm_save_128(__m128i *dst, __m128i data) { _mm_store_si128(dst, data); } + +static force_inline void +xmm_save_128u(__m128i *dst, __m128i data) +{ + _mm_storeu_si128(dst, data); +} + +static force_inline void +to_sse128xN(uint8_t *dst, const uint8_t *src, int bytes) +{ + int i; + + for (i = 0; i < bytes / 128; i++) { + __m128i xmm0, xmm1, xmm2, xmm3; + __m128i xmm4, xmm5, xmm6, xmm7; + + xmm0 = xmm_load_128u((const __m128i*)src + 0); + xmm1 = xmm_load_128u((const __m128i*)src + 1); + xmm2 = xmm_load_128u((const __m128i*)src + 2); + xmm3 = xmm_load_128u((const __m128i*)src + 3); + xmm4 = xmm_load_128u((const __m128i*)src + 4); + xmm5 = xmm_load_128u((const __m128i*)src + 5); + xmm6 = xmm_load_128u((const __m128i*)src + 6); + xmm7 = xmm_load_128u((const __m128i*)src + 7); + + xmm_save_128((__m128i*)dst + 0, xmm0); + 
xmm_save_128((__m128i*)dst + 1, xmm1); + xmm_save_128((__m128i*)dst + 2, xmm2); + xmm_save_128((__m128i*)dst + 3, xmm3); + xmm_save_128((__m128i*)dst + 4, xmm4); + xmm_save_128((__m128i*)dst + 5, xmm5); + xmm_save_128((__m128i*)dst + 6, xmm6); + xmm_save_128((__m128i*)dst + 7, xmm7); + + dst += 128; + src += 128; + } +} + +static force_inline void +to_sse64(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm1, xmm2, xmm3, xmm4; + + xmm1 = xmm_load_128u((const __m128i*)src + 0); + xmm2 = xmm_load_128u((const __m128i*)src + 1); + xmm3 = xmm_load_128u((const __m128i*)src + 2); + xmm4 = xmm_load_128u((const __m128i*)src + 3); + + xmm_save_128((__m128i*)dst + 0, xmm1); + xmm_save_128((__m128i*)dst + 1, xmm2); + xmm_save_128((__m128i*)dst + 2, xmm3); + xmm_save_128((__m128i*)dst + 3, xmm4); +} + +static force_inline void +to_sse32(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm1, xmm2; + + xmm1 = xmm_load_128u((const __m128i*)src + 0); + xmm2 = xmm_load_128u((const __m128i*)src + 1); + + xmm_save_128((__m128i*)dst + 0, xmm1); + xmm_save_128((__m128i*)dst + 1, xmm2); +} + +static force_inline void +to_sse16(uint8_t *dst, const uint8_t *src) +{ + xmm_save_128((__m128i*)dst, xmm_load_128u((const __m128i*)src)); +} + +static void to_memcpy(uint8_t *dst, const uint8_t *src, unsigned len) +{ + assert(len); + if ((uintptr_t)dst & 15) { + if (len <= 16 - ((uintptr_t)dst & 15)) { + memcpy(dst, src, len); + return; + } + + if ((uintptr_t)dst & 1) { + assert(len >= 1); + *dst++ = *src++; + len--; + } + if ((uintptr_t)dst & 2) { + assert(((uintptr_t)dst & 1) == 0); + assert(len >= 2); + *(uint16_t *)dst = *(const uint16_t *)src; + dst += 2; + src += 2; + len -= 2; + } + if ((uintptr_t)dst & 4) { + assert(((uintptr_t)dst & 3) == 0); + assert(len >= 4); + *(uint32_t *)dst = *(const uint32_t *)src; + dst += 4; + src += 4; + len -= 4; + } + if ((uintptr_t)dst & 8) { + assert(((uintptr_t)dst & 7) == 0); + assert(len >= 8); + *(uint64_t *)dst = *(const uint64_t *)src; + dst += 8; + src 
+= 8; + len -= 8; + } + } + + assert(((uintptr_t)dst & 15) == 0); + while (len >= 64) { + to_sse64(dst, src); + dst += 64; + src += 64; + len -= 64; + } + if (len == 0) + return; + + if (len & 32) { + to_sse32(dst, src); + dst += 32; + src += 32; + } + if (len & 16) { + to_sse16(dst, src); + dst += 16; + src += 16; + } + if (len & 8) { + *(uint64_t *)dst = *(uint64_t *)src; + dst += 8; + src += 8; + } + if (len & 4) { + *(uint32_t *)dst = *(uint32_t *)src; + dst += 4; + src += 4; + } + memcpy(dst, src, len & 3); +} + +static void +memcpy_to_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; + const unsigned tile_pixels = tile_width / cpp; + const unsigned tile_shift = ffs(tile_pixels) - 1; + const unsigned tile_mask = tile_pixels - 1; + + unsigned offset_x, length_x; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + assert(src != dst); + + if (src_x | src_y) + src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; + width *= cpp; + assert(src_stride >= width); + + if (dst_x & tile_mask) { + offset_x = (dst_x & tile_mask) * cpp; + length_x = min(tile_width - offset_x, width); + } else + length_x = 0; + dst = (uint8_t *)dst + (dst_x >> tile_shift) * tile_size; + + while (height--) { + unsigned w = width; + const uint8_t *src_row = src; + uint8_t *tile_row = dst; + + src = (const uint8_t *)src + src_stride; + + tile_row += dst_y / tile_height * dst_stride * tile_height; + tile_row += (dst_y & (tile_height-1)) * tile_width; + dst_y++; + + if (length_x) { + to_memcpy(tile_row + offset_x, src_row, length_x); + + tile_row += tile_size; + src_row = (const 
uint8_t *)src_row + length_x; + w -= length_x; + } + while (w >= tile_width) { + assert(((uintptr_t)tile_row & (tile_width - 1)) == 0); + to_sse128xN(assume_aligned(tile_row, tile_width), + src_row, tile_width); + tile_row += tile_size; + src_row = (const uint8_t *)src_row + tile_width; + w -= tile_width; + } + if (w) { + assert(((uintptr_t)tile_row & (tile_width - 1)) == 0); + to_memcpy(assume_aligned(tile_row, tile_width), + src_row, w); + } + } +} + +static force_inline void +from_sse128xNu(uint8_t *dst, const uint8_t *src, int bytes) +{ + int i; + + assert(((uintptr_t)src & 15) == 0); + + for (i = 0; i < bytes / 128; i++) { + __m128i xmm0, xmm1, xmm2, xmm3; + __m128i xmm4, xmm5, xmm6, xmm7; + + xmm0 = xmm_load_128((const __m128i*)src + 0); + xmm1 = xmm_load_128((const __m128i*)src + 1); + xmm2 = xmm_load_128((const __m128i*)src + 2); + xmm3 = xmm_load_128((const __m128i*)src + 3); + xmm4 = xmm_load_128((const __m128i*)src + 4); + xmm5 = xmm_load_128((const __m128i*)src + 5); + xmm6 = xmm_load_128((const __m128i*)src + 6); + xmm7 = xmm_load_128((const __m128i*)src + 7); + + xmm_save_128u((__m128i*)dst + 0, xmm0); + xmm_save_128u((__m128i*)dst + 1, xmm1); + xmm_save_128u((__m128i*)dst + 2, xmm2); + xmm_save_128u((__m128i*)dst + 3, xmm3); + xmm_save_128u((__m128i*)dst + 4, xmm4); + xmm_save_128u((__m128i*)dst + 5, xmm5); + xmm_save_128u((__m128i*)dst + 6, xmm6); + xmm_save_128u((__m128i*)dst + 7, xmm7); + + dst += 128; + src += 128; + } +} + +static force_inline void +from_sse128xNa(uint8_t *dst, const uint8_t *src, int bytes) +{ + int i; + + assert(((uintptr_t)dst & 15) == 0); + assert(((uintptr_t)src & 15) == 0); + + for (i = 0; i < bytes / 128; i++) { + __m128i xmm0, xmm1, xmm2, xmm3; + __m128i xmm4, xmm5, xmm6, xmm7; + + xmm0 = xmm_load_128((const __m128i*)src + 0); + xmm1 = xmm_load_128((const __m128i*)src + 1); + xmm2 = xmm_load_128((const __m128i*)src + 2); + xmm3 = xmm_load_128((const __m128i*)src + 3); + xmm4 = xmm_load_128((const __m128i*)src + 4); + 
xmm5 = xmm_load_128((const __m128i*)src + 5); + xmm6 = xmm_load_128((const __m128i*)src + 6); + xmm7 = xmm_load_128((const __m128i*)src + 7); + + xmm_save_128((__m128i*)dst + 0, xmm0); + xmm_save_128((__m128i*)dst + 1, xmm1); + xmm_save_128((__m128i*)dst + 2, xmm2); + xmm_save_128((__m128i*)dst + 3, xmm3); + xmm_save_128((__m128i*)dst + 4, xmm4); + xmm_save_128((__m128i*)dst + 5, xmm5); + xmm_save_128((__m128i*)dst + 6, xmm6); + xmm_save_128((__m128i*)dst + 7, xmm7); + + dst += 128; + src += 128; + } +} + +static force_inline void +from_sse64u(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm1, xmm2, xmm3, xmm4; + + assert(((uintptr_t)src & 15) == 0); + + xmm1 = xmm_load_128((const __m128i*)src + 0); + xmm2 = xmm_load_128((const __m128i*)src + 1); + xmm3 = xmm_load_128((const __m128i*)src + 2); + xmm4 = xmm_load_128((const __m128i*)src + 3); + + xmm_save_128u((__m128i*)dst + 0, xmm1); + xmm_save_128u((__m128i*)dst + 1, xmm2); + xmm_save_128u((__m128i*)dst + 2, xmm3); + xmm_save_128u((__m128i*)dst + 3, xmm4); +} + +static force_inline void +from_sse64a(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm1, xmm2, xmm3, xmm4; + + assert(((uintptr_t)dst & 15) == 0); + assert(((uintptr_t)src & 15) == 0); + + xmm1 = xmm_load_128((const __m128i*)src + 0); + xmm2 = xmm_load_128((const __m128i*)src + 1); + xmm3 = xmm_load_128((const __m128i*)src + 2); + xmm4 = xmm_load_128((const __m128i*)src + 3); + + xmm_save_128((__m128i*)dst + 0, xmm1); + xmm_save_128((__m128i*)dst + 1, xmm2); + xmm_save_128((__m128i*)dst + 2, xmm3); + xmm_save_128((__m128i*)dst + 3, xmm4); +} + +static force_inline void +from_sse32u(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm1, xmm2; + + xmm1 = xmm_load_128((const __m128i*)src + 0); + xmm2 = xmm_load_128((const __m128i*)src + 1); + + xmm_save_128u((__m128i*)dst + 0, xmm1); + xmm_save_128u((__m128i*)dst + 1, xmm2); +} + +static force_inline void +from_sse32a(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm1, xmm2; + + assert(((uintptr_t)dst & 
15) == 0); + assert(((uintptr_t)src & 15) == 0); + + xmm1 = xmm_load_128((const __m128i*)src + 0); + xmm2 = xmm_load_128((const __m128i*)src + 1); + + xmm_save_128((__m128i*)dst + 0, xmm1); + xmm_save_128((__m128i*)dst + 1, xmm2); +} + +static force_inline void +from_sse16u(uint8_t *dst, const uint8_t *src) +{ + assert(((uintptr_t)src & 15) == 0); + + xmm_save_128u((__m128i*)dst, xmm_load_128((const __m128i*)src)); +} + +static force_inline void +from_sse16a(uint8_t *dst, const uint8_t *src) +{ + assert(((uintptr_t)dst & 15) == 0); + assert(((uintptr_t)src & 15) == 0); + + xmm_save_128((__m128i*)dst, xmm_load_128((const __m128i*)src)); +} + +static void +memcpy_from_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; + const unsigned tile_pixels = tile_width / cpp; + const unsigned tile_shift = ffs(tile_pixels) - 1; + const unsigned tile_mask = tile_pixels - 1; + + unsigned length_x, offset_x; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + assert(src != dst); + + if (dst_x | dst_y) + dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; + width *= cpp; + assert(dst_stride >= width); + if (src_x & tile_mask) { + offset_x = (src_x & tile_mask) * cpp; + length_x = min(tile_width - offset_x, width); + dst_stride -= width; + dst_stride += (width - length_x) & 15; + } else { + offset_x = 0; + dst_stride -= width & ~15; + } + assert(dst_stride >= 0); + src = (const uint8_t *)src + (src_x >> tile_shift) * tile_size; + + while (height--) { + unsigned w = width; + const uint8_t *tile_row = src; + + tile_row += src_y / tile_height * src_stride * tile_height; + 
tile_row += (src_y & (tile_height-1)) * tile_width; + src_y++; + + if (offset_x) { + memcpy(dst, tile_row + offset_x, length_x); + tile_row += tile_size; + dst = (uint8_t *)dst + length_x; + w -= length_x; + } + + if ((uintptr_t)dst & 15) { + while (w >= tile_width) { + from_sse128xNu(dst, + assume_aligned(tile_row, tile_width), + tile_width); + tile_row += tile_size; + dst = (uint8_t *)dst + tile_width; + w -= tile_width; + } + while (w >= 64) { + from_sse64u(dst, tile_row); + tile_row += 64; + dst = (uint8_t *)dst + 64; + w -= 64; + } + if (w & 32) { + from_sse32u(dst, tile_row); + tile_row += 32; + dst = (uint8_t *)dst + 32; + } + if (w & 16) { + from_sse16u(dst, tile_row); + tile_row += 16; + dst = (uint8_t *)dst + 16; + } + memcpy(dst, assume_aligned(tile_row, 16), w & 15); + } else { + while (w >= tile_width) { + from_sse128xNa(assume_aligned(dst, 16), + assume_aligned(tile_row, tile_width), + tile_width); + tile_row += tile_size; + dst = (uint8_t *)dst + tile_width; + w -= tile_width; + } + while (w >= 64) { + from_sse64a(dst, tile_row); + tile_row += 64; + dst = (uint8_t *)dst + 64; + w -= 64; + } + if (w & 32) { + from_sse32a(dst, tile_row); + tile_row += 32; + dst = (uint8_t *)dst + 32; + } + if (w & 16) { + from_sse16a(dst, tile_row); + tile_row += 16; + dst = (uint8_t *)dst + 16; + } + memcpy(assume_aligned(dst, 16), + assume_aligned(tile_row, 16), + w & 15); + } + dst = (uint8_t *)dst + dst_stride; + } +} + +static void +memcpy_between_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; + const unsigned tile_pixels = tile_width / cpp; + const unsigned tile_shift = ffs(tile_pixels) - 1; + const unsigned tile_mask = tile_pixels - 1; + + unsigned ox, lx; + + 
DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + assert(src != dst); + + width *= cpp; + dst_stride *= tile_height; + src_stride *= tile_height; + + assert((dst_x & tile_mask) == (src_x & tile_mask)); + if (dst_x & tile_mask) { + ox = (dst_x & tile_mask) * cpp; + lx = min(tile_width - ox, width); + assert(lx != 0); + } else + lx = 0; + + if (dst_x) + dst = (uint8_t *)dst + (dst_x >> tile_shift) * tile_size; + if (src_x) + src = (const uint8_t *)src + (src_x >> tile_shift) * tile_size; + + while (height--) { + const uint8_t *src_row; + uint8_t *dst_row; + unsigned w = width; + + dst_row = dst; + dst_row += dst_y / tile_height * dst_stride; + dst_row += (dst_y & (tile_height-1)) * tile_width; + dst_y++; + + src_row = src; + src_row += src_y / tile_height * src_stride; + src_row += (src_y & (tile_height-1)) * tile_width; + src_y++; + + if (lx) { + to_memcpy(dst_row + ox, src_row + ox, lx); + dst_row += tile_size; + src_row += tile_size; + w -= lx; + } + while (w >= tile_width) { + assert(((uintptr_t)dst_row & (tile_width - 1)) == 0); + assert(((uintptr_t)src_row & (tile_width - 1)) == 0); + to_sse128xN(assume_aligned(dst_row, tile_width), + assume_aligned(src_row, tile_width), + tile_width); + dst_row += tile_size; + src_row += tile_size; + w -= tile_width; + } + if (w) { + assert(((uintptr_t)dst_row & (tile_width - 1)) == 0); + assert(((uintptr_t)src_row & (tile_width - 1)) == 0); + to_memcpy(assume_aligned(dst_row, tile_width), + assume_aligned(src_row, tile_width), + w); + } + } +} + +#pragma GCC push_options #endif fast void @@ -257,7 +753,8 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp, if (dst_x & tile_mask) { const unsigned x = (dst_x & tile_mask) * cpp; const unsigned len = min(tile_width - x, w); - memcpy(tile_row + x, src, len); + memcpy(assume_misaligned(tile_row + x, tile_width, x), + src, len); tile_row += tile_size; 
src = (const uint8_t *)src + len; @@ -265,13 +762,13 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp, } } while (w >= tile_width) { - memcpy(tile_row, src, tile_width); - + memcpy(assume_aligned(tile_row, tile_width), + src, tile_width); tile_row += tile_size; src = (const uint8_t *)src + tile_width; w -= tile_width; } - memcpy(tile_row, src, w); + memcpy(assume_aligned(tile_row, tile_width), src, w); src = (const uint8_t *)src + src_stride + w; dst_y++; } @@ -313,7 +810,7 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp, if (src_x & tile_mask) { const unsigned x = (src_x & tile_mask) * cpp; const unsigned len = min(tile_width - x, w); - memcpy(dst, tile_row + x, len); + memcpy(dst, assume_misaligned(tile_row + x, tile_width, x), len); tile_row += tile_size; dst = (uint8_t *)dst + len; @@ -321,440 +818,371 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp, } } while (w >= tile_width) { - memcpy(dst, tile_row, tile_width); + memcpy(dst, + assume_aligned(tile_row, tile_width), + tile_width); tile_row += tile_size; dst = (uint8_t *)dst + tile_width; w -= tile_width; } - memcpy(dst, tile_row, w); + memcpy(dst, assume_aligned(tile_row, tile_width), w); dst = (uint8_t *)dst + dst_stride + w; src_y++; } } -fast_memcpy static void -memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height) +static fast_memcpy void +memcpy_between_tiled_x__swizzle_0(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) { const unsigned tile_width = 512; const unsigned tile_height = 8; const unsigned tile_size = 4096; const unsigned cpp = bpp / 8; - const unsigned stride_tiles = dst_stride / tile_width; - const unsigned swizzle_pixels = 64 / cpp; - const unsigned 
tile_pixels = ffs(tile_width / cpp) - 1; - const unsigned tile_mask = (1 << tile_pixels) - 1; - - unsigned x, y; + const unsigned tile_pixels = tile_width / cpp; + const unsigned tile_shift = ffs(tile_pixels) - 1; + const unsigned tile_mask = tile_pixels - 1; DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + assert(src != dst); + assert((dst_x & tile_mask) == (src_x & tile_mask)); - src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; - - for (y = 0; y < height; ++y) { - const uint32_t dy = y + dst_y; - const uint32_t tile_row = - (dy / tile_height * stride_tiles * tile_size + - (dy & (tile_height-1)) * tile_width); - const uint8_t *src_row = (const uint8_t *)src + src_stride * y; - uint32_t dx = dst_x, offset; - - x = width * cpp; - if (dx & (swizzle_pixels - 1)) { - const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); - const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; - offset = tile_row + - (dx >> tile_pixels) * tile_size + - (dx & tile_mask) * cpp; - offset ^= (offset >> 3) & 64; - - memcpy((char *)dst + offset, src_row, length * cpp); - - src_row += length * cpp; - x -= length * cpp; - dx += length; - } - while (x >= 64) { - offset = tile_row + - (dx >> tile_pixels) * tile_size + - (dx & tile_mask) * cpp; - offset ^= (offset >> 3) & 64; - - memcpy((char *)dst + offset, src_row, 64); - - src_row += 64; - x -= 64; - dx += swizzle_pixels; - } - if (x) { - offset = tile_row + - (dx >> tile_pixels) * tile_size + - (dx & tile_mask) * cpp; - offset ^= (offset >> 3) & 64; - memcpy((char *)dst + offset, src_row, x); - } - } -} + while (height--) { + unsigned w = width * cpp; + uint8_t *dst_row = dst; + const uint8_t *src_row = src; -fast_memcpy static void -memcpy_from_tiled_x__swizzle_9(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t 
dst_x, int16_t dst_y, - uint16_t width, uint16_t height) -{ - const unsigned tile_width = 512; - const unsigned tile_height = 8; - const unsigned tile_size = 4096; + dst_row += dst_y / tile_height * dst_stride * tile_height; + dst_row += (dst_y & (tile_height-1)) * tile_width; + if (dst_x) + dst_row += (dst_x >> tile_shift) * tile_size; + dst_y++; - const unsigned cpp = bpp / 8; - const unsigned stride_tiles = src_stride / tile_width; - const unsigned swizzle_pixels = 64 / cpp; - const unsigned tile_pixels = ffs(tile_width / cpp) - 1; - const unsigned tile_mask = (1 << tile_pixels) - 1; + src_row += src_y / tile_height * src_stride * tile_height; + src_row += (src_y & (tile_height-1)) * tile_width; + if (src_x) + src_row += (src_x >> tile_shift) * tile_size; + src_y++; - unsigned x, y; + if (dst_x & tile_mask) { + const unsigned x = (dst_x & tile_mask) * cpp; + const unsigned len = min(tile_width - x, w); - DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", - __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + memcpy(assume_misaligned(dst_row + x, tile_width, x), + assume_misaligned(src_row + x, tile_width, x), + len); - dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; - - for (y = 0; y < height; ++y) { - const uint32_t sy = y + src_y; - const uint32_t tile_row = - (sy / tile_height * stride_tiles * tile_size + - (sy & (tile_height-1)) * tile_width); - uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; - uint32_t sx = src_x, offset; - - x = width * cpp; - if (sx & (swizzle_pixels - 1)) { - const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); - const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; - offset = tile_row + - (sx >> tile_pixels) * tile_size + - (sx & tile_mask) * cpp; - offset ^= (offset >> 3) & 64; - - memcpy(dst_row, (const char *)src + offset, length * cpp); - - dst_row += length * cpp; - x -= length * cpp; - sx += length; + dst_row += tile_size; + 
src_row += tile_size; + w -= len; } - while (x >= 64) { - offset = tile_row + - (sx >> tile_pixels) * tile_size + - (sx & tile_mask) * cpp; - offset ^= (offset >> 3) & 64; - memcpy(dst_row, (const char *)src + offset, 64); - - dst_row += 64; - x -= 64; - sx += swizzle_pixels; - } - if (x) { - offset = tile_row + - (sx >> tile_pixels) * tile_size + - (sx & tile_mask) * cpp; - offset ^= (offset >> 3) & 64; - memcpy(dst_row, (const char *)src + offset, x); + while (w >= tile_width) { + memcpy(assume_aligned(dst_row, tile_width), + assume_aligned(src_row, tile_width), + tile_width); + dst_row += tile_size; + src_row += tile_size; + w -= tile_width; } + memcpy(assume_aligned(dst_row, tile_width), + assume_aligned(src_row, tile_width), + w); } } -fast_memcpy static void -memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height) -{ - const unsigned tile_width = 512; - const unsigned tile_height = 8; - const unsigned tile_size = 4096; - - const unsigned cpp = bpp / 8; - const unsigned stride_tiles = dst_stride / tile_width; - const unsigned swizzle_pixels = 64 / cpp; - const unsigned tile_pixels = ffs(tile_width / cpp) - 1; - const unsigned tile_mask = (1 << tile_pixels) - 1; +#define memcpy_to_tiled_x(swizzle) \ +fast_memcpy static void \ +memcpy_to_tiled_x__##swizzle (const void *src, void *dst, int bpp, \ + int32_t src_stride, int32_t dst_stride, \ + int16_t src_x, int16_t src_y, \ + int16_t dst_x, int16_t dst_y, \ + uint16_t width, uint16_t height) \ +{ \ + const unsigned tile_width = 512; \ + const unsigned tile_height = 8; \ + const unsigned tile_size = 4096; \ + const unsigned cpp = bpp / 8; \ + const unsigned stride_tiles = dst_stride / tile_width; \ + const unsigned swizzle_pixels = 64 / cpp; \ + const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \ + const unsigned tile_mask = (1 << tile_pixels) - 1; \ + 
unsigned x, y; \ + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \ + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \ + src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; \ + for (y = 0; y < height; ++y) { \ + const uint32_t dy = y + dst_y; \ + const uint32_t tile_row = \ + (dy / tile_height * stride_tiles * tile_size + \ + (dy & (tile_height-1)) * tile_width); \ + const uint8_t *src_row = (const uint8_t *)src + src_stride * y; \ + uint32_t dx = dst_x; \ + x = width * cpp; \ + if (dx & (swizzle_pixels - 1)) { \ + const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); \ + const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; \ + uint32_t offset = \ + tile_row + \ + (dx >> tile_pixels) * tile_size + \ + (dx & tile_mask) * cpp; \ + memcpy((char *)dst + swizzle(offset), src_row, length * cpp); \ + src_row += length * cpp; \ + x -= length * cpp; \ + dx += length; \ + } \ + while (x >= 64) { \ + uint32_t offset = \ + tile_row + \ + (dx >> tile_pixels) * tile_size + \ + (dx & tile_mask) * cpp; \ + memcpy(assume_aligned((char *)dst+swizzle(offset),64), \ + src_row, 64); \ + src_row += 64; \ + x -= 64; \ + dx += swizzle_pixels; \ + } \ + if (x) { \ + uint32_t offset = \ + tile_row + \ + (dx >> tile_pixels) * tile_size + \ + (dx & tile_mask) * cpp; \ + memcpy(assume_aligned((char *)dst + swizzle(offset), 64), src_row, x); \ + } \ + } \ +} - unsigned x, y; +#define memcpy_from_tiled_x(swizzle) \ +fast_memcpy static void \ +memcpy_from_tiled_x__##swizzle (const void *src, void *dst, int bpp, \ + int32_t src_stride, int32_t dst_stride, \ + int16_t src_x, int16_t src_y, \ + int16_t dst_x, int16_t dst_y, \ + uint16_t width, uint16_t height) \ +{ \ + const unsigned tile_width = 512; \ + const unsigned tile_height = 8; \ + const unsigned tile_size = 4096; \ + const unsigned cpp = bpp / 8; \ + const unsigned stride_tiles = src_stride / tile_width; \ + const unsigned 
swizzle_pixels = 64 / cpp; \ + const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \ + const unsigned tile_mask = (1 << tile_pixels) - 1; \ + unsigned x, y; \ + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \ + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \ + dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; \ + for (y = 0; y < height; ++y) { \ + const uint32_t sy = y + src_y; \ + const uint32_t tile_row = \ + (sy / tile_height * stride_tiles * tile_size + \ + (sy & (tile_height-1)) * tile_width); \ + uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; \ + uint32_t sx = src_x; \ + x = width * cpp; \ + if (sx & (swizzle_pixels - 1)) { \ + const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); \ + const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; \ + uint32_t offset = \ + tile_row + \ + (sx >> tile_pixels) * tile_size + \ + (sx & tile_mask) * cpp; \ + memcpy(dst_row, (const char *)src + swizzle(offset), length * cpp); \ + dst_row += length * cpp; \ + x -= length * cpp; \ + sx += length; \ + } \ + while (x >= 64) { \ + uint32_t offset = \ + tile_row + \ + (sx >> tile_pixels) * tile_size + \ + (sx & tile_mask) * cpp; \ + memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), 64); \ + dst_row += 64; \ + x -= 64; \ + sx += swizzle_pixels; \ + } \ + if (x) { \ + uint32_t offset = \ + tile_row + \ + (sx >> tile_pixels) * tile_size + \ + (sx & tile_mask) * cpp; \ + memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), x); \ + } \ + } \ +} - DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", - __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); +#define swizzle_9(X) ((X) ^ (((X) >> 3) & 64)) +memcpy_to_tiled_x(swizzle_9) +memcpy_from_tiled_x(swizzle_9) +#undef swizzle_9 - src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; - - for (y = 0; y < height; ++y) 
{ - const uint32_t dy = y + dst_y; - const uint32_t tile_row = - (dy / tile_height * stride_tiles * tile_size + - (dy & (tile_height-1)) * tile_width); - const uint8_t *src_row = (const uint8_t *)src + src_stride * y; - uint32_t dx = dst_x, offset; - - x = width * cpp; - if (dx & (swizzle_pixels - 1)) { - const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); - const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; - offset = tile_row + - (dx >> tile_pixels) * tile_size + - (dx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; - - memcpy((char *)dst + offset, src_row, length * cpp); - - src_row += length * cpp; - x -= length * cpp; - dx += length; - } - while (x >= 64) { - offset = tile_row + - (dx >> tile_pixels) * tile_size + - (dx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; +#define swizzle_9_10(X) ((X) ^ ((((X) ^ ((X) >> 1)) >> 3) & 64)) +memcpy_to_tiled_x(swizzle_9_10) +memcpy_from_tiled_x(swizzle_9_10) +#undef swizzle_9_10 - memcpy((char *)dst + offset, src_row, 64); +#define swizzle_9_11(X) ((X) ^ ((((X) ^ ((X) >> 2)) >> 3) & 64)) +memcpy_to_tiled_x(swizzle_9_11) +memcpy_from_tiled_x(swizzle_9_11) +#undef swizzle_9_11 - src_row += 64; - x -= 64; - dx += swizzle_pixels; - } - if (x) { - offset = tile_row + - (dx >> tile_pixels) * tile_size + - (dx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; - memcpy((char *)dst + offset, src_row, x); - } - } -} +#define swizzle_9_10_11(X) ((X) ^ ((((X) ^ ((X) >> 1) ^ ((X) >> 2)) >> 3) & 64)) +memcpy_to_tiled_x(swizzle_9_10_11) +memcpy_from_tiled_x(swizzle_9_10_11) +#undef swizzle_9_10_11 -fast_memcpy static void -memcpy_from_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height) +static fast_memcpy void +memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp, + int32_t 
src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) { - const unsigned tile_width = 512; - const unsigned tile_height = 8; - const unsigned tile_size = 4096; + const unsigned tile_width = 128; + const unsigned tile_height = 16; + const unsigned tile_size = 2048; const unsigned cpp = bpp / 8; - const unsigned stride_tiles = src_stride / tile_width; - const unsigned swizzle_pixels = 64 / cpp; - const unsigned tile_pixels = ffs(tile_width / cpp) - 1; - const unsigned tile_mask = (1 << tile_pixels) - 1; - - unsigned x, y; + const unsigned tile_pixels = tile_width / cpp; + const unsigned tile_shift = ffs(tile_pixels) - 1; + const unsigned tile_mask = tile_pixels - 1; DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + assert(src != dst); - dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; - - for (y = 0; y < height; ++y) { - const uint32_t sy = y + src_y; - const uint32_t tile_row = - (sy / tile_height * stride_tiles * tile_size + - (sy & (tile_height-1)) * tile_width); - uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; - uint32_t sx = src_x, offset; - - x = width * cpp; - if (sx & (swizzle_pixels - 1)) { - const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); - const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; - offset = tile_row + - (sx >> tile_pixels) * tile_size + - (sx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; - - memcpy(dst_row, (const char *)src + offset, length * cpp); - - dst_row += length * cpp; - x -= length * cpp; - sx += length; - } - while (x >= 64) { - offset = tile_row + - (sx >> tile_pixels) * tile_size + - (sx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; - - memcpy(dst_row, (const char *)src + offset, 64); - - dst_row += 64; - x -= 64; - sx += swizzle_pixels; - } - if 
(x) { - offset = tile_row + - (sx >> tile_pixels) * tile_size + - (sx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; - memcpy(dst_row, (const char *)src + offset, x); - } - } -} - -fast_memcpy static void -memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height) -{ - const unsigned tile_width = 512; - const unsigned tile_height = 8; - const unsigned tile_size = 4096; - - const unsigned cpp = bpp / 8; - const unsigned stride_tiles = dst_stride / tile_width; - const unsigned swizzle_pixels = 64 / cpp; - const unsigned tile_pixels = ffs(tile_width / cpp) - 1; - const unsigned tile_mask = (1 << tile_pixels) - 1; + if (src_x | src_y) + src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; + assert(src_stride >= width * cpp); + src_stride -= width * cpp; - unsigned x, y; + while (height--) { + unsigned w = width * cpp; + uint8_t *tile_row = dst; - DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", - __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + tile_row += dst_y / tile_height * dst_stride * tile_height; + tile_row += (dst_y & (tile_height-1)) * tile_width; + if (dst_x) { + tile_row += (dst_x >> tile_shift) * tile_size; + if (dst_x & tile_mask) { + const unsigned x = (dst_x & tile_mask) * cpp; + const unsigned len = min(tile_width - x, w); + memcpy(assume_misaligned(tile_row + x, tile_width, x), src, len); - src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; - - for (y = 0; y < height; ++y) { - const uint32_t dy = y + dst_y; - const uint32_t tile_row = - (dy / tile_height * stride_tiles * tile_size + - (dy & (tile_height-1)) * tile_width); - const uint8_t *src_row = (const uint8_t *)src + src_stride * y; - uint32_t dx = dst_x, offset; - - x = width * cpp; - if (dx & (swizzle_pixels - 1)) { - const uint32_t 
swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); - const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; - offset = tile_row + - (dx >> tile_pixels) * tile_size + - (dx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; - memcpy((char *)dst + offset, src_row, length * cpp); - - src_row += length * cpp; - x -= length * cpp; - dx += length; + tile_row += tile_size; + src = (const uint8_t *)src + len; + w -= len; + } } - while (x >= 64) { - offset = tile_row + - (dx >> tile_pixels) * tile_size + - (dx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; - - memcpy((char *)dst + offset, src_row, 64); + while (w >= tile_width) { + memcpy(assume_aligned(tile_row, tile_width), + src, tile_width); - src_row += 64; - x -= 64; - dx += swizzle_pixels; - } - if (x) { - offset = tile_row + - (dx >> tile_pixels) * tile_size + - (dx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; - memcpy((char *)dst + offset, src_row, x); + tile_row += tile_size; + src = (const uint8_t *)src + tile_width; + w -= tile_width; } + memcpy(assume_aligned(tile_row, tile_width), src, w); + src = (const uint8_t *)src + src_stride + w; + dst_y++; } } -fast_memcpy static void -memcpy_from_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height) +static fast_memcpy void +memcpy_from_tiled_x__gen2(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) { - const unsigned tile_width = 512; - const unsigned tile_height = 8; - const unsigned tile_size = 4096; + const unsigned tile_width = 128; + const unsigned tile_height = 16; + const unsigned tile_size = 2048; const unsigned cpp = bpp / 8; - const unsigned stride_tiles = src_stride / tile_width; - const unsigned 
swizzle_pixels = 64 / cpp; - const unsigned tile_pixels = ffs(tile_width / cpp) - 1; - const unsigned tile_mask = (1 << tile_pixels) - 1; - - unsigned x, y; + const unsigned tile_pixels = tile_width / cpp; + const unsigned tile_shift = ffs(tile_pixels) - 1; + const unsigned tile_mask = tile_pixels - 1; DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + assert(src != dst); - dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; - - for (y = 0; y < height; ++y) { - const uint32_t sy = y + src_y; - const uint32_t tile_row = - (sy / tile_height * stride_tiles * tile_size + - (sy & (tile_height-1)) * tile_width); - uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; - uint32_t sx = src_x, offset; - - x = width * cpp; - if (sx & (swizzle_pixels - 1)) { - const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); - const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; - offset = tile_row + - (sx >> tile_pixels) * tile_size + - (sx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; - memcpy(dst_row, (const char *)src + offset, length * cpp); - - dst_row += length * cpp; - x -= length * cpp; - sx += length; - } - while (x >= 64) { - offset = tile_row + - (sx >> tile_pixels) * tile_size + - (sx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; + if (dst_x | dst_y) + dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; + assert(dst_stride >= width * cpp); + dst_stride -= width * cpp; + + while (height--) { + unsigned w = width * cpp; + const uint8_t *tile_row = src; - memcpy(dst_row, (const char *)src + offset, 64); + tile_row += src_y / tile_height * src_stride * tile_height; + tile_row += (src_y & (tile_height-1)) * tile_width; + if (src_x) { + tile_row += (src_x >> tile_shift) * tile_size; + if (src_x & tile_mask) { + const unsigned x = (src_x & tile_mask) * cpp; + const unsigned len = 
min(tile_width - x, w); + memcpy(dst, assume_misaligned(tile_row + x, tile_width, x), len); - dst_row += 64; - x -= 64; - sx += swizzle_pixels; + tile_row += tile_size; + dst = (uint8_t *)dst + len; + w -= len; + } } - if (x) { - offset = tile_row + - (sx >> tile_pixels) * tile_size + - (sx & tile_mask) * cpp; - offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; - memcpy(dst_row, (const char *)src + offset, x); + while (w >= tile_width) { + memcpy(dst, + assume_aligned(tile_row, tile_width), + tile_width); + + tile_row += tile_size; + dst = (uint8_t *)dst + tile_width; + w -= tile_width; } + memcpy(dst, assume_aligned(tile_row, tile_width), w); + dst = (uint8_t *)dst + dst_stride + w; + src_y++; } } -void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling) +void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling, unsigned cpu) { + if (kgem->gen < 030) { + if (swizzling == I915_BIT_6_SWIZZLE_NONE) { + DBG(("%s: gen2, no swizzling\n", __FUNCTION__)); + kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__gen2; + kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__gen2; + } else + DBG(("%s: no detiling with swizzle functions for gen2\n", __FUNCTION__)); + return; + } + switch (swizzling) { default: DBG(("%s: unknown swizzling, %d\n", __FUNCTION__, swizzling)); break; case I915_BIT_6_SWIZZLE_NONE: DBG(("%s: no swizzling\n", __FUNCTION__)); - kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0; - kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0; +#if defined(sse2) + if (cpu & SSE2) { + kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0__sse2; + kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0__sse2; + kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0__sse2; + } else +#endif + { + kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0; + kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0; + kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0; + } break; case I915_BIT_6_SWIZZLE_9: DBG(("%s: 6^9 swizzling\n", 
__FUNCTION__)); @@ -771,6 +1199,11 @@ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling) kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11; kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_11; break; + case I915_BIT_6_SWIZZLE_9_10_11: + DBG(("%s: 6^9^10^11 swizzling\n", __FUNCTION__)); + kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10_11; + kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_10_11; + break; } } @@ -995,7 +1428,7 @@ memcpy_xor(const void *src, void *dst, int bpp, height = 1; } -#if USE_SSE2 +#if defined(sse2) && __x86_64__ if (have_sse2()) { do { uint32_t *d = (uint32_t *)dst_bytes; @@ -1118,3 +1551,241 @@ memcpy_xor(const void *src, void *dst, int bpp, } } } + +#define BILINEAR_INTERPOLATION_BITS 4 +static inline int +bilinear_weight(pixman_fixed_t x) +{ + return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) & + ((1 << BILINEAR_INTERPOLATION_BITS) - 1); +} + +#if BILINEAR_INTERPOLATION_BITS <= 4 +/* Inspired by Filter_32_opaque from Skia */ +static inline uint32_t +bilinear_interpolation(uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t lo, hi; + + distx <<= (4 - BILINEAR_INTERPOLATION_BITS); + disty <<= (4 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ + distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ + distixiy = + 16 * 16 - (disty << 4) - + (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ + + lo = (tl & 0xff00ff) * distixiy; + hi = ((tl >> 8) & 0xff00ff) * distixiy; + + lo += (tr & 0xff00ff) * distxiy; + hi += ((tr >> 8) & 0xff00ff) * distxiy; + + lo += (bl & 0xff00ff) * distixy; + hi += ((bl >> 8) & 0xff00ff) * distixy; + + lo += (br & 0xff00ff) * distxy; + hi += ((br >> 8) & 0xff00ff) * distxy; + + return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); +} +#elif SIZEOF_LONG > 4 +static inline uint32_t 
+bilinear_interpolation(uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + uint64_t distxy, distxiy, distixy, distixiy; + uint64_t tl64, tr64, bl64, br64; + uint64_t f, r; + + distx <<= (8 - BILINEAR_INTERPOLATION_BITS); + disty <<= (8 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = distx * (256 - disty); + distixy = (256 - distx) * disty; + distixiy = (256 - distx) * (256 - disty); + + /* Alpha and Blue */ + tl64 = tl & 0xff0000ff; + tr64 = tr & 0xff0000ff; + bl64 = bl & 0xff0000ff; + br64 = br & 0xff0000ff; + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r = f & 0x0000ff0000ff0000ull; + + /* Red and Green */ + tl64 = tl; + tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); + + tr64 = tr; + tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); + + bl64 = bl; + bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); + + br64 = br; + br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); + + return (uint32_t)(r >> 16); +} +#else +static inline uint32_t +bilinear_interpolation(uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t f, r; + + distx <<= (8 - BILINEAR_INTERPOLATION_BITS); + disty <<= (8 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ + distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ + distixiy = + 256 * 256 - (disty << 8) - + (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ + + /* Blue */ + r = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy); + + /* Green */ + f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * 
distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy); + r |= f & 0xff000000; + + tl >>= 16; + tr >>= 16; + bl >>= 16; + br >>= 16; + r >>= 16; + + /* Red */ + f = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy); + r |= f & 0x00ff0000; + + /* Alpha */ + f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy); + r |= f & 0xff000000; + + return r; +} +#endif + +static inline uint32_t convert_pixel(const uint8_t *p, int x) +{ + return ((uint32_t *)p)[x]; +} + +fast void +affine_blt(const void *src, void *dst, int bpp, + int16_t src_x, int16_t src_y, + int16_t src_width, int16_t src_height, + int32_t src_stride, + int16_t dst_x, int16_t dst_y, + uint16_t dst_width, uint16_t dst_height, + int32_t dst_stride, + const struct pixman_f_transform *t) +{ + static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + const pixman_fixed_t ux = pixman_double_to_fixed(t->m[0][0]); + const pixman_fixed_t uy = pixman_double_to_fixed(t->m[1][0]); + int i, j; + + assert(bpp == 32); + + for (j = 0; j < dst_height; j++) { + pixman_fixed_t x, y; + struct pixman_f_vector v; + uint32_t *b; + + /* reference point is the center of the pixel */ + v.v[0] = dst_x + 0.5; + v.v[1] = dst_y + j + 0.5; + v.v[2] = 1.0; + + pixman_f_transform_point_3d(t, &v); + + x = pixman_double_to_fixed(v.v[0]); + x += pixman_int_to_fixed(src_x - dst_x); + y = pixman_double_to_fixed(v.v[1]); + y += pixman_int_to_fixed(src_y - dst_y); + + b = (uint32_t*)((uint8_t *)dst + (dst_y + j) * dst_stride + dst_x * bpp / 8); + for (i = 0; i < dst_width; i++) { + const uint8_t *row1; + const uint8_t *row2; + int x1, y1, x2, y2; + uint32_t tl, tr, bl, br; + int32_t fx, fy; + + x1 = x - pixman_fixed_1/2; + y1 = y - pixman_fixed_1/2; + + fx = bilinear_weight(x1); + fy = bilinear_weight(y1); + + x1 = pixman_fixed_to_int(x1); + x2 = x1 + 1; + y1 = pixman_fixed_to_int(y1); + 
y2 = y1 + 1; + + if (x1 >= src_width || x2 < 0 || + y1 >= src_height || y2 < 0) { + b[i] = 0; + goto next; + } + + if (y2 == 0) { + row1 = zero; + } else { + row1 = (uint8_t *)src + src_stride * y1; + row1 += bpp / 8 * x1; + } + + if (y1 == src_height - 1) { + row2 = zero; + } else { + row2 = (uint8_t *)src + src_stride * y2; + row2 += bpp / 8 * x1; + } + + if (x2 == 0) { + tl = 0; + bl = 0; + } else { + tl = convert_pixel(row1, 0); + bl = convert_pixel(row2, 0); + } + + if (x1 == src_width - 1) { + tr = 0; + br = 0; + } else { + tr = convert_pixel(row1, 1); + br = convert_pixel(row2, 1); + } + + b[i] = bilinear_interpolation(tl, tr, bl, br, fx, fy); + +next: + x += ux; + y += uy; + } + } +} diff --git a/src/sna/brw/brw_eu_emit.c b/src/sna/brw/brw_eu_emit.c index 00c984d9..154f939a 100644 --- a/src/sna/brw/brw_eu_emit.c +++ b/src/sna/brw/brw_eu_emit.c @@ -178,7 +178,7 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) } if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && - reg.file == BRW_ARF_NULL) + reg.nr == BRW_ARF_NULL) return; assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg)); @@ -700,7 +700,7 @@ push_if_stack(struct brw_compile *p, struct brw_instruction *inst) * * When the matching 'else' instruction is reached (presumably by * countdown of the instruction count patched in by our ELSE/ENDIF - * functions), the relevent flags are inverted. + * functions), the relevant flags are inverted. * * When the matching 'endif' instruction is reached, the flags are * popped off. If the stack is now empty, normal execution resumes. 
diff --git a/src/sna/compiler.h b/src/sna/compiler.h index ff412179..0f3775ec 100644 --- a/src/sna/compiler.h +++ b/src/sna/compiler.h @@ -39,6 +39,7 @@ #define pure __attribute__((pure)) #define tightly_packed __attribute__((__packed__)) #define flatten __attribute__((flatten)) +#define nonnull __attribute__((nonnull)) #define page_aligned __attribute__((aligned(4096))) #else #define likely(expr) (expr) @@ -51,18 +52,15 @@ #define pure #define tighly_packed #define flatten +#define nonnull #define page_aligned #endif #define HAS_GCC(major, minor) defined(__GNUC__) && (__GNUC__ > (major) || __GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) #if HAS_GCC(4, 5) -#define sse2 __attribute__((target("sse2,fpmath=sse"))) -#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse"))) -#endif - -#if HAS_GCC(4, 7) -#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse"))) +#define sse2 fast __attribute__((target("sse2,fpmath=sse"))) +#define sse4_2 fast __attribute__((target("sse4.2,sse2,fpmath=sse"))) #endif #if HAS_GCC(4, 6) && defined(__OPTIMIZE__) @@ -71,10 +69,17 @@ #define fast #endif -#if HAS_GCC(4, 6) && defined(__OPTIMIZE__) -#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops"))) -#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__) -#define fast_memcpy __attribute__((target("inline-all-stringops"))) +#if HAS_GCC(4, 7) +#define avx2 fast __attribute__((target("avx2,avx,sse4.2,sse2,fpmath=sse"))) +#define assume_aligned(ptr, align) __builtin_assume_aligned((ptr), (align)) +#define assume_misaligned(ptr, align, offset) __builtin_assume_aligned((ptr), (align), (offset)) +#else +#define assume_aligned(ptr, align) (ptr) +#define assume_misaligned(ptr, align, offset) (ptr) +#endif + +#if HAS_GCC(4, 5) && defined(__OPTIMIZE__) +#define fast_memcpy fast __attribute__((target("inline-all-stringops"))) #else #define fast_memcpy #endif diff --git a/src/sna/fb/fb.h b/src/sna/fb/fb.h index 8bf9008a..90431747 100644 --- 
a/src/sna/fb/fb.h +++ b/src/sna/fb/fb.h @@ -24,10 +24,6 @@ #ifndef FB_H #define FB_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - #include #include #include diff --git a/src/sna/fb/fbimage.c b/src/sna/fb/fbimage.c index 5af23890..cc81c85b 100644 --- a/src/sna/fb/fbimage.c +++ b/src/sna/fb/fbimage.c @@ -229,13 +229,19 @@ fbGetImage(DrawablePtr drawable, FbBits pm; pm = fbReplicatePixel(planeMask, srcBpp); + dstStride = PixmapBytePad(w, drawable->depth); - if (pm != FB_ALLONES) - memset(d, 0, dstStride * h); dstStride /= sizeof(FbStip); + fbBltStip((FbStip *)(src + (y + srcYoff) * srcStride), srcStride, (x + srcXoff) * srcBpp, - dst, dstStride, 0, w * srcBpp, h, GXcopy, pm, srcBpp); + dst, dstStride, 0, w * srcBpp, h, GXcopy, FB_ALLONES, srcBpp); + + if (pm != FB_ALLONES) { + int i = dstStride * h; + while (i--) + *dst++ &= pm; + } } else { dstStride = BitmapBytePad(w) / sizeof(FbStip); fbBltPlane(src + (y + srcYoff) * srcStride, diff --git a/src/sna/fb/fbpict.h b/src/sna/fb/fbpict.h index 932032f9..20877777 100644 --- a/src/sna/fb/fbpict.h +++ b/src/sna/fb/fbpict.h @@ -24,10 +24,6 @@ #ifndef FBPICT_H #define FBPICT_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - #include #include diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c index 1104f462..49ad16a3 100644 --- a/src/sna/gen2_render.c +++ b/src/sna/gen2_render.c @@ -35,6 +35,7 @@ #include "sna_reg.h" #include "sna_render.h" #include "sna_render_inline.h" +#include "sna_video.h" #include "gen2_render.h" @@ -48,6 +49,7 @@ #define MAX_3D_SIZE 2048 #define MAX_3D_PITCH 8192 +#define MAX_INLINE (1 << 18) #define BATCH(v) batch_emit(sna, v) #define BATCH_F(v) batch_emit_float(sna, v) @@ -596,39 +598,43 @@ gen2_get_batch(struct sna *sna, const struct sna_composite_op *op) gen2_emit_invariant(sna); } -static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op) +static void gen2_emit_target(struct sna *sna, + struct kgem_bo *bo, + int width, + int height, + int format) { - 
assert(!too_large(op->dst.width, op->dst.height)); - assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH); + assert(!too_large(width, height)); + assert(bo->pitch >= 8 && bo->pitch <= MAX_3D_PITCH); assert(sna->render.vertex_offset == 0); - assert(op->dst.bo->unique_id); - if (sna->render_state.gen2.target == op->dst.bo->unique_id) { - kgem_bo_mark_dirty(op->dst.bo); + assert(bo->unique_id); + if (sna->render_state.gen2.target == bo->unique_id) { + kgem_bo_mark_dirty(bo); return; } BATCH(_3DSTATE_BUF_INFO_CMD); BATCH(BUF_3D_ID_COLOR_BACK | - gen2_buf_tiling(op->dst.bo->tiling) | - BUF_3D_PITCH(op->dst.bo->pitch)); + gen2_buf_tiling(bo->tiling) | + BUF_3D_PITCH(bo->pitch)); BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, - op->dst.bo, + bo, I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER, 0)); BATCH(_3DSTATE_DST_BUF_VARS_CMD); - BATCH(gen2_get_dst_format(op->dst.format)); + BATCH(gen2_get_dst_format(format)); BATCH(_3DSTATE_DRAW_RECT_CMD); BATCH(0); BATCH(0); /* ymin, xmin */ - BATCH(DRAW_YMAX(op->dst.height - 1) | - DRAW_XMAX(op->dst.width - 1)); + BATCH(DRAW_YMAX(height - 1) | + DRAW_XMAX(width - 1)); BATCH(0); /* yorig, xorig */ - sna->render_state.gen2.target = op->dst.bo->unique_id; + sna->render_state.gen2.target = bo->unique_id; } static void gen2_disable_logic_op(struct sna *sna) @@ -701,7 +707,11 @@ static void gen2_emit_composite_state(struct sna *sna, kgem_clear_dirty(&sna->kgem); } - gen2_emit_target(sna, op); + gen2_emit_target(sna, + op->dst.bo, + op->dst.width, + op->dst.height, + op->dst.format); unwind = sna->kgem.nbatch; BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | @@ -1190,7 +1200,13 @@ inline static int gen2_get_rectangles(struct sna *sna, sna->render.vertex_offset = sna->kgem.nbatch; BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST); } - } + + need = 0; + } else + need = sna->kgem.nbatch - sna->render.vertex_offset; + + if (rem > MAX_INLINE - need) + rem = MAX_INLINE -need; if (want > 1 && want * size > rem) want = rem / size; @@ 
-1572,12 +1588,12 @@ gen2_composite_picture(struct sna *sna, if (channel->repeat && (x >= 0 && y >= 0 && - x + w < pixmap->drawable.width && - y + h < pixmap->drawable.height)) { + x + w <= pixmap->drawable.width && + y + h <= pixmap->drawable.height)) { struct sna_pixmap *priv = sna_pixmap(pixmap); if (priv && priv->clear) { DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); - return gen2_composite_solid_init(sna, channel, priv->clear_color); + return gen2_composite_solid_init(sna, channel, solid_color(picture->format, priv->clear_color)); } } } else @@ -1619,7 +1635,9 @@ gen2_composite_set_target(struct sna *sna, } else sna_render_picture_extents(dst, &box); - hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; + hint = PREFER_GPU | RENDER_GPU; + if (!need_tiling(sna, op->dst.width, op->dst.height)) + hint |= FORCE_GPU; if (!partial) { hint |= IGNORE_DAMAGE; if (w == op->dst.width && h == op->dst.height) @@ -2423,7 +2441,11 @@ static void gen2_emit_composite_spans_state(struct sna *sna, uint32_t unwind; gen2_get_batch(sna, &op->base); - gen2_emit_target(sna, &op->base); + gen2_emit_target(sna, + op->base.dst.bo, + op->base.dst.width, + op->base.dst.height, + op->base.dst.format); unwind = sna->kgem.nbatch; BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | @@ -2706,7 +2728,11 @@ static void gen2_emit_fill_composite_state(struct sna *sna, uint32_t ls1; gen2_get_batch(sna, op); - gen2_emit_target(sna, op); + gen2_emit_target(sna, + op->dst.bo, + op->dst.width, + op->dst.height, + op->dst.format); ls1 = sna->kgem.nbatch; BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | @@ -2868,7 +2894,11 @@ static void gen2_emit_fill_state(struct sna *sna, uint32_t ls1; gen2_get_batch(sna, op); - gen2_emit_target(sna, op); + gen2_emit_target(sna, + op->dst.bo, + op->dst.width, + op->dst.height, + op->dst.format); ls1 = sna->kgem.nbatch; BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | @@ -3102,6 +3132,276 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct 
kgem_bo *bo, } static void +gen2_emit_video_state(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + PixmapPtr pixmap, + struct kgem_bo *dst_bo, + int width, int height, + bool bilinear) +{ + uint32_t ms1, v, unwind; + + gen2_emit_target(sna, dst_bo, width, height, + sna_format_for_depth(pixmap->drawable.depth)); + + unwind = sna->kgem.nbatch; + BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); + BATCH(1 << 12); + BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); + BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); + if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, + sna->kgem.batch + unwind + 1, + 3 * sizeof(uint32_t)) == 0) + sna->kgem.nbatch = unwind; + else + sna->render_state.gen2.ls1 = unwind; + + gen2_disable_logic_op(sna); + + unwind = sna->kgem.nbatch; + BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_BLEND_STAGE(0) | 1); + BATCH(TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT | + TB0C_OP_ARG1 | TB0C_ARG1_SEL_TEXEL0); + BATCH(TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT | + TB0A_OP_ARG1 | TB0A_ARG1_SEL_ONE); + if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, + sna->kgem.batch + unwind + 1, + 2 * sizeof(uint32_t)) == 0) + sna->kgem.nbatch = unwind; + else + sna->render_state.gen2.ls2 = unwind; + + BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(0) | 4); + BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + frame->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + 0)); + ms1 = MAPSURF_422 | TM0S1_COLORSPACE_CONVERSION; + switch (frame->id) { + case FOURCC_YUY2: + ms1 |= MT_422_YCRCB_NORMAL; + break; + case FOURCC_UYVY: + ms1 |= MT_422_YCRCB_SWAPY; + break; + } + BATCH(((frame->height - 1) << TM0S1_HEIGHT_SHIFT) | + ((frame->width - 1) << TM0S1_WIDTH_SHIFT) | + ms1 | + gen2_sampler_tiling_bits(frame->bo->tiling)); + BATCH((frame->pitch[0] / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); + if (bilinear) + BATCH(FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT | + 
FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT | + MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); + else + BATCH(FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT | + FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT | + MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); + BATCH(0); /* default color */ + + BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(0) | + ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | TEXCOORDTYPE_CARTESIAN | + ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP) | + ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP)); + + v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D; + if (sna->render_state.gen2.vft != v) { + BATCH(v); + sna->render_state.gen2.vft = v; + } +} + +static void +gen2_video_get_batch(struct sna *sna, struct kgem_bo *bo) +{ + kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); + + if (!kgem_check_batch(&sna->kgem, 120) || + !kgem_check_reloc(&sna->kgem, 4) || + !kgem_check_exec(&sna->kgem, 2)) { + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + if (sna->render_state.gen2.need_invariant) + gen2_emit_invariant(sna); +} + +static int +gen2_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex) +{ + int size = floats_per_vertex * 3; + int rem = batch_space(sna) - 1; + + if (rem > MAX_INLINE) + rem = MAX_INLINE; + + if (size * want > rem) + want = rem / size; + + return want; +} + +static bool +gen2_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + PixmapPtr pixmap) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + const BoxRec *pbox = region_rects(dstRegion); + int nbox = region_num_rects(dstRegion); + int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; + int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; + int src_width = frame->src.x2 - frame->src.x1; + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; + int pix_xoff, pix_yoff; + struct kgem_bo 
*dst_bo; + bool bilinear; + int copy = 0; + + DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__, + src_width, src_height, frame->width, frame->height, dst_width, dst_height)); + + assert(priv->gpu_bo); + dst_bo = priv->gpu_bo; + + bilinear = src_width != dst_width || src_height != dst_height; + + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + + src_scale_y = (float)src_height / dst_height / frame->height; + src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; + DBG(("%s: src offset (%f, %f), scale (%f, %f)\n", + __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y)); + + if (too_large(pixmap->drawable.width, pixmap->drawable.height) || + dst_bo->pitch > MAX_3D_PITCH) { + int bpp = pixmap->drawable.bitsPerPixel; + + if (too_large(dst_width, dst_height)) + return false; + + dst_bo = kgem_create_2d(&sna->kgem, + dst_width, dst_height, bpp, + kgem_choose_tiling(&sna->kgem, + I915_TILING_X, + dst_width, dst_height, bpp), + 0); + if (!dst_bo) + return false; + + pix_xoff = -dstRegion->extents.x1; + pix_yoff = -dstRegion->extents.y1; + copy = 1; + } else { + /* Set up the offset for translating from the given region + * (in screen coordinates) to the backing pixmap. 
+ */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + dst_width = pixmap->drawable.width; + dst_height = pixmap->drawable.height; + } + + gen2_video_get_batch(sna, dst_bo); + gen2_emit_video_state(sna, video, frame, pixmap, + dst_bo, dst_width, dst_height, bilinear); + do { + int nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4); + if (nbox_this_time == 0) { + gen2_video_get_batch(sna, dst_bo); + gen2_emit_video_state(sna, video, frame, pixmap, + dst_bo, dst_width, dst_height, bilinear); + nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4); + assert(nbox_this_time); + } + nbox -= nbox_this_time; + + BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | + ((12 * nbox_this_time) - 1)); + do { + int box_x1 = pbox->x1; + int box_y1 = pbox->y1; + int box_x2 = pbox->x2; + int box_y2 = pbox->y2; + + pbox++; + + DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n", + __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff, + box_x1 * src_scale_x + src_offset_x, + box_y1 * src_scale_y + src_offset_y, + box_x2 * src_scale_x + src_offset_x, + box_y2 * src_scale_y + src_offset_y)); + + /* bottom right */ + BATCH_F(box_x2 + pix_xoff); + BATCH_F(box_y2 + pix_yoff); + BATCH_F(box_x2 * src_scale_x + src_offset_x); + BATCH_F(box_y2 * src_scale_y + src_offset_y); + + /* bottom left */ + BATCH_F(box_x1 + pix_xoff); + BATCH_F(box_y2 + pix_yoff); + BATCH_F(box_x1 * src_scale_x + src_offset_x); + BATCH_F(box_y2 * src_scale_y + src_offset_y); + + /* top left */ + BATCH_F(box_x1 + pix_xoff); + BATCH_F(box_y1 + pix_yoff); + BATCH_F(box_x1 * src_scale_x + src_offset_x); + BATCH_F(box_y1 * src_scale_y + src_offset_y); + } while (--nbox_this_time); + } while (nbox); + + if (copy) { +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; 
+#endif + sna_blt_copy_boxes(sna, GXcopy, + dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, + priv->gpu_bo, pix_xoff, pix_yoff, + pixmap->drawable.bitsPerPixel, + region_rects(dstRegion), + region_num_rects(dstRegion)); + + kgem_bo_destroy(&sna->kgem, dst_bo); + } + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + if ((pix_xoff | pix_yoff) == 0) { + sna_damage_add(&priv->gpu_damage, dstRegion); + } else { + sna_damage_add_boxes(&priv->gpu_damage, + region_rects(dstRegion), + region_num_rects(dstRegion), + pix_xoff, pix_yoff); + } + } + + return true; +} + +static void gen2_render_copy_setup_source(struct sna_composite_channel *channel, const DrawableRec *draw, struct kgem_bo *bo) @@ -3176,7 +3476,11 @@ static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op PIPELINE_FLUSH_TEXTURE_CACHE); kgem_clear_dirty(&sna->kgem); } - gen2_emit_target(sna, op); + gen2_emit_target(sna, + op->dst.bo, + op->dst.width, + op->dst.height, + op->dst.format); ls1 = sna->kgem.nbatch; BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | @@ -3511,7 +3815,7 @@ const char *gen2_render_init(struct sna *sna, const char *backend) render->copy = gen2_render_copy; render->copy_boxes = gen2_render_copy_boxes; - /* XXX YUV color space conversion for video? 
*/ + render->video = gen2_render_video; render->reset = gen2_render_reset; render->flush = gen2_render_flush; diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c index 78289f00..4459a562 100644 --- a/src/sna/gen3_render.c +++ b/src/sna/gen3_render.c @@ -448,14 +448,14 @@ gen3_emit_composite_boxes_constant(const struct sna_composite_op *op, float *v) { do { - v[0] = box->x2; - v[1] = box->y2; + v[0] = box->x2 + op->dst.x; + v[1] = box->y2 + op->dst.y; - v[2] = box->x1; - v[3] = box->y2; + v[2] = box->x1 + op->dst.x; + v[3] = box->y2 + op->dst.y; - v[4] = box->x1; - v[5] = box->y1; + v[4] = box->x1 + op->dst.x; + v[5] = box->y1 + op->dst.y; box++; v += 6; @@ -494,18 +494,18 @@ gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op, float *v) { do { - v[0] = box->x2; - v[1] = box->y2; + v[0] = box->x2 + op->dst.x; + v[1] = box->y2 + op->dst.y; v[2] = box->x2 + op->src.offset[0]; v[3] = box->y2 + op->src.offset[1]; - v[4] = box->x1; - v[5] = box->y2; + v[4] = box->x1 + op->dst.x; + v[5] = box->y2 + op->dst.y; v[6] = box->x1 + op->src.offset[0]; v[7] = box->y2 + op->src.offset[1]; - v[8] = box->x1; - v[9] = box->y1; + v[8] = box->x1 + op->dst.x; + v[9] = box->y1 + op->dst.y; v[10] = box->x1 + op->src.offset[0]; v[11] = box->y1 + op->src.offset[1]; @@ -531,6 +531,7 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = dst_x + r->width; v[1] = dst_y + r->height; @@ -559,22 +560,22 @@ gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op, const PictTransform *transform = op->src.transform; do { - v[0] = box->x2; - v[1] = box->y2; + v[0] = box->x2 + op->dst.x; + v[1] = box->y2 + op->dst.y; _sna_get_transformed_scaled(box->x2 + op->src.offset[0], box->y2 + op->src.offset[1], transform, op->src.scale, &v[2], &v[3]); - v[4] = box->x1; - v[5] = box->y2; + v[4] = 
box->x1 + op->dst.x; + v[5] = box->y2 + op->dst.y; _sna_get_transformed_scaled(box->x1 + op->src.offset[0], box->y2 + op->src.offset[1], transform, op->src.scale, &v[6], &v[7]); - v[8] = box->x1; - v[9] = box->y1; + v[8] = box->x1 + op->dst.x; + v[9] = box->y1 + op->dst.y; _sna_get_transformed_scaled(box->x1 + op->src.offset[0], box->y1 + op->src.offset[1], transform, op->src.scale, @@ -596,6 +597,7 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[8] = v[4] = r->dst.x + op->dst.x; v[0] = v[4] + w; @@ -643,6 +645,7 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[8] = v[4] = r->dst.x; v[9] = r->dst.y; @@ -693,6 +696,7 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = dst_x + r->width; v[5] = v[1] = dst_y + r->height; @@ -720,10 +724,10 @@ gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op, const PictTransform *transform = op->src.transform; do { - v[0] = box->x2; - v[5] = v[1] = box->y2; - v[8] = v[4] = box->x1; - v[9] = box->y1; + v[0] = box->x2 + op->dst.x; + v[5] = v[1] = box->y2 + op->dst.y; + v[8] = v[4] = box->x1 + op->dst.x; + v[9] = box->y1 + op->dst.y; _sna_get_transformed_scaled(box->x2 + op->src.offset[0], box->y2 + op->src.offset[1], @@ -756,6 +760,7 @@ gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[8] = v[4] = r->dst.x + op->dst.x; v[0] = v[4] + w; @@ -781,6 +786,7 @@ 
gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[8] = v[4] = r->dst.x; v[9] = r->dst.y; @@ -817,6 +823,7 @@ gen3_emit_composite_primitive_identity_source_mask(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 18; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = dst_x + w; v[1] = dst_y + h; @@ -862,6 +869,7 @@ gen3_emit_composite_primitive_affine_source_mask(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 18; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = dst_x + w; v[1] = dst_y + h; @@ -978,6 +986,7 @@ gen3_emit_composite_primitive_constant__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[4] = v[2] = r->dst.x + op->dst.x; v[5] = r->dst.y + op->dst.y; @@ -993,10 +1002,10 @@ gen3_emit_composite_boxes_constant__sse2(const struct sna_composite_op *op, float *v) { do { - v[0] = box->x2; - v[3] = v[1] = box->y2; - v[4] = v[2] = box->x1; - v[5] = box->y1; + v[0] = box->x2 + op->dst.x; + v[3] = v[1] = box->y2 + op->dst.y; + v[4] = v[2] = box->x1 + op->dst.x; + v[5] = box->y1 + op->dst.y; box++; v += 6; @@ -1013,6 +1022,7 @@ gen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); x = r->dst.x + op->dst.x; y = r->dst.y + op->dst.y; @@ -1035,10 +1045,10 @@ gen3_emit_composite_boxes_identity_gradient__sse2(const struct sna_composite_op float *v) { do { - v[0] = box->x2; - v[5] = v[1] = box->y2; - v[8] = v[4] = box->x1; - v[9] = box->y1; + v[0] = box->x2 + op->dst.x; + v[5] = v[1] = box->y2 + 
op->dst.y; + v[8] = v[4] = box->x1 + op->dst.x; + v[9] = box->y1 + op->dst.y; v[2] = box->x2 + op->src.offset[0]; v[7] = v[3] = box->y2 + op->src.offset[1]; @@ -1067,6 +1077,7 @@ gen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = dst_x + r->width; v[1] = dst_y + r->height; @@ -1095,22 +1106,22 @@ gen3_emit_composite_boxes_affine_gradient__sse2(const struct sna_composite_op *o const PictTransform *transform = op->src.transform; do { - v[0] = box->x2; - v[1] = box->y2; + v[0] = box->x2 + op->dst.x; + v[1] = box->y2 + op->dst.y; _sna_get_transformed_scaled(box->x2 + op->src.offset[0], box->y2 + op->src.offset[1], transform, op->src.scale, &v[2], &v[3]); - v[4] = box->x1; - v[5] = box->y2; + v[4] = box->x1 + op->dst.x; + v[5] = box->y2 + op->dst.y; _sna_get_transformed_scaled(box->x1 + op->src.offset[0], box->y2 + op->src.offset[1], transform, op->src.scale, &v[6], &v[7]); - v[8] = box->x1; - v[9] = box->y1; + v[8] = box->x1 + op->dst.x; + v[9] = box->y1 + op->dst.y; _sna_get_transformed_scaled(box->x1 + op->src.offset[0], box->y1 + op->src.offset[1], transform, op->src.scale, @@ -1132,6 +1143,7 @@ gen3_emit_composite_primitive_identity_source__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[8] = v[4] = r->dst.x + op->dst.x; v[0] = v[4] + w; @@ -1179,6 +1191,7 @@ gen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[8] = v[4] = r->dst.x; v[9] = r->dst.y; @@ -1227,8 +1240,12 @@ gen3_emit_composite_primitive_affine_source__sse2(struct sna *sna, int src_y = r->src.y + (int)op->src.offset[1]; float *v; + DBG(("%s: 
src=(%d, %d), dst=(%d, %d), size=%dx%d\n", + __FUNCTION__, src_x, src_y, dst_x, dst_y, r->width, r->height)); + v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = dst_x + r->width; v[5] = v[1] = dst_y + r->height; @@ -1256,10 +1273,13 @@ gen3_emit_composite_boxes_affine_source__sse2(const struct sna_composite_op *op, const PictTransform *transform = op->src.transform; do { - v[0] = box->x2; - v[5] = v[1] = box->y2; - v[8] = v[4] = box->x1; - v[9] = box->y1; + DBG(("%s: box=(%d, %d), (%d, %d), src.offset=(%d, %d)\n", + __FUNCTION__, box->x1, box->y1, box->x2, box->y2, op->src.offset[0], op->src.offset[1])); + + v[0] = box->x2 + op->dst.x; + v[5] = v[1] = box->y2 + op->dst.y; + v[8] = v[4] = box->x1 + op->dst.x; + v[9] = box->y1 + op->dst.y; _sna_get_transformed_scaled(box->x2 + op->src.offset[0], box->y2 + op->src.offset[1], @@ -1292,6 +1312,7 @@ gen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[8] = v[4] = r->dst.x + op->dst.x; v[0] = v[4] + w; @@ -1317,6 +1338,7 @@ gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 12; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[8] = v[4] = r->dst.x; v[9] = r->dst.y; @@ -1353,6 +1375,7 @@ gen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 18; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = dst_x + w; v[1] = dst_y + h; @@ -1398,6 +1421,7 @@ gen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 18; + 
assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = dst_x + w; v[1] = dst_y + h; @@ -2233,6 +2257,7 @@ static void gen3_vertex_flush(struct sna *sna) static int gen3_vertex_finish(struct sna *sna) { struct kgem_bo *bo; + unsigned hint, size; DBG(("%s: used=%d/%d, vbo active? %d\n", __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size, @@ -2243,6 +2268,7 @@ static int gen3_vertex_finish(struct sna *sna) sna_vertex_wait__locked(&sna->render); + hint = CREATE_GTT_MAP; bo = sna->render.vbo; if (bo) { DBG(("%s: reloc = %d\n", __FUNCTION__, @@ -2251,7 +2277,7 @@ static int gen3_vertex_finish(struct sna *sna) if (sna->render.vertex_reloc[0]) { sna->kgem.batch[sna->render.vertex_reloc[0]] = kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], - bo, I915_GEM_DOMAIN_VERTEX << 16, 0); + bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, 0); sna->render.vertex_reloc[0] = 0; } @@ -2260,17 +2286,29 @@ static int gen3_vertex_finish(struct sna *sna) sna->render.vbo = NULL; kgem_bo_destroy(&sna->kgem, bo); + hint |= CREATE_CACHED | CREATE_NO_THROTTLE; } + size = 256*1024; sna->render.vertices = NULL; - sna->render.vbo = kgem_create_linear(&sna->kgem, - 256*1024, CREATE_GTT_MAP); - if (sna->render.vbo) + sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); + while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) { + size /= 2; + sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); + } + if (sna->render.vbo == NULL) + sna->render.vbo = kgem_create_linear(&sna->kgem, + 256*1024, CREATE_GTT_MAP); + if (sna->render.vbo && + kgem_check_bo(&sna->kgem, sna->render.vbo, NULL)) sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); if (sna->render.vertices == NULL) { - if (sna->render.vbo) + if (sna->render.vbo) { kgem_bo_destroy(&sna->kgem, sna->render.vbo); - sna->render.vbo = NULL; + sna->render.vbo = NULL; + } + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = 
ARRAY_SIZE(sna->render.vertex_data); return 0; } assert(sna->render.vbo->snoop == false); @@ -2280,8 +2318,14 @@ static int gen3_vertex_finish(struct sna *sna) sna->render.vertex_data, sizeof(float)*sna->render.vertex_used); } - sna->render.vertex_size = 64 * 1024 - 1; - return sna->render.vertex_size - sna->render.vertex_used; + + size = __kgem_bo_size(sna->render.vbo)/4; + if (size >= UINT16_MAX) + size = UINT16_MAX - 1; + assert(size > sna->render.vertex_used); + + sna->render.vertex_size = size; + return size - sna->render.vertex_used; } static void gen3_vertex_close(struct sna *sna) @@ -2345,7 +2389,7 @@ static void gen3_vertex_close(struct sna *sna) DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0])); sna->kgem.batch[sna->render.vertex_reloc[0]] = kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], - bo, I915_GEM_DOMAIN_VERTEX << 16, delta); + bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, delta); sna->render.vertex_reloc[0] = 0; if (sna->render.vbo == NULL) { @@ -2580,6 +2624,7 @@ gen3_render_composite_boxes(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + assert(sna->render.vertex_used <= sna->render.vertex_size); op->emit_boxes(op, box, nbox_this_time, v); box += nbox_this_time; @@ -2604,6 +2649,7 @@ gen3_render_composite_boxes__thread(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + assert(sna->render.vertex_used <= sna->render.vertex_size); sna_vertex_acquire__locked(&sna->render); sna_vertex_unlock(&sna->render); @@ -3065,7 +3111,7 @@ gen3_composite_picture(struct sna *sna, if (sna_picture_is_clear(picture, x, y, w, h, &color)) { DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color)); - return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8)); + return gen3_init_solid(channel, solid_color(picture->format, color)); } if 
(!gen3_check_repeat(picture)) @@ -3097,12 +3143,12 @@ gen3_composite_picture(struct sna *sna, if (channel->repeat || (x >= 0 && y >= 0 && - x + w < pixmap->drawable.width && - y + h < pixmap->drawable.height)) { + x + w <= pixmap->drawable.width && + y + h <= pixmap->drawable.height)) { struct sna_pixmap *priv = sna_pixmap(pixmap); if (priv && priv->clear) { DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); - return gen3_init_solid(channel, priv->clear_color); + return gen3_init_solid(channel, solid_color(picture->format, priv->clear_color)); } } } else { @@ -3182,7 +3228,9 @@ gen3_composite_set_target(struct sna *sna, } else sna_render_picture_extents(dst, &box); - hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; + hint = PREFER_GPU | RENDER_GPU; + if (!need_tiling(sna, op->dst.width, op->dst.height)) + hint |= FORCE_GPU; if (!partial) { hint |= IGNORE_DAMAGE; if (w == op->dst.width && h == op->dst.height) @@ -3645,8 +3693,11 @@ gen3_render_composite(struct sna *sna, } } } - DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__, + DBG(("%s: final src/mask type=%d/%d [constant? %d/%d], transform? 
%d/%d, affine=%d/%d\n", __FUNCTION__, tmp->src.u.gen3.type, tmp->mask.u.gen3.type, + is_constant_ps(tmp->src.u.gen3.type), + is_constant_ps(tmp->mask.u.gen3.type), + !!tmp->src.transform, !!tmp->mask.transform, tmp->src.is_affine, tmp->mask.is_affine)); tmp->prim_emit = gen3_emit_composite_primitive; @@ -3862,6 +3913,7 @@ gen3_emit_composite_spans_primitive_zero(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[1] = op->base.dst.y + box->y2; @@ -3901,6 +3953,7 @@ gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = box->x2; v[3] = v[1] = box->y2; @@ -3932,6 +3985,7 @@ gen3_emit_composite_spans_primitive_constant(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 9; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[6] = v[3] = op->base.dst.x + box->x1; @@ -3966,6 +4020,7 @@ gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 9; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = box->x2; v[6] = v[3] = box->x1; @@ -3999,6 +4054,7 @@ gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 15; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[1] = op->base.dst.y + box->y2; @@ -4060,6 +4116,7 @@ gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 15; + assert(sna->render.vertex_used <= 
sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[6] = v[1] = op->base.dst.y + box->y2; @@ -4125,6 +4182,7 @@ gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 15; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[1] = op->base.dst.y + box->y2; @@ -4184,6 +4242,7 @@ gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 9; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[6] = v[3] = op->base.dst.x + box->x1; @@ -4229,6 +4288,7 @@ gen3_render_composite_spans_constant_box__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 9; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = box->x2; v[6] = v[3] = box->x1; @@ -4259,6 +4319,7 @@ gen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += nbox_this_time * 9; + assert(sna->render.vertex_used <= sna->render.vertex_size); sna_vertex_acquire__locked(&sna->render); sna_vertex_unlock(&sna->render); @@ -4287,6 +4348,7 @@ gen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 9; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = box->x2; v[6] = v[3] = box->x1; @@ -4320,6 +4382,7 @@ gen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 15; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[1] = op->base.dst.y + box->y2; @@ -4380,6 +4443,7 @@ 
gen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 15; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[6] = v[1] = op->base.dst.y + box->y2; @@ -4445,6 +4509,7 @@ gen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna, { float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 15; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[1] = op->base.dst.y + box->y2; @@ -4504,6 +4569,7 @@ gen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna, PictTransform *transform = op->base.src.transform; float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 15; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[1] = op->base.dst.y + box->y2; @@ -4577,6 +4643,7 @@ gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, PictTransform *transform = op->base.src.transform; float *v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 15; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = op->base.dst.x + box->x2; v[1] = op->base.dst.y + box->y2; @@ -4676,6 +4743,7 @@ gen3_render_composite_spans_constant_box(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 9; + assert(sna->render.vertex_used <= sna->render.vertex_size); v[0] = box->x2; v[6] = v[3] = box->x1; @@ -4706,6 +4774,7 @@ gen3_render_composite_spans_constant_thread_boxes(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += nbox_this_time * 9; + assert(sna->render.vertex_used <= sna->render.vertex_size); sna_vertex_acquire__locked(&sna->render); sna_vertex_unlock(&sna->render); @@ -4795,6 +4864,7 @@ gen3_render_composite_spans_boxes__thread(struct sna 
*sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + assert(sna->render.vertex_used <= sna->render.vertex_size); sna_vertex_acquire__locked(&sna->render); sna_vertex_unlock(&sna->render); @@ -5436,17 +5506,7 @@ gen3_render_video(struct sna *sna, pix_yoff = -dstRegion->extents.y1; copy = 1; } else { - /* Set up the offset for translating from the given region - * (in screen coordinates) to the backing pixmap. - */ -#ifdef COMPOSITE - pix_xoff = -pixmap->screen_x + pixmap->drawable.x; - pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -#else - pix_xoff = 0; - pix_yoff = 0; -#endif - + pix_xoff = pix_yoff = 0; dst_width = pixmap->drawable.width; dst_height = pixmap->drawable.height; } @@ -5502,16 +5562,9 @@ gen3_render_video(struct sna *sna, } while (nbox); if (copy) { -#ifdef COMPOSITE - pix_xoff = -pixmap->screen_x + pixmap->drawable.x; - pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -#else - pix_xoff = 0; - pix_yoff = 0; -#endif sna_blt_copy_boxes(sna, GXcopy, dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, - priv->gpu_bo, pix_xoff, pix_yoff, + priv->gpu_bo, 0, 0, pixmap->drawable.bitsPerPixel, region_rects(dstRegion), region_num_rects(dstRegion)); @@ -5519,21 +5572,8 @@ gen3_render_video(struct sna *sna, kgem_bo_destroy(&sna->kgem, dst_bo); } - if (!DAMAGE_IS_ALL(priv->gpu_damage)) { - if ((pix_xoff | pix_yoff) == 0) { - sna_damage_add(&priv->gpu_damage, dstRegion); - sna_damage_subtract(&priv->cpu_damage, dstRegion); - } else { - sna_damage_add_boxes(&priv->gpu_damage, - region_rects(dstRegion), - region_num_rects(dstRegion), - pix_xoff, pix_yoff); - sna_damage_subtract_boxes(&priv->cpu_damage, - region_rects(dstRegion), - region_num_rects(dstRegion), - pix_xoff, pix_yoff); - } - } + if (!DAMAGE_IS_ALL(priv->gpu_damage)) + sna_damage_add(&priv->gpu_damage, dstRegion); return true; } diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index 6c2d3808..72a98aee 100644 
--- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -1405,8 +1405,8 @@ gen4_render_video(struct sna *sna, int src_height = frame->src.y2 - frame->src.y1; float src_offset_x, src_offset_y; float src_scale_x, src_scale_y; - int nbox, pix_xoff, pix_yoff; const BoxRec *box; + int nbox; DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_width, src_height, dst_width, dst_height)); @@ -1445,17 +1445,6 @@ gen4_render_video(struct sna *sna, gen4_align_vertex(sna, &tmp); gen4_video_bind_surfaces(sna, &tmp); - /* Set up the offset for translating from the given region (in screen - * coordinates) to the backing pixmap. - */ -#ifdef COMPOSITE - pix_xoff = -pixmap->screen_x + pixmap->drawable.x; - pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -#else - pix_xoff = 0; - pix_yoff = 0; -#endif - src_scale_x = (float)src_width / dst_width / frame->width; src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; @@ -1473,34 +1462,26 @@ gen4_render_video(struct sna *sna, nbox -= n; do { - BoxRec r; - - r.x1 = box->x1 + pix_xoff; - r.x2 = box->x2 + pix_xoff; - r.y1 = box->y1 + pix_yoff; - r.y2 = box->y2 + pix_yoff; - - OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX(box->x2, box->y2); OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX(box->x1, box->y2); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX(box->x1, box->y1); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - if (!DAMAGE_IS_ALL(priv->gpu_damage)) { - sna_damage_add_box(&priv->gpu_damage, &r); - sna_damage_subtract_box(&priv->cpu_damage, &r); - } box++; } while (--n); } while (nbox); gen4_vertex_flush(sna); + if (!DAMAGE_IS_ALL(priv->gpu_damage)) + sna_damage_add(&priv->gpu_damage, dstRegion); + return true; } @@ -1585,12 +1566,14 @@ gen4_composite_picture(struct 
sna *sna, if (channel->repeat && (x >= 0 && y >= 0 && - x + w < pixmap->drawable.width && - y + h < pixmap->drawable.height)) { + x + w <= pixmap->drawable.width && + y + h <= pixmap->drawable.height)) { struct sna_pixmap *priv = sna_pixmap(pixmap); if (priv && priv->clear) { DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); - return gen4_channel_init_solid(sna, channel, priv->clear_color); + return gen4_channel_init_solid(sna, channel, + solid_color(picture->format, + priv->clear_color)); } } } else @@ -1664,7 +1647,9 @@ gen4_composite_set_target(struct sna *sna, } else sna_render_picture_extents(dst, &box); - hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; + hint = PREFER_GPU | RENDER_GPU; + if (!need_tiling(sna, op->dst.width, op->dst.height)) + hint |= FORCE_GPU; if (!partial) { hint |= IGNORE_DAMAGE; if (w == op->dst.width && h == op->dst.height) @@ -2738,6 +2723,20 @@ gen4_render_fill_boxes(struct sna *sna, tmp.dst.format = format; tmp.dst.bo = dst_bo; + sna_render_composite_redirect_init(&tmp); + if (too_large(dst->width, dst->height)) { + BoxRec extents; + + boxes_extents(box, n, &extents); + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + n > 1)) + return sna_tiling_fill_boxes(sna, op, format, color, + dst, dst_bo, box, n); + } + gen4_channel_init_solid(sna, &tmp.src, pixel); tmp.is_affine = true; @@ -2748,8 +2747,10 @@ gen4_render_fill_boxes(struct sna *sna, if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { kgem_submit(&sna->kgem); - if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); return false; + } } gen4_align_vertex(sna, &tmp); @@ -2765,6 +2766,7 @@ gen4_render_fill_boxes(struct sna *sna, gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); + sna_render_composite_redirect_done(sna, &tmp); return true; } diff --git 
a/src/sna/gen5_render.c b/src/sna/gen5_render.c index 37cf1ff9..fb3e79bf 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -1355,8 +1355,8 @@ gen5_render_video(struct sna *sna, int src_height = frame->src.y2 - frame->src.y1; float src_offset_x, src_offset_y; float src_scale_x, src_scale_y; - int nbox, pix_xoff, pix_yoff; const BoxRec *box; + int nbox; DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_width, src_height, dst_width, dst_height)); @@ -1395,17 +1395,6 @@ gen5_render_video(struct sna *sna, gen5_align_vertex(sna, &tmp); gen5_video_bind_surfaces(sna, &tmp); - /* Set up the offset for translating from the given region (in screen - * coordinates) to the backing pixmap. - */ -#ifdef COMPOSITE - pix_xoff = -pixmap->screen_x + pixmap->drawable.x; - pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -#else - pix_xoff = 0; - pix_yoff = 0; -#endif - src_scale_x = (float)src_width / dst_width / frame->width; src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; @@ -1415,35 +1404,27 @@ gen5_render_video(struct sna *sna, box = region_rects(dstRegion); nbox = region_num_rects(dstRegion); while (nbox--) { - BoxRec r; - - r.x1 = box->x1 + pix_xoff; - r.x2 = box->x2 + pix_xoff; - r.y1 = box->y1 + pix_yoff; - r.y2 = box->y2 + pix_yoff; - gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces); - OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX(box->x2, box->y2); OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX(box->x1, box->y2); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX(box->x1, box->y1); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - if (!DAMAGE_IS_ALL(priv->gpu_damage)) { - sna_damage_add_box(&priv->gpu_damage, &r); - sna_damage_subtract_box(&priv->cpu_damage, &r); - } box++; } - 
gen4_vertex_flush(sna); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) + sna_damage_add(&priv->gpu_damage, dstRegion); + return true; } @@ -1524,12 +1505,12 @@ gen5_composite_picture(struct sna *sna, if (channel->repeat || (x >= 0 && y >= 0 && - x + w < pixmap->drawable.width && - y + h < pixmap->drawable.height)) { + x + w <= pixmap->drawable.width && + y + h <= pixmap->drawable.height)) { struct sna_pixmap *priv = sna_pixmap(pixmap); if (priv && priv->clear) { DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); - return gen4_channel_init_solid(sna, channel, priv->clear_color); + return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); } } } else @@ -1618,7 +1599,9 @@ gen5_composite_set_target(struct sna *sna, } else sna_render_picture_extents(dst, &box); - hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; + hint = PREFER_GPU | RENDER_GPU; + if (!need_tiling(sna, op->dst.width, op->dst.height)) + hint |= FORCE_GPU; if (!partial) { hint |= IGNORE_DAMAGE; if (w == op->dst.width && h == op->dst.height) @@ -2734,6 +2717,19 @@ gen5_render_fill_boxes(struct sna *sna, tmp.dst.format = format; tmp.dst.bo = dst_bo; + if (too_large(dst->width, dst->height)) { + BoxRec extents; + + boxes_extents(box, n, &extents); + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + n > 1)) + return sna_tiling_fill_boxes(sna, op, format, color, + dst, dst_bo, box, n); + } + tmp.src.bo = sna_render_get_solid(sna, pixel); tmp.src.filter = SAMPLER_FILTER_NEAREST; tmp.src.repeat = SAMPLER_EXTEND_REPEAT; @@ -2780,6 +2776,7 @@ gen5_render_fill_boxes(struct sna *sna, gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, tmp.src.bo); + sna_render_composite_redirect_done(sna, &tmp); return true; } diff --git a/src/sna/gen6_common.h b/src/sna/gen6_common.h index 6668620b..b53ec0c9 100644 --- a/src/sna/gen6_common.h +++ b/src/sna/gen6_common.h @@ -30,8 
+30,8 @@ #include "sna.h" -#define NO_RING_SWITCH 0 -#define PREFER_RENDER 0 +#define NO_RING_SWITCH(sna) (!(sna)->kgem.has_semaphores) +#define PREFER_RENDER 0 /* -1 -> BLT, 1 -> RENDER */ static inline bool is_uncached(struct sna *sna, struct kgem_bo *bo) @@ -46,40 +46,28 @@ inline static bool can_switch_to_blt(struct sna *sna, if (sna->kgem.ring != KGEM_RENDER) return true; - if (NO_RING_SWITCH) - return false; - - if (!sna->kgem.has_semaphores) - return false; - - if (flags & COPY_LAST) - return true; - if (bo && RQ_IS_BLT(bo->rq)) return true; - if (sna->render_state.gt < 2) - return true; + if (bo && bo->tiling == I915_TILING_Y) + return false; - return kgem_ring_is_idle(&sna->kgem, KGEM_BLT); -} + if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) + return false; -inline static bool can_switch_to_render(struct sna *sna, - struct kgem_bo *bo) -{ - if (sna->kgem.ring == KGEM_RENDER) + if (sna->render_state.gt < 2) return true; - if (NO_RING_SWITCH) + if (bo && RQ_IS_RENDER(bo->rq)) return false; - if (!sna->kgem.has_semaphores) + if (NO_RING_SWITCH(sna)) return false; - if (bo && !RQ_IS_BLT(bo->rq) && !is_uncached(sna, bo)) + if (flags & COPY_LAST) return true; - return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER); + return kgem_ring_is_idle(&sna->kgem, KGEM_BLT); } static inline bool untiled_tlb_miss(struct kgem_bo *bo) @@ -90,57 +78,95 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo) return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; } -static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) +static int prefer_blt_bo(struct sna *sna, + struct kgem_bo *src, + struct kgem_bo *dst) { + assert(dst != NULL); + if (PREFER_RENDER) return PREFER_RENDER < 0; - if (bo->rq) - return RQ_IS_BLT(bo->rq); + if (dst->rq) + return RQ_IS_BLT(dst->rq); if (sna->flags & SNA_POWERSAVE) return true; - return bo->tiling == I915_TILING_NONE || is_uncached(sna, bo); -} + if (src) { + if (sna->render_state.gt > 1) + return false; -inline static bool 
force_blt_ring(struct sna *sna) -{ - if (sna->flags & SNA_POWERSAVE) + if (src->rq) + return RQ_IS_BLT(src->rq); + + if (src->tiling == I915_TILING_Y) + return false; + } else { + if (sna->render_state.gt > 2) + return false; + } + + if (sna->render_state.gt < 2) return true; + return dst->tiling == I915_TILING_NONE || is_uncached(sna, dst); +} + +inline static bool force_blt_ring(struct sna *sna, struct kgem_bo *bo) +{ if (sna->kgem.mode == KGEM_RENDER) return false; + if (NO_RING_SWITCH(sna)) + return sna->kgem.ring == KGEM_BLT; + + if (bo->tiling == I915_TILING_Y) + return false; + + if (sna->flags & SNA_POWERSAVE) + return true; + if (sna->render_state.gt < 2) return true; return false; } -inline static bool prefer_blt_ring(struct sna *sna, - struct kgem_bo *bo, - unsigned flags) +nonnull inline static bool +prefer_blt_ring(struct sna *sna, struct kgem_bo *bo, unsigned flags) { if (PREFER_RENDER) return PREFER_RENDER < 0; - assert(!force_blt_ring(sna)); - assert(!kgem_bo_is_render(bo)); + assert(!force_blt_ring(sna, bo)); + assert(!kgem_bo_is_render(bo) || NO_RING_SWITCH(sna)); + + if (kgem_bo_is_blt(bo)) + return true; return can_switch_to_blt(sna, bo, flags); } -inline static bool prefer_render_ring(struct sna *sna, - struct kgem_bo *bo) +nonnull inline static bool +prefer_render_ring(struct sna *sna, struct kgem_bo *bo) { + if (sna->kgem.ring == KGEM_RENDER) + return true; + + if (sna->kgem.ring != KGEM_NONE && NO_RING_SWITCH(sna)) + return false; + + if (kgem_bo_is_render(bo)) + return true; + if (sna->flags & SNA_POWERSAVE) return false; - if (sna->render_state.gt < 2) - return false; + if (!prefer_blt_bo(sna, NULL, bo)) + return true; - return can_switch_to_render(sna, bo); + return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER); } inline static bool @@ -153,25 +179,20 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) untiled_tlb_miss(tmp->src.bo)) return true; - if (force_blt_ring(sna)) + if (force_blt_ring(sna, tmp->dst.bo)) return 
true; - if (kgem_bo_is_render(tmp->dst.bo) || - kgem_bo_is_render(tmp->src.bo)) - return false; - if (prefer_render_ring(sna, tmp->dst.bo)) return false; if (!prefer_blt_ring(sna, tmp->dst.bo, 0)) return false; - return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, tmp->src.bo); + return prefer_blt_bo(sna, tmp->src.bo, tmp->dst.bo); } -static inline bool prefer_blt_fill(struct sna *sna, - struct kgem_bo *bo, - unsigned flags) +nonnull static inline bool +prefer_blt_fill(struct sna *sna, struct kgem_bo *bo, unsigned flags) { if (PREFER_RENDER) return PREFER_RENDER < 0; @@ -179,24 +200,21 @@ static inline bool prefer_blt_fill(struct sna *sna, if (untiled_tlb_miss(bo)) return true; - if (force_blt_ring(sna)) + if (force_blt_ring(sna, bo)) return true; if ((flags & (FILL_POINTS | FILL_SPANS)) == 0) { - if (kgem_bo_is_render(bo)) - return false; - if (prefer_render_ring(sna, bo)) return false; if (!prefer_blt_ring(sna, bo, 0)) return false; } else { - if (can_switch_to_blt(sna, bo, 0)) + if (can_switch_to_blt(sna, bo, COPY_LAST)) return true; } - return prefer_blt_bo(sna, bo); + return prefer_blt_bo(sna, NULL, bo); } void gen6_render_context_switch(struct kgem *kgem, int new_mode); diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 25044685..6b69f216 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -1633,9 +1633,9 @@ gen6_render_video(struct sna *sna, int src_height = frame->src.y2 - frame->src.y1; float src_offset_x, src_offset_y; float src_scale_x, src_scale_y; - int nbox, pix_xoff, pix_yoff; unsigned filter; const BoxRec *box; + int nbox; DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", __FUNCTION__, @@ -1686,17 +1686,6 @@ gen6_render_video(struct sna *sna, gen6_align_vertex(sna, &tmp); gen6_emit_video_state(sna, &tmp); - /* Set up the offset for translating from the given region (in screen - * coordinates) to the backing pixmap. 
- */ -#ifdef COMPOSITE - pix_xoff = -pixmap->screen_x + pixmap->drawable.x; - pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -#else - pix_xoff = 0; - pix_yoff = 0; -#endif - src_scale_x = (float)src_width / dst_width / frame->width; src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; @@ -1706,35 +1695,27 @@ gen6_render_video(struct sna *sna, box = region_rects(dstRegion); nbox = region_num_rects(dstRegion); while (nbox--) { - BoxRec r; - - r.x1 = box->x1 + pix_xoff; - r.x2 = box->x2 + pix_xoff; - r.y1 = box->y1 + pix_yoff; - r.y2 = box->y2 + pix_yoff; - gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); - OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX(box->x2, box->y2); OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX(box->x1, box->y2); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX(box->x1, box->y1); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - if (!DAMAGE_IS_ALL(priv->gpu_damage)) { - sna_damage_add_box(&priv->gpu_damage, &r); - sna_damage_subtract_box(&priv->cpu_damage, &r); - } box++; } - gen4_vertex_flush(sna); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) + sna_damage_add(&priv->gpu_damage, dstRegion); + return true; } @@ -1815,12 +1796,12 @@ gen6_composite_picture(struct sna *sna, if (channel->repeat && (x >= 0 && y >= 0 && - x + w < pixmap->drawable.width && - y + h < pixmap->drawable.height)) { + x + w <= pixmap->drawable.width && + y + h <= pixmap->drawable.height)) { struct sna_pixmap *priv = sna_pixmap(pixmap); if (priv && priv->clear) { DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); - return gen4_channel_init_solid(sna, channel, priv->clear_color); + return gen4_channel_init_solid(sna, channel, 
solid_color(picture->format, priv->clear_color)); } } } else @@ -1927,7 +1908,9 @@ gen6_composite_set_target(struct sna *sna, } else sna_render_picture_extents(dst, &box); - hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; + hint = PREFER_GPU | RENDER_GPU; + if (!need_tiling(sna, op->dst.width, op->dst.height)) + hint |= FORCE_GPU; if (!partial) { hint |= IGNORE_DAMAGE; if (w == op->dst.width && h == op->dst.height) @@ -1965,46 +1948,77 @@ gen6_composite_set_target(struct sna *sna, static bool try_blt(struct sna *sna, - PicturePtr dst, PicturePtr src, - int width, int height) + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_op *tmp) { struct kgem_bo *bo; if (sna->kgem.mode == KGEM_BLT) { DBG(("%s: already performing BLT\n", __FUNCTION__)); - return true; + goto execute; } if (too_large(width, height)) { DBG(("%s: operation too large for 3D pipe (%d, %d)\n", __FUNCTION__, width, height)); - return true; + goto execute; } bo = __sna_drawable_peek_bo(dst->pDrawable); if (bo == NULL) - return true; - if (bo->rq) - return RQ_IS_BLT(bo->rq); + goto execute; + + if (untiled_tlb_miss(bo)) + goto execute; + + if (bo->rq) { + if (RQ_IS_BLT(bo->rq)) + goto execute; + + return false; + } + + if (bo->tiling == I915_TILING_Y) + goto upload; + + if (src->pDrawable == dst->pDrawable && + can_switch_to_blt(sna, bo, 0)) + goto execute; if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) - return true; + goto execute; if (src->pDrawable) { - bo = __sna_drawable_peek_bo(src->pDrawable); - if (bo == NULL) - return true; + struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); + if (s == NULL) + goto execute; - if (prefer_blt_bo(sna, bo)) - return true; + if (prefer_blt_bo(sna, s, bo)) + goto execute; } if (sna->kgem.ring == KGEM_BLT) { DBG(("%s: already performing BLT\n", 
__FUNCTION__)); - return true; + goto execute; } - return false; +upload: + flags |= COMPOSITE_UPLOAD; +execute: + return sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + flags, tmp); } static bool @@ -2234,13 +2248,13 @@ gen6_render_composite(struct sna *sna, width, height, sna->kgem.ring)); if (mask == NULL && - try_blt(sna, dst, src, width, height) && - sna_blt_composite(sna, op, - src, dst, - src_x, src_y, - dst_x, dst_y, - width, height, - flags, tmp)) + try_blt(sna, op, + src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + flags, tmp)) return true; if (gen6_composite_fallback(sna, src, mask, dst)) @@ -2676,27 +2690,35 @@ static inline bool prefer_blt_copy(struct sna *sna, if (sna->kgem.ring == KGEM_BLT) return true; - if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) + if (flags & COPY_DRI && !sna->kgem.has_semaphores) + return false; + + if ((flags & COPY_SMALL || src_bo == dst_bo) && + can_switch_to_blt(sna, dst_bo, flags)) return true; if (untiled_tlb_miss(src_bo) || untiled_tlb_miss(dst_bo)) return true; - if (force_blt_ring(sna)) + if (force_blt_ring(sna, dst_bo)) return true; if (kgem_bo_is_render(dst_bo) || kgem_bo_is_render(src_bo)) return false; + if (flags & COPY_LAST && + can_switch_to_blt(sna, dst_bo, flags)) + return true; + if (prefer_render_ring(sna, dst_bo)) return false; if (!prefer_blt_ring(sna, dst_bo, flags)) return false; - return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); + return prefer_blt_bo(sna, src_bo, dst_bo); } static bool @@ -2758,8 +2780,7 @@ fallback_blt: assert(src->depth == dst->depth); assert(src->width == dst->width); assert(src->height == dst->height); - return sna_render_copy_boxes__overlap(sna, alu, - src, src_bo, + return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, src_dx, src_dy, dst_dx, dst_dy, box, n, &extents); diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index 2ecfd641..aabb8693 100644 --- 
a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -60,8 +60,6 @@ #define NO_FILL_ONE 0 #define NO_FILL_CLEAR 0 -#define NO_RING_SWITCH 0 - #define USE_8_PIXEL_DISPATCH 1 #define USE_16_PIXEL_DISPATCH 1 #define USE_32_PIXEL_DISPATCH 0 @@ -149,7 +147,7 @@ static const struct gt_info hsw_gt1_info = { .max_vs_threads = 70, .max_gs_threads = 70, .max_wm_threads = - (102 - 1) << HSW_PS_MAX_THREADS_SHIFT | + (70 - 1) << HSW_PS_MAX_THREADS_SHIFT | 1 << HSW_PS_SAMPLE_MASK_SHIFT, .urb = { 128, 640, 256, 8 }, .gt = 1, @@ -209,6 +207,12 @@ static const uint32_t ps_kernel_planar[][4] = { #include "exa_wm_write.g7b" }; +static const uint32_t ps_kernel_rgb[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_write.g7b" +}; + #define KERNEL(kernel_enum, kernel, num_surfaces) \ [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} #define NOKERNEL(kernel_enum, func, num_surfaces) \ @@ -218,7 +222,7 @@ static const struct wm_kernel_info { const void *data; unsigned int size; int num_surfaces; -} wm_kernels[] = { +} wm_kernels[GEN7_WM_KERNEL_COUNT] = { NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), @@ -236,6 +240,7 @@ static const struct wm_kernel_info { KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), + KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), }; #undef KERNEL @@ -810,7 +815,7 @@ gen7_emit_cc(struct sna *sna, uint32_t blend_offset) DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset)); - /* XXX can have upto 8 blend states preload, selectable via + /* XXX can have up to 8 blend states preload, selectable via * Render Target Index. What other side-effects of Render Target Index? 
*/ @@ -1792,7 +1797,9 @@ static void gen7_emit_video_state(struct sna *sna, frame->pitch[0]; n_src = 6; } else { - if (frame->id == FOURCC_UYVY) + if (frame->id == FOURCC_RGB888) + src_surf_format = GEN7_SURFACEFORMAT_B8G8R8X8_UNORM; + else if (frame->id == FOURCC_UYVY) src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY; else src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL; @@ -1826,6 +1833,23 @@ static void gen7_emit_video_state(struct sna *sna, gen7_emit_state(sna, op, offset | dirty); } +static unsigned select_video_kernel(const struct sna_video_frame *frame) +{ + switch (frame->id) { + case FOURCC_YV12: + case FOURCC_I420: + case FOURCC_XVMC: + return GEN7_WM_KERNEL_VIDEO_PLANAR; + + case FOURCC_RGB888: + case FOURCC_RGB565: + return GEN7_WM_KERNEL_VIDEO_RGB; + + default: + return GEN7_WM_KERNEL_VIDEO_PACKED; + } +} + static bool gen7_render_video(struct sna *sna, struct sna_video *video, @@ -1841,9 +1865,9 @@ gen7_render_video(struct sna *sna, int src_height = frame->src.y2 - frame->src.y1; float src_offset_x, src_offset_y; float src_scale_x, src_scale_y; - int nbox, pix_xoff, pix_yoff; unsigned filter; const BoxRec *box; + int nbox; DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", __FUNCTION__, @@ -1878,9 +1902,7 @@ gen7_render_video(struct sna *sna, GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), NO_BLEND, - is_planar_fourcc(frame->id) ? - GEN7_WM_KERNEL_VIDEO_PLANAR : - GEN7_WM_KERNEL_VIDEO_PACKED, + select_video_kernel(frame), 2); tmp.priv = frame; @@ -1896,17 +1918,6 @@ gen7_render_video(struct sna *sna, gen7_align_vertex(sna, &tmp); gen7_emit_video_state(sna, &tmp); - /* Set up the offset for translating from the given region (in screen - * coordinates) to the backing pixmap. 
- */ -#ifdef COMPOSITE - pix_xoff = -pixmap->screen_x + pixmap->drawable.x; - pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -#else - pix_xoff = 0; - pix_yoff = 0; -#endif - DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", __FUNCTION__, frame->src.x1, frame->src.y1, @@ -1928,45 +1939,36 @@ gen7_render_video(struct sna *sna, box = region_rects(dstRegion); nbox = region_num_rects(dstRegion); while (nbox--) { - BoxRec r; - - DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", + DBG(("%s: dst=(%d, %d), (%d, %d); src=(%f, %f), (%f, %f)\n", __FUNCTION__, box->x1, box->y1, box->x2, box->y2, - pix_xoff, pix_yoff, box->x1 * src_scale_x + src_offset_x, box->y1 * src_scale_y + src_offset_y, box->x2 * src_scale_x + src_offset_x, box->y2 * src_scale_y + src_offset_y)); - r.x1 = box->x1 + pix_xoff; - r.x2 = box->x2 + pix_xoff; - r.y1 = box->y1 + pix_yoff; - r.y2 = box->y2 + pix_yoff; - gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state); - OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX(box->x2, box->y2); OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX(box->x1, box->y2); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX(box->x1, box->y1); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - if (!DAMAGE_IS_ALL(priv->gpu_damage)) { - sna_damage_add_box(&priv->gpu_damage, &r); - sna_damage_subtract_box(&priv->cpu_damage, &r); - } box++; } - gen4_vertex_flush(sna); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) + sna_damage_add(&priv->gpu_damage, dstRegion); + return true; } @@ -2048,12 +2050,13 @@ gen7_composite_picture(struct sna *sna, if (channel->repeat || (x >= 0 && y >= 0 && - x + w < pixmap->drawable.width && - y + h < pixmap->drawable.height)) { + x + w <= pixmap->drawable.width && + y + h <= 
pixmap->drawable.height)) { struct sna_pixmap *priv = sna_pixmap(pixmap); if (priv && priv->clear) { DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); - return gen4_channel_init_solid(sna, channel, priv->clear_color); + return gen4_channel_init_solid(sna, channel, + solid_color(picture->format, priv->clear_color)); } } } else @@ -2147,7 +2150,9 @@ gen7_composite_set_target(struct sna *sna, } else sna_render_picture_extents(dst, &box); - hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; + hint = PREFER_GPU | RENDER_GPU; + if (!need_tiling(sna, op->dst.width, op->dst.height)) + hint |= FORCE_GPU; if (!partial) { hint |= IGNORE_DAMAGE; if (w == op->dst.width && h == op->dst.height) @@ -2185,46 +2190,78 @@ gen7_composite_set_target(struct sna *sna, static bool try_blt(struct sna *sna, - PicturePtr dst, PicturePtr src, - int width, int height) + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_op *tmp) { struct kgem_bo *bo; if (sna->kgem.mode == KGEM_BLT) { DBG(("%s: already performing BLT\n", __FUNCTION__)); - return true; + goto execute; } if (too_large(width, height)) { DBG(("%s: operation too large for 3D pipe (%d, %d)\n", __FUNCTION__, width, height)); - return true; + goto execute; } bo = __sna_drawable_peek_bo(dst->pDrawable); if (bo == NULL) - return true; - if (bo->rq) - return RQ_IS_BLT(bo->rq); + goto execute; + + if (untiled_tlb_miss(bo)) + goto execute; + + if (bo->rq) { + if (RQ_IS_BLT(bo->rq)) + goto execute; + + return false; + } + + if (bo->tiling == I915_TILING_Y) + goto upload; + + if (src->pDrawable == dst->pDrawable && + (sna->render_state.gt < 3 || width*height < 1024) && + can_switch_to_blt(sna, bo, 0)) + goto execute; if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) - return true; + goto execute; if 
(src->pDrawable) { - bo = __sna_drawable_peek_bo(src->pDrawable); - if (bo == NULL) - return true; + struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); + if (s == NULL) + goto upload; - if (prefer_blt_bo(sna, bo)) - return true; + if (prefer_blt_bo(sna, s, bo)) + goto execute; } if (sna->kgem.ring == KGEM_BLT) { DBG(("%s: already performing BLT\n", __FUNCTION__)); - return true; + goto execute; } - return false; +upload: + flags |= COMPOSITE_UPLOAD; +execute: + return sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + flags, tmp); } static bool @@ -2454,13 +2491,13 @@ gen7_render_composite(struct sna *sna, width, height, sna->kgem.mode, sna->kgem.ring)); if (mask == NULL && - try_blt(sna, dst, src, width, height) && - sna_blt_composite(sna, op, - src, dst, - src_x, src_y, - dst_x, dst_y, - width, height, - flags, tmp)) + try_blt(sna, op, + src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + flags, tmp)) return true; if (gen7_composite_fallback(sna, src, mask, dst)) @@ -2878,27 +2915,37 @@ prefer_blt_copy(struct sna *sna, assert((flags & COPY_SYNC) == 0); - if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) - return true; - if (untiled_tlb_miss(src_bo) || untiled_tlb_miss(dst_bo)) return true; - if (force_blt_ring(sna)) + if (flags & COPY_DRI && !sna->kgem.has_semaphores) + return false; + + if (force_blt_ring(sna, dst_bo)) + return true; + + if ((flags & COPY_SMALL || + (sna->render_state.gt < 3 && src_bo == dst_bo)) && + can_switch_to_blt(sna, dst_bo, flags)) return true; if (kgem_bo_is_render(dst_bo) || kgem_bo_is_render(src_bo)) return false; + if (flags & COPY_LAST && + sna->render_state.gt < 3 && + can_switch_to_blt(sna, dst_bo, flags)) + return true; + if (prefer_render_ring(sna, dst_bo)) return false; if (!prefer_blt_ring(sna, dst_bo, flags)) return false; - return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); + return prefer_blt_bo(sna, src_bo, dst_bo); } 
static bool @@ -2946,7 +2993,7 @@ fallback_blt: &extents)) { bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); - if ((big || can_switch_to_blt(sna, dst_bo, flags)) && + if ((big || !prefer_render_ring(sna, dst_bo)) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, dst_bo, dst_dx, dst_dy, @@ -2961,8 +3008,7 @@ fallback_blt: assert(src->depth == dst->depth); assert(src->width == dst->width); assert(src->height == dst->height); - return sna_render_copy_boxes__overlap(sna, alu, - src, src_bo, + return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, src_dx, src_dy, dst_dx, dst_dy, box, n, &extents); diff --git a/src/sna/gen8_render.c b/src/sna/gen8_render.c index 6eb11452..445983b1 100644 --- a/src/sna/gen8_render.c +++ b/src/sna/gen8_render.c @@ -106,6 +106,12 @@ static const uint32_t ps_kernel_planar[][4] = { #include "exa_wm_yuv_rgb.g8b" #include "exa_wm_write.g8b" }; + +static const uint32_t ps_kernel_rgb[][4] = { +#include "exa_wm_src_affine.g8b" +#include "exa_wm_src_sample_argb.g8b" +#include "exa_wm_write.g8b" +}; #endif #define SURFACE_DW (64 / sizeof(uint32_t)); @@ -119,7 +125,7 @@ static const struct wm_kernel_info { const void *data; unsigned int size; int num_surfaces; -} wm_kernels[] = { +} wm_kernels[GEN8_WM_KERNEL_COUNT] = { NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2), NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2), @@ -138,6 +144,7 @@ static const struct wm_kernel_info { #if !NO_VIDEO KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), + KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), #endif }; #undef KERNEL @@ -205,6 +212,33 @@ static const struct blendinfo { #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) #define OUT_VERTEX_F(v) vertex_emit(sna, v) +struct gt_info { + const char *name; + struct { + int max_vs_entries; + } urb; +}; + +static const struct gt_info bdw_gt_info = { + .name = "Broadwell (gen8)", + .urb = { .max_vs_entries = 960 }, +}; + +static bool is_bdw(struct sna *sna) +{ + return 
sna->kgem.gen == 0100; +} + +static const struct gt_info chv_gt_info = { + .name = "Cherryview (gen8)", + .urb = { .max_vs_entries = 640 }, +}; + +static bool is_chv(struct sna *sna) +{ + return sna->kgem.gen == 0101; +} + static inline bool too_large(int width, int height) { return width > GEN8_MAX_SIZE || height > GEN8_MAX_SIZE; @@ -462,7 +496,7 @@ gen8_emit_urb(struct sna *sna) { /* num of VS entries must be divisible by 8 if size < 9 */ OUT_BATCH(GEN8_3DSTATE_URB_VS | (2 - 2)); - OUT_BATCH(960 << URB_ENTRY_NUMBER_SHIFT | + OUT_BATCH(sna->render_state.gen8.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT | (2 - 1) << URB_ENTRY_SIZE_SHIFT | 4 << URB_STARTING_ADDRESS_SHIFT); @@ -873,7 +907,7 @@ gen8_emit_cc(struct sna *sna, uint32_t blend) assert(blend / GEN8_BLENDFACTOR_COUNT > 0); assert(blend % GEN8_BLENDFACTOR_COUNT > 0); - /* XXX can have upto 8 blend states preload, selectable via + /* XXX can have up to 8 blend states preload, selectable via * Render Target Index. What other side-effects of Render Target Index? 
*/ @@ -1167,6 +1201,7 @@ gen8_emit_pipe_stall(struct sna *sna) { OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2)); OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_FLUSH | PIPE_CONTROL_STALL_AT_SCOREBOARD); OUT_BATCH64(0); OUT_BATCH64(0); @@ -1876,12 +1911,12 @@ gen8_composite_picture(struct sna *sna, if (channel->repeat || (x >= 0 && y >= 0 && - x + w < pixmap->drawable.width && - y + h < pixmap->drawable.height)) { + x + w <= pixmap->drawable.width && + y + h <= pixmap->drawable.height)) { struct sna_pixmap *priv = sna_pixmap(pixmap); if (priv && priv->clear) { DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); - return gen4_channel_init_solid(sna, channel, priv->clear_color); + return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); } } } else @@ -1961,7 +1996,9 @@ gen8_composite_set_target(struct sna *sna, } else sna_render_picture_extents(dst, &box); - hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; + hint = PREFER_GPU | RENDER_GPU; + if (!need_tiling(sna, op->dst.width, op->dst.height)) + hint |= FORCE_GPU; if (!partial) { hint |= IGNORE_DAMAGE; if (w == op->dst.width && h == op->dst.height) @@ -2002,46 +2039,78 @@ gen8_composite_set_target(struct sna *sna, static bool try_blt(struct sna *sna, - PicturePtr dst, PicturePtr src, - int width, int height) + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_op *tmp) { struct kgem_bo *bo; if (sna->kgem.mode == KGEM_BLT) { DBG(("%s: already performing BLT\n", __FUNCTION__)); - return true; + goto execute; } if (too_large(width, height)) { DBG(("%s: operation too large for 3D pipe (%d, %d)\n", __FUNCTION__, width, height)); - return true; + goto execute; } bo = __sna_drawable_peek_bo(dst->pDrawable); if (bo == NULL) - return true; - if (bo->rq) - return 
RQ_IS_BLT(bo->rq); + goto execute; + + if (untiled_tlb_miss(bo)) + goto execute; + + if (bo->rq) { + if (RQ_IS_BLT(bo->rq)) + goto execute; + + return false; + } + + if (bo->tiling == I915_TILING_Y) + goto upload; if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) - return true; + goto execute; + + if (src->pDrawable == dst->pDrawable && + (sna->render_state.gt < 3 || width*height < 1024) && + can_switch_to_blt(sna, bo, 0)) + goto execute; if (src->pDrawable) { - bo = __sna_drawable_peek_bo(src->pDrawable); - if (bo == NULL) - return true; + struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); + if (s == NULL) + goto upload; - if (prefer_blt_bo(sna, bo)) - return RQ_IS_BLT(bo->rq); + if (prefer_blt_bo(sna, s, bo)) + goto execute; } if (sna->kgem.ring == KGEM_BLT) { DBG(("%s: already performing BLT\n", __FUNCTION__)); - return true; + goto execute; } - return false; +upload: + flags |= COMPOSITE_UPLOAD; +execute: + return sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + flags, tmp); } static bool @@ -2271,13 +2340,13 @@ gen8_render_composite(struct sna *sna, width, height, sna->kgem.mode, sna->kgem.ring)); if (mask == NULL && - try_blt(sna, dst, src, width, height) && - sna_blt_composite(sna, op, - src, dst, - src_x, src_y, - dst_x, dst_y, - width, height, - flags, tmp)) + try_blt(sna, op, + src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + flags, tmp)) return true; if (gen8_composite_fallback(sna, src, mask, dst)) @@ -2700,27 +2769,37 @@ prefer_blt_copy(struct sna *sna, assert((flags & COPY_SYNC) == 0); - if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) - return true; - if (untiled_tlb_miss(src_bo) || untiled_tlb_miss(dst_bo)) return true; - if (force_blt_ring(sna)) + if (flags & COPY_DRI && !sna->kgem.has_semaphores) + return false; + + if (force_blt_ring(sna, dst_bo)) + return true; + + if ((flags & COPY_SMALL || + (sna->render_state.gt < 3 && src_bo 
== dst_bo)) && + can_switch_to_blt(sna, dst_bo, flags)) return true; if (kgem_bo_is_render(dst_bo) || kgem_bo_is_render(src_bo)) return false; + if (flags & COPY_LAST && + sna->render_state.gt < 3 && + can_switch_to_blt(sna, dst_bo, flags)) + return true; + if (prefer_render_ring(sna, dst_bo)) return false; if (!prefer_blt_ring(sna, dst_bo, flags)) return false; - return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); + return prefer_blt_bo(sna, src_bo, dst_bo); } static bool @@ -2770,7 +2849,7 @@ fallback_blt: &extents)) { bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); - if ((big || can_switch_to_blt(sna, dst_bo, flags)) && + if ((big || !prefer_render_ring(sna, dst_bo)) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, dst_bo, dst_dx, dst_dy, @@ -2785,8 +2864,7 @@ fallback_blt: assert(src->depth == dst->depth); assert(src->width == dst->width); assert(src->height == dst->height); - return sna_render_copy_boxes__overlap(sna, alu, - src, src_bo, + return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, src_dx, src_dy, dst_dx, dst_dy, box, n, &extents); @@ -3665,7 +3743,9 @@ static void gen8_emit_video_state(struct sna *sna, frame->pitch[0]; n_src = 6; } else { - if (frame->id == FOURCC_UYVY) + if (frame->id == FOURCC_RGB888) + src_surf_format = SURFACEFORMAT_B8G8R8X8_UNORM; + else if (frame->id == FOURCC_UYVY) src_surf_format = SURFACEFORMAT_YCRCB_SWAPY; else src_surf_format = SURFACEFORMAT_YCRCB_NORMAL; @@ -3697,6 +3777,23 @@ static void gen8_emit_video_state(struct sna *sna, gen8_emit_state(sna, op, offset); } +static unsigned select_video_kernel(const struct sna_video_frame *frame) +{ + switch (frame->id) { + case FOURCC_YV12: + case FOURCC_I420: + case FOURCC_XVMC: + return GEN8_WM_KERNEL_VIDEO_PLANAR; + + case FOURCC_RGB888: + case FOURCC_RGB565: + return GEN8_WM_KERNEL_VIDEO_RGB; + + default: + return GEN8_WM_KERNEL_VIDEO_PACKED; + } +} + static bool gen8_render_video(struct sna *sna, struct sna_video *video, @@ 
-3712,9 +3809,9 @@ gen8_render_video(struct sna *sna, int src_height = frame->src.y2 - frame->src.y1; float src_offset_x, src_offset_y; float src_scale_x, src_scale_y; - int nbox, pix_xoff, pix_yoff; unsigned filter; const BoxRec *box; + int nbox; DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", __FUNCTION__, @@ -3743,6 +3840,11 @@ gen8_render_video(struct sna *sna, tmp.floats_per_vertex = 3; tmp.floats_per_rect = 9; + DBG(("%s: scaling?=%d, planar?=%d [%x]\n", + __FUNCTION__, + src_width != dst_width || src_height != dst_height, + is_planar_fourcc(frame->id), frame->id)); + if (src_width == dst_width && src_height == dst_height) filter = SAMPLER_FILTER_NEAREST; else @@ -3752,9 +3854,7 @@ gen8_render_video(struct sna *sna, GEN8_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), NO_BLEND, - is_planar_fourcc(frame->id) ? - GEN8_WM_KERNEL_VIDEO_PLANAR : - GEN8_WM_KERNEL_VIDEO_PACKED, + select_video_kernel(frame), 2); tmp.priv = frame; @@ -3770,17 +3870,6 @@ gen8_render_video(struct sna *sna, gen8_align_vertex(sna, &tmp); gen8_emit_video_state(sna, &tmp); - /* Set up the offset for translating from the given region (in screen - * coordinates) to the backing pixmap. 
- */ -#ifdef COMPOSITE - pix_xoff = -pixmap->screen_x + pixmap->drawable.x; - pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -#else - pix_xoff = 0; - pix_yoff = 0; -#endif - DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", __FUNCTION__, frame->src.x1, frame->src.y1, @@ -3802,45 +3891,36 @@ gen8_render_video(struct sna *sna, box = region_rects(dstRegion); nbox = region_num_rects(dstRegion); while (nbox--) { - BoxRec r; - DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", __FUNCTION__, box->x1, box->y1, box->x2, box->y2, - pix_xoff, pix_yoff, box->x1 * src_scale_x + src_offset_x, box->y1 * src_scale_y + src_offset_y, box->x2 * src_scale_x + src_offset_x, box->y2 * src_scale_y + src_offset_y)); - r.x1 = box->x1 + pix_xoff; - r.x2 = box->x2 + pix_xoff; - r.y1 = box->y1 + pix_yoff; - r.y2 = box->y2 + pix_yoff; - gen8_get_rectangles(sna, &tmp, 1, gen8_emit_video_state); - OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX(box->x2, box->y2); OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX(box->x1, box->y2); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); - OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX(box->x1, box->y1); OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); - if (!DAMAGE_IS_ALL(priv->gpu_damage)) { - sna_damage_add_box(&priv->gpu_damage, &r); - sna_damage_subtract_box(&priv->cpu_damage, &r); - } box++; } - gen8_vertex_flush(sna); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) + sna_damage_add(&priv->gpu_damage, dstRegion); + return true; } #endif @@ -3896,6 +3976,13 @@ static bool gen8_render_setup(struct sna *sna) state->gt = ((devid >> 4) & 0xf) + 1; DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); + if (is_bdw(sna)) + state->info = &bdw_gt_info; + else if (is_chv(sna)) + state->info = &chv_gt_info; + else + return false; + 
sna_static_stream_init(&general); /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer @@ -4007,5 +4094,5 @@ const char *gen8_render_init(struct sna *sna, const char *backend) sna->render.max_3d_size = GEN8_MAX_SIZE; sna->render.max_3d_pitch = 1 << 18; - return "Broadwell"; + return sna->render_state.gen8.info->name; } diff --git a/src/sna/gen8_render.h b/src/sna/gen8_render.h index eb4928e7..e6a8dc55 100644 --- a/src/sna/gen8_render.h +++ b/src/sna/gen8_render.h @@ -335,6 +335,7 @@ #define PIPE_CONTROL_IS_FLUSH (1 << 11) #define PIPE_CONTROL_TC_FLUSH (1 << 10) #define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define PIPE_CONTROL_FLUSH (1 << 7) #define PIPE_CONTROL_GLOBAL_GTT (1 << 2) #define PIPE_CONTROL_LOCAL_PGTT (0 << 2) #define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c new file mode 100644 index 00000000..e5f12c72 --- /dev/null +++ b/src/sna/gen9_render.c @@ -0,0 +1,4156 @@ +/* + * Copyright © 2012,2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_reg.h" +#include "sna_render.h" +#include "sna_render_inline.h" +#include "sna_video.h" + +#include "gen9_render.h" +#include "gen8_eu.h" +#include "gen4_common.h" +#include "gen4_source.h" +#include "gen4_vertex.h" +#include "gen6_common.h" +#include "gen8_vertex.h" + +#define SIM 1 + +#define ALWAYS_INVALIDATE 0 +#define ALWAYS_FLUSH 0 +#define ALWAYS_STALL 0 + +#define NO_COMPOSITE 0 +#define NO_COMPOSITE_SPANS 0 +#define NO_COPY 0 +#define NO_COPY_BOXES 0 +#define NO_FILL 0 +#define NO_FILL_BOXES 0 +#define NO_FILL_ONE 0 +#define NO_FILL_CLEAR 0 +#define NO_VIDEO 0 + +#define USE_8_PIXEL_DISPATCH 1 +#define USE_16_PIXEL_DISPATCH 1 +#define USE_32_PIXEL_DISPATCH 0 + +#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH +#error "Must select at least 8, 16 or 32 pixel dispatch" +#endif + +#define GEN9_MAX_SIZE 16384 +#define GEN9_GT_BIAS 1 /* Each GT is bigger than previous gen */ + +/* XXX Todo + * + * STR (software tiled rendering) mode. No, really. + * 64x32 pixel blocks align with the rendering cache. Worth considering. + */ + +#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) + +/* Pipeline stages: + * 1. Command Streamer (CS) + * 2. Vertex Fetch (VF) + * 3. Vertex Shader (VS) + * 4. Hull Shader (HS) + * 5. Tesselation Engine (TE) + * 6. Domain Shader (DS) + * 7. Geometry Shader (GS) + * 8. Stream Output Logic (SOL) + * 9. Clipper (CLIP) + * 10. Strip/Fan (SF) + * 11. Windower/Masker (WM) + * 12. 
Color Calculator (CC) + */ + +#if !NO_VIDEO +static const uint32_t ps_kernel_packed[][4] = { +#include "exa_wm_src_affine.g8b" +#include "exa_wm_src_sample_argb.g8b" +#include "exa_wm_yuv_rgb.g8b" +#include "exa_wm_write.g8b" +}; + +static const uint32_t ps_kernel_planar[][4] = { +#include "exa_wm_src_affine.g8b" +#include "exa_wm_src_sample_planar.g8b" +#include "exa_wm_yuv_rgb.g8b" +#include "exa_wm_write.g8b" +}; + +static const uint32_t ps_kernel_rgb[][4] = { +#include "exa_wm_src_affine.g8b" +#include "exa_wm_src_sample_argb.g8b" +#include "exa_wm_write.g8b" +}; +#endif + +#define SURFACE_DW (64 / sizeof(uint32_t)); + +#define KERNEL(kernel_enum, kernel, num_surfaces) \ + [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} +#define NOKERNEL(kernel_enum, func, num_surfaces) \ + [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} +static const struct wm_kernel_info { + const char *name; + const void *data; + unsigned int size; + int num_surfaces; +} wm_kernels[] = { + NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2), + NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2), + + NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3), + NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3), + + NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3), + NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3), + + NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3), + NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3), + + NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2), + NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2), + +#if !NO_VIDEO + KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), + KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), + KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), +#endif +}; +#undef KERNEL + +static const struct blendinfo { + uint8_t src_alpha; + uint8_t src_blend; + uint8_t dst_blend; +} gen9_blend_op[] = { + /* Clear */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, + /* Src */ {0, BLENDFACTOR_ONE, 
BLENDFACTOR_ZERO}, + /* Dst */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, + /* Over */ {1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, + /* In */ {0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, + /* InReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, + /* Out */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, + /* OutReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ {1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, + /* Xor */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, +}; + +/** + * Highest-valued BLENDFACTOR used in gen9_blend_op. + * + * This leaves out GEN9_BLENDFACTOR_INV_DST_COLOR, + * GEN9_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, + * GEN9_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} + */ +#define GEN9_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1) + +#define GEN9_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen9_blend_state), 64) + +#define BLEND_OFFSET(s, d) \ + ((d != BLENDFACTOR_ZERO) << 15 | ((s) * GEN9_BLENDFACTOR_COUNT + (d)) << 4) + +#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO) +#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO) + +#define SAMPLER_OFFSET(sf, se, mf, me) \ + (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) + +#define VERTEX_2s2s 0 + +#define COPY_SAMPLER 0 +#define COPY_VERTEX VERTEX_2s2s +#define COPY_FLAGS(a) GEN9_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? 
NO_BLEND : CLEAR, GEN9_WM_KERNEL_NOMASK, COPY_VERTEX) + +#define FILL_SAMPLER 1 +#define FILL_VERTEX VERTEX_2s2s +#define FILL_FLAGS(op, format) GEN9_SET_FLAGS(FILL_SAMPLER, gen9_get_blend((op), false, (format)), GEN9_WM_KERNEL_NOMASK, FILL_VERTEX) +#define FILL_FLAGS_NOBLEND GEN9_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN9_WM_KERNEL_NOMASK, FILL_VERTEX) + +#define GEN9_SAMPLER(f) (((f) >> 20) & 0xfff) +#define GEN9_BLEND(f) (((f) >> 4) & 0x7ff) +#define GEN9_READS_DST(f) (((f) >> 15) & 1) +#define GEN9_KERNEL(f) (((f) >> 16) & 0xf) +#define GEN9_VERTEX(f) (((f) >> 0) & 0xf) +#define GEN9_SET_FLAGS(S, B, K, V) ((S) << 20 | (K) << 16 | (B) | (V)) + +#define OUT_BATCH(v) batch_emit(sna, v) +#define OUT_BATCH64(v) batch_emit64(sna, v) +#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) +#define OUT_VERTEX_F(v) vertex_emit(sna, v) + +struct gt_info { + const char *name; + struct { + int max_vs_entries; + } urb; +}; + +static const struct gt_info min_gt_info = { + .name = "Skylake (gen9)", + .urb = { .max_vs_entries = 240 }, +}; + +static const struct gt_info skl_gt_info = { + .name = "Skylake (gen9)", + .urb = { .max_vs_entries = 960 }, +}; + +static const struct gt_info bxt_gt_info = { + .name = "Broxton (gen9)", + .urb = { .max_vs_entries = 320 }, +}; + +static const struct gt_info kbl_gt_info = { + .name = "Kabylake (gen9)", + .urb = { .max_vs_entries = 960 }, +}; + +static const struct gt_info glk_gt_info = { + .name = "Geminilake (gen9)", + .urb = { .max_vs_entries = 320 }, +}; + +static bool is_skl(struct sna *sna) +{ + return sna->kgem.gen == 0110; +} + +static bool is_bxt(struct sna *sna) +{ + return sna->kgem.gen == 0111; +} + +static bool is_kbl(struct sna *sna) +{ + return sna->kgem.gen == 0112; +} + +static bool is_glk(struct sna *sna) +{ + return sna->kgem.gen == 0113; +} + + +static inline bool too_large(int width, int height) +{ + return width > GEN9_MAX_SIZE || height > GEN9_MAX_SIZE; +} + +static inline bool unaligned(struct kgem_bo *bo, int bpp) +{ + /* XXX 
What exactly do we need to meet H_ALIGN and V_ALIGN? */ +#if 0 + int x, y; + + if (bo->proxy == NULL) + return false; + + /* Assume that all tiled proxies are constructed correctly. */ + if (bo->tiling) + return false; + + DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n", + __FUNCTION__, bo->delta, bo->pitch, bpp, + 8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch)); + + /* This may be a random userptr map, check that it meets the + * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4. + */ + y = bo->delta / bo->pitch; + if (y & 3) + return true; + + x = 8 * (bo->delta - y * bo->pitch); + if (x & (4*bpp - 1)) + return true; + + return false; +#else + return false; +#endif +} + +static uint32_t gen9_get_blend(int op, + bool has_component_alpha, + uint32_t dst_format) +{ + uint32_t src, dst; + + COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN9_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff); + + src = gen9_blend_op[op].src_blend; + dst = gen9_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that + * we'll treat it always as 1. + */ + if (PICT_FORMAT_A(dst_format) == 0) { + if (src == BLENDFACTOR_DST_ALPHA) + src = BLENDFACTOR_ONE; + else if (src == BLENDFACTOR_INV_DST_ALPHA) + src = BLENDFACTOR_ZERO; + } + + /* If the source alpha is being used, then we should only be in a + * case where the source blend factor is 0, and the source blend + * value is the mask channels multiplied by the source picture's alpha. 
+ */ + if (has_component_alpha && gen9_blend_op[op].src_alpha) { + if (dst == BLENDFACTOR_SRC_ALPHA) + dst = BLENDFACTOR_SRC_COLOR; + else if (dst == BLENDFACTOR_INV_SRC_ALPHA) + dst = BLENDFACTOR_INV_SRC_COLOR; + } + + DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", + op, dst_format, PICT_FORMAT_A(dst_format), + src, dst, (int)(BLEND_OFFSET(src, dst)>>4))); + assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff); + return BLEND_OFFSET(src, dst); +} + +static uint32_t gen9_get_card_format(PictFormat format) +{ + switch (format) { + default: + return -1; + case PICT_a8r8g8b8: + return SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_x8r8g8b8: + return SURFACEFORMAT_B8G8R8X8_UNORM; + case PICT_a8b8g8r8: + return SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_x8b8g8r8: + return SURFACEFORMAT_R8G8B8X8_UNORM; +#ifdef PICT_a2r10g10b10 + case PICT_a2r10g10b10: + return SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_x2r10g10b10: + return SURFACEFORMAT_B10G10R10X2_UNORM; +#endif + case PICT_r8g8b8: + return SURFACEFORMAT_R8G8B8_UNORM; + case PICT_r5g6b5: + return SURFACEFORMAT_B5G6R5_UNORM; + case PICT_a1r5g5b5: + return SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + return SURFACEFORMAT_B4G4R4A4_UNORM; + } +} + +static uint32_t gen9_get_dest_format(PictFormat format) +{ + switch (format) { + default: + return -1; + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + return SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return SURFACEFORMAT_R8G8B8A8_UNORM; +#ifdef PICT_a2r10g10b10 + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return SURFACEFORMAT_B10G10R10A2_UNORM; +#endif + case PICT_r5g6b5: + return SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return SURFACEFORMAT_B4G4R4A4_UNORM; + } +} + +static bool gen9_check_dst_format(PictFormat 
format) +{ + if (gen9_get_dest_format(format) != -1) + return true; + + DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); + return false; +} + +static bool gen9_check_format(uint32_t format) +{ + if (gen9_get_card_format(format) != -1) + return true; + + DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); + return false; +} + +static uint32_t gen9_filter(uint32_t filter) +{ + switch (filter) { + default: + assert(0); + case PictFilterNearest: + return SAMPLER_FILTER_NEAREST; + case PictFilterBilinear: + return SAMPLER_FILTER_BILINEAR; + } +} + +static uint32_t gen9_check_filter(PicturePtr picture) +{ + switch (picture->filter) { + case PictFilterNearest: + case PictFilterBilinear: + return true; + default: + return false; + } +} + +static uint32_t gen9_repeat(uint32_t repeat) +{ + switch (repeat) { + default: + assert(0); + case RepeatNone: + return SAMPLER_EXTEND_NONE; + case RepeatNormal: + return SAMPLER_EXTEND_REPEAT; + case RepeatPad: + return SAMPLER_EXTEND_PAD; + case RepeatReflect: + return SAMPLER_EXTEND_REFLECT; + } +} + +static bool gen9_check_repeat(PicturePtr picture) +{ + if (!picture->repeat) + return true; + + switch (picture->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return true; + default: + return false; + } +} + +static int +gen9_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) +{ + int base; + + if (has_mask) { + if (is_ca) { + if (gen9_blend_op[op].src_alpha) + base = GEN9_WM_KERNEL_MASKSA; + else + base = GEN9_WM_KERNEL_MASKCA; + } else + base = GEN9_WM_KERNEL_MASK; + } else + base = GEN9_WM_KERNEL_NOMASK; + + return base + !is_affine; +} + +static void +gen9_emit_push_constants(struct sna *sna) +{ +#if SIM + OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2)); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2)); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2)); + OUT_BATCH(0); + + 
OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2)); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); + OUT_BATCH(0); +#endif +} + +static void +gen9_emit_urb(struct sna *sna) +{ + /* num of VS entries must be divisible by 8 if size < 9 */ + OUT_BATCH(GEN9_3DSTATE_URB_VS | (2 - 2)); + OUT_BATCH(sna->render_state.gen9.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT | + (2 - 1) << URB_ENTRY_SIZE_SHIFT | + 4 << URB_STARTING_ADDRESS_SHIFT); + + OUT_BATCH(GEN9_3DSTATE_URB_HS | (2 - 2)); + OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | + 4 << URB_STARTING_ADDRESS_SHIFT); + + OUT_BATCH(GEN9_3DSTATE_URB_DS | (2 - 2)); + OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | + 4 << URB_STARTING_ADDRESS_SHIFT); + + OUT_BATCH(GEN9_3DSTATE_URB_GS | (2 - 2)); + OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | + 4 << URB_STARTING_ADDRESS_SHIFT); +} + +static void +gen9_emit_state_base_address(struct sna *sna) +{ + uint32_t num_pages; + + assert(sna->kgem.surface - sna->kgem.nbatch <= 16384); + + /* WaBindlessSurfaceStateModifyEnable:skl,bxt */ + OUT_BATCH(GEN9_STATE_BASE_ADDRESS | (19 - 1 - 2)); + OUT_BATCH64(0); /* general */ + OUT_BATCH(0); /* stateless dataport */ + OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */ + sna->kgem.nbatch, + NULL, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */ + sna->kgem.nbatch, + sna->render_state.gen9.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH64(0); /* indirect */ + OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */ + sna->kgem.nbatch, + sna->render_state.gen9.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + /* upper bounds */ + num_pages = sna->render_state.gen9.general_bo->size.pages.count; + OUT_BATCH(0); /* general */ + OUT_BATCH(num_pages << 12 | 1); /* dynamic */ + OUT_BATCH(0); /* indirect */ + OUT_BATCH(num_pages << 12 | 1); /* instruction */ + + /* Bindless */ + OUT_BATCH(0); + 
OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen9_emit_vs_invariant(struct sna *sna) +{ + OUT_BATCH(GEN9_3DSTATE_VS | (9 - 2)); + OUT_BATCH64(0); /* no VS kernel */ + OUT_BATCH(0); + OUT_BATCH64(0); /* scratch */ + OUT_BATCH(0); + OUT_BATCH(1 << 1); /* pass-through */ + OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */ + +#if SIM + OUT_BATCH(GEN9_3DSTATE_CONSTANT_VS | (11 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + + OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); + OUT_BATCH(0); +#endif +} + +static void +gen9_emit_hs_invariant(struct sna *sna) +{ + OUT_BATCH(GEN9_3DSTATE_HS | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH64(0); /* no HS kernel */ + OUT_BATCH64(0); /* scratch */ + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ + +#if SIM + OUT_BATCH(GEN9_3DSTATE_CONSTANT_HS | (11 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + +#if 1 + OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); + OUT_BATCH(0); +#endif +#endif +} + +static void +gen9_emit_te_invariant(struct sna *sna) +{ + OUT_BATCH(GEN9_3DSTATE_TE | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen9_emit_ds_invariant(struct sna *sna) +{ + OUT_BATCH(GEN9_3DSTATE_DS | (11 - 2)); + OUT_BATCH64(0); /* no kernel */ + OUT_BATCH(0); + OUT_BATCH64(0); /* scratch */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + +#if SIM + OUT_BATCH(GEN9_3DSTATE_CONSTANT_DS | (11 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + +#if 1 + OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); 
+ OUT_BATCH(0); +#endif +#endif +} + +static void +gen9_emit_gs_invariant(struct sna *sna) +{ + OUT_BATCH(GEN9_3DSTATE_GS | (10 - 2)); + OUT_BATCH64(0); /* no GS kernel */ + OUT_BATCH(0); + OUT_BATCH64(0); /* scratch */ + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ + OUT_BATCH(0); + OUT_BATCH(0); + +#if SIM + OUT_BATCH(GEN9_3DSTATE_CONSTANT_GS | (11 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + +#if 1 + OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); + OUT_BATCH(0); +#endif +#endif +} + +static void +gen9_emit_sol_invariant(struct sna *sna) +{ + OUT_BATCH(GEN9_3DSTATE_STREAMOUT | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen9_emit_sf_invariant(struct sna *sna) +{ + OUT_BATCH(GEN9_3DSTATE_SF | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen9_emit_clip_invariant(struct sna *sna) +{ + OUT_BATCH(GEN9_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2)); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); + OUT_BATCH(0); +} + +static void +gen9_emit_null_depth_buffer(struct sna *sna) +{ + OUT_BATCH(GEN9_3DSTATE_DEPTH_BUFFER | (8 - 2)); +#if 1 + OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT | + DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT); +#else + OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT | + DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT); +#endif + OUT_BATCH64(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + +#if SIM + OUT_BATCH(GEN9_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH64(0); + OUT_BATCH(0); +#endif + +#if SIM + OUT_BATCH(GEN9_3DSTATE_STENCIL_BUFFER | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH64(0); + OUT_BATCH(0); 
+#endif + +#if SIM + OUT_BATCH(GEN9_3DSTATE_WM_DEPTH_STENCIL | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +#endif + +#if SIM + OUT_BATCH(GEN9_3DSTATE_CLEAR_PARAMS | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); +#endif +} + +static void +gen9_emit_wm_invariant(struct sna *sna) +{ + gen9_emit_null_depth_buffer(sna); + +#if SIM + OUT_BATCH(GEN9_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2)); + OUT_BATCH(0); +#endif + + OUT_BATCH(GEN9_3DSTATE_WM | (2 - 2)); + //OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */ + OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + +#if SIM + OUT_BATCH(GEN9_3DSTATE_WM_CHROMAKEY | (2 - 2)); + OUT_BATCH(0); +#endif + +#if 0 + OUT_BATCH(GEN9_3DSTATE_WM_HZ_OP | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +#endif + + OUT_BATCH(GEN9_3DSTATE_PS_EXTRA | (2 - 2)); + OUT_BATCH(PSX_PIXEL_SHADER_VALID | + PSX_ATTRIBUTE_ENABLE); + + OUT_BATCH(GEN9_3DSTATE_RASTER | (5 - 2)); + OUT_BATCH(RASTER_FRONT_WINDING_CCW | + RASTER_CULL_NONE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_SBE_SWIZ | (11 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + +#if SIM + OUT_BATCH(GEN9_3DSTATE_CONSTANT_PS | (11 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); + OUT_BATCH64(0); +#endif +} + +static void +gen9_emit_cc_invariant(struct sna *sna) +{ +} + +static void +gen9_emit_vf_invariant(struct sna *sna) +{ + int n; + +#if 1 + OUT_BATCH(GEN9_3DSTATE_VF | (2 - 2)); + OUT_BATCH(0); +#endif + + OUT_BATCH(GEN9_3DSTATE_VF_SGVS | (2 - 2)); + OUT_BATCH(0); + + OUT_BATCH(GEN9_3DSTATE_VF_TOPOLOGY | (2 - 2)); + OUT_BATCH(RECTLIST); + + OUT_BATCH(GEN9_3DSTATE_VF_STATISTICS | 0); + + for (n = 1; n <= 3; n++) { + OUT_BATCH(GEN9_3DSTATE_VF_INSTANCING | (3 - 2)); + OUT_BATCH(n); + OUT_BATCH(0); + } +} + +static void +gen9_emit_invariant(struct sna *sna) +{ 
+ OUT_BATCH(GEN9_PIPELINE_SELECT | + PIPELINE_SELECTION_MASK | + PIPELINE_SELECT_3D); + +#if SIM + OUT_BATCH(GEN9_STATE_SIP | (3 - 2)); + OUT_BATCH64(0); +#endif + + OUT_BATCH(GEN9_3DSTATE_MULTISAMPLE | (2 - 2)); + OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER | + MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + + OUT_BATCH(GEN9_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(1); + +#if SIM + OUT_BATCH(GEN9_3DSTATE_SAMPLE_PATTERN | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + //OUT_BATCH(8<<20 | 8<<16); + OUT_BATCH(0); +#endif + + gen9_emit_push_constants(sna); + gen9_emit_urb(sna); + + gen9_emit_state_base_address(sna); + + gen9_emit_vf_invariant(sna); + gen9_emit_vs_invariant(sna); + gen9_emit_hs_invariant(sna); + gen9_emit_te_invariant(sna); + gen9_emit_ds_invariant(sna); + gen9_emit_gs_invariant(sna); + gen9_emit_sol_invariant(sna); + gen9_emit_clip_invariant(sna); + gen9_emit_sf_invariant(sna); + gen9_emit_wm_invariant(sna); + gen9_emit_cc_invariant(sna); + + sna->render_state.gen9.needs_invariant = false; +} + +static void +gen9_emit_cc(struct sna *sna, uint32_t blend) +{ + struct gen9_render_state *render = &sna->render_state.gen9; + + if (render->blend == blend) + return; + + DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n", + __FUNCTION__, blend, render->blend, + blend / GEN9_BLENDFACTOR_COUNT, + blend % GEN9_BLENDFACTOR_COUNT)); + + assert(blend < GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT); + assert(blend / GEN9_BLENDFACTOR_COUNT > 0); + assert(blend % GEN9_BLENDFACTOR_COUNT > 0); + + /* XXX can have up to 8 blend states preload, selectable via + * Render Target Index. What other side-effects of Render Target Index? 
+ */ + + OUT_BATCH(GEN9_3DSTATE_PS_BLEND | (2 - 2)); + if (blend != GEN9_BLEND(NO_BLEND)) { + uint32_t src = blend / GEN9_BLENDFACTOR_COUNT; + uint32_t dst = blend % GEN9_BLENDFACTOR_COUNT; + OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT | + PS_BLEND_COLOR_BLEND_ENABLE | + src << PS_BLEND_SRC_ALPHA_SHIFT | + dst << PS_BLEND_DST_ALPHA_SHIFT | + src << PS_BLEND_SRC_SHIFT | + dst << PS_BLEND_DST_SHIFT); + } else + OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT); + + assert(is_aligned(render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE, 64)); + OUT_BATCH(GEN9_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); + OUT_BATCH((render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE) | 1); + + /* Force a CC_STATE pointer change to improve blend performance */ + OUT_BATCH(GEN9_3DSTATE_CC_STATE_POINTERS | (2 - 2)); + OUT_BATCH(0); + + render->blend = blend; +} + +static void +gen9_emit_sampler(struct sna *sna, uint32_t state) +{ + if (sna->render_state.gen9.samplers == state) + return; + + sna->render_state.gen9.samplers = state; + + DBG(("%s: sampler = %x\n", __FUNCTION__, state)); + + assert(2 * sizeof(struct gen9_sampler_state) == 32); + OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); + OUT_BATCH(sna->render_state.gen9.wm_state + state * 2 * sizeof(struct gen9_sampler_state)); +} + +static void +gen9_emit_sf(struct sna *sna, bool has_mask) +{ + int num_sf_outputs = has_mask ? 
2 : 1; + + if (sna->render_state.gen9.num_sf_outputs == num_sf_outputs) + return; + + DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs)); + + sna->render_state.gen9.num_sf_outputs = num_sf_outputs; + + OUT_BATCH(GEN9_3DSTATE_SBE | (6 - 2)); + OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT | + SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */ + SBE_FORCE_VERTEX_URB_READ_OFFSET | + 1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT | + 1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(SBE_ACTIVE_COMPONENT_XYZW << 0 | + SBE_ACTIVE_COMPONENT_XYZW << 1); + OUT_BATCH(0); +} + +static void +gen9_emit_wm(struct sna *sna, int kernel) +{ + const uint32_t *kernels; + + assert(kernel < ARRAY_SIZE(wm_kernels)); + if (sna->render_state.gen9.kernel == kernel) + return; + + sna->render_state.gen9.kernel = kernel; + kernels = sna->render_state.gen9.wm_kernel[kernel]; + + DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n", + __FUNCTION__, + wm_kernels[kernel].name, + wm_kernels[kernel].num_surfaces, + kernels[0], kernels[1], kernels[2])); + assert(is_aligned(kernels[0], 64)); + assert(is_aligned(kernels[1], 64)); + assert(is_aligned(kernels[2], 64)); + + OUT_BATCH(GEN9_3DSTATE_PS | (12 - 2)); + OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]); + OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT | + PS_VECTOR_MASK_ENABLE | + wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); + OUT_BATCH64(0); /* scratch address */ + OUT_BATCH(PS_MAX_THREADS | + (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) | + (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) | + (kernels[2] ? PS_32_DISPATCH_ENABLE : 0)); + OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 
6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 | + 8 << PS_DISPATCH_START_GRF_SHIFT_1 | + 6 << PS_DISPATCH_START_GRF_SHIFT_2); + OUT_BATCH64(kernels[2]); + OUT_BATCH64(kernels[1]); +} + +static bool +gen9_emit_binding_table(struct sna *sna, uint16_t offset) +{ + if (sna->render_state.gen9.surface_table == offset) + return false; + + /* Binding table pointers */ + assert(is_aligned(4*offset, 32)); + OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); + OUT_BATCH(offset*4); + + sna->render_state.gen9.surface_table = offset; + return true; +} + +static bool +gen9_emit_drawing_rectangle(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); + uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + + assert(!too_large(abs(op->dst.x), abs(op->dst.y))); + assert(!too_large(op->dst.width, op->dst.height)); + + if (sna->render_state.gen9.drawrect_limit == limit && + sna->render_state.gen9.drawrect_offset == offset) + return true; + + sna->render_state.gen9.drawrect_offset = offset; + sna->render_state.gen9.drawrect_limit = limit; + + OUT_BATCH(GEN9_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(limit); + OUT_BATCH(offset); + return false; +} + +static void +gen9_emit_vertex_elements(struct sna *sna, + const struct sna_composite_op *op) +{ + /* + * vertex data in vertex buffer + * position: (x, y) + * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) + * texture coordinate 1 if (has_mask is true): same as above + */ + struct gen9_render_state *render = &sna->render_state.gen9; + uint32_t src_format, dw; + int id = GEN9_VERTEX(op->u.gen9.flags); + bool has_mask; + + DBG(("%s: setup id=%d\n", __FUNCTION__, id)); + + if (render->ve_id == id) + return; + render->ve_id = id; + + if (render->ve_dirty) { + /* dummy primitive to flush vertex before change? 
*/ + OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); + OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + } + + /* The VUE layout + * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) + * dword 4-7: position (x, y, 1.0, 1.0), + * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) + * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) + * + * dword 4-15 are fetched from vertex buffer + */ + has_mask = (id >> 2) != 0; + OUT_BATCH(GEN9_3DSTATE_VERTEX_ELEMENTS | + ((2 * (3 + has_mask)) + 1 - 2)); + + OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | + SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT | + 0 << VE_OFFSET_SHIFT); + OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT | + COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT | + COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | + COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT); + + /* x,y */ + OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | + SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT | + 0 << VE_OFFSET_SHIFT); + OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT | + COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT | + COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | + COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT); + + /* u0, v0, w0 */ + DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3)); + dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; + switch (id & 3) { + default: + assert(0); + case 0: + src_format = SURFACEFORMAT_R16G16_SSCALED; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; + dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; + break; + case 1: + src_format = SURFACEFORMAT_R32_FLOAT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; + dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; + dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; + break; + case 2: + src_format = SURFACEFORMAT_R32G32_FLOAT; + dw |= 
COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; + dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; + break; + case 3: + src_format = SURFACEFORMAT_R32G32B32_FLOAT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; + break; + } + OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | + src_format << VE_FORMAT_SHIFT | + 4 << VE_OFFSET_SHIFT); + OUT_BATCH(dw); + + /* u1, v1, w1 */ + if (has_mask) { + unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); + DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); + dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; + switch (id >> 2) { + case 1: + src_format = SURFACEFORMAT_R32_FLOAT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; + dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; + dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; + break; + default: + assert(0); + case 2: + src_format = SURFACEFORMAT_R32G32_FLOAT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; + dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; + break; + case 3: + src_format = SURFACEFORMAT_R32G32B32_FLOAT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; + dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; + break; + } + OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | + src_format << VE_FORMAT_SHIFT | + offset << VE_OFFSET_SHIFT); + OUT_BATCH(dw); + } + + render->ve_dirty = true; +} + +inline static void +gen9_emit_pipe_invalidate(struct sna *sna) +{ + OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(PIPE_CONTROL_WC_FLUSH | + PIPE_CONTROL_TC_FLUSH | + PIPE_CONTROL_CS_STALL); + OUT_BATCH64(0); + OUT_BATCH64(0); +} + +inline static void +gen9_emit_pipe_flush(struct sna *sna, bool need_stall) +{ + unsigned stall; + + stall = 0; + if (need_stall) + stall = 
(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + + OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall); + OUT_BATCH64(0); + OUT_BATCH64(0); +} + +inline static void +gen9_emit_pipe_stall(struct sna *sna) +{ + OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_FLUSH | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH64(0); + OUT_BATCH64(0); +} + +static void +gen9_emit_state(struct sna *sna, + const struct sna_composite_op *op, + uint16_t wm_binding_table) +{ + bool need_invalidate; + bool need_flush; + bool need_stall; + + assert(op->dst.bo->exec); + + need_flush = wm_binding_table & 1 || + (sna->render_state.gen9.emit_flush && GEN9_READS_DST(op->u.gen9.flags)); + if (ALWAYS_FLUSH) + need_flush = true; + + wm_binding_table &= ~1; + + need_stall = sna->render_state.gen9.surface_table != wm_binding_table; + + need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); + if (ALWAYS_INVALIDATE) + need_invalidate = true; + + need_stall &= gen9_emit_drawing_rectangle(sna, op); + if (ALWAYS_STALL) + need_stall = true; + + if (need_invalidate) { + gen9_emit_pipe_invalidate(sna); + kgem_clear_dirty(&sna->kgem); + assert(op->dst.bo->exec); + kgem_bo_mark_dirty(op->dst.bo); + + need_flush = false; + need_stall = false; + } + if (need_flush) { + gen9_emit_pipe_flush(sna, need_stall); + need_stall = false; + } + if (need_stall) + gen9_emit_pipe_stall(sna); + + gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); + gen9_emit_sampler(sna, GEN9_SAMPLER(op->u.gen9.flags)); + gen9_emit_sf(sna, GEN9_VERTEX(op->u.gen9.flags) >> 2); + gen9_emit_wm(sna, GEN9_KERNEL(op->u.gen9.flags)); + gen9_emit_vertex_elements(sna, op); + gen9_emit_binding_table(sna, wm_binding_table); + + sna->render_state.gen9.emit_flush = GEN9_READS_DST(op->u.gen9.flags); +} + +static bool gen9_magic_ca_pass(struct sna *sna, + const struct sna_composite_op *op) +{ + struct gen9_render_state *state = 
&sna->render_state.gen9; + + if (!op->need_magic_ca_pass) + return false; + + DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, + sna->render.vertex_start, sna->render.vertex_index)); + + gen9_emit_pipe_stall(sna); + + gen9_emit_cc(sna, + GEN9_BLEND(gen9_get_blend(PictOpAdd, true, + op->dst.format))); + gen9_emit_wm(sna, + gen9_choose_composite_kernel(PictOpAdd, + true, true, + op->is_affine)); + + OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); + OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ + OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); + OUT_BATCH(sna->render.vertex_start); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + + state->last_primitive = sna->kgem.nbatch; + state->ve_dirty = false; + return true; +} + +static void null_create(struct sna_static_stream *stream) +{ + /* A bunch of zeros useful for legacy border color and depth-stencil */ + sna_static_stream_map(stream, 64, 64); +} + +static void +sampler_state_init(struct gen9_sampler_state *sampler_state, + sampler_filter_t filter, + sampler_extend_t extend) +{ + COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t)); + + sampler_state->ss0.lod_preclamp = 2; /* GL mode */ + sampler_state->ss0.default_color_mode = 1; + + switch (filter) { + default: + case SAMPLER_FILTER_NEAREST: + sampler_state->ss0.min_filter = MAPFILTER_NEAREST; + sampler_state->ss0.mag_filter = MAPFILTER_NEAREST; + break; + case SAMPLER_FILTER_BILINEAR: + sampler_state->ss0.min_filter = MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = MAPFILTER_LINEAR; + break; + } + + /* XXX bicubic filter using MAPFILTER_FLEXIBLE */ + + switch (extend) { + default: + case SAMPLER_EXTEND_NONE: + sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; + break; + case SAMPLER_EXTEND_REPEAT: + 
sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP; + sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP; + sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP; + break; + case SAMPLER_EXTEND_PAD: + sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP; + sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP; + sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP; + break; + case SAMPLER_EXTEND_REFLECT: + sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR; + sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR; + sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR; + break; + } +} + +static void +sampler_copy_init(struct gen9_sampler_state *ss) +{ + sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); + ss->ss3.non_normalized_coord = 1; + + sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); +} + +static void +sampler_fill_init(struct gen9_sampler_state *ss) +{ + sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); + ss->ss3.non_normalized_coord = 1; + + sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); +} + +static uint32_t +gen9_tiling_bits(uint32_t tiling) +{ + switch (tiling) { + default: assert(0); + case I915_TILING_NONE: return 0; + case I915_TILING_X: return SURFACE_TILED; + case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y; + } +} + +#define MOCS_PTE (1 << 1) +#define MOCS_WB (2 << 1) + +/** + * Sets up the common fields for a surface state buffer for the given + * picture in the given surface state buffer. 
+ */ +static uint32_t +gen9_bind_bo(struct sna *sna, + struct kgem_bo *bo, + uint32_t width, + uint32_t height, + uint32_t format, + bool is_dst) +{ + uint32_t *ss; + uint32_t domains; + int offset; + uint32_t is_scanout = is_dst && bo->scanout; + + /* After the first bind, we manage the cache domains within the batch */ + offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); + if (offset) { + if (is_dst) + kgem_bo_mark_dirty(bo); + assert(offset >= sna->kgem.surface); + return offset * sizeof(uint32_t); + } + + offset = sna->kgem.surface -= SURFACE_DW; + ss = sna->kgem.batch + offset; + ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | + gen9_tiling_bits(bo->tiling) | + format << SURFACE_FORMAT_SHIFT | + SURFACE_VALIGN_4 | SURFACE_HALIGN_4); + if (is_dst) { + ss[0] |= SURFACE_RC_READ_WRITE; + domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; + } else + domains = I915_GEM_DOMAIN_SAMPLER << 16; + ss[1] = (is_scanout || (is_dst && is_uncached(sna, bo))) ? MOCS_PTE << 24 : MOCS_WB << 24; + ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | + (height - 1) << SURFACE_HEIGHT_SHIFT); + ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT; + ss[4] = 0; + ss[5] = 0; + ss[6] = 0; + ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); + *(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0); + ss[10] = 0; + ss[11] = 0; + ss[12] = 0; + ss[13] = 0; + ss[14] = 0; + ss[15] = 0; + + kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); + + DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", + offset, bo->handle, *(uint64_t *)(ss+8), + format, width, height, bo->pitch, bo->tiling, + domains & 0xffff ? 
"render" : "sampler")); + + return offset * sizeof(uint32_t); +} + +static void gen9_emit_vertex_buffer(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = GEN9_VERTEX(op->u.gen9.flags); + + OUT_BATCH(GEN9_3DSTATE_VERTEX_BUFFERS | (5 - 2)); + OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE | + 4*op->floats_per_vertex); + sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; + OUT_BATCH64(0); + OUT_BATCH(~0); /* buffer size: disabled */ + + sna->render.vb_id |= 1 << id; +} + +static void gen9_emit_primitive(struct sna *sna) +{ + if (sna->kgem.nbatch == sna->render_state.gen9.last_primitive) { + sna->render.vertex_offset = sna->kgem.nbatch - 5; + return; + } + + OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); + OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ + sna->render.vertex_offset = sna->kgem.nbatch; + OUT_BATCH(0); /* vertex count, to be filled in later */ + OUT_BATCH(sna->render.vertex_index); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + sna->render.vertex_start = sna->render.vertex_index; + + sna->render_state.gen9.last_primitive = sna->kgem.nbatch; + sna->render_state.gen9.ve_dirty = false; +} + +static bool gen9_rectangle_begin(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = 1 << GEN9_VERTEX(op->u.gen9.flags); + int ndwords; + + if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) + return true; + + ndwords = op->need_magic_ca_pass ? 
60 : 6; + if ((sna->render.vb_id & id) == 0) + ndwords += 5; + if (!kgem_check_batch(&sna->kgem, ndwords)) + return false; + + if ((sna->render.vb_id & id) == 0) + gen9_emit_vertex_buffer(sna, op); + + gen9_emit_primitive(sna); + return true; +} + +static int gen9_get_rectangles__flush(struct sna *sna, + const struct sna_composite_op *op) +{ + /* Preventing discarding new vbo after lock contention */ + if (sna_vertex_wait__locked(&sna->render)) { + int rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return rem; + } + + if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6)) + return 0; + if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) + return 0; + + if (sna->render.vertex_offset) { + gen8_vertex_flush(sna); + if (gen9_magic_ca_pass(sna, op)) { + gen9_emit_pipe_invalidate(sna); + gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); + gen9_emit_wm(sna, GEN9_KERNEL(op->u.gen9.flags)); + } + } + + return gen8_vertex_finish(sna); +} + +inline static int gen9_get_rectangles(struct sna *sna, + const struct sna_composite_op *op, + int want, + void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) +{ + int rem; + + assert(want); + +start: + rem = vertex_space(sna); + if (unlikely(rem < op->floats_per_rect)) { + DBG(("flushing vbo for %s: %d < %d\n", + __FUNCTION__, rem, op->floats_per_rect)); + rem = gen9_get_rectangles__flush(sna, op); + if (unlikely(rem == 0)) + goto flush; + } + + if (unlikely(sna->render.vertex_offset == 0)) { + if (!gen9_rectangle_begin(sna, op)) + goto flush; + else + goto start; + } + + assert(rem <= vertex_space(sna)); + assert(op->floats_per_rect <= rem); + if (want > 1 && want * op->floats_per_rect > rem) + want = rem / op->floats_per_rect; + + assert(want > 0); + sna->render.vertex_index += 3*want; + return want; + +flush: + if (sna->render.vertex_offset) { + gen8_vertex_flush(sna); + gen9_magic_ca_pass(sna, op); + } + sna_vertex_wait__locked(&sna->render); + _kgem_submit(&sna->kgem); + emit_state(sna, op); + 
goto start; +} + +inline static uint32_t *gen9_composite_get_binding_table(struct sna *sna, + uint16_t *offset) +{ + uint32_t *table; + + assert(sna->kgem.surface <= 16384); + sna->kgem.surface -= SURFACE_DW; + /* Clear all surplus entries to zero in case of prefetch */ + table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64); + + DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); + + *offset = sna->kgem.surface; + return table; +} + +static void +gen9_get_batch(struct sna *sna, const struct sna_composite_op *op) +{ + kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); + + if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) { + DBG(("%s: flushing batch: %d < %d+%d\n", + __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, + 150, 4*8*2)); + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + assert(sna->kgem.mode == KGEM_RENDER); + assert(sna->kgem.ring == KGEM_RENDER); + + if (sna->render_state.gen9.needs_invariant) + gen9_emit_invariant(sna); +} + +static void gen9_emit_composite_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset, dirty; + + gen9_get_batch(sna, op); + + binding_table = gen9_composite_get_binding_table(sna, &offset); + + dirty = kgem_bo_is_dirty(op->dst.bo); + + binding_table[0] = + gen9_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen9_get_dest_format(op->dst.format), + true); + binding_table[1] = + gen9_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + false); + if (op->mask.bo) { + binding_table[2] = + gen9_bind_bo(sna, + op->mask.bo, + op->mask.width, + op->mask.height, + op->mask.card_format, + false); + } + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table && + (op->mask.bo == NULL || + sna->kgem.batch[sna->render_state.gen9.surface_table+2] == binding_table[2])) { + sna->kgem.surface += SURFACE_DW; + offset = 
sna->render_state.gen9.surface_table; + } + + if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) + dirty = 0; + + gen9_emit_state(sna, op, offset | dirty); +} + +static void +gen9_align_vertex(struct sna *sna, const struct sna_composite_op *op) +{ + if (op->floats_per_vertex != sna->render_state.gen9.floats_per_vertex) { + DBG(("aligning vertex: was %d, now %d floats per vertex\n", + sna->render_state.gen9.floats_per_vertex, op->floats_per_vertex)); + gen8_vertex_align(sna, op); + sna->render_state.gen9.floats_per_vertex = op->floats_per_vertex; + } +} + +fastcall static void +gen9_render_composite_blt(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); + op->prim_emit(sna, op, r); +} + +fastcall static void +gen9_render_composite_box(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box) +{ + struct sna_composite_rectangles r; + + gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); +} + +static void +gen9_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("composite_boxes(%d)\n", nbox)); + + do { + int nbox_this_time; + + nbox_this_time = gen9_get_rectangles(sna, op, nbox, + gen9_emit_composite_state); + nbox -= nbox_this_time; + + do { + struct sna_composite_rectangles r; + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); + box++; + } while (--nbox_this_time); + } while 
(nbox); +} + +static void +gen9_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen9_get_rectangles(sna, op, nbox, + gen9_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen9_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen9_get_rectangles(sna, op, nbox, + gen9_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +static uint32_t +gen9_create_blend_state(struct sna_static_stream *stream) +{ + char *base, *ptr; + int src, dst; + + COMPILE_TIME_ASSERT(((GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0); + + base = sna_static_stream_map(stream, + GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT * GEN9_BLEND_STATE_PADDED_SIZE, + 64); + + ptr = base; + for (src = 0; src < GEN9_BLENDFACTOR_COUNT; src++) { + for (dst = 0; dst < GEN9_BLENDFACTOR_COUNT; dst++) { + struct gen9_blend_state *blend = + (struct gen9_blend_state *)ptr; + + assert(((ptr - base) & 63) == 0); + 
COMPILE_TIME_ASSERT(sizeof(blend->common) == 4); + COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8); + COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4); + + blend->rt.post_blend_clamp = 1; + blend->rt.pre_blend_clamp = 1; + + blend->rt.color_blend = + !(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE); + blend->rt.dest_blend_factor = dst; + blend->rt.source_blend_factor = src; + blend->rt.color_blend_function = BLENDFUNCTION_ADD; + + blend->rt.dest_alpha_blend_factor = dst; + blend->rt.source_alpha_blend_factor = src; + blend->rt.alpha_blend_function = BLENDFUNCTION_ADD; + + ptr += GEN9_BLEND_STATE_PADDED_SIZE; + } + } + + return sna_static_stream_offsetof(stream, base); +} + +static int +gen9_composite_picture(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int w, int h, + int dst_x, int dst_y, + bool precise) +{ + PixmapPtr pixmap; + uint32_t color; + int16_t dx, dy; + + DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + + channel->is_solid = false; + channel->card_format = -1; + + if (sna_picture_is_solid(picture, &color)) + return gen4_channel_init_solid(sna, channel, color); + + if (picture->pDrawable == NULL) { + int ret; + + if (picture->pSourcePict->type == SourcePictTypeLinear) + return gen4_channel_init_linear(sna, picture, channel, + x, y, + w, h, + dst_x, dst_y); + + DBG(("%s -- fixup, gradient\n", __FUNCTION__)); + ret = -1; + if (!precise) + ret = sna_render_picture_approximate_gradient(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + if (ret == -1) + ret = sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + return ret; + } + + if (picture->alphaMap) { + DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + if (!gen9_check_repeat(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + if 
(!gen9_check_filter(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; + channel->filter = picture->filter; + + pixmap = get_drawable_pixmap(picture->pDrawable); + get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); + + x += dx + picture->pDrawable->x; + y += dy + picture->pDrawable->y; + + channel->is_affine = sna_transform_is_affine(picture->transform); + if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { + DBG(("%s: integer translation (%d, %d), removing\n", + __FUNCTION__, dx, dy)); + x += dx; + y += dy; + channel->transform = NULL; + channel->filter = PictFilterNearest; + + if (channel->repeat || + (x >= 0 && + y >= 0 && + x + w <= pixmap->drawable.width && + y + h <= pixmap->drawable.height)) { + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv && priv->clear) { + DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); + return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); + } + } + } else + channel->transform = picture->transform; + + channel->pict_format = picture->format; + channel->card_format = gen9_get_card_format(picture->format); + if (channel->card_format == (unsigned)-1) + return sna_render_picture_convert(sna, picture, channel, pixmap, + x, y, w, h, dst_x, dst_y, + false); + + if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { + DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, + pixmap->drawable.width, pixmap->drawable.height)); + return sna_render_picture_extract(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + return sna_render_pixmap_bo(sna, channel, pixmap, + x, y, w, h, dst_x, dst_y); +} + +inline static bool gen9_composite_channel_convert(struct sna_composite_channel *channel) +{ + if (unaligned(channel->bo, 
PICT_FORMAT_BPP(channel->pict_format))) + return false; + + channel->repeat = gen9_repeat(channel->repeat); + channel->filter = gen9_filter(channel->filter); + if (channel->card_format == (unsigned)-1) + channel->card_format = gen9_get_card_format(channel->pict_format); + assert(channel->card_format != (unsigned)-1); + + return true; +} + +static void gen9_render_composite_done(struct sna *sna, + const struct sna_composite_op *op) +{ + if (sna->render.vertex_offset) { + gen8_vertex_flush(sna); + gen9_magic_ca_pass(sna, op); + } + + if (op->mask.bo) + kgem_bo_destroy(&sna->kgem, op->mask.bo); + if (op->src.bo) + kgem_bo_destroy(&sna->kgem, op->src.bo); + + sna_render_composite_redirect_done(sna, op); +} + +inline static bool +gen9_composite_set_target(struct sna *sna, + struct sna_composite_op *op, + PicturePtr dst, + int x, int y, int w, int h, + bool partial) +{ + BoxRec box; + unsigned int hint; + + DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); + + op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); + op->dst.format = dst->format; + op->dst.width = op->dst.pixmap->drawable.width; + op->dst.height = op->dst.pixmap->drawable.height; + + if (w | h) { + assert(w && h); + box.x1 = x; + box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + } else + sna_render_picture_extents(dst, &box); + + hint = PREFER_GPU | RENDER_GPU; + if (!need_tiling(sna, op->dst.width, op->dst.height)) + hint |= FORCE_GPU; + if (!partial) { + hint |= IGNORE_DAMAGE; + if (w == op->dst.width && h == op->dst.height) + hint |= REPLACES; + } + + op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); + if (op->dst.bo == NULL) + return false; + + assert(!op->damage || !DAMAGE_IS_ALL(*op->damage)); + + if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel)) + return false; + + if (hint & REPLACES) { + struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); + kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); + } + + 
get_drawable_deltas(dst->pDrawable, op->dst.pixmap, + &op->dst.x, &op->dst.y); + + DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", + __FUNCTION__, + op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, + op->dst.width, op->dst.height, + op->dst.bo->pitch, + op->dst.x, op->dst.y, + op->damage ? *op->damage : (void *)-1)); + + assert(op->dst.bo->proxy == NULL); + + if (too_large(op->dst.width, op->dst.height) && + !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) + return false; + + return true; +} + +static bool +try_blt(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_op *tmp) +{ + struct kgem_bo *bo; + + if (sna->kgem.mode == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); + goto execute; + } + + if (too_large(width, height)) { + DBG(("%s: operation too large for 3D pipe (%d, %d)\n", + __FUNCTION__, width, height)); + goto execute; + } + + bo = __sna_drawable_peek_bo(dst->pDrawable); + if (bo == NULL) + goto execute; + + if (untiled_tlb_miss(bo)) + goto execute; + + if (bo->rq) { + if (RQ_IS_BLT(bo->rq)) + goto execute; + + return false; + } + + if (bo->tiling == I915_TILING_Y) + goto upload; + + if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) + goto execute; + + if (src->pDrawable == dst->pDrawable && + (sna->render_state.gt < 3 || width*height < 1024) && + can_switch_to_blt(sna, bo, 0)) + goto execute; + + if (src->pDrawable) { + struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); + if (s == NULL) + goto upload; + + if (prefer_blt_bo(sna, s, bo)) + goto execute; + } + + if (sna->kgem.ring == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); + goto execute; + } + +upload: + flags |= COMPOSITE_UPLOAD; +execute: + return 
sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + flags, tmp); +} + +static bool +check_gradient(PicturePtr picture, bool precise) +{ + if (picture->pDrawable) + return false; + + switch (picture->pSourcePict->type) { + case SourcePictTypeSolidFill: + case SourcePictTypeLinear: + return false; + default: + return precise; + } +} + +static bool +has_alphamap(PicturePtr p) +{ + return p->alphaMap != NULL; +} + +static bool +need_upload(PicturePtr p) +{ + return p->pDrawable && unattached(p->pDrawable) && untransformed(p); +} + +static bool +source_is_busy(PixmapPtr pixmap) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv == NULL || priv->clear) + return false; + + if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) + return true; + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + + return priv->gpu_damage && !priv->cpu_damage; +} + +static bool +source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) +{ + if (sna_picture_is_solid(p, NULL)) + return false; + + if (p->pSourcePict) + return check_gradient(p, precise); + + if (!gen9_check_repeat(p) || !gen9_check_format(p->format)) + return true; + + if (pixmap && source_is_busy(pixmap)) + return false; + + return has_alphamap(p) || !gen9_check_filter(p) || need_upload(p); +} + +static bool +gen9_composite_fallback(struct sna *sna, + PicturePtr src, + PicturePtr mask, + PicturePtr dst) +{ + PixmapPtr src_pixmap; + PixmapPtr mask_pixmap; + PixmapPtr dst_pixmap; + bool src_fallback, mask_fallback; + + if (!gen9_check_dst_format(dst->format)) { + DBG(("%s: unknown destination format: %d\n", + __FUNCTION__, dst->format)); + return true; + } + + dst_pixmap = get_drawable_pixmap(dst->pDrawable); + + src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; + src_fallback = source_fallback(src, src_pixmap, + dst->polyMode == PolyModePrecise); + + if (mask) { + mask_pixmap = mask->pDrawable ? 
get_drawable_pixmap(mask->pDrawable) : NULL; + mask_fallback = source_fallback(mask, mask_pixmap, + dst->polyMode == PolyModePrecise); + } else { + mask_pixmap = NULL; + mask_fallback = false; + } + + /* If we are using the destination as a source and need to + * readback in order to upload the source, do it all + * on the cpu. + */ + if (src_pixmap == dst_pixmap && src_fallback) { + DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); + return true; + } + if (mask_pixmap == dst_pixmap && mask_fallback) { + DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); + return true; + } + + /* If anything is on the GPU, push everything out to the GPU */ + if (dst_use_gpu(dst_pixmap)) { + DBG(("%s: dst is already on the GPU, try to use GPU\n", + __FUNCTION__)); + return false; + } + + if (src_pixmap && !src_fallback) { + DBG(("%s: src is already on the GPU, try to use GPU\n", + __FUNCTION__)); + return false; + } + if (mask_pixmap && !mask_fallback) { + DBG(("%s: mask is already on the GPU, try to use GPU\n", + __FUNCTION__)); + return false; + } + + /* However if the dst is not on the GPU and we need to + * render one of the sources using the CPU, we may + * as well do the entire operation in place onthe CPU. 
+ */ + if (src_fallback) { + DBG(("%s: dst is on the CPU and src will fallback\n", + __FUNCTION__)); + return true; + } + + if (mask && mask_fallback) { + DBG(("%s: dst is on the CPU and mask will fallback\n", + __FUNCTION__)); + return true; + } + + if (too_large(dst_pixmap->drawable.width, + dst_pixmap->drawable.height) && + dst_is_cpu(dst_pixmap)) { + DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); + return true; + } + + DBG(("%s: dst is not on the GPU and the operation should not fallback\n", + __FUNCTION__)); + return dst_use_cpu(dst_pixmap); +} + +static int +reuse_source(struct sna *sna, + PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, + PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) +{ + uint32_t color; + + if (src_x != msk_x || src_y != msk_y) + return false; + + if (src == mask) { + DBG(("%s: mask is source\n", __FUNCTION__)); + *mc = *sc; + mc->bo = kgem_bo_reference(mc->bo); + return true; + } + + if (sna_picture_is_solid(mask, &color)) + return gen4_channel_init_solid(sna, mc, color); + + if (sc->is_solid) + return false; + + if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) + return false; + + DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); + + if (!sna_transform_equal(src->transform, mask->transform)) + return false; + + if (!sna_picture_alphamap_equal(src, mask)) + return false; + + if (!gen9_check_repeat(mask)) + return false; + + if (!gen9_check_filter(mask)) + return false; + + if (!gen9_check_format(mask->format)) + return false; + + DBG(("%s: reusing source channel for mask with a twist\n", + __FUNCTION__)); + + *mc = *sc; + mc->repeat = gen9_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); + mc->filter = gen9_filter(mask->filter); + mc->pict_format = mask->format; + mc->card_format = gen9_get_card_format(mask->format); + mc->bo = kgem_bo_reference(mc->bo); + return true; +} + +static bool +gen9_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_op *tmp) +{ + if (op >= ARRAY_SIZE(gen9_blend_op)) + return false; + + DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__, + width, height, sna->kgem.mode, sna->kgem.ring)); + + if (mask == NULL && + try_blt(sna, op, + src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + flags, tmp)) + return true; + + if (gen9_composite_fallback(sna, src, mask, dst)) + goto fallback; + + if (need_tiling(sna, width, height)) + return sna_tiling_composite(op, src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + tmp); + + if (op == PictOpClear && src == sna->clear) + op = PictOpSrc; + tmp->op = op; + if (!gen9_composite_set_target(sna, tmp, dst, + dst_x, dst_y, width, height, + flags & COMPOSITE_PARTIAL || op > PictOpSrc)) + goto fallback; + + switch (gen9_composite_picture(sna, src, &tmp->src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_dst; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->src, 0)) + goto cleanup_dst; + /* fall through to fixup */ + case 1: + /* Did we just switch rings to prepare the source? 
*/ + if (mask == NULL && + (prefer_blt_composite(sna, tmp) || + unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) && + sna_blt_composite__convert(sna, + dst_x, dst_y, width, height, + tmp)) + return true; + + if (!gen9_composite_channel_convert(&tmp->src)) + goto cleanup_src; + + break; + } + + tmp->is_affine = tmp->src.is_affine; + tmp->has_component_alpha = false; + tmp->need_magic_ca_pass = false; + + tmp->mask.bo = NULL; + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + + if (mask) { + if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { + tmp->has_component_alpha = true; + + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (gen9_blend_op[op].src_alpha && + (gen9_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { + if (op != PictOpOver) + goto cleanup_src; + + tmp->need_magic_ca_pass = true; + tmp->op = PictOpOutReverse; + } + } + + if (!reuse_source(sna, + src, &tmp->src, src_x, src_y, + mask, &tmp->mask, msk_x, msk_y)) { + switch (gen9_composite_picture(sna, mask, &tmp->mask, + msk_x, msk_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_src; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) + goto cleanup_src; + /* fall through to fixup */ + case 1: + if (!gen9_composite_channel_convert(&tmp->mask)) + goto cleanup_mask; + break; + } + } + + tmp->is_affine &= tmp->mask.is_affine; + } + + tmp->u.gen9.flags = + GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, + tmp->src.repeat, + tmp->mask.filter, + tmp->mask.repeat), + gen9_get_blend(tmp->op, + tmp->has_component_alpha, + tmp->dst.format), + gen9_choose_composite_kernel(tmp->op, + tmp->mask.bo != NULL, + tmp->has_component_alpha, + tmp->is_affine), + gen4_choose_composite_emitter(sna, tmp)); + + tmp->blt = gen9_render_composite_blt; + tmp->box = 
gen9_render_composite_box; + tmp->boxes = gen9_render_composite_boxes__blt; + if (tmp->emit_boxes){ + tmp->boxes = gen9_render_composite_boxes; + tmp->thread_boxes = gen9_render_composite_boxes__thread; + } + tmp->done = gen9_render_composite_done; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) + goto cleanup_mask; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen9_align_vertex(sna, tmp); + gen9_emit_composite_state(sna, tmp); + return true; + +cleanup_mask: + if (tmp->mask.bo) { + kgem_bo_destroy(&sna->kgem, tmp->mask.bo); + tmp->mask.bo = NULL; + } +cleanup_src: + if (tmp->src.bo) { + kgem_bo_destroy(&sna->kgem, tmp->src.bo); + tmp->src.bo = NULL; + } +cleanup_dst: + if (tmp->redirect.real_bo) { + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + tmp->redirect.real_bo = NULL; + } +fallback: + return (mask == NULL && + sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + flags | COMPOSITE_FALLBACK, tmp)); +} + +#if !NO_COMPOSITE_SPANS +fastcall static void +gen9_render_composite_spans_box(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, float opacity) +{ + DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", + __FUNCTION__, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + gen9_get_rectangles(sna, &op->base, 1, gen9_emit_composite_state); + op->prim_emit(sna, op, box, opacity); +} + +static void +gen9_render_composite_spans_boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, int nbox, + float opacity) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], 
op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y)); + + do { + int nbox_this_time; + + nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, + gen9_emit_composite_state); + nbox -= nbox_this_time; + + do { + DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + op->prim_emit(sna, op, box++, opacity); + } while (--nbox_this_time); + } while (nbox); +} + +fastcall static void +gen9_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, + gen9_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void +gen9_render_composite_spans_done(struct sna *sna, + const struct sna_composite_spans_op *op) +{ + if (sna->render.vertex_offset) + gen8_vertex_flush(sna); + + DBG(("%s()\n", __FUNCTION__)); + + if (op->base.src.bo) + kgem_bo_destroy(&sna->kgem, op->base.src.bo); + + sna_render_composite_redirect_done(sna, &op->base); +} + +static bool +gen9_check_composite_spans(struct sna *sna, + uint8_t op, PicturePtr src, PicturePtr dst, + int16_t width, int16_t height, unsigned flags) +{ + if (op >= ARRAY_SIZE(gen9_blend_op)) + return false; + + if 
(gen9_composite_fallback(sna, src, NULL, dst)) + return false; + + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); + return false; + } + + return true; +} + +static bool +gen9_render_composite_spans(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_spans_op *tmp) +{ + DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, + width, height, flags, sna->kgem.ring)); + + assert(gen9_check_composite_spans(sna, op, src, dst, width, height, flags)); + + if (need_tiling(sna, width, height)) { + DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", + __FUNCTION__, width, height)); + return sna_tiling_composite_spans(op, src, dst, + src_x, src_y, dst_x, dst_y, + width, height, flags, tmp); + } + + tmp->base.op = op; + if (!gen9_composite_set_target(sna, &tmp->base, dst, + dst_x, dst_y, width, height, true)) + return false; + + switch (gen9_composite_picture(sna, src, &tmp->base.src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_dst; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) + goto cleanup_dst; + /* fall through to fixup */ + case 1: + if (!gen9_composite_channel_convert(&tmp->base.src)) + goto cleanup_src; + break; + } + tmp->base.mask.bo = NULL; + + tmp->base.is_affine = tmp->base.src.is_affine; + tmp->base.need_magic_ca_pass = false; + + tmp->base.u.gen9.flags = + GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, + tmp->base.src.repeat, + SAMPLER_FILTER_NEAREST, + SAMPLER_EXTEND_PAD), + gen9_get_blend(tmp->base.op, false, tmp->base.dst.format), + GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine, + gen4_choose_spans_emitter(sna, tmp)); + + tmp->box = gen9_render_composite_spans_box; + tmp->boxes = 
gen9_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen9_render_composite_spans_boxes__thread; + tmp->done = gen9_render_composite_spans_done; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) + goto cleanup_src; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen9_align_vertex(sna, &tmp->base); + gen9_emit_composite_state(sna, &tmp->base); + return true; + +cleanup_src: + if (tmp->base.src.bo) + kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); +cleanup_dst: + if (tmp->base.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); + return false; +} +#endif + +static void +gen9_emit_copy_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset, dirty; + + gen9_get_batch(sna, op); + + binding_table = gen9_composite_get_binding_table(sna, &offset); + + dirty = kgem_bo_is_dirty(op->dst.bo); + + binding_table[0] = + gen9_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen9_get_dest_format(op->dst.format), + true); + binding_table[1] = + gen9_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + false); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += SURFACE_DW; + offset = sna->render_state.gen9.surface_table; + } + + if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) + dirty = 0; + + assert(!GEN9_READS_DST(op->u.gen9.flags)); + gen9_emit_state(sna, op, offset | dirty); +} + +static inline bool +prefer_blt_copy(struct sna *sna, + struct kgem_bo *src_bo, + struct kgem_bo *dst_bo, + unsigned flags) +{ + if (sna->kgem.mode == KGEM_BLT) + return true; + + assert((flags & COPY_SYNC) == 0); + + if 
(untiled_tlb_miss(src_bo) || + untiled_tlb_miss(dst_bo)) + return true; + + if (flags & COPY_DRI && !sna->kgem.has_semaphores) + return false; + + if (force_blt_ring(sna, dst_bo)) + return true; + + if ((flags & COPY_SMALL || + (sna->render_state.gt < 3 && src_bo == dst_bo)) && + can_switch_to_blt(sna, dst_bo, flags)) + return true; + + if (kgem_bo_is_render(dst_bo) || + kgem_bo_is_render(src_bo)) + return false; + + if (flags & COPY_LAST && + sna->render_state.gt < 3 && + can_switch_to_blt(sna, dst_bo, flags)) + return true; + + if (prefer_render_ring(sna, dst_bo)) + return false; + + if (!prefer_blt_ring(sna, dst_bo, flags)) + return false; + + return prefer_blt_bo(sna, src_bo, dst_bo); +} + +static bool +gen9_render_copy_boxes(struct sna *sna, uint8_t alu, + const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, unsigned flags) +{ + struct sna_composite_op tmp; + BoxRec extents; + + DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? 
%d\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, + src_bo == dst_bo, + overlaps(sna, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + box, n, flags, &extents))); + + if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && + sna_blt_compare_depth(src, dst) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->bitsPerPixel, + box, n)) + return true; + + if (!(alu == GXcopy || alu == GXclear) || + unaligned(src_bo, src->bitsPerPixel) || + unaligned(dst_bo, dst->bitsPerPixel)) { +fallback_blt: + DBG(("%s: fallback blt\n", __FUNCTION__)); + if (!sna_blt_compare_depth(src, dst)) + return false; + + return sna_blt_copy_boxes_fallback(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); + } + + if (overlaps(sna, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + box, n, flags, + &extents)) { + bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); + + if ((big || !prefer_render_ring(sna, dst_bo)) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->bitsPerPixel, + box, n)) + return true; + + if (big) + goto fallback_blt; + + assert(src_bo == dst_bo); + assert(src->depth == dst->depth); + assert(src->width == dst->width); + assert(src->height == dst->height); + return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, + src_dx, src_dy, + dst_dx, dst_dy, + box, n, &extents); + } + + if (dst->depth == src->depth) { + tmp.dst.format = sna_render_format_for_depth(dst->depth); + tmp.src.pict_format = tmp.dst.format; + } else { + tmp.dst.format = sna_format_for_depth(dst->depth); + tmp.src.pict_format = sna_format_for_depth(src->depth); + } + if (!gen9_check_format(tmp.src.pict_format)) + goto fallback_blt; + + tmp.dst.pixmap = (PixmapPtr)dst; + tmp.dst.width = dst->width; + tmp.dst.height = dst->height; + tmp.dst.bo = dst_bo; + tmp.dst.x = tmp.dst.y = 0; + tmp.damage = NULL; + + sna_render_composite_redirect_init(&tmp); + if 
(too_large(tmp.dst.width, tmp.dst.height)) { + int i; + + extents = box[0]; + for (i = 1; i < n; i++) { + if (box[i].x1 < extents.x1) + extents.x1 = box[i].x1; + if (box[i].y1 < extents.y1) + extents.y1 = box[i].y1; + + if (box[i].x2 > extents.x2) + extents.x2 = box[i].x2; + if (box[i].y2 > extents.y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1 + dst_dx, + extents.y1 + dst_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + n > 1)) + goto fallback_tiled; + } + + tmp.src.card_format = gen9_get_card_format(tmp.src.pict_format); + if (too_large(src->width, src->height)) { + int i; + + extents = box[0]; + for (i = 1; i < n; i++) { + if (box[i].x1 < extents.x1) + extents.x1 = box[i].x1; + if (box[i].y1 < extents.y1) + extents.y1 = box[i].y1; + + if (box[i].x2 > extents.x2) + extents.x2 = box[i].x2; + if (box[i].y2 > extents.y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, + extents.x1 + src_dx, + extents.y1 + src_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled_dst; + } else { + tmp.src.bo = src_bo; + tmp.src.width = src->width; + tmp.src.height = src->height; + tmp.src.offset[0] = tmp.src.offset[1] = 0; + } + + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = 0; + + tmp.u.gen9.flags = COPY_FLAGS(alu); + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { + if (tmp.src.bo != src_bo) + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); + goto fallback_blt; + } + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + src_dx += tmp.src.offset[0]; + src_dy += tmp.src.offset[1]; + + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + + tmp.dst.x = tmp.dst.y = 0; + + 
gen9_align_vertex(sna, &tmp); + gen9_emit_copy_state(sna, &tmp); + + do { + int16_t *v; + int n_this_time; + + n_this_time = gen9_get_rectangles(sna, &tmp, n, + gen9_emit_copy_state); + n -= n_this_time; + + v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); + sna->render.vertex_used += 6 * n_this_time; + assert(sna->render.vertex_used <= sna->render.vertex_size); + do { + + DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1)); + v[0] = box->x2 + dst_dx; + v[2] = box->x2 + src_dx; + v[1] = v[5] = box->y2 + dst_dy; + v[3] = v[7] = box->y2 + src_dy; + v[8] = v[4] = box->x1 + dst_dx; + v[10] = v[6] = box->x1 + src_dx; + v[9] = box->y1 + dst_dy; + v[11] = box->y1 + src_dy; + v += 12; box++; + } while (--n_this_time); + } while (n); + + gen8_vertex_flush(sna); + sna_render_composite_redirect_done(sna, &tmp); + if (tmp.src.bo != src_bo) + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return true; + +fallback_tiled_dst: + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); +fallback_tiled: + DBG(("%s: fallback tiled\n", __FUNCTION__)); + if (sna_blt_compare_depth(src, dst) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->bitsPerPixel, + box, n)) + return true; + + return sna_tiling_copy_boxes(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); +} + +static void +gen9_render_copy_blt(struct sna *sna, + const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy) +{ + int16_t *v; + + gen9_get_rectangles(sna, &op->base, 1, gen9_emit_copy_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dx+w; v[1] = dy+h; + v[2] = sx+w; v[3] = sy+h; + v[4] = dx; v[5] = dy+h; + v[6] = sx; v[7] = sy+h; + v[8] = dx; 
v[9] = dy; + v[10] = sx; v[11] = sy; +} + +static void +gen9_render_copy_done(struct sna *sna, const struct sna_copy_op *op) +{ + if (sna->render.vertex_offset) + gen8_vertex_flush(sna); +} + +static bool +gen9_render_copy(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op) +{ + DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", + __FUNCTION__, alu, + src->drawable.width, src->drawable.height, + dst->drawable.width, dst->drawable.height)); + + if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && + sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op)) + return true; + + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || + too_large(src->drawable.width, src->drawable.height) || + too_large(dst->drawable.width, dst->drawable.height) || + unaligned(src_bo, src->drawable.bitsPerPixel) || + unaligned(dst_bo, dst->drawable.bitsPerPixel)) { +fallback: + if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) + return false; + + return sna_blt_copy(sna, alu, src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op); + } + + if (dst->drawable.depth == src->drawable.depth) { + op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); + op->base.src.pict_format = op->base.dst.format; + } else { + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); + } + if (!gen9_check_format(op->base.src.pict_format)) + goto fallback; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.bo = dst_bo; + + op->base.src.bo = src_bo; + op->base.src.card_format = + gen9_get_card_format(op->base.src.pict_format); + op->base.src.width = src->drawable.width; + op->base.src.height = src->drawable.height; + + op->base.mask.bo = NULL; + + 
op->base.floats_per_vertex = 2; + op->base.floats_per_rect = 6; + + op->base.u.gen9.flags = COPY_FLAGS(alu); + + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); + if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) + goto fallback; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen9_align_vertex(sna, &op->base); + gen9_emit_copy_state(sna, &op->base); + + op->blt = gen9_render_copy_blt; + op->done = gen9_render_copy_done; + return true; +} + +static void +gen9_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset, dirty; + + /* XXX Render Target Fast Clear + * Set RTFC Enable in PS and render a rectangle. + * Limited to a clearing the full MSC surface only with a + * specific kernel. + */ + + gen9_get_batch(sna, op); + + binding_table = gen9_composite_get_binding_table(sna, &offset); + + dirty = kgem_bo_is_dirty(op->dst.bo); + + binding_table[0] = + gen9_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen9_get_dest_format(op->dst.format), + true); + binding_table[1] = + gen9_bind_bo(sna, + op->src.bo, 1, 1, + SURFACEFORMAT_B8G8R8A8_UNORM, + false); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += SURFACE_DW; + offset = sna->render_state.gen9.surface_table; + } + + if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) + dirty = 0; + + gen9_emit_state(sna, op, offset | dirty); +} + +static bool +gen9_render_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + const DrawableRec *dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + uint32_t pixel; + + DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", + __FUNCTION__, op, + color->red, color->green, color->blue, color->alpha, 
(int)format)); + + if (op >= ARRAY_SIZE(gen9_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return false; + } + + if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || + !gen9_check_dst_format(format) || + unaligned(dst_bo, PICT_FORMAT_BPP(format))) { + uint8_t alu = GXinvalid; + + if (op <= PictOpSrc) { + pixel = 0; + if (op == PictOpClear) + alu = GXclear; + else if (sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + format)) + alu = GXcopy; + } + + if (alu != GXinvalid && + sna_blt_fill_boxes(sna, alu, + dst_bo, dst->bitsPerPixel, + pixel, box, n)) + return true; + + if (!gen9_check_dst_format(format)) + return false; + } + + if (op == PictOpClear) { + pixel = 0; + op = PictOpSrc; + } else if (!sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + PICT_a8r8g8b8)) + return false; + + DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", + __FUNCTION__, pixel, n, + box[0].x1, box[0].y1, box[0].x2, box[0].y2)); + + tmp.dst.pixmap = (PixmapPtr)dst; + tmp.dst.width = dst->width; + tmp.dst.height = dst->height; + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + tmp.dst.x = tmp.dst.y = 0; + tmp.damage = NULL; + + sna_render_composite_redirect_init(&tmp); + if (too_large(dst->width, dst->height)) { + BoxRec extents; + + boxes_extents(box, n, &extents); + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + n > 1)) + return sna_tiling_fill_boxes(sna, op, format, color, + dst, dst_bo, box, n); + } + + tmp.src.bo = sna_render_get_solid(sna, pixel); + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = false; + + tmp.u.gen9.flags = FILL_FLAGS(op, format); + + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo); + tmp.src.bo = NULL; + + if (tmp.redirect.real_bo) { + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); + tmp.redirect.real_bo = NULL; + } + + return false; + } + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen9_align_vertex(sna, &tmp); + gen9_emit_fill_state(sna, &tmp); + + do { + int n_this_time; + int16_t *v; + + n_this_time = gen9_get_rectangles(sna, &tmp, n, + gen9_emit_fill_state); + n -= n_this_time; + + v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); + sna->render.vertex_used += 6 * n_this_time; + assert(sna->render.vertex_used <= sna->render.vertex_size); + do { + DBG((" (%d, %d), (%d, %d)\n", + box->x1, box->y1, box->x2, box->y2)); + + v[0] = box->x2; + v[5] = v[1] = box->y2; + v[8] = v[4] = box->x1; + v[9] = box->y1; + v[2] = v[3] = v[7] = 1; + v[6] = v[10] = v[11] = 0; + v += 12; box++; + } while (--n_this_time); + } while (n); + + gen8_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + sna_render_composite_redirect_done(sna, &tmp); + return true; +} + +static void +gen9_render_fill_op_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) +{ + int16_t *v; + + DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); + + gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = x+w; + v[4] = v[8] = x; + v[1] = v[5] = y+h; + v[9] = y; + + v[2] = v[3] = v[7] = 1; + v[6] = v[10] = v[11] = 0; +} + +fastcall static void +gen9_render_fill_op_box(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box) +{ + int16_t *v; + + DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + 
sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = box->x2; + v[8] = v[4] = box->x1; + v[5] = v[1] = box->y2; + v[9] = box->y1; + + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; +} + +fastcall static void +gen9_render_fill_op_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, nbox)); + + do { + int nbox_this_time; + int16_t *v; + + nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, + gen9_emit_fill_state); + nbox -= nbox_this_time; + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6 * nbox_this_time; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + do { + v[0] = box->x2; + v[8] = v[4] = box->x1; + v[5] = v[1] = box->y2; + v[9] = box->y1; + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; + box++; v += 12; + } while (--nbox_this_time); + } while (nbox); +} + +static void +gen9_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) +{ + if (sna->render.vertex_offset) + gen8_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, op->base.src.bo); +} + +static bool +gen9_render_fill(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, unsigned flags, + struct sna_fill_op *op) +{ + DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); + + if (prefer_blt_fill(sna, dst_bo, flags) && + sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op)) + return true; + + if (!(alu == GXcopy || alu == GXclear) || + too_large(dst->drawable.width, dst->drawable.height) || + unaligned(dst_bo, dst->drawable.bitsPerPixel)) + return sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op); + + if (alu == GXclear) + color = 0; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = 
dst->drawable.height; + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.dst.bo = dst_bo; + op->base.dst.x = op->base.dst.y = 0; + + op->base.src.bo = + sna_render_get_solid(sna, + sna_rgba_for_color(color, + dst->drawable.depth)); + op->base.mask.bo = NULL; + + op->base.need_magic_ca_pass = false; + op->base.floats_per_vertex = 2; + op->base.floats_per_rect = 6; + + op->base.u.gen9.flags = FILL_FLAGS_NOBLEND; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_bo_destroy(&sna->kgem, op->base.src.bo); + return false; + } + + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen9_align_vertex(sna, &op->base); + gen9_emit_fill_state(sna, &op->base); + + op->blt = gen9_render_fill_op_blt; + op->box = gen9_render_fill_op_box; + op->boxes = gen9_render_fill_op_boxes; + op->points = NULL; + op->done = gen9_render_fill_op_done; + return true; +} + +static bool +gen9_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, + uint32_t color, + int16_t x1, int16_t y1, int16_t x2, int16_t y2, + uint8_t alu) +{ + BoxRec box; + + box.x1 = x1; + box.y1 = y1; + box.x2 = x2; + box.y2 = y2; + + return sna_blt_fill_boxes(sna, alu, + bo, dst->drawable.bitsPerPixel, + color, &box, 1); +} + +static bool +gen9_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, + uint32_t color, + int16_t x1, int16_t y1, + int16_t x2, int16_t y2, + uint8_t alu) +{ + struct sna_composite_op tmp; + int16_t *v; + + /* Prefer to use the BLT if already engaged */ + if (prefer_blt_fill(sna, bo, FILL_BOXES) && + gen9_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu)) + return true; + + /* Must use the BLT if we can't RENDER... 
*/ + if (!(alu == GXcopy || alu == GXclear) || + too_large(dst->drawable.width, dst->drawable.height) || + unaligned(bo, dst->drawable.bitsPerPixel)) + return gen9_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu); + + if (alu == GXclear) + color = 0; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = bo; + tmp.dst.x = tmp.dst.y = 0; + + tmp.src.bo = + sna_render_get_solid(sna, + sna_rgba_for_color(color, + dst->drawable.depth)); + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = false; + + tmp.u.gen9.flags = FILL_FLAGS_NOBLEND; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen9_align_vertex(sna, &tmp); + gen9_emit_fill_state(sna, &tmp); + + gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state); + + DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = x2; + v[8] = v[4] = x1; + v[5] = v[1] = y2; + v[9] = y1; + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; + + gen8_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + + return true; +} + +static bool +gen9_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) +{ + BoxRec box; + + box.x1 = 0; + box.y1 = 0; + box.x2 = dst->drawable.width; + box.y2 = dst->drawable.height; + + return sna_blt_fill_boxes(sna, GXclear, + bo, dst->drawable.bitsPerPixel, + 0, &box, 1); +} + +static bool +gen9_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) +{ + struct sna_composite_op tmp; + 
int16_t *v; + + DBG(("%s: %dx%d\n", + __FUNCTION__, + dst->drawable.width, + dst->drawable.height)); + + /* Prefer to use the BLT if already engaged */ + if (sna->kgem.mode == KGEM_BLT && + gen9_render_clear_try_blt(sna, dst, bo)) + return true; + + /* Must use the BLT if we can't RENDER... */ + if (too_large(dst->drawable.width, dst->drawable.height) || + unaligned(bo, dst->drawable.bitsPerPixel)) + return gen9_render_clear_try_blt(sna, dst, bo); + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = bo; + tmp.dst.x = tmp.dst.y = 0; + + tmp.src.bo = sna_render_get_solid(sna, 0); + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = false; + + tmp.u.gen9.flags = FILL_FLAGS_NOBLEND; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen9_align_vertex(sna, &tmp); + gen9_emit_fill_state(sna, &tmp); + + gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst->drawable.width; + v[5] = v[1] = dst->drawable.height; + v[8] = v[4] = 0; + v[9] = 0; + + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; + + gen8_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + + return true; +} + +#if !NO_VIDEO +static uint32_t gen9_bind_video_source(struct sna *sna, + struct kgem_bo *bo, + uint32_t delta, + int width, + int height, + int pitch, + uint32_t format) +{ + uint32_t *ss; + int offset; + + offset = sna->kgem.surface -= SURFACE_DW; + ss = sna->kgem.batch + offset; + ss[0] = (SURFACE_2D << 
SURFACE_TYPE_SHIFT | + gen9_tiling_bits(bo->tiling) | + format << SURFACE_FORMAT_SHIFT | + SURFACE_VALIGN_4 | SURFACE_HALIGN_4); + ss[1] = 0; + ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | + (height - 1) << SURFACE_HEIGHT_SHIFT); + ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT; + ss[4] = 0; + ss[5] = 0; + ss[6] = 0; + ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); + *(uint64_t *)(ss+8) = + kgem_add_reloc64(&sna->kgem, offset + 8, bo, + I915_GEM_DOMAIN_SAMPLER << 16, + delta); + ss[10] = 0; + ss[11] = 0; + ss[12] = 0; + ss[13] = 0; + ss[14] = 0; + ss[15] = 0; + + DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n", + offset, bo->handle, ss[1], + format, width, height, bo->pitch, bo->tiling)); + + return offset * sizeof(uint32_t); +} + +static void gen9_emit_video_state(struct sna *sna, + const struct sna_composite_op *op) +{ + struct sna_video_frame *frame = op->priv; + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + uint32_t *binding_table; + uint16_t offset; + int n_src, n; + + /* XXX VeBox, bicubic */ + + gen9_get_batch(sna, op); + + src_surf_base[0] = 0; + src_surf_base[1] = 0; + src_surf_base[2] = frame->VBufOffset; + src_surf_base[3] = frame->VBufOffset; + src_surf_base[4] = frame->UBufOffset; + src_surf_base[5] = frame->UBufOffset; + + if (is_planar_fourcc(frame->id)) { + src_surf_format = SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = frame->width; + src_height[1] = src_height[0] = frame->height; + src_pitch[1] = src_pitch[0] = frame->pitch[1]; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + frame->width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + frame->height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + frame->pitch[0]; + n_src = 6; + } else { + if (frame->id == FOURCC_RGB888) + src_surf_format = SURFACEFORMAT_B8G8R8X8_UNORM; + else if (frame->id == 
FOURCC_UYVY) + src_surf_format = SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = frame->width; + src_height[0] = frame->height; + src_pitch[0] = frame->pitch[0]; + n_src = 1; + } + + binding_table = gen9_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen9_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen9_get_dest_format(op->dst.format), + true); + for (n = 0; n < n_src; n++) { + binding_table[1+n] = + gen9_bind_video_source(sna, + frame->bo, + src_surf_base[n], + src_width[n], + src_height[n], + src_pitch[n], + src_surf_format); + } + + gen9_emit_state(sna, op, offset); +} + +static unsigned select_video_kernel(const struct sna_video_frame *frame) +{ + switch (frame->id) { + case FOURCC_YV12: + case FOURCC_I420: + case FOURCC_XVMC: + return GEN9_WM_KERNEL_VIDEO_PLANAR; + + case FOURCC_RGB888: + case FOURCC_RGB565: + return GEN9_WM_KERNEL_VIDEO_RGB; + + default: + return GEN9_WM_KERNEL_VIDEO_PACKED; + } +} + +static bool +gen9_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + PixmapPtr pixmap) +{ + struct sna_composite_op tmp; + struct sna_pixmap *priv = sna_pixmap(pixmap); + int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; + int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; + int src_width = frame->src.x2 - frame->src.x1; + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; + unsigned filter; + const BoxRec *box; + int nbox; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", + __FUNCTION__, + src_width, src_height, dst_width, dst_height, + region_num_rects(dstRegion), + REGION_EXTENTS(NULL, dstRegion)->x1, + REGION_EXTENTS(NULL, dstRegion)->y1, + REGION_EXTENTS(NULL, dstRegion)->x2, + REGION_EXTENTS(NULL, dstRegion)->y2)); + + assert(priv->gpu_bo); + assert(!too_large(pixmap->drawable.width, 
pixmap->drawable.height)); + assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel)); + + memset(&tmp, 0, sizeof(tmp)); + + tmp.dst.pixmap = pixmap; + tmp.dst.width = pixmap->drawable.width; + tmp.dst.height = pixmap->drawable.height; + tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); + tmp.dst.bo = priv->gpu_bo; + + tmp.src.bo = frame->bo; + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; + + DBG(("%s: scaling?=%d, planar?=%d [%x]\n", + __FUNCTION__, + src_width != dst_width || src_height != dst_height, + is_planar_fourcc(frame->id), frame->id)); + + if (src_width == dst_width && src_height == dst_height) + filter = SAMPLER_FILTER_NEAREST; + else + filter = SAMPLER_FILTER_BILINEAR; + + tmp.u.gen9.flags = + GEN9_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, + SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), + NO_BLEND, + select_video_kernel(frame), + 2); + tmp.priv = frame; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) + return false; + + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen9_align_vertex(sna, &tmp); + gen9_emit_video_state(sna, &tmp); + + DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", + __FUNCTION__, + frame->src.x1, frame->src.y1, + src_width, src_height, + dst_width, dst_height, + frame->width, frame->height)); + + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + + src_scale_y = (float)src_height / dst_height / frame->height; + src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; + + DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", + __FUNCTION__, + src_scale_x, src_scale_y, + src_offset_x, src_offset_y)); + + box = region_rects(dstRegion); + nbox = 
region_num_rects(dstRegion); + while (nbox--) { + DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2, box->y2, + box->x1 * src_scale_x + src_offset_x, + box->y1 * src_scale_y + src_offset_y, + box->x2 * src_scale_x + src_offset_x, + box->y2 * src_scale_y + src_offset_y)); + + gen9_get_rectangles(sna, &tmp, 1, gen9_emit_video_state); + + OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + + box++; + } + gen8_vertex_flush(sna); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) + sna_damage_add(&priv->gpu_damage, dstRegion); + + return true; +} +#endif + +static void gen9_render_flush(struct sna *sna) +{ + gen8_vertex_close(sna); + + assert(sna->render.vb_id == 0); + assert(sna->render.vertex_offset == 0); +} + +static void gen9_render_reset(struct sna *sna) +{ + sna->render_state.gen9.emit_flush = false; + sna->render_state.gen9.needs_invariant = true; + sna->render_state.gen9.ve_id = 3 << 2; + sna->render_state.gen9.ve_dirty = false; + sna->render_state.gen9.last_primitive = -1; + + sna->render_state.gen9.num_sf_outputs = 0; + sna->render_state.gen9.samplers = -1; + sna->render_state.gen9.blend = -1; + sna->render_state.gen9.kernel = -1; + sna->render_state.gen9.drawrect_offset = -1; + sna->render_state.gen9.drawrect_limit = -1; + sna->render_state.gen9.surface_table = 0; + + if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { + DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); + discard_vbo(sna); + } + + sna->render.vertex_offset = 0; + sna->render.nvertex_reloc = 0; + sna->render.vb_id = 0; +} + +static void 
gen9_render_fini(struct sna *sna) +{ + kgem_bo_destroy(&sna->kgem, sna->render_state.gen9.general_bo); +} + +static bool gen9_render_setup(struct sna *sna) +{ + struct gen9_render_state *state = &sna->render_state.gen9; + struct sna_static_stream general; + struct gen9_sampler_state *ss; + int i, j, k, l, m; + uint32_t devid; + + devid = intel_get_device_id(sna->dev); + if (devid & 0xf) + state->gt = GEN9_GT_BIAS + ((devid >> 4) & 0xf) + 1; + DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); + + state->info = &min_gt_info; + if (is_skl(sna)) + state->info = &skl_gt_info; + if (is_bxt(sna)) + state->info = &bxt_gt_info; + if (is_kbl(sna)) + state->info = &kbl_gt_info; + if (is_glk(sna)) + state->info = &glk_gt_info; + + sna_static_stream_init(&general); + + /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer + * dumps, you know it points to zero. + */ + null_create(&general); + + for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) { + if (wm_kernels[m].size) { + state->wm_kernel[m][1] = + sna_static_stream_add(&general, + wm_kernels[m].data, + wm_kernels[m].size, + 64); + } else { + if (USE_8_PIXEL_DISPATCH) { + state->wm_kernel[m][0] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 8); + } + + if (USE_16_PIXEL_DISPATCH) { + state->wm_kernel[m][1] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 16); + } + + if (USE_32_PIXEL_DISPATCH) { + state->wm_kernel[m][2] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 32); + } + } + assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); + } + + COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff); + ss = sna_static_stream_map(&general, + 2 * sizeof(*ss) * + (2 + + FILTER_COUNT * EXTEND_COUNT * + FILTER_COUNT * EXTEND_COUNT), + 32); + state->wm_state = sna_static_stream_offsetof(&general, ss); + sampler_copy_init(ss); ss += 2; + sampler_fill_init(ss); ss += 2; + for (i = 0; i < 
FILTER_COUNT; i++) { + for (j = 0; j < EXTEND_COUNT; j++) { + for (k = 0; k < FILTER_COUNT; k++) { + for (l = 0; l < EXTEND_COUNT; l++) { + sampler_state_init(ss++, i, j); + sampler_state_init(ss++, k, l); + } + } + } + } + + state->cc_blend = gen9_create_blend_state(&general); + + state->general_bo = sna_static_stream_fini(sna, &general); + return state->general_bo != NULL; +} + +const char *gen9_render_init(struct sna *sna, const char *backend) +{ + if (!gen9_render_setup(sna)) + return backend; + + sna->kgem.context_switch = gen6_render_context_switch; + sna->kgem.retire = gen6_render_retire; + sna->kgem.expire = gen4_render_expire; + +#if !NO_COMPOSITE + sna->render.composite = gen9_render_composite; + sna->render.prefer_gpu |= PREFER_GPU_RENDER; +#endif +#if !NO_COMPOSITE_SPANS + sna->render.check_composite_spans = gen9_check_composite_spans; + sna->render.composite_spans = gen9_render_composite_spans; + sna->render.prefer_gpu |= PREFER_GPU_SPANS; +#endif +#if !NO_VIDEO + sna->render.video = gen9_render_video; +#endif + +#if !NO_COPY_BOXES + sna->render.copy_boxes = gen9_render_copy_boxes; +#endif +#if !NO_COPY + sna->render.copy = gen9_render_copy; +#endif + +#if !NO_FILL_BOXES + sna->render.fill_boxes = gen9_render_fill_boxes; +#endif +#if !NO_FILL + sna->render.fill = gen9_render_fill; +#endif +#if !NO_FILL_ONE + sna->render.fill_one = gen9_render_fill_one; +#endif +#if !NO_FILL_CLEAR + sna->render.clear = gen9_render_clear; +#endif + + sna->render.flush = gen9_render_flush; + sna->render.reset = gen9_render_reset; + sna->render.fini = gen9_render_fini; + + sna->render.max_3d_size = GEN9_MAX_SIZE; + sna->render.max_3d_pitch = 1 << 18; + return sna->render_state.gen9.info->name; +} diff --git a/src/sna/gen9_render.h b/src/sna/gen9_render.h new file mode 100644 index 00000000..e3cb3f93 --- /dev/null +++ b/src/sna/gen9_render.h @@ -0,0 +1,1130 @@ +#ifndef GEN9_RENDER_H +#define GEN9_RENDER_H + +#define INTEL_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) 
<< (low)) + +#define GEN9_3D(pipeline,op,sub) \ + ((3 << 29) | ((pipeline) << 27) | ((op) << 24) | ((sub) << 16)) + +#define GEN9_STATE_BASE_ADDRESS GEN9_3D(0, 1, 1) +# define BASE_ADDRESS_MODIFY (1 << 0) + +#define GEN9_STATE_SIP GEN9_3D(0, 1, 2) + +#define GEN9_3DSTATE_VF_STATISTICS GEN9_3D(1, 0, 0xb) +#define GEN9_PIPELINE_SELECT GEN9_3D(1, 1, 4) +# define PIPELINE_SELECT_3D 0 +# define PIPELINE_SELECT_MEDIA 1 +#define PIPELINE_SELECTION_MASK (3 << 8) + +#define GEN9_MEDIA_STATE_POINTERS GEN9_3D(2, 0, 0) +#define GEN9_MEDIA_OBJECT GEN9_3D(2, 1, 0) + +#define GEN9_3DSTATE_CLEAR_PARAMS GEN9_3D(3, 0, 0x04) +#define GEN9_3DSTATE_DEPTH_BUFFER GEN9_3D(3, 0, 0x05) +# define DEPTH_BUFFER_TYPE_SHIFT 29 +# define DEPTH_BUFFER_FORMAT_SHIFT 18 + +#define GEN9_3DSTATE_STENCIL_BUFFER GEN9_3D(3, 0, 0x06) +#define GEN9_3DSTATE_HIER_DEPTH_BUFFER GEN9_3D(3, 0, 0x07) +#define GEN9_3DSTATE_VERTEX_BUFFERS GEN9_3D(3, 0, 0x08) +# define VB_INDEX_SHIFT 26 +# define VB_MODIFY_ENABLE (1 << 14) +#define GEN9_3DSTATE_VERTEX_ELEMENTS GEN9_3D(3, 0, 0x09) +# define VE_INDEX_SHIFT 26 +# define VE_VALID (1 << 25) +# define VE_FORMAT_SHIFT 16 +# define VE_OFFSET_SHIFT 0 +# define VE_COMPONENT_0_SHIFT 28 +# define VE_COMPONENT_1_SHIFT 24 +# define VE_COMPONENT_2_SHIFT 20 +# define VE_COMPONENT_3_SHIFT 16 +#define GEN9_3DSTATE_INDEX_BUFFER GEN9_3D(3, 0, 0x0a) +#define GEN9_3DSTATE_VF GEN9_3D(3, 0, 0x0c) + +#define GEN9_3DSTATE_MULTISAMPLE GEN9_3D(3, 0, 0x0d) +/* DW1 */ +# define MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) +# define MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MULTISAMPLE_NUMSAMPLES_1 (0 << 1) +# define MULTISAMPLE_NUMSAMPLES_4 (2 << 1) +# define MULTISAMPLE_NUMSAMPLES_8 (3 << 1) + +#define GEN9_3DSTATE_CC_STATE_POINTERS GEN9_3D(3, 0, 0x0e) +#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS GEN9_3D(3, 0, 0x0f) + +#define GEN9_3DSTATE_VS GEN9_3D(3, 0, 0x10) +#define GEN9_3DSTATE_GS GEN9_3D(3, 0, 0x11) +#define GEN9_3DSTATE_CLIP GEN9_3D(3, 0, 0x12) +#define GEN9_3DSTATE_SF 
GEN9_3D(3, 0, 0x13) +# define SF_TRI_PROVOKE_SHIFT 29 +# define SF_LINE_PROVOKE_SHIFT 27 +# define SF_FAN_PROVOKE_SHIFT 25 + +#define GEN9_3DSTATE_WM GEN9_3D(3, 0, 0x14) +/* DW1 */ +# define WM_STATISTICS_ENABLE (1 << 31) +# define WM_DEPTH_CLEAR (1 << 30) +# define WM_DEPTH_RESOLVE (1 << 28) +# define WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define WM_KILL_ENABLE (1 << 25) +# define WM_POSITION_ZW_PIXEL (0 << 17) +# define WM_POSITION_ZW_CENTROID (2 << 17) +# define WM_POSITION_ZW_SAMPLE (3 << 17) +# define WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 16) +# define WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 15) +# define WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 14) +# define WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 13) +# define WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 12) +# define WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11) +# define WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) +# define WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) +# define WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) +# define WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) +# define WM_LINE_AA_WIDTH_0_5 (0 << 6) +# define WM_LINE_AA_WIDTH_1_0 (1 << 6) +# define WM_LINE_AA_WIDTH_2_0 (2 << 6) +# define WM_LINE_AA_WIDTH_4_0 (3 << 6) +# define WM_POLYGON_STIPPLE_ENABLE (1 << 4) +# define WM_LINE_STIPPLE_ENABLE (1 << 3) +# define WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) +# define WM_MSRAST_OFF_PIXEL (0 << 0) +# define WM_MSRAST_OFF_PATTERN (1 << 0) +# define WM_MSRAST_ON_PIXEL (2 << 0) +# define WM_MSRAST_ON_PATTERN (3 << 0) + +#define GEN9_3DSTATE_CONSTANT_VS GEN9_3D(3, 0, 0x15) +#define GEN9_3DSTATE_CONSTANT_GS GEN9_3D(3, 0, 0x16) +#define GEN9_3DSTATE_CONSTANT_PS GEN9_3D(3, 0, 0x17) + +#define GEN9_3DSTATE_SAMPLE_MASK GEN9_3D(3, 0, 0x18) + +#define GEN9_3DSTATE_CONSTANT_HS GEN9_3D(3, 0, 0x19) +#define GEN9_3DSTATE_CONSTANT_DS GEN9_3D(3, 0, 0x1a) + +#define GEN9_3DSTATE_HS GEN9_3D(3, 0, 0x1b) +#define GEN9_3DSTATE_TE GEN9_3D(3, 0, 0x1c) +#define GEN9_3DSTATE_DS GEN9_3D(3, 0, 0x1d) +#define GEN9_3DSTATE_STREAMOUT GEN9_3D(3, 0, 
0x1e) + +#define GEN9_3DSTATE_SBE GEN9_3D(3, 0, 0x1f) +/* DW1 */ +# define SBE_FORCE_VERTEX_URB_READ_LENGTH (1<<29) +# define SBE_FORCE_VERTEX_URB_READ_OFFSET (1<<28) +# define SBE_NUM_OUTPUTS_SHIFT 22 +# define SBE_SWIZZLE_ENABLE (1 << 21) +# define SBE_POINT_SPRITE_LOWERLEFT (1 << 20) +# define SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define SBE_URB_ENTRY_READ_OFFSET_SHIFT 5 +#define SBE_ACTIVE_COMPONENT_NONE 0 +#define SBE_ACTIVE_COMPONENT_XY 1 +#define SBE_ACTIVE_COMPONENT_XYZ 2 +#define SBE_ACTIVE_COMPONENT_XYZW 3 + + +#define GEN9_3DSTATE_PS GEN9_3D(3, 0, 0x20) +/* DW1:DW2 kernel pointer */ +/* DW3 */ +# define PS_SPF_MODE (1 << 31) +# define PS_VECTOR_MASK_ENABLE (1 << 30) +# define PS_SAMPLER_COUNT_SHIFT 27 +# define PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define PS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW4:DW5: scratch space */ +/* DW6 */ +# define PS_MAX_THREADS_SHIFT 23 +# define PS_MAX_THREADS (63 << PS_MAX_THREADS_SHIFT) +# define PS_PUSH_CONSTANT_ENABLE (1 << 11) +# define PS_RENDER_TARGET_CLEAR (1 << 8) +# define PS_RENDER_TARGET_RESOLVE (1 << 6) +# define PS_POSOFFSET_NONE (0 << 3) +# define PS_POSOFFSET_CENTROID (2 << 3) +# define PS_POSOFFSET_SAMPLE (3 << 3) +# define PS_32_DISPATCH_ENABLE (1 << 2) +# define PS_16_DISPATCH_ENABLE (1 << 1) +# define PS_8_DISPATCH_ENABLE (1 << 0) +/* DW7 */ +# define PS_DISPATCH_START_GRF_SHIFT_0 16 +# define PS_DISPATCH_START_GRF_SHIFT_1 8 +# define PS_DISPATCH_START_GRF_SHIFT_2 0 +/* DW8:D9: kernel 1 pointer */ +/* DW10:D11: kernel 2 pointer */ + +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP GEN9_3D(3, 0, 0x21) +#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC GEN9_3D(3, 0, 0x23) + +#define GEN9_3DSTATE_BLEND_STATE_POINTERS GEN9_3D(3, 0, 0x24) + +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS GEN9_3D(3, 0, 0x26) +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS GEN9_3D(3, 0, 0x27) +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS GEN9_3D(3, 0, 0x28) +#define 
GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS GEN9_3D(3, 0, 0x29) +#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS GEN9_3D(3, 0, 0x2a) + +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS GEN9_3D(3, 0, 0x2b) +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS GEN9_3D(3, 0, 0x2c) +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS GEN9_3D(3, 0, 0x2d) +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS GEN9_3D(3, 0, 0x2e) +#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS GEN9_3D(3, 0, 0x2f) + +#define GEN9_3DSTATE_URB_VS GEN9_3D(3, 0, 0x30) +#define GEN9_3DSTATE_URB_HS GEN9_3D(3, 0, 0x31) +#define GEN9_3DSTATE_URB_DS GEN9_3D(3, 0, 0x32) +#define GEN9_3DSTATE_URB_GS GEN9_3D(3, 0, 0x33) +/* DW1 */ +# define URB_ENTRY_NUMBER_SHIFT 0 +# define URB_ENTRY_SIZE_SHIFT 16 +# define URB_STARTING_ADDRESS_SHIFT 25 + +#define GEN9_3DSTATE_GATHER_CONSTANT_VS GEN9_3D(3, 0, 0x34) +#define GEN9_3DSTATE_GATHER_CONSTANT_GS GEN9_3D(3, 0, 0x35) +#define GEN9_3DSTATE_GATHER_CONSTANT_HS GEN9_3D(3, 0, 0x36) +#define GEN9_3DSTATE_GATHER_CONSTANT_DS GEN9_3D(3, 0, 0x37) +#define GEN9_3DSTATE_GATHER_CONSTANT_PS GEN9_3D(3, 0, 0x38) + +#define GEN9_3DSTATE_DX9_CONSTANTF_VS GEN9_3D(3, 0, 0x39) +#define GEN9_3DSTATE_DX9_CONSTANTF_PS GEN9_3D(3, 0, 0x3a) +#define GEN9_3DSTATE_DX9_CONSTANTI_VS GEN9_3D(3, 0, 0x3b) +#define GEN9_3DSTATE_DX9_CONSTANTI_PS GEN9_3D(3, 0, 0x3c) +#define GEN9_3DSTATE_DX9_CONSTANTB_VS GEN9_3D(3, 0, 0x3d) +#define GEN9_3DSTATE_DX9_CONSTANTB_PS GEN9_3D(3, 0, 0x3e) +#define GEN9_3DSTATE_DX9_LOCAL_VALID_VS GEN9_3D(3, 0, 0x3f) +#define GEN9_3DSTATE_DX9_LOCAL_VALID_PS GEN9_3D(3, 0, 0x40) +#define GEN9_3DSTATE_DX9_GENERATE_ACTIVE_VS GEN9_3D(3, 0, 0x41) +#define GEN9_3DSTATE_DX9_GENERATE_ACTIVE_PS GEN9_3D(3, 0, 0x42) + +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS GEN9_3D(3, 0, 0x43) +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS GEN9_3D(3, 0, 0x44) +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS GEN9_3D(3, 0, 0x45) +#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS GEN9_3D(3, 0, 0x46) +#define 
GEN9_3DSTATE_BINDING_TABLE_EDIT_PS GEN9_3D(3, 0, 0x47) + +#define GEN9_3DSTATE_VF_INSTANCING GEN9_3D(3, 0, 0x49) +#define GEN9_3DSTATE_VF_SGVS GEN9_3D(3, 0, 0x4a) +# define SGVS_ENABLE_INSTANCE_ID (1 << 31) +# define SGVS_INSTANCE_ID_COMPONENT_SHIFT 29 +# define SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT 16 +# define SGVS_ENABLE_VERTEX_ID (1 << 15) +# define SGVS_VERTEX_ID_COMPONENT_SHIFT 13 +# define SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT 0 +#define GEN9_3DSTATE_VF_TOPOLOGY GEN9_3D(3, 0, 0x4b) +# define POINTLIST 0x01 +# define LINELIST 0x02 +# define LINESTRIP 0x03 +# define TRILIST 0x04 +# define TRISTRIP 0x05 +# define TRIFAN 0x06 +# define QUADLIST 0x07 +# define QUADSTRIP 0x08 +# define LINELIST_ADJ 0x09 +# define LINESTRIP_ADJ 0x0A +# define TRILIST_ADJ 0x0B +# define TRISTRIP_ADJ 0x0C +# define TRISTRIP_REVERSE 0x0D +# define POLYGON 0x0E +# define RECTLIST 0x0F +# define LINELOOP 0x10 +# define POINTLIST_BF 0x11 +# define LINESTRIP_CONT 0x12 +# define LINESTRIP_BF 0x13 +# define LINESTRIP_CONT_BF 0x14 +# define TRIFAN_NOSTIPPLE 0x15 + +#define GEN9_3DSTATE_WM_CHROMAKEY GEN9_3D(3, 0, 0x4c) + +#define GEN9_3DSTATE_PS_BLEND GEN9_3D(3, 0, 0x4d) +# define PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) +# define PS_BLEND_HAS_WRITEABLE_RT (1 << 30) +# define PS_BLEND_COLOR_BLEND_ENABLE (1 << 29) +# define PS_BLEND_SRC_ALPHA_SHIFT 24 +# define PS_BLEND_DST_ALPHA_SHIFT 19 +# define PS_BLEND_SRC_SHIFT 14 +# define PS_BLEND_DST_SHIFT 9 +# define PS_BLEND_ALPHA_TEST_ENABLE (1 << 8) +# define PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7) + +#define GEN9_3DSTATE_WM_DEPTH_STENCIL GEN9_3D(3, 0, 0x4e) +/* DW1 */ +# define WM_DS_STENCIL_TEST_MASK_MASK INTEL_MASK(31, 24) +# define WM_DS_STENCIL_TEST_MASK_SHIFT 24 +# define WM_DS_STENCIL_WRITE_MASK_MASK INTEL_MASK(23, 16) +# define WM_DS_STENCIL_WRITE_MASK_SHIFT 16 +# define WM_DS_BF_STENCIL_TEST_MASK_MASK INTEL_MASK(15, 8) +# define WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8 +# define WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0) +# 
define WM_DS_DEPTH_FUNC_SHIFT 5 +# define WM_DS_DOUBLE_SIDED_STENCIL_ENABLE (1 << 4) +# define WM_DS_STENCIL_TEST_ENABLE (1 << 3) +# define WM_DS_STENCIL_BUFFER_WRITE_ENABLE (1 << 2) +# define WM_DS_DEPTH_TEST_ENABLE (1 << 1) +# define WM_DS_DEPTH_BUFFER_WRITE_ENABLE (1 << 0) +/* DW2 */ +# define WM_DS_STENCIL_TEST_MASK_MASK INTEL_MASK(31, 24) +# define WM_DS_STENCIL_TEST_MASK_SHIFT 24 +# define WM_DS_STENCIL_WRITE_MASK_MASK INTEL_MASK(23, 16) +# define WM_DS_STENCIL_WRITE_MASK_SHIFT 16 +# define WM_DS_BF_STENCIL_TEST_MASK_MASK INTEL_MASK(15, 8) +# define WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8 +# define WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0) +# define WM_DS_BF_STENCIL_WRITE_MASK_SHIFT 0 + +#define GEN9_3DSTATE_PS_EXTRA GEN9_3D(3, 0, 0x4f) +# define PSX_PIXEL_SHADER_VALID (1 << 31) +# define PSX_PIXEL_SHADER_NO_RT_WRITE (1 << 30) +# define PSX_OMASK_TO_RENDER_TARGET (1 << 29) +# define PSX_KILL_ENABLE (1 << 28) +# define PSX_PSCDEPTH_OFF (0 << 26) +# define PSX_PSCDEPTH_ON (1 << 26) +# define PSX_PSCDEPTH_ON_GE (2 << 26) +# define PSX_PSCDEPTH_ON_LE (3 << 26) +# define PSX_FORCE_COMPUTED_DEPTH (1 << 25) +# define PSX_USES_SOURCE_DEPTH (1 << 24) +# define PSX_USES_SOURCE_W (1 << 23) +# define PSX_ATTRIBUTE_ENABLE (1 << 8) +# define PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7) +# define PSX_SHADER_IS_PER_SAMPLE (1 << 6) +# define PSX_SHADER_HAS_UAV (1 << 2) +# define PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1) + +#define GEN9_3DSTATE_RASTER GEN9_3D(3, 0, 0x50) +/* DW1 */ +# define RASTER_FRONT_WINDING_CCW (1 << 21) +# define RASTER_CULL_BOTH (0 << 16) +# define RASTER_CULL_NONE (1 << 16) +# define RASTER_CULL_FRONT (2 << 16) +# define RASTER_CULL_BACK (3 << 16) +# define RASTER_SMOOTH_POINT_ENABLE (1 << 13) +# define RASTER_LINE_AA_ENABLE (1 << 2) +# define RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE (1 << 0) + +#define GEN9_3DSTATE_SBE_SWIZ GEN9_3D(3, 0, 0x51) +#define GEN9_3DSTATE_WM_HZ_OP GEN9_3D(3, 0, 0x52) + +#define GEN9_3DSTATE_COMPONENT_PACKING GEN6_3D(3, 0, 
0x55) + + + +#define GEN9_3DSTATE_DRAWING_RECTANGLE GEN9_3D(3, 1, 0x00) +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD GEN9_3D(3, 1, 0x02) +#define GEN9_3DSTATE_CHROMA_KEY GEN9_3D(3, 1, 0x04) + +#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET GEN9_3D(3, 1, 0x06) +#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN GEN9_3D(3, 1, 0x07) +#define GEN9_3DSTATE_LINE_STIPPLE GEN9_3D(3, 1, 0x08) +#define GEN9_3DSTATE_AA_LINE_PARAMS GEN9_3D(3, 1, 0x0a) +#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1 GEN9_3D(3, 1, 0x0c) +#define GEN9_3DSTATE_MONOFILTER_SIZE GEN9_3D(3, 1, 0x11) +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS GEN9_3D(3, 1, 0x12) +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS GEN9_3D(3, 1, 0x13) +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS GEN9_3D(3, 1, 0x14) +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS GEN9_3D(3, 1, 0x15) +#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS GEN9_3D(3, 1, 0x16) +/* DW1 */ +# define PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 +# define PUSH_CONSTANT_BUFFER_SIZE_SHIFT 0 + +#define GEN9_3DSTATE_SO_DECL_LIST GEN9_3D(3, 1, 0x17) +#define GEN9_3DSTATE_SO_BUFFER GEN9_3D(3, 1, 0x18) +#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC GEN9_3D(3, 1, 0x19) +#define GEN9_3DSTATE_GATHER_BUFFER_POOL_ALLOC GEN9_3D(3, 1, 0x1a) +#define GEN9_3DSTATE_DX9_CONSTANT_BUFFER_POOL_ALLOC GEN9_3D(3, 1, 0x1b) +#define GEN9_3DSTATE_SAMPLE_PATTERN GEN9_3D(3, 1, 0x1c) + + +/* for GEN9_PIPE_CONTROL */ +#define GEN9_PIPE_CONTROL GEN9_3D(3, 2, 0) +#define PIPE_CONTROL_CS_STALL (1 << 20) +#define PIPE_CONTROL_NOWRITE (0 << 14) +#define PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define PIPE_CONTROL_WRITE_TIME (3 << 14) +#define PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define PIPE_CONTROL_WC_FLUSH (1 << 12) +#define PIPE_CONTROL_IS_FLUSH (1 << 11) +#define PIPE_CONTROL_TC_FLUSH (1 << 10) +#define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define PIPE_CONTROL_FLUSH (1 << 7) +#define PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define 
PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) + + +#define GEN9_3DPRIMITIVE GEN9_3D(3, 3, 0) + +/* 3DPRIMITIVE bits */ +#define VERTEX_SEQUENTIAL (0 << 15) +#define VERTEX_RANDOM (1 << 15) + +#define ANISORATIO_2 0 +#define ANISORATIO_4 1 +#define ANISORATIO_6 2 +#define ANISORATIO_8 3 +#define ANISORATIO_10 4 +#define ANISORATIO_12 5 +#define ANISORATIO_14 6 +#define ANISORATIO_16 7 + +#define BLENDFACTOR_ONE 0x1 +#define BLENDFACTOR_SRC_COLOR 0x2 +#define BLENDFACTOR_SRC_ALPHA 0x3 +#define BLENDFACTOR_DST_ALPHA 0x4 +#define BLENDFACTOR_DST_COLOR 0x5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BLENDFACTOR_CONST_COLOR 0x7 +#define BLENDFACTOR_CONST_ALPHA 0x8 +#define BLENDFACTOR_SRC1_COLOR 0x9 +#define BLENDFACTOR_SRC1_ALPHA 0x0A +#define BLENDFACTOR_ZERO 0x11 +#define BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BLENDFACTOR_INV_DST_COLOR 0x15 +#define BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define GEN9_BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + +#define ALPHATEST_FORMAT_UNORM8 0 +#define ALPHATEST_FORMAT_FLOAT32 1 + +#define CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define CHROMAKEY_REPLACE_BLACK 1 + +#define CLIP_API_OGL 0 +#define CLIP_API_DX 1 + +#define CLIPMODE_NORMAL 0 +#define CLIPMODE_CLIP_ALL 1 +#define CLIPMODE_CLIP_NON_REJECTED 2 +#define CLIPMODE_REJECT_ALL 3 +#define CLIPMODE_ACCEPT_ALL 4 + +#define CLIP_NDCSPACE 0 +#define CLIP_SCREENSPACE 1 + +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define 
COMPAREFUNCTION_GEQUAL 7 + +#define COVERAGE_PIXELS_HALF 0 +#define COVERAGE_PIXELS_1 1 +#define COVERAGE_PIXELS_2 2 +#define COVERAGE_PIXELS_4 3 + +#define DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define DEPTHFORMAT_D32_FLOAT 1 +#define DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define DEPTHFORMAT_D16_UNORM 5 + +#define FLOATING_POINT_IEEE_754 0 +#define FLOATING_POINT_NON_IEEE_754 1 + +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + +#define LOGICOPFUNCTION_CLEAR 0 +#define LOGICOPFUNCTION_NOR 1 +#define LOGICOPFUNCTION_AND_INVERTED 2 +#define LOGICOPFUNCTION_COPY_INVERTED 3 +#define LOGICOPFUNCTION_AND_REVERSE 4 +#define LOGICOPFUNCTION_INVERT 5 +#define LOGICOPFUNCTION_XOR 6 +#define LOGICOPFUNCTION_NAND 7 +#define LOGICOPFUNCTION_AND 8 +#define LOGICOPFUNCTION_EQUIV 9 +#define LOGICOPFUNCTION_NOOP 10 +#define LOGICOPFUNCTION_OR_INVERTED 11 +#define LOGICOPFUNCTION_COPY 12 +#define LOGICOPFUNCTION_OR_REVERSE 13 +#define LOGICOPFUNCTION_OR 14 +#define LOGICOPFUNCTION_SET 15 + +#define MAPFILTER_NEAREST 0x0 +#define MAPFILTER_LINEAR 0x1 +#define MAPFILTER_ANISOTROPIC 0x2 +#define MAPFILTER_FLEXIBLE 0x3 +#define MAPFILTER_MONO 0x6 + +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + +#define POLYGON_FRONT_FACING 0 +#define POLYGON_BACK_FACING 1 + +#define PREFILTER_ALWAYS 0x0 +#define PREFILTER_NEVER 0x1 +#define PREFILTER_LESS 0x2 +#define PREFILTER_EQUAL 0x3 +#define PREFILTER_LEQUAL 0x4 +#define PREFILTER_GREATER 0x5 +#define PREFILTER_NOTEQUAL 0x6 +#define PREFILTER_GEQUAL 0x7 + +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + +#define SURFACE_MIPMAPLAYOUT_BELOW 0 +#define SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define 
SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define SURFACEFORMAT_R64G64_FLOAT 0x005 +#define SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define SURFACEFORMAT_R32G32B32_SINT 0x041 +#define SURFACEFORMAT_R32G32B32_UINT 0x042 +#define SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define SURFACEFORMAT_R32G32_FLOAT 0x085 +#define SURFACEFORMAT_R32G32_SINT 0x086 +#define SURFACEFORMAT_R32G32_UINT 0x087 +#define SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define SURFACEFORMAT_L32A32_FLOAT 0x08A +#define SURFACEFORMAT_R32G32_UNORM 0x08B +#define SURFACEFORMAT_R32G32_SNORM 0x08C +#define SURFACEFORMAT_R64_FLOAT 0x08D +#define SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define SURFACEFORMAT_A32X32_FLOAT 0x090 +#define SURFACEFORMAT_L32X32_FLOAT 0x091 +#define SURFACEFORMAT_I32X32_FLOAT 0x092 +#define SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define SURFACEFORMAT_R32G32_SSCALED 0x095 +#define SURFACEFORMAT_R32G32_USCALED 0x096 +#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define 
SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define SURFACEFORMAT_R16G16_UNORM 0x0CC +#define SURFACEFORMAT_R16G16_SNORM 0x0CD +#define SURFACEFORMAT_R16G16_SINT 0x0CE +#define SURFACEFORMAT_R16G16_UINT 0x0CF +#define SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define SURFACEFORMAT_R32_SINT 0x0D6 +#define SURFACEFORMAT_R32_UINT 0x0D7 +#define SURFACEFORMAT_R32_FLOAT 0x0D8 +#define SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define SURFACEFORMAT_L16A16_UNORM 0x0DF +#define SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define SURFACEFORMAT_I32_FLOAT 0x0E3 +#define SURFACEFORMAT_L32_FLOAT 0x0E4 +#define SURFACEFORMAT_A32_FLOAT 0x0E5 +#define SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define SURFACEFORMAT_R32_UNORM 0x0F1 +#define SURFACEFORMAT_R32_SNORM 0x0F2 +#define SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define SURFACEFORMAT_R32_SSCALED 0x0F8 +#define SURFACEFORMAT_R32_USCALED 0x0F9 +#define SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define 
SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define SURFACEFORMAT_R8G8_UNORM 0x106 +#define SURFACEFORMAT_R8G8_SNORM 0x107 +#define SURFACEFORMAT_R8G8_SINT 0x108 +#define SURFACEFORMAT_R8G8_UINT 0x109 +#define SURFACEFORMAT_R16_UNORM 0x10A +#define SURFACEFORMAT_R16_SNORM 0x10B +#define SURFACEFORMAT_R16_SINT 0x10C +#define SURFACEFORMAT_R16_UINT 0x10D +#define SURFACEFORMAT_R16_FLOAT 0x10E +#define SURFACEFORMAT_I16_UNORM 0x111 +#define SURFACEFORMAT_L16_UNORM 0x112 +#define SURFACEFORMAT_A16_UNORM 0x113 +#define SURFACEFORMAT_L8A8_UNORM 0x114 +#define SURFACEFORMAT_I16_FLOAT 0x115 +#define SURFACEFORMAT_L16_FLOAT 0x116 +#define SURFACEFORMAT_A16_FLOAT 0x117 +#define SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define SURFACEFORMAT_R8G8_SSCALED 0x11C +#define SURFACEFORMAT_R8G8_USCALED 0x11D +#define SURFACEFORMAT_R16_SSCALED 0x11E +#define SURFACEFORMAT_R16_USCALED 0x11F +#define SURFACEFORMAT_R8_UNORM 0x140 +#define SURFACEFORMAT_R8_SNORM 0x141 +#define SURFACEFORMAT_R8_SINT 0x142 +#define SURFACEFORMAT_R8_UINT 0x143 +#define SURFACEFORMAT_A8_UNORM 0x144 +#define SURFACEFORMAT_I8_UNORM 0x145 +#define SURFACEFORMAT_L8_UNORM 0x146 +#define SURFACEFORMAT_P4A4_UNORM 0x147 +#define SURFACEFORMAT_A4P4_UNORM 0x148 +#define SURFACEFORMAT_R8_SSCALED 0x149 +#define SURFACEFORMAT_R8_USCALED 0x14A +#define SURFACEFORMAT_R1_UINT 0x181 +#define SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define SURFACEFORMAT_BC1_UNORM 0x186 +#define SURFACEFORMAT_BC2_UNORM 0x187 +#define SURFACEFORMAT_BC3_UNORM 0x188 +#define SURFACEFORMAT_BC4_UNORM 0x189 +#define SURFACEFORMAT_BC5_UNORM 0x18A +#define SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define SURFACEFORMAT_MONO8 0x18E +#define 
SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define SURFACEFORMAT_DXT1_RGB 0x191 +#define SURFACEFORMAT_FXT1 0x192 +#define SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define SURFACEFORMAT_BC4_SNORM 0x199 +#define SURFACEFORMAT_BC5_SNORM 0x19A +#define SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define SURFACE_1D 0 +#define SURFACE_2D 1 +#define SURFACE_3D 2 +#define SURFACE_CUBE 3 +#define SURFACE_BUFFER 4 +#define SURFACE_NULL 7 + +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 + +#define THREAD_PRIORITY_NORMAL 0 +#define THREAD_PRIORITY_HIGH 1 + +#define VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define COMPONENT_NOSTORE 0 +#define COMPONENT_STORE_SRC 1 +#define COMPONENT_STORE_0 2 +#define COMPONENT_STORE_1_FLT 3 +#define COMPONENT_STORE_1_INT 4 +#define COMPONENT_STORE_VID 5 +#define COMPONENT_STORE_IID 6 +#define COMPONENT_STORE_PID 7 + +/* Execution Unit (EU) defines + */ + +#define GEN9_ALIGN_1 0 +#define GEN9_ALIGN_16 1 + +#define GEN9_ADDRESS_DIRECT 0 +#define GEN9_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define GEN9_CHANNEL_X 0 +#define GEN9_CHANNEL_Y 1 +#define GEN9_CHANNEL_Z 2 +#define GEN9_CHANNEL_W 3 + +#define GEN9_COMPRESSION_NONE 0 +#define GEN9_COMPRESSION_2NDHALF 1 +#define GEN9_COMPRESSION_COMPRESSED 2 + +#define GEN9_CONDITIONAL_NONE 0 +#define GEN9_CONDITIONAL_Z 1 +#define GEN9_CONDITIONAL_NZ 2 +#define GEN9_CONDITIONAL_EQ 1 /* Z */ +#define GEN9_CONDITIONAL_NEQ 2 /* NZ */ +#define 
GEN9_CONDITIONAL_G 3 +#define GEN9_CONDITIONAL_GE 4 +#define GEN9_CONDITIONAL_L 5 +#define GEN9_CONDITIONAL_LE 6 +#define GEN9_CONDITIONAL_C 7 +#define GEN9_CONDITIONAL_O 8 + +#define GEN9_DEBUG_NONE 0 +#define GEN9_DEBUG_BREAKPOINT 1 + +#define GEN9_DEPENDENCY_NORMAL 0 +#define GEN9_DEPENDENCY_NOTCLEARED 1 +#define GEN9_DEPENDENCY_NOTCHECKED 2 +#define GEN9_DEPENDENCY_DISABLE 3 + +#define GEN9_EXECUTE_1 0 +#define GEN9_EXECUTE_2 1 +#define GEN9_EXECUTE_4 2 +#define GEN9_EXECUTE_8 3 +#define GEN9_EXECUTE_16 4 +#define GEN9_EXECUTE_32 5 + +#define GEN9_HORIZONTAL_STRIDE_0 0 +#define GEN9_HORIZONTAL_STRIDE_1 1 +#define GEN9_HORIZONTAL_STRIDE_2 2 +#define GEN9_HORIZONTAL_STRIDE_4 3 + +#define GEN9_INSTRUCTION_NORMAL 0 +#define GEN9_INSTRUCTION_SATURATE 1 + +#define GEN9_OPCODE_MOV 1 +#define GEN9_OPCODE_SEL 2 +#define GEN9_OPCODE_NOT 4 +#define GEN9_OPCODE_AND 5 +#define GEN9_OPCODE_OR 6 +#define GEN9_OPCODE_XOR 7 +#define GEN9_OPCODE_SHR 8 +#define GEN9_OPCODE_SHL 9 +#define GEN9_OPCODE_RSR 10 +#define GEN9_OPCODE_RSL 11 +#define GEN9_OPCODE_ASR 12 +#define GEN9_OPCODE_CMP 16 +#define GEN9_OPCODE_JMPI 32 +#define GEN9_OPCODE_IF 34 +#define GEN9_OPCODE_IFF 35 +#define GEN9_OPCODE_ELSE 36 +#define GEN9_OPCODE_ENDIF 37 +#define GEN9_OPCODE_DO 38 +#define GEN9_OPCODE_WHILE 39 +#define GEN9_OPCODE_BREAK 40 +#define GEN9_OPCODE_CONTINUE 41 +#define GEN9_OPCODE_HALT 42 +#define GEN9_OPCODE_MSAVE 44 +#define GEN9_OPCODE_MRESTORE 45 +#define GEN9_OPCODE_PUSH 46 +#define GEN9_OPCODE_POP 47 +#define GEN9_OPCODE_WAIT 48 +#define GEN9_OPCODE_SEND 49 +#define GEN9_OPCODE_ADD 64 +#define GEN9_OPCODE_MUL 65 +#define GEN9_OPCODE_AVG 66 +#define GEN9_OPCODE_FRC 67 +#define GEN9_OPCODE_RNDU 68 +#define GEN9_OPCODE_RNDD 69 +#define GEN9_OPCODE_RNDE 70 +#define GEN9_OPCODE_RNDZ 71 +#define GEN9_OPCODE_MAC 72 +#define GEN9_OPCODE_MACH 73 +#define GEN9_OPCODE_LZD 74 +#define GEN9_OPCODE_SAD2 80 +#define GEN9_OPCODE_SADA2 81 +#define GEN9_OPCODE_DP4 84 +#define GEN9_OPCODE_DPH 85 +#define 
GEN9_OPCODE_DP3 86 +#define GEN9_OPCODE_DP2 87 +#define GEN9_OPCODE_DPA2 88 +#define GEN9_OPCODE_LINE 89 +#define GEN9_OPCODE_NOP 126 + +#define GEN9_PREDICATE_NONE 0 +#define GEN9_PREDICATE_NORMAL 1 +#define GEN9_PREDICATE_ALIGN1_ANYV 2 +#define GEN9_PREDICATE_ALIGN1_ALLV 3 +#define GEN9_PREDICATE_ALIGN1_ANY2H 4 +#define GEN9_PREDICATE_ALIGN1_ALL2H 5 +#define GEN9_PREDICATE_ALIGN1_ANY4H 6 +#define GEN9_PREDICATE_ALIGN1_ALL4H 7 +#define GEN9_PREDICATE_ALIGN1_ANY8H 8 +#define GEN9_PREDICATE_ALIGN1_ALL8H 9 +#define GEN9_PREDICATE_ALIGN1_ANY16H 10 +#define GEN9_PREDICATE_ALIGN1_ALL16H 11 +#define GEN9_PREDICATE_ALIGN16_REPLICATE_X 2 +#define GEN9_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define GEN9_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define GEN9_PREDICATE_ALIGN16_REPLICATE_W 5 +#define GEN9_PREDICATE_ALIGN16_ANY4H 6 +#define GEN9_PREDICATE_ALIGN16_ALL4H 7 + +#define GEN9_ARCHITECTURE_REGISTER_FILE 0 +#define GEN9_GENERAL_REGISTER_FILE 1 +#define GEN9_MESSAGE_REGISTER_FILE 2 +#define GEN9_IMMEDIATE_VALUE 3 + +#define GEN9_REGISTER_TYPE_UD 0 +#define GEN9_REGISTER_TYPE_D 1 +#define GEN9_REGISTER_TYPE_UW 2 +#define GEN9_REGISTER_TYPE_W 3 +#define GEN9_REGISTER_TYPE_UB 4 +#define GEN9_REGISTER_TYPE_B 5 +#define GEN9_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? 
*/ +#define GEN9_REGISTER_TYPE_HF 6 +#define GEN9_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define GEN9_REGISTER_TYPE_F 7 + +#define GEN9_ARF_NULL 0x00 +#define GEN9_ARF_ADDRESS 0x10 +#define GEN9_ARF_ACCUMULATOR 0x20 +#define GEN9_ARF_FLAG 0x30 +#define GEN9_ARF_MASK 0x40 +#define GEN9_ARF_MASK_STACK 0x50 +#define GEN9_ARF_MASK_STACK_DEPTH 0x60 +#define GEN9_ARF_STATE 0x70 +#define GEN9_ARF_CONTROL 0x80 +#define GEN9_ARF_NOTIFICATION_COUNT 0x90 +#define GEN9_ARF_IP 0xA0 + +#define GEN9_AMASK 0 +#define GEN9_IMASK 1 +#define GEN9_LMASK 2 +#define GEN9_CMASK 3 + +#define GEN9_THREAD_NORMAL 0 +#define GEN9_THREAD_ATOMIC 1 +#define GEN9_THREAD_SWITCH 2 + +#define GEN9_VERTICAL_STRIDE_0 0 +#define GEN9_VERTICAL_STRIDE_1 1 +#define GEN9_VERTICAL_STRIDE_2 2 +#define GEN9_VERTICAL_STRIDE_4 3 +#define GEN9_VERTICAL_STRIDE_8 4 +#define GEN9_VERTICAL_STRIDE_16 5 +#define GEN9_VERTICAL_STRIDE_32 6 +#define GEN9_VERTICAL_STRIDE_64 7 +#define GEN9_VERTICAL_STRIDE_128 8 +#define GEN9_VERTICAL_STRIDE_256 9 +#define GEN9_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define GEN9_WIDTH_1 0 +#define GEN9_WIDTH_2 1 +#define GEN9_WIDTH_4 2 +#define GEN9_WIDTH_8 3 +#define GEN9_WIDTH_16 4 + +#define GEN9_STATELESS_BUFFER_BOUNDARY_1K 0 +#define GEN9_STATELESS_BUFFER_BOUNDARY_2K 1 +#define GEN9_STATELESS_BUFFER_BOUNDARY_4K 2 +#define GEN9_STATELESS_BUFFER_BOUNDARY_8K 3 +#define GEN9_STATELESS_BUFFER_BOUNDARY_16K 4 +#define GEN9_STATELESS_BUFFER_BOUNDARY_32K 5 +#define GEN9_STATELESS_BUFFER_BOUNDARY_64K 6 +#define GEN9_STATELESS_BUFFER_BOUNDARY_128K 7 +#define GEN9_STATELESS_BUFFER_BOUNDARY_256K 8 +#define GEN9_STATELESS_BUFFER_BOUNDARY_512K 9 +#define GEN9_STATELESS_BUFFER_BOUNDARY_1M 10 +#define GEN9_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define GEN9_POLYGON_FACING_FRONT 0 +#define GEN9_POLYGON_FACING_BACK 1 + +#define GEN9_MESSAGE_TARGET_NULL 0 +#define GEN9_MESSAGE_TARGET_MATH 1 +#define GEN9_MESSAGE_TARGET_SAMPLER 2 +#define GEN9_MESSAGE_TARGET_GATEWAY 3 
+#define GEN9_MESSAGE_TARGET_DATAPORT_READ 4 +#define GEN9_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define GEN9_MESSAGE_TARGET_URB 6 +#define GEN9_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define GEN9_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define GEN9_SAMPLER_RETURN_FORMAT_UINT32 2 +#define GEN9_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define GEN9_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define GEN9_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define GEN9_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define GEN9_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define GEN9_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define GEN9_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define GEN9_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define GEN9_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define GEN9_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define GEN9_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define GEN9_SAMPLER_MESSAGE_SIMD8_LD 3 +#define GEN9_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define GEN9_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define GEN9_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define GEN9_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define GEN9_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define GEN9_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define GEN9_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define GEN9_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define GEN9_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define GEN9_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define GEN9_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define GEN9_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define GEN9_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define GEN9_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define GEN9_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define GEN9_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define GEN9_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define 
GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define GEN9_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define GEN9_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define GEN9_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define GEN9_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define GEN9_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define GEN9_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define GEN9_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define GEN9_MATH_FUNCTION_INV 1 +#define GEN9_MATH_FUNCTION_LOG 2 +#define GEN9_MATH_FUNCTION_EXP 3 +#define GEN9_MATH_FUNCTION_SQRT 4 +#define GEN9_MATH_FUNCTION_RSQ 5 +#define GEN9_MATH_FUNCTION_SIN 6 /* was 7 */ +#define GEN9_MATH_FUNCTION_COS 7 /* was 8 */ +#define GEN9_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define GEN9_MATH_FUNCTION_TAN 9 +#define GEN9_MATH_FUNCTION_POW 10 +#define GEN9_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define GEN9_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define GEN9_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define GEN9_MATH_INTEGER_UNSIGNED 0 +#define GEN9_MATH_INTEGER_SIGNED 1 + +#define GEN9_MATH_PRECISION_FULL 0 +#define GEN9_MATH_PRECISION_PARTIAL 1 + +#define GEN9_MATH_SATURATE_NONE 0 +#define GEN9_MATH_SATURATE_SATURATE 1 + +#define GEN9_MATH_DATA_VECTOR 0 +#define GEN9_MATH_DATA_SCALAR 1 + +#define GEN9_URB_OPCODE_WRITE 0 + +#define GEN9_URB_SWIZZLE_NONE 0 +#define GEN9_URB_SWIZZLE_INTERLEAVE 1 +#define GEN9_URB_SWIZZLE_TRANSPOSE 2 + +#define GEN9_SCRATCH_SPACE_SIZE_1K 0 +#define GEN9_SCRATCH_SPACE_SIZE_2K 1 +#define GEN9_SCRATCH_SPACE_SIZE_4K 2 +#define GEN9_SCRATCH_SPACE_SIZE_8K 3 +#define GEN9_SCRATCH_SPACE_SIZE_16K 4 +#define GEN9_SCRATCH_SPACE_SIZE_32K 5 +#define GEN9_SCRATCH_SPACE_SIZE_64K 6 +#define 
GEN9_SCRATCH_SPACE_SIZE_128K 7 +#define GEN9_SCRATCH_SPACE_SIZE_256K 8 +#define GEN9_SCRATCH_SPACE_SIZE_512K 9 +#define GEN9_SCRATCH_SPACE_SIZE_1M 10 +#define GEN9_SCRATCH_SPACE_SIZE_2M 11 + +struct gen9_blend_state { + struct { + /* 00 */ uint32_t pad:19; + /* 19 */ uint32_t y_dither_offset:2; + /* 21 */ uint32_t x_dither_offset:2; + /* 23 */ uint32_t color_dither_enable:1; + /* 24 */ uint32_t alpha_test_function:3; + /* 27 */ uint32_t alpha_test:1; + /* 28 */ uint32_t alpha_to_coverage_dither:1; + /* 29 */ uint32_t alpha_to_one:1; + /* 30 */ uint32_t ia_blend:1; + /* 31 */ uint32_t alpha_to_coverage:1; + } common; + + struct { + /* 00 */ uint32_t write_disable_blue:1; + /* 01 */ uint32_t write_disable_green:1; + /* 02 */ uint32_t write_disable_red:1; + /* 03 */ uint32_t write_disable_alpha:1; + /* 04 */ uint32_t pad0:1; + /* 05 */ uint32_t alpha_blend_function:3; + /* 08 */ uint32_t dest_alpha_blend_factor:5; + /* 13 */ uint32_t source_alpha_blend_factor:5; + /* 18 */ uint32_t color_blend_function:3; + /* 21 */ uint32_t dest_blend_factor:5; + /* 26 */ uint32_t source_blend_factor:5; + /* 31 */ uint32_t color_blend:1; + /* 32 */ uint32_t post_blend_clamp:1; + /* 33 */ uint32_t pre_blend_clamp:1; + /* 34 */ uint32_t color_clamp_range:2; + /* 36 */ uint32_t pre_blend_source_only_clamp:1; + /* 37 */ uint32_t pad1:22; + /* 59 */ uint32_t logic_op_function:4; + /* 63 */ uint32_t logic_op:1; + } rt; +}; + +struct gen9_color_calc_state { + struct { + /* 00 */ uint32_t alpha_test_format:1; + /* 01 */ uint32_t pad0:14; + /* 15 */ uint32_t round_disable:1; + /* 16 */ uint32_t bf_stencil_ref:8; + /* 24 */ uint32_t stencil_ref:8; + } cc0; + + union { + float alpha_ref_f; + struct { + uint32_t ui:8; + uint32_t pad0:24; + } alpha_ref_fi; + } cc1; + + float constant_r; + float constant_g; + float constant_b; + float constant_a; +}; + +struct gen9_sampler_state { + struct { + /* 00 */ unsigned int aniso_algorithm:1; + /* 01 */ unsigned int lod_bias:13; + /* 14 */ unsigned int 
min_filter:3; + /* 17 */ unsigned int mag_filter:3; + /* 20 */ unsigned int mip_filter:2; + /* 22 */ unsigned int base_level:5; + /* 27 */ unsigned int lod_preclamp:2; + /* 29 */ unsigned int default_color_mode:1; + /* 30 */ unsigned int flexible_filter_clamp:1; + /* 31 */ unsigned int disable:1; + } ss0; + + struct { + /* 00 */ unsigned int cube_control_mode:1; + /* 01 */ unsigned int shadow_function:3; + /* 04 */ unsigned int chroma_key_mode:1; + /* 05 */ unsigned int chroma_key_index:2; + /* 07 */ unsigned int chroma_key_enable:1; + /* 08 */ unsigned int max_lod:12; + /* 20 */ unsigned int min_lod:12; + } ss1; + + struct { + unsigned int pad:6; + unsigned int default_color_pointer:26; + } ss2; + + struct { + /* 00 */ unsigned int r_wrap_mode:3; + /* 03 */ unsigned int t_wrap_mode:3; + /* 06 */ unsigned int s_wrap_mode:3; + /* 09 */ unsigned int pad:1; + /* 10 */ unsigned int non_normalized_coord:1; + /* 11 */ unsigned int trilinear_quality:2; + /* 13 */ unsigned int address_round:6; + /* 19 */ unsigned int max_aniso:3; + /* 22 */ unsigned int pad0:2; + /* 24 */ unsigned int non_separable_filter:8; + } ss3; +}; + +/* Surface state DW0 */ +#define SURFACE_RC_READ_WRITE (1 << 8) +#define SURFACE_TILED (1 << 13) +#define SURFACE_TILED_Y (1 << 12) +#define SURFACE_FORMAT_SHIFT 18 +#define SURFACE_VALIGN_1 (0 << 16) /* reserved! */ +#define SURFACE_VALIGN_4 (1 << 16) +#define SURFACE_VALIGN_8 (2 << 16) +#define SURFACE_VALIGN_16 (3 << 16) +#define SURFACE_HALIGN_1 (0 << 14) /* reserved! 
*/ +#define SURFACE_HALIGN_4 (1 << 14) +#define SURFACE_HALIGN_8 (2 << 14) +#define SURFACE_HALIGN_16 (3 << 14) +#define SURFACE_TYPE_SHIFT 29 + +/* Surface state DW2 */ +#define SURFACE_HEIGHT_SHIFT 16 +#define SURFACE_WIDTH_SHIFT 0 + +/* Surface state DW3 */ +#define SURFACE_DEPTH_SHIFT 21 +#define SURFACE_PITCH_SHIFT 0 + +#define SWIZZLE_ZERO 0 +#define SWIZZLE_ONE 1 +#define SWIZZLE_RED 4 +#define SWIZZLE_GREEN 5 +#define SWIZZLE_BLUE 6 +#define SWIZZLE_ALPHA 7 +#define __SURFACE_SWIZZLE(r,g,b,a) \ + ((a) << 16 | (b) << 19 | (g) << 22 | (r) << 25) +#define SURFACE_SWIZZLE(r,g,b,a) \ + __SURFACE_SWIZZLE(SWIZZLE_##r, SWIZZLE_##g, SWIZZLE_##b, SWIZZLE_##a) + +typedef enum { + SAMPLER_FILTER_NEAREST = 0, + SAMPLER_FILTER_BILINEAR, + FILTER_COUNT +} sampler_filter_t; + +typedef enum { + SAMPLER_EXTEND_NONE = 0, + SAMPLER_EXTEND_REPEAT, + SAMPLER_EXTEND_PAD, + SAMPLER_EXTEND_REFLECT, + EXTEND_COUNT +} sampler_extend_t; + +#endif diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 78ed5407..f0d171ac 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -84,6 +84,10 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_HANDLE_LUT 0 #define DBG_NO_WT 0 #define DBG_NO_WC_MMAP 0 +#define DBG_NO_BLT_Y 0 +#define DBG_NO_SCANOUT_Y 0 +#define DBG_NO_DIRTYFB 0 +#define DBG_NO_DETILING 0 #define DBG_DUMP 0 #define DBG_NO_MALLOC_CACHE 0 @@ -96,11 +100,6 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define SHOW_BATCH_BEFORE 0 #define SHOW_BATCH_AFTER 0 -#if !USE_WC_MMAP -#undef DBG_NO_WC_MMAP -#define DBG_NO_WC_MMAP 1 -#endif - #if 0 #define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__)) #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__)) @@ -187,6 +186,15 @@ struct local_i915_gem_caching { #define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching) #define 
LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching) +struct local_i915_gem_mmap { + uint32_t handle; + uint32_t pad; + uint64_t offset; + uint64_t size; + uint64_t addr_ptr; +}; +#define LOCAL_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap) + struct local_i915_gem_mmap2 { uint32_t handle; uint32_t pad; @@ -216,6 +224,12 @@ static struct kgem_bo *__kgem_freed_bo; static struct kgem_request *__kgem_freed_request; static struct drm_i915_gem_exec_object2 _kgem_dummy_exec; +static inline struct sna *__to_sna(struct kgem *kgem) +{ + /* minor layering violations */ + return container_of(kgem, struct sna, kgem); +} + static inline int bytes(struct kgem_bo *bo) { return __kgem_bo_size(bo); @@ -224,25 +238,31 @@ static inline int bytes(struct kgem_bo *bo) #define bucket(B) (B)->size.pages.bucket #define num_pages(B) (B)->size.pages.count -static int do_ioctl(int fd, unsigned long req, void *arg) +static int __do_ioctl(int fd, unsigned long req, void *arg) { - int err; - -restart: - if (ioctl(fd, req, arg) == 0) - return 0; + do { + int err; - err = errno; + switch ((err = errno)) { + case EAGAIN: + sched_yield(); + case EINTR: + break; + default: + return -err; + } - if (err == EINTR) - goto restart; + if (likely(ioctl(fd, req, arg) == 0)) + return 0; + } while (1); +} - if (err == EAGAIN) { - sched_yield(); - goto restart; - } +inline static int do_ioctl(int fd, unsigned long req, void *arg) +{ + if (likely(ioctl(fd, req, arg) == 0)) + return 0; - return -err; + return __do_ioctl(fd, req, arg); } #ifdef DEBUG_MEMORY @@ -266,6 +286,9 @@ static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) assert(bo); + if (!kgem->can_fence && kgem->gen >= 040 && bo->tiling) + return; /* lies */ + VG_CLEAR(tiling); tiling.handle = bo->handle; tiling.tiling_mode = bo->tiling; @@ -273,7 +296,7 @@ static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) 
assert(tiling.tiling_mode == bo->tiling); } -static void assert_cacheing(struct kgem *kgem, struct kgem_bo *bo) +static void assert_caching(struct kgem *kgem, struct kgem_bo *bo) { struct local_i915_gem_caching arg; int expect = kgem->has_llc ? SNOOPED : UNCACHED; @@ -294,24 +317,117 @@ static void assert_bo_retired(struct kgem_bo *bo) assert(bo->refcnt); assert(bo->rq == NULL); assert(bo->exec == NULL); + assert(!bo->needs_flush); assert(list_is_empty(&bo->request)); } #else #define assert_tiling(kgem, bo) -#define assert_cacheing(kgem, bo) +#define assert_caching(kgem, bo) #define assert_bo_retired(bo) #endif +static int __find_debugfs(struct kgem *kgem) +{ + int i; + + for (i = 0; i < DRM_MAX_MINOR; i++) { + char path[80]; + + sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i); + if (access(path, R_OK) == 0) + return i; + + sprintf(path, "/debug/dri/%d/i915_wedged", i); + if (access(path, R_OK) == 0) + return i; + } + + return -1; +} + +static int kgem_get_minor(struct kgem *kgem) +{ + struct stat st; + + if (fstat(kgem->fd, &st)) + return __find_debugfs(kgem); + + if (!S_ISCHR(st.st_mode)) + return __find_debugfs(kgem); + + return st.st_rdev & 0x63; +} + +static bool find_hang_state(struct kgem *kgem, char *path, int maxlen) +{ + int minor = kgem_get_minor(kgem); + + /* Search for our hang state in a few canonical locations. + * In the unlikely event of having multiple devices, we + * will need to check which minor actually corresponds to ours. 
+ */ + + snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor); + if (access(path, R_OK) == 0) + return true; + + snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor); + if (access(path, R_OK) == 0) + return true; + + snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor); + if (access(path, R_OK) == 0) + return true; + + path[0] = '\0'; + return false; +} + +static bool has_error_state(struct kgem *kgem, char *path) +{ + bool ret = false; + char no; + int fd; + + fd = open(path, O_RDONLY); + if (fd >= 0) { + ret = read(fd, &no, 1) == 1 && no != 'N'; + close(fd); + } + + return ret; +} + +static int kgem_get_screen_index(struct kgem *kgem) +{ + return __to_sna(kgem)->scrn->scrnIndex; +} + static void __kgem_set_wedged(struct kgem *kgem) { + static int once; + char path[256]; + + if (kgem->wedged) + return; + + if (!once && + find_hang_state(kgem, path, sizeof(path)) && + has_error_state(kgem, path)) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, + "When reporting this, please include %s and the full dmesg.\n", + path); + once = 1; + } + kgem->wedged = true; - sna_render_mark_wedged(container_of(kgem, struct sna, kgem)); + sna_render_mark_wedged(__to_sna(kgem)); } static void kgem_sna_reset(struct kgem *kgem) { - struct sna *sna = container_of(kgem, struct sna, kgem); + struct sna *sna = __to_sna(kgem); sna->render.reset(sna); sna->blt_state.fill_bo = 0; @@ -319,7 +435,7 @@ static void kgem_sna_reset(struct kgem *kgem) static void kgem_sna_flush(struct kgem *kgem) { - struct sna *sna = container_of(kgem, struct sna, kgem); + struct sna *sna = __to_sna(kgem); sna->render.flush(sna); @@ -327,22 +443,53 @@ static void kgem_sna_flush(struct kgem *kgem) sna_render_flush_solid(sna); } -static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) +static bool kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo->scanout && bo->delta) { + DBG(("%s: releasing fb=%d for handle=%d\n", + __FUNCTION__, 
bo->delta, bo->handle)); + /* XXX will leak if we are not DRM_MASTER. *shrug* */ + do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta); + bo->delta = 0; + return true; + } else + return false; +} + +static bool kgem_set_tiling(struct kgem *kgem, struct kgem_bo *bo, + int tiling, int stride) { struct drm_i915_gem_set_tiling set_tiling; int err; + if (tiling == bo->tiling) { + if (tiling == I915_TILING_NONE) { + bo->pitch = stride; + return true; + } + if (stride == bo->pitch) + return true; + } + if (DBG_NO_TILING) return false; VG_CLEAR(set_tiling); restart: - set_tiling.handle = handle; + set_tiling.handle = bo->handle; set_tiling.tiling_mode = tiling; - set_tiling.stride = stride; + set_tiling.stride = tiling ? stride : 0; - if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) - return true; + if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) { + bo->tiling = set_tiling.tiling_mode; + bo->pitch = set_tiling.tiling_mode ? set_tiling.stride : stride; + DBG(("%s: handle=%d, tiling=%d [%d], pitch=%d [%d]: %d\n", + __FUNCTION__, bo->handle, + bo->tiling, tiling, + bo->pitch, stride, + set_tiling.tiling_mode == tiling)); + return set_tiling.tiling_mode == tiling; + } err = errno; if (err == EINTR) @@ -353,6 +500,11 @@ restart: goto restart; } + if (err == EBUSY && kgem_bo_rmfb(kgem, bo)) + goto restart; + + ERR(("%s: failed to set-tiling(tiling=%d, pitch=%d) for handle=%d: %d\n", + __FUNCTION__, tiling, stride, bo->handle, err)); return false; } @@ -437,10 +589,15 @@ static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bytes(bo))); + if (bo->tiling && !kgem->can_fence) + return NULL; + VG_CLEAR(gtt); retry_gtt: gtt.handle = bo->handle; if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, >t))) { + DBG(("%s: failed %d, throttling/cleaning caches\n", + __FUNCTION__, err)); assert(err != EINVAL); (void)__kgem_throttle_retire(kgem, 0); @@ -460,6 +617,8 @@ 
retry_mmap: kgem->fd, gtt.offset); if (ptr == MAP_FAILED) { err = errno; + DBG(("%s: failed %d, throttling/cleaning caches\n", + __FUNCTION__, err)); assert(err != EINVAL); if (__kgem_throttle_retire(kgem, 0)) @@ -498,6 +657,8 @@ retry_wc: wc.size = bytes(bo); wc.flags = I915_MMAP_WC; if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) { + DBG(("%s: failed %d, throttling/cleaning caches\n", + __FUNCTION__, err)); assert(err != EINVAL); if (__kgem_throttle_retire(kgem, 0)) @@ -519,16 +680,19 @@ retry_wc: static void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) { - struct drm_i915_gem_mmap mmap_arg; + struct local_i915_gem_mmap arg; int err; + VG_CLEAR(arg); + arg.offset = 0; + retry: - VG_CLEAR(mmap_arg); - mmap_arg.handle = bo->handle; - mmap_arg.offset = 0; - mmap_arg.size = bytes(bo); - if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) { - assert(err != EINVAL); + arg.handle = bo->handle; + arg.size = bytes(bo); + if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP, &arg))) { + DBG(("%s: failed %d, throttling/cleaning caches\n", + __FUNCTION__, err)); + assert(err != -EINVAL || bo->prime); if (__kgem_throttle_retire(kgem, 0)) goto retry; @@ -536,15 +700,16 @@ retry: if (kgem_cleanup_cache(kgem)) goto retry; - ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n", - __FUNCTION__, bo->handle, bytes(bo), -err)); + ERR(("%s: failed to mmap handle=%d (prime? 
%d), %d bytes, into CPU domain: %d\n", + __FUNCTION__, bo->handle, bo->prime, bytes(bo), -err)); + bo->purged = 1; return NULL; } - VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); + VG(VALGRIND_MAKE_MEM_DEFINED(arg.addr_ptr, bytes(bo))); DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); - return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr; + return bo->map__cpu = (void *)(uintptr_t)arg.addr_ptr; } static int gem_write(int fd, uint32_t handle, @@ -634,16 +799,10 @@ static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo) assert(bo->exec == NULL); assert(list_is_empty(&bo->vma)); - if (bo->rq) { - __kgem_bo_clear_busy(bo); - kgem_retire(kgem); - assert_bo_retired(bo); - } else { - assert(bo->exec == NULL); - assert(list_is_empty(&bo->request)); - assert(!bo->needs_flush); - ASSERT_IDLE(kgem, bo->handle); - } + if (bo->rq) + __kgem_retire_requests_upto(kgem, bo); + ASSERT_IDLE(kgem, bo->handle); + assert_bo_retired(bo); } static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo) @@ -655,10 +814,8 @@ static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo) assert(list_is_empty(&bo->vma)); if (bo->rq) { - if (!__kgem_busy(kgem, bo->handle)) { - __kgem_bo_clear_busy(bo); - kgem_retire(kgem); - } + if (!__kgem_busy(kgem, bo->handle)) + __kgem_retire_requests_upto(kgem, bo); } else { assert(!bo->needs_flush); ASSERT_IDLE(kgem, bo->handle); @@ -694,6 +851,8 @@ retry: } if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) { + DBG(("%s: failed %d, throttling/cleaning caches\n", + __FUNCTION__, err)); assert(err != EINVAL); (void)__kgem_throttle_retire(kgem, 0); @@ -728,27 +887,21 @@ static uint32_t gem_create(int fd, int num_pages) return create.handle; } -static bool +static void kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo) { -#if DBG_NO_MADV - return true; -#else +#if !DBG_NO_MADV struct drm_i915_gem_madvise madv; assert(bo->exec == NULL); - assert(!bo->purged); 
VG_CLEAR(madv); madv.handle = bo->handle; madv.madv = I915_MADV_DONTNEED; if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { - bo->purged = 1; - kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; - return madv.retained; + bo->purged = true; + kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU; } - - return true; #endif } @@ -788,7 +941,7 @@ kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo) madv.madv = I915_MADV_WILLNEED; if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { bo->purged = !madv.retained; - kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; + kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU; return madv.retained; } @@ -869,13 +1022,17 @@ static struct kgem_request *__kgem_request_alloc(struct kgem *kgem) { struct kgem_request *rq; - rq = __kgem_freed_request; - if (rq) { - __kgem_freed_request = *(struct kgem_request **)rq; + if (unlikely(kgem->wedged)) { + rq = &kgem->static_request; } else { - rq = malloc(sizeof(*rq)); - if (rq == NULL) - rq = &kgem->static_request; + rq = __kgem_freed_request; + if (rq) { + __kgem_freed_request = *(struct kgem_request **)rq; + } else { + rq = malloc(sizeof(*rq)); + if (rq == NULL) + rq = &kgem->static_request; + } } list_init(&rq->buffers); @@ -925,11 +1082,11 @@ total_ram_size(void) #ifdef HAVE_STRUCT_SYSINFO_TOTALRAM struct sysinfo info; if (sysinfo(&info) == 0) - return info.totalram * info.mem_unit; + return (size_t)info.totalram * info.mem_unit; #endif #ifdef _SC_PHYS_PAGES - return sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); + return (size_t)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); #endif return 0; @@ -1150,6 +1307,10 @@ static bool test_has_wc_mmap(struct kgem *kgem) if (DBG_NO_WC_MMAP) return false; + /* XXX See https://bugs.freedesktop.org/show_bug.cgi?id=90841 */ + if (kgem->gen < 033) + return false; + if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1) return false; @@ -1187,7 +1348,7 @@ static 
bool test_has_caching(struct kgem *kgem) static bool test_has_userptr(struct kgem *kgem) { - uint32_t handle; + struct local_i915_gem_userptr arg; void *ptr; if (DBG_NO_USERPTR) @@ -1200,11 +1361,23 @@ static bool test_has_userptr(struct kgem *kgem) if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE)) return false; - handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false); - gem_close(kgem->fd, handle); - free(ptr); + VG_CLEAR(arg); + arg.user_ptr = (uintptr_t)ptr; + arg.user_size = PAGE_SIZE; + arg.flags = I915_USERPTR_UNSYNCHRONIZED; - return handle != 0; + if (DBG_NO_UNSYNCHRONIZED_USERPTR || + do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { + arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED; + if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) + arg.handle = 0; + /* Leak the userptr bo to keep the mmu_notifier alive */ + } else { + gem_close(kgem->fd, arg.handle); + free(ptr); + } + + return arg.handle != 0; } static bool test_has_create2(struct kgem *kgem) @@ -1227,67 +1400,187 @@ static bool test_has_create2(struct kgem *kgem) #endif } -static bool test_has_secure_batches(struct kgem *kgem) +static bool test_can_blt_y(struct kgem *kgem) { - if (DBG_NO_SECURE_BATCHES) + struct drm_i915_gem_exec_object2 object; + uint32_t batch[] = { +#define MI_LOAD_REGISTER_IMM (0x22<<23 | (3-2)) +#define BCS_SWCTRL 0x22200 +#define BCS_SRC_Y (1 << 0) +#define BCS_DST_Y (1 << 1) + MI_LOAD_REGISTER_IMM, + BCS_SWCTRL, + (BCS_SRC_Y | BCS_DST_Y) << 16 | (BCS_SRC_Y | BCS_DST_Y), + + MI_LOAD_REGISTER_IMM, + BCS_SWCTRL, + (BCS_SRC_Y | BCS_DST_Y) << 16, + + MI_BATCH_BUFFER_END, + 0, + }; + int ret; + + if (DBG_NO_BLT_Y) return false; - return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; + if (kgem->gen < 060) + return false; + + memset(&object, 0, sizeof(object)); + object.handle = gem_create(kgem->fd, 1); + + ret = gem_write(kgem->fd, object.handle, 0, sizeof(batch), batch); + if (ret == 0) { + struct drm_i915_gem_execbuffer2 execbuf; + + memset(&execbuf, 0, 
sizeof(execbuf)); + execbuf.buffers_ptr = (uintptr_t)&object; + execbuf.buffer_count = 1; + execbuf.flags = KGEM_BLT; + + ret = do_ioctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf); + } + gem_close(kgem->fd, object.handle); + + return ret == 0; } -static bool test_has_pinned_batches(struct kgem *kgem) +static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) { - if (DBG_NO_PINNED_BATCHES) + struct drm_i915_gem_set_tiling set_tiling; + + if (DBG_NO_TILING) return false; - return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; + VG_CLEAR(set_tiling); + set_tiling.handle = handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = stride; + + if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) + return set_tiling.tiling_mode == tiling; + + return false; } -static int kgem_get_screen_index(struct kgem *kgem) +static bool test_can_scanout_y(struct kgem *kgem) { - struct sna *sna = container_of(kgem, struct sna, kgem); - return sna->scrn->scrnIndex; + struct drm_mode_fb_cmd arg; + bool ret = false; + + if (DBG_NO_SCANOUT_Y) + return false; + + VG_CLEAR(arg); + arg.width = 32; + arg.height = 32; + arg.pitch = 4*32; + arg.bpp = 32; + arg.depth = 24; + arg.handle = gem_create(kgem->fd, 1); + + if (gem_set_tiling(kgem->fd, arg.handle, I915_TILING_Y, arg.pitch)) + ret = do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0; + if (!ret) { + struct local_mode_fb_cmd2 { + uint32_t fb_id; + uint32_t width, height; + uint32_t pixel_format; + uint32_t flags; + + uint32_t handles[4]; + uint32_t pitches[4]; + uint32_t offsets[4]; + uint64_t modifiers[4]; + } f; +#define LOCAL_IOCTL_MODE_ADDFB2 DRM_IOWR(0xb8, struct local_mode_fb_cmd2) + memset(&f, 0, sizeof(f)); + f.width = arg.width; + f.height = arg.height; + f.handles[0] = arg.handle; + f.pitches[0] = arg.pitch; + f.modifiers[0] = (uint64_t)1 << 56 | 2; /* MOD_Y_TILED */ + f.pixel_format = 'X' | 'R' << 8 | '2' << 16 | '4' << 24; /* XRGB8888 */ + f.flags = 1 << 1; /* + 
modifier */ + if (drmIoctl(kgem->fd, LOCAL_IOCTL_MODE_ADDFB2, &f) == 0) { + ret = true; + arg.fb_id = f.fb_id; + } + } + do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &arg.fb_id); + gem_close(kgem->fd, arg.handle); + + return ret; } -static int __find_debugfs(struct kgem *kgem) +static bool test_has_dirtyfb(struct kgem *kgem) { - int i; + struct drm_mode_fb_cmd create; + bool ret = false; - for (i = 0; i < DRM_MAX_MINOR; i++) { - char path[80]; + if (DBG_NO_DIRTYFB) + return false; - sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i); - if (access(path, R_OK) == 0) - return i; + VG_CLEAR(create); + create.width = 32; + create.height = 32; + create.pitch = 4*32; + create.bpp = 32; + create.depth = 32; + create.handle = gem_create(kgem->fd, 1); + if (create.handle == 0) + return false; - sprintf(path, "/debug/dri/%d/i915_wedged", i); - if (access(path, R_OK) == 0) - return i; + if (drmIoctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &create) == 0) { + struct drm_mode_fb_dirty_cmd dirty; + + memset(&dirty, 0, sizeof(dirty)); + dirty.fb_id = create.fb_id; + ret = drmIoctl(kgem->fd, + DRM_IOCTL_MODE_DIRTYFB, + &dirty) == 0; + + /* XXX There may be multiple levels of DIRTYFB, depending on + * whether the kernel thinks tracking dirty regions is + * beneficial vs flagging the whole fb as dirty. 
+ */ + + drmIoctl(kgem->fd, + DRM_IOCTL_MODE_RMFB, + &create.fb_id); } + gem_close(kgem->fd, create.handle); - return -1; + return ret; } -static int kgem_get_minor(struct kgem *kgem) +static bool test_has_secure_batches(struct kgem *kgem) { - struct stat st; + if (DBG_NO_SECURE_BATCHES) + return false; - if (fstat(kgem->fd, &st)) - return __find_debugfs(kgem); + return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; +} - if (!S_ISCHR(st.st_mode)) - return __find_debugfs(kgem); +static bool test_has_pinned_batches(struct kgem *kgem) +{ + if (DBG_NO_PINNED_BATCHES) + return false; - return st.st_rdev & 0x63; + return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; } static bool kgem_init_pinned_batches(struct kgem *kgem) { int count[2] = { 16, 4 }; int size[2] = { 1, 4 }; + int ret = 0; int n, i; - if (kgem->wedged) + if (unlikely(kgem->wedged)) return true; for (n = 0; n < ARRAY_SIZE(count); n++) { @@ -1311,7 +1604,8 @@ static bool kgem_init_pinned_batches(struct kgem *kgem) } pin.alignment = 0; - if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) { + ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin); + if (ret) { gem_close(kgem->fd, pin.handle); free(bo); goto err; @@ -1333,6 +1627,16 @@ err: } } + /* If we fail to pin some memory for 830gm/845g, we need to disable + * acceleration as otherwise the machine will eventually fail. However, + * the kernel started arbitrarily rejecting PIN, so hope for the best + * if the ioctl no longer works. 
+ */ + if (ret != -ENODEV && kgem->gen == 020) + return false; + + kgem->has_pinned_batches = false; + /* For simplicity populate the lists with a single unpinned bo */ for (n = 0; n < ARRAY_SIZE(count); n++) { struct kgem_bo *bo; @@ -1340,18 +1644,18 @@ err: handle = gem_create(kgem->fd, size[n]); if (handle == 0) - break; + return false; bo = __kgem_bo_alloc(handle, size[n]); if (bo == NULL) { gem_close(kgem->fd, handle); - break; + return false; } debug_alloc__bo(kgem, bo); list_add(&bo->list, &kgem->pinned_batches[n]); } - return false; + return true; } static void kgem_init_swizzling(struct kgem *kgem) @@ -1364,7 +1668,7 @@ static void kgem_init_swizzling(struct kgem *kgem) } tiling; #define LOCAL_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct local_i915_gem_get_tiling_v2) - VG_CLEAR(tiling); + memset(&tiling, 0, sizeof(tiling)); tiling.handle = gem_create(kgem->fd, 1); if (!tiling.handle) return; @@ -1375,12 +1679,23 @@ static void kgem_init_swizzling(struct kgem *kgem) if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_TILING, &tiling)) goto out; - if (kgem->gen < 50 && tiling.phys_swizzle_mode != tiling.swizzle_mode) + DBG(("%s: swizzle_mode=%d, phys_swizzle_mode=%d\n", + __FUNCTION__, tiling.swizzle_mode, tiling.phys_swizzle_mode)); + + kgem->can_fence = + !DBG_NO_TILING && + tiling.swizzle_mode != I915_BIT_6_SWIZZLE_UNKNOWN; + + if (kgem->gen < 050 && tiling.phys_swizzle_mode != tiling.swizzle_mode) goto out; - choose_memcpy_tiled_x(kgem, tiling.swizzle_mode); + if (!DBG_NO_DETILING) + choose_memcpy_tiled_x(kgem, + tiling.swizzle_mode, + __to_sna(kgem)->cpu_features); out: gem_close(kgem->fd, tiling.handle); + DBG(("%s: can fence?=%d\n", __FUNCTION__, kgem->can_fence)); } static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) @@ -1399,6 +1714,7 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) bo->handle, (long long)bo->presumed_offset)); for (n = 0; n < 
kgem->nreloc__self; n++) { int i = kgem->reloc__self[n]; + uint64_t addr; assert(kgem->reloc[i].target_handle == ~0U); kgem->reloc[i].target_handle = bo->target_handle; @@ -1412,13 +1728,17 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) kgem->reloc[i].delta -= shrink; } - kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] = - kgem->reloc[i].delta + bo->presumed_offset; + addr = (int)kgem->reloc[i].delta + bo->presumed_offset; + kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] = addr; + if (kgem->gen >= 0100) + kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t) + 1] = addr >> 32; } if (n == 256) { for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) { if (kgem->reloc[n].target_handle == ~0U) { + uint64_t addr; + kgem->reloc[n].target_handle = bo->target_handle; kgem->reloc[n].presumed_offset = bo->presumed_offset; @@ -1429,8 +1749,11 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) kgem->reloc[n].delta - shrink)); kgem->reloc[n].delta -= shrink; } - kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] = - kgem->reloc[n].delta + bo->presumed_offset; + + addr = (int)kgem->reloc[n].delta + bo->presumed_offset; + kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] = addr; + if (kgem->gen >= 0100) + kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t) + 1] = addr >> 32; } } } @@ -1444,6 +1767,44 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) } } +static int kgem_bo_wait(struct kgem *kgem, struct kgem_bo *bo) +{ + struct local_i915_gem_wait { + uint32_t handle; + uint32_t flags; + int64_t timeout; + } wait; +#define LOCAL_I915_GEM_WAIT 0x2c +#define LOCAL_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + LOCAL_I915_GEM_WAIT, struct local_i915_gem_wait) + int ret; + + DBG(("%s: waiting for handle=%d\n", __FUNCTION__, bo->handle)); + if (bo->rq == NULL) + return 0; + + VG_CLEAR(wait); + wait.handle = bo->handle; + wait.flags = 0; + wait.timeout = -1; + ret = 
do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_WAIT, &wait); + if (ret) { + struct drm_i915_gem_set_domain set_domain; + + VG_CLEAR(set_domain); + set_domain.handle = bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + ret = do_ioctl(kgem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); + } + + if (ret == 0) + __kgem_retire_requests_upto(kgem, bo); + + return ret; +} + static struct kgem_bo *kgem_new_batch(struct kgem *kgem) { struct kgem_bo *last; @@ -1464,20 +1825,41 @@ static struct kgem_bo *kgem_new_batch(struct kgem *kgem) if (!kgem->has_llc) flags |= CREATE_UNCACHED; +restart: kgem->batch_bo = kgem_create_linear(kgem, sizeof(uint32_t)*kgem->batch_size, flags); if (kgem->batch_bo) kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo); if (kgem->batch == NULL) { - DBG(("%s: unable to map batch bo, mallocing(size=%d)\n", - __FUNCTION__, - sizeof(uint32_t)*kgem->batch_size)); + int ring = kgem->ring == KGEM_BLT; + assert(ring < ARRAY_SIZE(kgem->requests)); + if (kgem->batch_bo) { kgem_bo_destroy(kgem, kgem->batch_bo); kgem->batch_bo = NULL; } + if (!list_is_empty(&kgem->requests[ring])) { + struct kgem_request *rq; + + rq = list_first_entry(&kgem->requests[ring], + struct kgem_request, list); + assert(rq->ring == ring); + assert(rq->bo); + assert(RQ(rq->bo->rq) == rq); + if (kgem_bo_wait(kgem, rq->bo) == 0) + goto restart; + } + + if (flags & CREATE_NO_THROTTLE) { + flags &= ~CREATE_NO_THROTTLE; + if (kgem_cleanup_cache(kgem)) + goto restart; + } + + DBG(("%s: unable to map batch bo, mallocing(size=%d)\n", + __FUNCTION__, sizeof(uint32_t)*kgem->batch_size)); if (posix_memalign((void **)&kgem->batch, PAGE_SIZE, ALIGN(sizeof(uint32_t) * kgem->batch_size, PAGE_SIZE))) { ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__)); @@ -1495,18 +1877,79 @@ static struct kgem_bo *kgem_new_batch(struct kgem *kgem) return last; } -void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, 
unsigned gen) +static void +no_retire(struct kgem *kgem) +{ + (void)kgem; +} + +static void +no_expire(struct kgem *kgem) +{ + (void)kgem; +} + +static void +no_context_switch(struct kgem *kgem, int new_mode) +{ + (void)kgem; + (void)new_mode; +} + +static uint64_t get_gtt_size(int fd) { struct drm_i915_gem_get_aperture aperture; + struct local_i915_gem_context_param { + uint32_t context; + uint32_t size; + uint64_t param; +#define LOCAL_CONTEXT_PARAM_BAN_PERIOD 0x1 +#define LOCAL_CONTEXT_PARAM_NO_ZEROMAP 0x2 +#define LOCAL_CONTEXT_PARAM_GTT_SIZE 0x3 + uint64_t value; + } p; +#define LOCAL_I915_GEM_CONTEXT_GETPARAM 0x34 +#define LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CONTEXT_GETPARAM, struct local_i915_gem_context_param) + + memset(&aperture, 0, sizeof(aperture)); + + memset(&p, 0, sizeof(p)); + p.param = LOCAL_CONTEXT_PARAM_GTT_SIZE; + if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p) == 0) + aperture.aper_size = p.value; + if (aperture.aper_size == 0) + (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (aperture.aper_size == 0) + aperture.aper_size = 64*1024*1024; + + DBG(("%s: aperture size %lld, available now %lld\n", + __FUNCTION__, + (long long)aperture.aper_size, + (long long)aperture.aper_available_size)); + + /* clamp aperture to uint32_t for simplicity */ + if (aperture.aper_size > 0xc0000000) + aperture.aper_size = 0xc0000000; + + return aperture.aper_size; +} + +void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) +{ size_t totalram; unsigned half_gpu_max; unsigned int i, j; + uint64_t gtt_size; DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen)); kgem->fd = fd; kgem->gen = gen; + kgem->retire = no_retire; + kgem->expire = no_expire; + kgem->context_switch = no_context_switch; + list_init(&kgem->requests[0]); list_init(&kgem->requests[1]); list_init(&kgem->batch_buffers); @@ -1586,10 +2029,21 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device 
*dev, unsigned gen) DBG(("%s: can blt to cpu? %d\n", __FUNCTION__, kgem->can_blt_cpu)); + kgem->can_blt_y = test_can_blt_y(kgem); + DBG(("%s: can blit to Y-tiled surfaces? %d\n", __FUNCTION__, + kgem->can_blt_y)); + kgem->can_render_y = gen != 021 && (gen >> 3) != 4; DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__, kgem->can_render_y)); + kgem->can_scanout_y = test_can_scanout_y(kgem); + DBG(("%s: can scanout Y-tiled surfaces? %d\n", __FUNCTION__, + kgem->can_scanout_y)); + + kgem->has_dirtyfb = test_has_dirtyfb(kgem); + DBG(("%s: has dirty fb? %d\n", __FUNCTION__, kgem->has_dirtyfb)); + kgem->has_secure_batches = test_has_secure_batches(kgem); DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__, kgem->has_secure_batches)); @@ -1620,7 +2074,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) kgem->batch_size = 4*1024; - if (!kgem_init_pinned_batches(kgem) && gen == 020) { + if (!kgem_init_pinned_batches(kgem)) { xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, "Unable to reserve memory for GPU, disabling acceleration.\n"); __kgem_set_wedged(kgem); @@ -1640,35 +2094,24 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching), kgem->has_llc, kgem->has_caching, kgem->has_userptr)); - VG_CLEAR(aperture); - aperture.aper_size = 0; - (void)do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); - if (aperture.aper_size == 0) - aperture.aper_size = 64*1024*1024; - - DBG(("%s: aperture size %lld, available now %lld\n", - __FUNCTION__, - (long long)aperture.aper_size, - (long long)aperture.aper_available_size)); - - kgem->aperture_total = aperture.aper_size; - kgem->aperture_high = aperture.aper_size * 3/4; - kgem->aperture_low = aperture.aper_size * 1/3; + gtt_size = get_gtt_size(fd); + kgem->aperture_total = gtt_size; + kgem->aperture_high = gtt_size * 3/4; 
+ kgem->aperture_low = gtt_size * 1/3; if (gen < 033) { /* Severe alignment penalties */ kgem->aperture_high /= 2; kgem->aperture_low /= 2; } - DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__, + DBG(("%s: aperture low=%u [%u], high=%u [%u]\n", __FUNCTION__, kgem->aperture_low, kgem->aperture_low / (1024*1024), kgem->aperture_high, kgem->aperture_high / (1024*1024))); kgem->aperture_mappable = 256 * 1024 * 1024; if (dev != NULL) kgem->aperture_mappable = agp_aperture_size(dev, gen); - if (kgem->aperture_mappable == 0 || - kgem->aperture_mappable > aperture.aper_size) - kgem->aperture_mappable = aperture.aper_size; + if (kgem->aperture_mappable == 0 || kgem->aperture_mappable > gtt_size) + kgem->aperture_mappable = gtt_size; DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__, kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024))); @@ -1697,7 +2140,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) __FUNCTION__)); totalram = kgem->aperture_total; } - DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram)); + DBG(("%s: total ram=%lld\n", __FUNCTION__, (long long)totalram)); if (kgem->max_object_size > totalram / 2) kgem->max_object_size = totalram / 2; if (kgem->max_gpu_size > totalram / 4) @@ -1749,11 +2192,11 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) if (DBG_NO_CPU) kgem->max_cpu_size = 0; - DBG(("%s: maximum object size=%d\n", + DBG(("%s: maximum object size=%u\n", __FUNCTION__, kgem->max_object_size)); - DBG(("%s: large object thresold=%d\n", + DBG(("%s: large object thresold=%u\n", __FUNCTION__, kgem->large_object_size)); - DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n", + DBG(("%s: max object sizes (gpu=%u, cpu=%u, tile upload=%u, copy=%u)\n", __FUNCTION__, kgem->max_gpu_size, kgem->max_cpu_size, kgem->max_upload_tile_size, kgem->max_copy_tile_size)); @@ -2043,8 +2486,34 @@ static void kgem_add_bo(struct kgem *kgem, struct kgem_bo 
*bo) kgem->flush |= bo->flush; } +static void kgem_clear_swctrl(struct kgem *kgem) +{ + uint32_t *b; + + if (kgem->bcs_state == 0) + return; + + DBG(("%s: clearin SWCTRL LRI from %x\n", + __FUNCTION__, kgem->bcs_state)); + + b = kgem->batch + kgem->nbatch; + kgem->nbatch += 7; + + *b++ = MI_FLUSH_DW; + *b++ = 0; + *b++ = 0; + *b++ = 0; + + *b++ = MI_LOAD_REGISTER_IMM; + *b++ = BCS_SWCTRL; + *b++ = (BCS_SRC_Y | BCS_DST_Y) << 16; + + kgem->bcs_state = 0; +} + static uint32_t kgem_end_batch(struct kgem *kgem) { + kgem_clear_swctrl(kgem); kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END; if (kgem->nbatch & 1) kgem->batch[kgem->nbatch++] = MI_NOOP; @@ -2064,17 +2533,6 @@ static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo) } } -static void kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo) -{ - if (bo->scanout && bo->delta) { - DBG(("%s: releasing fb=%d for handle=%d\n", - __FUNCTION__, bo->delta, bo->handle)); - /* XXX will leak if we are not DRM_MASTER. *shrug* */ - do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta); - bo->delta = 0; - } -} - static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo))); @@ -2150,13 +2608,16 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, assert(!bo->snoop); assert(!bo->flush); assert(!bo->needs_flush); + assert(!bo->delta); assert(list_is_empty(&bo->vma)); assert_tiling(kgem, bo); - assert_cacheing(kgem, bo); + assert_caching(kgem, bo); ASSERT_IDLE(kgem, bo->handle); if (bucket(bo) >= NUM_CACHE_BUCKETS) { if (bo->map__gtt) { + DBG(("%s: relinquishing large GTT mapping for handle=%d\n", + __FUNCTION__, bo->handle)); munmap(bo->map__gtt, bytes(bo)); bo->map__gtt = NULL; } @@ -2167,6 +2628,8 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, assert(list_is_empty(&bo->vma)); list_move(&bo->list, &kgem->inactive[bucket(bo)]); if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) { + DBG(("%s: relinquishing old GTT 
mapping for handle=%d\n", + __FUNCTION__, bo->handle)); munmap(bo->map__gtt, bytes(bo)); bo->map__gtt = NULL; } @@ -2191,6 +2654,10 @@ static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo) return bo; assert(!bo->snoop); + assert(!bo->purged); + assert(!bo->scanout); + assert(!bo->delta); + if (__kgem_freed_bo) { base = __kgem_freed_bo; __kgem_freed_bo = *(struct kgem_bo **)base; @@ -2221,6 +2688,7 @@ inline static void kgem_bo_remove_from_inactive(struct kgem *kgem, list_del(&bo->list); assert(bo->rq == NULL); assert(bo->exec == NULL); + assert(!bo->purged); if (!list_is_empty(&bo->vma)) { assert(bo->map__gtt || bo->map__wc || bo->map__cpu); list_del(&bo->vma); @@ -2305,7 +2773,6 @@ static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo) list_move(&bo->list, &kgem->scanout); kgem->need_expire = true; - } static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) @@ -2316,6 +2783,8 @@ static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) assert(!bo->needs_flush); assert(bo->refcnt == 0); assert(bo->exec == NULL); + assert(!bo->purged); + assert(!bo->delta); if (DBG_NO_SNOOP_CACHE) { kgem_bo_free(kgem, bo); @@ -2351,8 +2820,7 @@ static bool kgem_bo_move_to_cache(struct kgem *kgem, struct kgem_bo *bo) kgem_bo_move_to_snoop(kgem, bo); } else if (bo->scanout) { kgem_bo_move_to_scanout(kgem, bo); - } else if ((bo = kgem_bo_replace_io(bo))->reusable && - kgem_bo_set_purgeable(kgem, bo)) { + } else if ((bo = kgem_bo_replace_io(bo))->reusable) { kgem_bo_move_to_inactive(kgem, bo); retired = true; } else @@ -2429,7 +2897,7 @@ void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: only handle in batch, discarding last operations for handle=%d\n", __FUNCTION__, bo->handle)); - assert(bo->exec == &kgem->exec[0]); + assert(bo->exec == &_kgem_dummy_exec || bo->exec == &kgem->exec[0]); assert(kgem->exec[0].handle == bo->handle); assert(RQ(bo->rq) == kgem->next_request); @@ -2457,16 +2925,23 @@ void 
kgem_bo_pair_undo(struct kgem *kgem, struct kgem_bo *a, struct kgem_bo *b) if (a == NULL || b == NULL) return; + assert(a != b); if (a->exec == NULL || b->exec == NULL) return; - DBG(("%s: only handles in batch, discarding last operations for handle=%d and handle=%d\n", - __FUNCTION__, a->handle, b->handle)); + DBG(("%s: only handles in batch, discarding last operations for handle=%d (index=%d) and handle=%d (index=%d)\n", + __FUNCTION__, + a->handle, a->proxy ? -1 : a->exec - kgem->exec, + b->handle, b->proxy ? -1 : b->exec - kgem->exec)); - assert(a->exec == &kgem->exec[0] || a->exec == &kgem->exec[1]); + assert(a->exec == &_kgem_dummy_exec || + a->exec == &kgem->exec[0] || + a->exec == &kgem->exec[1]); assert(a->handle == kgem->exec[0].handle || a->handle == kgem->exec[1].handle); assert(RQ(a->rq) == kgem->next_request); - assert(b->exec == &kgem->exec[0] || b->exec == &kgem->exec[1]); + assert(b->exec == &_kgem_dummy_exec || + b->exec == &kgem->exec[0] || + b->exec == &kgem->exec[1]); assert(b->handle == kgem->exec[0].handle || b->handle == kgem->exec[1].handle); assert(RQ(b->rq) == kgem->next_request); @@ -2487,6 +2962,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo))); assert(list_is_empty(&bo->list)); + assert(list_is_empty(&bo->vma)); assert(bo->refcnt == 0); assert(bo->proxy == NULL); assert(bo->active_scanout == 0); @@ -2532,7 +3008,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) assert(bo->snoop == false); assert(bo->io == false); assert(bo->scanout == false); - assert_cacheing(kgem, bo); + assert_caching(kgem, bo); kgem_bo_undo(kgem, bo); assert(bo->refcnt == 0); @@ -2556,9 +3032,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) assert(list_is_empty(&bo->request)); if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) { - if (!kgem_bo_set_purgeable(kgem, bo)) - goto destroy; - if (!kgem->has_llc && 
bo->domain == DOMAIN_CPU) goto destroy; @@ -2647,7 +3120,7 @@ static bool kgem_retire__flushing(struct kgem *kgem) int count = 0; list_for_each_entry(bo, &kgem->flushing, request) count++; - DBG(("%s: %d bo on flushing list\n", __FUNCTION__, count)); + DBG(("%s: %d bo on flushing list, retired? %d\n", __FUNCTION__, count, retired)); } #endif @@ -2656,6 +3129,34 @@ static bool kgem_retire__flushing(struct kgem *kgem) return retired; } +static bool __kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_busy busy; + + if (!bo->needs_flush) + return false; + + bo->needs_flush = false; + + VG_CLEAR(busy); + busy.handle = bo->handle; + busy.busy = !kgem->wedged; + (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); + DBG(("%s: handle=%d, busy=%d, wedged=%d\n", + __FUNCTION__, bo->handle, busy.busy, kgem->wedged)); + + if (busy.busy == 0) + return false; + + DBG(("%s: moving %d to flushing\n", + __FUNCTION__, bo->handle)); + list_add(&bo->request, &kgem->flushing); + bo->rq = MAKE_REQUEST(kgem, !!(busy.busy & ~0x1ffff)); + bo->needs_flush = busy.busy & 0xffff; + kgem->need_retire = true; + return true; +} + static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) { bool retired = false; @@ -2663,6 +3164,8 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) DBG(("%s: request %d complete\n", __FUNCTION__, rq->bo->handle)); assert(RQ(rq->bo->rq) == rq); + assert(rq != (struct kgem_request *)kgem); + assert(rq != &kgem->static_request); if (rq == kgem->fence[rq->ring]) kgem->fence[rq->ring] = NULL; @@ -2680,19 +3183,14 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) list_del(&bo->request); - if (bo->needs_flush) - bo->needs_flush = __kgem_busy(kgem, bo->handle); - if (bo->needs_flush) { - DBG(("%s: moving %d to flushing\n", + if (unlikely(__kgem_bo_flush(kgem, bo))) { + assert(bo != rq->bo); + DBG(("%s: movied %d to flushing\n", __FUNCTION__, bo->handle)); - 
list_add(&bo->request, &kgem->flushing); - bo->rq = MAKE_REQUEST(kgem, RQ_RING(bo->rq)); - kgem->need_retire = true; continue; } bo->domain = DOMAIN_NONE; - bo->gtt_dirty = false; bo->rq = NULL; if (bo->refcnt) continue; @@ -2706,14 +3204,8 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) assert(rq->bo->refcnt > 0); if (--rq->bo->refcnt == 0) { - if (kgem_bo_set_purgeable(kgem, rq->bo)) { - kgem_bo_move_to_inactive(kgem, rq->bo); - retired = true; - } else { - DBG(("%s: closing %d\n", - __FUNCTION__, rq->bo->handle)); - kgem_bo_free(kgem, rq->bo); - } + kgem_bo_move_to_inactive(kgem, rq->bo); + retired = true; } __kgem_request_free(rq); @@ -2724,13 +3216,18 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring) { bool retired = false; + assert(ring < ARRAY_SIZE(kgem->requests)); while (!list_is_empty(&kgem->requests[ring])) { struct kgem_request *rq; + DBG(("%s: retiring ring %d\n", __FUNCTION__, ring)); + rq = list_first_entry(&kgem->requests[ring], struct kgem_request, list); assert(rq->ring == ring); + assert(rq->bo); + assert(RQ(rq->bo->rq) == rq); if (__kgem_busy(kgem, rq->bo->handle)) break; @@ -2751,8 +3248,8 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring) struct kgem_request, list)->bo; - DBG(("%s: ring=%d, %d outstanding requests, oldest=%d\n", - __FUNCTION__, ring, count, bo ? bo->handle : 0)); + DBG(("%s: ring=%d, %d outstanding requests, oldest=%d, retired? %d\n", + __FUNCTION__, ring, count, bo ? 
bo->handle : 0, retired)); } #endif @@ -2824,6 +3321,8 @@ bool __kgem_ring_is_idle(struct kgem *kgem, int ring) rq = list_last_entry(&kgem->requests[ring], struct kgem_request, list); assert(rq->ring == ring); + assert(rq->bo); + assert(RQ(rq->bo->rq) == rq); if (__kgem_busy(kgem, rq->bo->handle)) { DBG(("%s: last requests handle=%d still busy\n", __FUNCTION__, rq->bo->handle)); @@ -2845,23 +3344,30 @@ bool __kgem_ring_is_idle(struct kgem *kgem, int ring) return true; } -void __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo) +bool __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo) { - struct kgem_request *rq = bo->rq, *tmp; - struct list *requests = &kgem->requests[RQ_RING(rq) == I915_EXEC_BLT]; + struct kgem_request * const rq = RQ(bo->rq), *tmp; + struct list *requests = &kgem->requests[rq->ring]; + + DBG(("%s(handle=%d, ring=%d)\n", __FUNCTION__, bo->handle, rq->ring)); - rq = RQ(rq); assert(rq != &kgem->static_request); if (rq == (struct kgem_request *)kgem) { __kgem_bo_clear_busy(bo); - return; + return false; } + assert(rq->ring < ARRAY_SIZE(kgem->requests)); do { tmp = list_first_entry(requests, struct kgem_request, list); assert(tmp->ring == rq->ring); __kgem_retire_rq(kgem, tmp); } while (tmp != rq); + + assert(bo->needs_flush || bo->rq == NULL); + assert(bo->needs_flush || list_is_empty(&bo->request)); + assert(bo->needs_flush || bo->domain == DOMAIN_NONE); + return bo->rq; } #if 0 @@ -2932,6 +3438,7 @@ static void kgem_commit(struct kgem *kgem) bo->binding.offset = 0; bo->domain = DOMAIN_GPU; bo->gpu_dirty = false; + bo->gtt_dirty = false; if (bo->proxy) { /* proxies are not used for domain tracking */ @@ -2955,6 +3462,23 @@ static void kgem_commit(struct kgem *kgem) kgem_throttle(kgem); } + while (!list_is_empty(&rq->buffers)) { + bo = list_first_entry(&rq->buffers, + struct kgem_bo, + request); + + assert(RQ(bo->rq) == rq); + assert(bo->exec == NULL); + assert(bo->domain == DOMAIN_GPU); + + list_del(&bo->request); + 
bo->domain = DOMAIN_NONE; + bo->rq = NULL; + + if (bo->refcnt == 0) + _kgem_bo_destroy(kgem, bo); + } + kgem_retire(kgem); assert(list_is_empty(&rq->buffers)); @@ -2964,7 +3488,9 @@ static void kgem_commit(struct kgem *kgem) gem_close(kgem->fd, rq->bo->handle); kgem_cleanup_cache(kgem); } else { + assert(rq != (struct kgem_request *)kgem); assert(rq->ring < ARRAY_SIZE(kgem->requests)); + assert(rq->bo); list_add_tail(&rq->list, &kgem->requests[rq->ring]); kgem->need_throttle = kgem->need_retire = 1; @@ -2988,8 +3514,10 @@ static void kgem_close_inactive(struct kgem *kgem) { unsigned int i; - for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { kgem_close_list(kgem, &kgem->inactive[i]); + assert(list_is_empty(&kgem->inactive[i])); + } } static void kgem_finish_buffers(struct kgem *kgem) @@ -3079,10 +3607,13 @@ static void kgem_finish_buffers(struct kgem *kgem) kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; for (n = 0; n < kgem->nreloc; n++) { if (kgem->reloc[n].target_handle == bo->base.target_handle) { + uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset; + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr; + if (kgem->gen >= 0100) + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32; + kgem->reloc[n].target_handle = shrink->target_handle; kgem->reloc[n].presumed_offset = shrink->presumed_offset; - kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = - kgem->reloc[n].delta + shrink->presumed_offset; } } @@ -3124,10 +3655,13 @@ static void kgem_finish_buffers(struct kgem *kgem) kgem->has_handle_lut ? 
bo->base.target_handle : shrink->handle; for (n = 0; n < kgem->nreloc; n++) { if (kgem->reloc[n].target_handle == bo->base.target_handle) { + uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset; + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr; + if (kgem->gen >= 0100) + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32; + kgem->reloc[n].target_handle = shrink->target_handle; kgem->reloc[n].presumed_offset = shrink->presumed_offset; - kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = - kgem->reloc[n].delta + shrink->presumed_offset; } } @@ -3195,6 +3729,9 @@ static void kgem_cleanup(struct kgem *kgem) kgem_bo_free(kgem, bo); } + if (--rq->bo->refcnt == 0) + kgem_bo_free(kgem, rq->bo); + __kgem_request_free(rq); } } @@ -3210,7 +3747,9 @@ kgem_batch_write(struct kgem *kgem, char *ptr; int ret; - ASSERT_IDLE(kgem, bo->handle); + assert(bo->exec == NULL); + assert(bo->rq == NULL); + assert(!__kgem_busy(kgem, bo->handle)); #if DBG_NO_EXEC { @@ -3371,55 +3910,54 @@ static int compact_batch_surface(struct kgem *kgem, int *shrink) return size * sizeof(uint32_t); } +static struct kgem_bo *first_available(struct kgem *kgem, struct list *list) +{ + struct kgem_bo *bo; + + list_for_each_entry(bo, list, list) { + assert(bo->refcnt > 0); + + if (bo->rq) { + assert(RQ(bo->rq)->bo == bo); + if (__kgem_busy(kgem, bo->handle)) + break; + + __kgem_retire_rq(kgem, RQ(bo->rq)); + assert(bo->rq == NULL); + } + + if (bo->refcnt > 1) + continue; + + list_move_tail(&bo->list, list); + return kgem_bo_reference(bo); + } + + return NULL; +} + static struct kgem_bo * kgem_create_batch(struct kgem *kgem) { -#if !DBG_NO_SHRINK_BATCHES - struct drm_i915_gem_set_domain set_domain; struct kgem_bo *bo; - int shrink = 0; - int size; + int size, shrink = 0; +#if !DBG_NO_SHRINK_BATCHES if (kgem->surface != kgem->batch_size) size = compact_batch_surface(kgem, &shrink); else size = kgem->nbatch * sizeof(uint32_t); if (size <= 4096) { - 
bo = list_first_entry(&kgem->pinned_batches[0], - struct kgem_bo, - list); - if (!bo->rq) { -out_4096: - assert(bo->refcnt > 0); - list_move_tail(&bo->list, &kgem->pinned_batches[0]); - bo = kgem_bo_reference(bo); + bo = first_available(kgem, &kgem->pinned_batches[0]); + if (bo) goto write; - } - - if (!__kgem_busy(kgem, bo->handle)) { - assert(RQ(bo->rq)->bo == bo); - __kgem_retire_rq(kgem, RQ(bo->rq)); - goto out_4096; - } } - if (size <= 16384) { - bo = list_first_entry(&kgem->pinned_batches[1], - struct kgem_bo, - list); - if (!bo->rq) { -out_16384: - assert(bo->refcnt > 0); - list_move_tail(&bo->list, &kgem->pinned_batches[1]); - bo = kgem_bo_reference(bo); - goto write; - } - - if (!__kgem_busy(kgem, bo->handle)) { - __kgem_retire_rq(kgem, RQ(bo->rq)); - goto out_16384; - } + if (size <= 16384) { + bo = first_available(kgem, &kgem->pinned_batches[1]); + if (bo) + goto write; } if (kgem->gen == 020) { @@ -3443,16 +3981,8 @@ out_16384: list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]); DBG(("%s: syncing due to busy batches\n", __FUNCTION__)); - - VG_CLEAR(set_domain); - set_domain.handle = bo->handle; - set_domain.read_domains = I915_GEM_DOMAIN_GTT; - set_domain.write_domain = I915_GEM_DOMAIN_GTT; - if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { - DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); - kgem_throttle(kgem); + if (kgem_bo_wait(kgem, bo)) return NULL; - } kgem_retire(kgem); assert(bo->rq == NULL); @@ -3460,9 +3990,14 @@ out_16384: goto write; } } +#else + if (kgem->surface != kgem->batch_size) + size = kgem->batch_size * sizeof(uint32_t); + else + size = kgem->nbatch * sizeof(uint32_t); +#endif - bo = NULL; - if (!kgem->has_llc) { + if (!kgem->batch_bo || !kgem->has_llc) { bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE); if (bo) { write: @@ -3471,14 +4006,11 @@ write: kgem_bo_destroy(kgem, bo); return NULL; } + return bo; } } - if (bo == NULL) - bo = kgem_new_batch(kgem); - return bo; -#else + return 
kgem_new_batch(kgem); -#endif } #if !NDEBUG @@ -3530,7 +4062,7 @@ static void dump_fence_regs(struct kgem *kgem) static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf) { - int ret, err; + int ret; retry: ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); @@ -3547,26 +4079,25 @@ retry: /* last gasp */ ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); - if (ret == 0) - return 0; + if (ret != -ENOSPC) + return ret; + + /* One final trick up our sleeve for when we run out of space. + * We turn everything off to free up our pinned framebuffers, + * sprites and cursors, and try just one more time. + */ xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, "Failed to submit rendering commands, trying again with outputs disabled.\n"); - /* One last trick up our sleeve for when we run out of space. - * We turn everything off to free up our pinned framebuffers, - * sprites and cursors, and try one last time. - */ - err = errno; - if (sna_mode_disable(container_of(kgem, struct sna, kgem))) { + if (sna_mode_disable(__to_sna(kgem))) { kgem_cleanup_cache(kgem); ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); DBG(("%s: last_gasp ret=%d\n", __FUNCTION__, ret)); - sna_mode_enable(container_of(kgem, struct sna, kgem)); + sna_mode_enable(__to_sna(kgem)); } - errno = err; return ret; } @@ -3575,6 +4106,7 @@ void _kgem_submit(struct kgem *kgem) { struct kgem_request *rq; uint32_t batch_end; + int i, ret; assert(!DBG_NO_HW); assert(!kgem->wedged); @@ -3609,7 +4141,6 @@ void _kgem_submit(struct kgem *kgem) rq->bo = kgem_create_batch(kgem); if (rq->bo) { struct drm_i915_gem_execbuffer2 execbuf; - int i, ret; assert(!rq->bo->needs_flush); @@ -3619,7 +4150,8 @@ void _kgem_submit(struct kgem *kgem) kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc; kgem->exec[i].alignment = 0; kgem->exec[i].offset = rq->bo->presumed_offset; - kgem->exec[i].flags = 0; + /* Make sure the kernel releases any fence, ignored if gen4+ */ 
+ kgem->exec[i].flags = EXEC_OBJECT_NEEDS_FENCE; kgem->exec[i].rsvd1 = 0; kgem->exec[i].rsvd2 = 0; @@ -3631,7 +4163,8 @@ void _kgem_submit(struct kgem *kgem) memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)kgem->exec; execbuf.buffer_count = kgem->nexec; - execbuf.batch_len = batch_end*sizeof(uint32_t); + if (kgem->gen < 030) + execbuf.batch_len = batch_end*sizeof(uint32_t); execbuf.flags = kgem->ring | kgem->batch_flags; if (DBG_DUMP) { @@ -3645,91 +4178,98 @@ void _kgem_submit(struct kgem *kgem) } ret = do_execbuf(kgem, &execbuf); - if (DEBUG_SYNC && ret == 0) { - struct drm_i915_gem_set_domain set_domain; - - VG_CLEAR(set_domain); - set_domain.handle = rq->bo->handle; - set_domain.read_domains = I915_GEM_DOMAIN_GTT; - set_domain.write_domain = I915_GEM_DOMAIN_GTT; + } else + ret = -ENOMEM; - ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); + if (ret < 0) { + kgem_throttle(kgem); + if (!kgem->wedged) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, + "Failed to submit rendering commands (%s), disabling acceleration.\n", + strerror(-ret)); + __kgem_set_wedged(kgem); } - if (ret < 0) { - kgem_throttle(kgem); - if (!kgem->wedged) { - xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, - "Failed to submit rendering commands, disabling acceleration.\n"); - __kgem_set_wedged(kgem); - } #if !NDEBUG - ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n", - kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, - kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret); + ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n", + kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, + kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret); - for (i = 0; 
i < kgem->nexec; i++) { - struct kgem_bo *bo, *found = NULL; + for (i = 0; i < kgem->nexec; i++) { + struct kgem_bo *bo, *found = NULL; - list_for_each_entry(bo, &kgem->next_request->buffers, request) { - if (bo->handle == kgem->exec[i].handle) { - found = bo; - break; - } + list_for_each_entry(bo, &kgem->next_request->buffers, request) { + if (bo->handle == kgem->exec[i].handle) { + found = bo; + break; } - ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n", - i, - kgem->exec[i].handle, - (int)kgem->exec[i].offset, - found ? kgem_bo_size(found) : -1, - found ? found->tiling : -1, - (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), - found ? found->snoop : -1, - found ? found->purged : -1); } - for (i = 0; i < kgem->nreloc; i++) { - ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n", - i, - (int)kgem->reloc[i].offset, - kgem->reloc[i].target_handle, - kgem->reloc[i].delta, - kgem->reloc[i].read_domains, - kgem->reloc[i].write_domain, - (int)kgem->reloc[i].presumed_offset); + ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n", + i, + kgem->exec[i].handle, + (int)kgem->exec[i].offset, + found ? kgem_bo_size(found) : -1, + found ? found->tiling : -1, + (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), + found ? found->snoop : -1, + found ? 
found->purged : -1); + } + for (i = 0; i < kgem->nreloc; i++) { + ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n", + i, + (int)kgem->reloc[i].offset, + kgem->reloc[i].target_handle, + kgem->reloc[i].delta, + kgem->reloc[i].read_domains, + kgem->reloc[i].write_domain, + (int)kgem->reloc[i].presumed_offset); + } + + { + struct drm_i915_gem_get_aperture aperture; + if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0) + ErrorF("Aperture size %lld, available %lld\n", + (long long)aperture.aper_size, + (long long)aperture.aper_available_size); + } + + if (ret == -ENOSPC) + dump_gtt_info(kgem); + if (ret == -EDEADLK) + dump_fence_regs(kgem); + + if (DEBUG_SYNC) { + int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); + if (fd != -1) { + int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); + assert(ignored == batch_end*sizeof(uint32_t)); + close(fd); } - { - struct drm_i915_gem_get_aperture aperture; - if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0) - ErrorF("Aperture size %lld, available %lld\n", - (long long)aperture.aper_size, - (long long)aperture.aper_available_size); - } + FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret); + } +#endif + } else { + if (DEBUG_SYNC) { + struct drm_i915_gem_set_domain set_domain; - if (ret == -ENOSPC) - dump_gtt_info(kgem); - if (ret == -EDEADLK) - dump_fence_regs(kgem); - - if (DEBUG_SYNC) { - int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); - if (fd != -1) { - int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); - assert(ignored == batch_end*sizeof(uint32_t)); - close(fd); - } + VG_CLEAR(set_domain); + set_domain.handle = rq->bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; - FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret); - } -#endif + ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, 
&set_domain); } - } + #if SHOW_BATCH_AFTER - if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0) - __kgem_batch_debug(kgem, batch_end); + if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0) + __kgem_batch_debug(kgem, batch_end); #endif - kgem_commit(kgem); - if (kgem->wedged) + + kgem_commit(kgem); + } + + if (unlikely(kgem->wedged)) kgem_cleanup(kgem); kgem_reset(kgem); @@ -3737,49 +4277,14 @@ void _kgem_submit(struct kgem *kgem) assert(kgem->next_request != NULL); } -static bool find_hang_state(struct kgem *kgem, char *path, int maxlen) -{ - int minor = kgem_get_minor(kgem); - - /* Search for our hang state in a few canonical locations. - * In the unlikely event of having multiple devices, we - * will need to check which minor actually corresponds to ours. - */ - - snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor); - if (access(path, R_OK) == 0) - return true; - - snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor); - if (access(path, R_OK) == 0) - return true; - - snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor); - if (access(path, R_OK) == 0) - return true; - - path[0] = '\0'; - return false; -} - void kgem_throttle(struct kgem *kgem) { - if (kgem->wedged) + if (unlikely(kgem->wedged)) return; if (__kgem_throttle(kgem, true)) { - static int once; - char path[128]; - xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, "Detected a hung GPU, disabling acceleration.\n"); - if (!once && find_hang_state(kgem, path, sizeof(path))) { - xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, - "When reporting this, please include %s and the full dmesg.\n", - path); - once = 1; - } - __kgem_set_wedged(kgem); kgem->need_throttle = false; } @@ -3860,7 +4365,8 @@ bool kgem_expire_cache(struct kgem *kgem) bool idle; unsigned int i; - time(&now); + if (!time(&now)) + return false; while (__kgem_freed_bo) { bo = __kgem_freed_bo; @@ -3875,7 +4381,7 @@ bool 
kgem_expire_cache(struct kgem *kgem) } kgem_clean_large_cache(kgem); - if (container_of(kgem, struct sna, kgem)->scrn->vtSema) + if (__to_sna(kgem)->scrn->vtSema) kgem_clean_scanout_cache(kgem); expire = 0; @@ -3885,6 +4391,7 @@ bool kgem_expire_cache(struct kgem *kgem) break; } + assert(now); bo->delta = now; } if (expire) { @@ -3909,7 +4416,7 @@ bool kgem_expire_cache(struct kgem *kgem) #endif kgem_retire(kgem); - if (kgem->wedged) + if (unlikely(kgem->wedged)) kgem_cleanup(kgem); kgem->expire(kgem); @@ -3930,6 +4437,8 @@ bool kgem_expire_cache(struct kgem *kgem) break; } + assert(now); + kgem_bo_set_purgeable(kgem, bo); bo->delta = now; } } @@ -3960,16 +4469,11 @@ bool kgem_expire_cache(struct kgem *kgem) count++; size += bytes(bo); kgem_bo_free(kgem, bo); - DBG(("%s: expiring %d\n", + DBG(("%s: expiring handle=%d\n", __FUNCTION__, bo->handle)); } } - if (!list_is_empty(&preserve)) { - preserve.prev->next = kgem->inactive[i].next; - kgem->inactive[i].next->prev = preserve.prev; - kgem->inactive[i].next = preserve.next; - preserve.next->prev = &kgem->inactive[i]; - } + list_splice_tail(&preserve, &kgem->inactive[i]); } #ifdef DEBUG_MEMORY @@ -3998,31 +4502,30 @@ bool kgem_cleanup_cache(struct kgem *kgem) unsigned int i; int n; + DBG(("%s\n", __FUNCTION__)); + /* sync to the most recent request */ for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { if (!list_is_empty(&kgem->requests[n])) { struct kgem_request *rq; - struct drm_i915_gem_set_domain set_domain; - rq = list_first_entry(&kgem->requests[n], - struct kgem_request, - list); + rq = list_last_entry(&kgem->requests[n], + struct kgem_request, + list); DBG(("%s: sync on cleanup\n", __FUNCTION__)); - - VG_CLEAR(set_domain); - set_domain.handle = rq->bo->handle; - set_domain.read_domains = I915_GEM_DOMAIN_GTT; - set_domain.write_domain = I915_GEM_DOMAIN_GTT; - (void)do_ioctl(kgem->fd, - DRM_IOCTL_I915_GEM_SET_DOMAIN, - &set_domain); + assert(rq->ring == n); + assert(rq->bo); + assert(RQ(rq->bo->rq) == rq); + 
kgem_bo_wait(kgem, rq->bo); } + assert(list_is_empty(&kgem->requests[n])); } kgem_retire(kgem); kgem_cleanup(kgem); + DBG(("%s: need_expire?=%d\n", __FUNCTION__, kgem->need_expire)); if (!kgem->need_expire) return false; @@ -4049,6 +4552,8 @@ bool kgem_cleanup_cache(struct kgem *kgem) kgem->need_purge = false; kgem->need_expire = false; + + DBG(("%s: complete\n", __FUNCTION__)); return true; } @@ -4079,16 +4584,15 @@ retry_large: goto discard; if (bo->tiling != I915_TILING_NONE) { - if (use_active) + if (use_active && kgem->gen < 040) goto discard; - if (!gem_set_tiling(kgem->fd, bo->handle, + if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0)) goto discard; - - bo->tiling = I915_TILING_NONE; - bo->pitch = 0; } + assert(bo->tiling == I915_TILING_NONE); + bo->pitch = 0; if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) goto discard; @@ -4169,17 +4673,17 @@ discard: break; } - if (I915_TILING_NONE != bo->tiling && - !gem_set_tiling(kgem->fd, bo->handle, - I915_TILING_NONE, 0)) - continue; + if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0)) { + kgem_bo_free(kgem, bo); + break; + } kgem_bo_remove_from_inactive(kgem, bo); assert(list_is_empty(&bo->vma)); assert(list_is_empty(&bo->list)); - bo->tiling = I915_TILING_NONE; - bo->pitch = 0; + assert(bo->tiling == I915_TILING_NONE); + assert(bo->pitch == 0); bo->delta = 0; DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n", __FUNCTION__, bo->handle, num_pages(bo))); @@ -4225,13 +4729,13 @@ discard: if (first) continue; - if (!gem_set_tiling(kgem->fd, bo->handle, - I915_TILING_NONE, 0)) - continue; - - bo->tiling = I915_TILING_NONE; - bo->pitch = 0; + if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0)) { + kgem_bo_free(kgem, bo); + break; + } } + assert(bo->tiling == I915_TILING_NONE); + bo->pitch = 0; if (bo->map__gtt || bo->map__wc || bo->map__cpu) { if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { @@ -4269,7 +4773,7 @@ discard: kgem_bo_remove_from_inactive(kgem, bo); assert(bo->tiling == 
I915_TILING_NONE); - bo->pitch = 0; + assert(bo->pitch == 0); bo->delta = 0; DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n", __FUNCTION__, bo->handle, num_pages(bo), @@ -4340,9 +4844,9 @@ struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name) bo->unique_id = kgem_get_unique_id(kgem); bo->tiling = tiling.tiling_mode; - bo->reusable = false; bo->prime = true; - bo->purged = true; /* no coherency guarantees */ + bo->reusable = false; + kgem_bo_unclean(kgem, bo); debug_alloc__bo(kgem, bo); return bo; @@ -4448,6 +4952,8 @@ int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo) #if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC) struct drm_prime_handle args; + assert(kgem_bo_is_fenced(kgem, bo)); + VG_CLEAR(args); args.handle = bo->handle; args.flags = O_CLOEXEC; @@ -4479,6 +4985,8 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) if ((flags & CREATE_UNCACHED) == 0) { bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags); if (bo) { + assert(!bo->purged); + assert(!bo->delta); assert(bo->domain != DOMAIN_GPU); ASSERT_IDLE(kgem, bo->handle); bo->refcnt = 1; @@ -4760,8 +5268,7 @@ static void __kgem_bo_make_scanout(struct kgem *kgem, struct kgem_bo *bo, int width, int height) { - ScrnInfoPtr scrn = - container_of(kgem, struct sna, kgem)->scrn; + ScrnInfoPtr scrn = __to_sna(kgem)->scrn; struct drm_mode_fb_cmd arg; assert(bo->proxy == NULL); @@ -4809,6 +5316,48 @@ static void __kgem_bo_make_scanout(struct kgem *kgem, } } +static bool tiling_changed(struct kgem_bo *bo, int tiling, int pitch) +{ + if (tiling != bo->tiling) + return true; + + return tiling != I915_TILING_NONE && pitch != bo->pitch; +} + +static void set_gpu_tiling(struct kgem *kgem, + struct kgem_bo *bo, + int tiling, int pitch) +{ + DBG(("%s: handle=%d, tiling=%d, pitch=%d\n", + __FUNCTION__, bo->handle, tiling, pitch)); + + if (tiling_changed(bo, tiling, pitch) && bo->map__gtt) { + if (!list_is_empty(&bo->vma)) { + 
list_del(&bo->vma); + kgem->vma[0].count--; + } + munmap(bo->map__gtt, bytes(bo)); + bo->map__gtt = NULL; + } + + bo->tiling = tiling; + bo->pitch = pitch; +} + +bool kgem_bo_is_fenced(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_get_tiling tiling; + + assert(kgem); + assert(bo); + + VG_CLEAR(tiling); + tiling.handle = bo->handle; + tiling.tiling_mode = bo->tiling; + (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling); + return tiling.tiling_mode == bo->tiling; /* assume pitch is fine! */ +} + struct kgem_bo *kgem_create_2d(struct kgem *kgem, int width, int height, @@ -4892,8 +5441,8 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, return last; } - if (container_of(kgem, struct sna, kgem)->scrn->vtSema) { - ScrnInfoPtr scrn = container_of(kgem, struct sna, kgem)->scrn; + if (__to_sna(kgem)->scrn->vtSema) { + ScrnInfoPtr scrn = __to_sna(kgem)->scrn; list_for_each_entry_reverse(bo, &kgem->scanout, list) { struct drm_mode_fb_cmd arg; @@ -4915,11 +5464,8 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, bo->delta = 0; } - if (gem_set_tiling(kgem->fd, bo->handle, - tiling, pitch)) { - bo->tiling = tiling; - bo->pitch = pitch; - } else { + if (!kgem_set_tiling(kgem, bo, + tiling, pitch)) { kgem_bo_free(kgem, bo); break; } @@ -4950,6 +5496,9 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, } } + if (flags & CREATE_CACHED) + return NULL; + bo = __kgem_bo_create_as_display(kgem, size, tiling, pitch); if (bo) return bo; @@ -4987,14 +5536,9 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, if (num_pages(bo) < size) continue; - if (bo->pitch != pitch || bo->tiling != tiling) { - if (!gem_set_tiling(kgem->fd, bo->handle, - tiling, pitch)) - continue; - - bo->pitch = pitch; - bo->tiling = tiling; - } + if (!kgem_set_tiling(kgem, bo, tiling, pitch) && + !exact) + set_gpu_tiling(kgem, bo, tiling, pitch); } kgem_bo_remove_from_active(kgem, bo); @@ -5020,14 +5564,11 @@ large_inactive: if (size > num_pages(bo)) continue; - if (bo->tiling 
!= tiling || - (tiling != I915_TILING_NONE && bo->pitch != pitch)) { - if (!gem_set_tiling(kgem->fd, bo->handle, - tiling, pitch)) + if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { + if (kgem->gen >= 040 && !exact) + set_gpu_tiling(kgem, bo, tiling, pitch); + else continue; - - bo->tiling = tiling; - bo->pitch = pitch; } if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { @@ -5039,7 +5580,6 @@ large_inactive: assert(bo->domain != DOMAIN_GPU); bo->unique_id = kgem_get_unique_id(kgem); - bo->pitch = pitch; bo->delta = 0; DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); @@ -5088,14 +5628,13 @@ large_inactive: if (bo->tiling != tiling || (tiling != I915_TILING_NONE && bo->pitch != pitch)) { if (bo->map__gtt || - !gem_set_tiling(kgem->fd, bo->handle, - tiling, pitch)) { + !kgem_set_tiling(kgem, bo, + tiling, pitch)) { DBG(("inactive GTT vma with wrong tiling: %d < %d\n", bo->tiling, tiling)); - continue; + kgem_bo_free(kgem, bo); + break; } - bo->tiling = tiling; - bo->pitch = pitch; } if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { @@ -5103,8 +5642,11 @@ large_inactive: break; } + if (tiling == I915_TILING_NONE) + bo->pitch = pitch; + assert(bo->tiling == tiling); - bo->pitch = pitch; + assert(bo->pitch >= pitch); bo->delta = 0; bo->unique_id = kgem_get_unique_id(kgem); @@ -5170,15 +5712,12 @@ search_active: if (num_pages(bo) < size) continue; - if (bo->pitch != pitch) { - if (!gem_set_tiling(kgem->fd, - bo->handle, - tiling, pitch)) - continue; - - bo->pitch = pitch; - } + if (!kgem_set_tiling(kgem, bo, tiling, pitch) && + !exact) + set_gpu_tiling(kgem, bo, tiling, pitch); } + assert(bo->tiling == tiling); + assert(bo->pitch >= pitch); kgem_bo_remove_from_active(kgem, bo); @@ -5233,19 +5772,21 @@ search_active: if (num_pages(bo) < size) continue; - if (bo->tiling != tiling || - (tiling != I915_TILING_NONE && bo->pitch != pitch)) { - if (!gem_set_tiling(kgem->fd, - bo->handle, - 
tiling, pitch)) - continue; + if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { + if (kgem->gen >= 040 && !exact) { + set_gpu_tiling(kgem, bo, + tiling, pitch); + } else { + kgem_bo_free(kgem, bo); + break; + } } + assert(bo->tiling == tiling); + assert(bo->pitch >= pitch); kgem_bo_remove_from_active(kgem, bo); bo->unique_id = kgem_get_unique_id(kgem); - bo->pitch = pitch; - bo->tiling = tiling; bo->delta = 0; DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); @@ -5323,11 +5864,13 @@ search_inactive: continue; } - if (bo->tiling != tiling || - (tiling != I915_TILING_NONE && bo->pitch != pitch)) { - if (!gem_set_tiling(kgem->fd, bo->handle, - tiling, pitch)) - continue; + if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { + if (kgem->gen >= 040 && !exact) { + set_gpu_tiling(kgem, bo, tiling, pitch); + } else { + kgem_bo_free(kgem, bo); + break; + } } if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { @@ -5338,9 +5881,8 @@ search_inactive: kgem_bo_remove_from_inactive(kgem, bo); assert(list_is_empty(&bo->list)); assert(list_is_empty(&bo->vma)); - - bo->pitch = pitch; - bo->tiling = tiling; + assert(bo->tiling == tiling); + assert(bo->pitch >= pitch); bo->delta = 0; bo->unique_id = kgem_get_unique_id(kgem); @@ -5388,14 +5930,17 @@ search_inactive: kgem_bo_remove_from_active(kgem, bo); __kgem_bo_clear_busy(bo); - if (tiling != I915_TILING_NONE && bo->pitch != pitch) { - if (!gem_set_tiling(kgem->fd, bo->handle, tiling, pitch)) { + if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { + if (kgem->gen >= 040 && !exact) { + set_gpu_tiling(kgem, bo, tiling, pitch); + } else { kgem_bo_free(kgem, bo); goto no_retire; } } + assert(bo->tiling == tiling); + assert(bo->pitch >= pitch); - bo->pitch = pitch; bo->unique_id = kgem_get_unique_id(kgem); bo->delta = 0; DBG((" 2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", @@ -5440,18 +5985,21 @@ create: } bo->unique_id = kgem_get_unique_id(kgem); - if (tiling == 
I915_TILING_NONE || - gem_set_tiling(kgem->fd, handle, tiling, pitch)) { - bo->tiling = tiling; - bo->pitch = pitch; + if (kgem_set_tiling(kgem, bo, tiling, pitch)) { if (flags & CREATE_SCANOUT) __kgem_bo_make_scanout(kgem, bo, width, height); } else { - if (flags & CREATE_EXACT) { - DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__)); - gem_close(kgem->fd, handle); - free(bo); - return NULL; + if (kgem->gen >= 040) { + assert(!kgem->can_fence); + bo->tiling = tiling; + bo->pitch = pitch; + } else { + if (flags & CREATE_EXACT) { + DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__)); + gem_close(kgem->fd, handle); + free(bo); + return NULL; + } } } @@ -5608,7 +6156,7 @@ static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo) { - if (!bo->needs_flush) + if (!bo->needs_flush && !bo->gtt_dirty) return; kgem_bo_submit(kgem, bo); @@ -5621,18 +6169,24 @@ void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo) if (bo->rq) __kgem_flush(kgem, bo); + if (bo->scanout && kgem->needs_dirtyfb) { + struct drm_mode_fb_dirty_cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + cmd.fb_id = bo->delta; + (void)drmIoctl(kgem->fd, DRM_IOCTL_MODE_DIRTYFB, &cmd); + } + /* Whatever actually happens, we can regard the GTT write domain * as being flushed. 
*/ - bo->gtt_dirty = false; - bo->needs_flush = false; - bo->domain = DOMAIN_NONE; + __kgem_bo_clear_dirty(bo); } inline static bool nearly_idle(struct kgem *kgem) { int ring = kgem->ring == KGEM_BLT; + assert(ring < ARRAY_SIZE(kgem->requests)); if (list_is_singular(&kgem->requests[ring])) return true; @@ -5720,7 +6274,7 @@ static inline bool kgem_flush(struct kgem *kgem, bool flush) if (kgem->nreloc == 0) return true; - if (container_of(kgem, struct sna, kgem)->flags & SNA_POWERSAVE) + if (__to_sna(kgem)->flags & SNA_POWERSAVE) return true; if (kgem->flush == flush && kgem->aperture < kgem->aperture_low) @@ -5982,6 +6536,55 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) return kgem_flush(kgem, flush); } +void __kgem_bcs_set_tiling(struct kgem *kgem, + struct kgem_bo *src, + struct kgem_bo *dst) +{ + uint32_t state, *b; + + DBG(("%s: src handle=%d:tiling=%d, dst handle=%d:tiling=%d\n", + __FUNCTION__, + src ? src->handle : 0, src ? src->tiling : 0, + dst ? dst->handle : 0, dst ? 
dst->tiling : 0)); + assert(kgem->mode == KGEM_BLT); + assert(dst == NULL || kgem_bo_can_blt(kgem, dst)); + assert(src == NULL || kgem_bo_can_blt(kgem, src)); + + state = 0; + if (dst && dst->tiling == I915_TILING_Y) + state |= BCS_DST_Y; + if (src && src->tiling == I915_TILING_Y) + state |= BCS_SRC_Y; + + if (kgem->bcs_state == state) + return; + + DBG(("%s: updating SWCTRL %x -> %x\n", __FUNCTION__, + kgem->bcs_state, state)); + + /* Over-estimate space in case we need to re-emit the cmd packet */ + if (!kgem_check_batch(kgem, 24)) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + if (state == 0) + return; + } + + b = kgem->batch + kgem->nbatch; + if (kgem->nbatch) { + *b++ = MI_FLUSH_DW; + *b++ = 0; + *b++ = 0; + *b++ = 0; + } + *b++ = MI_LOAD_REGISTER_IMM; + *b++ = BCS_SWCTRL; + *b++ = (BCS_SRC_Y | BCS_DST_Y) << 16 | state; + kgem->nbatch = b - kgem->batch; + + kgem->bcs_state = state; +} + uint32_t kgem_add_reloc(struct kgem *kgem, uint32_t pos, struct kgem_bo *bo, @@ -6195,12 +6798,6 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) list_del(&bo->vma); kgem->vma[type].count--; - - if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) { - DBG(("%s: freeing unpurgeable old mapping\n", - __FUNCTION__)); - kgem_bo_free(kgem, bo); - } } } @@ -6216,8 +6813,8 @@ static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo) kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); if (bo->tiling || !kgem->has_wc_mmap) { - assert(num_pages(bo) <= kgem->aperture_mappable / 2); assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y); + warn_unless(num_pages(bo) <= kgem->aperture_mappable / 2); ptr = bo->map__gtt; if (ptr == NULL) @@ -6291,6 +6888,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); kgem_throttle(kgem); } + bo->needs_flush = false; kgem_bo_retire(kgem, bo); bo->domain = DOMAIN_GTT; bo->gtt_dirty = true; @@ -6319,14 +6917,16 @@ void 
*kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo) bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); assert(bo->proxy == NULL); - assert(bo->exec == NULL); assert(list_is_empty(&bo->list)); assert_tiling(kgem, bo); assert(!bo->purged || bo->reusable); if (bo->map__wc) return bo->map__wc; + if (!kgem->has_wc_mmap) + return NULL; + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); return __kgem_bo_map__wc(kgem, bo); } @@ -6373,6 +6973,8 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) { struct drm_gem_flink flink; + assert(kgem_bo_is_fenced(kgem, bo)); + VG_CLEAR(flink); flink.handle = bo->handle; if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink)) @@ -6387,7 +6989,6 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) * party, we track the lifetime accurately. */ bo->reusable = false; - kgem_bo_unclean(kgem, bo); return flink.name; @@ -6411,16 +7012,34 @@ struct kgem_bo *kgem_create_map(struct kgem *kgem, first_page = (uintptr_t)ptr; last_page = first_page + size + PAGE_SIZE - 1; - first_page &= ~(PAGE_SIZE-1); - last_page &= ~(PAGE_SIZE-1); + first_page &= ~(uintptr_t)(PAGE_SIZE-1); + last_page &= ~(uintptr_t)(PAGE_SIZE-1); assert(last_page > first_page); handle = gem_userptr(kgem->fd, (void *)first_page, last_page-first_page, read_only); if (handle == 0) { - DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno)); - return NULL; + if (read_only && kgem->has_wc_mmap) { + struct drm_i915_gem_set_domain set_domain; + + handle = gem_userptr(kgem->fd, + (void *)first_page, last_page-first_page, + false); + + VG_CLEAR(set_domain); + set_domain.handle = handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = 0; + if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { + gem_close(kgem->fd, handle); + handle = 0; + } + } + if (handle == 0) { + DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno)); + return NULL; + } } bo = __kgem_bo_alloc(handle, 
(last_page - first_page) / PAGE_SIZE); @@ -6483,8 +7102,10 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); kgem_throttle(kgem); } + bo->needs_flush = false; kgem_bo_retire(kgem, bo); bo->domain = DOMAIN_CPU; + bo->gtt_dirty = true; } } @@ -6505,6 +7126,9 @@ void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write) assert(bo->refcnt); assert(!bo->purged); + if (bo->rq == NULL && (kgem->has_llc || bo->snoop) && !write) + return; + if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) { struct drm_i915_gem_set_domain set_domain; @@ -6522,9 +7146,11 @@ void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write) DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); kgem_throttle(kgem); } + bo->needs_flush = false; if (write) { kgem_bo_retire(kgem, bo); bo->domain = DOMAIN_CPU; + bo->gtt_dirty = true; } else { if (bo->exec == NULL) kgem_bo_maybe_retire(kgem, bo); @@ -6539,6 +7165,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) assert(bo->refcnt); assert(bo->proxy == NULL); assert_tiling(kgem, bo); + assert(!bo->snoop); kgem_bo_submit(kgem, bo); @@ -6559,6 +7186,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); kgem_throttle(kgem); } + bo->needs_flush = false; kgem_bo_retire(kgem, bo); bo->domain = DOMAIN_GTT; bo->gtt_dirty = true; @@ -7485,6 +8113,7 @@ kgem_replace_bo(struct kgem *kgem, } _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(kgem, src, dst); br00 = XY_SRC_COPY_BLT_CMD; br13 = pitch; @@ -7553,6 +8182,9 @@ bool kgem_bo_convert_to_gpu(struct kgem *kgem, __FUNCTION__, bo->handle, flags, __kgem_bo_is_busy(kgem, bo))); assert(bo->tiling == I915_TILING_NONE); + if (flags & (__MOVE_PRIME | __MOVE_SCANOUT)) + return false; + if (kgem->has_llc) return true; diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 2267bacf..08b4eb20 100644 --- 
a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -42,6 +42,7 @@ struct kgem_bo { #define RQ(rq) ((struct kgem_request *)((uintptr_t)(rq) & ~3)) #define RQ_RING(rq) ((uintptr_t)(rq) & 3) #define RQ_IS_BLT(rq) (RQ_RING(rq) == KGEM_BLT) +#define RQ_IS_RENDER(rq) (RQ_RING(rq) == KGEM_RENDER) #define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring))) struct drm_i915_gem_exec_object2 *exec; @@ -103,7 +104,7 @@ struct kgem_request { struct list list; struct kgem_bo *bo; struct list buffers; - int ring; + unsigned ring; }; enum { @@ -112,6 +113,12 @@ enum { NUM_MAP_TYPES, }; +typedef void (*memcpy_box_func)(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height); + struct kgem { unsigned wedged; int fd; @@ -157,6 +164,8 @@ struct kgem { int16_t count; } vma[NUM_MAP_TYPES]; + uint32_t bcs_state; + uint32_t batch_flags; uint32_t batch_flags_base; #define I915_EXEC_SECURE (1<<9) @@ -186,9 +195,15 @@ struct kgem { uint32_t has_no_reloc :1; uint32_t has_handle_lut :1; uint32_t has_wc_mmap :1; + uint32_t has_dirtyfb :1; + uint32_t can_fence :1; uint32_t can_blt_cpu :1; + uint32_t can_blt_y :1; uint32_t can_render_y :1; + uint32_t can_scanout_y :1; + + uint32_t needs_dirtyfb :1; uint16_t fence_max; uint16_t half_cpu_cache_pages; @@ -203,16 +218,9 @@ struct kgem { void (*retire)(struct kgem *kgem); void (*expire)(struct kgem *kgem); - void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height); - void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height); + memcpy_box_func memcpy_to_tiled_x; + memcpy_box_func memcpy_from_tiled_x; + memcpy_box_func 
memcpy_between_tiled_x; struct kgem_bo *batch_bo; @@ -230,7 +238,7 @@ struct kgem { #define KGEM_MAX_DEFERRED_VBO 16 -#define KGEM_BATCH_RESERVED 1 +#define KGEM_BATCH_RESERVED 8 /* LRI(SWCTRL) + END */ #define KGEM_RELOC_RESERVED (KGEM_MAX_DEFERRED_VBO) #define KGEM_EXEC_RESERVED (1+KGEM_MAX_DEFERRED_VBO) @@ -317,6 +325,7 @@ bool kgem_bo_convert_to_gpu(struct kgem *kgem, struct kgem_bo *bo, unsigned flags); +bool kgem_bo_is_fenced(struct kgem *kgem, struct kgem_bo *bo); uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); @@ -342,6 +351,11 @@ static inline bool kgem_ring_is_idle(struct kgem *kgem, int ring) { ring = ring == KGEM_BLT; + if (kgem->needs_semaphore && + !list_is_empty(&kgem->requests[!ring]) && + !__kgem_ring_is_idle(kgem, !ring)) + return false; + if (list_is_empty(&kgem->requests[ring])) return true; @@ -390,6 +404,7 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo); static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) { assert(bo->refcnt); + assert(bo->refcnt > bo->active_scanout); if (--bo->refcnt == 0) _kgem_bo_destroy(kgem, bo); } @@ -400,13 +415,13 @@ static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode, struct kgem_bo *bo) { - assert(!kgem->wedged); + warn_unless(!kgem->wedged); #if DEBUG_FLUSH_BATCH kgem_submit(kgem); #endif - if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring)) { + if (kgem->nreloc && bo->rq == NULL && kgem_ring_is_idle(kgem, kgem->ring)) { DBG(("%s: flushing before new bo\n", __FUNCTION__)); _kgem_submit(kgem); } @@ -422,7 +437,7 @@ static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) { assert(kgem->mode == KGEM_NONE); assert(kgem->nbatch == 0); - assert(!kgem->wedged); + warn_unless(!kgem->wedged); kgem->context_switch(kgem, mode); kgem->mode = mode; } @@ -566,7 +581,7 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem, { 
assert(bo->refcnt); - if (bo->tiling == I915_TILING_Y) { + if (bo->tiling == I915_TILING_Y && !kgem->can_blt_y) { DBG(("%s: can not blt to handle=%d, tiling=Y\n", __FUNCTION__, bo->handle)); return false; @@ -581,6 +596,22 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem, return kgem_bo_blt_pitch_is_ok(kgem, bo); } +void __kgem_bcs_set_tiling(struct kgem *kgem, + struct kgem_bo *src, + struct kgem_bo *dst); + +inline static void kgem_bcs_set_tiling(struct kgem *kgem, + struct kgem_bo *src, + struct kgem_bo *dst) +{ + assert(kgem->mode == KGEM_BLT); + + if (!kgem->can_blt_y) + return; + + __kgem_bcs_set_tiling(kgem, src, dst); +} + static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) { assert(bo->refcnt); @@ -607,17 +638,24 @@ static inline void kgem_bo_mark_busy(struct kgem *kgem, struct kgem_bo *bo, int } } -inline static void __kgem_bo_clear_busy(struct kgem_bo *bo) +static inline void __kgem_bo_clear_dirty(struct kgem_bo *bo) { DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); - bo->rq = NULL; - list_del(&bo->request); bo->domain = DOMAIN_NONE; bo->needs_flush = false; bo->gtt_dirty = false; } +inline static void __kgem_bo_clear_busy(struct kgem_bo *bo) +{ + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + bo->rq = NULL; + list_del(&bo->request); + + __kgem_bo_clear_dirty(bo); +} + static inline bool kgem_bo_is_busy(struct kgem_bo *bo) { DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, @@ -626,7 +664,7 @@ static inline bool kgem_bo_is_busy(struct kgem_bo *bo) return bo->rq; } -void __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo); +bool __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo); static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: handle=%d, domain: %d exec? %d, rq? 
%d\n", __FUNCTION__, @@ -636,14 +674,13 @@ static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) if (bo->exec) return true; - if (bo->rq && !__kgem_busy(kgem, bo->handle)) { - __kgem_retire_requests_upto(kgem, bo); - assert(list_is_empty(&bo->request)); - assert(bo->rq == NULL); - assert(bo->domain == DOMAIN_NONE); - } + if (bo->rq == NULL) + return false; + + if (__kgem_busy(kgem, bo->handle)) + return true; - return kgem_bo_is_busy(bo); + return __kgem_retire_requests_upto(kgem, bo); } static inline bool kgem_bo_is_render(struct kgem_bo *bo) @@ -651,7 +688,15 @@ static inline bool kgem_bo_is_render(struct kgem_bo *bo) DBG(("%s: handle=%d, rq? %d [%d]\n", __FUNCTION__, bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq))); assert(bo->refcnt); - return bo->rq && RQ_RING(bo->rq) == I915_EXEC_RENDER; + return bo->rq && RQ_RING(bo->rq) != KGEM_BLT; +} + +static inline bool kgem_bo_is_blt(struct kgem_bo *bo) +{ + DBG(("%s: handle=%d, rq? %d\n", __FUNCTION__, + bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq))); + assert(bo->refcnt); + return RQ_RING(bo->rq) == KGEM_BLT; } static inline void kgem_bo_mark_unreusable(struct kgem_bo *bo) @@ -852,6 +897,6 @@ memcpy_from_tiled_x(struct kgem *kgem, width, height); } -void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling); +void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling, unsigned cpu); #endif /* KGEM_H */ diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c index 9b80dc88..8e6e47b6 100644 --- a/src/sna/kgem_debug_gen4.c +++ b/src/sna/kgem_debug_gen4.c @@ -598,7 +598,7 @@ int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset) assert(len == 7); kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_BUFFER\n"); - kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", get_965_surfacetype(data[1] >> 29), get_965_depthformat((data[1] 
>> 18) & 0x7), (data[1] & 0x0001ffff) + 1, diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c index 8b55dd91..f1b1275f 100644 --- a/src/sna/kgem_debug_gen5.c +++ b/src/sna/kgem_debug_gen5.c @@ -573,7 +573,7 @@ int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset) assert(len == 7); kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_BUFFER\n"); - kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", get_965_surfacetype(data[1] >> 29), get_965_depthformat((data[1] >> 18) & 0x7), (data[1] & 0x0001ffff) + 1, diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c index 7ef55d38..579c5d54 100644 --- a/src/sna/kgem_debug_gen6.c +++ b/src/sna/kgem_debug_gen6.c @@ -985,7 +985,7 @@ int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset) assert(len == 7); kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_BUFFER\n"); - kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", get_965_surfacetype(data[1] >> 29), get_965_depthformat((data[1] >> 18) & 0x7), (data[1] & 0x0001ffff) + 1, diff --git a/src/sna/sna.h b/src/sna/sna.h index 18425e30..7861110a 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -154,6 +154,8 @@ struct sna_pixmap { #define MAPPED_GTT 1 #define MAPPED_CPU 2 uint8_t flush :2; +#define FLUSH_READ 1 +#define FLUSH_WRITE 2 uint8_t shm :1; uint8_t clear :1; uint8_t header :1; @@ -179,18 +181,31 @@ static inline WindowPtr get_root_window(ScreenPtr screen) #endif } +#if !NDEBUG +static PixmapPtr check_pixmap(PixmapPtr pixmap) +{ + if (pixmap != NULL) { + assert(pixmap->refcnt >= 1); + assert(pixmap->devKind != 0xdeadbeef); + } + return pixmap; +} +#else +#define check_pixmap(p) p +#endif + static inline PixmapPtr 
get_window_pixmap(WindowPtr window) { assert(window); assert(window->drawable.type != DRAWABLE_PIXMAP); - return fbGetWindowPixmap(window); + return check_pixmap(fbGetWindowPixmap(window)); } static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable) { assert(drawable); if (drawable->type == DRAWABLE_PIXMAP) - return (PixmapPtr)drawable; + return check_pixmap((PixmapPtr)drawable); else return get_window_pixmap((WindowPtr)drawable); } @@ -244,11 +259,12 @@ struct sna { #define SNA_NO_VSYNC 0x40 #define SNA_TRIPLE_BUFFER 0x80 #define SNA_TEAR_FREE 0x100 -#define SNA_FORCE_SHADOW 0x200 -#define SNA_FLUSH_GTT 0x400 +#define SNA_WANT_TEAR_FREE 0x200 +#define SNA_FORCE_SHADOW 0x400 +#define SNA_FLUSH_GTT 0x800 #define SNA_PERFORMANCE 0x1000 #define SNA_POWERSAVE 0x2000 -#define SNA_REMOVE_OUTPUTS 0x4000 +#define SNA_NO_DPMS 0x4000 #define SNA_HAS_FLIP 0x10000 #define SNA_HAS_ASYNC_FLIP 0x20000 #define SNA_LINEAR_FB 0x40000 @@ -265,7 +281,13 @@ struct sna { #define AVX 0x80 #define AVX2 0x100 - unsigned watch_flush; + bool ignore_copy_area : 1; + + unsigned watch_shm_flush; + unsigned watch_dri_flush; + unsigned damage_event; + bool needs_shm_flush; + bool needs_dri_flush; struct timeval timer_tv; uint32_t timer_expire[NUM_TIMERS]; @@ -284,9 +306,17 @@ struct sna { struct kgem_bo *shadow; unsigned front_active; unsigned shadow_active; + unsigned rr_active; unsigned flip_active; + unsigned hidden; + bool shadow_enabled; + bool shadow_wait; bool dirty; + struct drm_event_vblank *shadow_events; + int shadow_nevent; + int shadow_size; + int max_crtc_width, max_crtc_height; RegionRec shadow_region; RegionRec shadow_cancel; @@ -318,7 +348,8 @@ struct sna { uint32_t fg, bg; int size; - int active; + bool disable; + bool active; int last_x; int last_y; @@ -331,8 +362,9 @@ struct sna { } cursor; struct sna_dri2 { - bool available; - bool open; + bool available : 1; + bool enable : 1; + bool open : 1; #if HAVE_DRI2 void *flip_pending; @@ -341,8 +373,11 @@ struct sna { } 
dri2; struct sna_dri3 { - bool available; - bool open; + bool available :1; + bool override : 1; + bool enable : 1; + bool open :1; + #if HAVE_DRI3 SyncScreenCreateFenceFunc create_fence; struct list pixmaps; @@ -353,6 +388,9 @@ struct sna { bool available; bool open; #if HAVE_PRESENT + struct list vblank_queue; + uint64_t unflip; + void *freed_info; #endif } present; @@ -364,8 +402,10 @@ struct sna { EntityInfoPtr pEnt; const struct intel_device_info *info; +#if !HAVE_NOTIFY_FD ScreenBlockHandlerProcPtr BlockHandler; ScreenWakeupHandlerProcPtr WakeupHandler; +#endif CloseScreenProcPtr CloseScreen; PicturePtr clear; @@ -383,6 +423,7 @@ struct sna { struct gen6_render_state gen6; struct gen7_render_state gen7; struct gen8_render_state gen8; + struct gen9_render_state gen9; } render_state; /* Broken-out options. */ @@ -420,7 +461,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna); bool sna_mode_fake_init(struct sna *sna, int num_fake); bool sna_mode_wants_tear_free(struct sna *sna); void sna_mode_adjust_frame(struct sna *sna, int x, int y); -extern void sna_mode_discover(struct sna *sna); +extern void sna_mode_discover(struct sna *sna, bool tell); extern void sna_mode_check(struct sna *sna); extern bool sna_mode_disable(struct sna *sna); extern void sna_mode_enable(struct sna *sna); @@ -434,6 +475,7 @@ extern void sna_shadow_unset_crtc(struct sna *sna, xf86CrtcPtr crtc); extern bool sna_pixmap_discard_shadow_damage(struct sna_pixmap *priv, const RegionRec *region); extern void sna_mode_set_primary(struct sna *sna); +extern bool sna_mode_find_hotplug_connector(struct sna *sna, unsigned id); extern void sna_mode_close(struct sna *sna); extern void sna_mode_fini(struct sna *sna); @@ -444,6 +486,7 @@ extern bool sna_cursors_init(ScreenPtr screen, struct sna *sna); typedef void (*sna_flip_handler_t)(struct drm_event_vblank *e, void *data); +extern bool sna_needs_page_flip(struct sna *sna, struct kgem_bo *bo); extern int sna_page_flip(struct sna *sna, struct 
kgem_bo *bo, sna_flip_handler_t handler, @@ -461,6 +504,11 @@ to_sna_from_screen(ScreenPtr screen) return to_sna(xf86ScreenToScrn(screen)); } +pure static inline ScreenPtr to_screen_from_sna(struct sna *sna) +{ + return xf86ScrnToScreen(sna->scrn); +} + pure static inline struct sna * to_sna_from_pixmap(PixmapPtr pixmap) { @@ -498,12 +546,11 @@ to_sna_from_kgem(struct kgem *kgem) extern xf86CrtcPtr sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired); +extern xf86CrtcPtr sna_primary_crtc(struct sna *sna); extern bool sna_wait_for_scanline(struct sna *sna, PixmapPtr pixmap, xf86CrtcPtr crtc, const BoxRec *clip); -xf86CrtcPtr sna_mode_first_crtc(struct sna *sna); - const struct ust_msc { uint64_t msc; int tv_sec; @@ -536,6 +583,11 @@ static inline uint64_t ust64(int tv_sec, int tv_usec) return (uint64_t)tv_sec * 1000000 + tv_usec; } +static inline uint64_t swap_ust(const struct ust_msc *swap) +{ + return ust64(swap->tv_sec, swap->tv_usec); +} + #if HAVE_DRI2 bool sna_dri2_open(struct sna *sna, ScreenPtr pScreen); void sna_dri2_page_flip_handler(struct sna *sna, struct drm_event_vblank *event); @@ -567,20 +619,59 @@ bool sna_present_open(struct sna *sna, ScreenPtr pScreen); void sna_present_update(struct sna *sna); void sna_present_close(struct sna *sna, ScreenPtr pScreen); void sna_present_vblank_handler(struct drm_event_vblank *event); +void sna_present_cancel_flip(struct sna *sna); #else static inline bool sna_present_open(struct sna *sna, ScreenPtr pScreen) { return false; } static inline void sna_present_update(struct sna *sna) { } static inline void sna_present_close(struct sna *sna, ScreenPtr pScreen) { } static inline void sna_present_vblank_handler(struct drm_event_vblank *event) { } +static inline void sna_present_cancel_flip(struct sna *sna) { } #endif -extern bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, uint32_t rotation); -extern int sna_crtc_to_pipe(xf86CrtcPtr crtc); -extern uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc); 
-extern uint32_t sna_crtc_id(xf86CrtcPtr crtc); -extern bool sna_crtc_is_on(xf86CrtcPtr crtc); +extern unsigned sna_crtc_count_sprites(xf86CrtcPtr crtc); +extern bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, unsigned idx, uint32_t rotation); +extern uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc, unsigned idx); extern bool sna_crtc_is_transformed(xf86CrtcPtr crtc); +#define CRTC_VBLANK 0x3 +#define CRTC_ON 0x80000000 + +uint32_t sna_crtc_id(xf86CrtcPtr crtc); + +static inline unsigned long *sna_crtc_flags(xf86CrtcPtr crtc) +{ + unsigned long *flags = crtc->driver_private; + assert(flags); + return flags; +} + +static inline unsigned sna_crtc_pipe(xf86CrtcPtr crtc) +{ + return *sna_crtc_flags(crtc) >> 8 & 0xff; +} + +static inline bool sna_crtc_is_on(xf86CrtcPtr crtc) +{ + return *sna_crtc_flags(crtc) & CRTC_ON; +} + +static inline void sna_crtc_set_vblank(xf86CrtcPtr crtc) +{ + assert((*sna_crtc_flags(crtc) & CRTC_VBLANK) < 3); + ++*sna_crtc_flags(crtc); +} + +static inline void sna_crtc_clear_vblank(xf86CrtcPtr crtc) +{ + assert(*sna_crtc_flags(crtc) & CRTC_VBLANK); + --*sna_crtc_flags(crtc); +} + +static inline bool sna_crtc_has_vblank(xf86CrtcPtr crtc) +{ + return *sna_crtc_flags(crtc) & CRTC_VBLANK; +} + CARD32 sna_format_for_depth(int depth); CARD32 sna_render_format_for_depth(int depth); @@ -998,15 +1089,14 @@ static inline uint32_t pixmap_size(PixmapPtr pixmap) bool sna_accel_init(ScreenPtr sreen, struct sna *sna); void sna_accel_create(struct sna *sna); -void sna_accel_block_handler(struct sna *sna, struct timeval **tv); -void sna_accel_wakeup_handler(struct sna *sna); -void sna_accel_watch_flush(struct sna *sna, int enable); +void sna_accel_block(struct sna *sna, struct timeval **tv); void sna_accel_flush(struct sna *sna); void sna_accel_enter(struct sna *sna); void sna_accel_leave(struct sna *sna); void sna_accel_close(struct sna *sna); void sna_accel_free(struct sna *sna); +void sna_watch_flush(struct sna *sna, int enable); void sna_copy_fbcon(struct 
sna *sna); bool sna_composite_create(struct sna *sna); @@ -1127,6 +1217,16 @@ memcpy_blt(const void *src, void *dst, int bpp, uint16_t width, uint16_t height); void +affine_blt(const void *src, void *dst, int bpp, + int16_t src_x, int16_t src_y, + int16_t src_width, int16_t src_height, + int32_t src_stride, + int16_t dst_x, int16_t dst_y, + uint16_t dst_width, uint16_t dst_height, + int32_t dst_stride, + const struct pixman_f_transform *t); + +void memmove_box(const void *src, void *dst, int bpp, int32_t stride, const BoxRec *box, @@ -1182,6 +1282,31 @@ box_intersect(BoxPtr a, const BoxRec *b) return true; } +const BoxRec * +__find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y); +inline static const BoxRec * +find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y) +{ + /* Special case for incremental trapezoid clipping */ + if (begin == end) + return end; + + /* Quick test if scanline is within range of clip boxes */ + if (begin->y2 > y) { + assert(end == begin + 1 || + __find_clip_box_for_y(begin, end, y) == begin); + return begin; + } + if (y >= end[-1].y2) { + assert(end == begin + 1 || + __find_clip_box_for_y(begin, end, y) == end); + return end; + } + + /* Otherwise bisect to find the first box crossing y */ + return __find_clip_box_for_y(begin, end, y); +} + unsigned sna_cpu_detect(void); char *sna_cpu_features_to_string(unsigned features, char *line); @@ -1237,4 +1362,17 @@ static inline void sigtrap_put(void) extern int getline(char **line, size_t *len, FILE *file); #endif +static inline void add_shm_flush(struct sna *sna, struct sna_pixmap *priv) +{ + if (!priv->shm) + return; + + DBG(("%s: marking handle=%d for SHM flush\n", + __FUNCTION__, priv->cpu_bo->handle)); + + assert(!priv->flush); + sna_add_flush_pixmap(sna, priv, priv->cpu_bo); + sna->needs_shm_flush = true; +} + #endif /* _SNA_H */ diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index baf5f609..25a075cf 100644 --- a/src/sna/sna_accel.c +++ 
b/src/sna/sna_accel.c @@ -50,8 +50,11 @@ #endif #include +#include + #include #include +#include #include #ifdef HAVE_VALGRIND @@ -66,7 +69,8 @@ #define FORCE_FLUSH 0 #define FORCE_FULL_SYNC 0 /* https://bugs.freedesktop.org/show_bug.cgi?id=61628 */ -#define DEFAULT_TILING I915_TILING_X +#define DEFAULT_PIXMAP_TILING I915_TILING_X +#define DEFAULT_SCANOUT_TILING I915_TILING_X #define USE_INPLACE 1 #define USE_SPANS 0 /* -1 force CPU, 1 force GPU */ @@ -115,6 +119,11 @@ #define RECTILINEAR 0x4 #define OVERWRITES 0x8 +#if XFONT2_CLIENT_FUNCS_VERSION >= 1 +#define AllocateFontPrivateIndex() xfont2_allocate_font_private_index() +#define FontSetPrivate(font, idx, data) xfont2_font_set_private(font, idx, data) +#endif + #if 0 static void __sna_fallback_flush(DrawablePtr d) { @@ -213,6 +222,7 @@ static GCOps sna_gc_ops__tmp; static const GCFuncs sna_gc_funcs; static const GCFuncs sna_gc_funcs__cpu; +static void sna_shm_watch_flush(struct sna *sna, int enable); static void sna_poly_fill_rect__gpu(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect); @@ -527,10 +537,10 @@ sna_pixmap_alloc_cpu(struct sna *sna, DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__, pixmap->drawable.width, pixmap->drawable.height)); - hint = 0; - if ((flags & MOVE_ASYNC_HINT) == 0 && - ((flags & MOVE_READ) == 0 || (priv->gpu_damage && !priv->clear && !sna->kgem.has_llc))) - hint = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE; + hint = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE; + if ((flags & MOVE_ASYNC_HINT) || + (priv->gpu_damage && !priv->clear && kgem_bo_is_busy(priv->gpu_bo) && sna->kgem.can_blt_cpu)) + hint = 0; priv->cpu_bo = kgem_create_cpu_2d(&sna->kgem, pixmap->drawable.width, @@ -580,7 +590,7 @@ static void __sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv) if (priv->cpu_bo->flush) { assert(!priv->cpu_bo->reusable); kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo); - sna_accel_watch_flush(sna, -1); + sna_shm_watch_flush(sna, -1); } 
kgem_bo_destroy(&sna->kgem, priv->cpu_bo); } else if (!IS_STATIC_PTR(priv->ptr)) @@ -612,9 +622,9 @@ static bool sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv, bool a static inline uint32_t default_tiling(struct sna *sna, PixmapPtr pixmap) { -#if DEFAULT_TILING == I915_TILING_NONE +#if DEFAULT_PIXMAP_TILING == I915_TILING_NONE return I915_TILING_NONE; -#elif DEFAULT_TILING == I915_TILING_X +#elif DEFAULT_PIXMAP_TILING == I915_TILING_X return I915_TILING_X; #else /* Try to avoid hitting the Y-tiling GTT mapping bug on 855GM */ @@ -630,15 +640,6 @@ static inline uint32_t default_tiling(struct sna *sna, PixmapPtr pixmap) pixmap->drawable.height > sna->render.max_3d_size)) return I915_TILING_X; - if (sna_damage_is_all(&sna_pixmap(pixmap)->cpu_damage, - pixmap->drawable.width, - pixmap->drawable.height)) { - DBG(("%s: entire source is damaged, using Y-tiling\n", - __FUNCTION__)); - sna_damage_destroy(&sna_pixmap(priv)->gpu_damage); - return I915_TILING_Y; - } - return I915_TILING_Y; #endif } @@ -666,6 +667,7 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling) __FUNCTION__, priv->gpu_bo->tiling, tiling, pixmap->drawable.width, pixmap->drawable.height)); assert(priv->gpu_damage == NULL || priv->gpu_bo); + assert(priv->gpu_bo->tiling != tiling); if (priv->pinned) { DBG(("%s: can't convert pinned bo\n", __FUNCTION__)); @@ -690,6 +692,12 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling) return NULL; } + if (bo->tiling == priv->gpu_bo->tiling) { + DBG(("%s: tiling request failed\n", __FUNCTION__)); + kgem_bo_destroy(&sna->kgem, bo); + return NULL; + } + box.x1 = box.y1 = 0; box.x2 = pixmap->drawable.width; box.y2 = pixmap->drawable.height; @@ -824,8 +832,8 @@ create_pixmap(struct sna *sna, ScreenPtr screen, datasize += adjust; } - DBG(("%s: allocating pixmap %dx%d, depth=%d, size=%ld\n", - __FUNCTION__, width, height, depth, (long)datasize)); + DBG(("%s: allocating pixmap %dx%d, depth=%d/%d, size=%ld\n", + 
__FUNCTION__, width, height, depth, bpp, (long)datasize)); pixmap = AllocatePixmap(screen, datasize); if (!pixmap) return NullPixmap; @@ -878,7 +886,11 @@ __pop_freed_pixmap(struct sna *sna) pixmap = sna->freed_pixmap; sna->freed_pixmap = pixmap->devPrivate.ptr; + DBG(("%s: reusing freed pixmap=%ld header\n", + __FUNCTION__, pixmap->drawable.serialNumber)); + assert(pixmap->refcnt == 0); + assert(pixmap->devKind = 0xdeadbeef); assert(sna_pixmap(pixmap)); assert(sna_pixmap(pixmap)->header); @@ -990,7 +1002,7 @@ fallback: } priv->cpu_bo->pitch = pitch; kgem_bo_mark_unreusable(priv->cpu_bo); - sna_accel_watch_flush(sna, 1); + sna_shm_watch_flush(sna, 1); #ifdef DEBUG_MEMORY sna->debug_memory.cpu_bo_allocs++; sna->debug_memory.cpu_bo_bytes += kgem_bo_size(priv->cpu_bo); @@ -1081,6 +1093,18 @@ sna_pixmap_create_scratch(ScreenPtr screen, return pixmap; } +static unsigned small_copy(const RegionRec *region) +{ + if ((region->extents.x2 - region->extents.x1)*(region->extents.y2 - region->extents.y1) < 1024) { + DBG(("%s: region:%dx%d\n", __FUNCTION__, + (region->extents.x2 - region->extents.x1), + (region->extents.y2 - region->extents.y1))); + return COPY_SMALL; + } + + return 0; +} + #ifdef CREATE_PIXMAP_USAGE_SHARED static Bool sna_share_pixmap_backing(PixmapPtr pixmap, ScreenPtr slave, void **fd_handle) @@ -1124,7 +1148,7 @@ sna_share_pixmap_backing(PixmapPtr pixmap, ScreenPtr slave, void **fd_handle) pixmap->drawable.height, pixmap->drawable.bitsPerPixel, I915_TILING_NONE, - CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT); + CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT); if (bo == NULL) { DBG(("%s: allocation failed\n", __FUNCTION__)); return FALSE; @@ -1243,7 +1267,7 @@ sna_create_pixmap_shared(struct sna *sna, ScreenPtr screen, width, height, pixmap->drawable.bitsPerPixel, I915_TILING_NONE, - CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT); + CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT); if (priv->gpu_bo == NULL) { free(priv); 
FreePixmap(pixmap); @@ -1311,7 +1335,7 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, if (unlikely((sna->render.prefer_gpu & PREFER_GPU_RENDER) == 0)) flags &= ~KGEM_CAN_CREATE_GPU; - if (wedged(sna)) + if (wedged(sna) && usage != SNA_CREATE_FB) flags &= ~KGEM_CAN_CREATE_GTT; DBG(("%s: usage=%d, flags=%x\n", __FUNCTION__, usage, flags)); @@ -1417,10 +1441,13 @@ static void __sna_free_pixmap(struct sna *sna, __sna_pixmap_free_cpu(sna, priv); if (priv->flush) - sna_accel_watch_flush(sna, -1); + sna_watch_flush(sna, -1); +#if !NDEBUG + pixmap->devKind = 0xdeadbeef; +#endif if (priv->header) { - assert(pixmap->drawable.pScreen == sna->scrn->pScreen); + assert(pixmap->drawable.pScreen == to_screen_from_sna(sna)); assert(!priv->shm); pixmap->devPrivate.ptr = sna->freed_pixmap; sna->freed_pixmap = pixmap; @@ -1485,7 +1512,7 @@ static Bool sna_destroy_pixmap(PixmapPtr pixmap) if (priv->shm && kgem_bo_is_busy(priv->cpu_bo)) { DBG(("%s: deferring release of active SHM pixmap=%ld\n", __FUNCTION__, pixmap->drawable.serialNumber)); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); + add_shm_flush(sna, priv); kgem_bo_submit(&sna->kgem, priv->cpu_bo); /* XXX ShmDetach */ } else __sna_free_pixmap(sna, pixmap, priv); @@ -1529,7 +1556,7 @@ static inline bool has_coherent_ptr(struct sna *sna, struct sna_pixmap *priv, un if (!priv->cpu_bo) return true; - assert(!priv->cpu_bo->needs_flush); + assert(!priv->cpu_bo->needs_flush || (flags & MOVE_WRITE) == 0); assert(priv->pixmap->devKind == priv->cpu_bo->pitch); return priv->pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu); } @@ -1557,6 +1584,11 @@ static inline bool has_coherent_ptr(struct sna *sna, struct sna_pixmap *priv, un return true; } + if (priv->pixmap->devPrivate.ptr == MAP(priv->gpu_bo->map__wc)) { + assert(priv->mapped == MAPPED_GTT); + return true; + } + return false; } @@ -1577,6 +1609,16 @@ static inline bool pixmap_inplace(struct sna *sna, return false; if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) { + if 
(priv->clear) { + DBG(("%s: no, clear GPU bo is busy\n", __FUNCTION__)); + return false; + } + + if (flags & MOVE_ASYNC_HINT) { + DBG(("%s: no, async hint and GPU bo is busy\n", __FUNCTION__)); + return false; + } + if ((flags & (MOVE_WRITE | MOVE_READ)) == (MOVE_WRITE | MOVE_READ)) { DBG(("%s: no, GPU bo is busy\n", __FUNCTION__)); return false; @@ -1624,7 +1666,7 @@ static bool sna_pixmap_alloc_gpu(struct sna *sna, if (pixmap->usage_hint == SNA_CREATE_FB && (sna->flags & SNA_LINEAR_FB) == 0) { flags |= CREATE_SCANOUT; tiling = kgem_choose_tiling(&sna->kgem, - -I915_TILING_X, + -DEFAULT_SCANOUT_TILING, pixmap->drawable.width, pixmap->drawable.height, pixmap->drawable.bitsPerPixel); @@ -1861,7 +1903,9 @@ sna_pixmap_undo_cow(struct sna *sna, struct sna_pixmap *priv, unsigned flags) assert(priv->gpu_bo == cow->bo); assert(cow->refcnt); - if (flags && (flags & MOVE_WRITE) == 0 && IS_COW_OWNER(priv->cow)) + if (flags && /* flags == 0 => force decouple */ + (flags & MOVE_WRITE) == 0 && + (((flags & __MOVE_FORCE) == 0) || IS_COW_OWNER(priv->cow))) return true; if (!IS_COW_OWNER(priv->cow)) @@ -1933,7 +1977,7 @@ sna_pixmap_undo_cow(struct sna *sna, struct sna_pixmap *priv, unsigned flags) box.y2 = pixmap->drawable.height; if (flags & __MOVE_PRIME) { - create = CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT; + create = CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT; tiling = I915_TILING_NONE; } else { create = 0; @@ -2021,6 +2065,10 @@ sna_pixmap_make_cow(struct sna *sna, cow->bo->handle)); src_priv->cow = MAKE_COW_OWNER(cow); + if (src_priv->flush & FLUSH_WRITE) { + assert(!src_priv->shm); + sna_add_flush_pixmap(sna, src_priv, src_priv->gpu_bo); + } } if (cow == COW(dst_priv->cow)) { @@ -2267,6 +2315,7 @@ skip_inplace_map: (flags & MOVE_WRITE ? 
(void *)priv->gpu_bo : (void *)priv->gpu_damage) && priv->cpu_damage == NULL && priv->gpu_bo->tiling == I915_TILING_NONE && (flags & MOVE_READ || kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, flags & MOVE_WRITE)) && + (!priv->clear || !kgem_bo_is_busy(priv->gpu_bo)) && ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == 0 || (!priv->cow && !priv->move_to_gpu && !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)))) { void *ptr; @@ -2330,7 +2379,9 @@ skip_inplace_map: pixmap->devKind, pixmap->devKind * pixmap->drawable.height)); if (priv->cpu_bo) { + kgem_bo_undo(&sna->kgem, priv->cpu_bo); if ((flags & MOVE_ASYNC_HINT || priv->cpu_bo->exec) && + sna->kgem.can_blt_cpu && sna->render.fill_one(sna, pixmap, priv->cpu_bo, priv->clear_color, 0, 0, @@ -2344,21 +2395,26 @@ skip_inplace_map: assert(pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu)); } - assert(pixmap->devKind); - if (priv->clear_color == 0 || - pixmap->drawable.bitsPerPixel == 8 || - priv->clear_color == (1 << pixmap->drawable.depth) - 1) { - memset(pixmap->devPrivate.ptr, priv->clear_color, - (size_t)pixmap->devKind * pixmap->drawable.height); - } else { - pixman_fill(pixmap->devPrivate.ptr, - pixmap->devKind/sizeof(uint32_t), - pixmap->drawable.bitsPerPixel, - 0, 0, - pixmap->drawable.width, - pixmap->drawable.height, - priv->clear_color); - } + if (sigtrap_get() == 0) { + assert(pixmap->devKind); + sigtrap_assert_active(); + if (priv->clear_color == 0 || + pixmap->drawable.bitsPerPixel == 8 || + priv->clear_color == (1 << pixmap->drawable.depth) - 1) { + memset(pixmap->devPrivate.ptr, priv->clear_color, + (size_t)pixmap->devKind * pixmap->drawable.height); + } else { + pixman_fill(pixmap->devPrivate.ptr, + pixmap->devKind/sizeof(uint32_t), + pixmap->drawable.bitsPerPixel, + 0, 0, + pixmap->drawable.width, + pixmap->drawable.height, + priv->clear_color); + } + sigtrap_put(); + } else + return false; clear_done: sna_damage_all(&priv->cpu_damage, pixmap); @@ -2414,6 +2470,10 @@ done: DBG(("%s: discarding idle GPU 
bo\n", __FUNCTION__)); sna_pixmap_free_gpu(sna, priv); } + if (priv->flush) { + assert(!priv->shm); + sna_add_flush_pixmap(sna, priv, priv->gpu_bo); + } priv->source_count = SOURCE_BIAS; } @@ -2531,6 +2591,9 @@ static bool cpu_clear_boxes(struct sna *sna, { struct sna_fill_op fill; + if (!sna->kgem.can_blt_cpu) + return false; + if (!sna_fill_init_blt(&fill, sna, pixmap, priv->cpu_bo, GXcopy, priv->clear_color, @@ -2659,6 +2722,10 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, } } sna_damage_add_to_pixmap(&priv->cpu_damage, region, pixmap); + if (priv->flush) { + assert(!priv->shm); + sna_add_flush_pixmap(sna, priv, priv->gpu_bo); + } if (dx | dy) RegionTranslate(region, -dx, -dy); @@ -2904,17 +2971,22 @@ move_to_cpu: assert(pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu)); } - assert(pixmap->devKind); - do { - pixman_fill(pixmap->devPrivate.ptr, - pixmap->devKind/sizeof(uint32_t), - pixmap->drawable.bitsPerPixel, - box->x1, box->y1, - box->x2 - box->x1, - box->y2 - box->y1, - priv->clear_color); - box++; - } while (--n); + if (sigtrap_get() == 0) { + assert(pixmap->devKind); + sigtrap_assert_active(); + do { + pixman_fill(pixmap->devPrivate.ptr, + pixmap->devKind/sizeof(uint32_t), + pixmap->drawable.bitsPerPixel, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1, + priv->clear_color); + box++; + } while (--n); + sigtrap_put(); + } else + return false; clear_done: if (flags & MOVE_WRITE || @@ -3209,13 +3281,14 @@ __sna_pixmap_for_gpu(struct sna *sna, PixmapPtr pixmap, unsigned flags) { struct sna_pixmap *priv; + assert(flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE)); if ((flags & __MOVE_FORCE) == 0 && wedged(sna)) return NULL; priv = sna_pixmap(pixmap); if (priv == NULL) { DBG(("%s: not attached\n", __FUNCTION__)); - if ((flags & __MOVE_DRI) == 0) + if ((flags & (__MOVE_DRI | __MOVE_SCANOUT)) == 0) return NULL; if (pixmap->usage_hint == -1) { @@ -3238,6 +3311,44 @@ __sna_pixmap_for_gpu(struct sna *sna, PixmapPtr pixmap, unsigned flags) 
return priv; } +inline static void sna_pixmap_unclean(struct sna *sna, + struct sna_pixmap *priv, + unsigned flags) +{ + struct drm_i915_gem_busy busy; + + assert(DAMAGE_IS_ALL(priv->gpu_damage)); + assert(priv->gpu_bo); + assert(priv->gpu_bo->proxy == NULL); + assert_pixmap_map(priv->pixmap, priv); + + sna_damage_destroy(&priv->cpu_damage); + list_del(&priv->flush_list); + + if (flags & (__MOVE_DRI | __MOVE_SCANOUT)) + return; + + if (!priv->flush || priv->gpu_bo->exec) + return; + + busy.handle = priv->gpu_bo->handle; + busy.busy = 0; + ioctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy); + + DBG(("%s(pixmap=%ld): cleaning foreign bo handle=%u, busy=%x [ring=%d]\n", + __FUNCTION__, + priv->pixmap->drawable.serialNumber, + busy.handle, busy.busy, !!(busy.busy & (0xfffe << 16)))); + + if (busy.busy) { + unsigned mode = KGEM_RENDER; + if (busy.busy & (0xfffe << 16)) + mode = KGEM_BLT; + kgem_bo_mark_busy(&sna->kgem, priv->gpu_bo, mode); + } else + __kgem_bo_clear_busy(priv->gpu_bo); +} + struct sna_pixmap * sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int flags) { @@ -3287,12 +3398,14 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl if (priv->cow) { unsigned cow = flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE); + assert(cow); + if ((flags & MOVE_READ) == 0) { if (priv->gpu_damage) { r.extents = *box; r.data = NULL; if (!region_subsumes_damage(&r, priv->gpu_damage)) - cow |= MOVE_READ; + cow |= MOVE_READ | __MOVE_FORCE; } } else { if (priv->cpu_damage) { @@ -3303,22 +3416,18 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl } } - if (cow) { - if (!sna_pixmap_undo_cow(sna, priv, cow)) - return NULL; + if (!sna_pixmap_undo_cow(sna, priv, cow)) + return NULL; - if (priv->gpu_bo == NULL) - sna_damage_destroy(&priv->gpu_damage); - } + if (priv->gpu_bo == NULL) + sna_damage_destroy(&priv->gpu_damage); } if (sna_damage_is_all(&priv->gpu_damage, pixmap->drawable.width, 
pixmap->drawable.height)) { - assert(priv->gpu_bo); - assert(priv->gpu_bo->proxy == NULL); - sna_damage_destroy(&priv->cpu_damage); - list_del(&priv->flush_list); + DBG(("%s: already all-damaged\n", __FUNCTION__)); + sna_pixmap_unclean(sna, priv, flags); goto done; } @@ -3360,10 +3469,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl return priv; } - if (priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } + add_shm_flush(sna, priv); assert(priv->cpu_damage); region_set(&r, box); @@ -3527,7 +3633,8 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box, } if (priv->cow) { - unsigned cow = MOVE_WRITE | MOVE_READ; + unsigned cow = MOVE_WRITE | MOVE_READ | __MOVE_FORCE; + assert(cow); if (flags & IGNORE_DAMAGE) { if (priv->gpu_damage) { @@ -3717,8 +3824,11 @@ create_gpu_bo: else move = MOVE_WRITE | MOVE_READ | MOVE_ASYNC_HINT; - if (sna_pixmap_move_to_gpu(pixmap, move)) + if (sna_pixmap_move_to_gpu(pixmap, move)) { + sna_damage_all(&priv->gpu_damage, + pixmap); goto use_gpu_bo; + } } if (DAMAGE_IS_ALL(priv->gpu_damage) || @@ -3934,26 +4044,28 @@ prefer_gpu_bo: goto move_to_gpu; } - if ((priv->cpu_damage == NULL || flags & IGNORE_DAMAGE)) { - if (priv->gpu_bo && priv->gpu_bo->tiling) { - DBG(("%s: prefer to use GPU bo for rendering large pixmaps\n", __FUNCTION__)); - goto prefer_gpu_bo; + if (!priv->shm) { + if ((priv->cpu_damage == NULL || flags & IGNORE_DAMAGE)) { + if (priv->gpu_bo && priv->gpu_bo->tiling) { + DBG(("%s: prefer to use GPU bo for rendering large pixmaps\n", __FUNCTION__)); + goto prefer_gpu_bo; + } + + if (priv->cpu_bo->pitch >= 4096) { + DBG(("%s: prefer to use GPU bo for rendering wide pixmaps\n", __FUNCTION__)); + goto prefer_gpu_bo; + } } - if (priv->cpu_bo->pitch >= 4096) { - DBG(("%s: prefer to use GPU bo for rendering wide pixmaps\n", __FUNCTION__)); + if ((flags & IGNORE_DAMAGE) == 0 && priv->cpu_bo->snoop) { + DBG(("%s: prefer to use GPU bo for 
reading from snooped target bo\n", __FUNCTION__)); goto prefer_gpu_bo; } - } - - if ((flags & IGNORE_DAMAGE) == 0 && priv->cpu_bo->snoop) { - DBG(("%s: prefer to use GPU bo for reading from snooped target bo\n", __FUNCTION__)); - goto prefer_gpu_bo; - } - if (!sna->kgem.can_blt_cpu) { - DBG(("%s: can't render to CPU bo, try to use GPU bo\n", __FUNCTION__)); - goto prefer_gpu_bo; + if (!sna->kgem.can_blt_cpu) { + DBG(("%s: can't render to CPU bo, try to use GPU bo\n", __FUNCTION__)); + goto prefer_gpu_bo; + } } } @@ -3967,9 +4079,7 @@ prefer_gpu_bo: } if (priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - + add_shm_flush(sna, priv); /* As we may have flushed and retired,, recheck for busy bo */ if ((flags & FORCE_GPU) == 0 && !kgem_bo_is_busy(priv->cpu_bo)) return NULL; @@ -4019,7 +4129,7 @@ sna_pixmap_create_upload(ScreenPtr screen, assert(width); assert(height); - if (depth == 1) + if (depth < 8) return create_pixmap(sna, screen, width, height, depth, CREATE_PIXMAP_USAGE_SCRATCH); @@ -4121,27 +4231,21 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) if (priv->cow) { unsigned cow = flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE); + assert(cow); if (flags & MOVE_READ && priv->cpu_damage) cow |= MOVE_WRITE; - if (cow) { - if (!sna_pixmap_undo_cow(sna, priv, cow)) - return NULL; + if (!sna_pixmap_undo_cow(sna, priv, cow)) + return NULL; - if (priv->gpu_bo == NULL) - sna_damage_destroy(&priv->gpu_damage); - } + if (priv->gpu_bo == NULL) + sna_damage_destroy(&priv->gpu_damage); } if (sna_damage_is_all(&priv->gpu_damage, pixmap->drawable.width, pixmap->drawable.height)) { DBG(("%s: already all-damaged\n", __FUNCTION__)); - assert(DAMAGE_IS_ALL(priv->gpu_damage)); - assert(priv->gpu_bo); - assert(priv->gpu_bo->proxy == NULL); - assert_pixmap_map(pixmap, priv); - sna_damage_destroy(&priv->cpu_damage); - list_del(&priv->flush_list); + sna_pixmap_unclean(sna, priv, flags); goto active; } @@ -4206,7 +4310,7 @@ 
sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) if (flags & MOVE_INPLACE_HINT || (priv->cpu_damage && priv->cpu_bo == NULL)) create = CREATE_GTT_MAP | CREATE_INACTIVE; if (flags & __MOVE_PRIME) - create |= CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT; + create |= CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT; sna_pixmap_alloc_gpu(sna, pixmap, priv, create); } @@ -4282,10 +4386,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) goto done; } - if (priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } + add_shm_flush(sna, priv); n = sna_damage_get_boxes(priv->cpu_damage, &box); assert(n); @@ -4534,7 +4635,7 @@ static inline bool box32_trim_and_translate(Box32Rec *box, DrawablePtr d, GCPtr return box32_clip(box, gc); } -static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y) +static inline void box_add_xy(BoxPtr box, int16_t x, int16_t y) { if (box->x1 > x) box->x1 = x; @@ -4547,6 +4648,11 @@ static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y) box->y2 = y; } +static inline void box_add_pt(BoxPtr box, const DDXPointRec *pt) +{ + box_add_xy(box, pt->x, pt->y); +} + static inline bool box32_to_box16(const Box32Rec *b32, BoxRec *b16) { b16->x1 = b32->x1; @@ -4864,6 +4970,7 @@ try_upload__inplace(PixmapPtr pixmap, RegionRec *region, pixmap->devPrivate.ptr = dst; pixmap->devKind = priv->gpu_bo->pitch; priv->mapped = dst == MAP(priv->gpu_bo->map__cpu) ? 
MAPPED_CPU : MAPPED_GTT; + priv->cpu &= priv->mapped == MAPPED_CPU; assert(has_coherent_ptr(sna, priv, MOVE_WRITE)); box = region_rects(region); @@ -4923,8 +5030,7 @@ done: sna_damage_all(&priv->gpu_damage, pixmap); } - if (priv->shm) - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); + add_shm_flush(sna, priv); } assert(!priv->clear); @@ -5172,6 +5278,16 @@ static inline uint8_t blt_depth(int depth) } } +inline static void blt_done(struct sna *sna) +{ + sna->blt_state.fill_bo = 0; + if (sna->kgem.nbatch && __kgem_ring_empty(&sna->kgem)) { + DBG(("%s: flushing BLT operation on empty ring\n", + __FUNCTION__)); + _kgem_submit(&sna->kgem); + } +} + static bool sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, int x, int y, int w, int h, char *bits) @@ -5217,6 +5333,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, kgem_set_mode(&sna->kgem, KGEM_BLT, bo); assert(kgem_bo_can_blt(&sna->kgem, bo)); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); /* Region is pre-clipped and translated into pixmap space */ box = region_rects(region); @@ -5238,6 +5355,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); upload = kgem_create_buffer(&sna->kgem, bstride*bh, @@ -5331,7 +5449,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, box++; } while (--n); - sna->blt_state.fill_bo = 0; + blt_done(sna); return true; } @@ -5381,6 +5499,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, kgem_set_mode(&sna->kgem, KGEM_BLT, bo); assert(kgem_bo_can_blt(&sna->kgem, bo)); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); skip = h * BitmapBytePad(w + left); for (i = 1 << (gc->depth-1); i; i >>= 1, bits += skip) { @@ -5408,6 +5527,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, 
NULL, bo); upload = kgem_create_buffer(&sna->kgem, bstride*bh, @@ -5509,7 +5629,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, } while (--n); } - sna->blt_state.fill_bo = 0; + blt_done(sna); return true; } @@ -5837,7 +5957,7 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (!sna->render.copy_boxes(sna, alu, &pixmap->drawable, priv->gpu_bo, sx, sy, &pixmap->drawable, priv->gpu_bo, tx, ty, - box, n, 0)) { + box, n, small_copy(region))) { DBG(("%s: fallback - accelerated copy boxes failed\n", __FUNCTION__)); goto fallback; @@ -6098,6 +6218,9 @@ sna_copy_boxes__inplace(struct sna *sna, RegionPtr region, int alu, kgem_bo_sync__cpu_full(&sna->kgem, src_priv->gpu_bo, FORCE_FULL_SYNC); + if (sigtrap_get()) + return false; + box = region_rects(region); n = region_num_rects(region); if (src_priv->gpu_bo->tiling) { @@ -6137,6 +6260,8 @@ sna_copy_boxes__inplace(struct sna *sna, RegionPtr region, int alu, } } + sigtrap_put(); + return true; upload_inplace: @@ -6234,6 +6359,9 @@ upload_inplace: assert(has_coherent_ptr(sna, src_priv, MOVE_READ)); + if (sigtrap_get()) + return false; + box = region_rects(region); n = region_num_rects(region); if (dst_priv->gpu_bo->tiling) { @@ -6265,15 +6393,19 @@ upload_inplace: } while (--n); if (!dst_priv->shm) { - assert(ptr == MAP(dst_priv->gpu_bo->map__cpu)); dst_pixmap->devPrivate.ptr = ptr; dst_pixmap->devKind = dst_priv->gpu_bo->pitch; - dst_priv->mapped = MAPPED_CPU; + if (ptr == MAP(dst_priv->gpu_bo->map__cpu)) { + dst_priv->mapped = MAPPED_CPU; + dst_priv->cpu = true; + } else + dst_priv->mapped = MAPPED_GTT; assert_pixmap_map(dst_pixmap, dst_priv); - dst_priv->cpu = true; } } + sigtrap_put(); + return true; } @@ -6326,6 +6458,16 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, assert(region_num_rects(region)); + if (src_priv && + src_priv->gpu_bo == NULL && + src_priv->cpu_bo == NULL && + src_priv->ptr == NULL) { + /* Rare but still happens, nothing to copy */ + DBG(("%s: 
src pixmap=%ld is empty\n", + __FUNCTION__, src_pixmap->drawable.serialNumber)); + return; + } + if (src_pixmap == dst_pixmap) return sna_self_copy_boxes(src, dst, gc, region, dx, dy, @@ -6491,15 +6633,14 @@ discard_cow: sna_damage_all(&dst_priv->gpu_damage, dst_pixmap); sna_damage_destroy(&dst_priv->cpu_damage); list_del(&dst_priv->flush_list); - if (dst_priv->shm) - sna_add_flush_pixmap(sna, dst_priv, dst_priv->cpu_bo); + add_shm_flush(sna, dst_priv); return; } } if (!sna->render.copy_boxes(sna, alu, &src_pixmap->drawable, src_priv->gpu_bo, src_dx, src_dy, &dst_pixmap->drawable, bo, 0, 0, - box, n, 0)) { + box, n, small_copy(region))) { DBG(("%s: fallback - accelerated copy boxes failed\n", __FUNCTION__)); goto fallback; @@ -6536,7 +6677,7 @@ discard_cow: if (!sna->render.copy_boxes(sna, alu, &src_pixmap->drawable, src_priv->gpu_bo, src_dx, src_dy, &dst_pixmap->drawable, bo, 0, 0, - box, n, 0)) { + box, n, small_copy(region))) { DBG(("%s: fallback - accelerated copy boxes failed\n", __FUNCTION__)); goto fallback; @@ -6571,15 +6712,12 @@ discard_cow: if (replaces && UNDO) kgem_bo_pair_undo(&sna->kgem, dst_priv->gpu_bo, dst_priv->cpu_bo); - if (src_priv->shm) { - assert(!src_priv->flush); - sna_add_flush_pixmap(sna, src_priv, src_priv->cpu_bo); - } + add_shm_flush(sna, src_priv); if (!sna->render.copy_boxes(sna, alu, &src_pixmap->drawable, src_priv->cpu_bo, src_dx, src_dy, &dst_pixmap->drawable, bo, 0, 0, - box, n, src_priv->shm ? COPY_LAST : 0)) { + box, n, small_copy(region) | (src_priv->shm ? 
COPY_LAST : 0))) { DBG(("%s: fallback - accelerated copy boxes failed\n", __FUNCTION__)); goto fallback; @@ -6631,8 +6769,7 @@ discard_cow: ok = sna->render.copy_boxes(sna, alu, &src_pixmap->drawable, src_bo, src_dx, src_dy, &dst_pixmap->drawable, bo, 0, 0, - box, n, COPY_LAST); - + box, n, small_copy(region) | COPY_LAST); kgem_bo_sync__cpu(&sna->kgem, src_bo); assert(src_bo->rq == NULL); kgem_bo_destroy(&sna->kgem, src_bo); @@ -6780,18 +6917,22 @@ fallback: return; } - assert(dst_pixmap->devPrivate.ptr); - assert(dst_pixmap->devKind); - do { - pixman_fill(dst_pixmap->devPrivate.ptr, - dst_pixmap->devKind/sizeof(uint32_t), - dst_pixmap->drawable.bitsPerPixel, - box->x1, box->y1, - box->x2 - box->x1, - box->y2 - box->y1, - src_priv->clear_color); - box++; - } while (--n); + if (sigtrap_get() == 0) { + assert(dst_pixmap->devPrivate.ptr); + assert(dst_pixmap->devKind); + sigtrap_assert_active(); + do { + pixman_fill(dst_pixmap->devPrivate.ptr, + dst_pixmap->devKind/sizeof(uint32_t), + dst_pixmap->drawable.bitsPerPixel, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1, + src_priv->clear_color); + box++; + } while (--n); + sigtrap_put(); + } } else if (!sna_copy_boxes__inplace(sna, region, alu, src_pixmap, src_priv, src_dx, src_dy, @@ -6848,36 +6989,39 @@ fallback: ((char *)src_pixmap->devPrivate.ptr + src_dy * src_stride + src_dx * bpp / 8); - do { - DBG(("%s: memcpy_blt(box=(%d, %d), (%d, %d), src=(%d, %d), pitches=(%d, %d))\n", - __FUNCTION__, - box->x1, box->y1, - box->x2 - box->x1, - box->y2 - box->y1, - src_dx, src_dy, - src_stride, dst_stride)); - - assert(box->x1 >= 0); - assert(box->y1 >= 0); - assert(box->x2 <= dst_pixmap->drawable.width); - assert(box->y2 <= dst_pixmap->drawable.height); - - assert(box->x1 + src_dx >= 0); - assert(box->y1 + src_dy >= 0); - assert(box->x2 + src_dx <= src_pixmap->drawable.width); - assert(box->y2 + src_dy <= src_pixmap->drawable.height); - assert(has_coherent_ptr(sna, src_priv, MOVE_READ)); - 
assert(has_coherent_ptr(sna, dst_priv, MOVE_WRITE)); - assert(src_stride); - assert(dst_stride); - memcpy_blt(src_bits, dst_bits, bpp, - src_stride, dst_stride, - box->x1, box->y1, - box->x1, box->y1, - box->x2 - box->x1, - box->y2 - box->y1); - box++; - } while (--n); + if (sigtrap_get() == 0) { + do { + DBG(("%s: memcpy_blt(box=(%d, %d), (%d, %d), src=(%d, %d), pitches=(%d, %d))\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1, + src_dx, src_dy, + src_stride, dst_stride)); + + assert(box->x1 >= 0); + assert(box->y1 >= 0); + assert(box->x2 <= dst_pixmap->drawable.width); + assert(box->y2 <= dst_pixmap->drawable.height); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + assert(box->x2 + src_dx <= src_pixmap->drawable.width); + assert(box->y2 + src_dy <= src_pixmap->drawable.height); + assert(has_coherent_ptr(sna, src_priv, MOVE_READ)); + assert(has_coherent_ptr(sna, dst_priv, MOVE_WRITE)); + assert(src_stride); + assert(dst_stride); + memcpy_blt(src_bits, dst_bits, bpp, + src_stride, dst_stride, + box->x1, box->y1, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1); + box++; + } while (--n); + sigtrap_put(); + } } else { DBG(("%s: fallback -- miCopyRegion\n", __FUNCTION__)); @@ -6931,7 +7075,8 @@ sna_do_copy(DrawablePtr src, DrawablePtr dst, GCPtr gc, /* Short cut for unmapped windows */ if (dst->type == DRAWABLE_WINDOW && !((WindowPtr)dst)->realized) { - DBG(("%s: unmapped\n", __FUNCTION__)); + DBG(("%s: unmapped/unrealized dst (pixmap=%ld)\n", + __FUNCTION__, get_window_pixmap((WindowPtr)dst))); return NULL; } @@ -7115,19 +7260,28 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc, if (gc->planemask == 0) return NULL; - DBG(("%s: src=(%d, %d)x(%d, %d)+(%d, %d) -> dst=(%d, %d)+(%d, %d); alu=%d, pm=%lx, depth=%d\n", + if (sna->ignore_copy_area) + return NULL; + + DBG(("%s: src=pixmap=%ld:(%d, %d)x(%d, %d)+(%d, %d) -> dst=pixmap=%ld:(%d, %d)+(%d, %d); alu=%d, pm=%lx, depth=%d\n", __FUNCTION__, + 
get_drawable_pixmap(src)->drawable.serialNumber, src_x, src_y, width, height, src->x, src->y, + get_drawable_pixmap(dst)->drawable.serialNumber, dst_x, dst_y, dst->x, dst->y, gc->alu, gc->planemask, gc->depth)); if (FORCE_FALLBACK || !ACCEL_COPY_AREA || wedged(sna) || - !PM_IS_SOLID(dst, gc->planemask) || gc->depth < 8) + !PM_IS_SOLID(dst, gc->planemask) || gc->depth < 8) { + DBG(("%s: fallback copy\n", __FUNCTION__)); copy = sna_fallback_copy_boxes; - else if (src == dst) + } else if (src == dst) { + DBG(("%s: self copy\n", __FUNCTION__)); copy = sna_self_copy_boxes; - else + } else { + DBG(("%s: normal copy\n", __FUNCTION__)); copy = sna_copy_boxes; + } return sna_do_copy(src, dst, gc, src_x, src_y, @@ -7136,30 +7290,21 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc, copy, 0, NULL); } -static const BoxRec * -find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y) +const BoxRec * +__find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y) { - const BoxRec *mid; - - if (end == begin) - return end; - - if (end - begin == 1) { + assert(end - begin > 1); + do { + const BoxRec *mid = begin + (end - begin) / 2; + if (mid->y2 > y) + end = mid; + else + begin = mid; + } while (end > begin + 1); if (begin->y2 > y) - return begin; + return begin; else - return end; - } - - mid = begin + (end - begin) / 2; - if (mid->y2 > y) - /* If no box is found in [begin, mid], the function - * will return @mid, which is then known to be the - * correct answer. 
- */ - return find_clip_box_for_y(begin, mid, y); - else - return find_clip_box_for_y(mid, end, y); + return end; } struct sna_fill_spans { @@ -8223,6 +8368,8 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, } br13 |= blt_depth(drawable->depth) << 24; br13 |= copy_ROP[gc->alu] << 16; + DBG(("%s: target-depth=%d, alu=%d, bg=%08x, fg=%08x\n", + __FUNCTION__, drawable->depth, gc->alu, gc->bgPixel, gc->fgPixel)); kgem_set_mode(&sna->kgem, KGEM_BLT, arg->bo); assert(kgem_bo_can_blt(&sna->kgem, arg->bo)); @@ -8255,6 +8402,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, return; /* XXX fallback? */ _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, arg->bo); assert(sna->kgem.mode == KGEM_BLT); if (sna->kgem.gen >= 0100) { @@ -8270,8 +8418,8 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, I915_GEM_DOMAIN_RENDER | KGEM_RELOC_FENCED, 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; dst = (uint8_t *)&b[8]; sna->kgem.nbatch += 8 + src_stride; @@ -8322,6 +8470,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, return; /* XXX fallback? */ _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, arg->bo); upload = kgem_create_buffer(&sna->kgem, bstride*bh, @@ -8408,7 +8557,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, sna_damage_add_to_pixmap(arg->damage, region, pixmap); } assert_pixmap_damage(pixmap); - sna->blt_state.fill_bo = 0; + blt_done(sna); } static void @@ -8472,6 +8621,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, return; /* XXX fallback? 
*/ _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, arg->bo); upload = kgem_create_buffer(&sna->kgem, bstride*bh, @@ -8588,6 +8738,8 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, } } + kgem_bcs_set_tiling(&sna->kgem, upload, arg->bo); + assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; if (sna->kgem.gen >= 0100) { @@ -8641,7 +8793,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, sna_damage_add_to_pixmap(arg->damage, region, dst_pixmap); } assert_pixmap_damage(dst_pixmap); - sna->blt_state.fill_bo = 0; + blt_done(sna); } static RegionPtr @@ -8895,36 +9047,11 @@ sna_poly_point_extents(DrawablePtr drawable, GCPtr gc, last.x += pt->x; last.y += pt->y; pt++; - box_add_pt(&box, last.x, last.y); + box_add_xy(&box, last.x, last.y); } } else { - --n; ++pt; - while (n >= 8) { - box_add_pt(&box, pt[0].x, pt[0].y); - box_add_pt(&box, pt[1].x, pt[1].y); - box_add_pt(&box, pt[2].x, pt[2].y); - box_add_pt(&box, pt[3].x, pt[3].y); - box_add_pt(&box, pt[4].x, pt[4].y); - box_add_pt(&box, pt[5].x, pt[5].y); - box_add_pt(&box, pt[6].x, pt[6].y); - box_add_pt(&box, pt[7].x, pt[7].y); - pt += 8; - n -= 8; - } - if (n & 4) { - box_add_pt(&box, pt[0].x, pt[0].y); - box_add_pt(&box, pt[1].x, pt[1].y); - box_add_pt(&box, pt[2].x, pt[2].y); - box_add_pt(&box, pt[3].x, pt[3].y); - pt += 4; - } - if (n & 2) { - box_add_pt(&box, pt[0].x, pt[0].y); - box_add_pt(&box, pt[1].x, pt[1].y); - pt += 2; - } - if (n & 1) - box_add_pt(&box, pt[0].x, pt[0].y); + while (--n) + box_add_pt(&box, ++pt); } box.x2++; box.y2++; @@ -9636,7 +9763,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc, y += pt->y; if (blt) blt &= pt->x == 0 || pt->y == 0; - box_add_pt(&box, x, y); + box_add_xy(&box, x, y); } } else { int x = box.x1; @@ -9648,7 +9775,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc, x = pt->x; y = pt->y; } - box_add_pt(&box, pt->x, pt->y); + box_add_pt(&box, pt); } } box.x2++; 
@@ -10037,7 +10164,7 @@ out: RegionUninit(&data.region); } -static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) +static inline bool box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) { if (seg->x1 == seg->x2) { if (seg->y1 > seg->y2) { @@ -10051,6 +10178,9 @@ static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) if (gc->capStyle != CapNotLast) b->y2++; } + if (b->y1 >= b->y2) + return false; + b->x1 = seg->x1; b->x2 = seg->x1 + 1; } else { @@ -10065,6 +10195,9 @@ static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) if (gc->capStyle != CapNotLast) b->x2++; } + if (b->x1 >= b->x2) + return false; + b->y1 = seg->y1; b->y2 = seg->y1 + 1; } @@ -10073,6 +10206,7 @@ static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) __FUNCTION__, seg->x1, seg->y1, seg->x2, seg->y2, b->x1, b->y1, b->x2, b->y2)); + return true; } static bool @@ -10107,12 +10241,13 @@ sna_poly_segment_blt(DrawablePtr drawable, nbox = ARRAY_SIZE(boxes); n -= nbox; do { - box_from_seg(b, seg++, gc); - if (b->y2 > b->y1 && b->x2 > b->x1) { + if (box_from_seg(b, seg++, gc)) { + assert(!box_empty(b)); b->x1 += dx; b->x2 += dx; b->y1 += dy; b->y2 += dy; + assert(!box_empty(b)); b++; } } while (--nbox); @@ -10131,7 +10266,10 @@ sna_poly_segment_blt(DrawablePtr drawable, nbox = ARRAY_SIZE(boxes); n -= nbox; do { - box_from_seg(b++, seg++, gc); + if (box_from_seg(b, seg++, gc)) { + assert(!box_empty(b)); + b++; + } } while (--nbox); if (b != boxes) { @@ -10156,7 +10294,10 @@ sna_poly_segment_blt(DrawablePtr drawable, do { BoxRec box; - box_from_seg(&box, seg++, gc); + if (!box_from_seg(&box, seg++, gc)) + continue; + + assert(!box_empty(&box)); box.x1 += drawable->x; box.x2 += drawable->x; box.y1 += drawable->y; @@ -10174,6 +10315,7 @@ sna_poly_segment_blt(DrawablePtr drawable, b->x2 += dx; b->y1 += dy; b->y2 += dy; + assert(!box_empty(b)); if (++b == last_box) { fill.boxes(sna, &fill, boxes, last_box-boxes); if (damage) @@ -10185,7 
+10327,10 @@ sna_poly_segment_blt(DrawablePtr drawable, } while (--n); } else { do { - box_from_seg(b, seg++, gc); + if (!box_from_seg(b, seg++, gc)) + continue; + + assert(!box_empty(b)); b->x1 += drawable->x; b->x2 += drawable->x; b->y1 += drawable->y; @@ -10195,6 +10340,7 @@ sna_poly_segment_blt(DrawablePtr drawable, b->x2 += dx; b->y1 += dy; b->y2 += dy; + assert(!box_empty(b)); if (++b == last_box) { fill.boxes(sna, &fill, boxes, last_box-boxes); if (damage) @@ -10319,8 +10465,11 @@ sna_poly_zero_segment_blt(DrawablePtr drawable, } b->x2++; b->y2++; - if (oc1 | oc2) - box_intersect(b, extents); + + if ((oc1 | oc2) && !box_intersect(b, extents)) + continue; + + assert(!box_empty(b)); if (++b == last_box) { ret = &&rectangle_continue; goto *jump; @@ -10383,6 +10532,7 @@ rectangle_continue: __FUNCTION__, x1, y1, b->x1, b->y1, b->x2, b->y2)); + assert(!box_empty(b)); if (++b == last_box) { ret = &&X_continue; goto *jump; @@ -10407,6 +10557,7 @@ X_continue: b->x2 = x1 + 1; b->y2 = b->y1 + 1; + assert(!box_empty(b)); if (++b == last_box) { ret = &&X2_continue; goto *jump; @@ -10468,6 +10619,7 @@ X2_continue: b->y2 = y1 + 1; b->x2 = x1 + 1; + assert(!box_empty(b)); if (++b == last_box) { ret = &&Y_continue; goto *jump; @@ -10491,6 +10643,7 @@ Y_continue: b->y2 = y1 + 1; b->x2 = x1 + 1; + assert(!box_empty(b)); if (++b == last_box) { ret = &&Y2_continue; goto *jump; @@ -11785,14 +11938,29 @@ sna_poly_fill_rect_blt(DrawablePtr drawable, if (nbox > ARRAY_SIZE(boxes)) nbox = ARRAY_SIZE(boxes); n -= nbox; - do { + while (nbox >= 2) { + b[0].x1 = rect[0].x + dx; + b[0].y1 = rect[0].y + dy; + b[0].x2 = b[0].x1 + rect[0].width; + b[0].y2 = b[0].y1 + rect[0].height; + + b[1].x1 = rect[1].x + dx; + b[1].y1 = rect[1].y + dy; + b[1].x2 = b[1].x1 + rect[1].width; + b[1].y2 = b[1].y1 + rect[1].height; + + b += 2; + rect += 2; + nbox -= 2; + } + if (nbox) { b->x1 = rect->x + dx; b->y1 = rect->y + dy; b->x2 = b->x1 + rect->width; b->y2 = b->y1 + rect->height; b++; rect++; - } while 
(--nbox); + } fill.boxes(sna, &fill, boxes, b-boxes); b = boxes; } while (n); @@ -11802,14 +11970,29 @@ sna_poly_fill_rect_blt(DrawablePtr drawable, if (nbox > ARRAY_SIZE(boxes)) nbox = ARRAY_SIZE(boxes); n -= nbox; - do { + while (nbox >= 2) { + b[0].x1 = rect[0].x; + b[0].y1 = rect[0].y; + b[0].x2 = b[0].x1 + rect[0].width; + b[0].y2 = b[0].y1 + rect[0].height; + + b[1].x1 = rect[1].x; + b[1].y1 = rect[1].y; + b[1].x2 = b[1].x1 + rect[1].width; + b[1].y2 = b[1].y1 + rect[1].height; + + b += 2; + rect += 2; + nbox -= 2; + } + if (nbox) { b->x1 = rect->x; b->y1 = rect->y; b->x2 = b->x1 + rect->width; b->y2 = b->y1 + rect->height; b++; rect++; - } while (--nbox); + } fill.boxes(sna, &fill, boxes, b-boxes); b = boxes; } while (n); @@ -12192,6 +12375,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); get_drawable_deltas(drawable, pixmap, &dx, &dy); assert(extents->x1 + dx >= 0); @@ -12335,6 +12519,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, _kgem_submit(&sna->kgem); _kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); } while (1); } else { RegionRec clip; @@ -12403,6 +12588,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, if (!kgem_check_batch(&sna->kgem, 3)) { _kgem_submit(&sna->kgem); _kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); unwind_batch = sna->kgem.nbatch; unwind_reloc = sna->kgem.nreloc; @@ -12499,6 +12685,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, DBG(("%s: emitting split batch\n", __FUNCTION__)); _kgem_submit(&sna->kgem); _kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); unwind_batch = sna->kgem.nbatch; unwind_reloc = sna->kgem.nreloc; @@ -12572,7 +12759,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, } done: assert_pixmap_damage(pixmap); - sna->blt_state.fill_bo = 0; + blt_done(sna); 
return true; } @@ -13128,6 +13315,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); if (!clipped) { dx += drawable->x; @@ -13240,6 +13428,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, _kgem_submit(&sna->kgem); _kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); } while (1); } else { RegionRec clip; @@ -13297,6 +13486,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, if (!kgem_check_batch(&sna->kgem, 3)) { _kgem_submit(&sna->kgem); _kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; @@ -13369,6 +13559,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, if (!kgem_check_batch(&sna->kgem, 3)) { _kgem_submit(&sna->kgem); _kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; @@ -13419,7 +13610,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, } assert_pixmap_damage(pixmap); - sna->blt_state.fill_bo = 0; + blt_done(sna); return true; } @@ -13499,6 +13690,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, get_drawable_deltas(drawable, pixmap, &dx, &dy); kgem_set_mode(&sna->kgem, KGEM_BLT, bo); assert(kgem_bo_can_blt(&sna->kgem, bo)); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); br00 = 3 << 20; br13 = bo->pitch; @@ -13543,6 +13735,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; @@ -13606,6 +13799,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); upload = 
kgem_create_buffer(&sna->kgem, bstride*bh, @@ -13736,6 +13930,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; @@ -13797,6 +13992,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); upload = kgem_create_buffer(&sna->kgem, bstride*bh, @@ -13927,6 +14123,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; @@ -13987,6 +14184,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); upload = kgem_create_buffer(&sna->kgem, bstride*bh, @@ -14064,7 +14262,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, } } - sna->blt_state.fill_bo = 0; + blt_done(sna); return true; } @@ -14126,6 +14324,7 @@ sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna, return; /* XXX fallback? */ _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; @@ -14251,6 +14450,7 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna, return; /* XXX fallback? 
*/ _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; @@ -14414,6 +14614,7 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable, get_drawable_deltas(drawable, pixmap, &dx, &dy); kgem_set_mode(&sna->kgem, KGEM_BLT, bo); assert(kgem_bo_can_blt(&sna->kgem, bo)); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); br00 = XY_MONO_SRC_COPY_IMM | 3 << 20; br13 = bo->pitch; @@ -14526,7 +14727,7 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable, } assert_pixmap_damage(pixmap); - sna->blt_state.fill_bo = 0; + blt_done(sna); return true; } @@ -14559,6 +14760,7 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable, get_drawable_deltas(drawable, pixmap, &dx, &dy); kgem_set_mode(&sna->kgem, KGEM_BLT, bo); assert(kgem_bo_can_blt(&sna->kgem, bo)); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); br00 = XY_MONO_SRC_COPY | 3 << 20; br13 = bo->pitch; @@ -14673,7 +14875,7 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable, assert_pixmap_damage(pixmap); if (tile) kgem_bo_destroy(&sna->kgem, tile); - sna->blt_state.fill_bo = 0; + blt_done(sna); return true; } @@ -15281,6 +15483,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, } _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); DBG(("%s: glyph clip box (%d, %d), (%d, %d)\n", __FUNCTION__, @@ -15368,6 +15571,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, if (!kgem_check_batch(&sna->kgem, 3+len)) { _kgem_submit(&sna->kgem); _kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); DBG(("%s: new batch, glyph clip box (%d, %d), (%d, %d)\n", __FUNCTION__, @@ -15479,7 +15683,7 @@ skip: } assert_pixmap_damage(pixmap); - sna->blt_state.fill_bo = 0; + blt_done(sna); return true; } @@ -16002,6 +16206,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, } _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); 
unwind_batch = sna->kgem.nbatch; unwind_reloc = sna->kgem.nreloc; @@ -16111,6 +16316,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, if (!kgem_check_batch(&sna->kgem, 3+len)) { _kgem_submit(&sna->kgem); _kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); unwind_batch = sna->kgem.nbatch; unwind_reloc = sna->kgem.nreloc; @@ -16229,7 +16435,7 @@ skip: } assert_pixmap_damage(pixmap); - sna->blt_state.fill_bo = 0; + blt_done(sna); return true; } @@ -16450,6 +16656,7 @@ sna_push_pixels_solid_blt(GCPtr gc, kgem_set_mode(&sna->kgem, KGEM_BLT, bo); assert(kgem_bo_can_blt(&sna->kgem, bo)); + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); /* Region is pre-clipped and translated into pixmap space */ box = region_rects(region); @@ -16471,6 +16678,7 @@ sna_push_pixels_solid_blt(GCPtr gc, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); upload = kgem_create_buffer(&sna->kgem, bstride*bh, @@ -16564,7 +16772,7 @@ sna_push_pixels_solid_blt(GCPtr gc, box++; } while (--n); - sna->blt_state.fill_bo = 0; + blt_done(sna); return true; } @@ -16754,7 +16962,9 @@ static int sna_create_gc(GCPtr gc) gc->freeCompClip = 0; gc->pCompositeClip = 0; +#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,19,99,1,0) gc->pRotatedPixmap = 0; +#endif fb_gc(gc)->bpp = bits_per_pixel(gc->depth); @@ -16789,7 +16999,8 @@ sna_get_image__inplace(PixmapPtr pixmap, break; } - if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC)) + if ((flags & MOVE_INPLACE_HINT) == 0 && + !kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC)) return false; if (idle && __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) @@ -16801,11 +17012,19 @@ sna_get_image__inplace(PixmapPtr pixmap, assert(sna_damage_contains_box(&priv->gpu_damage, ®ion->extents) == PIXMAN_REGION_IN); assert(sna_damage_contains_box(&priv->cpu_damage, ®ion->extents) == PIXMAN_REGION_OUT); - src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); - if (src == 
NULL) - return false; + if (kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC)) { + src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); + if (src == NULL) + return false; - kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); + kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); + } else { + src = kgem_bo_map__wc(&sna->kgem, priv->gpu_bo); + if (src == NULL) + return false; + + kgem_bo_sync__gtt(&sna->kgem, priv->gpu_bo); + } if (sigtrap_get()) return false; @@ -16833,12 +17052,11 @@ sna_get_image__inplace(PixmapPtr pixmap, region->extents.x2 - region->extents.x1, region->extents.y2 - region->extents.y1); if (!priv->shm) { - assert(src == MAP(priv->gpu_bo->map__cpu)); pixmap->devPrivate.ptr = src; pixmap->devKind = priv->gpu_bo->pitch; - priv->mapped = MAPPED_CPU; + priv->mapped = src == MAP(priv->gpu_bo->map__cpu) ? MAPPED_CPU : MAPPED_GTT; assert_pixmap_map(pixmap, priv); - priv->cpu = true; + priv->cpu &= priv->mapped == MAPPED_CPU; } } @@ -16930,7 +17148,7 @@ sna_get_image__fast(PixmapPtr pixmap, if (priv == NULL || priv->gpu_damage == NULL) return false; - if (priv->clear) { + if (priv->clear && sigtrap_get() == 0) { int w = region->extents.x2 - region->extents.x1; int h = region->extents.y2 - region->extents.y1; int pitch = PixmapBytePad(w, pixmap->drawable.depth); @@ -16939,6 +17157,7 @@ sna_get_image__fast(PixmapPtr pixmap, __FUNCTION__, priv->clear_color)); assert(DAMAGE_IS_ALL(priv->gpu_damage)); assert(priv->cpu_damage == NULL); + sigtrap_assert_active(); if (priv->clear_color == 0 || pixmap->drawable.bitsPerPixel == 8 || @@ -16955,6 +17174,7 @@ sna_get_image__fast(PixmapPtr pixmap, priv->clear_color); } + sigtrap_put(); return true; } @@ -17001,8 +17221,7 @@ sna_get_image(DrawablePtr drawable, if (ACCEL_GET_IMAGE && !FORCE_FALLBACK && format == ZPixmap && - drawable->bitsPerPixel >= 8 && - PM_IS_SOLID(drawable, mask)) { + drawable->bitsPerPixel >= 8) { PixmapPtr pixmap = get_drawable_pixmap(drawable); int16_t dx, 
dy; @@ -17014,7 +17233,7 @@ sna_get_image(DrawablePtr drawable, region.data = NULL; if (sna_get_image__fast(pixmap, ®ion, dst, flags)) - return; + goto apply_planemask; if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, ®ion, flags)) @@ -17032,6 +17251,16 @@ sna_get_image(DrawablePtr drawable, region.extents.x1, region.extents.y1, 0, 0, w, h); sigtrap_put(); } + +apply_planemask: + if (!PM_IS_SOLID(drawable, mask)) { + FbStip pm = fbReplicatePixel(mask, drawable->bitsPerPixel); + FbStip *d = (FbStip *)dst; + int i, n = PixmapBytePad(w, drawable->depth) / sizeof(FbStip) * h; + + for (i = 0; i < n; i++) + d[i] &= pm; + } } else { region.extents.x1 = x + drawable->x; region.extents.y1 = y + drawable->y; @@ -17162,17 +17391,19 @@ void sna_accel_flush(struct sna *sna) __sna_free_pixmap(sna, priv->pixmap, priv); } } else { + unsigned hints; DBG(("%s: flushing DRI pixmap=%ld\n", __FUNCTION__, priv->pixmap->drawable.serialNumber)); assert(priv->flush); - if (sna_pixmap_move_to_gpu(priv->pixmap, - MOVE_READ | __MOVE_FORCE)) { - if (priv->flush & IS_CLIPPED) { + hints = MOVE_READ | __MOVE_FORCE; + if (priv->flush & FLUSH_WRITE) + hints |= MOVE_WRITE; + if (sna_pixmap_move_to_gpu(priv->pixmap, hints)) { + if (priv->flush & FLUSH_WRITE) { kgem_bo_unclean(&sna->kgem, priv->gpu_bo); sna_damage_all(&priv->gpu_damage, priv->pixmap); assert(priv->cpu_damage == NULL); - priv->clear = false; - priv->cpu = false; + assert(priv->clear == false); } } } @@ -17184,10 +17415,46 @@ void sna_accel_flush(struct sna *sna) } static void -sna_accel_flush_callback(CallbackListPtr *list, - pointer user_data, pointer call_data) +sna_shm_flush_callback(CallbackListPtr *list, + pointer user_data, pointer call_data) { - sna_accel_flush(user_data); + struct sna *sna = user_data; + + if (!sna->needs_shm_flush) + return; + + sna_accel_flush(sna); + sna->needs_shm_flush = false; +} + +static void +sna_flush_callback(CallbackListPtr *list, pointer user_data, pointer call_data) +{ + struct sna *sna = 
user_data; + + if (!sna->needs_dri_flush) + return; + + sna_accel_flush(sna); + sna->needs_dri_flush = false; +} + +static void +sna_event_callback(CallbackListPtr *list, pointer user_data, pointer call_data) +{ + EventInfoRec *eventinfo = call_data; + struct sna *sna = user_data; + int i; + + if (sna->needs_dri_flush) + return; + + for (i = 0; i < eventinfo->count; i++) { + if (eventinfo->events[i].u.u.type == sna->damage_event) { + sna->needs_dri_flush = true; + return; + } + } } static struct sna_pixmap *sna_accel_scanout(struct sna *sna) @@ -17199,6 +17466,7 @@ static struct sna_pixmap *sna_accel_scanout(struct sna *sna) assert(sna->vblank_interval); assert(sna->front); + assert(!sna->mode.hidden); priv = sna_pixmap(sna->front); if (priv->gpu_bo == NULL) @@ -17217,7 +17485,7 @@ static void sna_accel_disarm_timer(struct sna *sna, int id) static bool has_offload_slaves(struct sna *sna) { #if HAS_PIXMAP_SHARING - ScreenPtr screen = sna->scrn->pScreen; + ScreenPtr screen = to_screen_from_sna(sna); PixmapDirtyUpdatePtr dirty; xorg_list_for_each_entry(dirty, &screen->pixmap_dirty_list, ent) { @@ -17231,11 +17499,14 @@ static bool has_offload_slaves(struct sna *sna) static bool has_shadow(struct sna *sna) { - DamagePtr damage = sna->mode.shadow_damage; + DamagePtr damage; - if (damage == NULL) + if (!sna->mode.shadow_enabled) return false; + damage = sna->mode.shadow_damage; + assert(damage); + DBG(("%s: has pending damage? 
%d, outstanding flips: %d\n", __FUNCTION__, RegionNotEmpty(DamageRegion(damage)), @@ -17365,9 +17636,8 @@ static bool sna_accel_do_expire(struct sna *sna) static void sna_accel_post_damage(struct sna *sna) { #if HAS_PIXMAP_SHARING - ScreenPtr screen = sna->scrn->pScreen; + ScreenPtr screen = to_screen_from_sna(sna); PixmapDirtyUpdatePtr dirty; - bool flush = false; xorg_list_for_each_entry(dirty, &screen->pixmap_dirty_list, ent) { RegionRec region, *damage; @@ -17376,8 +17646,6 @@ static void sna_accel_post_damage(struct sna *sna) int16_t dx, dy; int n; - assert(dirty->src == sna->front); - damage = DamageRegion(dirty->damage); if (RegionNil(damage)) continue; @@ -17477,7 +17745,14 @@ fallback: box, n, COPY_LAST)) goto fallback; - flush = true; + /* Before signalling the slave via ProcessPending, + * ensure not only the batch is submitted as the + * slave may be using the Damage callback to perform + * its copy, but also that the memory must be coherent + * - we need to treat it as uncached for the PCI slave + * will bypass LLC. 
+ */ + kgem_bo_sync__gtt(&sna->kgem, __sna_pixmap_get_bo(dst)); } DamageRegionProcessPending(&dirty->slave_dst->drawable); @@ -17485,8 +17760,6 @@ skip: RegionUninit(®ion); DamageEmpty(dirty->damage); } - if (flush) - kgem_submit(&sna->kgem); #endif } @@ -17689,6 +17962,7 @@ sna_set_screen_pixmap(PixmapPtr pixmap) static Bool sna_create_window(WindowPtr win) { + DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); sna_set_window_pixmap(win, win->drawable.pScreen->devPrivate); return TRUE; } @@ -17714,6 +17988,7 @@ sna_unmap_window(WindowPtr win) static Bool sna_destroy_window(WindowPtr win) { + DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); sna_video_destroy_window(win); sna_dri2_destroy_window(win); return TRUE; @@ -17790,20 +18065,34 @@ static bool sna_option_accel_none(struct sna *sna) if (wedged(sna)) return true; - if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE)) + if (!xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_ENABLE, TRUE)) return true; + if (sna->kgem.gen >= 0120) + return true; + + if (!intel_option_cast_to_bool(sna->Options, + OPTION_ACCEL_METHOD, + !IS_DEFAULT_ACCEL_METHOD(NOACCEL))) + return false; + +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) s = xf86GetOptValString(sna->Options, OPTION_ACCEL_METHOD); if (s == NULL) return IS_DEFAULT_ACCEL_METHOD(NOACCEL); return strcasecmp(s, "none") == 0; +#else + return IS_DEFAULT_ACCEL_METHOD(NOACCEL); +#endif } static bool sna_option_accel_blt(struct sna *sna) { const char *s; + assert(sna->kgem.gen < 0120); + s = xf86GetOptValString(sna->Options, OPTION_ACCEL_METHOD); if (s == NULL) return false; @@ -17811,6 +18100,13 @@ static bool sna_option_accel_blt(struct sna *sna) return strcasecmp(s, "blt") == 0; } +#if HAVE_NOTIFY_FD +static void sna_accel_notify(int fd, int ready, void *data) +{ + sna_mode_wakeup(data); +} +#endif + bool sna_accel_init(ScreenPtr screen, struct sna *sna) { const char *backend; @@ -17822,7 +18118,7 @@ bool 
sna_accel_init(ScreenPtr screen, struct sna *sna) list_init(&sna->flush_pixmaps); list_init(&sna->active_pixmaps); - AddGeneralSocket(sna->kgem.fd); + SetNotifyFd(sna->kgem.fd, sna_accel_notify, X_NOTIFY_READ, sna); #ifdef DEBUG_MEMORY sna->timer_expire[DEBUG_MEMORY_TIMER] = GetTimeInMillis()+ 10 * 1000; @@ -17892,21 +18188,23 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) backend = "disabled"; sna->kgem.wedged = true; sna_render_mark_wedged(sna); - } else if (sna_option_accel_blt(sna) || sna->info->gen >= 0110) + } else if (sna_option_accel_blt(sna)) (void)backend; - else if (sna->info->gen >= 0100) + else if (sna->kgem.gen >= 0110) + backend = gen9_render_init(sna, backend); + else if (sna->kgem.gen >= 0100) backend = gen8_render_init(sna, backend); - else if (sna->info->gen >= 070) + else if (sna->kgem.gen >= 070) backend = gen7_render_init(sna, backend); - else if (sna->info->gen >= 060) + else if (sna->kgem.gen >= 060) backend = gen6_render_init(sna, backend); - else if (sna->info->gen >= 050) + else if (sna->kgem.gen >= 050) backend = gen5_render_init(sna, backend); - else if (sna->info->gen >= 040) + else if (sna->kgem.gen >= 040) backend = gen4_render_init(sna, backend); - else if (sna->info->gen >= 030) + else if (sna->kgem.gen >= 030) backend = gen3_render_init(sna, backend); - else if (sna->info->gen >= 020) + else if (sna->kgem.gen >= 020) backend = gen2_render_init(sna, backend); DBG(("%s(backend=%s, prefer_gpu=%x)\n", @@ -17924,8 +18222,14 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) void sna_accel_create(struct sna *sna) { + ExtensionEntry *damage; + DBG(("%s\n", __FUNCTION__)); + damage = CheckExtension("DAMAGE"); + if (damage) + sna->damage_event = damage->eventBase + XDamageNotify; + if (!sna_glyphs_create(sna)) goto fail; @@ -17943,27 +18247,59 @@ fail: no_render_init(sna); } -void sna_accel_watch_flush(struct sna *sna, int enable) +static void sna_shm_watch_flush(struct sna *sna, int enable) { DBG(("%s: enable=%d\n", 
__FUNCTION__, enable)); assert(enable); - if (sna->watch_flush == 0) { + if (sna->watch_shm_flush == 0) { + DBG(("%s: installing shm watchers\n", __FUNCTION__)); + assert(enable > 0); + + if (!AddCallback(&FlushCallback, sna_shm_flush_callback, sna)) + return; + + sna->watch_shm_flush++; + } + + sna->watch_shm_flush += enable; +} + +void sna_watch_flush(struct sna *sna, int enable) +{ + DBG(("%s: enable=%d\n", __FUNCTION__, enable)); + assert(enable); + + if (sna->watch_dri_flush == 0) { + int err = 0; + DBG(("%s: installing watchers\n", __FUNCTION__)); assert(enable > 0); - if (!AddCallback(&FlushCallback, sna_accel_flush_callback, sna)) { + + if (!sna->damage_event) + return; + + if (!AddCallback(&EventCallback, sna_event_callback, sna)) + err = 1; + + if (!AddCallback(&FlushCallback, sna_flush_callback, sna)) + err = 1; + + if (err) { xf86DrvMsg(sna->scrn->scrnIndex, X_Error, "Failed to attach ourselves to the flush callbacks, expect missing synchronisation with DRI clients (e.g a compositor)\n"); } - sna->watch_flush++; + + sna->watch_dri_flush++; } - sna->watch_flush += enable; + sna->watch_dri_flush += enable; } void sna_accel_leave(struct sna *sna) { DBG(("%s\n", __FUNCTION__)); + sna_scanout_flush(sna); /* as root we always have permission to render */ if (geteuid() == 0) @@ -17997,13 +18333,15 @@ void sna_accel_close(struct sna *sna) sna_pixmap_expire(sna); - DeleteCallback(&FlushCallback, sna_accel_flush_callback, sna); - RemoveGeneralSocket(sna->kgem.fd); + DeleteCallback(&FlushCallback, sna_shm_flush_callback, sna); + DeleteCallback(&FlushCallback, sna_flush_callback, sna); + DeleteCallback(&EventCallback, sna_event_callback, sna); + RemoveNotifyFd(sna->kgem.fd); kgem_cleanup_cache(&sna->kgem); } -void sna_accel_block_handler(struct sna *sna, struct timeval **tv) +void sna_accel_block(struct sna *sna, struct timeval **tv) { sigtrap_assert_inactive(); @@ -18044,10 +18382,17 @@ restart: if (sna_accel_do_debug_memory(sna)) sna_accel_debug_memory(sna); - if 
(sna->watch_flush == 1) { - DBG(("%s: removing watchers\n", __FUNCTION__)); - DeleteCallback(&FlushCallback, sna_accel_flush_callback, sna); - sna->watch_flush = 0; + if (sna->watch_shm_flush == 1) { + DBG(("%s: removing shm watchers\n", __FUNCTION__)); + DeleteCallback(&FlushCallback, sna_shm_flush_callback, sna); + sna->watch_shm_flush = 0; + } + + if (sna->watch_dri_flush == 1) { + DBG(("%s: removing dri watchers\n", __FUNCTION__)); + DeleteCallback(&FlushCallback, sna_flush_callback, sna); + DeleteCallback(&EventCallback, sna_event_callback, sna); + sna->watch_dri_flush = 0; } if (sna->timer_active & 1) { @@ -18083,22 +18428,6 @@ set_tv: } } -void sna_accel_wakeup_handler(struct sna *sna) -{ - DBG(("%s: nbatch=%d, need_retire=%d, need_purge=%d\n", __FUNCTION__, - sna->kgem.nbatch, sna->kgem.need_retire, sna->kgem.need_purge)); - - if (!sna->kgem.nbatch) - return; - - if (kgem_is_idle(&sna->kgem)) { - DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); - _kgem_submit(&sna->kgem); - } - - sigtrap_assert_inactive(); -} - void sna_accel_free(struct sna *sna) { DBG(("%s\n", __FUNCTION__)); diff --git a/src/sna/sna_acpi.c b/src/sna/sna_acpi.c index dcc0287b..643d04af 100644 --- a/src/sna/sna_acpi.c +++ b/src/sna/sna_acpi.c @@ -92,7 +92,7 @@ void _sna_acpi_wakeup(struct sna *sna) DBG(("%s: error [%d], detaching from acpid\n", __FUNCTION__, n)); /* XXX reattach later? 
*/ - RemoveGeneralSocket(sna->acpi.fd); + RemoveNotifyFd(sna->acpi.fd); sna_acpi_fini(sna); return; } @@ -136,6 +136,13 @@ void _sna_acpi_wakeup(struct sna *sna) } while (n); } +#if HAVE_NOTIFY_FD +static void sna_acpi_notify(int fd, int read, void *data) +{ + _sna_acpi_wakeup(data); +} +#endif + static int read_power_state(const char *path) { DIR *dir; @@ -200,7 +207,7 @@ void sna_acpi_init(struct sna *sna) DBG(("%s: attaching to acpid\n", __FUNCTION__)); - AddGeneralSocket(sna->acpi.fd); + SetNotifyFd(sna->acpi.fd, sna_acpi_notify, X_NOTIFY_READ, sna); sna->acpi.remain = sizeof(sna->acpi.event) - 1; sna->acpi.offset = 0; diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c index de8f6ec3..ddd2586d 100644 --- a/src/sna/sna_blt.c +++ b/src/sna/sna_blt.c @@ -86,6 +86,11 @@ static const uint8_t fill_ROP[] = { ROP_1 }; +static void sig_done(struct sna *sna, const struct sna_composite_op *op) +{ + sigtrap_put(); +} + static void nop_done(struct sna *sna, const struct sna_composite_op *op) { assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); @@ -129,7 +134,6 @@ static bool sna_blt_fill_init(struct sna *sna, struct kgem *kgem = &sna->kgem; assert(kgem_bo_can_blt (kgem, bo)); - assert(bo->tiling != I915_TILING_Y); blt->bo[0] = bo; blt->br13 = bo->pitch; @@ -183,6 +187,7 @@ static bool sna_blt_fill_init(struct sna *sna, return false; _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(kgem, NULL, bo); assert(sna->kgem.mode == KGEM_BLT); b = kgem->batch + kgem->nbatch; @@ -237,17 +242,13 @@ static bool sna_blt_fill_init(struct sna *sna, return true; } -noinline static void sna_blt_fill_begin(struct sna *sna, - const struct sna_blt_state *blt) +noinline static void __sna_blt_fill_begin(struct sna *sna, + const struct sna_blt_state *blt) { struct kgem *kgem = &sna->kgem; uint32_t *b; - if (kgem->nreloc) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); - assert(kgem->nbatch == 0); - } + kgem_bcs_set_tiling(&sna->kgem, NULL, blt->bo[0]); assert(kgem->mode == 
KGEM_BLT); b = kgem->batch + kgem->nbatch; @@ -293,6 +294,21 @@ noinline static void sna_blt_fill_begin(struct sna *sna, } } +inline static void sna_blt_fill_begin(struct sna *sna, + const struct sna_blt_state *blt) +{ + struct kgem *kgem = &sna->kgem; + + if (kgem->nreloc) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(kgem, NULL, blt->bo[0]); + assert(kgem->nbatch == 0); + } + + __sna_blt_fill_begin(sna, blt); +} + inline static void sna_blt_fill_one(struct sna *sna, const struct sna_blt_state *blt, int16_t x, int16_t y, @@ -330,8 +346,8 @@ static bool sna_blt_copy_init(struct sna *sna, { struct kgem *kgem = &sna->kgem; - assert(kgem_bo_can_blt (kgem, src)); - assert(kgem_bo_can_blt (kgem, dst)); + assert(kgem_bo_can_blt(kgem, src)); + assert(kgem_bo_can_blt(kgem, dst)); blt->bo[0] = src; blt->bo[1] = dst; @@ -370,6 +386,7 @@ static bool sna_blt_copy_init(struct sna *sna, return false; _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, src, dst); sna->blt_state.fill_bo = 0; return true; @@ -424,6 +441,7 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna, return false; _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, src, dst); sna->blt_state.fill_bo = 0; return true; @@ -454,6 +472,7 @@ static void sna_blt_alpha_fixup_one(struct sna *sna, !kgem_check_reloc(kgem, 2)) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]); } assert(sna->kgem.mode == KGEM_BLT); @@ -582,6 +601,7 @@ static void sna_blt_copy_one(struct sna *sna, !kgem_check_reloc(kgem, 2)) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]); } assert(sna->kgem.mode == KGEM_BLT); @@ -912,8 +932,27 @@ sna_composite_mask_is_opaque(PicturePtr mask) return is_solid(mask) && is_white(mask); else if (!PICT_FORMAT_A(mask->format)) return true; - else - return is_solid(mask) && is_opaque_solid(mask); + else if 
(mask->pSourcePict) { + PictSolidFill *fill = (PictSolidFill *) mask->pSourcePict; + return (fill->color >> 24) == 0xff; + } else { + struct sna_pixmap *priv; + assert(mask->pDrawable); + + if (mask->pDrawable->width == 1 && + mask->pDrawable->height == 1 && + mask->repeat) + return pixel_is_opaque(get_pixel(mask), mask->format); + + if (mask->transform) + return false; + + priv = sna_pixmap_from_drawable(mask->pDrawable); + if (priv == NULL || !priv->clear) + return false; + + return pixel_is_opaque(priv->clear_color, mask->format); + } } fastcall @@ -971,6 +1010,7 @@ static void blt_composite_fill__cpu(struct sna *sna, assert(op->dst.pixmap->devPrivate.ptr); assert(op->dst.pixmap->devKind); + sigtrap_assert_active(); pixman_fill(op->dst.pixmap->devPrivate.ptr, op->dst.pixmap->devKind / sizeof(uint32_t), op->dst.pixmap->drawable.bitsPerPixel, @@ -990,6 +1030,7 @@ blt_composite_fill_box_no_offset__cpu(struct sna *sna, assert(op->dst.pixmap->devPrivate.ptr); assert(op->dst.pixmap->devKind); + sigtrap_assert_active(); pixman_fill(op->dst.pixmap->devPrivate.ptr, op->dst.pixmap->devKind / sizeof(uint32_t), op->dst.pixmap->drawable.bitsPerPixel, @@ -1010,6 +1051,7 @@ blt_composite_fill_boxes_no_offset__cpu(struct sna *sna, assert(op->dst.pixmap->devPrivate.ptr); assert(op->dst.pixmap->devKind); + sigtrap_assert_active(); pixman_fill(op->dst.pixmap->devPrivate.ptr, op->dst.pixmap->devKind / sizeof(uint32_t), op->dst.pixmap->drawable.bitsPerPixel, @@ -1031,6 +1073,7 @@ blt_composite_fill_box__cpu(struct sna *sna, assert(op->dst.pixmap->devPrivate.ptr); assert(op->dst.pixmap->devKind); + sigtrap_assert_active(); pixman_fill(op->dst.pixmap->devPrivate.ptr, op->dst.pixmap->devKind / sizeof(uint32_t), op->dst.pixmap->drawable.bitsPerPixel, @@ -1052,6 +1095,7 @@ blt_composite_fill_boxes__cpu(struct sna *sna, assert(op->dst.pixmap->devPrivate.ptr); assert(op->dst.pixmap->devKind); + sigtrap_assert_active(); pixman_fill(op->dst.pixmap->devPrivate.ptr, op->dst.pixmap->devKind / 
sizeof(uint32_t), op->dst.pixmap->drawable.bitsPerPixel, @@ -1159,12 +1203,15 @@ static inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const box->y2 - box->y1 >= op->dst.height) { struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); if (op->dst.bo == priv->gpu_bo) { + sna_damage_all(&priv->gpu_damage, op->dst.pixmap); + sna_damage_destroy(&priv->cpu_damage); priv->clear = true; priv->clear_color = op->u.blt.pixel; DBG(("%s: pixmap=%ld marking clear [%08x]\n", __FUNCTION__, op->dst.pixmap->drawable.serialNumber, op->u.blt.pixel)); + ((struct sna_composite_op *)op)->damage = NULL; } } } @@ -1404,6 +1451,7 @@ begin_blt(struct sna *sna, return false; _kgem_set_mode(&sna->kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, op->dst.bo); } return true; @@ -1429,6 +1477,7 @@ prepare_blt_clear(struct sna *sna, DBG(("%s\n", __FUNCTION__)); if (op->dst.bo == NULL) { + op->u.blt.pixel = 0; op->blt = blt_composite_fill__cpu; if (op->dst.x|op->dst.y) { op->box = blt_composite_fill_box__cpu; @@ -1439,9 +1488,8 @@ prepare_blt_clear(struct sna *sna, op->boxes = blt_composite_fill_boxes_no_offset__cpu; op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu; } - op->done = nop_done; - op->u.blt.pixel = 0; - return true; + op->done = sig_done; + return sigtrap_get() == 0; } op->blt = blt_composite_fill; @@ -1484,8 +1532,8 @@ prepare_blt_fill(struct sna *sna, op->boxes = blt_composite_fill_boxes_no_offset__cpu; op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu; } - op->done = nop_done; - return true; + op->done = sig_done; + return sigtrap_get() == 0; } op->blt = blt_composite_fill; @@ -1668,6 +1716,7 @@ static void blt_composite_copy_boxes__thread(struct sna *sna, _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); } while (1); } else { do { @@ -1724,6 +1773,7 @@ static void blt_composite_copy_boxes__thread(struct sna *sna, _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + 
kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); } while (1); } sna_vertex_unlock(&sna->render); @@ -1806,6 +1856,7 @@ static void blt_composite_copy_boxes__thread64(struct sna *sna, _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); } while (1); } else { do { @@ -1864,6 +1915,7 @@ static void blt_composite_copy_boxes__thread64(struct sna *sna, _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); } while (1); } sna_vertex_unlock(&sna->render); @@ -1973,6 +2025,7 @@ prepare_blt_copy(struct sna *sna, } _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, bo, op->dst.bo); DBG(("%s\n", __FUNCTION__)); @@ -2396,6 +2449,9 @@ prepare_blt_put(struct sna *sna, op->box = blt_put_composite_box; op->boxes = blt_put_composite_boxes; } + + op->done = nop_done; + return true; } else { if (alpha_fixup) { op->u.blt.pixel = alpha_fixup; @@ -2407,10 +2463,10 @@ prepare_blt_put(struct sna *sna, op->box = blt_put_composite_box__cpu; op->boxes = blt_put_composite_boxes__cpu; } - } - op->done = nop_done; - return true; + op->done = sig_done; + return sigtrap_get() == 0; + } } static bool @@ -2544,6 +2600,7 @@ sna_blt_composite(struct sna *sna, clear: if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) { sna_pixmap(tmp->dst.pixmap)->clear = true; +nop: return prepare_blt_nop(sna, tmp); } @@ -2559,6 +2616,7 @@ clear: } tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &dst_box, &tmp->damage); + assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); if (tmp->dst.bo) { if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { DBG(("%s: can not blit to dst, tiling? %d, pitch? 
%d\n", @@ -2567,6 +2625,8 @@ clear: } if (hint & REPLACES) kgem_bo_undo(&sna->kgem, tmp->dst.bo); + if (flags & COMPOSITE_UPLOAD) + return false; } else { RegionRec region; @@ -2590,32 +2650,40 @@ clear: } if (op == PictOpOver && is_opaque_solid(src)) op = PictOpSrc; - if (op == PictOpAdd && is_white(src)) + if (op == PictOpAdd && + PICT_FORMAT_RGB(src->format) == PICT_FORMAT_RGB(dst->format) && + is_white(src)) op = PictOpSrc; if (was_clear && (op == PictOpAdd || op == PictOpOver)) { if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0) op = PictOpSrc; if (op == PictOpOver) { + unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color); color = over(get_solid_color(src, PICT_a8r8g8b8), - color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, - dst->format, PICT_a8r8g8b8)); + dst_color); op = PictOpSrc; DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n", __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), - color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, - dst->format, PICT_a8r8g8b8), + solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color), color)); + if (color == dst_color) + goto nop; + else + goto fill; } if (op == PictOpAdd) { + unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color); color = add(get_solid_color(src, PICT_a8r8g8b8), - color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, - dst->format, PICT_a8r8g8b8)); + dst_color); op = PictOpSrc; DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n", __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), - color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, - dst->format, PICT_a8r8g8b8), + solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color), color)); + if (color == dst_color) + goto nop; + else + goto fill; } } if (op == PictOpOutReverse && is_opaque_solid(src)) @@ -2649,6 +2717,7 @@ fill: } tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &dst_box, &tmp->damage); + assert(!tmp->damage || 
!DAMAGE_IS_ALL(*tmp->damage)); if (tmp->dst.bo) { if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n", @@ -2657,6 +2726,8 @@ fill: } if (hint & REPLACES) kgem_bo_undo(&sna->kgem, tmp->dst.bo); + if (flags & COMPOSITE_UPLOAD) + return false; } else { RegionRec region; @@ -2720,8 +2791,8 @@ fill: if (is_clear(src_pixmap)) { if (src->repeat || (x >= 0 && y >= 0 && - x + width < src_pixmap->drawable.width && - y + height < src_pixmap->drawable.height)) { + x + width <= src_pixmap->drawable.width && + y + height <= src_pixmap->drawable.height)) { color = color_convert(sna_pixmap(src_pixmap)->clear_color, src->format, tmp->dst.format); goto fill; @@ -2795,7 +2866,7 @@ fill: if (src_pixmap->drawable.width <= sna->render.max_3d_size && src_pixmap->drawable.height <= sna->render.max_3d_size && bo->pitch <= sna->render.max_3d_pitch && - (flags & COMPOSITE_FALLBACK) == 0) + (flags & (COMPOSITE_UPLOAD | COMPOSITE_FALLBACK)) == 0) { return false; } @@ -2817,6 +2888,7 @@ fill: } tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &dst_box, &tmp->damage); + assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); if (tmp->dst.bo && hint & REPLACES) { struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap); @@ -2846,7 +2918,7 @@ fallback: DBG(("%s: fallback -- unaccelerated upload\n", __FUNCTION__)); goto fallback; - } else { + } else if ((flags & COMPOSITE_UPLOAD) == 0) { ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup); if (!ret) goto fallback; @@ -3023,6 +3095,7 @@ sna_blt_composite__convert(struct sna *sna, } _kgem_set_mode(&sna->kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, tmp->src.bo, tmp->dst.bo); if (alpha_fixup) { tmp->blt = blt_composite_copy_with_alpha; @@ -3062,7 +3135,7 @@ static void sna_blt_fill_op_blt(struct sna *sna, if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { const struct sna_blt_state *blt = &op->base.u.blt; - sna_blt_fill_begin(sna, blt); + __sna_blt_fill_begin(sna, blt); 
sna->blt_state.fill_bo = blt->bo[0]->unique_id; sna->blt_state.fill_pixel = blt->pixel; @@ -3079,7 +3152,7 @@ fastcall static void sna_blt_fill_op_box(struct sna *sna, if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { const struct sna_blt_state *blt = &op->base.u.blt; - sna_blt_fill_begin(sna, blt); + __sna_blt_fill_begin(sna, blt); sna->blt_state.fill_bo = blt->bo[0]->unique_id; sna->blt_state.fill_pixel = blt->pixel; @@ -3097,7 +3170,7 @@ fastcall static void sna_blt_fill_op_boxes(struct sna *sna, if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { const struct sna_blt_state *blt = &op->base.u.blt; - sna_blt_fill_begin(sna, blt); + __sna_blt_fill_begin(sna, blt); sna->blt_state.fill_bo = blt->bo[0]->unique_id; sna->blt_state.fill_pixel = blt->pixel; @@ -3132,7 +3205,7 @@ fastcall static void sna_blt_fill_op_points(struct sna *sna, DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n)); if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { - sna_blt_fill_begin(sna, blt); + __sna_blt_fill_begin(sna, blt); sna->blt_state.fill_bo = blt->bo[0]->unique_id; sna->blt_state.fill_pixel = blt->pixel; @@ -3162,65 +3235,15 @@ fastcall static void sna_blt_fill_op_points(struct sna *sna, assert(kgem->nbatch < kgem->surface); if ((dx|dy) == 0) { - while (n_this_time >= 8) { - *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); - *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); - *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0); - *((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0); - *((uint64_t *)b + 4) = pt_add(cmd, p+4, 0, 0); - *((uint64_t *)b + 5) = pt_add(cmd, p+5, 0, 0); - *((uint64_t *)b + 6) = pt_add(cmd, p+6, 0, 0); - *((uint64_t *)b + 7) = pt_add(cmd, p+7, 0, 0); - b += 16; - n_this_time -= 8; - p += 8; - } - if (n_this_time & 4) { - *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); - *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); - *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0); - *((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0); - b += 8; - p += 4; - 
} - if (n_this_time & 2) { - *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); - *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); - b += 4; - p += 2; - } - if (n_this_time & 1) - *((uint64_t *)b + 0) = pt_add(cmd, p++, 0, 0); + do { + *(uint64_t *)b = pt_add(cmd, p++, 0, 0); + b += 2; + } while (--n_this_time); } else { - while (n_this_time >= 8) { - *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); - *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); - *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy); - *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy); - *((uint64_t *)b + 4) = pt_add(cmd, p+4, dx, dy); - *((uint64_t *)b + 5) = pt_add(cmd, p+5, dx, dy); - *((uint64_t *)b + 6) = pt_add(cmd, p+6, dx, dy); - *((uint64_t *)b + 7) = pt_add(cmd, p+7, dx, dy); - b += 16; - n_this_time -= 8; - p += 8; - } - if (n_this_time & 4) { - *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); - *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); - *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy); - *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy); - b += 8; - p += 8; - } - if (n_this_time & 2) { - *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); - *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); - b += 4; - p += 2; - } - if (n_this_time & 1) - *((uint64_t *)b + 0) = pt_add(cmd, p++, dx, dy); + do { + *(uint64_t *)b = pt_add(cmd, p++, dx, dy); + b += 2; + } while (--n_this_time); } if (!n) @@ -3414,6 +3437,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); assert(kgem_check_batch(kgem, 6)); assert(kgem_check_reloc(kgem, 1)); @@ -3520,6 +3544,8 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + assert(sna->kgem.mode == KGEM_BLT); b = kgem->batch + kgem->nbatch; if (kgem->gen >= 0100) { @@ -3608,6 +3634,7 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + 
kgem_bcs_set_tiling(&sna->kgem, NULL, bo); assert(sna->kgem.mode == KGEM_BLT); b = kgem->batch + kgem->nbatch; @@ -3754,6 +3781,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, } _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); if ((dst_dx | dst_dy) == 0) { if (kgem->gen >= 0100) { @@ -3814,6 +3842,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); } while (1); } else { uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6; @@ -3871,6 +3900,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); } while (1); } } else { @@ -3932,6 +3962,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); } while (1); } else { cmd |= 6; @@ -3989,6 +4020,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); } while (1); } } @@ -4095,6 +4127,7 @@ bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu, !kgem_check_reloc(kgem, 2)) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); } assert(sna->kgem.mode == KGEM_BLT); @@ -4190,6 +4223,7 @@ bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, DBG(("%s: dst == src\n", __FUNCTION__)); if (src_bo->tiling == I915_TILING_Y && + !sna->kgem.can_blt_y && kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { struct kgem_bo *bo; @@ -4237,6 +4271,7 @@ bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, } } else { if (src_bo->tiling == I915_TILING_Y && + !sna->kgem.can_blt_y && kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { DBG(("%s: src is y-tiled\n", __FUNCTION__)); if (src->type != DRAWABLE_PIXMAP) @@ 
-4251,6 +4286,7 @@ bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, } if (dst_bo->tiling == I915_TILING_Y && + !sna->kgem.can_blt_y && kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) { DBG(("%s: dst is y-tiled\n", __FUNCTION__)); if (dst->type != DRAWABLE_PIXMAP) diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c index f01f020e..1da8c291 100644 --- a/src/sna/sna_composite.c +++ b/src/sna/sna_composite.c @@ -452,6 +452,8 @@ static void apply_damage(struct sna_composite_op *op, RegionPtr region) op->damage = NULL; } else sna_damage_add(op->damage, region); + + assert(!op->damage || !DAMAGE_IS_ALL(*op->damage)); } static inline bool use_cpu(PixmapPtr pixmap, struct sna_pixmap *priv, @@ -653,8 +655,9 @@ sna_composite(CARD8 op, RegionRec region; int dx, dy; - DBG(("%s(%d src=%ld+(%d, %d), mask=%ld+(%d, %d), dst=%ld+(%d, %d)+(%d, %d), size=(%d, %d)\n", - __FUNCTION__, op, + DBG(("%s(pixmap=%ld, op=%d, src=%ld+(%d, %d), mask=%ld+(%d, %d), dst=%ld+(%d, %d)+(%d, %d), size=(%d, %d)\n", + __FUNCTION__, + pixmap->drawable.serialNumber, op, get_picture_id(src), src_x, src_y, get_picture_id(mask), mask_x, mask_y, get_picture_id(dst), dst_x, dst_y, @@ -673,13 +676,6 @@ sna_composite(CARD8 op, src = sna->clear; } - if (mask && sna_composite_mask_is_opaque(mask)) { - DBG(("%s: removing opaque %smask\n", - __FUNCTION__, - mask->componentAlpha && PICT_FORMAT_RGB(mask->format) ? "CA " : "")); - mask = NULL; - } - if (!sna_compute_composite_region(®ion, src, mask, dst, src_x, src_y, @@ -688,6 +684,13 @@ sna_composite(CARD8 op, width, height)) return; + if (mask && sna_composite_mask_is_opaque(mask)) { + DBG(("%s: removing opaque %smask\n", + __FUNCTION__, + mask->componentAlpha && PICT_FORMAT_RGB(mask->format) ? 
"CA " : "")); + mask = NULL; + } + if (NO_COMPOSITE) goto fallback; @@ -756,6 +759,7 @@ sna_composite(CARD8 op, DBG(("%s: fallback due unhandled composite op\n", __FUNCTION__)); goto fallback; } + assert(!tmp.damage || !DAMAGE_IS_ALL(*tmp.damage)); if (region.data == NULL) tmp.box(sna, &tmp, ®ion.extents); @@ -797,8 +801,10 @@ sna_composite_rectangles(CARD8 op, int i, num_boxes; unsigned hint; - DBG(("%s(op=%d, %08x x %d [(%d, %d)x(%d, %d) ...])\n", - __FUNCTION__, op, + DBG(("%s(pixmap=%ld, op=%d, %08x x %d [(%d, %d)x(%d, %d) ...])\n", + __FUNCTION__, + get_drawable_pixmap(dst->pDrawable)->drawable.serialNumber, + op, (color->alpha >> 8 << 24) | (color->red >> 8 << 16) | (color->green >> 8 << 8) | @@ -814,38 +820,40 @@ sna_composite_rectangles(CARD8 op, return; } - if ((color->red|color->green|color->blue|color->alpha) <= 0x00ff) { - switch (op) { - case PictOpOver: - case PictOpOutReverse: - case PictOpAdd: - return; - case PictOpInReverse: - case PictOpSrc: - op = PictOpClear; - break; - case PictOpAtopReverse: - op = PictOpOut; - break; - case PictOpXor: - op = PictOpOverReverse; - break; - } - } if (color->alpha <= 0x00ff) { - switch (op) { - case PictOpOver: - case PictOpOutReverse: - return; - case PictOpInReverse: - op = PictOpClear; - break; - case PictOpAtopReverse: - op = PictOpOut; - break; - case PictOpXor: - op = PictOpOverReverse; - break; + if (PICT_FORMAT_TYPE(dst->format) == PICT_TYPE_A || + (color->red|color->green|color->blue) <= 0x00ff) { + switch (op) { + case PictOpOver: + case PictOpOutReverse: + case PictOpAdd: + return; + case PictOpInReverse: + case PictOpSrc: + op = PictOpClear; + break; + case PictOpAtopReverse: + op = PictOpOut; + break; + case PictOpXor: + op = PictOpOverReverse; + break; + } + } else { + switch (op) { + case PictOpOver: + case PictOpOutReverse: + return; + case PictOpInReverse: + op = PictOpClear; + break; + case PictOpAtopReverse: + op = PictOpOut; + break; + case PictOpXor: + op = PictOpOverReverse; + break; + } } 
} else if (color->alpha >= 0xff00) { switch (op) { @@ -863,11 +871,16 @@ sna_composite_rectangles(CARD8 op, case PictOpXor: op = PictOpOut; break; + case PictOpAdd: + if (PICT_FORMAT_TYPE(dst->format) == PICT_TYPE_A || + (color->red&color->green&color->blue) >= 0xff00) + op = PictOpSrc; + break; } } /* Avoid reducing overlapping translucent rectangles */ - if (op == PictOpOver && + if ((op == PictOpOver || op == PictOpAdd) && num_rects == 1 && sna_drawable_is_clear(dst->pDrawable)) op = PictOpSrc; @@ -979,6 +992,9 @@ sna_composite_rectangles(CARD8 op, bool ok; if (op == PictOpClear) { + if (priv->clear_color == 0) + goto done; + ok = sna_get_pixel_from_rgba(&pixel, 0, 0, 0, 0, dst->format); @@ -990,8 +1006,11 @@ sna_composite_rectangles(CARD8 op, color->alpha, dst->format); } - if (ok && priv->clear_color == pixel) + if (ok && priv->clear_color == pixel) { + DBG(("%s: matches current clear, skipping\n", + __FUNCTION__)); goto done; + } } if (region.data == NULL) { diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h index 272e83bc..d5c727ee 100644 --- a/src/sna/sna_damage.h +++ b/src/sna/sna_damage.h @@ -267,7 +267,7 @@ int _sna_damage_get_boxes(struct sna_damage *damage, const BoxRec **boxes); static inline int sna_damage_get_boxes(struct sna_damage *damage, const BoxRec **boxes) { - assert(damage); + assert(DAMAGE_PTR(damage)); if (DAMAGE_IS_ALL(damage)) { *boxes = &DAMAGE_PTR(damage)->extents; @@ -322,7 +322,8 @@ static inline void sna_damage_destroy(struct sna_damage **damage) if (*damage == NULL) return; - __sna_damage_destroy(DAMAGE_PTR(*damage)); + if (DAMAGE_PTR(*damage)) + __sna_damage_destroy(DAMAGE_PTR(*damage)); *damage = NULL; } diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index 4b218b70..9b77550e 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -39,6 +39,25 @@ #include #include #include +#include + +#if HAVE_ALLOCA_H +#include +#elif defined __GNUC__ +#define alloca __builtin_alloca +#elif defined _AIX +#define 
alloca __alloca +#elif defined _MSC_VER +#include +#define alloca _alloca +#else +void *alloca(size_t); +#endif + +#define _PARSE_EDID_ +/* Jump through a few hoops in order to fixup EDIDs */ +#undef VERSION +#undef REVISION #include "sna.h" #include "sna_reg.h" @@ -72,6 +91,10 @@ #include #endif +#define FAIL_CURSOR_IOCTL 0 + +#define COLDPLUG_DELAY_MS 2000 + /* Minor discrepancy between 32-bit/64-bit ABI in old kernels */ union compat_mode_get_connector{ struct drm_mode_get_connector conn; @@ -88,6 +111,8 @@ union compat_mode_get_connector{ #define DEFAULT_DPI 96 #endif +#define OUTPUT_STATUS_CACHE_MS 15000 + #define DRM_MODE_PAGE_FLIP_ASYNC 0x02 #define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 @@ -106,33 +131,87 @@ struct local_mode_obj_get_properties { }; #define LOCAL_MODE_OBJECT_PLANE 0xeeeeeeee -#if 0 +struct local_mode_set_plane { + uint32_t plane_id; + uint32_t crtc_id; + uint32_t fb_id; /* fb object contains surface format type */ + uint32_t flags; + + /* Signed dest location allows it to be partially off screen */ + int32_t crtc_x, crtc_y; + uint32_t crtc_w, crtc_h; + + /* Source values are 16.16 fixed point */ + uint32_t src_x, src_y; + uint32_t src_h, src_w; +}; +#define LOCAL_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct local_mode_set_plane) + +struct local_mode_get_plane { + uint32_t plane_id; + + uint32_t crtc_id; + uint32_t fb_id; + + uint32_t possible_crtcs; + uint32_t gamma_size; + + uint32_t count_format_types; + uint64_t format_type_ptr; +}; +#define LOCAL_IOCTL_MODE_GETPLANE DRM_IOWR(0xb6, struct local_mode_get_plane) + +struct local_mode_get_plane_res { + uint64_t plane_id_ptr; + uint64_t count_planes; +}; +#define LOCAL_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xb5, struct local_mode_get_plane_res) + +#if 1 #define __DBG DBG #else #define __DBG(x) #endif +#define DBG_NATIVE_ROTATION ~0 /* minimum RR_Rotate_0 */ + extern XF86ConfigPtr xf86configptr; +struct sna_cursor { + struct sna_cursor *next; + uint32_t *image; + bool transformed; + Rotation rotation; 
+ int ref; + int size; + int last_width; + int last_height; + unsigned handle; + unsigned serial; + unsigned alloc; +}; + struct sna_crtc { + unsigned long flags; + uint32_t id; xf86CrtcPtr base; struct drm_mode_modeinfo kmode; - int dpms_mode; PixmapPtr slave_pixmap; DamagePtr slave_damage; - struct kgem_bo *bo, *shadow_bo, *client_bo; + struct kgem_bo *bo, *shadow_bo, *client_bo, *cache_bo; struct sna_cursor *cursor; unsigned int last_cursor_size; uint32_t offset; bool shadow; bool fallback_shadow; bool transform; + bool cursor_transform; + bool hwcursor; bool flip_pending; - uint8_t id; - uint8_t pipe; - RegionRec client_damage; /* XXX overlap with shadow damage? */ + struct pict_f_transform cursor_to_fb, fb_to_cursor; + RegionRec crtc_damage; uint16_t shadow_bo_width, shadow_bo_height; uint32_t rotation; @@ -143,7 +222,9 @@ struct sna_crtc { uint32_t supported; uint32_t current; } rotation; - } primary, sprite; + struct list link; + } primary; + struct list sprites; uint32_t mode_serial, flip_serial; @@ -173,21 +254,33 @@ struct sna_output { unsigned int is_panel : 1; unsigned int add_default_modes : 1; + int connector_type; + int connector_type_id; + + uint32_t link_status_idx; uint32_t edid_idx; uint32_t edid_blob_id; uint32_t edid_len; void *edid_raw; + xf86MonPtr fake_edid_mon; + void *fake_edid_raw; bool has_panel_limits; int panel_hdisplay; int panel_vdisplay; uint32_t dpms_id; - int dpms_mode; + uint8_t dpms_mode; struct backlight backlight; int backlight_active_level; + uint32_t last_detect; + uint32_t status; + unsigned int hotplug_count; + bool update_properties; + bool reprobe; + int num_modes; struct drm_mode_modeinfo *modes; @@ -218,13 +311,91 @@ enum { /* XXX copied from hw/xfree86/modes/xf86Crtc.c */ OPTION_DEFAULT_MODES, }; +static void __sna_output_dpms(xf86OutputPtr output, int dpms, int fixup); static void sna_crtc_disable_cursor(struct sna *sna, struct sna_crtc *crtc); +static bool sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, + 
struct kgem_bo *bo, int x, int y); static bool is_zaphod(ScrnInfoPtr scrn) { return xf86IsEntityShared(scrn->entityList[0]); } +static bool +sna_zaphod_match(struct sna *sna, const char *output) +{ + const char *s, *colon; + char t[20]; + unsigned int i = 0; + + s = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); + if (s == NULL) + return false; + + colon = strchr(s, ':'); + if (colon) /* Skip over the ZaphodPipes */ + s = colon + 1; + + do { + /* match any outputs in a comma list, stopping at whitespace */ + switch (*s) { + case '\0': + t[i] = '\0'; + return strcmp(t, output) == 0; + + case ',': + t[i] ='\0'; + if (strcmp(t, output) == 0) + return TRUE; + i = 0; + break; + + case ' ': + case '\t': + case '\n': + case '\r': + break; + + default: + t[i++] = *s; + break; + } + + s++; + } while (i < sizeof(t)); + + return false; +} + +static unsigned +get_zaphod_crtcs(struct sna *sna) +{ + const char *str, *colon; + unsigned crtcs = 0; + + str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); + if (str == NULL || (colon = strchr(str, ':')) == NULL) { + DBG(("%s: no zaphod pipes, using screen number: %x\n", + __FUNCTION__, + sna->scrn->confScreen->device->screen)); + return 1 << sna->scrn->confScreen->device->screen; + } + + DBG(("%s: ZaphodHeads='%s'\n", __FUNCTION__, str)); + while (str < colon) { + char *end; + unsigned crtc = strtoul(str, &end, 0); + if (end == str) + break; + DBG(("%s: adding CRTC %d to zaphod pipes\n", + __FUNCTION__, crtc)); + crtcs |= 1 << crtc; + str = end + 1; + } + DBG(("%s: ZaphodPipes=%x\n", __FUNCTION__, crtcs)); + return crtcs; +} + inline static unsigned count_to_mask(int x) { return (1 << x) - 1; @@ -247,6 +418,21 @@ static inline struct sna_crtc *to_sna_crtc(xf86CrtcPtr crtc) return crtc->driver_private; } +static inline unsigned __sna_crtc_pipe(struct sna_crtc *crtc) +{ + return crtc->flags >> 8 & 0xff; +} + +static inline unsigned __sna_crtc_id(struct sna_crtc *crtc) +{ + return crtc->id; +} + +uint32_t sna_crtc_id(xf86CrtcPtr 
crtc) +{ + return __sna_crtc_id(to_sna_crtc(crtc)); +} + static inline bool event_pending(int fd) { struct pollfd pfd; @@ -268,29 +454,37 @@ static inline uint32_t fb_id(struct kgem_bo *bo) return bo->delta; } -uint32_t sna_crtc_id(xf86CrtcPtr crtc) +unsigned sna_crtc_count_sprites(xf86CrtcPtr crtc) { - if (to_sna_crtc(crtc) == NULL) - return 0; - return to_sna_crtc(crtc)->id; -} + struct plane *sprite; + unsigned count; -int sna_crtc_to_pipe(xf86CrtcPtr crtc) -{ - assert(to_sna_crtc(crtc)); - return to_sna_crtc(crtc)->pipe; + count = 0; + list_for_each_entry(sprite, &to_sna_crtc(crtc)->sprites, link) + count++; + + return count; } -uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc) +static struct plane *lookup_sprite(struct sna_crtc *crtc, unsigned idx) { - assert(to_sna_crtc(crtc)); - return to_sna_crtc(crtc)->sprite.id; + struct plane *sprite; + + list_for_each_entry(sprite, &crtc->sprites, link) + if (idx-- == 0) + return sprite; + + return NULL; } -bool sna_crtc_is_on(xf86CrtcPtr crtc) +uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc, unsigned idx) { + struct plane *sprite; + assert(to_sna_crtc(crtc)); - return to_sna_crtc(crtc)->bo != NULL; + + sprite = lookup_sprite(to_sna_crtc(crtc), idx); + return sprite ? 
sprite->id : 0; } bool sna_crtc_is_transformed(xf86CrtcPtr crtc) @@ -299,34 +493,48 @@ bool sna_crtc_is_transformed(xf86CrtcPtr crtc) return to_sna_crtc(crtc)->transform; } -static inline uint64_t msc64(struct sna_crtc *sna_crtc, uint32_t seq) +static inline bool msc64(struct sna_crtc *sna_crtc, uint32_t seq, uint64_t *msc) { + bool record = true; if (seq < sna_crtc->last_seq) { if (sna_crtc->last_seq - seq > 0x40000000) { sna_crtc->wrap_seq++; DBG(("%s: pipe=%d wrapped; was %u, now %u, wraps=%u\n", - __FUNCTION__, sna_crtc->pipe, + __FUNCTION__, __sna_crtc_pipe(sna_crtc), sna_crtc->last_seq, seq, sna_crtc->wrap_seq)); - } else { - ERR(("%s: pipe=%d msc went backwards; was %u, now %u\n", - __FUNCTION__, sna_crtc->pipe, sna_crtc->last_seq, seq)); - seq = sna_crtc->last_seq; + } else { + DBG(("%s: pipe=%d msc went backwards; was %u, now %u; ignoring for last_swap\n", + __FUNCTION__, __sna_crtc_pipe(sna_crtc), sna_crtc->last_seq, seq)); + + record = false; } } - sna_crtc->last_seq = seq; - return (uint64_t)sna_crtc->wrap_seq << 32 | seq; + *msc = (uint64_t)sna_crtc->wrap_seq << 32 | seq; + return record; } uint64_t sna_crtc_record_swap(xf86CrtcPtr crtc, int tv_sec, int tv_usec, unsigned seq) { struct sna_crtc *sna_crtc = to_sna_crtc(crtc); + uint64_t msc; + assert(sna_crtc); - DBG(("%s: recording last swap on pipe=%d, frame %d, time %d.%06d\n", - __FUNCTION__, sna_crtc->pipe, seq, tv_sec, tv_usec)); - sna_crtc->swap.tv_sec = tv_sec; - sna_crtc->swap.tv_usec = tv_usec; - return sna_crtc->swap.msc = msc64(sna_crtc, seq); + + if (msc64(sna_crtc, seq, &msc)) { + DBG(("%s: recording last swap on pipe=%d, frame %d [msc=%08lld], time %d.%06d\n", + __FUNCTION__, __sna_crtc_pipe(sna_crtc), seq, (long long)msc, + tv_sec, tv_usec)); + sna_crtc->swap.tv_sec = tv_sec; + sna_crtc->swap.tv_usec = tv_usec; + sna_crtc->swap.msc = msc; + } else { + DBG(("%s: swap event on pipe=%d, frame %d [msc=%08lld], time %d.%06d\n", + __FUNCTION__, __sna_crtc_pipe(sna_crtc), seq, (long long)msc, + 
tv_sec, tv_usec)); + } + + return msc; } const struct ust_msc *sna_crtc_last_swap(xf86CrtcPtr crtc) @@ -342,15 +550,6 @@ const struct ust_msc *sna_crtc_last_swap(xf86CrtcPtr crtc) } } -xf86CrtcPtr sna_mode_first_crtc(struct sna *sna) -{ - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); - if (sna->mode.num_real_crtc) - return config->crtc[0]; - else - return NULL; -} - #ifndef NDEBUG static void gem_close(int fd, uint32_t handle); static void assert_scanout(struct kgem *kgem, struct kgem_bo *bo, @@ -372,12 +571,24 @@ static void assert_scanout(struct kgem *kgem, struct kgem_bo *bo, #define assert_scanout(k, b, w, h) #endif +static void assert_crtc_fb(struct sna *sna, struct sna_crtc *crtc) +{ +#ifndef NDEBUG + struct drm_mode_crtc mode = { .crtc_id = __sna_crtc_id(crtc) }; + drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode); + assert(mode.fb_id == fb_id(crtc->bo)); +#endif +} + static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, int width, int height) { ScrnInfoPtr scrn = sna->scrn; struct drm_mode_fb_cmd arg; + if (!kgem_bo_is_fenced(&sna->kgem, bo)) + return 0; + assert(bo->refcnt); assert(bo->proxy == NULL); assert(!bo->snoop); @@ -393,8 +604,9 @@ static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, DBG(("%s: create fb %dx%d@%d/%d\n", __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel)); - assert(bo->tiling != I915_TILING_Y); + assert(bo->tiling != I915_TILING_Y || sna->kgem.can_scanout_y); assert((bo->pitch & 63) == 0); + assert(scrn->vtSema); /* must be master */ VG_CLEAR(arg); arg.width = width; @@ -404,21 +616,83 @@ static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, arg.depth = scrn->depth; arg.handle = bo->handle; - assert(sna->scrn->vtSema); /* must be master */ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_ADDFB, &arg)) { - xf86DrvMsg(scrn->scrnIndex, X_ERROR, - "%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d: %d\n", - __FUNCTION__, width, height, - scrn->depth, scrn->bitsPerPixel, bo->pitch, errno); - 
return 0; + /* Try again with the fancy version */ + struct local_mode_fb_cmd2 { + uint32_t fb_id; + uint32_t width, height; + uint32_t pixel_format; + uint32_t flags; + + uint32_t handles[4]; + uint32_t pitches[4]; /* pitch for each plane */ + uint32_t offsets[4]; /* offset of each plane */ + uint64_t modifiers[4]; + } f; +#define LOCAL_IOCTL_MODE_ADDFB2 DRM_IOWR(0xb8, struct local_mode_fb_cmd2) + memset(&f, 0, sizeof(f)); + f.width = width; + f.height = height; + /* XXX interlaced */ + f.flags = 1 << 1; /* +modifiers */ + f.handles[0] = bo->handle; + f.pitches[0] = bo->pitch; + + switch (bo->tiling) { + case I915_TILING_NONE: + break; + case I915_TILING_X: + /* I915_FORMAT_MOD_X_TILED */ + f.modifiers[0] = (uint64_t)1 << 56 | 1; + break; + case I915_TILING_Y: + /* I915_FORMAT_MOD_Y_TILED */ + f.modifiers[0] = (uint64_t)1 << 56 | 2; + break; + } + +#define fourcc(a,b,c,d) ((a) | (b) << 8 | (c) << 16 | (d) << 24) + switch (scrn->depth) { + default: + ERR(("%s: unhandled screen format, depth=%d\n", + __FUNCTION__, scrn->depth)); + goto fail; + case 8: + f.pixel_format = fourcc('C', '8', ' ', ' '); + break; + case 15: + f.pixel_format = fourcc('X', 'R', '1', '5'); + break; + case 16: + f.pixel_format = fourcc('R', 'G', '1', '6'); + break; + case 24: + f.pixel_format = fourcc('X', 'R', '2', '4'); + break; + case 30: + f.pixel_format = fourcc('X', 'R', '3', '0'); + break; + } +#undef fourcc + + if (drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_ADDFB2, &f)) { +fail: + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d: %d\n", + __FUNCTION__, width, height, + scrn->depth, scrn->bitsPerPixel, bo->pitch, errno); + return 0; + } + + arg.fb_id = f.fb_id; } assert(arg.fb_id != 0); - + bo->delta = arg.fb_id; DBG(("%s: attached fb=%d to handle=%d\n", - __FUNCTION__, arg.fb_id, arg.handle)); + __FUNCTION__, bo->delta, arg.handle)); bo->scanout = true; - return bo->delta = arg.fb_id; + return bo->delta; } static uint32_t gem_create(int fd, 
int size) @@ -438,6 +712,7 @@ static uint32_t gem_create(int fd, int size) static void *gem_mmap(int fd, int handle, int size) { struct drm_i915_gem_mmap_gtt mmap_arg; + struct drm_i915_gem_set_domain set_domain; void *ptr; VG_CLEAR(mmap_arg); @@ -449,6 +724,15 @@ static void *gem_mmap(int fd, int handle, int size) if (ptr == MAP_FAILED) return NULL; + VG_CLEAR(set_domain); + set_domain.handle = handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { + munmap(ptr, size); + return NULL; + } + return ptr; } @@ -497,8 +781,6 @@ sna_backlight_uevent(int fd, void *closure) if (sna_output->dpms_mode != DPMSModeOn) continue; - assert(output->randr_output); - val = backlight_get(&sna_output->backlight); if (val < 0) continue; @@ -523,6 +805,7 @@ sna_backlight_uevent(int fd, void *closure) TRUE, FALSE); } } + DBG(("%s: complete\n", __FUNCTION__)); } static void sna_backlight_pre_init(struct sna *sna) @@ -570,6 +853,7 @@ static void sna_backlight_drain_uevents(struct sna *sna) if (sna->mode.backlight_monitor == NULL) return; + DBG(("%s()\n", __FUNCTION__)); sna_backlight_uevent(udev_monitor_get_fd(sna->mode.backlight_monitor), sna); } @@ -632,9 +916,22 @@ sna_output_backlight_set(struct sna_output *sna_output, int level) return ret; } +static bool +has_native_backlight(struct sna_output *sna_output) +{ + return sna_output->backlight.type == BL_RAW; +} + static void sna_output_backlight_off(struct sna_output *sna_output) { + /* Trust the kernel to turn the native backlight off. However, we + * do explicitly turn the backlight back on (when we wake the output) + * just in case a third party turns it off! 
+ */ + if (has_native_backlight(sna_output)) + return; + DBG(("%s(%s)\n", __FUNCTION__, sna_output->base->name)); backlight_off(&sna_output->backlight); sna_output_backlight_set(sna_output, 0); @@ -674,7 +971,7 @@ has_user_backlight_override(xf86OutputPtr output) if (*str == '\0') return (char *)str; - if (backlight_exists(str) == BL_NONE) { + if (!backlight_exists(str)) { xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "Unrecognised backlight control interface '%s'\n", str); @@ -684,6 +981,93 @@ has_user_backlight_override(xf86OutputPtr output) return strdup(str); } +static int get_device_minor(int fd) +{ + struct stat st; + + if (fstat(fd, &st) || !S_ISCHR(st.st_mode)) + return -1; + + return st.st_rdev & 0x63; +} + +static const char * const sysfs_connector_types[] = { + /* DRM_MODE_CONNECTOR_Unknown */ "Unknown", + /* DRM_MODE_CONNECTOR_VGA */ "VGA", + /* DRM_MODE_CONNECTOR_DVII */ "DVI-I", + /* DRM_MODE_CONNECTOR_DVID */ "DVI-D", + /* DRM_MODE_CONNECTOR_DVIA */ "DVI-A", + /* DRM_MODE_CONNECTOR_Composite */ "Composite", + /* DRM_MODE_CONNECTOR_SVIDEO */ "SVIDEO", + /* DRM_MODE_CONNECTOR_LVDS */ "LVDS", + /* DRM_MODE_CONNECTOR_Component */ "Component", + /* DRM_MODE_CONNECTOR_9PinDIN */ "DIN", + /* DRM_MODE_CONNECTOR_DisplayPort */ "DP", + /* DRM_MODE_CONNECTOR_HDMIA */ "HDMI-A", + /* DRM_MODE_CONNECTOR_HDMIB */ "HDMI-B", + /* DRM_MODE_CONNECTOR_TV */ "TV", + /* DRM_MODE_CONNECTOR_eDP */ "eDP", + /* DRM_MODE_CONNECTOR_VIRTUAL */ "Virtual", + /* DRM_MODE_CONNECTOR_DSI */ "DSI", + /* DRM_MODE_CONNECTOR_DPI */ "DPI" +}; + +static char *has_connector_backlight(xf86OutputPtr output) +{ + struct sna_output *sna_output = output->driver_private; + struct sna *sna = to_sna(output->scrn); + char path[1024]; + DIR *dir; + struct dirent *de; + int minor, len; + char *str = NULL; + + if (sna_output->connector_type >= ARRAY_SIZE(sysfs_connector_types)) + return NULL; + + minor = get_device_minor(sna->kgem.fd); + if (minor < 0) + return NULL; + + len = snprintf(path, 
sizeof(path), + "/sys/class/drm/card%d-%s-%d", + minor, + sysfs_connector_types[sna_output->connector_type], + sna_output->connector_type_id); + DBG(("%s: lookup %s\n", __FUNCTION__, path)); + + dir = opendir(path); + if (dir == NULL) + return NULL; + + while ((de = readdir(dir))) { + struct stat st; + + if (*de->d_name == '.') + continue; + + snprintf(path + len, sizeof(path) - len, + "/%s", de->d_name); + + if (stat(path, &st)) + continue; + + if (!S_ISDIR(st.st_mode)) + continue; + + DBG(("%s: testing %s as backlight\n", + __FUNCTION__, de->d_name)); + + if (backlight_exists(de->d_name)) { + str = strdup(de->d_name); /* leak! */ + break; + } + } + + closedir(dir); + return str; +} + static void sna_output_backlight_init(xf86OutputPtr output) { @@ -696,11 +1080,20 @@ sna_output_backlight_init(xf86OutputPtr output) return; #endif - from = X_CONFIG; - best_iface = has_user_backlight_override(output); + if (sna_output->is_panel) { + from = X_CONFIG; + best_iface = has_user_backlight_override(output); + if (best_iface) + goto done; + } + + best_iface = has_connector_backlight(output); if (best_iface) goto done; + if (!sna_output->is_panel) + return; + /* XXX detect right backlight for multi-GPU/panels */ from = X_PROBED; pci = xf86GetPciInfoForEntity(to_sna(output->scrn)->pEnt->index); @@ -728,6 +1121,38 @@ done: sna_output->backlight.iface, best_iface, output->name); } +#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(22, 0) +static inline int sigio_block(void) +{ + return 0; +} +static inline void sigio_unblock(int was_blocked) +{ + (void)was_blocked; +} +#elif XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0) +static inline int sigio_block(void) +{ + OsBlockSIGIO(); + return 0; +} +static inline void sigio_unblock(int was_blocked) +{ + OsReleaseSIGIO(); + (void)was_blocked; +} +#else +#include +static inline int sigio_block(void) +{ + return xf86BlockSIGIO(); +} +static inline void sigio_unblock(int was_blocked) +{ + xf86UnblockSIGIO(was_blocked); +} +#endif 
+ static char *canonical_kmode_name(const struct drm_mode_modeinfo *kmode) { char tmp[32], *buf; @@ -781,6 +1206,7 @@ mode_from_kmode(ScrnInfoPtr scrn, mode->VTotal = kmode->vtotal; mode->VScan = kmode->vscan; + mode->VRefresh = kmode->vrefresh; mode->Flags = kmode->flags; mode->name = get_kmode_name(kmode); @@ -814,6 +1240,7 @@ mode_to_kmode(struct drm_mode_modeinfo *kmode, DisplayModePtr mode) kmode->vtotal = mode->VTotal; kmode->vscan = mode->VScan; + kmode->vrefresh = mode->VRefresh; kmode->flags = mode->Flags; if (mode->name) strncpy(kmode->name, mode->name, DRM_DISPLAY_MODE_LEN); @@ -824,11 +1251,12 @@ static void sna_crtc_force_outputs_on(xf86CrtcPtr crtc) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); + /* All attached outputs are valid, so update our timestamps */ + unsigned now = GetTimeInMillis(); int i; assert(to_sna_crtc(crtc)); - DBG(("%s(pipe=%d), currently? %d\n", __FUNCTION__, - to_sna_crtc(crtc)->pipe, to_sna_crtc(crtc)->dpms_mode)); + DBG(("%s(pipe=%d)\n", __FUNCTION__, sna_crtc_pipe(crtc))); /* DPMS handling by the kernel is inconsistent, so after setting a * mode on an output presume that we intend for it to be on, or that @@ -843,10 +1271,11 @@ sna_crtc_force_outputs_on(xf86CrtcPtr crtc) if (output->crtc != crtc) continue; - output->funcs->dpms(output, DPMSModeOn); + __sna_output_dpms(output, DPMSModeOn, false); + if (to_sna_output(output)->last_detect) + to_sna_output(output)->last_detect = now; } - to_sna_crtc(crtc)->dpms_mode = DPMSModeOn; #if XF86_CRTC_VERSION >= 3 crtc->active = TRUE; #endif @@ -859,8 +1288,7 @@ sna_crtc_force_outputs_off(xf86CrtcPtr crtc) int i; assert(to_sna_crtc(crtc)); - DBG(("%s(pipe=%d), currently? 
%d\n", __FUNCTION__, - to_sna_crtc(crtc)->pipe, to_sna_crtc(crtc)->dpms_mode)); + DBG(("%s(pipe=%d)\n", __FUNCTION__, sna_crtc_pipe(crtc))); /* DPMS handling by the kernel is inconsistent, so after setting a * mode on an output presume that we intend for it to be on, or that @@ -875,35 +1303,47 @@ sna_crtc_force_outputs_off(xf86CrtcPtr crtc) if (output->crtc != crtc) continue; - output->funcs->dpms(output, DPMSModeOff); + __sna_output_dpms(output, DPMSModeOff, false); } - - to_sna_crtc(crtc)->dpms_mode = DPMSModeOff; } static unsigned -rotation_reduce(struct plane *p, unsigned rotation) +rotation_reflect(unsigned rotation) { - unsigned unsupported_rotations = rotation & ~p->rotation.supported; + unsigned other_bits; - if (unsupported_rotations == 0) - return rotation; + /* paranoia for future extensions */ + other_bits = rotation & ~RR_Rotate_All; -#define RR_Reflect_XY (RR_Reflect_X | RR_Reflect_Y) + /* flip the reflection to compensate for reflecting the rotation */ + other_bits ^= RR_Reflect_X | RR_Reflect_Y; - if ((unsupported_rotations & RR_Reflect_XY) == RR_Reflect_XY && - p->rotation.supported& RR_Rotate_180) { - rotation &= ~RR_Reflect_XY; - rotation ^= RR_Rotate_180; - } + /* Reflect the screen by rotating the rotation bit, + * which has to have at least RR_Rotate_0 set. This allows + * us to reflect any of the rotation bits, not just 0. 
+ */ + rotation &= RR_Rotate_All; + assert(rotation); + rotation <<= 2; /* RR_Rotate_0 -> RR_Rotate_180 etc */ + rotation |= rotation >> 4; /* RR_Rotate_270' to RR_Rotate_90 */ + rotation &= RR_Rotate_All; + assert(rotation); - if ((unsupported_rotations & RR_Rotate_180) && - (p->rotation.supported& RR_Reflect_XY) == RR_Reflect_XY) { - rotation ^= RR_Reflect_XY; - rotation &= ~RR_Rotate_180; + return rotation | other_bits; +} + +static unsigned +rotation_reduce(struct plane *p, unsigned rotation) +{ + /* If unsupported try exchanging rotation for a reflection */ + if (rotation & ~p->rotation.supported) { + unsigned new_rotation = rotation_reflect(rotation); + if ((new_rotation & p->rotation.supported) == new_rotation) + rotation = new_rotation; } -#undef RR_Reflect_XY + /* Only one rotation bit should be set */ + assert(is_power_of_two(rotation & RR_Rotate_All)); return rotation; } @@ -923,7 +1363,7 @@ rotation_set(struct sna *sna, struct plane *p, uint32_t desired) if (desired == p->rotation.current) return true; - if ((desired & p->rotation.supported) == 0) { + if ((desired & p->rotation.supported) != desired) { errno = EINVAL; return false; } @@ -956,20 +1396,105 @@ rotation_reset(struct plane *p) p->rotation.current = 0; } -bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, uint32_t rotation) +bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, + unsigned idx, + uint32_t rotation) { + struct plane *sprite; assert(to_sna_crtc(crtc)); + + sprite = lookup_sprite(to_sna_crtc(crtc), idx); + if (!sprite) + return false; + DBG(("%s: CRTC:%d [pipe=%d], sprite=%u set-rotation=%x\n", __FUNCTION__, - to_sna_crtc(crtc)->id, to_sna_crtc(crtc)->pipe, to_sna_crtc(crtc)->sprite.id, - rotation)); + sna_crtc_id(crtc), sna_crtc_pipe(crtc), + sprite->id, rotation)); - return rotation_set(to_sna(crtc->scrn), - &to_sna_crtc(crtc)->sprite, - rotation_reduce(&to_sna_crtc(crtc)->sprite, rotation)); + return rotation_set(to_sna(crtc->scrn), sprite, + rotation_reduce(sprite, rotation)); 
} -static bool +#if HAS_DEBUG_FULL +#if !HAS_DEBUG_FULL +#define LogF ErrorF +#endif +struct kmsg { + int fd; + int saved_loglevel; +}; + +static int kmsg_get_debug(void) +{ + FILE *file; + int v = -1; + + file = fopen("/sys/module/drm/parameters/debug", "r"); + if (file) { + fscanf(file, "%d", &v); + fclose(file); + } + + return v; +} + +static void kmsg_set_debug(int v) +{ + FILE *file; + + file = fopen("/sys/module/drm/parameters/debug", "w"); + if (file) { + fprintf(file, "%d\n", v); + fclose(file); + } +} + +static void kmsg_open(struct kmsg *k) +{ + k->saved_loglevel = kmsg_get_debug(); + if (k->saved_loglevel != -1) + kmsg_set_debug(0xff); + + k->fd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK); + if (k->fd != -1) + lseek(k->fd, 0, SEEK_END); +} + +static void kmsg_close(struct kmsg *k, int dump) +{ + FILE *file; + + file = NULL; + if (k->fd != -1 && dump) + file = fdopen(k->fd, "r"); + if (file) { + size_t len = 0; + char *line = NULL; + + while (getline(&line, &len, file) != -1) { + char *start = strchr(line, ';'); + if (start) + LogF("KMSG: %s", start + 1); + } + + free(line); + fclose(file); + } + + if (k->fd != -1) + close(k->fd); + + if (k->saved_loglevel != -1) + kmsg_set_debug(k->saved_loglevel); +} +#else +struct kmsg { int unused; }; +static void kmsg_open(struct kmsg *k) {} +static void kmsg_close(struct kmsg *k, int dump) {} +#endif + +static int sna_crtc_apply(xf86CrtcPtr crtc) { struct sna *sna = to_sna(crtc->scrn); @@ -978,26 +1503,39 @@ sna_crtc_apply(xf86CrtcPtr crtc) struct drm_mode_crtc arg; uint32_t output_ids[32]; int output_count = 0; - int i; + int sigio, i; + struct kmsg kmsg; + int ret = EINVAL; - DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, sna_crtc->id, sna_crtc->pipe, sna_crtc->bo->handle)); + DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, + __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), + sna_crtc->bo->handle)); if (!sna_crtc->kmode.clock) { ERR(("%s(CRTC:%d [pipe=%d]): attempted to set an invalid mode\n", - 
__FUNCTION__, sna_crtc->id, sna_crtc->pipe)); - return false; + __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc))); + return EINVAL; } + kmsg_open(&kmsg); + sigio = sigio_block(); + assert(sna->mode.num_real_output < ARRAY_SIZE(output_ids)); sna_crtc_disable_cursor(sna, sna_crtc); if (!rotation_set(sna, &sna_crtc->primary, sna_crtc->rotation)) { + memset(&arg, 0, sizeof(arg)); + arg.crtc_id = __sna_crtc_id(sna_crtc); + (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg); + } + + if (!rotation_set(sna, &sna_crtc->primary, sna_crtc->rotation)) { ERR(("%s: set-primary-rotation failed (rotation-id=%d, rotation=%d) on CRTC:%d [pipe=%d], errno=%d\n", - __FUNCTION__, sna_crtc->primary.rotation.prop, sna_crtc->rotation, sna_crtc->id, sna_crtc->pipe, errno)); + __FUNCTION__, sna_crtc->primary.rotation.prop, sna_crtc->rotation, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), errno)); sna_crtc->primary.rotation.supported &= ~sna_crtc->rotation; - return false; + goto unblock; } DBG(("%s: CRTC:%d [pipe=%d] primary rotation set to %x\n", - __FUNCTION__, sna_crtc->id, sna_crtc->pipe, sna_crtc->rotation)); + __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), sna_crtc->rotation)); for (i = 0; i < sna->mode.num_real_output; i++) { xf86OutputPtr output = config->output[i]; @@ -1008,7 +1546,7 @@ sna_crtc_apply(xf86CrtcPtr crtc) * and we lose track of the user settings. 
*/ if (output->crtc == NULL) - output->funcs->dpms(output, DPMSModeOff); + __sna_output_dpms(output, DPMSModeOff, false); if (output->crtc != crtc) continue; @@ -1022,29 +1560,27 @@ sna_crtc_apply(xf86CrtcPtr crtc) DBG(("%s: attaching output '%s' %d [%d] to crtc:%d (pipe %d) (possible crtc:%x, possible clones:%x)\n", __FUNCTION__, output->name, i, to_connector_id(output), - sna_crtc->id, sna_crtc->pipe, + __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), (uint32_t)output->possible_crtcs, (uint32_t)output->possible_clones)); - assert(output->possible_crtcs & (1 << sna_crtc->pipe) || + assert(output->possible_crtcs & (1 << __sna_crtc_pipe(sna_crtc)) || is_zaphod(crtc->scrn)); output_ids[output_count] = to_connector_id(output); if (++output_count == ARRAY_SIZE(output_ids)) { DBG(("%s: too many outputs (%d) for me!\n", __FUNCTION__, output_count)); - errno = EINVAL; - return false; + goto unblock; } } if (output_count == 0) { DBG(("%s: no outputs\n", __FUNCTION__)); - errno = EINVAL; - return false; + goto unblock; } VG_CLEAR(arg); - arg.crtc_id = sna_crtc->id; + arg.crtc_id = __sna_crtc_id(sna_crtc); arg.fb_id = fb_id(sna_crtc->bo); if (sna_crtc->transform || sna_crtc->slave_pixmap) { arg.x = 0; @@ -1061,7 +1597,7 @@ sna_crtc_apply(xf86CrtcPtr crtc) arg.mode_valid = 1; DBG(("%s: applying crtc [%d, pipe=%d] mode=%dx%d+%d+%d@%d, fb=%d%s%s update to %d outputs [%d...]\n", - __FUNCTION__, sna_crtc->id, sna_crtc->pipe, + __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), arg.mode.hdisplay, arg.mode.vdisplay, arg.x, arg.y, @@ -1071,12 +1607,19 @@ sna_crtc_apply(xf86CrtcPtr crtc) sna_crtc->transform ? " [transformed]" : "", output_count, output_count ? 
output_ids[0] : 0)); - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg)) - return false; + ret = 0; + if (unlikely(drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg))) { + ret = errno; + goto unblock; + } sna_crtc->mode_serial++; sna_crtc_force_outputs_on(crtc); - return true; + +unblock: + sigio_unblock(sigio); + kmsg_close(&kmsg, ret); + return ret; } static bool overlap(const BoxRec *a, const BoxRec *b) @@ -1094,26 +1637,73 @@ static bool overlap(const BoxRec *a, const BoxRec *b) return true; } +static void defer_event(struct sna *sna, struct drm_event *base) +{ + if (sna->mode.shadow_nevent == sna->mode.shadow_size) { + int size = sna->mode.shadow_size * 2; + void *ptr; + + ptr = realloc(sna->mode.shadow_events, + sizeof(struct drm_event_vblank)*size); + if (!ptr) + return; + + sna->mode.shadow_events = ptr; + sna->mode.shadow_size = size; + } + + memcpy(&sna->mode.shadow_events[sna->mode.shadow_nevent++], + base, sizeof(struct drm_event_vblank)); + DBG(("%s: deferring event count=%d\n", + __func__, sna->mode.shadow_nevent)); +} + +static void flush_events(struct sna *sna) +{ + int n; + + if (!sna->mode.shadow_nevent) + return; + + DBG(("%s: flushing %d events=%d\n", __func__, sna->mode.shadow_nevent)); + + for (n = 0; n < sna->mode.shadow_nevent; n++) { + struct drm_event_vblank *vb = &sna->mode.shadow_events[n]; + + if ((uintptr_t)(vb->user_data) & 2) + sna_present_vblank_handler(vb); + else + sna_dri2_vblank_handler(vb); + } + + sna->mode.shadow_nevent = 0; +} + + static bool wait_for_shadow(struct sna *sna, struct sna_pixmap *priv, unsigned flags) { PixmapPtr pixmap = priv->pixmap; - DamagePtr damage; struct kgem_bo *bo, *tmp; int flip_active; bool ret = true; - DBG(("%s: flags=%x, flips=%d, handle=%d, shadow=%d\n", - __FUNCTION__, flags, sna->mode.flip_active, + DBG(("%s: enabled? %d waiting? 
%d, flags=%x, flips=%d, pixmap=%ld [front?=%d], handle=%d, shadow=%d\n", + __FUNCTION__, sna->mode.shadow_enabled, sna->mode.shadow_wait, + flags, sna->mode.flip_active, + pixmap->drawable.serialNumber, pixmap == sna->front, priv->gpu_bo->handle, sna->mode.shadow->handle)); assert(priv->move_to_gpu_data == sna); assert(sna->mode.shadow != priv->gpu_bo); - if (flags == 0 || pixmap != sna->front || !sna->mode.shadow_damage) + if (flags == 0 || pixmap != sna->front || !sna->mode.shadow_enabled) goto done; + assert(sna->mode.shadow_damage); + assert(!sna->mode.shadow_wait); + if ((flags & MOVE_WRITE) == 0) { if ((flags & __MOVE_SCANOUT) == 0) { struct sna_crtc *crtc; @@ -1154,9 +1744,7 @@ static bool wait_for_shadow(struct sna *sna, } assert(sna->mode.shadow_active); - - damage = sna->mode.shadow_damage; - sna->mode.shadow_damage = NULL; + sna->mode.shadow_wait = true; flip_active = sna->mode.flip_active; if (flip_active) { @@ -1208,6 +1796,8 @@ static bool wait_for_shadow(struct sna *sna, bo = sna->mode.shadow; } } + assert(sna->mode.shadow_wait); + sna->mode.shadow_wait = false; if (bo->refcnt > 1) { bo = kgem_create_2d(&sna->kgem, @@ -1230,8 +1820,6 @@ static bool wait_for_shadow(struct sna *sna, bo = sna->mode.shadow; } - sna->mode.shadow_damage = damage; - RegionSubtract(&sna->mode.shadow_region, &sna->mode.shadow_region, &sna->mode.shadow_cancel); @@ -1269,6 +1857,7 @@ static bool wait_for_shadow(struct sna *sna, RegionSubtract(&sna->mode.shadow_region, &sna->mode.shadow_region, ®ion); } + crtc->client_bo->active_scanout--; kgem_bo_destroy(&sna->kgem, crtc->client_bo); crtc->client_bo = NULL; list_del(&crtc->shadow_link); @@ -1281,12 +1870,13 @@ static bool wait_for_shadow(struct sna *sna, sna->mode.shadow_region.extents.y1, sna->mode.shadow_region.extents.x2, sna->mode.shadow_region.extents.y2)); - ret = sna->render.copy_boxes(sna, GXcopy, - &pixmap->drawable, priv->gpu_bo, 0, 0, - &pixmap->drawable, bo, 0, 0, - region_rects(&sna->mode.shadow_region), - 
region_num_rects(&sna->mode.shadow_region), - 0); + if (!sna->render.copy_boxes(sna, GXcopy, + &pixmap->drawable, priv->gpu_bo, 0, 0, + &pixmap->drawable, bo, 0, 0, + region_rects(&sna->mode.shadow_region), + region_num_rects(&sna->mode.shadow_region), + 0)) + ERR(("%s: copy failed\n", __FUNCTION__)); } if (priv->cow) @@ -1295,11 +1885,13 @@ static bool wait_for_shadow(struct sna *sna, sna_pixmap_unmap(pixmap, priv); DBG(("%s: setting front pixmap to handle=%d\n", __FUNCTION__, bo->handle)); + sna->mode.shadow->active_scanout--; tmp = priv->gpu_bo; priv->gpu_bo = bo; if (bo != sna->mode.shadow) kgem_bo_destroy(&sna->kgem, sna->mode.shadow); sna->mode.shadow = tmp; + sna->mode.shadow->active_scanout++; sna_dri2_pixmap_update_bo(sna, pixmap, bo); @@ -1311,6 +1903,9 @@ done: priv->move_to_gpu_data = NULL; priv->move_to_gpu = NULL; + assert(!sna->mode.shadow_wait); + flush_events(sna); + return ret; } @@ -1358,22 +1953,43 @@ bool sna_pixmap_discard_shadow_damage(struct sna_pixmap *priv, return RegionNil(&sna->mode.shadow_region); } +static void sna_mode_damage(DamagePtr damage, RegionPtr region, void *closure) +{ + struct sna *sna = closure; + + if (sna->mode.rr_active) + return; + + /* Throw away the rectangles if the region grows too big */ + region = DamageRegion(damage); + if (region->data) { + RegionRec dup; + + dup = *region; + RegionUninit(&dup); + + region->data = NULL; + } +} + static bool sna_mode_enable_shadow(struct sna *sna) { - ScreenPtr screen = sna->scrn->pScreen; + ScreenPtr screen = to_screen_from_sna(sna); DBG(("%s\n", __FUNCTION__)); assert(sna->mode.shadow == NULL); assert(sna->mode.shadow_damage == NULL); assert(sna->mode.shadow_active == 0); + assert(!sna->mode.shadow_enabled); - sna->mode.shadow_damage = DamageCreate(NULL, NULL, - DamageReportNone, TRUE, - screen, screen); + sna->mode.shadow_damage = DamageCreate(sna_mode_damage, NULL, + DamageReportRawRegion, + TRUE, screen, sna); if (!sna->mode.shadow_damage) return false; 
DamageRegister(&sna->front->drawable, sna->mode.shadow_damage); + sna->mode.shadow_enabled = true; return true; } @@ -1381,8 +1997,10 @@ static void sna_mode_disable_shadow(struct sna *sna) { struct sna_pixmap *priv; - if (!sna->mode.shadow_damage) + if (!sna->mode.shadow_damage) { + assert(!sna->mode.shadow_enabled); return; + } DBG(("%s\n", __FUNCTION__)); @@ -1393,8 +2011,10 @@ static void sna_mode_disable_shadow(struct sna *sna) DamageUnregister(&sna->front->drawable, sna->mode.shadow_damage); DamageDestroy(sna->mode.shadow_damage); sna->mode.shadow_damage = NULL; + sna->mode.shadow_enabled = false; if (sna->mode.shadow) { + sna->mode.shadow->active_scanout--; kgem_bo_destroy(&sna->kgem, sna->mode.shadow); sna->mode.shadow = NULL; } @@ -1413,7 +2033,7 @@ static void sna_crtc_slave_damage(DamagePtr damage, RegionPtr region, void *clos __FUNCTION__, region->extents.x1, region->extents.y1, region->extents.x2, region->extents.y2, region_num_rects(region), - crtc->pipe, crtc->base->x, crtc->base->y)); + __sna_crtc_pipe(crtc), crtc->base->x, crtc->base->y)); assert(crtc->slave_damage == damage); assert(sna->mode.shadow_damage); @@ -1431,7 +2051,7 @@ static bool sna_crtc_enable_shadow(struct sna *sna, struct sna_crtc *crtc) return true; } - DBG(("%s: enabling for crtc %d\n", __FUNCTION__, crtc->id)); + DBG(("%s: enabling for crtc %d\n", __FUNCTION__, __sna_crtc_id(crtc))); if (!sna->mode.shadow_active) { if (!sna_mode_enable_shadow(sna)) @@ -1443,9 +2063,12 @@ static bool sna_crtc_enable_shadow(struct sna *sna, struct sna_crtc *crtc) if (crtc->slave_pixmap) { assert(crtc->slave_damage == NULL); + DBG(("%s: enabling PRIME slave tracking on CRTC %d [pipe=%d], pixmap=%ld\n", + __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->slave_pixmap->drawable.serialNumber)); crtc->slave_damage = DamageCreate(sna_crtc_slave_damage, NULL, DamageReportRawRegion, TRUE, - sna->scrn->pScreen, crtc); + to_screen_from_sna(sna), + crtc); if (crtc->slave_damage == NULL) { if 
(!--sna->mode.shadow_active) sna_mode_disable_shadow(sna); @@ -1465,6 +2088,9 @@ static void sna_crtc_disable_override(struct sna *sna, struct sna_crtc *crtc) if (crtc->client_bo == NULL) return; + assert(crtc->client_bo->refcnt >= crtc->client_bo->active_scanout); + crtc->client_bo->active_scanout--; + if (!crtc->transform) { DrawableRec tmp; @@ -1489,7 +2115,7 @@ static void sna_crtc_disable_shadow(struct sna *sna, struct sna_crtc *crtc) if (!crtc->shadow) return; - DBG(("%s: disabling for crtc %d\n", __FUNCTION__, crtc->id)); + DBG(("%s: disabling for crtc %d\n", __FUNCTION__, __sna_crtc_id(crtc))); assert(sna->mode.shadow_active > 0); if (crtc->slave_damage) { @@ -1517,14 +2143,24 @@ __sna_crtc_disable(struct sna *sna, struct sna_crtc *sna_crtc) sna_crtc_disable_shadow(sna, sna_crtc); if (sna_crtc->bo) { + DBG(("%s: releasing handle=%d from scanout, active=%d\n", + __FUNCTION__,sna_crtc->bo->handle, sna_crtc->bo->active_scanout-1)); + assert(sna_crtc->flags & CRTC_ON); assert(sna_crtc->bo->active_scanout); assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout); sna_crtc->bo->active_scanout--; kgem_bo_destroy(&sna->kgem, sna_crtc->bo); sna_crtc->bo = NULL; + sna_crtc->flags &= ~CRTC_ON; - assert(sna->mode.front_active); - sna->mode.front_active--; + if (sna->mode.hidden) { + sna->mode.hidden--; + assert(sna->mode.hidden); + assert(sna->mode.front_active == 0); + } else { + assert(sna->mode.front_active); + sna->mode.front_active--; + } sna->mode.dirty = true; } @@ -1532,13 +2168,19 @@ __sna_crtc_disable(struct sna *sna, struct sna_crtc *sna_crtc) kgem_bo_destroy(&sna->kgem, sna_crtc->shadow_bo); sna_crtc->shadow_bo = NULL; } - sna_crtc->transform = false; + if (sna_crtc->transform) { + assert(sna->mode.rr_active); + sna->mode.rr_active--; + sna_crtc->transform = false; + } + sna_crtc->cursor_transform = false; + sna_crtc->hwcursor = true; assert(!sna_crtc->shadow); } static void -sna_crtc_disable(xf86CrtcPtr crtc) +sna_crtc_disable(xf86CrtcPtr crtc, bool 
force) { struct sna *sna = to_sna(crtc->scrn); struct sna_crtc *sna_crtc = to_sna_crtc(crtc); @@ -1547,14 +2189,16 @@ sna_crtc_disable(xf86CrtcPtr crtc) if (sna_crtc == NULL) return; - DBG(("%s: disabling crtc [%d, pipe=%d]\n", __FUNCTION__, - sna_crtc->id, sna_crtc->pipe)); + if (!force && sna_crtc->bo == NULL) + return; + + DBG(("%s: disabling crtc [%d, pipe=%d], force?=%d\n", __FUNCTION__, + __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), force)); sna_crtc_force_outputs_off(crtc); - assert(sna_crtc->dpms_mode == DPMSModeOff); memset(&arg, 0, sizeof(arg)); - arg.crtc_id = sna_crtc->id; + arg.crtc_id = __sna_crtc_id(sna_crtc); (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg); __sna_crtc_disable(sna, sna_crtc); @@ -1574,19 +2218,19 @@ static void update_flush_interval(struct sna *sna) if (!crtc->enabled) { DBG(("%s: CRTC:%d (pipe %d) disabled\n", - __FUNCTION__,i, to_sna_crtc(crtc)->pipe)); + __FUNCTION__,i, sna_crtc_pipe(crtc))); assert(to_sna_crtc(crtc)->bo == NULL); continue; } - if (to_sna_crtc(crtc)->dpms_mode != DPMSModeOn) { + if (to_sna_crtc(crtc)->bo == NULL) { DBG(("%s: CRTC:%d (pipe %d) turned off\n", - __FUNCTION__,i, to_sna_crtc(crtc)->pipe)); + __FUNCTION__,i, sna_crtc_pipe(crtc))); continue; } DBG(("%s: CRTC:%d (pipe %d) vrefresh=%f\n", - __FUNCTION__, i, to_sna_crtc(crtc)->pipe, + __FUNCTION__, i, sna_crtc_pipe(crtc), xf86ModeVRefresh(&crtc->mode))); max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(&crtc->mode)); } @@ -1642,7 +2286,7 @@ void sna_copy_fbcon(struct sna *sna) int dx, dy; int i; - if (wedged(sna)) + if (wedged(sna) || isGPU(sna->scrn)) return; DBG(("%s\n", __FUNCTION__)); @@ -1662,7 +2306,7 @@ void sna_copy_fbcon(struct sna *sna) assert(crtc != NULL); VG_CLEAR(mode); - mode.crtc_id = crtc->id; + mode.crtc_id = __sna_crtc_id(crtc); if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode)) continue; if (!mode.fb_id) @@ -1726,7 +2370,7 @@ void sna_copy_fbcon(struct sna *sna) kgem_bo_destroy(&sna->kgem, bo); #if 
ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(10, 0) - sna->scrn->pScreen->canDoBGNoneRoot = ok; + to_screen_from_sna(sna)->canDoBGNoneRoot = ok; #endif } @@ -1736,7 +2380,6 @@ static bool use_shadow(struct sna *sna, xf86CrtcPtr crtc) PictTransform crtc_to_fb; struct pict_f_transform f_crtc_to_fb, f_fb_to_crtc; unsigned pitch_limit; - struct sna_pixmap *priv; BoxRec b; assert(sna->scrn->virtualX && sna->scrn->virtualY); @@ -1765,27 +2408,31 @@ static bool use_shadow(struct sna *sna, xf86CrtcPtr crtc) return true; } - priv = sna_pixmap_force_to_gpu(sna->front, MOVE_READ | __MOVE_SCANOUT); - if (priv == NULL) - return true; /* maybe we can create a bo for the scanout? */ - - if (sna->kgem.gen == 071) - pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; - else if ((sna->kgem.gen >> 3) > 4) - pitch_limit = 32 * 1024; - else if ((sna->kgem.gen >> 3) == 4) - pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; - else if ((sna->kgem.gen >> 3) == 3) - pitch_limit = priv->gpu_bo->tiling ? 8 * 1024 : 16 * 1024; - else - pitch_limit = 8 * 1024; - DBG(("%s: gpu bo handle=%d tiling=%d pitch=%d, limit=%d\n", __FUNCTION__, priv->gpu_bo->handle, priv->gpu_bo->tiling, priv->gpu_bo->pitch, pitch_limit)); - if (priv->gpu_bo->pitch > pitch_limit) - return true; + if (!isGPU(sna->scrn)) { + struct sna_pixmap *priv; - if (priv->gpu_bo->tiling && sna->flags & SNA_LINEAR_FB) { - DBG(("%s: gpu bo is tiled, need linear, forcing shadow\n", __FUNCTION__)); - return true; + priv = sna_pixmap_force_to_gpu(sna->front, MOVE_READ | __MOVE_SCANOUT); + if (priv == NULL) + return true; /* maybe we can create a bo for the scanout? */ + + if (sna->kgem.gen == 071) + pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; + else if ((sna->kgem.gen >> 3) > 4) + pitch_limit = 32 * 1024; + else if ((sna->kgem.gen >> 3) == 4) + pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; + else if ((sna->kgem.gen >> 3) == 3) + pitch_limit = priv->gpu_bo->tiling ? 
8 * 1024 : 16 * 1024; + else + pitch_limit = 8 * 1024; + DBG(("%s: gpu bo handle=%d tiling=%d pitch=%d, limit=%d\n", __FUNCTION__, priv->gpu_bo->handle, priv->gpu_bo->tiling, priv->gpu_bo->pitch, pitch_limit)); + if (priv->gpu_bo->pitch > pitch_limit) + return true; + + if (priv->gpu_bo->tiling && sna->flags & SNA_LINEAR_FB) { + DBG(("%s: gpu bo is tiled, need linear, forcing shadow\n", __FUNCTION__)); + return true; + } } transform = NULL; @@ -1800,9 +2447,9 @@ static bool use_shadow(struct sna *sna, xf86CrtcPtr crtc) bool needs_transform = true; unsigned rotation = rotation_reduce(&to_sna_crtc(crtc)->primary, crtc->rotation); DBG(("%s: natively supported rotation? rotation=%x & supported=%x == %d\n", - __FUNCTION__, crtc->rotation, to_sna_crtc(crtc)->primary.rotation.supported, - !!(crtc->rotation & to_sna_crtc(crtc)->primary.rotation.supported))); - if (to_sna_crtc(crtc)->primary.rotation.supported & rotation) + __FUNCTION__, rotation, to_sna_crtc(crtc)->primary.rotation.supported, + rotation == (rotation & to_sna_crtc(crtc)->primary.rotation.supported))); + if ((to_sna_crtc(crtc)->primary.rotation.supported & rotation) == rotation) needs_transform = RRTransformCompute(crtc->x, crtc->y, crtc->mode.HDisplay, crtc->mode.VDisplay, RR_Rotate_0, transform, @@ -1839,6 +2486,7 @@ static void set_shadow(struct sna *sna, RegionPtr region) assert(priv->gpu_bo); assert(sna->mode.shadow); + assert(sna->mode.shadow->active_scanout); DBG(("%s: waiting for region %dx[(%d, %d), (%d, %d)], front handle=%d, shadow handle=%d\n", __FUNCTION__, @@ -1912,6 +2560,28 @@ get_scanout_bo(struct sna *sna, PixmapPtr pixmap) return priv->gpu_bo; } +static void shadow_clear(struct sna *sna, + PixmapPtr front, struct kgem_bo *bo, + xf86CrtcPtr crtc) +{ + bool ok = false; + if (!wedged(sna)) + ok = sna->render.fill_one(sna, front, bo, 0, + 0, 0, crtc->mode.HDisplay, crtc->mode.VDisplay, + GXclear); + if (!ok) { + void *ptr = kgem_bo_map__gtt(&sna->kgem, bo); + if (ptr) + memset(ptr, 0, 
bo->pitch * crtc->mode.HDisplay); + } + sna->mode.shadow_dirty = true; +} + +static bool rr_active(xf86CrtcPtr crtc) +{ + return crtc->transformPresent || crtc->rotation != RR_Rotate_0; +} + static struct kgem_bo *sna_crtc_attach(xf86CrtcPtr crtc) { struct sna_crtc *sna_crtc = to_sna_crtc(crtc); @@ -1919,10 +2589,15 @@ static struct kgem_bo *sna_crtc_attach(xf86CrtcPtr crtc) struct sna *sna = to_sna(scrn); struct kgem_bo *bo; - sna_crtc->transform = false; + if (sna_crtc->transform) { + assert(sna->mode.rr_active); + sna_crtc->transform = false; + sna->mode.rr_active--; + } sna_crtc->rotation = RR_Rotate_0; if (use_shadow(sna, crtc)) { + PixmapPtr front; unsigned long tiled_limit; int tiling; @@ -1949,6 +2624,10 @@ force_shadow: } tiling = I915_TILING_X; + if (crtc->rotation & (RR_Rotate_90 | RR_Rotate_270) && + sna->kgem.can_scanout_y) + tiling = I915_TILING_Y; + if (sna->kgem.gen == 071) tiled_limit = 16 * 1024 * 8; else if ((sna->kgem.gen >> 3) > 4) @@ -1977,8 +2656,8 @@ force_shadow: return NULL; } - if (__sna_pixmap_get_bo(sna->front) && !crtc->transformPresent) { - DrawableRec tmp; + front = sna_crtc->slave_pixmap ?: sna->front; + if (__sna_pixmap_get_bo(front) && !rr_active(crtc)) { BoxRec b; b.x1 = crtc->x; @@ -1986,28 +2665,48 @@ force_shadow: b.x2 = crtc->x + crtc->mode.HDisplay; b.y2 = crtc->y + crtc->mode.VDisplay; - DBG(("%s: copying onto shadow CRTC: (%d, %d)x(%d, %d), handle=%d\n", - __FUNCTION__, - b.x1, b.y1, - b.x2, b.y2, - bo->handle)); - - tmp.width = crtc->mode.HDisplay; - tmp.height = crtc->mode.VDisplay; - tmp.depth = sna->front->drawable.depth; - tmp.bitsPerPixel = sna->front->drawable.bitsPerPixel; - - (void)sna->render.copy_boxes(sna, GXcopy, - &sna->front->drawable, __sna_pixmap_get_bo(sna->front), 0, 0, - &tmp, bo, -b.x1, -b.y1, - &b, 1, 0); - } + if (b.x1 < 0) + b.x1 = 0; + if (b.y1 < 0) + b.y1 = 0; + if (b.x2 > scrn->virtualX) + b.x2 = scrn->virtualX; + if (b.y2 > scrn->virtualY) + b.y2 = scrn->virtualY; + if (b.x2 - b.x1 < 
crtc->mode.HDisplay || + b.y2 - b.y1 < crtc->mode.VDisplay) + shadow_clear(sna, front, bo, crtc); + + if (b.y2 > b.y1 && b.x2 > b.x1) { + DrawableRec tmp; + + DBG(("%s: copying onto shadow CRTC: (%d, %d)x(%d, %d) [fb=%dx%d], handle=%d\n", + __FUNCTION__, + b.x1, b.y1, + b.x2-b.x1, b.y2-b.y1, + scrn->virtualX, scrn->virtualY, + bo->handle)); + + tmp.width = crtc->mode.HDisplay; + tmp.height = crtc->mode.VDisplay; + tmp.depth = front->drawable.depth; + tmp.bitsPerPixel = front->drawable.bitsPerPixel; + + if (!sna->render.copy_boxes(sna, GXcopy, + &front->drawable, __sna_pixmap_get_bo(front), 0, 0, + &tmp, bo, -crtc->x, -crtc->y, + &b, 1, COPY_LAST)) + shadow_clear(sna, front, bo, crtc); + } + } else + shadow_clear(sna, front, bo, crtc); sna_crtc->shadow_bo_width = crtc->mode.HDisplay; sna_crtc->shadow_bo_height = crtc->mode.VDisplay; sna_crtc->shadow_bo = bo; out_shadow: sna_crtc->transform = true; + sna->mode.rr_active++; return kgem_bo_reference(bo); } else { if (sna_crtc->shadow_bo) { @@ -2048,26 +2747,26 @@ out_shadow: } if (sna->flags & SNA_TEAR_FREE) { + RegionRec region; + assert(sna_crtc->slave_pixmap == NULL); DBG(("%s: enabling TearFree shadow\n", __FUNCTION__)); + region.extents.x1 = 0; + region.extents.y1 = 0; + region.extents.x2 = sna->scrn->virtualX; + region.extents.y2 = sna->scrn->virtualY; + region.data = NULL; + if (!sna_crtc_enable_shadow(sna, sna_crtc)) { DBG(("%s: failed to enable crtc shadow\n", __FUNCTION__)); return NULL; } - if (sna->mode.shadow == NULL && !wedged(sna)) { - RegionRec region; + if (sna->mode.shadow == NULL) { struct kgem_bo *shadow; DBG(("%s: creating TearFree shadow bo\n", __FUNCTION__)); - - region.extents.x1 = 0; - region.extents.y1 = 0; - region.extents.x2 = sna->scrn->virtualX; - region.extents.y2 = sna->scrn->virtualY; - region.data = NULL; - shadow = kgem_create_2d(&sna->kgem, region.extents.x2, region.extents.y2, @@ -2093,9 +2792,12 @@ out_shadow: goto force_shadow; } + assert(__sna_pixmap_get_bo(sna->front) == NULL || 
+ __sna_pixmap_get_bo(sna->front)->pitch == shadow->pitch); sna->mode.shadow = shadow; - set_shadow(sna, &region); + sna->mode.shadow->active_scanout++; } + set_shadow(sna, &region); sna_crtc_disable_override(sna, sna_crtc); } else @@ -2107,6 +2809,37 @@ out_shadow: } } +#define SCALING_EPSILON (1./256) + +static bool +is_affine(const struct pixman_f_transform *t) +{ + return (fabs(t->m[2][0]) < SCALING_EPSILON && + fabs(t->m[2][1]) < SCALING_EPSILON); +} + +static double determinant(const struct pixman_f_transform *t) +{ + return t->m[0][0]*t->m[1][1] - t->m[1][0]*t->m[0][1]; +} + +static bool +affine_is_pixel_exact(const struct pixman_f_transform *t) +{ + double det = t->m[2][2] * determinant(t); + if (fabs (det * det - 1.0) < SCALING_EPSILON) { + if (fabs(t->m[0][1]) < SCALING_EPSILON && + fabs(t->m[1][0]) < SCALING_EPSILON) + return true; + + if (fabs(t->m[0][0]) < SCALING_EPSILON && + fabs(t->m[1][1]) < SCALING_EPSILON) + return true; + } + + return false; +} + static void sna_crtc_randr(xf86CrtcPtr crtc) { struct sna_crtc *sna_crtc = to_sna_crtc(crtc); @@ -2152,6 +2885,25 @@ static void sna_crtc_randr(xf86CrtcPtr crtc) } else crtc->transform_in_use = sna_crtc->rotation != RR_Rotate_0; + /* Recompute the cursor after a potential change in transform */ + if (sna_crtc->cursor) { + assert(sna_crtc->cursor->ref > 0); + sna_crtc->cursor->ref--; + sna_crtc->cursor = NULL; + } + + if (needs_transform) { + sna_crtc->hwcursor = is_affine(&f_fb_to_crtc); + sna_crtc->cursor_transform = + sna_crtc->hwcursor && + !affine_is_pixel_exact(&f_fb_to_crtc); + } else { + sna_crtc->hwcursor = true; + sna_crtc->cursor_transform = false; + } + DBG(("%s: hwcursor?=%d, cursor_transform?=%d\n", + __FUNCTION__, sna_crtc->hwcursor, sna_crtc->cursor_transform)); + crtc->crtc_to_framebuffer = crtc_to_fb; crtc->f_crtc_to_framebuffer = f_crtc_to_fb; crtc->f_framebuffer_to_crtc = f_fb_to_crtc; @@ -2184,7 +2936,7 @@ static void sna_crtc_damage(xf86CrtcPtr 
crtc) { - ScreenPtr screen = crtc->scrn->pScreen; + ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); struct sna *sna = to_sna(crtc->scrn); RegionRec region, *damage; @@ -2200,15 +2952,21 @@ sna_crtc_damage(xf86CrtcPtr crtc) if (region.extents.y2 > screen->height) region.extents.y2 = screen->height; + if (region.extents.x2 <= region.extents.x1 || + region.extents.y2 <= region.extents.y1) { + DBG(("%s: crtc not damaged, all-clipped\n", __FUNCTION__)); + return; + } + DBG(("%s: marking crtc %d as completely damaged (%d, %d), (%d, %d)\n", - __FUNCTION__, to_sna_crtc(crtc)->id, + __FUNCTION__, sna_crtc_id(crtc), region.extents.x1, region.extents.y1, region.extents.x2, region.extents.y2)); - to_sna_crtc(crtc)->client_damage = region; assert(sna->mode.shadow_damage && sna->mode.shadow_active); damage = DamageRegion(sna->mode.shadow_damage); RegionUnion(damage, damage, &region); + to_sna_crtc(crtc)->crtc_damage = region; DBG(("%s: damage now %dx[(%d, %d), (%d, %d)]\n", __FUNCTION__, @@ -2260,6 +3018,21 @@ static const char *reflection_to_str(Rotation rotation) } } +static void reprobe_connectors(xf86CrtcPtr crtc) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); + struct sna *sna = to_sna(crtc->scrn); + int i; + + for (i = 0; i < sna->mode.num_real_output; i++) { + xf86OutputPtr output = config->output[i]; + if (output->crtc == crtc) + to_sna_output(output)->reprobe = true; + } + + sna_mode_discover(sna, true); +} + static Bool __sna_crtc_set_mode(xf86CrtcPtr crtc) { @@ -2268,11 +3041,19 @@ __sna_crtc_set_mode(xf86CrtcPtr crtc) struct kgem_bo *saved_bo, *bo; uint32_t saved_offset; bool saved_transform; + bool saved_hwcursor; + bool saved_cursor_transform; + int ret; - DBG(("%s\n", __FUNCTION__)); + DBG(("%s: CRTC=%d, pipe=%d, hidden?=%d\n", __FUNCTION__, + __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), sna->mode.hidden)); + if (sna->mode.hidden) + return TRUE; saved_bo = sna_crtc->bo; saved_transform = sna_crtc->transform; + saved_cursor_transform = 
sna_crtc->cursor_transform; + saved_hwcursor = sna_crtc->hwcursor; saved_offset = sna_crtc->offset; sna_crtc->fallback_shadow = false; @@ -2285,26 +3066,31 @@ retry: /* Attach per-crtc pixmap or direct */ } /* Prevent recursion when enabling outputs during execbuffer */ - if (bo->exec && RQ(bo->rq)->bo == NULL) + if (bo->exec && RQ(bo->rq)->bo == NULL) { _kgem_submit(&sna->kgem); + __kgem_bo_clear_dirty(bo); + } sna_crtc->bo = bo; - if (!sna_crtc_apply(crtc)) { - int err = errno; - + ret = sna_crtc_apply(crtc); + if (ret) { kgem_bo_destroy(&sna->kgem, bo); - if (!sna_crtc->shadow) { + if (!sna_crtc->fallback_shadow) { sna_crtc->fallback_shadow = true; goto retry; } xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, - "failed to set mode: %s [%d]\n", strerror(err), err); + "failed to set mode: %s [%d]\n", strerror(ret), ret); goto error; } + sna_crtc->flags |= CRTC_ON; bo->active_scanout++; + DBG(("%s: marking handle=%d as active=%d (removing %d from scanout, active=%d)\n", + __FUNCTION__, bo->handle, bo->active_scanout, + saved_bo ? saved_bo->handle : 0, saved_bo ? 
saved_bo->active_scanout - 1: -1)); if (saved_bo) { assert(saved_bo->active_scanout); assert(saved_bo->refcnt >= saved_bo->active_scanout); @@ -2315,17 +3101,34 @@ retry: /* Attach per-crtc pixmap or direct */ sna_crtc_randr(crtc); if (sna_crtc->transform) sna_crtc_damage(crtc); + if (sna_crtc->cursor && /* Reload cursor if RandR maybe changed */ + (!sna_crtc->hwcursor || + saved_cursor_transform || sna_crtc->cursor_transform || + sna_crtc->cursor->rotation != crtc->rotation)) + sna_crtc_disable_cursor(sna, sna_crtc); + + assert(!sna->mode.hidden); sna->mode.front_active += saved_bo == NULL; sna->mode.dirty = true; - DBG(("%s: front_active=%d\n", __FUNCTION__, sna->mode.front_active)); + DBG(("%s: handle=%d, scanout_active=%d, front_active=%d\n", + __FUNCTION__, bo->handle, bo->active_scanout, sna->mode.front_active)); return TRUE; error: sna_crtc->offset = saved_offset; + if (sna_crtc->transform) { + assert(sna->mode.rr_active); + sna->mode.rr_active--; + } + if (saved_transform) + sna->mode.rr_active++; sna_crtc->transform = saved_transform; + sna_crtc->cursor_transform = saved_cursor_transform; + sna_crtc->hwcursor = saved_hwcursor; sna_crtc->bo = saved_bo; - sna_mode_discover(sna); + + reprobe_connectors(crtc); return FALSE; } @@ -2346,14 +3149,14 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, xf86DrvMsg(crtc->scrn->scrnIndex, X_INFO, "switch to mode %dx%d@%.1f on %s using pipe %d, position (%d, %d), rotation %s, reflection %s\n", mode->HDisplay, mode->VDisplay, xf86ModeVRefresh(mode), - outputs_for_crtc(crtc, outputs, sizeof(outputs)), sna_crtc->pipe, + outputs_for_crtc(crtc, outputs, sizeof(outputs)), __sna_crtc_pipe(sna_crtc), x, y, rotation_to_str(rotation), reflection_to_str(rotation)); assert(mode->HDisplay <= sna->mode.max_crtc_width && mode->VDisplay <= sna->mode.max_crtc_height); #if HAS_GAMMA - drmModeCrtcSetGamma(sna->kgem.fd, sna_crtc->id, + drmModeCrtcSetGamma(sna->kgem.fd, __sna_crtc_id(sna_crtc), crtc->gamma_size, 
crtc->gamma_red, crtc->gamma_green, @@ -2372,17 +3175,10 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, static void sna_crtc_dpms(xf86CrtcPtr crtc, int mode) { - struct sna_crtc *priv = to_sna_crtc(crtc); - DBG(("%s(pipe %d, dpms mode -> %d):= active=%d\n", - __FUNCTION__, priv->pipe, mode, mode == DPMSModeOn)); - if (priv->dpms_mode == mode) - return; - - assert(priv); - priv->dpms_mode = mode; + __FUNCTION__, sna_crtc_pipe(crtc), mode, mode == DPMSModeOn)); - if (mode == DPMSModeOn && crtc->enabled && priv->bo == NULL) { + if (mode == DPMSModeOn && crtc->enabled) { if (__sna_crtc_set_mode(crtc)) update_flush_interval(to_sna(crtc->scrn)); else @@ -2390,7 +3186,7 @@ sna_crtc_dpms(xf86CrtcPtr crtc, int mode) } if (mode != DPMSModeOn) - sna_crtc_disable(crtc); + sna_crtc_disable(crtc, false); } void sna_mode_adjust_frame(struct sna *sna, int x, int y) @@ -2426,7 +3222,7 @@ sna_crtc_gamma_set(xf86CrtcPtr crtc, { assert(to_sna_crtc(crtc)); drmModeCrtcSetGamma(to_sna(crtc->scrn)->kgem.fd, - to_sna_crtc(crtc)->id, + sna_crtc_id(crtc), size, red, green, blue); } @@ -2434,10 +3230,14 @@ static void sna_crtc_destroy(xf86CrtcPtr crtc) { struct sna_crtc *sna_crtc = to_sna_crtc(crtc); + struct plane *sprite, *sn; if (sna_crtc == NULL) return; + list_for_each_entry_safe(sprite, sn, &sna_crtc->sprites, link) + free(sprite); + free(sna_crtc); crtc->driver_private = NULL; } @@ -2455,7 +3255,7 @@ sna_crtc_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr pixmap) return TRUE; DBG(("%s: CRTC:%d, pipe=%d setting scanout pixmap=%ld\n", - __FUNCTION__, sna_crtc->id, sna_crtc->pipe, + __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), pixmap ? 
pixmap->drawable.serialNumber : 0)); /* Disable first so that we can unregister the damage tracking */ @@ -2576,6 +3376,10 @@ static int plane_details(struct sna *sna, struct plane *p) } } + p->rotation.supported &= DBG_NATIVE_ROTATION; + if (!xf86ReturnOptValBool(sna->Options, OPTION_ROTATION, TRUE)) + p->rotation.supported = RR_Rotate_0; + if (props != (uint32_t *)stack_props) free(props); @@ -2583,20 +3387,26 @@ static int plane_details(struct sna *sna, struct plane *p) return type; } +static void add_sprite_plane(struct sna_crtc *crtc, + struct plane *details) +{ + struct plane *sprite = malloc(sizeof(*sprite)); + if (!sprite) + return; + + memcpy(sprite, details, sizeof(*sprite)); + list_add(&sprite->link, &crtc->sprites); +} + static void sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) { #define LOCAL_IOCTL_SET_CAP DRM_IOWR(0x0d, struct local_set_cap) -#define LOCAL_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xb5, struct local_mode_get_plane_res) -#define LOCAL_IOCTL_MODE_GETPLANE DRM_IOWR(0xb6, struct local_mode_get_plane) struct local_set_cap { uint64_t name; uint64_t value; } cap; - struct local_mode_get_plane_res { - uint64_t plane_id_ptr; - uint64_t count_planes; - } r; + struct local_mode_get_plane_res r; uint32_t stack_planes[32]; uint32_t *planes = stack_planes; int i; @@ -2629,18 +3439,7 @@ sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) VG(VALGRIND_MAKE_MEM_DEFINED(planes, sizeof(uint32_t)*r.count_planes)); for (i = 0; i < r.count_planes; i++) { - struct local_mode_get_plane { - uint32_t plane_id; - - uint32_t crtc_id; - uint32_t fb_id; - - uint32_t possible_crtcs; - uint32_t gamma_size; - - uint32_t count_format_types; - uint64_t format_type_ptr; - } p; + struct local_mode_get_plane p; struct plane details; VG_CLEAR(p); @@ -2649,11 +3448,11 @@ sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) if (drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_GETPLANE, &p)) continue; - if ((p.possible_crtcs & (1 << crtc->pipe)) == 0) + if 
((p.possible_crtcs & (1 << __sna_crtc_pipe(crtc))) == 0) continue; DBG(("%s: plane %d is attached to our pipe=%d\n", - __FUNCTION__, planes[i], crtc->pipe)); + __FUNCTION__, planes[i], __sna_crtc_pipe(crtc))); details.id = p.plane_id; details.rotation.prop = 0; @@ -2672,8 +3471,7 @@ sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) break; case DRM_PLANE_TYPE_OVERLAY: - if (crtc->sprite.id == 0) - crtc->sprite = details; + add_sprite_plane(crtc, &details); break; } } @@ -2688,7 +3486,6 @@ sna_crtc_init__rotation(struct sna *sna, struct sna_crtc *crtc) crtc->rotation = RR_Rotate_0; crtc->primary.rotation.supported = RR_Rotate_0; crtc->primary.rotation.current = RR_Rotate_0; - crtc->sprite.rotation = crtc->primary.rotation; } static void @@ -2698,55 +3495,55 @@ sna_crtc_init__cursor(struct sna *sna, struct sna_crtc *crtc) VG_CLEAR(arg); arg.flags = DRM_MODE_CURSOR_BO; - arg.crtc_id = crtc->id; + arg.crtc_id = __sna_crtc_id(crtc); arg.width = arg.height = 0; arg.handle = 0; (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg); + crtc->hwcursor = true; } static bool -sna_crtc_add(ScrnInfoPtr scrn, int id) +sna_crtc_add(ScrnInfoPtr scrn, unsigned id) { struct sna *sna = to_sna(scrn); xf86CrtcPtr crtc; struct sna_crtc *sna_crtc; struct drm_i915_get_pipe_from_crtc_id get_pipe; - DBG(("%s(%d)\n", __FUNCTION__, id)); + DBG(("%s(%d): is-zaphod? 
%d\n", __FUNCTION__, id, is_zaphod(scrn))); sna_crtc = calloc(sizeof(struct sna_crtc), 1); if (sna_crtc == NULL) return false; sna_crtc->id = id; - sna_crtc->dpms_mode = -1; VG_CLEAR(get_pipe); get_pipe.pipe = 0; - get_pipe.crtc_id = sna_crtc->id; + get_pipe.crtc_id = id; if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, &get_pipe)) { free(sna_crtc); return false; } - sna_crtc->pipe = get_pipe.pipe; + assert((unsigned)get_pipe.pipe < 256); + sna_crtc->flags |= get_pipe.pipe << 8; if (is_zaphod(scrn) && - scrn->confScreen->device->screen != sna_crtc->pipe) { + (get_zaphod_crtcs(sna) & (1 << get_pipe.pipe)) == 0) { free(sna_crtc); return true; } + list_init(&sna_crtc->sprites); sna_crtc_init__rotation(sna, sna_crtc); - sna_crtc_find_planes(sna, sna_crtc); - DBG(("%s: CRTC:%d [pipe=%d], primary id=%x: supported-rotations=%x, current-rotation=%x, sprite id=%x: supported-rotations=%x, current-rotation=%x\n", - __FUNCTION__, sna_crtc->id, sna_crtc->pipe, - sna_crtc->primary.id, sna_crtc->primary.rotation.supported, sna_crtc->primary.rotation.current, - sna_crtc->sprite.id, sna_crtc->sprite.rotation.supported, sna_crtc->sprite.rotation.current)); + DBG(("%s: CRTC:%d [pipe=%d], primary id=%x: supported-rotations=%x, current-rotation=%x\n", + __FUNCTION__, id, get_pipe.pipe, + sna_crtc->primary.id, sna_crtc->primary.rotation.supported, sna_crtc->primary.rotation.current)); list_init(&sna_crtc->shadow_link); @@ -2761,7 +3558,7 @@ sna_crtc_add(ScrnInfoPtr scrn, int id) crtc->driver_private = sna_crtc; sna_crtc->base = crtc; DBG(("%s: attached crtc[%d] pipe=%d\n", - __FUNCTION__, id, sna_crtc->pipe)); + __FUNCTION__, id, __sna_crtc_pipe(sna_crtc))); return true; } @@ -2798,20 +3595,56 @@ find_property(struct sna *sna, struct sna_output *output, const char *name) return -1; } +static void update_properties(struct sna *sna, struct sna_output *output) +{ + union compat_mode_get_connector compat_conn; + struct drm_mode_modeinfo dummy; + + VG_CLEAR(compat_conn); + + 
compat_conn.conn.connector_id = output->id; + compat_conn.conn.count_props = output->num_props; + compat_conn.conn.props_ptr = (uintptr_t)output->prop_ids; + compat_conn.conn.prop_values_ptr = (uintptr_t)output->prop_values; + compat_conn.conn.count_modes = 1; /* skip detect */ + compat_conn.conn.modes_ptr = (uintptr_t)&dummy; + compat_conn.conn.count_encoders = 0; + + (void)drmIoctl(sna->kgem.fd, + DRM_IOCTL_MODE_GETCONNECTOR, + &compat_conn.conn); + + assert(compat_conn.conn.count_props == output->num_props); + output->update_properties = false; +} + static xf86OutputStatus sna_output_detect(xf86OutputPtr output) { struct sna *sna = to_sna(output->scrn); struct sna_output *sna_output = output->driver_private; union compat_mode_get_connector compat_conn; + uint32_t now; DBG(("%s(%s:%d)\n", __FUNCTION__, output->name, sna_output->id)); + sna_output->update_properties = false; if (!sna_output->id) { DBG(("%s(%s) hiding due to lost connection\n", __FUNCTION__, output->name)); return XF86OutputStatusDisconnected; } + /* Cache detections for 15s or hotplug event */ + now = GetTimeInMillis(); + if (sna_output->last_detect != 0 && + (int32_t)(now - sna_output->last_detect) <= OUTPUT_STATUS_CACHE_MS) { + DBG(("%s(%s) reporting cached status (since %dms): %d\n", + __FUNCTION__, output->name, now - sna_output->last_detect, + sna_output->status)); + sna_output->update_properties = true; + return sna_output->status; + } + VG_CLEAR(compat_conn); compat_conn.conn.connector_id = sna_output->id; sna_output->num_modes = compat_conn.conn.count_modes = 0; /* reprobe */ @@ -2854,15 +3687,23 @@ sna_output_detect(xf86OutputPtr output) DBG(("%s(%s): found %d modes, connection status=%d\n", __FUNCTION__, output->name, sna_output->num_modes, compat_conn.conn.connection)); + sna_output->reprobe = false; + sna_output->last_detect = now; switch (compat_conn.conn.connection) { case DRM_MODE_CONNECTED: - return XF86OutputStatusConnected; + sna_output->status = XF86OutputStatusConnected; + 
output->mm_width = compat_conn.conn.mm_width; + output->mm_height = compat_conn.conn.mm_height; + break; case DRM_MODE_DISCONNECTED: - return XF86OutputStatusDisconnected; + sna_output->status = XF86OutputStatusDisconnected; + break; default: case DRM_MODE_UNKNOWNCONNECTION: - return XF86OutputStatusUnknown; + sna_output->status = XF86OutputStatusUnknown; + break; } + return sna_output->status; } static Bool @@ -2895,6 +3736,27 @@ sna_output_mode_valid(xf86OutputPtr output, DisplayModePtr mode) return MODE_OK; } +static void sna_output_set_parsed_edid(xf86OutputPtr output, xf86MonPtr mon) +{ + unsigned conn_mm_width, conn_mm_height; + + /* We set the output size based on values from the kernel */ + conn_mm_width = output->mm_width; + conn_mm_height = output->mm_height; + + xf86OutputSetEDID(output, mon); + + if (output->mm_width != conn_mm_width || output->mm_height != conn_mm_height) { + DBG(("%s)%s): kernel and Xorg disagree over physical size: kernel=%dx%dmm, Xorg=%dx%dmm\n", + __FUNCTION__, output->name, + conn_mm_width, conn_mm_height, + output->mm_width, output->mm_height)); + } + + output->mm_width = conn_mm_width; + output->mm_height = conn_mm_height; +} + static void sna_output_attach_edid(xf86OutputPtr output) { @@ -2907,6 +3769,13 @@ sna_output_attach_edid(xf86OutputPtr output) if (sna_output->edid_idx == -1) return; + /* Always refresh the blob as the kernel may randomly update the + * id even if the contents of the blob doesn't change, and a + * request for the stale id will return nothing. 
+ */ + if (sna_output->update_properties) + update_properties(sna, sna_output); + raw = sna_output->edid_raw; blob.length = sna_output->edid_len; @@ -2917,8 +3786,12 @@ sna_output_attach_edid(xf86OutputPtr output) old = NULL; blob.blob_id = sna_output->prop_values[sna_output->edid_idx]; - DBG(("%s: attaching EDID id=%d, current=%d\n", - __FUNCTION__, blob.blob_id, sna_output->edid_blob_id)); + if (!blob.blob_id) + goto done; + + DBG(("%s(%s): attaching EDID id=%d, current=%d\n", + __FUNCTION__, output->name, + blob.blob_id, sna_output->edid_blob_id)); if (blob.blob_id == sna_output->edid_blob_id && 0) { /* sigh */ if (output->MonInfo) { /* XXX the property keeps on disappearing... */ @@ -2936,26 +3809,45 @@ sna_output_attach_edid(xf86OutputPtr output) } blob.data = (uintptr_t)raw; - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) - goto done; + do { + while (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) { + update_properties(sna, sna_output); + if (blob.blob_id == sna_output->prop_values[sna_output->edid_idx]) { + DBG(("%s(%s): failed to read blob, reusing previous\n", + __FUNCTION__, output->name)); + goto done; + } + blob.blob_id = sna_output->prop_values[sna_output->edid_idx]; + } - DBG(("%s: retrieving blob id=%d, length=%d\n", - __FUNCTION__, blob.blob_id, blob.length)); + DBG(("%s(%s): retrieving blob id=%d, length=%d\n", + __FUNCTION__, output->name, blob.blob_id, blob.length)); - if (blob.length > sna_output->edid_len) { - raw = realloc(raw, blob.length); - if (raw == NULL) + if (blob.length < 128) goto done; - VG(memset(raw, 0, blob.length)); - blob.data = (uintptr_t)raw; - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) - goto done; + if (blob.length > sna_output->edid_len) { + raw = realloc(raw, blob.length); + if (raw == NULL) + goto done; + + VG(memset(raw, 0, blob.length)); + blob.data = (uintptr_t)raw; + } + } while (blob.length != sna_output->edid_len && + drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, 
&blob)); + + if (blob.length & 127) { + /* Truncated EDID! Make sure no one reads too far */ + *SECTION(NO_EDID, (uint8_t*)raw) = blob.length/128 - 1; + blob.length &= -128; } if (old && blob.length == sna_output->edid_len && memcmp(old, raw, blob.length) == 0) { + DBG(("%s(%s): EDID + MonInfo is unchanged\n", + __FUNCTION__, output->name)); assert(sna_output->edid_raw == raw); sna_output->edid_blob_id = blob.blob_id; RRChangeOutputProperty(output->randr_output, @@ -2974,31 +3866,186 @@ skip_read: mon->flags |= MONITOR_EDID_COMPLETE_RAWDATA; } -done: - xf86OutputSetEDID(output, mon); - if (raw) { - sna_output->edid_raw = raw; - sna_output->edid_len = blob.length; - sna_output->edid_blob_id = blob.blob_id; +done: + sna_output_set_parsed_edid(output, mon); + if (raw) { + sna_output->edid_raw = raw; + sna_output->edid_len = blob.length; + sna_output->edid_blob_id = blob.blob_id; + } +} + +static void +sna_output_attach_tile(xf86OutputPtr output) +{ +#if XF86_OUTPUT_VERSION >= 3 + struct sna *sna = to_sna(output->scrn); + struct sna_output *sna_output = output->driver_private; + struct drm_mode_get_blob blob; + struct xf86CrtcTileInfo tile_info, *set = NULL; + char *tile; + int id; + + id = find_property(sna, sna_output, "TILE"); + DBG(("%s: found? 
TILE=%d\n", __FUNCTION__, id)); + if (id == -1) + goto out; + + if (sna_output->update_properties) + update_properties(sna, sna_output); + + VG_CLEAR(blob); + blob.blob_id = sna_output->prop_values[id]; + blob.length = 0; + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) + goto out; + + do { + id = blob.length; + tile = alloca(id + 1); + blob.data = (uintptr_t)tile; + VG(memset(tile, 0, id)); + DBG(("%s: reading %d bytes for TILE blob\n", __FUNCTION__, id)); + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) + goto out; + } while (id != blob.length); + + tile[blob.length] = '\0'; /* paranoia */ + DBG(("%s: TILE='%s'\n", __FUNCTION__, tile)); + if (xf86OutputParseKMSTile(tile, blob.length, &tile_info)) + set = &tile_info; +out: + xf86OutputSetTile(output, set); +#endif +} + +static bool duplicate_mode(DisplayModePtr modes, DisplayModePtr m) +{ + if (m == NULL) + return false; + + while (modes) { + if (xf86ModesEqual(modes, m)) + return true; + + modes = modes->next; + } + + return false; +} + +static struct pixel_count { + int16_t width, height; +} common_16_9[] = { + { 640, 360 }, + { 720, 405 }, + { 864, 486 }, + { 960, 540 }, + { 1024, 576 }, + { 1280, 720 }, + { 1366, 768 }, + { 1600, 900 }, + { 1920, 1080 }, + { 2048, 1152 }, + { 2560, 1440 }, + { 2880, 1620 }, + { 3200, 1800 }, + { 3840, 2160 }, + { 4096, 2304 }, + { 5120, 2880 }, + { 7680, 4320 }, + { 15360, 8640 }, +}, common_16_10[] = { + { 1280, 800 }, + { 1400, 900 }, + { 1680, 1050 }, + { 1920, 1200 }, + { 2560, 1600 }, +}; + +static DisplayModePtr +default_modes(DisplayModePtr preferred) +{ + DisplayModePtr modes; + int n; + +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) + modes = xf86GetDefaultModes(); +#else + modes = xf86GetDefaultModes(0, 0); +#endif + + /* XXX O(n^2) mode list generation :( */ + +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,4,99,901,0) + if (preferred) { + DisplayModePtr m; + + /* Add a half-resolution mode useful for large panels 
*/ + m = xf86GTFMode(preferred->HDisplay/2, + preferred->VDisplay/2, + xf86ModeVRefresh(preferred), + FALSE, FALSE); + if (!duplicate_mode(modes, m)) + modes = xf86ModesAdd(modes, m); + else + free(m); + + if (preferred->VDisplay * 16 > preferred->HDisplay*9 - preferred->HDisplay/32 && + preferred->VDisplay * 16 < preferred->HDisplay*9 + preferred->HDisplay/32) { + DBG(("Adding 16:9 modes -- %d < %d > %d\n", + preferred->HDisplay*9 - preferred->HDisplay/32, + preferred->VDisplay * 16, + preferred->HDisplay*9 + preferred->HDisplay/32)); + for (n = 0; n < ARRAY_SIZE(common_16_9); n++) { + if (preferred->HDisplay <= common_16_9[n].width || + preferred->VDisplay <= common_16_9[n].height) + break; + + m = xf86GTFMode(common_16_9[n].width, + common_16_9[n].height, + xf86ModeVRefresh(preferred), + FALSE, FALSE); + if (!duplicate_mode(modes, m)) + modes = xf86ModesAdd(modes, m); + else + free(m); + } + } + + if (preferred->VDisplay * 16 > preferred->HDisplay*10 - preferred->HDisplay/32 && + preferred->VDisplay * 16 < preferred->HDisplay*10 + preferred->HDisplay/32) { + DBG(("Adding 16:10 modes -- %d < %d > %d\n", + preferred->HDisplay*10 - preferred->HDisplay/32, + preferred->VDisplay * 16, + preferred->HDisplay*10 + preferred->HDisplay/32)); + for (n = 0; n < ARRAY_SIZE(common_16_10); n++) { + if (preferred->HDisplay <= common_16_10[n].width || + preferred->VDisplay <= common_16_10[n].height) + break; + + m = xf86GTFMode(common_16_10[n].width, + common_16_10[n].height, + xf86ModeVRefresh(preferred), + FALSE, FALSE); + if (!duplicate_mode(modes, m)) + modes = xf86ModesAdd(modes, m); + else + free(m); + } + } } -} - -static DisplayModePtr -default_modes(void) -{ -#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) - return xf86GetDefaultModes(); -#else - return xf86GetDefaultModes(0, 0); #endif + + return modes; } static DisplayModePtr -sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) +sna_output_add_default_modes(xf86OutputPtr output, 
DisplayModePtr modes) { xf86MonPtr mon = output->MonInfo; DisplayModePtr i, m, preferred = NULL; - int max_x = 0, max_y = 0; + int max_x = 0, max_y = 0, max_clock = 0; float max_vrefresh = 0.0; if (mon && GTF_SUPPORTED(mon->features.msc)) @@ -3009,16 +4056,17 @@ sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) preferred = m; max_x = max(max_x, m->HDisplay); max_y = max(max_y, m->VDisplay); + max_clock = max(max_clock, m->Clock); max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m)); } - - max_vrefresh = max(max_vrefresh, 60.0); max_vrefresh *= (1 + SYNC_TOLERANCE); - m = default_modes(); + m = default_modes(preferred); xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0); for (i = m; i; i = i->next) { + if (i->Clock > max_clock) + i->status = MODE_CLOCK_HIGH; if (xf86ModeVRefresh(i) > max_vrefresh) i->status = MODE_VSYNC; if (preferred && @@ -3034,28 +4082,47 @@ sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) } static DisplayModePtr +sna_output_override_edid(xf86OutputPtr output) +{ + struct sna_output *sna_output = output->driver_private; + + if (sna_output->fake_edid_mon == NULL) + return NULL; + + xf86OutputSetEDID(output, sna_output->fake_edid_mon); + return xf86DDCGetModes(output->scrn->scrnIndex, + sna_output->fake_edid_mon); +} + +static DisplayModePtr sna_output_get_modes(xf86OutputPtr output) { struct sna_output *sna_output = output->driver_private; - DisplayModePtr Modes = NULL, current = NULL; + DisplayModePtr Modes, current; int i; DBG(("%s(%s:%d)\n", __FUNCTION__, output->name, sna_output->id)); assert(sna_output->id); + Modes = sna_output_override_edid(output); + if (Modes) + return Modes; + sna_output_attach_edid(output); + sna_output_attach_tile(output); - if (output->crtc) { + current = NULL; + if (output->crtc && !sna_output->hotplug_count) { struct drm_mode_crtc mode; VG_CLEAR(mode); assert(to_sna_crtc(output->crtc)); - mode.crtc_id = to_sna_crtc(output->crtc)->id; + mode.crtc_id = sna_crtc_id(output->crtc); 
if (drmIoctl(to_sna(output->scrn)->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode) == 0) { DBG(("%s: CRTC:%d, pipe=%d: has mode?=%d\n", __FUNCTION__, - to_sna_crtc(output->crtc)->id, - to_sna_crtc(output->crtc)->pipe, + sna_crtc_id(output->crtc), + sna_crtc_pipe(output->crtc), mode.mode_valid && mode.mode.clock)); if (mode.mode_valid && mode.mode.clock) { @@ -3117,7 +4184,7 @@ sna_output_get_modes(xf86OutputPtr output) } if (sna_output->add_default_modes) - Modes = sna_output_panel_edid(output, Modes); + Modes = sna_output_add_default_modes(output, Modes); return Modes; } @@ -3132,6 +4199,8 @@ sna_output_destroy(xf86OutputPtr output) return; free(sna_output->edid_raw); + free(sna_output->fake_edid_raw); + for (i = 0; i < sna_output->num_props; i++) { if (sna_output->props[i].kprop == NULL) continue; @@ -3155,7 +4224,7 @@ sna_output_destroy(xf86OutputPtr output) } static void -sna_output_dpms(xf86OutputPtr output, int dpms) +__sna_output_dpms(xf86OutputPtr output, int dpms, int fixup) { struct sna *sna = to_sna(output->scrn); struct sna_output *sna_output = output->driver_private; @@ -3182,8 +4251,9 @@ sna_output_dpms(xf86OutputPtr output, int dpms) if (sna_output->backlight.iface && dpms != DPMSModeOn) { if (old_dpms == DPMSModeOn) { sna_output->backlight_active_level = sna_output_backlight_get(output); - DBG(("%s: saving current backlight %d\n", - __FUNCTION__, sna_output->backlight_active_level)); + DBG(("%s(%s:%d): saving current backlight %d\n", + __FUNCTION__, output->name, sna_output->id, + sna_output->backlight_active_level)); } sna_output->dpms_mode = dpms; sna_output_backlight_off(sna_output); @@ -3193,18 +4263,31 @@ sna_output_dpms(xf86OutputPtr output, int dpms) drmModeConnectorSetProperty(sna->kgem.fd, sna_output->id, sna_output->dpms_id, - dpms)) - dpms = old_dpms; + dpms)) { + DBG(("%s(%s:%d): failed to set DPMS to %d (fixup? 
%d)\n", + __FUNCTION__, output->name, sna_output->id, dpms, fixup)); + if (fixup && dpms != DPMSModeOn) { + sna_crtc_disable(output->crtc, false); + return; + } + } if (sna_output->backlight.iface && dpms == DPMSModeOn) { - DBG(("%s: restoring previous backlight %d\n", - __FUNCTION__, sna_output->backlight_active_level)); + DBG(("%s(%d:%d: restoring previous backlight %d\n", + __FUNCTION__, output->name, sna_output->id, + sna_output->backlight_active_level)); sna_output_backlight_on(sna_output); } sna_output->dpms_mode = dpms; } +static void +sna_output_dpms(xf86OutputPtr output, int dpms) +{ + __sna_output_dpms(output, dpms, true); +} + static bool sna_property_ignore(drmModePropertyPtr prop) { @@ -3239,14 +4322,14 @@ sna_output_create_ranged_atom(xf86OutputPtr output, Atom *atom, err = RRConfigureOutputProperty(output->randr_output, *atom, FALSE, TRUE, immutable, 2, atom_range); if (err != 0) - xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, "RRConfigureOutputProperty error, %d\n", err); err = RRChangeOutputProperty(output->randr_output, *atom, XA_INTEGER, 32, PropModeReplace, 1, &value, FALSE, FALSE); if (err != 0) - xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, "RRChangeOutputProperty error, %d\n", err); } @@ -3303,7 +4386,7 @@ sna_output_create_resources(xf86OutputPtr output) p->kprop->flags & DRM_MODE_PROP_IMMUTABLE ? 
TRUE : FALSE, p->num_atoms - 1, (INT32 *)&p->atoms[1]); if (err != 0) { - xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, "RRConfigureOutputProperty error, %d\n", err); } @@ -3315,7 +4398,7 @@ sna_output_create_resources(xf86OutputPtr output) XA_ATOM, 32, PropModeReplace, 1, &p->atoms[j+1], FALSE, FALSE); if (err != 0) { - xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, "RRChangeOutputProperty error, %d\n", err); } } @@ -3385,18 +4468,19 @@ sna_output_set_property(xf86OutputPtr output, Atom property, if (value->type != XA_INTEGER || value->format != 32 || value->size != 1) return FALSE; - val = *(uint32_t *)value->data; + val = *(uint32_t *)value->data; drmModeConnectorSetProperty(sna->kgem.fd, sna_output->id, p->kprop->prop_id, (uint64_t)val); return TRUE; } else if (p->kprop->flags & DRM_MODE_PROP_ENUM) { - Atom atom; - const char *name; - int j; + Atom atom; + const char *name; + int j; if (value->type != XA_ATOM || value->format != 32 || value->size != 1) return FALSE; + memcpy(&atom, value->data, 4); name = NameForAtom(atom); if (name == NULL) @@ -3425,7 +4509,7 @@ static Bool sna_output_get_property(xf86OutputPtr output, Atom property) { struct sna_output *sna_output = output->driver_private; - int err; + int err, i, j; if (property == backlight_atom || property == backlight_deprecated_atom) { INT32 val; @@ -3449,7 +4533,7 @@ sna_output_get_property(xf86OutputPtr output, Atom property) XA_INTEGER, 32, PropModeReplace, 1, &val, FALSE, FALSE); if (err != 0) { - xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, "RRChangeOutputProperty error, %d\n", err); return FALSE; } @@ -3457,6 +4541,40 @@ sna_output_get_property(xf86OutputPtr output, Atom property) return TRUE; } + for (i = 0; i < sna_output->num_props; i++) { + struct sna_property *p = &sna_output->props[i]; + + if (p->atoms == NULL || p->atoms[0] != property) + continue; + 
+ if (sna_output->update_properties && output->scrn->vtSema) + update_properties(to_sna(output->scrn), sna_output); + + err = 0; + if (p->kprop->flags & DRM_MODE_PROP_RANGE) { + err = RRChangeOutputProperty(output->randr_output, + property, XA_INTEGER, 32, + PropModeReplace, 1, + &sna_output->prop_values[i], + FALSE, FALSE); + } else if (p->kprop->flags & DRM_MODE_PROP_ENUM) { + for (j = 0; j < p->kprop->count_enums; j++) { + if (p->kprop->enums[j].value == sna_output->prop_values[i]) + break; + } + err = RRChangeOutputProperty(output->randr_output, + property, XA_ATOM, 32, + PropModeReplace, 1, + &p->atoms[j+1], + FALSE, FALSE); + } + + if (err != 0) + xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, + "RRChangeOutputProperty error, %d\n", err); + return TRUE; + } + return FALSE; } @@ -3500,47 +4618,11 @@ static const char * const output_names[] = { /* DRM_MODE_CONNECTOR_TV */ "TV", /* DRM_MODE_CONNECTOR_eDP */ "eDP", /* DRM_MODE_CONNECTOR_VIRTUAL */ "Virtual", - /* DRM_MODE_CONNECTOR_DSI */ "DSI" + /* DRM_MODE_CONNECTOR_DSI */ "DSI", + /* DRM_MODE_CONNECTOR_DPI */ "DPI" }; static bool -sna_zaphod_match(const char *s, const char *output) -{ - char t[20]; - unsigned int i = 0; - - do { - /* match any outputs in a comma list, stopping at whitespace */ - switch (*s) { - case '\0': - t[i] = '\0'; - return strcmp(t, output) == 0; - - case ',': - t[i] ='\0'; - if (strcmp(t, output) == 0) - return TRUE; - i = 0; - break; - - case ' ': - case '\t': - case '\n': - case '\r': - break; - - default: - t[i++] = *s; - break; - } - - s++; - } while (i < sizeof(t)); - - return false; -} - -static bool output_ignored(ScrnInfoPtr scrn, const char *name) { char monitor_name[64]; @@ -3572,14 +4654,21 @@ gather_encoders(struct sna *sna, uint32_t id, int count, struct drm_mode_get_encoder enc; uint32_t *ids = NULL; + DBG(("%s(%d): expected count=%d\n", __FUNCTION__, id, count)); + VG_CLEAR(compat_conn); + VG_CLEAR(enc); memset(out, 0, sizeof(*out)); do { - free(ids); - ids = 
malloc(sizeof(*ids) * count); - if (ids == 0) + uint32_t *nids; + + nids = realloc(ids, sizeof(*ids) * count); + if (nids == NULL) { + free(ids); return false; + } + ids = nids; compat_conn.conn.connector_id = id; compat_conn.conn.count_props = 0; @@ -3593,12 +4682,14 @@ gather_encoders(struct sna *sna, uint32_t id, int count, compat_conn.conn.count_encoders = count = 0; } + VG(VALGRIND_MAKE_MEM_DEFINED(ids, sizeof(uint32_t)*compat_conn.conn.count_encoders)); if (count == compat_conn.conn.count_encoders) break; count = compat_conn.conn.count_encoders; } while (1); + DBG(("%s(%d): gathering %d encoders\n", __FUNCTION__, id, count)); for (count = 0; count < compat_conn.conn.count_encoders; count++) { enc.encoder_id = ids[count]; if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETENCODER, &enc)) { @@ -3606,6 +4697,8 @@ gather_encoders(struct sna *sna, uint32_t id, int count, count = 0; break; } + DBG(("%s(%d): encoder=%d, possible_crtcs=%x, possible_clones=%x\n", + __FUNCTION__, id, enc.encoder_id, enc.possible_crtcs, enc.possible_clones)); out->possible_crtcs |= enc.possible_crtcs; out->possible_clones |= enc.possible_clones; @@ -3731,6 +4824,116 @@ static int name_from_path(struct sna *sna, return 0; } +static char *fake_edid_name(xf86OutputPtr output) +{ + struct sna *sna = to_sna(output->scrn); + const char *str, *colon; + +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) + str = xf86GetOptValString(sna->Options, OPTION_EDID); +#else + str = NULL; +#endif + if (str == NULL) + return NULL; + + do { + colon = strchr(str, ':'); + if (colon == NULL) + return NULL; + + if (strncmp(str, output->name, colon-str) == 0 && + output->name[colon-str] == '\0') { + char *path; + int len; + + str = colon + 1; + colon = strchr(str, ','); + if (colon) + len = colon - str; + else + len = strlen(str); + + path = malloc(len + 1); + if (path == NULL) + return NULL; + + memcpy(path, str, len); + path[len] = '\0'; + return path; + } + + str = strchr(colon + 1, ','); + if (str == 
NULL) + return NULL; + + str++; + } while (1); +} + +static void +sna_output_load_fake_edid(xf86OutputPtr output) +{ + struct sna_output *sna_output = output->driver_private; + const char *filename; + FILE *file; + void *raw; + int size; + xf86MonPtr mon; + + filename = fake_edid_name(output); + if (filename == NULL) + return; + + file = fopen(filename, "rb"); + if (file == NULL) + goto err; + + fseek(file, 0, SEEK_END); + size = ftell(file); + if (size % 128) { + fclose(file); + goto err; + } + + raw = malloc(size); + if (raw == NULL) { + fclose(file); + free(raw); + goto err; + } + + fseek(file, 0, SEEK_SET); + if (fread(raw, size, 1, file) != 1) { + fclose(file); + free(raw); + goto err; + } + fclose(file); + + mon = xf86InterpretEDID(output->scrn->scrnIndex, raw); + if (mon == NULL) { + free(raw); + goto err; + } + + if (mon && size > 128) + mon->flags |= MONITOR_EDID_COMPLETE_RAWDATA; + + sna_output->fake_edid_mon = mon; + sna_output->fake_edid_raw = raw; + + xf86DrvMsg(output->scrn->scrnIndex, X_CONFIG, + "Loading EDID from \"%s\" for output %s\n", + filename, output->name); + return; + +err: + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "Could not read EDID file \"%s\" for output %s\n", + filename, output->name); +} + static int sna_output_add(struct sna *sna, unsigned id, unsigned serial) { @@ -3765,6 +4968,7 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) return -1; } assert(compat_conn.conn.connector_id == id); + DBG(("%s(%d): has %d associated encoders\n", __FUNCTION__, id, compat_conn.conn.count_encoders)); if (compat_conn.conn.connector_type < ARRAY_SIZE(output_names)) output_name = output_names[compat_conn.conn.connector_type]; @@ -3813,34 +5017,43 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) } if (is_zaphod(scrn)) { - const char *str; + unsigned zaphod_crtcs; - str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); - if (str && !sna_zaphod_match(str, name)) { - DBG(("%s: zaphod mismatch, want %s, have %s\n", 
__FUNCTION__, str, name)); + if (!sna_zaphod_match(sna, name)) { + DBG(("%s: zaphod mismatch, want %s, have %s\n", + __FUNCTION__, + xf86GetOptValString(sna->Options, OPTION_ZAPHOD) ?: "???", + name)); return 0; } - if ((possible_crtcs & (1 << scrn->confScreen->device->screen)) == 0) { - if (str) { - xf86DrvMsg(scrn->scrnIndex, X_ERROR, - "%s is an invalid output for screen (pipe) %d\n", - name, scrn->confScreen->device->screen); - return -1; - } else - return 0; + zaphod_crtcs = get_zaphod_crtcs(sna); + possible_crtcs &= zaphod_crtcs; + if (possible_crtcs == 0) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "%s is an invalid output for screen %d\n", + name, scrn->confScreen->device->screen); + return -1; } - possible_crtcs = 1; + possible_crtcs >>= ffs(zaphod_crtcs) - 1; } sna_output = calloc(sizeof(struct sna_output), 1); if (!sna_output) return -1; + sna_output->connector_type = compat_conn.conn.connector_type; + sna_output->connector_type_id = compat_conn.conn.connector_type_id; sna_output->num_props = compat_conn.conn.count_props; sna_output->prop_ids = malloc(sizeof(uint32_t)*compat_conn.conn.count_props); sna_output->prop_values = malloc(sizeof(uint64_t)*compat_conn.conn.count_props); + if (sna_output->prop_ids == NULL || sna_output->prop_values == NULL) { + free(sna_output->prop_ids); + free(sna_output->prop_values); + free(sna_output); + return -1; + } compat_conn.conn.count_encoders = 0; @@ -3865,16 +5078,16 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) /* Construct name from topology, and recheck if output is acceptable */ path = name_from_path(sna, sna_output, name); if (path) { - const char *str; - if (output_ignored(scrn, name)) { len = 0; goto skip; } - str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); - if (str && !sna_zaphod_match(str, name)) { - DBG(("%s: zaphod mismatch, want %s, have %s\n", __FUNCTION__, str, name)); + if (is_zaphod(scrn) && !sna_zaphod_match(sna, name)) { + DBG(("%s: zaphod mismatch, want %s, have %s\n", + 
__FUNCTION__, + xf86GetOptValString(sna->Options, OPTION_ZAPHOD) ?: "???", + name)); len = 0; goto skip; } @@ -3889,7 +5102,6 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) if (strcmp(output->name, name) == 0) { assert(output->scrn == scrn); assert(output->funcs == &sna_output_funcs); - assert(to_sna_output(output)->id == 0); sna_output_destroy(output); goto reset; } @@ -3935,6 +5147,8 @@ reset: sna_output->id = compat_conn.conn.connector_id; sna_output->is_panel = is_panel(compat_conn.conn.connector_type); sna_output->edid_idx = find_property(sna, sna_output, "EDID"); + sna_output->link_status_idx = + find_property(sna, sna_output, "link-status"); if (find_property(sna, sna_output, "scaling mode") != -1) sna_output->add_default_modes = xf86ReturnOptValBool(output->options, OPTION_DEFAULT_MODES, TRUE); @@ -3945,10 +5159,8 @@ reset: sna_output->dpms_mode = sna_output->prop_values[i]; DBG(("%s: found 'DPMS' (idx=%d, id=%d), initial value=%d\n", __FUNCTION__, i, sna_output->dpms_id, sna_output->dpms_mode)); - } else { - sna_output->dpms_id = -1; + } else sna_output->dpms_mode = DPMSModeOff; - } sna_output->possible_encoders = possible_encoders; sna_output->attached_encoders = attached_encoders; @@ -3963,12 +5175,13 @@ reset: sna_output->base = output; backlight_init(&sna_output->backlight); - if (sna_output->is_panel) - sna_output_backlight_init(output); + sna_output_backlight_init(output); output->possible_crtcs = possible_crtcs & count_to_mask(sna->mode.num_real_crtc); output->interlaceAllowed = TRUE; + sna_output_load_fake_edid(output); + if (serial) { if (output->randr_output == NULL) { output->randr_output = RROutputCreate(xf86ScrnToScreen(scrn), name, len, output); @@ -3976,6 +5189,7 @@ reset: goto cleanup; } + RROutputChanged(output->randr_output, TRUE); sna_output_create_resources(output); RRPostPendingProperties(output->randr_output); @@ -4009,38 +5223,6 @@ skip: return len; } -static void sna_output_del(xf86OutputPtr output) -{ - 
ScrnInfoPtr scrn = output->scrn; - xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); - int i; - - DBG(("%s(%s)\n", __FUNCTION__, output->name)); - assert(to_sna_output(output)); - - RROutputDestroy(output->randr_output); - sna_output_destroy(output); - - while (output->probed_modes) - xf86DeleteMode(&output->probed_modes, output->probed_modes); - - free(output); - - for (i = 0; i < config->num_output; i++) - if (config->output[i] == output) - break; - assert(i < to_sna(scrn)->mode.num_real_output); - DBG(("%s: removing output #%d of %d\n", - __FUNCTION__, i, to_sna(scrn)->mode.num_real_output)); - - for (; i < config->num_output; i++) { - config->output[i] = config->output[i+1]; - config->output[i]->possible_clones >>= 1; - } - config->num_output--; - to_sna(scrn)->mode.num_real_output--; -} - static int output_rank(const void *A, const void *B) { const xf86OutputPtr *a = A; @@ -4058,6 +5240,7 @@ static void sort_config_outputs(struct sna *sna) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); qsort(config->output, sna->mode.num_real_output, sizeof(*config->output), output_rank); + config->compat_output = 0; /* make sure it is a sane value */ sna_mode_compute_possible_outputs(sna); } @@ -4080,11 +5263,15 @@ static bool disable_unused_crtc(struct sna *sna) bool update = false; int o, c; + DBG(("%s\n", __FUNCTION__)); + for (c = 0; c < sna->mode.num_real_crtc; c++) { xf86CrtcPtr crtc = config->crtc[c]; - if (!crtc->enabled) + if (!crtc->enabled) { + sna_crtc_disable(crtc, false); continue; + } for (o = 0; o < sna->mode.num_real_output; o++) { xf86OutputPtr output = config->output[o]; @@ -4094,7 +5281,7 @@ static bool disable_unused_crtc(struct sna *sna) if (o == sna->mode.num_real_output) { DBG(("%s: CRTC:%d was enabled with no outputs\n", - __FUNCTION__, to_sna_crtc(crtc)->id)); + __FUNCTION__, sna_crtc_id(crtc))); crtc->enabled = false; update = true; } @@ -4108,17 +5295,145 @@ static bool disable_unused_crtc(struct sna *sna) return update; } -void 
sna_mode_discover(struct sna *sna) +bool sna_mode_find_hotplug_connector(struct sna *sna, unsigned id) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + int i; + + for (i = 0; i < sna->mode.num_real_output; i++) { + struct sna_output *output = to_sna_output(config->output[i]); + if (output->id == id) { + output->reprobe = true; + return true; + } + } + + return false; +} + +static bool +output_retrain_link(struct sna *sna, struct sna_output *output) +{ + struct sna_crtc *crtc = to_sna_crtc(output->base->crtc); + int crtc_x = crtc->offset & 0xffff; + int crtc_y = crtc->offset >> 16; + + return sna_crtc_flip(sna, crtc, crtc->bo, crtc_x, crtc_y); +} + +static bool +output_check_link(struct sna *sna, struct sna_output *output) +{ + uint64_t link_status; + + if (!output->base->crtc) + return true; + + if (output->link_status_idx == -1) + return true; + +#define LINK_STATUS_GOOD 0 + link_status = output->prop_values[output->link_status_idx]; + DBG(("%s: link_status=%d\n", __FUNCTION__, link_status)); + if (link_status == LINK_STATUS_GOOD) + return true; + + /* Perform a modeset as required for "link-status" = BAD */ + if (!output_retrain_link(sna, output)) + return false; + + /* Query the "link-status" again to confirm the modeset */ + update_properties(sna, output); + + link_status = output->prop_values[output->link_status_idx]; + DBG(("%s: link_status=%d after modeset\n", __FUNCTION__, link_status)); + return link_status == LINK_STATUS_GOOD; +} + +static bool +output_check_status(struct sna *sna, struct sna_output *output) +{ + union compat_mode_get_connector compat_conn; + struct drm_mode_modeinfo dummy; + struct drm_mode_get_blob blob; + xf86OutputStatus status; + char *edid; + + VG_CLEAR(compat_conn); + + compat_conn.conn.connection = -1; + compat_conn.conn.connector_id = output->id; + compat_conn.conn.count_modes = 1; /* skip detect */ + compat_conn.conn.modes_ptr = (uintptr_t)&dummy; + compat_conn.conn.count_encoders = 0; + 
compat_conn.conn.props_ptr = (uintptr_t)output->prop_ids; + compat_conn.conn.prop_values_ptr = (uintptr_t)output->prop_values; + compat_conn.conn.count_props = output->num_props; + + if (drmIoctl(sna->kgem.fd, + DRM_IOCTL_MODE_GETCONNECTOR, + &compat_conn.conn) == 0) + output->update_properties = false; + + if (!output_check_link(sna, output)) + return false; + + if (output->reprobe) + return false; + + switch (compat_conn.conn.connection) { + case DRM_MODE_CONNECTED: + status = XF86OutputStatusConnected; + break; + case DRM_MODE_DISCONNECTED: + status = XF86OutputStatusDisconnected; + break; + default: + case DRM_MODE_UNKNOWNCONNECTION: + status = XF86OutputStatusUnknown; + break; + } + if (output->status != status) + return false; + + if (status != XF86OutputStatusConnected) + return true; + + if (output->num_modes != compat_conn.conn.count_modes) + return false; + + if (output->edid_len == 0) + return false; + + edid = alloca(output->edid_len); + + VG_CLEAR(blob); + blob.blob_id = output->prop_values[output->edid_idx]; + blob.length = output->edid_len; + blob.data = (uintptr_t)edid; + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) + return false; + + if (blob.length != output->edid_len) + return false; + + return memcmp(edid, output->edid_raw, output->edid_len) == 0; +} + +void sna_mode_discover(struct sna *sna, bool tell) { ScreenPtr screen = xf86ScrnToScreen(sna->scrn); xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + bool force = sna->flags & SNA_REPROBE; struct drm_mode_card_res res; - uint32_t connectors[32]; + uint32_t connectors[32], now; unsigned changed = 0; unsigned serial; int i, j; DBG(("%s()\n", __FUNCTION__)); + sna->flags &= ~SNA_REPROBE; + VG_CLEAR(connectors); memset(&res, 0, sizeof(res)); @@ -4128,10 +5443,11 @@ void sna_mode_discover(struct sna *sna) if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETRESOURCES, &res)) return; - DBG(("%s: now %d (was %d) connectors\n", __FUNCTION__, - res.count_connectors, 
sna->mode.num_real_output)); + DBG(("%s: now %d (was %d) connectors, %d encoders, %d crtc\n", __FUNCTION__, + res.count_connectors, sna->mode.num_real_output, + res.count_encoders, res.count_crtcs)); if (res.count_connectors > 32) - return; + res.count_connectors = 32; assert(sna->mode.num_real_crtc == res.count_crtcs || is_zaphod(sna->scrn)); assert(sna->mode.max_crtc_width == res.max_width); @@ -4142,6 +5458,11 @@ void sna_mode_discover(struct sna *sna) if (serial == 0) serial = ++sna->mode.serial; + if (force) { + changed = 4; + now = 0; + } else + now = GetTimeInMillis(); for (i = 0; i < res.count_connectors; i++) { DBG(("%s: connector[%d] = %d\n", __FUNCTION__, i, connectors[i])); for (j = 0; j < sna->mode.num_real_output; j++) { @@ -4161,32 +5482,42 @@ void sna_mode_discover(struct sna *sna) for (i = 0; i < sna->mode.num_real_output; i++) { xf86OutputPtr output = config->output[i]; + struct sna_output *sna_output = to_sna_output(output); - if (to_sna_output(output)->id == 0) + if (sna_output->id == 0) continue; - if (to_sna_output(output)->serial == serial) + if (sna_output->serial == serial) { + if (output_check_status(sna, sna_output)) { + DBG(("%s: output %s (id=%d), retained state\n", + __FUNCTION__, output->name, sna_output->id)); + sna_output->last_detect = now; + } else { + DBG(("%s: output %s (id=%d), changed state, reprobing\n", + __FUNCTION__, output->name, sna_output->id)); + sna_output->hotplug_count++; + sna_output->last_detect = 0; + changed |= 4; + } continue; + } DBG(("%s: removing output %s (id=%d), serial=%u [now %u]\n", - __FUNCTION__, output->name, to_sna_output(output)->id, - to_sna_output(output)->serial, serial)); + __FUNCTION__, output->name, sna_output->id, + sna_output->serial, serial)); xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, - "%s output %s\n", - sna->flags & SNA_REMOVE_OUTPUTS ? 
"Removed" : "Disabled", + "Disabled output %s\n", output->name); - if (sna->flags & SNA_REMOVE_OUTPUTS) { - sna_output_del(output); - i--; - } else { - to_sna_output(output)->id = 0; - output->crtc = NULL; - } + sna_output->id = 0; + sna_output->last_detect = 0; + output->crtc = NULL; + RROutputChanged(output->randr_output, TRUE); changed |= 2; } - if (changed) { + /* Have the list of available outputs been updated? */ + if (changed & 3) { DBG(("%s: outputs changed, broadcasting\n", __FUNCTION__)); sna_mode_set_primary(sna); @@ -4200,6 +5531,51 @@ void sna_mode_discover(struct sna *sna) xf86RandR12TellChanged(screen); } + + /* If anything has changed, refresh the RandR information. + * Note this could recurse once from udevless RRGetInfo() probes, + * but only once. + */ + if (changed && tell) + RRGetInfo(screen, TRUE); +} + +/* Since we only probe the current mode on startup, we may not have the full + * list of modes available until the user explicitly requests them. Fake a + * hotplug event after a second after starting to fill in any missing modes. 
+ */ +static CARD32 sna_mode_coldplug(OsTimerPtr timer, CARD32 now, void *data) +{ + struct sna *sna = data; + ScreenPtr screen = xf86ScrnToScreen(sna->scrn); + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + bool reprobe = false; + int i; + + DBG(("%s()\n", __FUNCTION__)); + + for (i = 0; i < sna->mode.num_real_output; i++) { + xf86OutputPtr output = config->output[i]; + struct sna_output *sna_output = to_sna_output(output); + + if (sna_output->id == 0) + continue; + if (sna_output->last_detect) + continue; + if (output->status == XF86OutputStatusDisconnected) + continue; + + DBG(("%s: output %s connected, needs reprobe\n", + __FUNCTION__, output->name)); + reprobe = true; + } + + if (reprobe) { + RRGetInfo(screen, TRUE); + RRTellChanged(screen); + } + free(timer); + return 0; } static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) @@ -4208,7 +5584,7 @@ static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) DBG(("%s\n", __FUNCTION__)); - if (wedged(sna)) + if (wedged(sna) || isGPU(sna->scrn)) return; old_priv = sna_pixmap_force_to_gpu(old, MOVE_READ); @@ -4220,12 +5596,19 @@ static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) return; if (old_priv->clear) { - (void)sna->render.fill_one(sna, new, new_priv->gpu_bo, - old_priv->clear_color, - 0, 0, - new->drawable.width, - new->drawable.height, - GXcopy); + bool ok = false; + if (!wedged(sna)) + ok = sna->render.fill_one(sna, new, new_priv->gpu_bo, + old_priv->clear_color, + 0, 0, + new->drawable.width, + new->drawable.height, + GXcopy); + if (!ok) { + void *ptr = kgem_bo_map__gtt(&sna->kgem, new_priv->gpu_bo); + if (ptr) + memset(ptr, 0, new_priv->gpu_bo->pitch*new->drawable.height); + } new_priv->clear = true; new_priv->clear_color = old_priv->clear_color; } else { @@ -4281,11 +5664,18 @@ static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) __FUNCTION__, box.x2, box.y2, sx, sy, dx, dy)); if (box.x2 != new->drawable.width || box.y2 != 
new->drawable.height) { - (void)sna->render.fill_one(sna, new, new_priv->gpu_bo, 0, - 0, 0, - new->drawable.width, - new->drawable.height, - GXclear); + bool ok = false; + if (!wedged(sna)) + ok = sna->render.fill_one(sna, new, new_priv->gpu_bo, 0, + 0, 0, + new->drawable.width, + new->drawable.height, + GXclear); + if (!ok) { + void *ptr = kgem_bo_map__gtt(&sna->kgem, new_priv->gpu_bo); + if (ptr) + memset(ptr, 0, new_priv->gpu_bo->pitch*new->drawable.height); + } } (void)sna->render.copy_boxes(sna, GXcopy, &old->drawable, old_priv->gpu_bo, sx, sy, @@ -4302,7 +5692,7 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); struct sna *sna = to_sna(scrn); - ScreenPtr screen = scrn->pScreen; + ScreenPtr screen = xf86ScrnToScreen(scrn); PixmapPtr new_front; int i; @@ -4337,9 +5727,20 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) for (i = 0; i < sna->mode.num_real_crtc; i++) sna_crtc_disable_shadow(sna, to_sna_crtc(config->crtc[i])); assert(sna->mode.shadow_active == 0); + assert(!sna->mode.shadow_enabled); assert(sna->mode.shadow_damage == NULL); assert(sna->mode.shadow == NULL); + /* Flush pending shadow updates */ + if (sna->mode.flip_active) { + DBG(("%s: waiting for %d outstanding TearFree flips\n", + __FUNCTION__, sna->mode.flip_active)); + while (sna->mode.flip_active && sna_mode_wait_for_event(sna)) + sna_mode_wakeup(sna); + } + + /* Cancel a pending [un]flip (as the pixmaps no longer match) */ + sna_present_cancel_flip(sna); copy_front(sna, sna->front, new_front); screen->SetScreenPixmap(new_front); @@ -4351,14 +5752,6 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) scrn->virtualY = height; scrn->displayWidth = width; - /* Flush pending shadow updates */ - if (sna->mode.flip_active) { - DBG(("%s: waiting for %d outstanding TearFree flips\n", - __FUNCTION__, sna->mode.flip_active)); - while (sna->mode.flip_active && sna_mode_wait_for_event(sna)) - sna_mode_wakeup(sna); - } - 
/* Only update the CRTCs if we are in control */ if (!scrn->vtSema) return TRUE; @@ -4371,7 +5764,7 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) continue; if (!__sna_crtc_set_mode(crtc)) - sna_crtc_disable(crtc); + sna_crtc_disable(crtc, false); } sna_mode_wakeup(sna); @@ -4381,19 +5774,6 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) } /* cursor handling */ -struct sna_cursor { - struct sna_cursor *next; - uint32_t *image; - Rotation rotation; - int ref; - int size; - int last_width; - int last_height; - unsigned handle; - unsigned serial; - unsigned alloc; -}; - static void rotate_coord(Rotation rotation, int size, int x_dst, int y_dst, @@ -4429,36 +5809,6 @@ rotate_coord(Rotation rotation, int size, *y_src = y_dst; } -static void -rotate_coord_back(Rotation rotation, int size, int *x, int *y) -{ - int t; - - if (rotation & RR_Reflect_X) - *x = size - *x - 1; - if (rotation & RR_Reflect_Y) - *y = size - *y - 1; - - switch (rotation & 0xf) { - case RR_Rotate_0: - break; - case RR_Rotate_90: - t = *x; - *x = *y; - *y = size - t - 1; - break; - case RR_Rotate_180: - *x = size - *x - 1; - *y = size - *y - 1; - break; - case RR_Rotate_270: - t = *x; - *x = size - *y - 1; - *y = t; - break; - } -} - static struct sna_cursor *__sna_create_cursor(struct sna *sna, int size) { struct sna_cursor *c; @@ -4519,6 +5869,17 @@ static uint32_t *get_cursor_argb(CursorPtr c) #endif } +static int __cursor_size(int width, int height) +{ + int i, size; + + i = MAX(width, height); + for (size = 64; size < i; size <<= 1) + ; + + return size; +} + static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) { struct sna_cursor *cursor; @@ -4526,6 +5887,7 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) const uint32_t *argb; uint32_t *image; int width, height, pitch, size, x, y; + bool transformed; Rotation rotation; assert(sna->cursor.ref); @@ -4537,8 +5899,8 @@ static struct sna_cursor *__sna_get_cursor(struct 
sna *sna, xf86CrtcPtr crtc) cursor ? cursor->serial : 0, sna->cursor.serial)); if (cursor && cursor->serial == sna->cursor.serial) { - assert(cursor->size == sna->cursor.size); - assert(cursor->rotation == crtc->transform_in_use ? crtc->rotation : RR_Rotate_0); + assert(cursor->size == sna->cursor.size || cursor->transformed); + assert(cursor->rotation == (!to_sna_crtc(crtc)->cursor_transform && crtc->transform_in_use) ? crtc->rotation : RR_Rotate_0); assert(cursor->ref); return cursor; } @@ -4550,22 +5912,81 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) sna->cursor.serial, get_cursor_argb(sna->cursor.ref) != NULL)); - rotation = crtc->transform_in_use ? crtc->rotation : RR_Rotate_0; + transformed = to_sna_crtc(crtc)->cursor_transform; + rotation = (!transformed && crtc->transform_in_use) ? crtc->rotation : RR_Rotate_0; + + if (transformed) { + struct pixman_box16 box; + + box.x1 = box.y1 = 0; + box.x2 = sna->cursor.ref->bits->width; + box.y2 = sna->cursor.ref->bits->height; - if (sna->cursor.use_gtt) { /* Don't allow phys cursor sharing */ + pixman_f_transform_bounds(&crtc->f_crtc_to_framebuffer, &box); + size = __cursor_size(box.x2 - box.x1, box.y2 - box.y1); + __DBG(("%s: transformed cursor %dx%d -> %dx%d\n", + __FUNCTION__ , + sna->cursor.ref->bits->width, + sna->cursor.ref->bits->height, + box.x2 - box.x1, box.y2 - box.y1)); + } else + size = sna->cursor.size; + + if (crtc->transform_in_use) { + RRTransformPtr T = NULL; + struct pixman_vector v; + + if (crtc->transformPresent) { + T = &crtc->transform; + + /* Cancel any translation from this affine + * transformation. We just want to rotate and scale + * the cursor image. 
+ */ + v.vector[0] = 0; + v.vector[1] = 0; + v.vector[2] = pixman_fixed_1; + pixman_transform_point(&crtc->transform.transform, &v); + } + + RRTransformCompute(0, 0, size, size, crtc->rotation, T, NULL, + &to_sna_crtc(crtc)->cursor_to_fb, + &to_sna_crtc(crtc)->fb_to_cursor); + if (T) + pixman_f_transform_translate( + &to_sna_crtc(crtc)->cursor_to_fb, + &to_sna_crtc(crtc)->fb_to_cursor, + -pixman_fixed_to_double(v.vector[0]), + -pixman_fixed_to_double(v.vector[1])); + + __DBG(("%s: cursor_to_fb [%f %f %f, %f %f %f, %f %f %f]\n", + __FUNCTION__, + to_sna_crtc(crtc)->cursor_to_fb.m[0][0], + to_sna_crtc(crtc)->cursor_to_fb.m[0][1], + to_sna_crtc(crtc)->cursor_to_fb.m[0][2], + to_sna_crtc(crtc)->cursor_to_fb.m[1][0], + to_sna_crtc(crtc)->cursor_to_fb.m[1][1], + to_sna_crtc(crtc)->cursor_to_fb.m[1][2], + to_sna_crtc(crtc)->cursor_to_fb.m[2][0], + to_sna_crtc(crtc)->cursor_to_fb.m[2][1], + to_sna_crtc(crtc)->cursor_to_fb.m[2][2])); + } + + /* Don't allow phys cursor sharing */ + if (sna->cursor.use_gtt && !transformed) { for (cursor = sna->cursor.cursors; cursor; cursor = cursor->next) { - if (cursor->serial == sna->cursor.serial && cursor->rotation == rotation) { + if (cursor->serial == sna->cursor.serial && + cursor->rotation == rotation && + !cursor->transformed) { __DBG(("%s: reusing handle=%d, serial=%d, rotation=%d, size=%d\n", __FUNCTION__, cursor->handle, cursor->serial, cursor->rotation, cursor->size)); assert(cursor->size == sna->cursor.size); return cursor; } } - - cursor = to_sna_crtc(crtc)->cursor; } - size = sna->cursor.size; + cursor = to_sna_crtc(crtc)->cursor; if (cursor && cursor->alloc < 4*size*size) cursor = NULL; @@ -4577,7 +5998,7 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) } } - width = sna->cursor.ref->bits->width; + width = sna->cursor.ref->bits->width; height = sna->cursor.ref->bits->height; source = sna->cursor.ref->bits->source; mask = sna->cursor.ref->bits->mask; @@ -4585,7 +6006,7 @@ static struct 
sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) pitch = BitmapBytePad(width); image = cursor->image; - if (image == NULL) { + if (image == NULL || transformed) { image = sna->cursor.scratch; cursor->last_width = cursor->last_height = size; } @@ -4616,6 +6037,21 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) mask += pitch; source += pitch; } + if (transformed) { + __DBG(("%s: Applying affine BLT to bitmap\n", __FUNCTION__)); + affine_blt(image, cursor->image, 32, + 0, 0, width, height, size * 4, + 0, 0, size, size, size * 4, + &to_sna_crtc(crtc)->cursor_to_fb); + image = cursor->image; + } + } else if (transformed) { + __DBG(("%s: Applying affine BLT to ARGB\n", __FUNCTION__)); + affine_blt(argb, cursor->image, 32, + 0, 0, width, height, width * 4, + 0, 0, size, size, size * 4, + &to_sna_crtc(crtc)->cursor_to_fb); + image = cursor->image; } else memcpy_blt(argb, image, 32, width * 4, size * 4, @@ -4662,9 +6098,16 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) cursor->size = size; cursor->rotation = rotation; + cursor->transformed = transformed; cursor->serial = sna->cursor.serial; - cursor->last_width = width; - cursor->last_height = height; + if (transformed) { + /* mark the transformed rectangle as dirty, not input */ + cursor->last_width = size; + cursor->last_height = size; + } else { + cursor->last_width = width; + cursor->last_height = height; + } return cursor; } @@ -4674,40 +6117,55 @@ sna_realize_cursor(xf86CursorInfoPtr info, CursorPtr cursor) return NULL; } -#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0) -static inline int sigio_block(void) -{ - OsBlockSIGIO(); - return 0; -} -static inline void sigio_unblock(int was_blocked) +static void enable_fb_access(ScrnInfoPtr scrn, int state) { - OsReleaseSIGIO(); - (void)was_blocked; -} + scrn->EnableDisableFBAccess( +#ifdef XF86_HAS_SCRN_CONV + scrn, #else -#include -static inline int sigio_block(void) + 
scrn->scrnIndex, +#endif + state); +} + + +static void __restore_swcursor(ScrnInfoPtr scrn) { - return xf86BlockSIGIO(); + DBG(("%s: attempting to restore SW cursor\n", __FUNCTION__)); + enable_fb_access(scrn, FALSE); + enable_fb_access(scrn, TRUE); + + RemoveBlockAndWakeupHandlers((void *)__restore_swcursor, + (void *)NoopDDA, + scrn); } -static inline void sigio_unblock(int was_blocked) + +static void restore_swcursor(struct sna *sna) { - xf86UnblockSIGIO(was_blocked); + sna->cursor.info->HideCursor(sna->scrn); + + /* XXX Force the cursor to be restored (avoiding recursion) */ + FreeCursor(sna->cursor.ref, None); + sna->cursor.ref = NULL; + + RegisterBlockAndWakeupHandlers((void *)__restore_swcursor, + (void *)NoopDDA, + sna->scrn); } -#endif static void sna_show_cursors(ScrnInfoPtr scrn) { xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); struct sna *sna = to_sna(scrn); + struct kmsg kmsg; int sigio, c; DBG(("%s: cursor?=%d\n", __FUNCTION__, sna->cursor.ref != NULL)); if (sna->cursor.ref == NULL) return; + kmsg_open(&kmsg); sigio = sigio_block(); for (c = 0; c < sna->mode.num_real_crtc; c++) { xf86CrtcPtr crtc = xf86_config->crtc[c]; @@ -4721,7 +6179,7 @@ sna_show_cursors(ScrnInfoPtr scrn) if (!crtc->cursor_in_range) { DBG(("%s: skipping cursor outside CRTC (pipe=%d)\n", - __FUNCTION__, sna_crtc->pipe)); + __FUNCTION__, sna_crtc_pipe(crtc))); continue; } @@ -4729,20 +6187,21 @@ sna_show_cursors(ScrnInfoPtr scrn) if (cursor == NULL || (sna_crtc->cursor == cursor && sna_crtc->last_cursor_size == cursor->size)) { DBG(("%s: skipping cursor already show on CRTC (pipe=%d)\n", - __FUNCTION__, sna_crtc->pipe)); + __FUNCTION__, sna_crtc_pipe(crtc))); continue; } DBG(("%s: CRTC pipe=%d, handle->%d\n", __FUNCTION__, - sna_crtc->pipe, cursor->handle)); + sna_crtc_pipe(crtc), cursor->handle)); VG_CLEAR(arg); arg.flags = DRM_MODE_CURSOR_BO; - arg.crtc_id = sna_crtc->id; + arg.crtc_id = __sna_crtc_id(sna_crtc); arg.width = arg.height = cursor->size; arg.handle = 
cursor->handle; - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg) == 0) { + if (!FAIL_CURSOR_IOCTL && + drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg) == 0) { if (sna_crtc->cursor) { assert(sna_crtc->cursor->ref > 0); sna_crtc->cursor->ref--; @@ -4750,10 +6209,18 @@ sna_show_cursors(ScrnInfoPtr scrn) cursor->ref++; sna_crtc->cursor = cursor; sna_crtc->last_cursor_size = cursor->size; + } else { + ERR(("%s: failed to show cursor on CRTC:%d [pipe=%d], disabling hwcursor: errno=%d\n", + __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc), errno)); + sna->cursor.disable = true; } } sigio_unblock(sigio); sna->cursor.active = true; + kmsg_close(&kmsg, sna->cursor.disable); + + if (unlikely(sna->cursor.disable)) + restore_swcursor(sna); } static void @@ -4789,24 +6256,45 @@ static void sna_crtc_disable_cursor(struct sna *sna, struct sna_crtc *crtc) { struct drm_mode_cursor arg; + int sigio; if (!crtc->cursor) return; - DBG(("%s: CRTC:%d, handle=%d\n", __FUNCTION__, crtc->id, crtc->cursor->handle)); - assert(crtc->cursor->ref); + sigio = sigio_block(); + if (crtc->cursor) { + DBG(("%s: CRTC:%d, handle=%d\n", __FUNCTION__, __sna_crtc_id(crtc), crtc->cursor->handle)); + assert(crtc->cursor->ref > 0); + crtc->cursor->ref--; + crtc->cursor = NULL; + crtc->last_cursor_size = 0; - VG_CLEAR(arg); - arg.flags = DRM_MODE_CURSOR_BO; - arg.crtc_id = crtc->id; - arg.width = arg.height = 0; - arg.handle = 0; + VG_CLEAR(arg); + arg.flags = DRM_MODE_CURSOR_BO; + arg.crtc_id = __sna_crtc_id(crtc); + arg.width = arg.height = 0; + arg.handle = 0; - (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg); - assert(crtc->cursor->ref > 0); - crtc->cursor->ref--; - crtc->cursor = NULL; - crtc->last_cursor_size = 0; + (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg); + } + sigio_unblock(sigio); +} + +static void +sna_disable_cursors(ScrnInfoPtr scrn) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + struct sna *sna = to_sna(scrn); + int sigio, c; + + 
DBG(("%s\n", __FUNCTION__)); + + sigio = sigio_block(); + for (c = 0; c < sna->mode.num_real_crtc; c++) { + assert(to_sna_crtc(xf86_config->crtc[c])); + sna_crtc_disable_cursor(sna, to_sna_crtc(xf86_config->crtc[c])); + } + sigio_unblock(sigio); } static void @@ -4852,6 +6340,7 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) { xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); struct sna *sna = to_sna(scrn); + struct kmsg kmsg; int sigio, c; __DBG(("%s(%d, %d), cursor? %d\n", __FUNCTION__, @@ -4859,6 +6348,7 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) if (sna->cursor.ref == NULL) return; + kmsg_open(&kmsg); sigio = sigio_block(); sna->cursor.last_x = x; sna->cursor.last_y = y; @@ -4876,27 +6366,37 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) VG_CLEAR(arg); arg.flags = 0; - arg.crtc_id = sna_crtc->id; + arg.crtc_id = __sna_crtc_id(sna_crtc); arg.handle = 0; if (sna_crtc->bo == NULL) goto disable; + cursor = __sna_get_cursor(sna, crtc); + if (cursor == NULL) + cursor = sna_crtc->cursor; + if (cursor == NULL) { + __DBG(("%s: failed to grab cursor, disabling\n", __FUNCTION__)); + goto disable; + } + if (crtc->transform_in_use) { int xhot = sna->cursor.ref->bits->xhot; int yhot = sna->cursor.ref->bits->yhot; - struct pict_f_vector v; + struct pict_f_vector v, hot; - v.v[0] = (x + xhot) + 0.5; - v.v[1] = (y + yhot) + 0.5; - v.v[2] = 1; + v.v[0] = x + xhot + .5; + v.v[1] = y + yhot + .5; + v.v[2] = 1.; pixman_f_transform_point(&crtc->f_framebuffer_to_crtc, &v); - rotate_coord_back(crtc->rotation, sna->cursor.size, &xhot, &yhot); + hot.v[0] = xhot; + hot.v[1] = yhot; + hot.v[2] = 1.; + pixman_f_transform_point(&sna_crtc->fb_to_cursor, &hot); - /* cursor will have 0.5 added to it already so floor is sufficent */ - arg.x = floor(v.v[0]) - xhot; - arg.y = floor(v.v[1]) - yhot; + arg.x = floor(v.v[0] - hot.v[0]); + arg.y = floor(v.v[1] - hot.v[1]); } else { arg.x = x - crtc->x; arg.y = y - crtc->y; @@ -4904,15 +6404,6 @@ 
sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) if (arg.x < crtc->mode.HDisplay && arg.x > -sna->cursor.size && arg.y < crtc->mode.VDisplay && arg.y > -sna->cursor.size) { - cursor = __sna_get_cursor(sna, crtc); - if (cursor == NULL) - cursor = sna_crtc->cursor; - if (cursor == NULL) { - __DBG(("%s: failed to grab cursor, disabling\n", - __FUNCTION__)); - goto disable; - } - if (sna_crtc->cursor != cursor || sna_crtc->last_cursor_size != cursor->size) { arg.flags |= DRM_MODE_CURSOR_BO; arg.handle = cursor->handle; @@ -4932,10 +6423,13 @@ disable: } __DBG(("%s: CRTC:%d (%d, %d), handle=%d, flags=%x (old cursor handle=%d), move? %d, update handle? %d\n", - __FUNCTION__, sna_crtc->id, arg.x, arg.y, arg.handle, arg.flags, sna_crtc->cursor ? sna_crtc->cursor->handle : 0, + __FUNCTION__, __sna_crtc_id(sna_crtc), arg.x, arg.y, arg.handle, arg.flags, sna_crtc->cursor ? sna_crtc->cursor->handle : 0, arg.flags & DRM_MODE_CURSOR_MOVE, arg.flags & DRM_MODE_CURSOR_BO)); - if (arg.flags && + if (arg.flags == 0) + continue; + + if (!FAIL_CURSOR_IOCTL && drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg) == 0) { if (arg.flags & DRM_MODE_CURSOR_BO) { if (sna_crtc->cursor) { @@ -4949,9 +6443,21 @@ disable: } else sna_crtc->last_cursor_size = 0; } + } else { + ERR(("%s: failed to update cursor on CRTC:%d [pipe=%d], disabling hwcursor: errno=%d\n", + __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc), errno)); + /* XXX How to force switch back to SW cursor? + * Right now we just wait until the next cursor image + * change, which is fairly frequent. 
+ */ + sna->cursor.disable = true; } } sigio_unblock(sigio); + kmsg_close(&kmsg, sna->cursor.disable); + + if (unlikely(sna->cursor.disable)) + restore_swcursor(sna); } #if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,15,99,902,2) @@ -4978,17 +6484,6 @@ sna_load_cursor_image(ScrnInfoPtr scrn, unsigned char *src) { } -static int __cursor_size(CursorPtr cursor) -{ - int i, size; - - i = MAX(cursor->bits->width, cursor->bits->height); - for (size = 64; size < i; size <<= 1) - ; - - return size; -} - static bool sna_cursor_preallocate(struct sna *sna) { @@ -5006,6 +6501,50 @@ sna_cursor_preallocate(struct sna *sna) return true; } +static bool +transformable_cursor(struct sna *sna, CursorPtr cursor) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + int i; + + for (i = 0; i < sna->mode.num_real_crtc; i++) { + xf86CrtcPtr crtc = config->crtc[i]; + struct pixman_box16 box; + int size; + + if (!to_sna_crtc(crtc)->hwcursor) { + DBG(("%s: hwcursor disabled on CRTC:%d [pipe=%d]\n", + __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc))); + return false; + } + + if (!sna->cursor.use_gtt || !sna->cursor.scratch) { + DBG(("%s: unable to use GTT curosor access [%d] or no scratch [%d]\n", + __FUNCTION__, sna->cursor.use_gtt, sna->cursor.scratch)); + return false; + } + + box.x1 = box.y1 = 0; + box.x2 = cursor->bits->width; + box.y2 = cursor->bits->height; + + if (!pixman_f_transform_bounds(&crtc->f_crtc_to_framebuffer, + &box)) { + DBG(("%s: unable to transform bounds\n", __FUNCTION__)); + return false; + } + + size = __cursor_size(box.x2 - box.x1, box.y2 - box.y1); + if (size > sna->cursor.max_size) { + DBG(("%s: transformed cursor size=%d too large, max=%d\n", + __FUNCTION__, size, sna->cursor.max_size)); + return false; + } + } + + return true; +} + static Bool sna_use_hw_cursor(ScreenPtr screen, CursorPtr cursor) { @@ -5014,6 +6553,9 @@ sna_use_hw_cursor(ScreenPtr screen, CursorPtr cursor) DBG(("%s (%dx%d)?\n", __FUNCTION__, cursor->bits->width, 
cursor->bits->height)); + if (sna->cursor.disable) + return FALSE; + /* cursors are invariant */ if (cursor == sna->cursor.ref) return TRUE; @@ -5023,12 +6565,24 @@ sna_use_hw_cursor(ScreenPtr screen, CursorPtr cursor) sna->cursor.ref = NULL; } - sna->cursor.size = __cursor_size(cursor); - if (sna->cursor.size > sna->cursor.max_size) + sna->cursor.size = + __cursor_size(cursor->bits->width, cursor->bits->height); + if (sna->cursor.size > sna->cursor.max_size) { + DBG(("%s: cursor size=%d too large, max %d: using sw cursor\n", + __FUNCTION__, sna->cursor.size, sna->cursor.max_size)); return FALSE; + } + + if (sna->mode.rr_active && !transformable_cursor(sna, cursor)) { + DBG(("%s: RandR active [%d] and non-transformable cursor: using sw cursor\n", + __FUNCTION__, sna->mode.rr_active)); + return FALSE; + } - if (!sna_cursor_preallocate(sna)) + if (!sna_cursor_preallocate(sna)) { + DBG(("%s: cursor preallocation failed: using sw cursor\n", __FUNCTION__)); return FALSE; + } sna->cursor.ref = cursor; cursor->refcnt++; @@ -5056,8 +6610,12 @@ sna_cursor_pre_init(struct sna *sna) return; #define LOCAL_IOCTL_GET_CAP DRM_IOWR(0x0c, struct local_get_cap) -#define DRM_CAP_CURSOR_WIDTH 8 -#define DRM_CAP_CURSOR_HEIGHT 9 +#ifndef DRM_CAP_CURSOR_WIDTH +#define DRM_CAP_CURSOR_WIDTH 0x8 +#endif +#ifndef DRM_CAP_CURSOR_HEIGHT +#define DRM_CAP_CURSOR_HEIGHT 0x9 +#endif #define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 @@ -5087,11 +6645,9 @@ sna_cursor_pre_init(struct sna *sna) DBG(("%s: cursor updates use_gtt?=%d\n", __FUNCTION__, sna->cursor.use_gtt)); - if (!sna->cursor.use_gtt) { - sna->cursor.scratch = malloc(sna->cursor.max_size * sna->cursor.max_size * 4); - if (!sna->cursor.scratch) - sna->cursor.max_size = 0; - } + sna->cursor.scratch = malloc(sna->cursor.max_size * sna->cursor.max_size * 4); + if (!sna->cursor.scratch && !sna->cursor.use_gtt) + sna->cursor.max_size = 0; sna->cursor.num_stash = -sna->mode.num_real_crtc; @@ -5193,7 +6749,7 @@ sna_crtc_flip(struct sna *sna, struct 
sna_crtc *crtc, struct kgem_bo *bo, int x, int output_count = 0; int i; - DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, crtc->id, crtc->pipe, bo->handle)); + DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), bo->handle)); assert(sna->mode.num_real_output < ARRAY_SIZE(output_ids)); assert(crtc->bo); @@ -5207,11 +6763,11 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, DBG(("%s: attaching output '%s' %d [%d] to crtc:%d (pipe %d) (possible crtc:%x, possible clones:%x)\n", __FUNCTION__, output->name, i, to_connector_id(output), - crtc->id, crtc->pipe, + __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), (uint32_t)output->possible_crtcs, (uint32_t)output->possible_clones)); - assert(output->possible_crtcs & (1 << crtc->pipe) || + assert(output->possible_crtcs & (1 << __sna_crtc_pipe(crtc)) || is_zaphod(sna->scrn)); output_ids[output_count] = to_connector_id(output); @@ -5221,7 +6777,7 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, assert(output_count); VG_CLEAR(arg); - arg.crtc_id = crtc->id; + arg.crtc_id = __sna_crtc_id(crtc); arg.fb_id = fb_id(bo); assert(arg.fb_id); arg.x = x; @@ -5231,20 +6787,74 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, arg.mode = crtc->kmode; arg.mode_valid = 1; - DBG(("%s: applying crtc [%d, pipe=%d] mode=%dx%d+%d+%d@%d, fb=%d across %d outputs [%d...]\n", - __FUNCTION__, crtc->id, crtc->pipe, - arg.mode.hdisplay, - arg.mode.vdisplay, - arg.x, arg.y, - arg.mode.clock, - arg.fb_id, - output_count, output_count ? output_ids[0] : 0)); + DBG(("%s: applying crtc [%d, pipe=%d] mode=%dx%d+%d+%d@%d, fb=%d across %d outputs [%d...]\n", + __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), + arg.mode.hdisplay, + arg.mode.vdisplay, + arg.x, arg.y, + arg.mode.clock, + arg.fb_id, + output_count, output_count ? 
output_ids[0] : 0)); + + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg)) + return false; + + crtc->offset = y << 16 | x; + __kgem_bo_clear_dirty(bo); + return true; +} + +static void sna_mode_restore(struct sna *sna) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + int error = 0; + int i; + + assert(!sna->mode.hidden); + + for (i = 0; i < sna->mode.num_real_crtc; i++) { + xf86CrtcPtr crtc = config->crtc[i]; + + assert(to_sna_crtc(crtc) != NULL); + if (to_sna_crtc(crtc)->bo == NULL) + continue; + + assert(crtc->enabled); + if (!__sna_crtc_set_mode(crtc)) { + sna_crtc_disable(crtc, false); + error++; + } + } + sna_mode_wakeup(sna); + while (sna->mode.flip_active && sna_mode_wakeup(sna)) + ; + update_flush_interval(sna); + sna_cursors_reload(sna); + sna->mode.dirty = false; + + if (error) + xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, + "Failed to restore display configuration\n"); +} + +bool sna_needs_page_flip(struct sna *sna, struct kgem_bo *bo) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + int i; + + for (i = 0; i < sna->mode.num_real_crtc; i++) { + struct sna_crtc *crtc = config->crtc[i]->driver_private; + + if (crtc->bo == NULL) + continue; - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg)) - return false; + if (crtc->bo == bo) + continue; - crtc->offset = y << 16 | x; - return true; + return true; + } + + return false; } int @@ -5256,6 +6866,7 @@ sna_page_flip(struct sna *sna, xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); const int width = sna->scrn->virtualX; const int height = sna->scrn->virtualY; + int sigio; int count = 0; int i; @@ -5263,23 +6874,26 @@ sna_page_flip(struct sna *sna, assert(bo->refcnt); assert((sna->flags & SNA_IS_HOSTED) == 0); - assert((sna->flags & SNA_TEAR_FREE) == 0); assert(sna->mode.flip_active == 0); assert(sna->mode.front_active); + assert(!sna->mode.hidden); assert(sna->scrn->vtSema); if ((sna->flags & (data ? 
SNA_HAS_FLIP : SNA_HAS_ASYNC_FLIP)) == 0) return 0; kgem_bo_submit(&sna->kgem, bo); + __kgem_bo_clear_dirty(bo); + sigio = sigio_block(); for (i = 0; i < sna->mode.num_real_crtc; i++) { struct sna_crtc *crtc = config->crtc[i]->driver_private; struct drm_mode_crtc_page_flip arg; uint32_t crtc_offset; + int fixup; DBG(("%s: crtc %d id=%d, pipe=%d active? %d\n", - __FUNCTION__, i, crtc->id, crtc->pipe, crtc->bo != NULL)); + __FUNCTION__, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->bo != NULL)); if (crtc->bo == NULL) continue; assert(!crtc->transform); @@ -5288,13 +6902,18 @@ sna_page_flip(struct sna *sna, assert(crtc->bo->refcnt >= crtc->bo->active_scanout); assert(crtc->flip_bo == NULL); - arg.crtc_id = crtc->id; + assert_crtc_fb(sna, crtc); + if (data == NULL && crtc->bo == bo) + goto next_crtc; + + arg.crtc_id = __sna_crtc_id(crtc); arg.fb_id = get_fb(sna, bo, width, height); if (arg.fb_id == 0) { assert(count == 0); - return 0; + break; } + fixup = 0; crtc_offset = crtc->base->y << 16 | crtc->base->x; if (bo->pitch != crtc->bo->pitch || crtc_offset != crtc->offset) { @@ -5303,7 +6922,12 @@ sna_page_flip(struct sna *sna, bo->pitch, crtc->bo->pitch, crtc_offset, crtc->offset)); fixup_flip: + fixup = 1; if (crtc->bo != bo && sna_crtc_flip(sna, crtc, bo, crtc->base->x, crtc->base->y)) { +update_scanout: + DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", + __FUNCTION__, crtc->bo->handle, crtc->bo->active_scanout, + bo->handle, bo->active_scanout)); assert(crtc->bo->active_scanout); assert(crtc->bo->refcnt >= crtc->bo->active_scanout); crtc->bo->active_scanout--; @@ -5321,15 +6945,8 @@ fixup_flip: goto next_crtc; /* queue a flip in order to send the event */ - } else { - if (count && !xf86SetDesiredModes(sna->scrn)) { - xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, - "failed to restore display configuration\n"); - for (; i < sna->mode.num_real_crtc; i++) - sna_crtc_disable(config->crtc[i]); - } - return 0; 
- } + } else + goto error; } /* Only the reference crtc will finally deliver its page flip @@ -5346,7 +6963,7 @@ fixup_flip: retry_flip: DBG(("%s: crtc %d id=%d, pipe=%d --> fb %d\n", - __FUNCTION__, i, crtc->id, crtc->pipe, arg.fb_id)); + __FUNCTION__, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), arg.fb_id)); if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { ERR(("%s: pageflip failed with err=%d\n", __FUNCTION__, errno)); @@ -5354,7 +6971,7 @@ retry_flip: struct drm_mode_crtc mode; memset(&mode, 0, sizeof(mode)); - mode.crtc_id = crtc->id; + mode.crtc_id = __sna_crtc_id(crtc); drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode); DBG(("%s: crtc=%d, valid?=%d, fb attached?=%d, expected=%d\n", @@ -5366,7 +6983,7 @@ retry_flip: goto fixup_flip; if (count == 0) - return 0; + break; DBG(("%s: throttling on busy flip / waiting for kernel to catch up\n", __FUNCTION__)); drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_THROTTLE, 0); @@ -5375,15 +6992,25 @@ retry_flip: goto retry_flip; } + if (!fixup) + goto fixup_flip; + +error: xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, - "page flipping failed, on CRTC:%d (pipe=%d), disabling %s page flips\n", - crtc->id, crtc->pipe, data ? "synchronous": "asynchronous"); + "page flipping failed, on CRTC:%d (pipe=%d), disabling %s page flips\n", + __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), data ? "synchronous": "asynchronous"); + + if (count || crtc->bo == bo) + sna_mode_restore(sna); + sna->flags &= ~(data ? 
SNA_HAS_FLIP : SNA_HAS_ASYNC_FLIP); - goto fixup_flip; + count = 0; + break; } if (data) { assert(crtc->flip_bo == NULL); + assert(handler); crtc->flip_handler = handler; crtc->flip_data = data; crtc->flip_bo = kgem_bo_reference(bo); @@ -5391,11 +7018,15 @@ retry_flip: crtc->flip_serial = crtc->mode_serial; crtc->flip_pending = true; sna->mode.flip_active++; - } + DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", + __FUNCTION__, __sna_crtc_id(crtc), crtc->flip_bo->handle, crtc->flip_bo->active_scanout, crtc->flip_serial)); + } else + goto update_scanout; next_crtc: count++; } + sigio_unblock(sigio); DBG(("%s: page flipped %d crtcs\n", __FUNCTION__, count)); return count; @@ -5471,7 +7102,7 @@ static void crtc_init_gamma(xf86CrtcPtr crtc) assert(sna_crtc); - lut.crtc_id = sna_crtc->id; + lut.crtc_id = __sna_crtc_id(sna_crtc); lut.gamma_size = 256; lut.red = (uintptr_t)(gamma); lut.green = (uintptr_t)(gamma + 256); @@ -5485,7 +7116,7 @@ static void crtc_init_gamma(xf86CrtcPtr crtc) } DBG(("%s: CRTC:%d, pipe=%d: gamma set?=%d\n", - __FUNCTION__, sna_crtc->id, sna_crtc->pipe, + __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), gamma_set)); if (!gamma_set) { int i; @@ -5502,6 +7133,7 @@ static void crtc_init_gamma(xf86CrtcPtr crtc) crtc->gamma_red = gamma; crtc->gamma_green = gamma + 256; crtc->gamma_blue = gamma + 2*256; + crtc->gamma_size = 256; } } } @@ -5528,6 +7160,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) { ScrnInfoPtr scrn = sna->scrn; xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); + int crtc_active, crtc_enabled; int width, height; int i, j; @@ -5565,6 +7198,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) } /* Copy the existing modes on each CRTCs */ + crtc_active = crtc_enabled = 0; for (i = 0; i < sna->mode.num_real_crtc; i++) { xf86CrtcPtr crtc = config->crtc[i]; struct sna_crtc *sna_crtc = to_sna_crtc(crtc); @@ -5577,12 +7211,12 @@ static bool 
sna_probe_initial_configuration(struct sna *sna) /* Retrieve the current mode */ VG_CLEAR(mode); - mode.crtc_id = sna_crtc->id; + mode.crtc_id = __sna_crtc_id(sna_crtc); if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode)) continue; DBG(("%s: CRTC:%d, pipe=%d: has mode?=%d\n", __FUNCTION__, - sna_crtc->id, sna_crtc->pipe, + __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), mode.mode_valid && mode.mode.clock)); if (!mode.mode_valid || mode.mode.clock == 0) @@ -5593,6 +7227,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) crtc->desiredX = mode.x; crtc->desiredY = mode.y; crtc->desiredTransformPresent = FALSE; + crtc_active++; } /* Reconstruct outputs pointing to active CRTC */ @@ -5604,6 +7239,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) crtc_id = (uintptr_t)output->crtc; output->crtc = NULL; + output->status = XF86OutputStatusUnknown; if (sna->flags & SNA_IS_SLAVED) continue; @@ -5623,7 +7259,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) xf86CrtcPtr crtc = config->crtc[j]; assert(to_sna_crtc(crtc)); - if (to_sna_crtc(crtc)->id != crtc_id) + if (sna_crtc_id(crtc) != crtc_id) continue; if (crtc->desiredMode.status == MODE_OK) { @@ -5641,18 +7277,30 @@ static bool sna_probe_initial_configuration(struct sna *sna) "Output %s using initial mode %s on pipe %d\n", output->name, crtc->desiredMode.name, - to_sna_crtc(crtc)->pipe); + sna_crtc_pipe(crtc)); output->crtc = crtc; + output->status = XF86OutputStatusConnected; crtc->enabled = TRUE; + crtc_enabled++; + + output_set_gamma(output, crtc); + + if (output->conf_monitor) { + output->mm_width = output->conf_monitor->mon_width; + output->mm_height = output->conf_monitor->mon_height; + } + +#if 0 + sna_output_attach_edid(output); + sna_output_attach_tile(output); +#endif if (output->mm_width == 0 || output->mm_height == 0) { output->mm_height = (crtc->desiredMode.VDisplay * 254) / (10*DEFAULT_DPI); output->mm_width = (crtc->desiredMode.HDisplay * 254) / 
(10*DEFAULT_DPI); } - output_set_gamma(output, crtc); - M = calloc(1, sizeof(DisplayModeRec)); if (M) { *M = crtc->desiredMode; @@ -5673,6 +7321,12 @@ static bool sna_probe_initial_configuration(struct sna *sna) } } + if (crtc_active != crtc_enabled) { + DBG(("%s: only enabled %d out of %d active CRTC, forcing a reconfigure\n", + __FUNCTION__, crtc_enabled, crtc_active)); + return false; + } + width = height = 0; for (i = 0; i < sna->mode.num_real_crtc; i++) { xf86CrtcPtr crtc = config->crtc[i]; @@ -5707,8 +7361,8 @@ static bool sna_probe_initial_configuration(struct sna *sna) if (sna_output->num_modes == 0) continue; - width = sna_output->modes[0].hdisplay; - height= sna_output->modes[0].vdisplay; + width = sna_output->modes[0].hdisplay; + height = sna_output->modes[0].vdisplay; DBG(("%s: panel '%s' is %dx%d\n", __FUNCTION__, output->name, width, height)); @@ -5788,7 +7442,7 @@ probe_capabilities(struct sna *sna) sna->flags &= ~(SNA_HAS_FLIP | SNA_HAS_ASYNC_FLIP); if (has_flip(sna)) sna->flags |= SNA_HAS_FLIP; - if (has_flip__async(sna)) + if (has_flip__async(sna) && (sna->flags & SNA_TEAR_FREE) == 0) sna->flags |= SNA_HAS_ASYNC_FLIP; DBG(("%s: page flips? %s, async? %s\n", __FUNCTION__, sna->flags & SNA_HAS_FLIP ? 
"enabled" : "disabled", @@ -5813,12 +7467,25 @@ sna_crtc_config_notify(ScreenPtr screen) return; } + /* Flush any events completed by the modeset */ + sna_mode_wakeup(sna); + update_flush_interval(sna); + sna->cursor.disable = false; /* Reset HW cursor until the next fail */ sna_cursors_reload(sna); probe_capabilities(sna); sna_present_update(sna); + /* Allow TearFree to come back on when everything is off */ + if (!sna->mode.front_active && sna->flags & SNA_WANT_TEAR_FREE) { + if ((sna->flags & SNA_TEAR_FREE) == 0) + DBG(("%s: enable TearFree next modeset\n", + __FUNCTION__)); + + sna->flags |= SNA_TEAR_FREE; + } + sna->mode.dirty = false; } @@ -5840,6 +7507,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) } probe_capabilities(sna); + sna->mode.hidden = 1; if (!xf86GetOptValInteger(sna->Options, OPTION_VIRTUAL, &num_fake)) num_fake = 1; @@ -5855,6 +7523,9 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) if (res) { xf86CrtcConfigPtr xf86_config; + DBG(("%s: found %d CRTC, %d encoders, %d connectors\n", + __FUNCTION__, res->count_crtcs, res->count_encoders, res->count_connectors)); + assert(res->count_crtcs); assert(res->count_connectors); @@ -5862,6 +7533,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) xf86_config = XF86_CRTC_CONFIG_PTR(scrn); xf86_config->xf86_crtc_notify = sna_crtc_config_notify; + xf86_config->compat_output = 0; for (i = 0; i < res->count_crtcs; i++) if (!sna_crtc_add(scrn, res->crtcs[i])) @@ -5900,6 +7572,11 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) if (!sna_mode_fake_init(sna, num_fake)) return false; + sna->mode.shadow_size = 256; + sna->mode.shadow_events = malloc(sna->mode.shadow_size * sizeof(struct drm_event_vblank)); + if (!sna->mode.shadow_events) + return false; + if (!sna_probe_initial_configuration(sna)) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); @@ -5912,6 +7589,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) } } sort_config_outputs(sna); + 
TimerSet(NULL, 0, COLDPLUG_DELAY_MS, sna_mode_coldplug, sna); sna_setup_provider(scrn); return scrn->modes != NULL; @@ -5921,18 +7599,58 @@ bool sna_mode_wants_tear_free(struct sna *sna) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + bool found = false; + FILE *file; int i; + file = fopen("/sys/module/i915/parameters/enable_fbc", "r"); + if (file) { + int fbc_enabled = 0; + int value; + + if (fscanf(file, "%d", &value) == 1) + fbc_enabled = value > 0; + fclose(file); + + DBG(("%s: module parameter 'enable_fbc' enabled? %d\n", + __FUNCTION__, fbc_enabled)); + + if (fbc_enabled) + return true; + } + for (i = 0; i < sna->mode.num_real_output; i++) { struct sna_output *output = to_sna_output(config->output[i]); int id = find_property(sna, output, "Panel Self-Refresh"); - if (id !=-1 && output->prop_values[id] != -1) { + if (id == -1) + continue; + + found = true; + if (output->prop_values[id] != -1) { DBG(("%s: Panel Self-Refresh detected on %s\n", __FUNCTION__, config->output[i]->name)); return true; } } + if (!found) { + file = fopen("/sys/module/i915/parameters/enable_psr", "r"); + if (file) { + int psr_enabled = 0; + int value; + + if (fscanf(file, "%d", &value) == 1) + psr_enabled = value > 0; + fclose(file); + + DBG(("%s: module parameter 'enable_psr' enabled? %d\n", + __FUNCTION__, psr_enabled)); + + if (psr_enabled) + return true; + } + } + return false; } @@ -5955,7 +7673,7 @@ sna_mode_set_primary(struct sna *sna) DBG(("%s: setting PrimaryOutput %s\n", __FUNCTION__, output->name)); rr->primaryOutput = output->randr_output; - RROutputChanged(rr->primaryOutput, 0); + RROutputChanged(rr->primaryOutput, FALSE); rr->layoutChanged = TRUE; break; } @@ -5974,12 +7692,9 @@ sna_mode_disable(struct sna *sna) if (!sna->scrn->vtSema) return false; - /* XXX we will cause previously hidden cursors to be reshown, but - * this should be a rare fixup case for severe fragmentation. 
- */ - sna_hide_cursors(sna->scrn); + sna_disable_cursors(sna->scrn); for (i = 0; i < sna->mode.num_real_crtc; i++) - sna_crtc_disable(config->crtc[i]); + sna_crtc_disable(config->crtc[i], false); assert(sna->mode.front_active == 0); sna_mode_wakeup(sna); @@ -6001,6 +7716,11 @@ sna_mode_enable(struct sna *sna) if (!sna->scrn->vtSema) return; + if (sna->mode.hidden) { + DBG(("%s: hidden outputs\n", __FUNCTION__)); + return; + } + for (i = 0; i < sna->mode.num_real_crtc; i++) { xf86CrtcPtr crtc = config->crtc[i]; @@ -6016,13 +7736,30 @@ sna_mode_enable(struct sna *sna) } update_flush_interval(sna); - sna_show_cursors(sna->scrn); + sna_cursors_reload(sna); sna->mode.dirty = false; } +static void sna_randr_close(struct sna *sna) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + int n; + + /* The RR structs are freed early during CloseScreen as they + * are tracked as Resources. However, we may be tempted to + * access them during shutdown so decouple them now. + */ + for (n = 0; n < config->num_output; n++) + config->output[n]->randr_output = NULL; + + for (n = 0; n < config->num_crtc; n++) + config->crtc[n]->randr_crtc = NULL; +} + void sna_mode_close(struct sna *sna) { + sna_randr_close(sna); sna_mode_wakeup(sna); if (sna->flags & SNA_IS_HOSTED) @@ -6077,15 +7814,22 @@ xf86CrtcPtr sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); - xf86CrtcPtr best_crtc; - int best_coverage, c; + xf86CrtcPtr best_crtc = NULL; + int best_coverage = -1, c; if (sna->flags & SNA_IS_HOSTED) return NULL; /* If we do not own the VT, we do not own the CRTC either */ - if (!sna->scrn->vtSema) + if (!sna->scrn->vtSema) { + DBG(("%s: none, VT switched\n", __FUNCTION__)); + return NULL; + } + + if (sna->mode.hidden) { + DBG(("%s: none, hidden outputs\n", __FUNCTION__)); return NULL; + } DBG(("%s for box=(%d, %d), (%d, %d)\n", __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); @@ -6107,10 
+7851,10 @@ sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired) cover_box.x2, cover_box.y2)); return desired; } + best_crtc = desired; + best_coverage = 0; } - best_crtc = NULL; - best_coverage = 0; for (c = 0; c < sna->mode.num_real_crtc; c++) { xf86CrtcPtr crtc = config->crtc[c]; BoxRec cover_box; @@ -6156,6 +7900,38 @@ sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired) return best_crtc; } +static xf86CrtcPtr first_active_crtc(struct sna *sna) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + int n; + + for (n = 0; n < sna->mode.num_real_crtc; n++) { + xf86CrtcPtr crtc = config->crtc[n]; + if (to_sna_crtc(crtc)->bo) + return crtc; + } + + /* No active, use the first as a placeholder */ + if (sna->mode.num_real_crtc) + return config->crtc[0]; + + return NULL; +} + +xf86CrtcPtr sna_primary_crtc(struct sna *sna) +{ + rrScrPrivPtr rr = rrGetScrPriv(xf86ScrnToScreen(sna->scrn)); + if (rr && rr->primaryOutput) { + xf86OutputPtr output = rr->primaryOutput->devPrivate; + if (output->crtc && + output->scrn == sna->scrn && + to_sna_crtc(output->crtc)) + return output->crtc; + } + + return first_active_crtc(sna); +} + #define MI_LOAD_REGISTER_IMM (0x22<<23) static bool sna_emit_wait_for_scanline_hsw(struct sna *sna, @@ -6433,7 +8209,7 @@ sna_wait_for_scanline(struct sna *sna, y2 /= 2; } - pipe = sna_crtc_to_pipe(crtc); + pipe = sna_crtc_pipe(crtc); DBG(("%s: pipe=%d, y1=%d, y2=%d, full_height?=%d\n", __FUNCTION__, pipe, y1, y2, full_height)); @@ -6457,19 +8233,101 @@ sna_wait_for_scanline(struct sna *sna, return ret; } +static bool sna_mode_shutdown_crtc(xf86CrtcPtr crtc) +{ + struct sna *sna = to_sna(crtc->scrn); + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); + bool disabled = false; + int o; + + xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, + "%s: invalid state found on pipe %d, disabling CRTC:%d\n", + __FUNCTION__, + __sna_crtc_pipe(to_sna_crtc(crtc)), + __sna_crtc_id(to_sna_crtc(crtc))); + 
sna_crtc_disable(crtc, true); +#if XF86_CRTC_VERSION >= 3 + crtc->active = FALSE; +#endif + if (crtc->enabled) { + crtc->enabled = FALSE; + disabled = true; + } + + for (o = 0; o < sna->mode.num_real_output; o++) { + xf86OutputPtr output = config->output[o]; + + if (output->crtc != crtc) + continue; + + output->funcs->dpms(output, DPMSModeOff); + output->crtc = NULL; + } + + return disabled; +} + +static bool +sna_mode_disable_secondary_planes(struct sna *sna) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + bool disabled = false; + int c; + + /* Disable all secondary planes on our CRTCs, just in case + * other userspace left garbage in them. + */ + for (c = 0; c < sna->mode.num_real_crtc; c++) { + xf86CrtcPtr crtc = config->crtc[c]; + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); + struct plane *plane; + + list_for_each_entry(plane, &sna_crtc->sprites, link) { + struct local_mode_get_plane p; + struct local_mode_set_plane s; + + VG_CLEAR(p); + p.plane_id = plane->id; + p.count_format_types = 0; + if (drmIoctl(sna->kgem.fd, + LOCAL_IOCTL_MODE_GETPLANE, + &p)) + continue; + + if (p.fb_id == 0 || p.crtc_id == 0) + continue; + + memset(&s, 0, sizeof(s)); + s.plane_id = p.plane_id; + s.crtc_id = p.crtc_id; + if (drmIoctl(sna->kgem.fd, + LOCAL_IOCTL_MODE_SETPLANE, + &s)) + disabled |= sna_mode_shutdown_crtc(crtc); + } + } + + return disabled; +} + void sna_mode_check(struct sna *sna) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); - int i; + bool disabled; + int c, o; if (sna->flags & SNA_IS_HOSTED) return; - DBG(("%s\n", __FUNCTION__)); + DBG(("%s: hidden?=%d\n", __FUNCTION__, sna->mode.hidden)); + if (sna->mode.hidden) + return; + + disabled = sna_mode_disable_secondary_planes(sna); /* Validate CRTC attachments and force consistency upon the kernel */ - for (i = 0; i < sna->mode.num_real_crtc; i++) { - xf86CrtcPtr crtc = config->crtc[i]; + for (c = 0; c < sna->mode.num_real_crtc; c++) { + xf86CrtcPtr crtc = config->crtc[c]; struct 
sna_crtc *sna_crtc = to_sna_crtc(crtc); struct drm_mode_crtc mode; uint32_t expected[2]; @@ -6483,7 +8341,7 @@ void sna_mode_check(struct sna *sna) expected[1] = sna_crtc->flip_bo ? fb_id(sna_crtc->flip_bo) : -1; VG_CLEAR(mode); - mode.crtc_id = sna_crtc->id; + mode.crtc_id = __sna_crtc_id(sna_crtc); if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode)) continue; @@ -6492,16 +8350,12 @@ void sna_mode_check(struct sna *sna) mode.crtc_id, mode.mode_valid, mode.fb_id, expected[0], expected[1])); - if (mode.fb_id != expected[0] && mode.fb_id != expected[1]) { - xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, - "%s: invalid state found on pipe %d, disabling CRTC:%d\n", - __FUNCTION__, sna_crtc->pipe, sna_crtc->id); - sna_crtc_disable(crtc); - } + if (mode.fb_id != expected[0] && mode.fb_id != expected[1]) + disabled |= sna_mode_shutdown_crtc(crtc); } - for (i = 0; i < config->num_output; i++) { - xf86OutputPtr output = config->output[i]; + for (o = 0; o < config->num_output; o++) { + xf86OutputPtr output = config->output[o]; struct sna_output *sna_output; if (output->crtc) @@ -6515,26 +8369,16 @@ void sna_mode_check(struct sna *sna) } update_flush_interval(sna); + + if (disabled) + xf86RandR12TellChanged(xf86ScrnToScreen(sna->scrn)); } static bool sna_crtc_hide_planes(struct sna *sna, struct sna_crtc *crtc) { -#define LOCAL_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct local_mode_set_plane) - struct local_mode_set_plane { - uint32_t plane_id; - uint32_t crtc_id; - uint32_t fb_id; /* fb object contains surface format type */ - uint32_t flags; - - /* Signed dest location allows it to be partially off screen */ - int32_t crtc_x, crtc_y; - uint32_t crtc_w, crtc_h; - - /* Source values are 16.16 fixed point */ - uint32_t src_x, src_y; - uint32_t src_h, src_w; - } s; + struct local_mode_set_plane s; + struct plane *plane; if (crtc->primary.id == 0) return false; @@ -6544,8 +8388,10 @@ sna_crtc_hide_planes(struct sna *sna, struct sna_crtc *crtc) if (drmIoctl(sna->kgem.fd, 
LOCAL_IOCTL_MODE_SETPLANE, &s)) return false; - s.plane_id = crtc->sprite.id; - (void)drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s); + list_for_each_entry(plane, &crtc->sprites, link) { + s.plane_id = plane->id; + (void)drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s); + } __sna_crtc_disable(sna, crtc); return true; @@ -6561,21 +8407,22 @@ void sna_mode_reset(struct sna *sna) DBG(("%s\n", __FUNCTION__)); - sna_hide_cursors(sna->scrn); + sna_disable_cursors(sna->scrn); for (i = 0; i < sna->mode.num_real_crtc; i++) if (!sna_crtc_hide_planes(sna, to_sna_crtc(config->crtc[i]))) - sna_crtc_disable(config->crtc[i]); + sna_crtc_disable(config->crtc[i], true); assert(sna->mode.front_active == 0); for (i = 0; i < sna->mode.num_real_crtc; i++) { struct sna_crtc *sna_crtc = to_sna_crtc(config->crtc[i]); + struct plane *plane; assert(sna_crtc != NULL); - sna_crtc->dpms_mode = -1; /* Force the rotation property to be reset on next use */ rotation_reset(&sna_crtc->primary); - rotation_reset(&sna_crtc->sprite); + list_for_each_entry(plane, &sna_crtc->sprites, link) + rotation_reset(plane); } /* VT switching, likely to be fbcon so make the backlight usable */ @@ -6641,9 +8488,10 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo { int16_t sx, sy; struct sna *sna = to_sna(crtc->scrn); - ScreenPtr screen = sna->scrn->pScreen; + ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); DrawablePtr draw = crtc_source(crtc, &sx, &sy); PictFormatPtr format; + PictTransform T; PicturePtr src, dst; PixmapPtr pixmap; int depth, error; @@ -6664,6 +8512,14 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo __FUNCTION__, format->format, depth, draw->bitsPerPixel, bo->pitch, crtc->mode.HDisplay, crtc->mode.VDisplay)); + if (sx | sy) + RegionTranslate(region, sx, sy); + error = !sna_drawable_move_region_to_cpu(draw, region, MOVE_READ); + if (sx | sy) + RegionTranslate(region, -sx, -sy); + if (error) + return; + ptr = 
kgem_bo_map__gtt(&sna->kgem, bo); if (ptr == NULL) return; @@ -6683,9 +8539,37 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo if (!src) goto free_pixmap; - error = SetPictureTransform(src, &crtc->crtc_to_framebuffer); - if (error) - goto free_src; + pixman_transform_init_translate(&T, sx << 16, sy << 16); + pixman_transform_multiply(&T, &T, &crtc->crtc_to_framebuffer); + if (!sna_transform_is_integer_translation(&T, &sx, &sy)) { +#define f2d(x) (((double)(x))/65536.) + DBG(("%s: transform=[[%f %f %f], [%f %f %f], [%f %f %f]] (raw [[%x %x %x], [%x %x %x], [%x %x %x]])\n", + __FUNCTION__, + f2d(T.matrix[0][0]), + f2d(T.matrix[0][1]), + f2d(T.matrix[0][2]), + f2d(T.matrix[1][0]), + f2d(T.matrix[1][1]), + f2d(T.matrix[1][2]), + f2d(T.matrix[2][0]), + f2d(T.matrix[2][1]), + f2d(T.matrix[2][2]), + T.matrix[0][0], + T.matrix[0][1], + T.matrix[0][2], + T.matrix[1][0], + T.matrix[1][1], + T.matrix[1][2], + T.matrix[2][0], + T.matrix[2][1], + T.matrix[2][2])); +#undef f2d + + error = SetPictureTransform(src, &T); + if (error) + goto free_src; + sx = sy = 0; + } if (crtc->filter && crtc->transform_in_use) SetPicturePictFilter(src, crtc->filter, @@ -6733,10 +8617,11 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo { int16_t sx, sy; struct sna *sna = to_sna(crtc->scrn); - ScreenPtr screen = crtc->scrn->pScreen; + ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); DrawablePtr draw = crtc_source(crtc, &sx, &sy); struct sna_composite_op tmp; PictFormatPtr format; + PictTransform T; PicturePtr src, dst; PixmapPtr pixmap; const BoxRec *b; @@ -6777,9 +8662,14 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo if (!src) goto free_pixmap; - error = SetPictureTransform(src, &crtc->crtc_to_framebuffer); - if (error) - goto free_src; + pixman_transform_init_translate(&T, sx << 16, sy << 16); + pixman_transform_multiply(&T, &T, &crtc->crtc_to_framebuffer); + if 
(!sna_transform_is_integer_translation(&T, &sx, &sy)) { + error = SetPictureTransform(src, &T); + if (error) + goto free_src; + sx = sy = 0; + } if (crtc->filter && crtc->transform_in_use) SetPicturePictFilter(src, crtc->filter, @@ -6793,36 +8683,38 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo ValidatePicture(src); ValidatePicture(dst); - if (!sna->render.composite(sna, - PictOpSrc, src, NULL, dst, - sx, sy, - 0, 0, - 0, 0, - crtc->mode.HDisplay, crtc->mode.VDisplay, - COMPOSITE_PARTIAL, memset(&tmp, 0, sizeof(tmp)))) { - DBG(("%s: unsupported operation!\n", __FUNCTION__)); - sna_crtc_redisplay__fallback(crtc, region, bo); - goto free_dst; - } - + /* Composite each box individually as if we are dealing with a rotation + * on a large display, we may have to perform intermediate copies. We + * can then minimise the overdraw by looking at individual boxes rather + * than the bbox. + */ n = region_num_rects(region); b = region_rects(region); do { - BoxRec box; - - box = *b++; + BoxRec box = *b; transformed_box(&box, crtc); DBG(("%s: (%d, %d)x(%d, %d) -> (%d, %d), (%d, %d)\n", __FUNCTION__, - b[-1].x1, b[-1].y1, b[-1].x2-b[-1].x1, b[-1].y2-b[-1].y1, + b->x1, b->y1, b->x2-b->x1, b->y2-b->y1, box.x1, box.y1, box.x2, box.y2)); - tmp.box(sna, &tmp, &box); - } while (--n); - tmp.done(sna, &tmp); + if (!sna->render.composite(sna, + PictOpSrc, src, NULL, dst, + sx + box.x1, sy + box.y1, + 0, 0, + box.x1, box.y1, + box.x2 - box.x1, box.y2 - box.y1, + 0, memset(&tmp, 0, sizeof(tmp)))) { + DBG(("%s: unsupported operation!\n", __FUNCTION__)); + sna_crtc_redisplay__fallback(crtc, region, bo); + break; + } else { + tmp.box(sna, &tmp, &box); + tmp.done(sna, &tmp); + } + } while (b++, --n); -free_dst: FreePicture(dst, None); free_src: FreePicture(src, None); @@ -6839,7 +8731,7 @@ sna_crtc_redisplay(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo *bo) struct sna_pixmap *priv = sna_pixmap((PixmapPtr)draw); DBG(("%s: crtc %d [pipe=%d], damage 
(%d, %d), (%d, %d) x %d\n", - __FUNCTION__, to_sna_crtc(crtc)->id, to_sna_crtc(crtc)->pipe, + __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc), region->extents.x1, region->extents.y1, region->extents.x2, region->extents.y2, region_num_rects(region))); @@ -6898,7 +8790,10 @@ sna_crtc_redisplay(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo *bo) static void shadow_flip_handler(struct drm_event_vblank *e, void *data) { - sna_mode_redisplay(data); + struct sna *sna = data; + + if (!sna->mode.shadow_wait) + sna_mode_redisplay(sna); } void sna_shadow_set_crtc(struct sna *sna, @@ -6908,18 +8803,23 @@ void sna_shadow_set_crtc(struct sna *sna, struct sna_crtc *sna_crtc = to_sna_crtc(crtc); struct sna_pixmap *priv; + assert(sna_crtc); DBG(("%s: setting shadow override for CRTC:%d to handle=%d\n", - __FUNCTION__, sna_crtc->id, bo->handle)); + __FUNCTION__, __sna_crtc_id(sna_crtc), bo->handle)); assert(sna->flags & SNA_TEAR_FREE); - assert(sna_crtc); assert(!sna_crtc->transform); if (sna_crtc->client_bo != bo) { - if (sna_crtc->client_bo) + if (sna_crtc->client_bo) { + assert(sna_crtc->client_bo->refcnt >= sna_crtc->client_bo->active_scanout); + sna_crtc->client_bo->active_scanout--; kgem_bo_destroy(&sna->kgem, sna_crtc->client_bo); + } sna_crtc->client_bo = kgem_bo_reference(bo); + sna_crtc->client_bo->active_scanout++; + assert(sna_crtc->client_bo->refcnt >= sna_crtc->client_bo->active_scanout); sna_crtc_damage(crtc); } @@ -6969,11 +8869,13 @@ void sna_shadow_unset_crtc(struct sna *sna, struct sna_crtc *sna_crtc = to_sna_crtc(crtc); DBG(("%s: clearin shadow override for CRTC:%d\n", - __FUNCTION__, sna_crtc->id)); + __FUNCTION__, __sna_crtc_id(sna_crtc))); if (sna_crtc->client_bo == NULL) return; + assert(sna_crtc->client_bo->refcnt >= sna_crtc->client_bo->active_scanout); + sna_crtc->client_bo->active_scanout--; kgem_bo_destroy(&sna->kgem, sna_crtc->client_bo); sna_crtc->client_bo = NULL; list_del(&sna_crtc->shadow_link); @@ -6982,15 +8884,57 @@ void 
sna_shadow_unset_crtc(struct sna *sna, sna_crtc_damage(crtc); } +static bool move_crtc_to_gpu(struct sna *sna) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + int i; + + for (i = 0; i < sna->mode.num_real_crtc; i++) { + struct sna_crtc *crtc = to_sna_crtc(config->crtc[i]); + unsigned hint; + + assert(crtc); + + if (crtc->bo == NULL) + continue; + + if (crtc->slave_pixmap) + continue; + + if (crtc->client_bo) + continue; + + if (crtc->shadow_bo) + continue; + + hint = MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT; + if (sna->flags & SNA_TEAR_FREE) + hint |= __MOVE_FORCE; + + DBG(("%s: CRTC %d [pipe=%d] requires frontbuffer\n", + __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc))); + return sna_pixmap_move_to_gpu(sna->front, hint); + } + + return true; +} + void sna_mode_redisplay(struct sna *sna) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); RegionPtr region; int i; - if (!sna->mode.shadow_damage) + if (sna->mode.hidden) { + DBG(("%s: hidden outputs, skipping\n", __FUNCTION__)); + return; + } + + if (!sna->mode.shadow_enabled) return; + assert(sna->mode.shadow_damage); + DBG(("%s: posting shadow damage? %d (flips pending? %d, mode reconfiguration pending? %d)\n", __FUNCTION__, !RegionNil(DamageRegion(sna->mode.shadow_damage)), @@ -7012,21 +8956,23 @@ void sna_mode_redisplay(struct sna *sna) region->extents.x2, region->extents.y2)); if (sna->mode.flip_active) { - DamagePtr damage; - - damage = sna->mode.shadow_damage; - sna->mode.shadow_damage = NULL; + DBG(("%s: checking for %d outstanding flip completions\n", + __FUNCTION__, sna->mode.flip_active)); + sna->mode.dirty = true; while (sna->mode.flip_active && sna_mode_wakeup(sna)) ; + sna->mode.dirty = false; - sna->mode.shadow_damage = damage; + DBG(("%s: now %d outstanding flip completions (enabled? 
%d)\n", + __FUNCTION__, + sna->mode.flip_active, + sna->mode.shadow_enabled)); + if (sna->mode.flip_active || !sna->mode.shadow_enabled) + return; } - if (sna->mode.flip_active) - return; - - if (wedged(sna) || !sna_pixmap_move_to_gpu(sna->front, MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT)) { + if (!move_crtc_to_gpu(sna)) { DBG(("%s: forcing scanout update using the CPU\n", __FUNCTION__)); if (!sna_pixmap_move_to_cpu(sna->front, MOVE_READ)) return; @@ -7047,90 +8993,14 @@ void sna_mode_redisplay(struct sna *sna) damage.data = NULL; RegionIntersect(&damage, &damage, region); if (!box_empty(&damage.extents)) { - struct kgem_bo *bo = NULL; - DBG(("%s: fallback intersects pipe=%d [(%d, %d), (%d, %d)]\n", - __FUNCTION__, sna_crtc->pipe, + __FUNCTION__, __sna_crtc_pipe(sna_crtc), damage.extents.x1, damage.extents.y1, damage.extents.x2, damage.extents.y2)); - if (sna->flags & SNA_TEAR_FREE) { - RegionRec new_damage; - - RegionNull(&new_damage); - RegionCopy(&new_damage, &damage); - - bo = sna_crtc->client_bo; - if (bo == NULL) { - damage.extents = crtc->bounds; - damage.data = NULL; - bo = kgem_create_2d(&sna->kgem, - crtc->mode.HDisplay, - crtc->mode.VDisplay, - crtc->scrn->bitsPerPixel, - sna_crtc->bo->tiling, - CREATE_SCANOUT); - } else - RegionUnion(&damage, &damage, &sna_crtc->client_damage); - - DBG(("%s: TearFree fallback, shadow handle=%d, crtc handle=%d\n", __FUNCTION__, bo->handle, sna_crtc->bo->handle)); - - sna_crtc->client_damage = new_damage; - } - - if (bo == NULL) - bo = sna_crtc->bo; - sna_crtc_redisplay__fallback(crtc, &damage, bo); - - if (bo != sna_crtc->bo) { - struct drm_mode_crtc_page_flip arg; - - arg.crtc_id = sna_crtc->id; - arg.fb_id = get_fb(sna, bo, - crtc->mode.HDisplay, - crtc->mode.VDisplay); - - arg.user_data = (uintptr_t)sna_crtc; - arg.flags = DRM_MODE_PAGE_FLIP_EVENT; - arg.reserved = 0; - - if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { - if (sna_crtc_flip(sna, sna_crtc, bo, 0, 0)) { - 
assert(sna_crtc->bo->active_scanout); - assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout); - sna_crtc->bo->active_scanout--; - kgem_bo_destroy(&sna->kgem, sna_crtc->bo); - - sna_crtc->bo = bo; - sna_crtc->bo->active_scanout++; - sna_crtc->client_bo = NULL; - } else { - DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", - __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno)); - xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, - "Page flipping failed, disabling TearFree\n"); - sna->flags &= ~SNA_TEAR_FREE; - - damage.extents = crtc->bounds; - damage.data = NULL; - sna_crtc_redisplay__fallback(crtc, &damage, sna_crtc->bo); - - kgem_bo_destroy(&sna->kgem, bo); - sna_crtc->client_bo = NULL; - } - } else { - sna->mode.flip_active++; - - assert(sna_crtc->flip_bo == NULL); - sna_crtc->flip_handler = shadow_flip_handler; - sna_crtc->flip_data = sna; - sna_crtc->flip_bo = bo; - sna_crtc->flip_bo->active_scanout++; - sna_crtc->flip_serial = sna_crtc->mode_serial; - - sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo); - } - } + sna_crtc_redisplay__fallback(crtc, + &damage, + sna_crtc->bo); } RegionUninit(&damage); @@ -7171,6 +9041,7 @@ void sna_mode_redisplay(struct sna *sna) xf86CrtcPtr crtc = config->crtc[i]; struct sna_crtc *sna_crtc = to_sna_crtc(crtc); RegionRec damage; + int sigio; assert(sna_crtc != NULL); DBG(("%s: crtc[%d] transformed? 
%d\n", @@ -7192,30 +9063,38 @@ void sna_mode_redisplay(struct sna *sna) region_num_rects(&damage), damage.extents.x1, damage.extents.y1, damage.extents.x2, damage.extents.y2)); + sigio = sigio_block(); if (!box_empty(&damage.extents)) { if (sna->flags & SNA_TEAR_FREE) { + RegionRec new_damage; struct drm_mode_crtc_page_flip arg; struct kgem_bo *bo; - RegionUninit(&damage); - damage.extents = crtc->bounds; - damage.data = NULL; + RegionNull(&new_damage); + RegionCopy(&new_damage, &damage); - bo = sna_crtc->client_bo; - if (bo == NULL) + bo = sna_crtc->cache_bo; + if (bo == NULL) { + damage.extents = crtc->bounds; + damage.data = NULL; bo = kgem_create_2d(&sna->kgem, crtc->mode.HDisplay, crtc->mode.VDisplay, crtc->scrn->bitsPerPixel, sna_crtc->bo->tiling, CREATE_SCANOUT); - if (bo == NULL) - goto disable1; + if (bo == NULL) + continue; + } else + RegionUnion(&damage, &damage, &sna_crtc->crtc_damage); + sna_crtc->crtc_damage = new_damage; sna_crtc_redisplay(crtc, &damage, bo); kgem_bo_submit(&sna->kgem, bo); + __kgem_bo_clear_dirty(bo); - arg.crtc_id = sna_crtc->id; + assert_crtc_fb(sna, sna_crtc); + arg.crtc_id = __sna_crtc_id(sna_crtc); arg.fb_id = get_fb(sna, bo, crtc->mode.HDisplay, crtc->mode.VDisplay); @@ -7228,6 +9107,9 @@ void sna_mode_redisplay(struct sna *sna) if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { if (sna_crtc_flip(sna, sna_crtc, bo, 0, 0)) { + DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", + __FUNCTION__, sna_crtc->bo->handle, sna_crtc->bo->active_scanout - 1, + bo->handle, bo->active_scanout)); assert(sna_crtc->bo->active_scanout); assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout); sna_crtc->bo->active_scanout--; @@ -7235,13 +9117,12 @@ void sna_mode_redisplay(struct sna *sna) sna_crtc->bo = kgem_bo_reference(bo); sna_crtc->bo->active_scanout++; - sna_crtc->client_bo = kgem_bo_reference(bo); } else { BoxRec box; DrawableRec tmp; DBG(("%s: flip [fb=%d] on crtc %d 
[%d, pipe=%d] failed - %d\n", - __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno)); + __FUNCTION__, arg.fb_id, i, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), errno)); xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, "Page flipping failed, disabling TearFree\n"); sna->flags &= ~SNA_TEAR_FREE; @@ -7260,13 +9141,13 @@ disable1: &box, 1, COPY_LAST)) { xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n", - __FUNCTION__, sna_crtc->id, sna_crtc->pipe); - sna_crtc_disable(crtc); + __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc)); + sna_crtc_disable(crtc, false); } - - kgem_bo_destroy(&sna->kgem, bo); - sna_crtc->client_bo = NULL; } + + kgem_bo_destroy(&sna->kgem, bo); + sna_crtc->cache_bo = NULL; continue; } sna->mode.flip_active++; @@ -7279,13 +9160,20 @@ disable1: sna_crtc->flip_serial = sna_crtc->mode_serial; sna_crtc->flip_pending = true; - sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo); + if (sna_crtc->bo != sna->mode.shadow) { + assert_scanout(&sna->kgem, sna_crtc->bo, + crtc->mode.HDisplay, crtc->mode.VDisplay); + sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo); + } + DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", + __FUNCTION__, __sna_crtc_id(sna_crtc), sna_crtc->flip_bo->handle, sna_crtc->flip_bo->active_scanout, sna_crtc->flip_serial)); } else { sna_crtc_redisplay(crtc, &damage, sna_crtc->bo); kgem_scanout_flush(&sna->kgem, sna_crtc->bo); } } RegionUninit(&damage); + sigio_unblock(sigio); if (sna_crtc->slave_damage) DamageEmpty(sna_crtc->slave_damage); @@ -7296,6 +9184,7 @@ disable1: struct kgem_bo *old = sna->mode.shadow; struct drm_mode_crtc_page_flip arg; uint32_t fb = 0; + int sigio; DBG(("%s: flipping TearFree outputs, current scanout handle=%d [active?=%d], new handle=%d [active=%d]\n", __FUNCTION__, old->handle, old->active_scanout, new->handle, new->active_scanout)); @@ -7307,7 +9196,9 @@ disable1: arg.reserved = 0; 
kgem_bo_submit(&sna->kgem, new); + __kgem_bo_clear_dirty(new); + sigio = sigio_block(); for (i = 0; i < sna->mode.num_real_crtc; i++) { struct sna_crtc *crtc = config->crtc[i]->driver_private; struct kgem_bo *flip_bo; @@ -7315,20 +9206,20 @@ disable1: assert(crtc != NULL); DBG(("%s: crtc %d [%d, pipe=%d] active? %d, transformed? %d\n", - __FUNCTION__, i, crtc->id, crtc->pipe, crtc->bo ? crtc->bo->handle : 0, crtc->transform)); + __FUNCTION__, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->bo ? crtc->bo->handle : 0, crtc->transform)); if (crtc->bo == NULL || crtc->transform) continue; assert(config->crtc[i]->enabled); - assert(crtc->dpms_mode <= DPMSModeOn); assert(crtc->flip_bo == NULL); + assert_crtc_fb(sna, crtc); - arg.crtc_id = crtc->id; + arg.crtc_id = __sna_crtc_id(crtc); arg.user_data = (uintptr_t)crtc; if (crtc->client_bo) { DBG(("%s: apply shadow override bo for CRTC:%d on pipe=%d, handle=%d\n", - __FUNCTION__, crtc->id, crtc->pipe, crtc->client_bo->handle)); + __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->client_bo->handle)); arg.fb_id = get_fb(sna, crtc->client_bo, crtc->base->mode.HDisplay, crtc->base->mode.VDisplay); @@ -7356,6 +9247,7 @@ fixup_shadow: } } + sigio_unblock(sigio); return; } @@ -7365,8 +9257,12 @@ fixup_shadow: y = crtc->base->y; } - if (crtc->bo == flip_bo) + if (crtc->bo == flip_bo) { + assert(crtc->bo->refcnt >= crtc->bo->active_scanout); + DBG(("%s: flip handle=%d is already on the CRTC\n", + __FUNCTION__, flip_bo->handle)); continue; + } if (flip_bo->pitch != crtc->bo->pitch || (y << 16 | x) != crtc->offset) { DBG(("%s: changing pitch (new %d =?= old %d) or offset (new %x =?= old %x)\n", @@ -7375,6 +9271,9 @@ fixup_shadow: y << 16 | x, crtc->offset)); fixup_flip: if (sna_crtc_flip(sna, crtc, flip_bo, x, y)) { + DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", + __FUNCTION__, crtc->bo->handle, crtc->bo->active_scanout-1, + flip_bo->handle, 
flip_bo->active_scanout)); assert(flip_bo != crtc->bo); assert(crtc->bo->active_scanout); assert(crtc->bo->refcnt >= crtc->bo->active_scanout); @@ -7389,9 +9288,11 @@ fixup_flip: crtc->bo = kgem_bo_reference(flip_bo); crtc->bo->active_scanout++; } else { - xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, - "Failed to prepare CRTC for page flipping, disabling TearFree\n"); - sna->flags &= ~SNA_TEAR_FREE; + if (sna->flags & SNA_TEAR_FREE) { + xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, + "Failed to prepare CRTC for page flipping, disabling TearFree\n"); + sna->flags &= ~SNA_TEAR_FREE; + } if (sna->mode.flip_active == 0) { DBG(("%s: abandoning flip attempt\n", __FUNCTION__)); @@ -7400,15 +9301,15 @@ fixup_flip: xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n", - __FUNCTION__, crtc->id, crtc->pipe); - sna_crtc_disable(crtc->base); + __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc)); + sna_crtc_disable(crtc->base, false); } continue; } if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { ERR(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", - __FUNCTION__, arg.fb_id, i, crtc->id, crtc->pipe, errno)); + __FUNCTION__, arg.fb_id, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), errno)); goto fixup_flip; } sna->mode.flip_active++; @@ -7421,6 +9322,9 @@ fixup_flip: crtc->flip_serial = crtc->mode_serial; crtc->flip_pending = true; + DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", + __FUNCTION__, __sna_crtc_id(crtc), crtc->flip_bo->handle, crtc->flip_bo->active_scanout, crtc->flip_serial)); + { struct drm_i915_gem_busy busy = { flip_bo->handle }; if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0) { @@ -7435,6 +9339,7 @@ fixup_flip: } } } + sigio_unblock(sigio); DBG(("%s: flipped %d outputs, shadow active? 
%d\n", __FUNCTION__, @@ -7486,7 +9391,9 @@ again: struct drm_event *e = (struct drm_event *)&buffer[i]; switch (e->type) { case DRM_EVENT_VBLANK: - if (((uintptr_t)((struct drm_event_vblank *)e)->user_data) & 2) + if (sna->mode.shadow_wait) + defer_event(sna, e); + else if (((uintptr_t)((struct drm_event_vblank *)e)->user_data) & 2) sna_present_vblank_handler((struct drm_event_vblank *)e); else sna_dri2_vblank_handler((struct drm_event_vblank *)e); @@ -7495,13 +9402,19 @@ again: { struct drm_event_vblank *vbl = (struct drm_event_vblank *)e; struct sna_crtc *crtc = (void *)(uintptr_t)vbl->user_data; + uint64_t msc; /* Beware Zaphod! */ sna = to_sna(crtc->base->scrn); - crtc->swap.tv_sec = vbl->tv_sec; - crtc->swap.tv_usec = vbl->tv_usec; - crtc->swap.msc = msc64(crtc, vbl->sequence); + if (msc64(crtc, vbl->sequence, &msc)) { + DBG(("%s: recording last swap on pipe=%d, frame %d [%08llx], time %d.%06d\n", + __FUNCTION__, __sna_crtc_pipe(crtc), vbl->sequence, (long long)msc, vbl->tv_sec, vbl->tv_usec)); + crtc->swap.tv_sec = vbl->tv_sec; + crtc->swap.tv_usec = vbl->tv_usec; + crtc->swap.msc = msc; + } + assert(crtc->flip_pending); crtc->flip_pending = false; assert(crtc->flip_bo); @@ -7509,10 +9422,12 @@ again: assert(crtc->flip_bo->refcnt >= crtc->flip_bo->active_scanout); if (crtc->flip_serial == crtc->mode_serial) { - DBG(("%s: removing handle=%d from scanout, installing handle=%d\n", - __FUNCTION__, crtc->bo->handle, crtc->flip_bo->handle)); + DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", + __FUNCTION__, crtc->bo->handle, crtc->bo->active_scanout - 1, + crtc->flip_bo->handle, crtc->flip_bo->active_scanout)); assert(crtc->bo->active_scanout); assert(crtc->bo->refcnt >= crtc->bo->active_scanout); + crtc->bo->active_scanout--; kgem_bo_destroy(&sna->kgem, crtc->bo); @@ -7523,6 +9438,8 @@ again: crtc->bo = crtc->flip_bo; crtc->flip_bo = NULL; + + assert_crtc_fb(sna, crtc); } else { 
crtc->flip_bo->active_scanout--; kgem_bo_destroy(&sna->kgem, crtc->flip_bo); @@ -7531,8 +9448,10 @@ again: DBG(("%s: flip complete, pending? %d\n", __FUNCTION__, sna->mode.flip_active)); assert(sna->mode.flip_active); - if (--sna->mode.flip_active == 0) + if (--sna->mode.flip_active == 0) { + assert(crtc->flip_handler); crtc->flip_handler(vbl, crtc->flip_data); + } } break; default: diff --git a/src/sna/sna_display_fake.c b/src/sna/sna_display_fake.c index 4d74c38d..fa26bda1 100644 --- a/src/sna/sna_display_fake.c +++ b/src/sna/sna_display_fake.c @@ -96,12 +96,6 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, } static void -sna_crtc_gamma_set(xf86CrtcPtr crtc, - CARD16 *red, CARD16 *green, CARD16 *blue, int size) -{ -} - -static void sna_crtc_destroy(xf86CrtcPtr crtc) { } @@ -109,7 +103,6 @@ sna_crtc_destroy(xf86CrtcPtr crtc) static const xf86CrtcFuncsRec sna_crtc_funcs = { .dpms = sna_crtc_dpms, .set_mode_major = sna_crtc_set_mode_major, - .gamma_set = sna_crtc_gamma_set, .destroy = sna_crtc_destroy, }; @@ -192,7 +185,7 @@ static const xf86OutputFuncsRec sna_output_funcs = { static Bool sna_mode_resize(ScrnInfoPtr scrn, int width, int height) { - ScreenPtr screen = scrn->pScreen; + ScreenPtr screen = xf86ScrnToScreen(scrn); PixmapPtr new_front; DBG(("%s (%d, %d) -> (%d, %d)\n", __FUNCTION__, @@ -262,6 +255,7 @@ static bool add_fake_output(struct sna *sna, bool late) output->mm_height = 0; output->interlaceAllowed = FALSE; output->subpixel_order = SubPixelNone; + output->status = XF86OutputStatusDisconnected; output->possible_crtcs = ~((1 << sna->mode.num_real_crtc) - 1); output->possible_clones = ~((1 << sna->mode.num_real_output) - 1); @@ -297,6 +291,8 @@ static bool add_fake_output(struct sna *sna, bool late) RRCrtcSetRotations(crtc->randr_crtc, RR_Rotate_All | RR_Reflect_All); + if (!RRCrtcGammaSetSize(crtc->randr_crtc, 256)) + goto err; } sna->mode.num_fake++; @@ -312,13 +308,16 @@ err: continue; xf86OutputDestroy(output); + i--; } for (i = 
0; i < xf86_config->num_crtc; i++) { crtc = xf86_config->crtc[i]; if (crtc->driver_private) continue; + xf86CrtcDestroy(crtc); + i--; } sna->mode.num_fake = -1; return false; diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c index e5c4d53e..d89525cc 100644 --- a/src/sna/sna_dri2.c +++ b/src/sna/sna_dri2.c @@ -82,12 +82,23 @@ get_private(void *buffer) return (struct sna_dri2_private *)((DRI2Buffer2Ptr)buffer+1); } +pure static inline DRI2BufferPtr sna_pixmap_get_buffer(PixmapPtr pixmap) +{ + assert(pixmap->refcnt); + return ((void **)__get_private(pixmap, sna_pixmap_key))[2]; +} + +static inline void sna_pixmap_set_buffer(PixmapPtr pixmap, void *ptr) +{ + assert(pixmap->refcnt); + ((void **)__get_private(pixmap, sna_pixmap_key))[2] = ptr; +} + #if DRI2INFOREC_VERSION >= 4 enum event_type { WAITMSC = 0, SWAP, - SWAP_WAIT, - SWAP_THROTTLE, + SWAP_COMPLETE, FLIP, FLIP_THROTTLE, FLIP_COMPLETE, @@ -98,6 +109,7 @@ struct dri_bo { struct list link; struct kgem_bo *bo; uint32_t name; + unsigned flags; }; struct sna_dri2_event { @@ -108,6 +120,8 @@ struct sna_dri2_event { xf86CrtcPtr crtc; int pipe; bool queued; + bool sync; + bool chained; /* for swaps & flips only */ DRI2SwapEventPtr event_complete; @@ -116,35 +130,146 @@ struct sna_dri2_event { DRI2BufferPtr back; struct kgem_bo *bo; + struct copy { + struct kgem_bo *bo; + unsigned flags; + uint32_t name; + uint32_t size; + } pending; + struct sna_dri2_event *chain; - struct list cache; struct list link; - int mode; + int flip_continue; + int keepalive; + int signal; }; +#if DRI2INFOREC_VERSION < 10 +#undef USE_ASYNC_SWAP +#endif + +#if USE_ASYNC_SWAP +#define KEEPALIVE 8 /* wait ~100ms before discarding swap caches */ +#define APPLY_DAMAGE 0 +#else +#define USE_ASYNC_SWAP 0 +#define KEEPALIVE 1 +#define APPLY_DAMAGE 1 +#endif + static void sna_dri2_flip_event(struct sna_dri2_event *flip); +inline static DRI2BufferPtr dri2_window_get_front(WindowPtr win); + +static struct kgem_bo * +__sna_dri2_copy_region(struct sna 
*sna, DrawablePtr draw, RegionPtr region, + DRI2BufferPtr src, DRI2BufferPtr dst, + unsigned flags); + +inline static void +__sna_dri2_copy_event(struct sna_dri2_event *info, unsigned flags) +{ + DBG(("%s: flags = %x\n", __FUNCTION__, flags)); + assert(info->front != info->back); + info->bo = __sna_dri2_copy_region(info->sna, info->draw, NULL, + info->back, info->front, + flags); + info->front->flags = info->back->flags; +} + +static int front_pitch(DrawablePtr draw) +{ + DRI2BufferPtr buffer; + + buffer = NULL; + if (draw->type != DRAWABLE_PIXMAP) + buffer = dri2_window_get_front((WindowPtr)draw); + if (buffer == NULL) + buffer = sna_pixmap_get_buffer(get_drawable_pixmap(draw)); + + return buffer ? buffer->pitch : 0; +} + +struct dri2_window { + DRI2BufferPtr front; + struct sna_dri2_event *chain; + xf86CrtcPtr crtc; + int64_t msc_delta; + struct list cache; + uint32_t cache_size; + int scanout; +}; + +static struct dri2_window *dri2_window(WindowPtr win) +{ + assert(win->drawable.type != DRAWABLE_PIXMAP); + return ((void **)__get_private(win, sna_window_key))[1]; +} + +static bool use_scanout(struct sna *sna, + DrawablePtr draw, + struct dri2_window *priv) +{ + if (priv->front) + return true; + + if (priv->scanout < 0) + priv->scanout = + (sna->flags & (SNA_LINEAR_FB | SNA_NO_WAIT | SNA_NO_FLIP)) == 0 && + draw->width == sna->front->drawable.width && + draw->height == sna->front->drawable.height && + draw->bitsPerPixel == sna->front->drawable.bitsPerPixel; + + return priv->scanout; +} static void sna_dri2_get_back(struct sna *sna, DrawablePtr draw, - DRI2BufferPtr back, - struct sna_dri2_event *info) + DRI2BufferPtr back) { + struct dri2_window *priv = dri2_window((WindowPtr)draw); + uint32_t size; struct kgem_bo *bo; + struct dri_bo *c; uint32_t name; + int flags; bool reuse; - DBG(("%s: draw size=%dx%d, buffer size=%dx%d\n", + DBG(("%s: draw size=%dx%d, back buffer handle=%d size=%dx%d, is-scanout? 
%d, active?=%d, pitch=%d, front pitch=%d\n", __FUNCTION__, draw->width, draw->height, - get_private(back)->size & 0xffff, get_private(back)->size >> 16)); - reuse = (draw->height << 16 | draw->width) == get_private(back)->size; + get_private(back)->bo->handle, + get_private(back)->size & 0xffff, get_private(back)->size >> 16, + get_private(back)->bo->scanout, + get_private(back)->bo->active_scanout, + back->pitch, front_pitch(draw))); + assert(priv); + + size = draw->height << 16 | draw->width; + if (size != priv->cache_size) { + while (!list_is_empty(&priv->cache)) { + c = list_first_entry(&priv->cache, struct dri_bo, link); + list_del(&c->link); + + DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); + assert(c->bo); + kgem_bo_destroy(&sna->kgem, c->bo); + + free(c); + } + priv->cache_size = size; + } + + reuse = size == get_private(back)->size; + if (reuse) + reuse = get_private(back)->bo->scanout == use_scanout(sna, draw, priv); + DBG(("%s: reuse backbuffer? %d\n", __FUNCTION__, reuse)); if (reuse) { bo = get_private(back)->bo; assert(bo->refcnt); - DBG(("%s: back buffer handle=%d, scanout?=%d, refcnt=%d\n", - __FUNCTION__, bo->handle, bo->active_scanout, get_private(back)->refcnt)); + DBG(("%s: back buffer handle=%d, active?=%d, refcnt=%d\n", + __FUNCTION__, bo->handle, bo->active_scanout, get_private(back)->refcnt)); if (bo->active_scanout == 0) { DBG(("%s: reuse unattached back\n", __FUNCTION__)); get_private(back)->stale = false; @@ -153,24 +278,37 @@ sna_dri2_get_back(struct sna *sna, } bo = NULL; - if (info) { - struct dri_bo *c; - list_for_each_entry(c, &info->cache, link) { - if (c->bo && c->bo->scanout == 0) { - bo = c->bo; - name = c->name; - DBG(("%s: reuse cache handle=%d\n", __FUNCTION__, bo->handle)); - list_move_tail(&c->link, &info->cache); - c->bo = NULL; + list_for_each_entry(c, &priv->cache, link) { + DBG(("%s: cache: handle=%d, active=%d\n", + __FUNCTION__, c->bo ? c->bo->handle : 0, c->bo ? 
c->bo->active_scanout : -1)); + assert(c->bo); + if (c->bo->active_scanout == 0) { + _list_del(&c->link); + if (c->bo == NULL) { + free(c); + goto out; } + bo = c->bo; + name = c->name; + flags = c->flags; + DBG(("%s: reuse cache handle=%d, name=%d, flags=%d\n", __FUNCTION__, bo->handle, name, flags)); + c->bo = NULL; + break; } } if (bo == NULL) { DBG(("%s: allocating new backbuffer\n", __FUNCTION__)); + flags = CREATE_EXACT; + + if (use_scanout(sna, draw, priv)) { + DBG(("%s: requesting scanout compatible back\n", __FUNCTION__)); + flags |= CREATE_SCANOUT; + } + bo = kgem_create_2d(&sna->kgem, draw->width, draw->height, draw->bitsPerPixel, get_private(back)->bo->tiling, - get_private(back)->bo->scanout ? CREATE_SCANOUT : 0); + flags); if (bo == NULL) return; @@ -179,30 +317,42 @@ sna_dri2_get_back(struct sna *sna, kgem_bo_destroy(&sna->kgem, bo); return; } + + flags = 0; + if (USE_ASYNC_SWAP && back->flags) { + BoxRec box; + + box.x1 = 0; + box.y1 = 0; + box.x2 = draw->width; + box.y2 = draw->height; + + DBG(("%s: filling new buffer with old back\n", __FUNCTION__)); + if (sna->render.copy_boxes(sna, GXcopy, + draw, get_private(back)->bo, 0, 0, + draw, bo, 0, 0, + &box, 1, COPY_LAST | COPY_DRI)) + flags = back->flags; + } } assert(bo->active_scanout == 0); - if (info && reuse) { - bool found = false; - struct dri_bo *c; - - list_for_each_entry_reverse(c, &info->cache, link) { - if (c->bo == NULL) { - found = true; - _list_del(&c->link); - break; - } - } - if (!found) + if (reuse && get_private(back)->bo->refcnt == 1 + get_private(back)->bo->active_scanout) { + if (&c->link == &priv->cache) c = malloc(sizeof(*c)); if (c != NULL) { c->bo = ref(get_private(back)->bo); c->name = back->name; - list_add(&c->link, &info->cache); - DBG(("%s: cacheing handle=%d (name=%d)\n", __FUNCTION__, c->bo->handle, c->name)); + c->flags = back->flags; + list_add(&c->link, &priv->cache); + DBG(("%s: caching handle=%d (name=%d, flags=%d, active_scanout=%d)\n", __FUNCTION__, 
c->bo->handle, c->name, c->flags, c->bo->active_scanout)); } + } else { + if (&c->link != &priv->cache) + free(c); } + assert(bo->active_scanout == 0); assert(bo != get_private(back)->bo); kgem_bo_destroy(&sna->kgem, get_private(back)->bo); @@ -210,21 +360,13 @@ sna_dri2_get_back(struct sna *sna, get_private(back)->size = draw->height << 16 | draw->width; back->pitch = bo->pitch; back->name = name; + back->flags = flags; - get_private(back)->stale = false; -} - -struct dri2_window { - DRI2BufferPtr front; - struct sna_dri2_event *chain; - xf86CrtcPtr crtc; - int64_t msc_delta; -}; + assert(back->pitch); + assert(back->name); -static struct dri2_window *dri2_window(WindowPtr win) -{ - assert(win->drawable.type != DRAWABLE_PIXMAP); - return ((void **)__get_private(win, sna_window_key))[1]; +out: + get_private(back)->stale = false; } static struct sna_dri2_event * @@ -232,21 +374,25 @@ dri2_chain(DrawablePtr d) { struct dri2_window *priv = dri2_window((WindowPtr)d); assert(priv != NULL); + assert(priv->chain == NULL || priv->chain->chained); return priv->chain; } inline static DRI2BufferPtr dri2_window_get_front(WindowPtr win) { struct dri2_window *priv = dri2_window(win); + assert(priv->front == NULL || get_private(priv->front)->bo->active_scanout); return priv ? priv->front : NULL; } #else inline static void *dri2_window_get_front(WindowPtr win) { return NULL; } +#define APPLY_DAMAGE 1 #endif #if DRI2INFOREC_VERSION < 6 #define xorg_can_triple_buffer() 0 #define swap_limit(d, l) false +#define mark_stale(b) #else @@ -273,6 +419,8 @@ mark_stale(DRI2BufferPtr back) * stale frame. (This is mostly useful for tracking down * driver bugs!) 
*/ + DBG(("%s(handle=%d) => %d\n", __FUNCTION__, + get_private(back)->bo->handle, xorg_can_triple_buffer())); get_private(back)->stale = xorg_can_triple_buffer(); } @@ -286,21 +434,29 @@ sna_dri2_swap_limit_validate(DrawablePtr draw, int swap_limit) static void sna_dri2_reuse_buffer(DrawablePtr draw, DRI2BufferPtr buffer) { + struct sna *sna = to_sna_from_drawable(draw); + DBG(("%s: reusing buffer pixmap=%ld, attachment=%d, handle=%d, name=%d\n", __FUNCTION__, get_drawable_pixmap(draw)->drawable.serialNumber, buffer->attachment, get_private(buffer)->bo->handle, buffer->name)); assert(get_private(buffer)->refcnt); - assert(get_private(buffer)->bo->refcnt > get_private(buffer)->bo->active_scanout); + assert(get_private(buffer)->bo->refcnt >= get_private(buffer)->bo->active_scanout); + assert(kgem_bo_flink(&sna->kgem, get_private(buffer)->bo) == buffer->name); if (buffer->attachment == DRI2BufferBackLeft && draw->type != DRAWABLE_PIXMAP) { - DBG(("%s: replacing back buffer\n", __FUNCTION__)); - sna_dri2_get_back(to_sna_from_drawable(draw), draw, buffer, dri2_chain(draw)); + DBG(("%s: replacing back buffer on window %ld\n", __FUNCTION__, draw->id)); + sna_dri2_get_back(sna, draw, buffer); - assert(kgem_bo_flink(&to_sna_from_drawable(draw)->kgem, get_private(buffer)->bo) == buffer->name); assert(get_private(buffer)->bo->refcnt); assert(get_private(buffer)->bo->active_scanout == 0); + assert(kgem_bo_flink(&sna->kgem, get_private(buffer)->bo) == buffer->name); + DBG(("%s: reusing back buffer handle=%d, name=%d, pitch=%d, age=%d\n", + __FUNCTION__, get_private(buffer)->bo->handle, + buffer->name, buffer->pitch, buffer->flags)); } + + kgem_bo_submit(&sna->kgem, get_private(buffer)->bo); } static bool swap_limit(DrawablePtr draw, int limit) @@ -314,11 +470,6 @@ static bool swap_limit(DrawablePtr draw, int limit) } #endif -#if DRI2INFOREC_VERSION < 10 -#undef USE_ASYNC_SWAP -#define USE_ASYNC_SWAP 0 -#endif - #define COLOR_PREFER_TILING_Y 0 /* Prefer to enable TILING_Y if 
this buffer will never be a @@ -328,6 +479,9 @@ static uint32_t color_tiling(struct sna *sna, DrawablePtr draw) { uint32_t tiling; + if (!sna->kgem.can_fence) + return I915_TILING_NONE; + if (COLOR_PREFER_TILING_Y && (draw->width != sna->front->drawable.width || draw->height != sna->front->drawable.height)) @@ -355,7 +509,6 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna, PixmapPtr pixmap) { struct sna_pixmap *priv; - int tiling; DBG(("%s: attaching DRI client to pixmap=%ld\n", __FUNCTION__, pixmap->drawable.serialNumber)); @@ -373,31 +526,29 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna, return NULL; } - assert(priv->flush == false); + assert(priv->flush == false || priv->pinned & PIN_DRI3); + assert(priv->gpu_bo->flush == false || priv->pinned & PIN_DRI3); assert(priv->cpu_damage == NULL); assert(priv->gpu_bo); assert(priv->gpu_bo->proxy == NULL); - assert(priv->gpu_bo->flush == false); - - tiling = color_tiling(sna, &pixmap->drawable); - if (tiling < 0) - tiling = -tiling; - if (priv->gpu_bo->tiling != tiling) - sna_pixmap_change_tiling(pixmap, tiling); - return priv->gpu_bo; -} + if (!kgem_bo_is_fenced(&sna->kgem, priv->gpu_bo)) { + if (priv->gpu_bo->tiling && + !sna_pixmap_change_tiling(pixmap, I915_TILING_NONE)) { + DBG(("%s: failed to discard tiling (%d) for DRI2 protocol\n", __FUNCTION__, priv->gpu_bo->tiling)); + return NULL; + } + } else { + int tiling = color_tiling(sna, &pixmap->drawable); + if (tiling < 0) + tiling = -tiling; + if (priv->gpu_bo->tiling < tiling && !priv->gpu_bo->scanout) + sna_pixmap_change_tiling(pixmap, tiling); + } -pure static inline void *sna_pixmap_get_buffer(PixmapPtr pixmap) -{ - assert(pixmap->refcnt); - return ((void **)__get_private(pixmap, sna_pixmap_key))[2]; -} + priv->gpu_bo->active_scanout++; -static inline void sna_pixmap_set_buffer(PixmapPtr pixmap, void *ptr) -{ - assert(pixmap->refcnt); - ((void **)__get_private(pixmap, sna_pixmap_key))[2] = ptr; + return priv->gpu_bo; } void @@ -422,13 
+573,18 @@ sna_dri2_pixmap_update_bo(struct sna *sna, PixmapPtr pixmap, struct kgem_bo *bo) if (private->bo == bo) return; + assert(private->bo->active_scanout > 0); + private->bo->active_scanout--; + DBG(("%s: dropping flush hint from handle=%d\n", __FUNCTION__, private->bo->handle)); private->bo->flush = false; kgem_bo_destroy(&sna->kgem, private->bo); + buffer->name = kgem_bo_flink(&sna->kgem, bo); buffer->pitch = bo->pitch; private->bo = ref(bo); + bo->active_scanout++; DBG(("%s: adding flush hint to handle=%d\n", __FUNCTION__, bo->handle)); bo->flush = true; @@ -449,9 +605,9 @@ sna_dri2_create_buffer(DrawablePtr draw, struct sna_dri2_private *private; PixmapPtr pixmap; struct kgem_bo *bo; - unsigned flags = 0; + unsigned bpp = format ?: draw->bitsPerPixel; + unsigned flags = CREATE_EXACT; uint32_t size; - int bpp; DBG(("%s pixmap=%ld, (attachment=%d, format=%d, drawable=%dx%d), window?=%d\n", __FUNCTION__, @@ -468,11 +624,11 @@ sna_dri2_create_buffer(DrawablePtr draw, if (draw->type != DRAWABLE_PIXMAP) buffer = dri2_window_get_front((WindowPtr)draw); if (buffer == NULL) - buffer = sna_pixmap_get_buffer(pixmap); + buffer = (DRI2Buffer2Ptr)sna_pixmap_get_buffer(pixmap); if (buffer) { private = get_private(buffer); - DBG(("%s: reusing front buffer attachment, win=%lu %dx%d, pixmap=%ld [%ld] %dx%d, handle=%d, name=%d\n", + DBG(("%s: reusing front buffer attachment, win=%lu %dx%d, pixmap=%ld [%ld] %dx%d, handle=%d, name=%d, active_scanout=%d\n", __FUNCTION__, draw->type != DRAWABLE_PIXMAP ? 
(long)draw->id : (long)0, draw->width, draw->height, @@ -480,12 +636,22 @@ sna_dri2_create_buffer(DrawablePtr draw, private->pixmap->drawable.serialNumber, pixmap->drawable.width, pixmap->drawable.height, - private->bo->handle, buffer->name)); + private->bo->handle, buffer->name, + private->bo->active_scanout)); + assert(buffer->attachment == DRI2BufferFrontLeft); assert(private->pixmap == pixmap); assert(sna_pixmap(pixmap)->flush); assert(sna_pixmap(pixmap)->pinned & PIN_DRI2); assert(kgem_bo_flink(&sna->kgem, private->bo) == buffer->name); + assert(private->bo->pitch == buffer->pitch); + assert(private->bo->active_scanout); + + sna_pixmap_move_to_gpu(pixmap, + MOVE_READ | + __MOVE_FORCE | + __MOVE_DRI); + kgem_bo_submit(&sna->kgem, private->bo); private->refcnt++; return buffer; @@ -498,7 +664,6 @@ sna_dri2_create_buffer(DrawablePtr draw, assert(sna_pixmap(pixmap) != NULL); bo = ref(bo); - bpp = pixmap->drawable.bitsPerPixel; if (pixmap == sna->front && !(sna->flags & SNA_LINEAR_FB)) flags |= CREATE_SCANOUT; DBG(("%s: attaching to front buffer %dx%d [%p:%d], scanout? %d\n", @@ -506,6 +671,7 @@ sna_dri2_create_buffer(DrawablePtr draw, pixmap->drawable.width, pixmap->drawable.height, pixmap, pixmap->refcnt, flags & CREATE_SCANOUT)); size = (uint32_t)pixmap->drawable.height << 16 | pixmap->drawable.width; + bpp = pixmap->drawable.bitsPerPixel; break; case DRI2BufferBackLeft: @@ -514,6 +680,7 @@ sna_dri2_create_buffer(DrawablePtr draw, flags |= CREATE_SCANOUT; if (draw->width == sna->front->drawable.width && draw->height == sna->front->drawable.height && + draw->bitsPerPixel == bpp && (sna->flags & (SNA_LINEAR_FB | SNA_NO_WAIT | SNA_NO_FLIP)) == 0) flags |= CREATE_SCANOUT; } @@ -521,7 +688,6 @@ sna_dri2_create_buffer(DrawablePtr draw, case DRI2BufferFrontRight: case DRI2BufferFakeFrontLeft: case DRI2BufferFakeFrontRight: - bpp = draw->bitsPerPixel; DBG(("%s: creating back buffer %dx%d, suitable for scanout? 
%d\n", __FUNCTION__, draw->width, draw->height, @@ -530,7 +696,7 @@ sna_dri2_create_buffer(DrawablePtr draw, bo = kgem_create_2d(&sna->kgem, draw->width, draw->height, - draw->bitsPerPixel, + bpp, color_tiling(sna, draw), flags); break; @@ -558,7 +724,6 @@ sna_dri2_create_buffer(DrawablePtr draw, * not understand W tiling and the GTT is incapable of * W fencing. */ - bpp = format ? format : draw->bitsPerPixel; bpp *= 2; bo = kgem_create_2d(&sna->kgem, ALIGN(draw->width, 64), @@ -570,7 +735,6 @@ sna_dri2_create_buffer(DrawablePtr draw, case DRI2BufferDepthStencil: case DRI2BufferHiz: case DRI2BufferAccum: - bpp = format ? format : draw->bitsPerPixel, bo = kgem_create_2d(&sna->kgem, draw->width, draw->height, bpp, other_tiling(sna, draw), @@ -614,7 +778,7 @@ sna_dri2_create_buffer(DrawablePtr draw, pixmap->refcnt++; priv = sna_pixmap(pixmap); - assert(priv->flush == false); + assert(priv->flush == false || priv->pinned & PIN_DRI3); assert((priv->pinned & PIN_DRI2) == 0); /* Don't allow this named buffer to be replaced */ @@ -630,17 +794,17 @@ sna_dri2_create_buffer(DrawablePtr draw, if (priv->gpu_bo->exec) sna->kgem.flush = 1; - priv->flush |= 1; + priv->flush |= FLUSH_READ; if (draw->type == DRAWABLE_PIXMAP) { /* DRI2 renders directly into GLXPixmaps, treat as hostile */ kgem_bo_unclean(&sna->kgem, priv->gpu_bo); sna_damage_all(&priv->gpu_damage, pixmap); priv->clear = false; priv->cpu = false; - priv->flush |= 2; + priv->flush |= FLUSH_WRITE; } - sna_accel_watch_flush(sna, 1); + sna_watch_flush(sna, 1); } return buffer; @@ -651,16 +815,80 @@ err: return NULL; } -static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) +static void +sna_dri2_cache_bo(struct sna *sna, + DrawablePtr draw, + struct kgem_bo *bo, + uint32_t name, + uint32_t size, + uint32_t flags) +{ + struct dri_bo *c; + + DBG(("%s(handle=%d, name=%d)\n", __FUNCTION__, bo->handle, name)); + + if (draw == NULL) { + DBG(("%s: no draw, releasing handle=%d\n", + __FUNCTION__, 
bo->handle)); + goto err; + } + + if (draw->type == DRAWABLE_PIXMAP) { + DBG(("%s: not a window, releasing handle=%d\n", + __FUNCTION__, bo->handle)); + goto err; + } + + if (bo->refcnt > 1 + bo->active_scanout) { + DBG(("%s: multiple references [%d], releasing handle\n", + __FUNCTION__, bo->refcnt, bo->handle)); + goto err; + } + + if ((draw->height << 16 | draw->width) != size) { + DBG(("%s: wrong size [%dx%d], releasing handle\n", + __FUNCTION__, + size & 0xffff, size >> 16, + bo->handle)); + goto err; + } + + if (bo->scanout && front_pitch(draw) != bo->pitch) { + DBG(("%s: scanout with pitch change [%d != %d], releasing handle\n", + __FUNCTION__, bo->pitch, front_pitch(draw), bo->handle)); + goto err; + } + + c = malloc(sizeof(*c)); + if (!c) + goto err; + + DBG(("%s: caching handle=%d (name=%d, flags=%d, active_scanout=%d)\n", __FUNCTION__, bo->handle, name, flags, bo->active_scanout)); + + c->bo = bo; + c->name = name; + c->flags = flags; + list_add(&c->link, &dri2_window((WindowPtr)draw)->cache); + return; + +err: + kgem_bo_destroy(&sna->kgem, bo); +} + +static void _sna_dri2_destroy_buffer(struct sna *sna, + DrawablePtr draw, + DRI2Buffer2Ptr buffer) { struct sna_dri2_private *private = get_private(buffer); if (buffer == NULL) return; - DBG(("%s: %p [handle=%d] -- refcnt=%d, pixmap=%ld\n", + DBG(("%s: %p [handle=%d] -- refcnt=%d, draw=%ld, pixmap=%ld, proxy?=%d\n", __FUNCTION__, buffer, private->bo->handle, private->refcnt, - private->pixmap ? private->pixmap->drawable.serialNumber : 0)); + draw ? draw->id : 0, + private->pixmap ? 
private->pixmap->drawable.serialNumber : 0, + private->proxy != NULL)); assert(private->refcnt > 0); if (--private->refcnt) return; @@ -669,7 +897,10 @@ static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) if (private->proxy) { DBG(("%s: destroying proxy\n", __FUNCTION__)); - _sna_dri2_destroy_buffer(sna, private->proxy); + assert(private->bo->active_scanout > 0); + private->bo->active_scanout--; + + _sna_dri2_destroy_buffer(sna, draw, private->proxy); private->pixmap = NULL; } @@ -683,6 +914,11 @@ static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) assert(priv->pinned & PIN_DRI2); assert(priv->flush); + DBG(("%s: removing active_scanout=%d from pixmap handle=%d\n", + __FUNCTION__, priv->gpu_bo->active_scanout, priv->gpu_bo->handle)); + assert(priv->gpu_bo->active_scanout > 0); + priv->gpu_bo->active_scanout--; + /* Undo the DRI markings on this pixmap */ DBG(("%s: releasing last DRI pixmap=%ld, scanout?=%d\n", __FUNCTION__, @@ -692,28 +928,34 @@ static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) list_del(&priv->flush_list); DBG(("%s: dropping flush hint from handle=%d\n", __FUNCTION__, private->bo->handle)); - priv->gpu_bo->flush = false; priv->pinned &= ~PIN_DRI2; - priv->flush = false; - sna_accel_watch_flush(sna, -1); + if ((priv->pinned & PIN_DRI3) == 0) { + priv->gpu_bo->flush = false; + priv->flush = false; + } + sna_watch_flush(sna, -1); sna_pixmap_set_buffer(pixmap, NULL); pixmap->drawable.pScreen->DestroyPixmap(pixmap); } - assert(private->bo->flush == false); - kgem_bo_destroy(&sna->kgem, private->bo); + sna_dri2_cache_bo(sna, draw, + private->bo, + buffer->name, + private->size, + buffer->flags); free(buffer); } static void sna_dri2_destroy_buffer(DrawablePtr draw, DRI2Buffer2Ptr buffer) { - _sna_dri2_destroy_buffer(to_sna_from_drawable(draw), buffer); + _sna_dri2_destroy_buffer(to_sna_from_drawable(draw), draw, buffer); } static DRI2BufferPtr 
sna_dri2_reference_buffer(DRI2BufferPtr buffer) { + assert(get_private(buffer)->refcnt > 0); get_private(buffer)->refcnt++; return buffer; } @@ -746,10 +988,9 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) { struct sna *sna = to_sna_from_pixmap(pixmap); struct sna_pixmap *priv = sna_pixmap(pixmap); - RegionRec region; - DBG(("%s: pixmap=%ld, handle=%d\n", - __FUNCTION__, pixmap->drawable.serialNumber, bo->handle)); + DBG(("%s: pixmap=%ld, handle=%d (old handle=%d)\n", + __FUNCTION__, pixmap->drawable.serialNumber, bo->handle, priv->gpu_bo->handle)); assert(pixmap->drawable.width * pixmap->drawable.bitsPerPixel <= 8*bo->pitch); assert(pixmap->drawable.height * bo->pitch <= kgem_bo_size(bo)); @@ -758,21 +999,34 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) assert((priv->pinned & (PIN_PRIME | PIN_DRI3)) == 0); assert(priv->flush); - /* Post damage on the new front buffer so that listeners, such - * as DisplayLink know take a copy and shove it over the USB, - * also for software cursors and the like. - */ - region.extents.x1 = region.extents.y1 = 0; - region.extents.x2 = pixmap->drawable.width; - region.extents.y2 = pixmap->drawable.height; - region.data = NULL; - DamageRegionAppend(&pixmap->drawable, ®ion); + if (APPLY_DAMAGE) { + RegionRec region; + + /* Post damage on the new front buffer so that listeners, such + * as DisplayLink know take a copy and shove it over the USB, + * also for software cursors and the like. + */ + region.extents.x1 = region.extents.y1 = 0; + region.extents.x2 = pixmap->drawable.width; + region.extents.y2 = pixmap->drawable.height; + region.data = NULL; + + /* + * Eeek, beware the sw cursor copying to the old bo + * causing recursion and mayhem. 
+ */ + DBG(("%s: marking whole pixmap as damaged\n", __FUNCTION__)); + sna->ignore_copy_area = sna->flags & SNA_TEAR_FREE; + DamageRegionAppend(&pixmap->drawable, ®ion); + } damage(pixmap, priv, NULL); assert(bo->refcnt); - if (priv->move_to_gpu) + if (priv->move_to_gpu) { + DBG(("%s: applying final/discard move-to-gpu\n", __FUNCTION__)); priv->move_to_gpu(sna, priv, 0); + } if (priv->gpu_bo != bo) { DBG(("%s: dropping flush hint from handle=%d\n", __FUNCTION__, priv->gpu_bo->handle)); priv->gpu_bo->flush = false; @@ -792,8 +1046,27 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) bo->domain = DOMAIN_NONE; assert(bo->flush); - DamageRegionProcessPending(&pixmap->drawable); + if (APPLY_DAMAGE) { + sna->ignore_copy_area = false; + DamageRegionProcessPending(&pixmap->drawable); + } +} + +#if defined(__GNUC__) +#define popcount(x) __builtin_popcount(x) +#else +static int popcount(unsigned int x) +{ + int count = 0; + + while (x) { + count += x&1; + x >>= 1; + } + + return count; } +#endif static void sna_dri2_select_mode(struct sna *sna, struct kgem_bo *dst, struct kgem_bo *src, bool sync) { @@ -823,6 +1096,12 @@ static void sna_dri2_select_mode(struct sna *sna, struct kgem_bo *dst, struct kg return; } + if (sna->render_state.gt < 2 && sna->kgem.has_semaphores) { + DBG(("%s: small GT [%d], not forcing selection\n", + __FUNCTION__, sna->render_state.gt)); + return; + } + VG_CLEAR(busy); busy.handle = src->handle; if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy)) @@ -860,9 +1139,11 @@ static void sna_dri2_select_mode(struct sna *sna, struct kgem_bo *dst, struct kg * the cost of the query. */ mode = KGEM_RENDER; - if (busy.busy & (0xfffe << 16)) + if ((busy.busy & 0xffff) == I915_EXEC_BLT) mode = KGEM_BLT; - kgem_bo_mark_busy(&sna->kgem, busy.handle == src->handle ? src : dst, mode); + kgem_bo_mark_busy(&sna->kgem, + busy.handle == src->handle ? 
src : dst, + mode); _kgem_set_mode(&sna->kgem, mode); } @@ -871,10 +1152,13 @@ static bool is_front(int attachment) return attachment == DRI2BufferFrontLeft; } +#define DRI2_SYNC 0x1 +#define DRI2_DAMAGE 0x2 +#define DRI2_BO 0x4 static struct kgem_bo * __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, DRI2BufferPtr src, DRI2BufferPtr dst, - bool sync) + unsigned flags) { PixmapPtr pixmap = get_drawable_pixmap(draw); DrawableRec scratch, *src_draw = &pixmap->drawable, *dst_draw = &pixmap->drawable; @@ -886,7 +1170,7 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, struct kgem_bo *dst_bo; const BoxRec *boxes; int16_t dx, dy, sx, sy; - unsigned flags; + unsigned hint; int n; /* To hide a stale DRI2Buffer, one may choose to substitute @@ -962,8 +1246,9 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, } } } else - sync = false; + flags &= ~DRI2_SYNC; + scratch.pScreen = draw->pScreen; scratch.x = scratch.y = 0; scratch.width = scratch.height = 0; scratch.depth = draw->depth; @@ -971,6 +1256,7 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, src_bo = src_priv->bo; assert(src_bo->refcnt); + kgem_bo_unclean(&sna->kgem, src_bo); if (is_front(src->attachment)) { struct sna_pixmap *priv; @@ -987,11 +1273,12 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, scratch.height = src_priv->size >> 16; src_draw = &scratch; - DBG(("%s: source size %dx%d, region size %dx%d\n", + DBG(("%s: source size %dx%d, region size %dx%d, src offset %dx%d\n", __FUNCTION__, scratch.width, scratch.height, clip.extents.x2 - clip.extents.x1, - clip.extents.y2 - clip.extents.y1)); + clip.extents.y2 - clip.extents.y1, + -sx, -sy)); source.extents.x1 = -sx; source.extents.y1 = -sy; @@ -1002,6 +1289,10 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, assert(region == NULL || region == &clip); pixman_region_intersect(&clip, &clip, 
&source); + if (!pixman_region_not_empty(&clip)) { + DBG(("%s: region doesn't overlap pixmap\n", __FUNCTION__)); + return NULL; + } } dst_bo = dst_priv->bo; @@ -1013,12 +1304,12 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, /* Preserve the CRTC shadow overrides */ sna_shadow_steal_crtcs(sna, &shadow); - flags = MOVE_WRITE | __MOVE_FORCE; + hint = MOVE_WRITE | __MOVE_FORCE; if (clip.data) - flags |= MOVE_READ; + hint |= MOVE_READ; assert(region == NULL || region == &clip); - priv = sna_pixmap_move_area_to_gpu(pixmap, &clip.extents, flags); + priv = sna_pixmap_move_area_to_gpu(pixmap, &clip.extents, hint); if (priv) { damage(pixmap, priv, region); dst_bo = priv->gpu_bo; @@ -1050,20 +1341,20 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, assert(region == NULL || region == &clip); pixman_region_intersect(&clip, &clip, &target); - sync = false; + flags &= ~DRI2_SYNC; } if (!wedged(sna)) { xf86CrtcPtr crtc; crtc = NULL; - if (sync && sna_pixmap_is_scanout(sna, pixmap)) + if (flags & DRI2_SYNC && sna_pixmap_is_scanout(sna, pixmap)) crtc = sna_covering_crtc(sna, &clip.extents, NULL); sna_dri2_select_mode(sna, dst_bo, src_bo, crtc != NULL); - sync = (crtc != NULL&& - sna_wait_for_scanline(sna, pixmap, crtc, - &clip.extents)); + if (crtc == NULL || + !sna_wait_for_scanline(sna, pixmap, crtc, &clip.extents)) + flags &= ~DRI2_SYNC; } if (region) { @@ -1075,8 +1366,11 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, boxes = &clip.extents; n = 1; } - DamageRegionAppend(&pixmap->drawable, region); - + if (APPLY_DAMAGE || flags & DRI2_DAMAGE) { + DBG(("%s: marking region as damaged\n", __FUNCTION__)); + sna->ignore_copy_area = sna->flags & SNA_TEAR_FREE; + DamageRegionAppend(&pixmap->drawable, region); + } DBG(("%s: copying [(%d, %d), (%d, %d)]x%d src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__, @@ -1084,29 +1378,36 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr 
region, boxes[0].x2, boxes[0].y2, n, sx, sy, dx, dy)); - flags = COPY_LAST; - if (sync) - flags |= COPY_SYNC; + hint = COPY_LAST | COPY_DRI; + if (flags & DRI2_SYNC) + hint |= COPY_SYNC; if (!sna->render.copy_boxes(sna, GXcopy, src_draw, src_bo, sx, sy, dst_draw, dst_bo, dx, dy, - boxes, n, flags)) + boxes, n, hint)) memcpy_copy_boxes(sna, GXcopy, src_draw, src_bo, sx, sy, dst_draw, dst_bo, dx, dy, - boxes, n, flags); - - DBG(("%s: flushing? %d\n", __FUNCTION__, sync)); - if (sync) { /* STAT! */ - struct kgem_request *rq = sna->kgem.next_request; - kgem_submit(&sna->kgem); - if (rq->bo) { - bo = ref(rq->bo); - DBG(("%s: recording sync fence handle=%d\n", __FUNCTION__, bo->handle)); + boxes, n, hint); + + sna->needs_dri_flush = true; + if (flags & (DRI2_SYNC | DRI2_BO)) { /* STAT! */ + struct kgem_request *rq = RQ(dst_bo->rq); + if (rq && rq != (void *)&sna->kgem) { + if (rq->bo == NULL) + kgem_submit(&sna->kgem); + if (rq->bo) { /* Becareful in case the gpu is wedged */ + bo = ref(rq->bo); + DBG(("%s: recording sync fence handle=%d\n", + __FUNCTION__, bo->handle)); + } } } - DamageRegionProcessPending(&pixmap->drawable); + if (APPLY_DAMAGE || flags & DRI2_DAMAGE) { + sna->ignore_copy_area = false; + DamageRegionProcessPending(&pixmap->drawable); + } if (clip.data) pixman_region_fini(&clip); @@ -1142,6 +1443,8 @@ sna_dri2_copy_region(DrawablePtr draw, assert(get_private(src)->refcnt); assert(get_private(dst)->refcnt); + assert(get_private(src)->bo != get_private(dst)->bo); + assert(get_private(src)->bo->refcnt); assert(get_private(dst)->bo->refcnt); @@ -1151,7 +1454,7 @@ sna_dri2_copy_region(DrawablePtr draw, region->extents.x2, region->extents.y2, region_num_rects(region))); - __sna_dri2_copy_region(sna, draw, region, src, dst, false); + __sna_dri2_copy_region(sna, draw, region, src, dst, DRI2_DAMAGE); } inline static uint32_t pipe_select(int pipe) @@ -1161,6 +1464,7 @@ inline static uint32_t pipe_select(int pipe) * we can safely ignore the capability check - if we 
have more * than two pipes, we can assume that they are fully supported. */ + assert(pipe < _DRM_VBLANK_HIGH_CRTC_MASK); if (pipe > 1) return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT; else if (pipe > 0) @@ -1169,15 +1473,53 @@ inline static uint32_t pipe_select(int pipe) return 0; } -static inline int sna_wait_vblank(struct sna *sna, union drm_wait_vblank *vbl, int pipe) +static inline bool sna_next_vblank(struct sna_dri2_event *info) { - DBG(("%s(pipe=%d, waiting until seq=%u%s)\n", - __FUNCTION__, pipe, vbl->request.sequence, - vbl->request.type & DRM_VBLANK_RELATIVE ? " [relative]" : "")); - assert(pipe != -1); + union drm_wait_vblank vbl; - vbl->request.type |= pipe_select(pipe); - return drmIoctl(sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, vbl); + DBG(("%s(pipe=%d, waiting until next vblank)\n", + __FUNCTION__, info->pipe)); + assert(info->pipe != -1); + + VG_CLEAR(vbl); + vbl.request.type = + DRM_VBLANK_RELATIVE | + DRM_VBLANK_EVENT | + pipe_select(info->pipe); + vbl.request.sequence = 1; + vbl.request.signal = (uintptr_t)info; + + assert(!info->queued); + if (drmIoctl(info->sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, &vbl)) + return false; + + info->queued = true; + return true; +} + +static inline bool sna_wait_vblank(struct sna_dri2_event *info, + unsigned seq) +{ + union drm_wait_vblank vbl; + + DBG(("%s(pipe=%d, waiting until vblank %u)\n", + __FUNCTION__, info->pipe, seq)); + assert(info->pipe != -1); + + VG_CLEAR(vbl); + vbl.request.type = + DRM_VBLANK_ABSOLUTE | + DRM_VBLANK_EVENT | + pipe_select(info->pipe); + vbl.request.sequence = seq; + vbl.request.signal = (uintptr_t)info; + + assert(!info->queued); + if (drmIoctl(info->sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, &vbl)) + return false; + + info->queued = true; + return true; } #if DRI2INFOREC_VERSION >= 4 @@ -1195,6 +1537,7 @@ draw_current_msc(DrawablePtr draw, xf86CrtcPtr crtc, uint64_t msc) { struct dri2_window *priv; + assert(draw); if (draw->type != DRAWABLE_WINDOW) return msc; @@ -1206,6 +1549,9 @@ 
draw_current_msc(DrawablePtr draw, xf86CrtcPtr crtc, uint64_t msc) priv->crtc = crtc; priv->msc_delta = 0; priv->chain = NULL; + priv->scanout = -1; + priv->cache_size = 0; + list_init(&priv->cache); dri2_window_attach((WindowPtr)draw, priv); } } else { @@ -1214,8 +1560,8 @@ draw_current_msc(DrawablePtr draw, xf86CrtcPtr crtc, uint64_t msc) const struct ust_msc *this = sna_crtc_last_swap(crtc); DBG(("%s: Window transferring from pipe=%d [msc=%llu] to pipe=%d [msc=%llu], delta now %lld\n", __FUNCTION__, - sna_crtc_to_pipe(priv->crtc), (long long)last->msc, - sna_crtc_to_pipe(crtc), (long long)this->msc, + sna_crtc_pipe(priv->crtc), (long long)last->msc, + sna_crtc_pipe(crtc), (long long)this->msc, (long long)(priv->msc_delta + this->msc - last->msc))); priv->msc_delta += this->msc - last->msc; priv->crtc = crtc; @@ -1248,57 +1594,119 @@ sna_dri2_get_crtc(DrawablePtr draw) NULL); } -static void -sna_dri2_remove_event(WindowPtr win, struct sna_dri2_event *info) +static void frame_swap_complete(struct sna_dri2_event *frame, int type) { - struct dri2_window *priv; - struct sna_dri2_event *chain; - - assert(win->drawable.type == DRAWABLE_WINDOW); - DBG(("%s: remove[%p] from window %ld, active? 
%d\n", - __FUNCTION__, info, (long)win->drawable.id, info->draw != NULL)); + const struct ust_msc *swap; - priv = dri2_window(win); - assert(priv); - assert(priv->chain != NULL); + assert(frame->signal); + frame->signal = false; - if (priv->chain == info) { - priv->chain = info->chain; + if (frame->client == NULL) { + DBG(("%s: client already gone\n", __FUNCTION__)); return; } - chain = priv->chain; - while (chain->chain != info) - chain = chain->chain; - assert(chain != info); - assert(info->chain != chain); - chain->chain = info->chain; + assert(frame->draw); + + swap = sna_crtc_last_swap(frame->crtc); + DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc=%lld], tv=%d.%06d\n", + __FUNCTION__, type, (long)frame->draw->id, frame->pipe, + (long long)swap->msc, + (long long)draw_current_msc(frame->draw, frame->crtc, swap->msc), + swap->tv_sec, swap->tv_usec)); + + DRI2SwapComplete(frame->client, frame->draw, + draw_current_msc(frame->draw, frame->crtc, swap->msc), + swap->tv_sec, swap->tv_usec, + type, frame->event_complete, frame->event_data); } -static void -sna_dri2_event_free(struct sna_dri2_event *info) +static void fake_swap_complete(struct sna *sna, ClientPtr client, + DrawablePtr draw, xf86CrtcPtr crtc, + int type, DRI2SwapEventPtr func, void *data) { - DrawablePtr draw = info->draw; + const struct ust_msc *swap; - DBG(("%s(draw?=%d)\n", __FUNCTION__, draw != NULL)); - if (draw && draw->type == DRAWABLE_WINDOW) - sna_dri2_remove_event((WindowPtr)draw, info); + assert(draw); - _sna_dri2_destroy_buffer(info->sna, info->front); - _sna_dri2_destroy_buffer(info->sna, info->back); + if (crtc == NULL) + crtc = sna_primary_crtc(sna); - while (!list_is_empty(&info->cache)) { - struct dri_bo *c; + swap = sna_crtc_last_swap(crtc); + DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc %lld], tv=%d.%06d\n", + __FUNCTION__, type, (long)draw->id, crtc ? 
sna_crtc_pipe(crtc) : -1, + (long long)swap->msc, + (long long)draw_current_msc(draw, crtc, swap->msc), + swap->tv_sec, swap->tv_usec)); - c = list_first_entry(&info->cache, struct dri_bo, link); - list_del(&c->link); + DRI2SwapComplete(client, draw, + draw_current_msc(draw, crtc, swap->msc), + swap->tv_sec, swap->tv_usec, + type, func, data); +} - DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); - if (c->bo) - kgem_bo_destroy(&info->sna->kgem, c->bo); +static void +sna_dri2_remove_event(struct sna_dri2_event *info) +{ + WindowPtr win = (WindowPtr)info->draw; + struct dri2_window *priv; - free(c); + assert(win->drawable.type == DRAWABLE_WINDOW); + DBG(("%s: remove[%p] from window %ld, active? %d\n", + __FUNCTION__, info, (long)win->drawable.id, info->draw != NULL)); + assert(!info->signal); + + priv = dri2_window(win); + assert(priv); + assert(priv->chain != NULL); + assert(info->chained); + info->chained = false; + + if (priv->chain != info) { + struct sna_dri2_event *chain = priv->chain; + while (chain->chain != info) { + assert(chain->chained); + chain = chain->chain; + } + assert(chain != info); + assert(info->chain != chain); + chain->chain = info->chain; + return; + } + + priv->chain = info->chain; + if (priv->chain == NULL) { + struct dri_bo *c, *tmp; + + c = list_entry(priv->cache.next->next, struct dri_bo, link); + list_for_each_entry_safe_from(c, tmp, &priv->cache, link) { + list_del(&c->link); + + DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? 
c->bo->handle : 0)); + assert(c->bo); + kgem_bo_destroy(&info->sna->kgem, c->bo); + free(c); + } } +} + +static void +sna_dri2_event_free(struct sna_dri2_event *info) +{ + DBG(("%s(draw?=%d)\n", __FUNCTION__, info->draw != NULL)); + assert(!info->queued); + assert(!info->signal); + assert(info->pending.bo == NULL); + + if (info->sna->dri2.flip_pending == info) + info->sna->dri2.flip_pending = NULL; + assert(info->sna->dri2.flip_pending != info); + if (info->chained) + sna_dri2_remove_event(info); + + assert((info->front == NULL && info->back == NULL) || info->front != info->back); + _sna_dri2_destroy_buffer(info->sna, info->draw, info->front); + _sna_dri2_destroy_buffer(info->sna, info->draw, info->back); if (info->bo) { DBG(("%s: releasing batch handle=%d\n", __FUNCTION__, info->bo->handle)); @@ -1331,15 +1739,26 @@ sna_dri2_client_gone(CallbackListPtr *list, void *closure, void *data) event = list_first_entry(&priv->events, struct sna_dri2_event, link); assert(event->client == client); + list_del(&event->link); + event->signal = false; - if (event->queued) { - if (event->draw) - sna_dri2_remove_event((WindowPtr)event->draw, - event); - event->client = NULL; - event->draw = NULL; - list_del(&event->link); - } else + if (event->pending.bo) { + assert(event->pending.bo->active_scanout > 0); + event->pending.bo->active_scanout--; + + kgem_bo_destroy(&sna->kgem, event->pending.bo); + event->pending.bo = NULL; + } + + if (event->chained) + sna_dri2_remove_event(event); + + event->client = NULL; + event->draw = NULL; + event->keepalive = 1; + assert(!event->signal); + + if (!event->queued) sna_dri2_event_free(event); } @@ -1365,11 +1784,15 @@ static bool add_event_to_client(struct sna_dri2_event *info, struct sna *sna, Cl } static struct sna_dri2_event * -sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) +sna_dri2_add_event(struct sna *sna, + DrawablePtr draw, + ClientPtr client, + xf86CrtcPtr crtc) { struct dri2_window *priv; struct sna_dri2_event 
*info, *chain; + assert(draw != NULL); assert(draw->type == DRAWABLE_WINDOW); DBG(("%s: adding event to window %ld)\n", __FUNCTION__, (long)draw->id)); @@ -1382,11 +1805,11 @@ sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) if (info == NULL) return NULL; - list_init(&info->cache); info->sna = sna; info->draw = draw; - info->crtc = priv->crtc; - info->pipe = sna_crtc_to_pipe(priv->crtc); + info->crtc = crtc; + info->pipe = sna_crtc_pipe(crtc); + info->keepalive = 1; if (!add_event_to_client(info, sna, client)) { free(info); @@ -1394,6 +1817,7 @@ sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) } assert(priv->chain != info); + info->chained = true; if (priv->chain == NULL) { priv->chain = info; @@ -1409,6 +1833,66 @@ sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) return info; } +static void decouple_window(WindowPtr win, + struct dri2_window *priv, + struct sna *sna, + bool signal) +{ + if (priv->front) { + DBG(("%s: decouple private front\n", __FUNCTION__)); + assert(priv->crtc); + sna_shadow_unset_crtc(sna, priv->crtc); + + _sna_dri2_destroy_buffer(sna, NULL, priv->front); + priv->front = NULL; + } + + if (priv->chain) { + struct sna_dri2_event *info, *chain; + + DBG(("%s: freeing chain\n", __FUNCTION__)); + + chain = priv->chain; + while ((info = chain)) { + DBG(("%s: freeing event, pending signal? %d, pending swap? handle=%d\n", + __FUNCTION__, info->signal, + info->pending.bo ? 
info->pending.bo->handle : 0)); + assert(info->draw == &win->drawable); + + if (info->pending.bo) { + if (signal) { + bool was_signalling = info->signal; + info->signal = true; + frame_swap_complete(info, DRI2_EXCHANGE_COMPLETE); + info->signal = was_signalling; + } + assert(info->pending.bo->active_scanout > 0); + info->pending.bo->active_scanout--; + + kgem_bo_destroy(&sna->kgem, info->pending.bo); + info->pending.bo = NULL; + } + + if (info->signal && signal) + frame_swap_complete(info, DRI2_EXCHANGE_COMPLETE); + info->signal = false; + info->draw = NULL; + info->keepalive = 1; + assert(!info->signal); + list_del(&info->link); + + chain = info->chain; + info->chain = NULL; + info->chained = false; + + if (!info->queued) + sna_dri2_event_free(info); + } + + priv->chain = NULL; + } +} + void sna_dri2_decouple_window(WindowPtr win) { struct dri2_window *priv; @@ -1418,50 +1902,34 @@ void sna_dri2_decouple_window(WindowPtr win) return; DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); + decouple_window(win, priv, to_sna_from_drawable(&win->drawable), true); - if (priv->front) { - struct sna *sna = to_sna_from_drawable(&win->drawable); - assert(priv->crtc); - sna_shadow_unset_crtc(sna, priv->crtc); - _sna_dri2_destroy_buffer(sna, priv->front); - priv->front = NULL; - } + priv->scanout = -1; } void sna_dri2_destroy_window(WindowPtr win) { struct dri2_window *priv; + struct sna *sna; priv = dri2_window(win); if (priv == NULL) return; DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); + sna = to_sna_from_drawable(&win->drawable); + decouple_window(win, priv, sna, false); - if (priv->front) { - struct sna *sna = to_sna_from_drawable(&win->drawable); - assert(priv->crtc); - sna_shadow_unset_crtc(sna, priv->crtc); - _sna_dri2_destroy_buffer(sna, priv->front); - } - - if (priv->chain) { - struct sna_dri2_event *info, *chain; - - DBG(("%s: freeing chain\n", __FUNCTION__)); - - chain = priv->chain; - while ((info = chain)) { - info->draw = NULL; - 
info->client = NULL; - list_del(&info->link); + while (!list_is_empty(&priv->cache)) { + struct dri_bo *c; - chain = info->chain; - info->chain = NULL; + c = list_first_entry(&priv->cache, struct dri_bo, link); + list_del(&c->link); - if (!info->queued) - sna_dri2_event_free(info); - } + DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); + assert(c->bo); + kgem_bo_destroy(&sna->kgem, c->bo); + free(c); } free(priv); @@ -1479,19 +1947,30 @@ sna_dri2_flip(struct sna_dri2_event *info) { struct kgem_bo *bo = get_private(info->back)->bo; struct kgem_bo *tmp_bo; - uint32_t tmp_name; + uint32_t tmp_name, tmp_flags; int tmp_pitch; DBG(("%s(type=%d)\n", __FUNCTION__, info->type)); assert(sna_pixmap_get_buffer(info->sna->front) == info->front); assert(get_drawable_pixmap(info->draw)->drawable.height * bo->pitch <= kgem_bo_size(bo)); + assert(get_private(info->front)->size == get_private(info->back)->size); assert(bo->refcnt); + if (info->sna->mode.flip_active) { + DBG(("%s: %d flips still active, aborting\n", + __FUNCTION__, info->sna->mode.flip_active)); + return false; + } + + assert(!info->queued); if (!sna_page_flip(info->sna, bo, sna_dri2_flip_handler, info->type == FLIP_ASYNC ? NULL : info)) return false; + DBG(("%s: queued flip=%p\n", __FUNCTION__, info->type == FLIP_ASYNC ? 
NULL : info)); + assert(info->signal || info->type != FLIP_THROTTLE); + assert(info->sna->dri2.flip_pending == NULL || info->sna->dri2.flip_pending == info); if (info->type != FLIP_ASYNC) @@ -1505,13 +1984,21 @@ sna_dri2_flip(struct sna_dri2_event *info) tmp_bo = get_private(info->front)->bo; tmp_name = info->front->name; tmp_pitch = info->front->pitch; + tmp_flags = info->front->flags; + + assert(tmp_bo->active_scanout > 0); + tmp_bo->active_scanout--; set_bo(info->sna->front, bo); + info->front->flags = info->back->flags; info->front->name = info->back->name; info->front->pitch = info->back->pitch; get_private(info->front)->bo = bo; + bo->active_scanout++; + assert(bo->active_scanout <= bo->refcnt); + info->back->flags = tmp_flags; info->back->name = tmp_name; info->back->pitch = tmp_pitch; get_private(info->back)->bo = tmp_bo; @@ -1521,6 +2008,7 @@ sna_dri2_flip(struct sna_dri2_event *info) assert(get_private(info->back)->bo->refcnt); assert(get_private(info->front)->bo != get_private(info->back)->bo); + info->keepalive = KEEPALIVE; info->queued = true; return true; } @@ -1549,15 +2037,16 @@ can_flip(struct sna * sna, } assert(sna->scrn->vtSema); + assert(!sna->mode.hidden); if ((sna->flags & (SNA_HAS_FLIP | SNA_HAS_ASYNC_FLIP)) == 0) { DBG(("%s: no, pageflips disabled\n", __FUNCTION__)); return false; } - if (front->format != back->format) { + if (front->cpp != back->cpp) { DBG(("%s: no, format mismatch, front = %d, back = %d\n", - __FUNCTION__, front->format, back->format)); + __FUNCTION__, front->cpp, back->cpp)); return false; } @@ -1567,7 +2056,7 @@ can_flip(struct sna * sna, } if (!sna_crtc_is_on(crtc)) { - DBG(("%s: ref-pipe=%d is disabled\n", __FUNCTION__, sna_crtc_to_pipe(crtc))); + DBG(("%s: ref-pipe=%d is disabled\n", __FUNCTION__, sna_crtc_pipe(crtc))); return false; } @@ -1581,7 +2070,7 @@ can_flip(struct sna * sna, if (sna_pixmap_get_buffer(pixmap) != front) { DBG(("%s: no, DRI2 drawable is no longer attached (old name=%d, new name=%d) to 
pixmap=%ld\n", __FUNCTION__, front->name, - sna_pixmap_get_buffer(pixmap) ? ((DRI2BufferPtr)sna_pixmap_get_buffer(pixmap))->name : 0, + sna_pixmap_get_buffer(pixmap) ? sna_pixmap_get_buffer(pixmap)->name : 0, pixmap->drawable.serialNumber)); return false; } @@ -1661,7 +2150,6 @@ can_flip(struct sna * sna, } DBG(("%s: yes, pixmap=%ld\n", __FUNCTION__, pixmap->drawable.serialNumber)); - assert(dri2_window(win)->front == NULL); return true; } @@ -1680,9 +2168,9 @@ can_xchg(struct sna *sna, if (draw->type == DRAWABLE_PIXMAP) return false; - if (front->format != back->format) { + if (front->cpp != back->cpp) { DBG(("%s: no, format mismatch, front = %d, back = %d\n", - __FUNCTION__, front->format, back->format)); + __FUNCTION__, front->cpp, back->cpp)); return false; } @@ -1714,6 +2202,8 @@ can_xchg(struct sna *sna, return false; } + DBG(("%s: back size=%x, front size=%x\n", + __FUNCTION__, get_private(back)->size, get_private(front)->size)); if (get_private(back)->size != get_private(front)->size) { DBG(("%s: no, back buffer %dx%d does not match front buffer %dx%d\n", __FUNCTION__, @@ -1766,9 +2256,9 @@ overlaps_other_crtc(struct sna *sna, xf86CrtcPtr desired) static bool can_xchg_crtc(struct sna *sna, DrawablePtr draw, + xf86CrtcPtr crtc, DRI2BufferPtr front, - DRI2BufferPtr back, - xf86CrtcPtr crtc) + DRI2BufferPtr back) { WindowPtr win = (WindowPtr)draw; PixmapPtr pixmap; @@ -1785,9 +2275,9 @@ can_xchg_crtc(struct sna *sna, if (draw->type == DRAWABLE_PIXMAP) return false; - if (front->format != back->format) { + if (front->cpp != back->cpp) { DBG(("%s: no, format mismatch, front = %d, back = %d\n", - __FUNCTION__, front->format, back->format)); + __FUNCTION__, front->cpp, back->cpp)); return false; } @@ -1866,20 +2356,21 @@ sna_dri2_xchg(DrawablePtr draw, DRI2BufferPtr front, DRI2BufferPtr back) back_bo = get_private(back)->bo; front_bo = get_private(front)->bo; - assert(front_bo != back_bo); - DBG(("%s: win=%ld, exchange front=%d/%d and back=%d/%d, pixmap=%ld 
%dx%d\n", + DBG(("%s: win=%ld, exchange front=%d/%d,ref=%d and back=%d/%d,ref=%d, pixmap=%ld %dx%d\n", __FUNCTION__, win->drawable.id, - front_bo->handle, front->name, - back_bo->handle, back->name, + front_bo->handle, front->name, get_private(front)->refcnt, + back_bo->handle, back->name, get_private(back)->refcnt, pixmap->drawable.serialNumber, pixmap->drawable.width, pixmap->drawable.height)); - DBG(("%s: back_bo pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", - __FUNCTION__, back_bo->pitch, kgem_bo_size(back_bo), back_bo->refcnt, back_bo->active_scanout)); - DBG(("%s: front_bo pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", - __FUNCTION__, front_bo->pitch, kgem_bo_size(front_bo), front_bo->refcnt, front_bo->active_scanout)); + DBG(("%s: back_bo handle=%d, pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", + __FUNCTION__, back_bo->handle, back_bo->pitch, kgem_bo_size(back_bo), back_bo->refcnt, back_bo->active_scanout)); + DBG(("%s: front_bo handle=%d, pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", + __FUNCTION__, front_bo->handle, front_bo->pitch, kgem_bo_size(front_bo), front_bo->refcnt, front_bo->active_scanout)); + + assert(front_bo != back_bo); assert(front_bo->refcnt); assert(back_bo->refcnt); @@ -1894,6 +2385,11 @@ sna_dri2_xchg(DrawablePtr draw, DRI2BufferPtr front, DRI2BufferPtr back) get_private(back)->bo = front_bo; mark_stale(back); + assert(front_bo->active_scanout > 0); + front_bo->active_scanout--; + back_bo->active_scanout++; + assert(back_bo->active_scanout <= back_bo->refcnt); + tmp = front->name; front->name = back->name; back->name = tmp; @@ -1902,17 +2398,23 @@ sna_dri2_xchg(DrawablePtr draw, DRI2BufferPtr front, DRI2BufferPtr back) front->pitch = back->pitch; back->pitch = tmp; + tmp = front->flags; + front->flags = back->flags; + back->flags = tmp; + assert(front_bo->refcnt); assert(back_bo->refcnt); + assert(front_bo->pitch == get_private(front)->bo->pitch); + assert(back_bo->pitch == get_private(back)->bo->pitch); + 
assert(get_private(front)->bo == sna_pixmap(pixmap)->gpu_bo); } static void sna_dri2_xchg_crtc(struct sna *sna, DrawablePtr draw, xf86CrtcPtr crtc, DRI2BufferPtr front, DRI2BufferPtr back) { WindowPtr win = (WindowPtr)draw; - DRI2Buffer2Ptr tmp; - struct kgem_bo *bo; + struct dri2_window *priv = dri2_window(win); DBG(("%s: exchange front=%d/%d and back=%d/%d, win id=%lu, pixmap=%ld %dx%d\n", __FUNCTION__, @@ -1922,162 +2424,130 @@ static void sna_dri2_xchg_crtc(struct sna *sna, DrawablePtr draw, xf86CrtcPtr cr get_window_pixmap(win)->drawable.serialNumber, get_window_pixmap(win)->drawable.width, get_window_pixmap(win)->drawable.height)); + assert(can_xchg_crtc(sna, draw, crtc, front, back)); - DamageRegionAppend(&win->drawable, &win->clipList); + if (APPLY_DAMAGE) { + DBG(("%s: marking drawable as damaged\n", __FUNCTION__)); + sna->ignore_copy_area = sna->flags & SNA_TEAR_FREE; + DamageRegionAppend(&win->drawable, &win->clipList); + } sna_shadow_set_crtc(sna, crtc, get_private(back)->bo); - DamageRegionProcessPending(&win->drawable); + if (APPLY_DAMAGE) { + sna->ignore_copy_area = false; + DamageRegionProcessPending(&win->drawable); + } - assert(dri2_window(win)->front == NULL); + if (priv->front == NULL) { + DRI2Buffer2Ptr tmp; - tmp = calloc(1, sizeof(*tmp) + sizeof(struct sna_dri2_private)); - if (tmp == NULL) { - back->attachment = -1; - if (get_private(back)->proxy == NULL) { - get_private(back)->pixmap = get_window_pixmap(win); - get_private(back)->proxy = sna_dri2_reference_buffer(sna_pixmap_get_buffer(get_private(back)->pixmap)); + tmp = calloc(1, sizeof(*tmp) + sizeof(struct sna_dri2_private)); + if (tmp == NULL) { + sna_shadow_unset_crtc(sna, crtc); + return; } - dri2_window(win)->front = sna_dri2_reference_buffer(back); - return; - } - *tmp = *back; - tmp->attachment = DRI2BufferFrontLeft; - tmp->driverPrivate = tmp + 1; - get_private(tmp)->refcnt = 1; - get_private(tmp)->bo = get_private(back)->bo; - get_private(tmp)->size = get_private(back)->size; - 
get_private(tmp)->pixmap = get_window_pixmap(win); - get_private(tmp)->proxy = sna_dri2_reference_buffer(sna_pixmap_get_buffer(get_private(tmp)->pixmap)); - dri2_window(win)->front = tmp; - - DBG(("%s: allocating new backbuffer\n", __FUNCTION__)); - back->name = 0; - bo = kgem_create_2d(&sna->kgem, - draw->width, draw->height, draw->bitsPerPixel, - get_private(back)->bo->tiling, - CREATE_SCANOUT); - if (bo != NULL) { - get_private(back)->bo = bo; - back->pitch = bo->pitch; - back->name = kgem_bo_flink(&sna->kgem, bo); - } - if (back->name == 0) { - if (bo != NULL) - kgem_bo_destroy(&sna->kgem, bo); - get_private(back)->bo = NULL; - back->attachment = -1; + tmp->attachment = DRI2BufferFrontLeft; + tmp->driverPrivate = tmp + 1; + tmp->cpp = back->cpp; + tmp->format = back->format; + + get_private(tmp)->refcnt = 1; + get_private(tmp)->bo = kgem_create_2d(&sna->kgem, + draw->width, draw->height, draw->bitsPerPixel, + get_private(back)->bo->tiling, + CREATE_SCANOUT | CREATE_EXACT); + if (get_private(tmp)->bo != NULL) { + tmp->pitch = get_private(tmp)->bo->pitch; + tmp->name = kgem_bo_flink(&sna->kgem, get_private(tmp)->bo); + } + if (tmp->name == 0) { + if (get_private(tmp)->bo != NULL) + kgem_bo_destroy(&sna->kgem, get_private(tmp)->bo); + sna_shadow_unset_crtc(sna, crtc); + return; + } + get_private(tmp)->size = get_private(back)->size; + get_private(tmp)->pixmap = get_private(front)->pixmap; + get_private(tmp)->proxy = sna_dri2_reference_buffer(front); + get_private(tmp)->bo->active_scanout++; + + priv->front = front = tmp; } -} + assert(front == priv->front); -static void frame_swap_complete(struct sna_dri2_event *frame, int type) -{ - const struct ust_msc *swap; + { + struct kgem_bo *front_bo = get_private(front)->bo; + struct kgem_bo *back_bo = get_private(back)->bo; + unsigned tmp; - if (frame->draw == NULL) - return; + assert(front_bo->refcnt); + assert(back_bo->refcnt); - assert(frame->client); + get_private(back)->bo = front_bo; + get_private(front)->bo = 
back_bo; + mark_stale(back); - swap = sna_crtc_last_swap(frame->crtc); - DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc=%lld], tv=%d.%06d\n", - __FUNCTION__, type, (long)frame->draw, frame->pipe, - (long long)swap->msc, - (long long)draw_current_msc(frame->draw, frame->crtc, swap->msc), - swap->tv_sec, swap->tv_usec)); + assert(front_bo->active_scanout > 0); + front_bo->active_scanout--; + back_bo->active_scanout++; + assert(back_bo->active_scanout <= back_bo->refcnt); - DRI2SwapComplete(frame->client, frame->draw, - draw_current_msc(frame->draw, frame->crtc, swap->msc), - swap->tv_sec, swap->tv_usec, - type, frame->event_complete, frame->event_data); -} + tmp = front->name; + front->name = back->name; + back->name = tmp; -static void fake_swap_complete(struct sna *sna, ClientPtr client, - DrawablePtr draw, xf86CrtcPtr crtc, - int type, DRI2SwapEventPtr func, void *data) -{ - const struct ust_msc *swap; - - swap = sna_crtc_last_swap(crtc); - DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc %lld], tv=%d.%06d\n", - __FUNCTION__, type, (long)draw->id, crtc ? sna_crtc_to_pipe(crtc) : -1, - (long long)swap->msc, - (long long)draw_current_msc(draw, crtc, swap->msc), - swap->tv_sec, swap->tv_usec)); + tmp = front->pitch; + front->pitch = back->pitch; + back->pitch = tmp; - DRI2SwapComplete(client, draw, - draw_current_msc(draw, crtc, swap->msc), - swap->tv_sec, swap->tv_usec, - type, func, data); + tmp = front->flags; + front->flags = back->flags; + back->flags = tmp; + } } static void chain_swap(struct sna_dri2_event *chain) { - union drm_wait_vblank vbl; + DBG(("%s: draw=%ld, queued?=%d, type=%d\n", + __FUNCTION__, (long)chain->draw->id, chain->queued, chain->type)); + + if (chain->queued) /* too early! */ + return; if (chain->draw == NULL) { sna_dri2_event_free(chain); return; } - if (chain->queued) /* too early! 
*/ - return; - assert(chain == dri2_chain(chain->draw)); - DBG(("%s: chaining draw=%ld, type=%d\n", - __FUNCTION__, (long)chain->draw->id, chain->type)); - chain->queued = true; + assert(chain->signal); switch (chain->type) { - case SWAP_THROTTLE: + case SWAP_COMPLETE: DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__)); - if (chain->sna->mode.shadow && - !chain->sna->mode.shadow_damage) { - /* recursed from wait_for_shadow(), simply requeue */ - DBG(("%s -- recursed from wait_for_shadow(), requeuing\n", __FUNCTION__)); - VG_CLEAR(vbl); - vbl.request.type = - DRM_VBLANK_RELATIVE | - DRM_VBLANK_EVENT; - vbl.request.sequence = 1; - vbl.request.signal = (uintptr_t)chain; - - if (!sna_wait_vblank(chain->sna, &vbl, chain->pipe)) - return; - - DBG(("%s -- requeue failed, errno=%d\n", __FUNCTION__, errno)); - } - if (can_xchg(chain->sna, chain->draw, chain->front, chain->back)) { sna_dri2_xchg(chain->draw, chain->front, chain->back); - } else if (can_xchg_crtc(chain->sna, chain->draw, chain->front, chain->back, chain->crtc)) { - sna_dri2_xchg_crtc(chain->sna, chain->draw, chain->crtc, chain->front, chain->back); + } else if (can_xchg_crtc(chain->sna, chain->draw, chain->crtc, + chain->front, chain->back)) { + sna_dri2_xchg_crtc(chain->sna, chain->draw, chain->crtc, + chain->front, chain->back); } else { - assert(chain->queued); - chain->bo = __sna_dri2_copy_region(chain->sna, chain->draw, NULL, - chain->back, chain->front, - true); + __sna_dri2_copy_event(chain, chain->sync | DRI2_BO); } + assert(get_private(chain->back)->bo != get_private(chain->front)->bo); case SWAP: break; default: return; } - VG_CLEAR(vbl); - vbl.request.type = - DRM_VBLANK_RELATIVE | - DRM_VBLANK_EVENT; - vbl.request.sequence = 1; - vbl.request.signal = (uintptr_t)chain; - if (sna_wait_vblank(chain->sna, &vbl, chain->pipe)) { + if ((chain->type == SWAP_COMPLETE && + !swap_limit(chain->draw, 2 + !chain->sync) && + !chain->sync) || + !sna_next_vblank(chain)) { DBG(("%s: vblank wait failed, 
unblocking client\n", __FUNCTION__)); frame_swap_complete(chain, DRI2_BLIT_COMPLETE); sna_dri2_event_free(chain); - } else { - if (chain->type == SWAP_THROTTLE && !swap_limit(chain->draw, 2)) { - DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); - frame_swap_complete(chain, DRI2_BLIT_COMPLETE); - } } } @@ -2086,40 +2556,27 @@ static inline bool rq_is_busy(struct kgem *kgem, struct kgem_bo *bo) if (bo == NULL) return false; - DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, - bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); - assert(bo->refcnt); - - if (bo->exec) - return true; - - if (bo->rq == NULL) - return false; - - return __kgem_busy(kgem, bo->handle); + return __kgem_bo_is_busy(kgem, bo); } -static bool sna_dri2_blit_complete(struct sna *sna, - struct sna_dri2_event *info) +static bool sna_dri2_blit_complete(struct sna_dri2_event *info) { - if (rq_is_busy(&sna->kgem, info->bo)) { - union drm_wait_vblank vbl; + if (!info->bo) + return true; + if (__kgem_bo_is_busy(&info->sna->kgem, info->bo)) { DBG(("%s: vsync'ed blit is still busy, postponing\n", __FUNCTION__)); - - VG_CLEAR(vbl); - vbl.request.type = - DRM_VBLANK_RELATIVE | - DRM_VBLANK_EVENT; - vbl.request.sequence = 1; - vbl.request.signal = (uintptr_t)info; - assert(info->queued); - if (!sna_wait_vblank(sna, &vbl, info->pipe)) + if (sna_next_vblank(info)) return false; + + kgem_bo_sync__gtt(&info->sna->kgem, info->bo); } DBG(("%s: blit finished\n", __FUNCTION__)); + kgem_bo_destroy(&info->sna->kgem, info->bo); + info->bo = NULL; + return true; } @@ -2128,11 +2585,12 @@ void sna_dri2_vblank_handler(struct drm_event_vblank *event) struct sna_dri2_event *info = (void *)(uintptr_t)event->user_data; struct sna *sna = info->sna; DrawablePtr draw; - union drm_wait_vblank vbl; uint64_t msc; - DBG(("%s(type=%d, sequence=%d)\n", __FUNCTION__, info->type, event->sequence)); + DBG(("%s(type=%d, sequence=%d, draw=%ld)\n", __FUNCTION__, info->type, event->sequence, 
info->draw ? info->draw->serialNumber : 0)); assert(info->queued); + info->queued = false; + msc = sna_crtc_record_event(info->crtc, event); draw = info->draw; @@ -2141,68 +2599,120 @@ void sna_dri2_vblank_handler(struct drm_event_vblank *event) goto done; } + assert((info->front == NULL && info->back == NULL) || info->front != info->back); switch (info->type) { case FLIP: /* If we can still flip... */ + assert(info->signal); if (can_flip(sna, draw, info->front, info->back, info->crtc) && sna_dri2_flip(info)) return; /* else fall through to blit */ case SWAP: - assert(info->queued); - if (sna->mode.shadow && !sna->mode.shadow_damage) { - /* recursed from wait_for_shadow(), simply requeue */ - DBG(("%s -- recursed from wait_for_shadow(), requeuing\n", __FUNCTION__)); - - } else if (can_xchg(info->sna, draw, info->front, info->back)) { + assert(info->signal); + if (can_xchg(info->sna, draw, info->front, info->back)) { sna_dri2_xchg(draw, info->front, info->back); - info->type = SWAP_WAIT; - } else if (can_xchg_crtc(sna, draw, info->front, info->back, info->crtc)) { - sna_dri2_xchg_crtc(sna, draw, info->crtc, info->front, info->back); - info->type = SWAP_WAIT; + info->type = SWAP_COMPLETE; + } else if (can_xchg_crtc(sna, draw, info->crtc, + info->front, info->back)) { + sna_dri2_xchg_crtc(sna, draw, info->crtc, + info->front, info->back); + info->type = SWAP_COMPLETE; } else { - assert(info->queued); - info->bo = __sna_dri2_copy_region(sna, draw, NULL, - info->back, info->front, true); - info->type = SWAP_WAIT; + __sna_dri2_copy_event(info, DRI2_BO | DRI2_SYNC); + info->type = SWAP_COMPLETE; } - VG_CLEAR(vbl); - vbl.request.type = - DRM_VBLANK_RELATIVE | - DRM_VBLANK_EVENT; - vbl.request.sequence = 1; - vbl.request.signal = (uintptr_t)info; - - assert(info->queued); - if (!sna_wait_vblank(sna, &vbl, info->pipe)) + if (sna_next_vblank(info)) return; DBG(("%s -- requeue failed, errno=%d\n", __FUNCTION__, errno)); + assert(info->pending.bo == NULL); + 
assert(info->keepalive == 1); /* fall through to SwapComplete */ - case SWAP_WAIT: - if (!sna_dri2_blit_complete(sna, info)) - return; - - DBG(("%s: swap complete, unblocking client (frame=%d, tv=%d.%06d)\n", __FUNCTION__, - event->sequence, event->tv_sec, event->tv_usec)); - frame_swap_complete(info, DRI2_BLIT_COMPLETE); - break; - - case SWAP_THROTTLE: + case SWAP_COMPLETE: DBG(("%s: %d complete, frame=%d tv=%d.%06d\n", __FUNCTION__, info->type, event->sequence, event->tv_sec, event->tv_usec)); - if (xorg_can_triple_buffer()) { - if (!sna_dri2_blit_complete(sna, info)) + if (info->signal) { + if (!sna_dri2_blit_complete(info)) return; DBG(("%s: triple buffer swap complete, unblocking client (frame=%d, tv=%d.%06d)\n", __FUNCTION__, event->sequence, event->tv_sec, event->tv_usec)); frame_swap_complete(info, DRI2_BLIT_COMPLETE); } + + if (info->pending.bo) { + struct copy current_back; + + DBG(("%s: swapping back handle=%d [name=%d, active=%d] for pending handle=%d [name=%d, active=%d], front handle=%d [name=%d, active=%d]\n", + __FUNCTION__, + get_private(info->back)->bo->handle, info->back->name, get_private(info->back)->bo->active_scanout, + info->pending.bo->handle, info->pending.name, info->pending.bo->active_scanout, + get_private(info->front)->bo->handle, info->front->name, get_private(info->front)->bo->active_scanout)); + + assert(info->pending.bo->active_scanout > 0); + info->pending.bo->active_scanout--; + + current_back.bo = get_private(info->back)->bo; + current_back.size = get_private(info->back)->size; + current_back.name = info->back->name; + current_back.flags = info->back->flags; + + get_private(info->back)->bo = info->pending.bo; + get_private(info->back)->size = info->pending.size; + info->back->name = info->pending.name; + info->back->pitch = info->pending.bo->pitch; + info->back->flags = info->pending.flags; + info->pending.bo = NULL; + + assert(get_private(info->back)->bo != get_private(info->front)->bo); + + if (can_xchg(info->sna, info->draw, 
info->front, info->back)) + sna_dri2_xchg(info->draw, info->front, info->back); + else if (can_xchg_crtc(info->sna, info->draw, info->crtc, + info->front, info->back)) + sna_dri2_xchg_crtc(info->sna, info->draw, info->crtc, + info->front, info->back); + else + __sna_dri2_copy_event(info, info->sync | DRI2_BO); + + sna_dri2_cache_bo(info->sna, info->draw, + get_private(info->back)->bo, + info->back->name, + get_private(info->back)->size, + info->back->flags); + + get_private(info->back)->bo = current_back.bo; + get_private(info->back)->size = current_back.size; + info->back->name = current_back.name; + info->back->pitch = current_back.bo->pitch; + info->back->flags = current_back.flags; + + DBG(("%s: restored current back handle=%d [name=%d, active=%d], active=%d], front handle=%d [name=%d, active=%d]\n", + __FUNCTION__, + get_private(info->back)->bo->handle, info->back->name, get_private(info->back)->bo->active_scanout, + get_private(info->front)->bo->handle, info->front->name, get_private(info->front)->bo->active_scanout)); + + assert(info->draw); + assert(!info->signal); + info->keepalive++; + info->signal = true; + } + + if (--info->keepalive) { + if (sna_next_vblank(info)) + return; + + if (info->signal) { + DBG(("%s: triple buffer swap complete, unblocking client (frame=%d, tv=%d.%06d)\n", __FUNCTION__, + event->sequence, event->tv_sec, event->tv_usec)); + frame_swap_complete(info, DRI2_BLIT_COMPLETE); + } + } break; case WAITMSC: @@ -2218,11 +2728,11 @@ void sna_dri2_vblank_handler(struct drm_event_vblank *event) } if (info->chain) { + DBG(("%s: continuing chain\n", __FUNCTION__)); assert(info->chain != info); assert(info->draw == draw); - sna_dri2_remove_event((WindowPtr)draw, info); + sna_dri2_remove_event(info); chain_swap(info->chain); - info->draw = NULL; } done: @@ -2230,101 +2740,148 @@ done: DBG(("%s complete\n", __FUNCTION__)); } -static bool +static void sna_dri2_immediate_blit(struct sna *sna, struct sna_dri2_event *info, - bool sync, bool event) + 
bool sync) { - DrawablePtr draw = info->draw; - bool ret = false; + struct sna_dri2_event *chain = dri2_chain(info->draw); if (sna->flags & SNA_NO_WAIT) sync = false; - DBG(("%s: emitting immediate blit, throttling client, synced? %d, chained? %d, send-event? %d\n", - __FUNCTION__, sync, dri2_chain(draw) != info, - event)); + DBG(("%s: emitting immediate blit, throttling client, synced? %d, chained? %d, pipe %d\n", + __FUNCTION__, sync, chain != info, info->pipe)); + assert(chain); - info->type = SWAP_THROTTLE; - if (!sync || dri2_chain(draw) == info) { - DBG(("%s: no pending blit, starting chain\n", - __FUNCTION__)); + info->type = SWAP_COMPLETE; + info->sync = sync; + info->keepalive = KEEPALIVE; - info->queued = true; - info->bo = __sna_dri2_copy_region(sna, draw, NULL, - info->back, - info->front, - sync); - if (event) { - if (sync) { - union drm_wait_vblank vbl; - - VG_CLEAR(vbl); - vbl.request.type = - DRM_VBLANK_RELATIVE | - DRM_VBLANK_EVENT; - vbl.request.sequence = 1; - vbl.request.signal = (uintptr_t)info; - ret = !sna_wait_vblank(sna, &vbl, info->pipe); - if (ret) - event = !swap_limit(draw, 2); - } - if (event) { - DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); - frame_swap_complete(info, DRI2_BLIT_COMPLETE); - } + if (chain == info) { + DBG(("%s: no pending blit, starting chain\n", __FUNCTION__)); + + assert(info->front != info->back); + if (can_xchg(info->sna, info->draw, info->front, info->back)) { + sna_dri2_xchg(info->draw, info->front, info->back); + } else if (can_xchg_crtc(info->sna, info->draw, info->crtc, + info->front, info->back)) { + sna_dri2_xchg_crtc(info->sna, info->draw, info->crtc, + info->front, info->back); + } else + __sna_dri2_copy_event(info, sync | DRI2_BO); + + assert(info->signal); + + if ((!swap_limit(info->draw, 2 + !sync) && !sync) || + !sna_next_vblank(info)) { + DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); + frame_swap_complete(info, DRI2_BLIT_COMPLETE); + 
sna_dri2_event_free(info); + } + return; + } + + DBG(("%s: current event front=%d [name=%d, active?=%d], back=%d [name=%d, active?=%d]\n", __FUNCTION__, + get_private(chain->front)->bo->handle, chain->front->name, get_private(chain->front)->bo->active_scanout, + get_private(chain->back)->bo->handle, chain->back->name, get_private(chain->back)->bo->active_scanout)); + + if (chain->type == SWAP_COMPLETE && chain->front == info->front) { + assert(chain->draw == info->draw); + assert(chain->client == info->client); + assert(chain->event_complete == info->event_complete); + assert(chain->event_data == info->event_data); + assert(chain->queued); + + if ((!sync || !chain->sync) && chain->pending.bo) { + bool signal = chain->signal; + + DBG(("%s: swap elision, unblocking client\n", __FUNCTION__)); + assert(chain->draw); + chain->signal = true; + frame_swap_complete(chain, DRI2_EXCHANGE_COMPLETE); + chain->signal = signal; + + assert(chain->pending.bo->active_scanout > 0); + chain->pending.bo->active_scanout--; + + sna_dri2_cache_bo(chain->sna, chain->draw, + chain->pending.bo, + chain->pending.name, + chain->pending.size, + chain->pending.flags); + chain->pending.bo = NULL; + } + + if (chain->pending.bo == NULL && swap_limit(info->draw, 2 + !sync)) { + DBG(("%s: setting handle=%d as pending blit (current event front=%d, back=%d)\n", __FUNCTION__, + get_private(info->back)->bo->handle, + get_private(chain->front)->bo->handle, + get_private(chain->back)->bo->handle)); + chain->pending.bo = ref(get_private(info->back)->bo); + chain->pending.size = get_private(info->back)->size; + chain->pending.name = info->back->name; + chain->pending.flags = info->back->flags; + chain->sync = sync; + info->signal = false; /* transfer signal to pending */ + + /* Prevent us from handing it back on next GetBuffers */ + chain->pending.bo->active_scanout++; + + sna_dri2_event_free(info); + return; } - } else { - DBG(("%s: pending blit, chained\n", __FUNCTION__)); - ret = true; } - DBG(("%s: 
continue? %d\n", __FUNCTION__, ret)); - return ret; + DBG(("%s: pending blit, chained\n", __FUNCTION__)); } static bool sna_dri2_flip_continue(struct sna_dri2_event *info) { - DBG(("%s(mode=%d)\n", __FUNCTION__, info->mode)); + struct kgem_bo *bo = get_private(info->front)->bo; - if (info->mode > 0){ - struct kgem_bo *bo = get_private(info->front)->bo; + DBG(("%s(mode=%d)\n", __FUNCTION__, info->flip_continue)); + assert(info->flip_continue > 0); + info->type = info->flip_continue; + info->flip_continue = 0; - info->type = info->mode; + assert(!info->signal); + info->signal = info->type == FLIP_THROTTLE && info->draw; - if (bo != sna_pixmap(info->sna->front)->gpu_bo) - return false; + if (info->sna->mode.front_active == 0) + return false; - if (!sna_page_flip(info->sna, bo, sna_dri2_flip_handler, info)) - return false; + if (bo != sna_pixmap(info->sna->front)->gpu_bo) + return false; - assert(info->sna->dri2.flip_pending == NULL || - info->sna->dri2.flip_pending == info); - info->sna->dri2.flip_pending = info; - assert(info->queued); - } else { - info->type = -info->mode; + assert(!info->queued); + if (!sna_page_flip(info->sna, bo, sna_dri2_flip_handler, info)) + return false; - if (!info->draw) - return false; + DBG(("%s: queued flip=%p\n", __FUNCTION__, info)); + assert(info->sna->dri2.flip_pending == NULL || + info->sna->dri2.flip_pending == info); + info->sna->dri2.flip_pending = info; + info->queued = true; - if (!can_flip(info->sna, info->draw, info->front, info->back, info->crtc)) - return false; + return true; +} - assert(sna_pixmap_get_buffer(get_drawable_pixmap(info->draw)) == info->front); - if (!sna_dri2_flip(info)) - return false; +static bool +sna_dri2_flip_keepalive(struct sna_dri2_event *info) +{ + DBG(("%s(keepalive?=%d)\n", __FUNCTION__, info->keepalive-1)); + assert(info->keepalive > 0); + if (!--info->keepalive) + return false; - if (!xorg_can_triple_buffer()) { - sna_dri2_get_back(info->sna, info->draw, info->back, info); - DBG(("%s: fake 
triple buffering, unblocking client\n", __FUNCTION__)); - frame_swap_complete(info, DRI2_FLIP_COMPLETE); - } - } + if (info->draw == NULL) + return false; - info->mode = 0; - return true; + DBG(("%s: marking next flip as complete\n", __FUNCTION__)); + info->flip_continue = FLIP_COMPLETE; + return sna_dri2_flip_continue(info); } static void chain_flip(struct sna *sna) @@ -2332,8 +2889,8 @@ static void chain_flip(struct sna *sna) struct sna_dri2_event *chain = sna->dri2.flip_pending; assert(chain->type == FLIP); - DBG(("%s: chaining type=%d, cancelled?=%d\n", - __FUNCTION__, chain->type, chain->draw == NULL)); + DBG(("%s: chaining type=%d, cancelled?=%d window=%ld\n", + __FUNCTION__, chain->type, chain->draw == NULL, chain->draw ? chain->draw->id : 0)); sna->dri2.flip_pending = NULL; if (chain->draw == NULL) { @@ -2343,31 +2900,18 @@ static void chain_flip(struct sna *sna) assert(chain == dri2_chain(chain->draw)); assert(!chain->queued); - chain->queued = true; if (can_flip(sna, chain->draw, chain->front, chain->back, chain->crtc) && sna_dri2_flip(chain)) { DBG(("%s: performing chained flip\n", __FUNCTION__)); } else { DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__)); - chain->bo = __sna_dri2_copy_region(sna, chain->draw, NULL, - chain->back, chain->front, - true); + __sna_dri2_copy_event(chain, DRI2_SYNC); if (xorg_can_triple_buffer()) { - union drm_wait_vblank vbl; - - VG_CLEAR(vbl); - - chain->type = SWAP_WAIT; - vbl.request.type = - DRM_VBLANK_RELATIVE | - DRM_VBLANK_EVENT; - vbl.request.sequence = 1; - vbl.request.signal = (uintptr_t)chain; - - assert(chain->queued); - if (!sna_wait_vblank(sna, &vbl, chain->pipe)) + chain->type = SWAP_COMPLETE; + assert(chain->signal); + if (sna_next_vblank(chain)) return; } @@ -2381,8 +2925,10 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) { struct sna *sna = flip->sna; - DBG(("%s(pipe=%d, event=%d)\n", __FUNCTION__, flip->pipe, flip->type)); - assert(flip->queued); + DBG(("%s flip=%p (pipe=%d, 
event=%d, queued?=%d)\n", __FUNCTION__, flip, flip->pipe, flip->type, flip->queued)); + if (!flip->queued) /* pageflip died whilst being queued */ + return; + flip->queued = false; if (sna->dri2.flip_pending == flip) sna->dri2.flip_pending = NULL; @@ -2390,8 +2936,10 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) /* We assume our flips arrive in order, so we don't check the frame */ switch (flip->type) { case FLIP: - DBG(("%s: swap complete, unblocking client\n", __FUNCTION__)); - frame_swap_complete(flip, DRI2_FLIP_COMPLETE); + if (flip->signal) { + DBG(("%s: swap complete, unblocking client\n", __FUNCTION__)); + frame_swap_complete(flip, DRI2_FLIP_COMPLETE); + } sna_dri2_event_free(flip); if (sna->dri2.flip_pending) @@ -2399,27 +2947,35 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) break; case FLIP_THROTTLE: - DBG(("%s: triple buffer swap complete, unblocking client\n", __FUNCTION__)); - frame_swap_complete(flip, DRI2_FLIP_COMPLETE); + if (flip->signal) { + DBG(("%s: triple buffer swap complete, unblocking client\n", __FUNCTION__)); + frame_swap_complete(flip, DRI2_FLIP_COMPLETE); + } case FLIP_COMPLETE: + assert(!flip->signal); if (sna->dri2.flip_pending) { + DBG(("%s: pending flip\n", __FUNCTION__)); sna_dri2_event_free(flip); chain_flip(sna); - } else if (!flip->mode) { + } else if (!flip->flip_continue) { DBG(("%s: flip chain complete\n", __FUNCTION__)); + if (!sna_dri2_flip_keepalive(flip)) { + if (flip->chain) { + sna_dri2_remove_event(flip); + chain_swap(flip->chain); + } - if (flip->chain) { - sna_dri2_remove_event((WindowPtr)flip->draw, - flip); - chain_swap(flip->chain); - flip->draw = NULL; + sna_dri2_event_free(flip); } - - sna_dri2_event_free(flip); } else if (!sna_dri2_flip_continue(flip)) { DBG(("%s: no longer able to flip\n", __FUNCTION__)); - if (flip->draw == NULL || !sna_dri2_immediate_blit(sna, flip, false, flip->mode < 0)) - sna_dri2_event_free(flip); + if (flip->draw != NULL) + __sna_dri2_copy_event(flip, 
0); + if (flip->signal) { + DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); + frame_swap_complete(flip, DRI2_BLIT_COMPLETE); + } + sna_dri2_event_free(flip); } break; @@ -2433,17 +2989,27 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) } } +static int +sna_query_vblank(struct sna *sna, xf86CrtcPtr crtc, union drm_wait_vblank *vbl) +{ + VG_CLEAR(*vbl); + vbl->request.type = + _DRM_VBLANK_RELATIVE | pipe_select(sna_crtc_pipe(crtc)); + vbl->request.sequence = 0; + + return drmIoctl(sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, vbl); +} + static uint64_t get_current_msc(struct sna *sna, DrawablePtr draw, xf86CrtcPtr crtc) { union drm_wait_vblank vbl; - uint64_t ret = -1; + uint64_t ret; - VG_CLEAR(vbl); - vbl.request.type = _DRM_VBLANK_RELATIVE; - vbl.request.sequence = 0; - if (sna_wait_vblank(sna, &vbl, sna_crtc_to_pipe(crtc)) == 0) + if (sna_query_vblank(sna, crtc, &vbl) == 0) ret = sna_crtc_record_vblank(crtc, &vbl); + else + ret = sna_crtc_last_swap(crtc)->msc; return draw_current_msc(draw, crtc, ret); } @@ -2494,12 +3060,18 @@ static int use_triple_buffer(struct sna *sna, ClientPtr client, bool async) } static bool immediate_swap(struct sna *sna, - uint64_t target_msc, - uint64_t divisor, DrawablePtr draw, xf86CrtcPtr crtc, + uint64_t *target_msc, + uint64_t divisor, + uint64_t remainder, uint64_t *current_msc) { + /* + * If divisor is zero, or current_msc is smaller than target_msc + * we just need to make sure target_msc passes before initiating + * the swap. + */ if (divisor == 0) { *current_msc = -1; @@ -2508,72 +3080,97 @@ static bool immediate_swap(struct sna *sna, return true; } - if (target_msc) + if (*target_msc) *current_msc = get_current_msc(sna, draw, crtc); DBG(("%s: current_msc=%ld, target_msc=%ld -- %s\n", - __FUNCTION__, (long)*current_msc, (long)target_msc, - (*current_msc >= target_msc - 1) ? 
"yes" : "no")); - return *current_msc >= target_msc - 1; + __FUNCTION__, (long)*current_msc, (long)*target_msc, + (*current_msc >= *target_msc - 1) ? "yes" : "no")); + return *current_msc >= *target_msc - 1; } DBG(("%s: explicit waits requests, divisor=%ld\n", __FUNCTION__, (long)divisor)); *current_msc = get_current_msc(sna, draw, crtc); - return false; + if (*current_msc >= *target_msc) { + DBG(("%s: missed target, queueing event for next: current=%lld, target=%lld, divisor=%lld, remainder=%lld\n", + __FUNCTION__, + (long long)*current_msc, + (long long)*target_msc, + (long long)divisor, + (long long)remainder)); + + *target_msc = *current_msc + remainder - *current_msc % divisor; + if (*target_msc <= *current_msc) + *target_msc += divisor; + } + + DBG(("%s: target_msc=%lld, current_msc=%lld, immediate?=%d\n", + __FUNCTION__, (long long)*target_msc, (long long)*current_msc, + *current_msc >= *target_msc - 1)); + return *current_msc >= *target_msc - 1; } static bool sna_dri2_schedule_flip(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, DRI2BufferPtr front, DRI2BufferPtr back, - CARD64 *target_msc, CARD64 divisor, CARD64 remainder, + bool immediate, CARD64 *target_msc, CARD64 current_msc, DRI2SwapEventPtr func, void *data) { struct sna *sna = to_sna_from_drawable(draw); struct sna_dri2_event *info; - uint64_t current_msc; - - if (immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) { - int type; + if (immediate) { + bool signal = false; info = sna->dri2.flip_pending; DBG(("%s: performing immediate swap on pipe %d, pending? %d, mode: %d, continuation? %d\n", - __FUNCTION__, sna_crtc_to_pipe(crtc), - info != NULL, info ? info->mode : 0, + __FUNCTION__, sna_crtc_pipe(crtc), + info != NULL, info ? 
info->flip_continue : 0, info && info->draw == draw)); if (info && info->draw == draw) { assert(info->type != FLIP); - assert(info->front == front); + assert(info->queued); + assert(info->front != info->back); + if (info->front != front) { + assert(info->front != NULL); + _sna_dri2_destroy_buffer(sna, draw, info->front); + info->front = sna_dri2_reference_buffer(front); + } if (info->back != back) { - _sna_dri2_destroy_buffer(sna, info->back); + assert(info->back != NULL); + _sna_dri2_destroy_buffer(sna, draw, info->back); info->back = sna_dri2_reference_buffer(back); } - if (info->mode || current_msc >= *target_msc) { - DBG(("%s: executing xchg of pending flip\n", - __FUNCTION__)); - sna_dri2_xchg(draw, front, back); - info->mode = type = FLIP_COMPLETE; - goto new_back; - } else { + assert(info->front != info->back); + DBG(("%s: executing xchg of pending flip: flip_continue=%d, keepalive=%d, chain?=%d\n", __FUNCTION__, info->flip_continue, info->keepalive, current_msc < *target_msc)); + sna_dri2_xchg(draw, front, back); + info->keepalive = KEEPALIVE; + if (xorg_can_triple_buffer() && + current_msc < *target_msc) { DBG(("%s: chaining flip\n", __FUNCTION__)); - type = FLIP_THROTTLE; - if (xorg_can_triple_buffer()) - info->mode = -type; - else - info->mode = -FLIP_COMPLETE; + info->flip_continue = FLIP_THROTTLE; goto out; + } else { + info->flip_continue = FLIP_COMPLETE; + signal = info->signal; + assert(info->draw); + info->signal = true; + goto new_back; } } - info = sna_dri2_add_event(sna, draw, client); + info = sna_dri2_add_event(sna, draw, client, crtc); if (info == NULL) return false; assert(info->crtc == crtc); info->event_complete = func; info->event_data = data; + assert(info->draw); + info->signal = true; + assert(front != back); info->front = sna_dri2_reference_buffer(front); info->back = sna_dri2_reference_buffer(back); @@ -2584,26 +3181,33 @@ sna_dri2_schedule_flip(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, */ DBG(("%s: queueing flip after 
pending completion\n", __FUNCTION__)); - info->type = type = FLIP; + info->type = FLIP; sna->dri2.flip_pending = info; - assert(info->queued); current_msc++; + } else if (sna->mode.flip_active) { + DBG(("%s: %d outstanding flips from old client, queueing\n", + __FUNCTION__, sna->mode.flip_active)); + goto queue; } else { - info->type = type = use_triple_buffer(sna, client, *target_msc == 0); + info->type = use_triple_buffer(sna, client, *target_msc == 0); if (!sna_dri2_flip(info)) { DBG(("%s: flip failed, falling back\n", __FUNCTION__)); + info->signal = false; sna_dri2_event_free(info); return false; } + assert(get_private(info->front)->bo->active_scanout); } - swap_limit(draw, 1 + (type == FLIP_THROTTLE)); - if (type >= FLIP_COMPLETE) { + swap_limit(draw, 1 + (info->type == FLIP_THROTTLE)); + if (info->type >= FLIP_COMPLETE) { new_back: if (!xorg_can_triple_buffer()) - sna_dri2_get_back(sna, draw, back, info); + sna_dri2_get_back(sna, draw, back); DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); frame_swap_complete(info, DRI2_EXCHANGE_COMPLETE); + assert(info->draw); + info->signal = signal; if (info->type == FLIP_ASYNC) sna_dri2_event_free(info); } @@ -2613,57 +3217,34 @@ out: return true; } - info = sna_dri2_add_event(sna, draw, client); +queue: + if (KEEPALIVE > 1 && sna->dri2.flip_pending) { + info = sna->dri2.flip_pending; + info->keepalive = 1; + } + + info = sna_dri2_add_event(sna, draw, client, crtc); if (info == NULL) return false; assert(info->crtc == crtc); info->event_complete = func; info->event_data = data; + assert(info->draw); + info->signal = true; info->type = FLIP; + assert(front != back); info->front = sna_dri2_reference_buffer(front); info->back = sna_dri2_reference_buffer(back); - /* - * If divisor is zero, or current_msc is smaller than target_msc - * we just need to make sure target_msc passes before initiating - * the swap. 
- */ - if (divisor && current_msc >= *target_msc) { - DBG(("%s: missed target, queueing event for next: current=%lld, target=%lld, divisor=%lld, remainder=%lld\n", - __FUNCTION__, - (long long)current_msc, - (long long)*target_msc, - (long long)divisor, - (long long)remainder)); - - *target_msc = current_msc + remainder - current_msc % divisor; - if (*target_msc <= current_msc) - *target_msc += divisor; - } - - if (*target_msc <= current_msc + 1) { - if (!sna_dri2_flip(info)) { - sna_dri2_event_free(info); - return false; - } + if (*target_msc <= current_msc + 1 && sna_dri2_flip(info)) { *target_msc = current_msc + 1; } else { - union drm_wait_vblank vbl; - - VG_CLEAR(vbl); - - vbl.request.type = - DRM_VBLANK_ABSOLUTE | - DRM_VBLANK_EVENT; - /* Account for 1 frame extra pageflip delay */ - vbl.reply.sequence = draw_target_seq(draw, *target_msc - 1); - vbl.request.signal = (uintptr_t)info; - - info->queued = true; - if (sna_wait_vblank(sna, &vbl, info->pipe)) { + if (!sna_wait_vblank(info, + draw_target_seq(draw, *target_msc - 1))) { + info->signal = false; sna_dri2_event_free(info); return false; } @@ -2674,128 +3255,6 @@ out: return true; } -static bool -sna_dri2_schedule_xchg(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, - DRI2BufferPtr front, DRI2BufferPtr back, - CARD64 *target_msc, CARD64 divisor, CARD64 remainder, - DRI2SwapEventPtr func, void *data) -{ - struct sna *sna = to_sna_from_drawable(draw); - uint64_t current_msc; - bool sync, event; - - if (!immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) - return false; - - sync = current_msc < *target_msc; - event = dri2_chain(draw) == NULL; - if (!sync || event) { - DBG(("%s: performing immediate xchg on pipe %d\n", - __FUNCTION__, sna_crtc_to_pipe(crtc))); - sna_dri2_xchg(draw, front, back); - } - if (sync) { - struct sna_dri2_event *info; - - info = sna_dri2_add_event(sna, draw, client); - if (!info) - goto complete; - - info->event_complete = func; - info->event_data = data; - - 
info->front = sna_dri2_reference_buffer(front); - info->back = sna_dri2_reference_buffer(back); - info->type = SWAP_THROTTLE; - - if (event) { - union drm_wait_vblank vbl; - - VG_CLEAR(vbl); - vbl.request.type = - DRM_VBLANK_RELATIVE | - DRM_VBLANK_EVENT; - vbl.request.sequence = 1; - vbl.request.signal = (uintptr_t)info; - - info->queued = true; - if (sna_wait_vblank(sna, &vbl, info->pipe)) { - sna_dri2_event_free(info); - goto complete; - } - - swap_limit(draw, 2); - } - } else { -complete: - fake_swap_complete(sna, client, draw, crtc, DRI2_EXCHANGE_COMPLETE, func, data); - } - - *target_msc = current_msc + 1; - return true; -} - -static bool -sna_dri2_schedule_xchg_crtc(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, - DRI2BufferPtr front, DRI2BufferPtr back, - CARD64 *target_msc, CARD64 divisor, CARD64 remainder, - DRI2SwapEventPtr func, void *data) -{ - struct sna *sna = to_sna_from_drawable(draw); - uint64_t current_msc; - bool sync, event; - - if (!immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) - return false; - - sync = current_msc < *target_msc; - event = dri2_chain(draw) == NULL; - if (!sync || event) { - DBG(("%s: performing immediate xchg only on pipe %d\n", - __FUNCTION__, sna_crtc_to_pipe(crtc))); - sna_dri2_xchg_crtc(sna, draw, crtc, front, back); - } - if (sync) { - struct sna_dri2_event *info; - - info = sna_dri2_add_event(sna, draw, client); - if (!info) - goto complete; - - info->event_complete = func; - info->event_data = data; - - info->front = sna_dri2_reference_buffer(front); - info->back = sna_dri2_reference_buffer(back); - info->type = SWAP_THROTTLE; - - if (event) { - union drm_wait_vblank vbl; - - VG_CLEAR(vbl); - vbl.request.type = - DRM_VBLANK_RELATIVE | - DRM_VBLANK_EVENT; - vbl.request.sequence = 1; - vbl.request.signal = (uintptr_t)info; - - info->queued = true; - if (sna_wait_vblank(sna, &vbl, info->pipe)) { - sna_dri2_event_free(info); - goto complete; - } - - swap_limit(draw, 2); - } - } else { -complete: - 
fake_swap_complete(sna, client, draw, crtc, DRI2_EXCHANGE_COMPLETE, func, data); - } - - *target_msc = current_msc + 1; - return true; -} - static bool has_pending_events(struct sna *sna) { struct pollfd pfd; @@ -2830,11 +3289,11 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, CARD64 remainder, DRI2SwapEventPtr func, void *data) { struct sna *sna = to_sna_from_drawable(draw); - union drm_wait_vblank vbl; xf86CrtcPtr crtc = NULL; struct sna_dri2_event *info = NULL; int type = DRI2_EXCHANGE_COMPLETE; CARD64 current_msc; + bool immediate; DBG(("%s: draw=%lu %dx%d, pixmap=%ld %dx%d, back=%u (refs=%d/%d, flush=%d) , front=%u (refs=%d/%d, flush=%d)\n", __FUNCTION__, @@ -2860,6 +3319,7 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, assert(get_private(front)->refcnt); assert(get_private(back)->refcnt); + assert(get_private(back)->bo != get_private(front)->bo); assert(get_private(front)->bo->refcnt); assert(get_private(back)->bo->refcnt); @@ -2876,17 +3336,17 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, goto skip; } - assert(sna_pixmap_from_drawable(draw)->flush); - if (draw->type != DRAWABLE_PIXMAP) { WindowPtr win = (WindowPtr)draw; struct dri2_window *priv = dri2_window(win); + if (priv->front) { - assert(front == priv->front); - assert(get_private(priv->front)->refcnt > 1); - get_private(priv->front)->refcnt--; - priv->front = NULL; + front = priv->front; + assert(front->attachment == DRI2BufferFrontLeft); + assert(get_private(front)->refcnt); + assert(get_private(front)->pixmap == get_drawable_pixmap(draw)); } + if (win->clipList.extents.x2 <= win->clipList.extents.x1 || win->clipList.extents.y2 <= win->clipList.extents.y1) { DBG(("%s: window clipped (%d, %d), (%d, %d)\n", @@ -2899,6 +3359,10 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, } } + DBG(("%s: using front handle=%d, active_scanout?=%d, flush?=%d\n", __FUNCTION__, 
get_private(front)->bo->handle, get_private(front)->bo->active_scanout, sna_pixmap_from_drawable(draw)->flush)); + assert(get_private(front)->bo->active_scanout); + assert(sna_pixmap_from_drawable(draw)->flush); + /* Drawable not displayed... just complete the swap */ if ((sna->flags & SNA_NO_WAIT) == 0) crtc = sna_dri2_get_crtc(draw); @@ -2914,109 +3378,112 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, sna_mode_wakeup(sna); } - if (can_xchg(sna, draw, front, back) && - sna_dri2_schedule_xchg(client, draw, crtc, front, back, + immediate = immediate_swap(sna, draw, crtc, target_msc, divisor, remainder, - func, data)) - return TRUE; - - if (can_xchg_crtc(sna, draw, front, back, crtc) && - sna_dri2_schedule_xchg_crtc(client, draw, crtc, front, back, - target_msc, divisor, remainder, - func, data)) - return TRUE; + ¤t_msc); if (can_flip(sna, draw, front, back, crtc) && sna_dri2_schedule_flip(client, draw, crtc, front, back, - target_msc, divisor, remainder, + immediate, target_msc, current_msc, func, data)) return TRUE; - VG_CLEAR(vbl); - - info = sna_dri2_add_event(sna, draw, client); + info = sna_dri2_add_event(sna, draw, client, crtc); if (!info) goto blit; assert(info->crtc == crtc); info->event_complete = func; info->event_data = data; + assert(info->draw); + info->signal = true; + assert(front != back); info->front = sna_dri2_reference_buffer(front); info->back = sna_dri2_reference_buffer(back); - if (immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) { + if (immediate) { bool sync = current_msc < *target_msc; - if (!sna_dri2_immediate_blit(sna, info, sync, true)) - sna_dri2_event_free(info); + sna_dri2_immediate_blit(sna, info, sync); *target_msc = current_msc + sync; + DBG(("%s: reported target_msc=%llu\n", + __FUNCTION__, *target_msc)); return TRUE; } - vbl.request.type = - DRM_VBLANK_ABSOLUTE | - DRM_VBLANK_EVENT; - vbl.request.signal = (uintptr_t)info; - - /* - * If divisor is zero, or current_msc is smaller 
than target_msc - * we just need to make sure target_msc passes before initiating - * the swap. - */ info->type = SWAP; - info->queued = true; - if (divisor && current_msc >= *target_msc) { - DBG(("%s: missed target, queueing event for next: current=%lld, target=%lld, divisor=%lld, remainder=%lld\n", - __FUNCTION__, - (long long)current_msc, - (long long)*target_msc, - (long long)divisor, - (long long)remainder)); - - *target_msc = current_msc + remainder - current_msc % divisor; - if (*target_msc <= current_msc) - *target_msc += divisor; - } - vbl.request.sequence = draw_target_seq(draw, *target_msc - 1); if (*target_msc <= current_msc + 1) { DBG(("%s: performing blit before queueing\n", __FUNCTION__)); - assert(info->queued); - info->bo = __sna_dri2_copy_region(sna, draw, NULL, - back, front, - true); - info->type = SWAP_WAIT; - - vbl.request.type = - DRM_VBLANK_RELATIVE | - DRM_VBLANK_EVENT; - vbl.request.sequence = 1; + __sna_dri2_copy_event(info, DRI2_SYNC); + info->type = SWAP_COMPLETE; + if (!sna_next_vblank(info)) + goto fake; + + DBG(("%s: reported target_msc=%llu\n", + __FUNCTION__, *target_msc)); *target_msc = current_msc + 1; - } + swap_limit(draw, 2); + } else { + if (!sna_wait_vblank(info, + draw_target_seq(draw, *target_msc - 1))) + goto blit; - assert(info->queued); - if (sna_wait_vblank(sna, &vbl, info->pipe)) - goto blit; + DBG(("%s: reported target_msc=%llu (in)\n", + __FUNCTION__, *target_msc)); + swap_limit(draw, 1); + } - DBG(("%s: reported target_msc=%llu\n", __FUNCTION__, *target_msc)); - swap_limit(draw, 1 + (info->type == SWAP_WAIT)); return TRUE; blit: DBG(("%s -- blit\n", __FUNCTION__)); - if (info) - sna_dri2_event_free(info); if (can_xchg(sna, draw, front, back)) { sna_dri2_xchg(draw, front, back); } else { - __sna_dri2_copy_region(sna, draw, NULL, back, front, false); + __sna_dri2_copy_region(sna, draw, NULL, back, front, 0); + front->flags = back->flags; type = DRI2_BLIT_COMPLETE; } + if (draw->type == DRAWABLE_PIXMAP) + goto fake; 
skip: DBG(("%s: unable to show frame, unblocking client\n", __FUNCTION__)); - if (crtc == NULL) - crtc = sna_mode_first_crtc(sna); - fake_swap_complete(sna, client, draw, crtc, type, func, data); - *target_msc = 0; /* offscreen, so zero out target vblank count */ + if (crtc == NULL && (sna->flags & SNA_NO_WAIT) == 0) + crtc = sna_primary_crtc(sna); + if (crtc && sna_crtc_is_on(crtc)) { + if (info == NULL) + info = sna_dri2_add_event(sna, draw, client, crtc); + if (info != dri2_chain(draw)) + goto fake; + + assert(info->crtc == crtc); + + info->type = SWAP_COMPLETE; + info->event_complete = func; + info->event_data = data; + assert(info->draw); + info->signal = true; + + if (info->front == NULL) + info->front = sna_dri2_reference_buffer(front); + if (info->back == NULL) + info->back = sna_dri2_reference_buffer(back); + + if (!sna_next_vblank(info)) + goto fake; + + swap_limit(draw, 1); + } else { +fake: + /* XXX Use a Timer to throttle the client? */ + fake_swap_complete(sna, client, draw, crtc, type, func, data); + if (info) { + assert(info->draw); + info->signal = false; + sna_dri2_event_free(info); + } + } + DBG(("%s: reported target_msc=%llu (in)\n", __FUNCTION__, *target_msc)); return TRUE; } @@ -3030,27 +3497,25 @@ sna_dri2_get_msc(DrawablePtr draw, CARD64 *ust, CARD64 *msc) struct sna *sna = to_sna_from_drawable(draw); xf86CrtcPtr crtc = sna_dri2_get_crtc(draw); const struct ust_msc *swap; + union drm_wait_vblank vbl; DBG(("%s(draw=%ld, pipe=%d)\n", __FUNCTION__, draw->id, - crtc ? sna_crtc_to_pipe(crtc) : -1)); + crtc ? 
sna_crtc_pipe(crtc) : -1)); - if (crtc != NULL) { - union drm_wait_vblank vbl; + /* Drawable not displayed, make up a *monotonic* value */ + if (crtc == NULL) + crtc = sna_primary_crtc(sna); + if (crtc == NULL) + return FALSE; - VG_CLEAR(vbl); - vbl.request.type = _DRM_VBLANK_RELATIVE; - vbl.request.sequence = 0; - if (sna_wait_vblank(sna, &vbl, sna_crtc_to_pipe(crtc)) == 0) - sna_crtc_record_vblank(crtc, &vbl); - } else - /* Drawable not displayed, make up a *monotonic* value */ - crtc = sna_mode_first_crtc(sna); + if (sna_query_vblank(sna, crtc, &vbl) == 0) + sna_crtc_record_vblank(crtc, &vbl); swap = sna_crtc_last_swap(crtc); *msc = draw_current_msc(draw, crtc, swap->msc); *ust = ust64(swap->tv_sec, swap->tv_usec); - DBG(("%s: msc=%llu, ust=%llu\n", __FUNCTION__, - (long long)*msc, (long long)*ust)); + DBG(("%s: msc=%llu [raw=%llu], ust=%llu\n", __FUNCTION__, + (long long)*msc, swap->msc, (long long)*ust)); return TRUE; } @@ -3068,32 +3533,22 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc struct sna_dri2_event *info = NULL; xf86CrtcPtr crtc; CARD64 current_msc; - union drm_wait_vblank vbl; const struct ust_msc *swap; - int pipe; crtc = sna_dri2_get_crtc(draw); DBG(("%s(pipe=%d, target_msc=%llu, divisor=%llu, rem=%llu)\n", - __FUNCTION__, crtc ? sna_crtc_to_pipe(crtc) : -1, + __FUNCTION__, crtc ? 
sna_crtc_pipe(crtc) : -1, (long long)target_msc, (long long)divisor, (long long)remainder)); /* Drawable not visible, return immediately */ if (crtc == NULL) - goto out_complete; - - pipe = sna_crtc_to_pipe(crtc); - - VG_CLEAR(vbl); - - /* Get current count */ - vbl.request.type = _DRM_VBLANK_RELATIVE; - vbl.request.sequence = 0; - if (sna_wait_vblank(sna, &vbl, pipe)) - goto out_complete; + crtc = sna_primary_crtc(sna); + if (crtc == NULL) + return FALSE; - current_msc = draw_current_msc(draw, crtc, sna_crtc_record_vblank(crtc, &vbl)); + current_msc = get_current_msc(sna, draw, crtc); /* If target_msc already reached or passed, set it to * current_msc to ensure we return a reasonable value back @@ -3104,15 +3559,13 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc if (divisor == 0 && current_msc >= target_msc) goto out_complete; - info = sna_dri2_add_event(sna, draw, client); + info = sna_dri2_add_event(sna, draw, client, crtc); if (!info) goto out_complete; assert(info->crtc == crtc); info->type = WAITMSC; - vbl.request.signal = (uintptr_t)info; - vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; /* * If divisor is zero, or current_msc is smaller than target_msc, * we just need to make sure target_msc passes before waking up the @@ -3129,10 +3582,8 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc if (target_msc <= current_msc) target_msc += divisor; } - vbl.request.sequence = draw_target_seq(draw, target_msc); - info->queued = true; - if (sna_wait_vblank(sna, &vbl, pipe)) + if (!sna_wait_vblank(info, draw_target_seq(draw, target_msc))) goto out_free_info; DRI2BlockClient(client, draw); @@ -3141,8 +3592,6 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc out_free_info: sna_dri2_event_free(info); out_complete: - if (crtc == NULL) - crtc = sna_mode_first_crtc(sna); swap = sna_crtc_last_swap(crtc); DRI2WaitMSCComplete(client, draw, draw_current_msc(draw, 
crtc, swap->msc), @@ -3231,9 +3680,18 @@ static bool is_level(const char **str) return false; } +static const char *options_get_dri(struct sna *sna) +{ +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) + return xf86GetOptValString(sna->Options, OPTION_DRI); +#else + return NULL; +#endif +} + static const char *dri_driver_name(struct sna *sna) { - const char *s = xf86GetOptValString(sna->Options, OPTION_DRI); + const char *s = options_get_dri(sna); if (is_level(&s)) { if (sna->kgem.gen < 030) @@ -3259,7 +3717,7 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) if (wedged(sna)) { xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, - "loading DRI2 whilst the GPU is wedged.\n"); + "loading DRI2 whilst acceleration is disabled.\n"); } if (xf86LoaderCheckSymbol("DRI2Version")) @@ -3274,7 +3732,7 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) memset(&info, '\0', sizeof(info)); info.fd = sna->kgem.fd; info.driverName = dri_driver_name(sna); - info.deviceName = intel_get_client_name(sna->dev); + info.deviceName = intel_get_master_name(sna->dev); DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n", __FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName)); @@ -3299,11 +3757,12 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) info.numDrivers = 2; info.driverNames = driverNames; driverNames[0] = info.driverName; - driverNames[1] = info.driverName; + driverNames[1] = "va_gl"; #endif #if DRI2INFOREC_VERSION >= 6 if (xorg_can_triple_buffer()) { + DBG(("%s: enabling Xorg triple buffering\n", __FUNCTION__)); info.version = 6; info.SwapLimitValidate = sna_dri2_swap_limit_validate; info.ReuseBufferNotify = sna_dri2_reuse_buffer; @@ -3311,8 +3770,10 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) #endif #if USE_ASYNC_SWAP + DBG(("%s: enabled async swap and buffer age\n", __FUNCTION__)); info.version = 10; info.scheduleSwap0 = 1; + info.bufferAge = 1; #endif return DRI2ScreenInit(screen, &info); diff --git a/src/sna/sna_dri3.c 
b/src/sna/sna_dri3.c index f586e242..ce4970ae 100644 --- a/src/sna/sna_dri3.c +++ b/src/sna/sna_dri3.c @@ -55,11 +55,14 @@ static inline void mark_dri3_pixmap(struct sna *sna, struct sna_pixmap *priv, st if (bo->exec) sna->kgem.flush = 1; if (bo == priv->gpu_bo) - priv->flush |= 3; + priv->flush |= FLUSH_READ | FLUSH_WRITE; else priv->shm = true; - sna_accel_watch_flush(sna, 1); + sna_watch_flush(sna, 1); + + kgem_bo_submit(&sna->kgem, bo); + kgem_bo_unclean(&sna->kgem, bo); } static void sna_sync_flush(struct sna *sna, struct sna_pixmap *priv) @@ -270,6 +273,8 @@ static PixmapPtr sna_dri3_pixmap_from_fd(ScreenPtr screen, priv->ptr = MAKE_STATIC_PTR(pixmap->devPrivate.ptr); } else { assert(priv->gpu_bo == bo); + priv->create = kgem_can_create_2d(&sna->kgem, + width, height, depth); priv->pinned |= PIN_DRI3; } list_add(&priv->cow_list, &sna->dri3.pixmaps); @@ -325,6 +330,15 @@ static int sna_dri3_fd_from_pixmap(ScreenPtr screen, return -1; } + if (bo->tiling && !sna->kgem.can_fence) { + if (!sna_pixmap_change_tiling(pixmap, I915_TILING_NONE)) { + DBG(("%s: unable to discard GPU tiling (%d) for DRI3 protocol\n", + __FUNCTION__, bo->tiling)); + return -1; + } + bo = priv->gpu_bo; + } + fd = kgem_bo_export_to_prime(&sna->kgem, bo); if (fd == -1) { DBG(("%s: exporting handle=%d to fd failed\n", __FUNCTION__, bo->handle)); diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c index 8a3599c7..1b4015de 100644 --- a/src/sna/sna_driver.c +++ b/src/sna/sna_driver.c @@ -57,6 +57,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include #include +#if defined(HAVE_X11_EXTENSIONS_DPMSCONST_H) +#include +#else +#define DPMSModeOn 0 +#define DPMSModeOff 3 +#endif + #include #include #include @@ -69,6 +76,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#if HAVE_DOT_GIT #include "git_version.h" +#else +#define git_version "not compiled from git" #endif #ifdef TEARFREE @@ -185,12 +194,12 @@ sna_set_fallback_mode(ScrnInfoPtr scrn) xf86DisableUnusedFunctions(scrn); #ifdef RANDR_12_INTERFACE - if (get_root_window(scrn->pScreen)) - xf86RandR12TellChanged(scrn->pScreen); + if (get_root_window(xf86ScrnToScreen(scrn))) + xf86RandR12TellChanged(xf86ScrnToScreen(scrn)); #endif } -static Bool sna_set_desired_mode(struct sna *sna) +static void sna_set_desired_mode(struct sna *sna) { ScrnInfoPtr scrn = sna->scrn; @@ -203,7 +212,6 @@ static Bool sna_set_desired_mode(struct sna *sna) } sna_mode_check(sna); - return TRUE; } /** @@ -222,7 +230,7 @@ static Bool sna_create_screen_resources(ScreenPtr screen) screen->width, screen->height, screen->rootDepth)); assert(sna->scrn == xf86ScreenToScrn(screen)); - assert(sna->scrn->pScreen == screen); + assert(to_screen_from_sna(sna) == screen); /* free the data used during miInitScreen */ free(screen->devPrivate); @@ -273,33 +281,89 @@ static Bool sna_create_screen_resources(ScreenPtr screen) if (serverGeneration == 1 && (sna->flags & SNA_IS_HOSTED) == 0) sna_copy_fbcon(sna); - (void)sna_set_desired_mode(sna); + sna_set_desired_mode(sna); } return TRUE; } -static Bool sna_save_screen(ScreenPtr screen, int mode) +static void sna_dpms_set(ScrnInfoPtr scrn, int mode, int flags) { - ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); + struct sna *sna = to_sna(scrn); + bool changed = false; + int i; - DBG(("%s(mode=%d)\n", __FUNCTION__, mode)); + DBG(("%s(mode=%d, flags=%d), vtSema=%d => off?=%d\n", + __FUNCTION__, mode, flags, scrn->vtSema, mode!=DPMSModeOn)); if (!scrn->vtSema) - return FALSE; + return; - xf86SaveScreen(screen, mode); - sna_crtc_config_notify(screen); - return TRUE; + /* Opencoded version of xf86DPMSSet(). + * + * The principle difference is to skip calling crtc->dpms() when + * turning off the display. 
This (on recent enough kernels at + * least) should be equivalent in power consumption, but require + * less work (hence quicker and less likely to fail) when switching + * back on. + */ + if (mode != DPMSModeOn) { + if (sna->mode.hidden == 0 && !(sna->flags & SNA_NO_DPMS)) { + DBG(("%s: hiding %d outputs\n", + __FUNCTION__, config->num_output)); + for (i = 0; i < config->num_output; i++) { + xf86OutputPtr output = config->output[i]; + if (output->crtc != NULL) + output->funcs->dpms(output, mode); + } + sna->mode.hidden = sna->mode.front_active + 1; + sna->mode.front_active = 0; + changed = true; + } + } else { + /* Re-enable CRTC that have been forced off via other means */ + if (sna->mode.hidden != 0) { + DBG(("%s: unhiding %d crtc, %d outputs\n", + __FUNCTION__, config->num_crtc, config->num_output)); + sna->mode.front_active = sna->mode.hidden - 1; + sna->mode.hidden = 0; + for (i = 0; i < config->num_crtc; i++) { + xf86CrtcPtr crtc = config->crtc[i]; + if (crtc->enabled) + crtc->funcs->dpms(crtc, mode); + } + + for (i = 0; i < config->num_output; i++) { + xf86OutputPtr output = config->output[i]; + if (output->crtc != NULL) + output->funcs->dpms(output, mode); + } + changed = true; + } + } + + DBG(("%s: hiding outputs? %d, front active? %d, changed? %d\n", + __FUNCTION__, sna->mode.hidden, sna->mode.front_active, changed)); + + if (changed) + sna_crtc_config_notify(xf86ScrnToScreen(scrn)); } -static void sna_dpms_set(ScrnInfoPtr scrn, int mode, int flags) +static Bool sna_save_screen(ScreenPtr screen, int mode) { - DBG(("%s(mode=%d, flags=%d)\n", __FUNCTION__, mode)); - if (!scrn->vtSema) - return; + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + + DBG(("%s(mode=%d [unblank=%d])\n", + __FUNCTION__, mode, xf86IsUnblank(mode))); - xf86DPMSSet(scrn, mode, flags); - sna_crtc_config_notify(xf86ScrnToScreen(scrn)); + /* We have to unroll xf86SaveScreen() here as it is called + * by DPMSSet() nullifying our special handling crtc->dpms() + * in sna_dpms_set(). 
+ */ + sna_dpms_set(scrn, + xf86IsUnblank(mode) ? DPMSModeOn : DPMSModeOff, + 0); + return TRUE; } static void sna_selftest(void) @@ -330,107 +394,6 @@ static void sna_setup_capabilities(ScrnInfoPtr scrn, int fd) #endif } -static int -namecmp(const char *s1, const char *s2) -{ - char c1, c2; - - if (!s1 || *s1 == 0) { - if (!s2 || *s2 == 0) - return 0; - else - return 1; - } - - while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') - s1++; - - while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') - s2++; - - c1 = isupper(*s1) ? tolower(*s1) : *s1; - c2 = isupper(*s2) ? tolower(*s2) : *s2; - while (c1 == c2) { - if (c1 == '\0') - return 0; - - s1++; - while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') - s1++; - - s2++; - while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') - s2++; - - c1 = isupper(*s1) ? tolower(*s1) : *s1; - c2 = isupper(*s2) ? tolower(*s2) : *s2; - } - - return c1 - c2; -} - -static Bool sna_option_cast_to_bool(struct sna *sna, int id, Bool val) -{ - const char *str = xf86GetOptValString(sna->Options, id); - - if (str == NULL) - return val; - - if (*str == '\0') - return TRUE; - - if (namecmp(str, "1") == 0) - return TRUE; - if (namecmp(str, "on") == 0) - return TRUE; - if (namecmp(str, "true") == 0) - return TRUE; - if (namecmp(str, "yes") == 0) - return TRUE; - - if (namecmp(str, "0") == 0) - return FALSE; - if (namecmp(str, "off") == 0) - return FALSE; - if (namecmp(str, "false") == 0) - return FALSE; - if (namecmp(str, "no") == 0) - return FALSE; - - return val; -} - -static unsigned sna_option_cast_to_unsigned(struct sna *sna, int id, unsigned val) -{ - const char *str = xf86GetOptValString(sna->Options, id); - unsigned v; - - if (str == NULL || *str == '\0') - return val; - - if (namecmp(str, "on") == 0) - return val; - if (namecmp(str, "true") == 0) - return val; - if (namecmp(str, "yes") == 0) - return val; - - if (namecmp(str, "0") == 0) - return 0; - if (namecmp(str, "off") == 0) - return 0; - if (namecmp(str, "false") == 0) - return 0; - if (namecmp(str, 
"no") == 0) - return 0; - - v = atoi(str); - if (v) - return v; - - return val; -} - static Bool fb_supports_depth(int fd, int depth) { struct drm_i915_gem_create create; @@ -470,16 +433,24 @@ static void setup_dri(struct sna *sna) unsigned level; sna->dri2.available = false; + sna->dri2.enable = false; sna->dri3.available = false; + sna->dri3.enable = false; + sna->dri3.override = false; - level = sna_option_cast_to_unsigned(sna, OPTION_DRI, ~0); + level = intel_option_cast_to_unsigned(sna->Options, OPTION_DRI, DEFAULT_DRI_LEVEL); #if HAVE_DRI3 - if (level >= 3) - sna->dri3.available = !!xf86LoadSubModule(sna->scrn, "dri3"); + sna->dri3.available = !!xf86LoadSubModule(sna->scrn, "dri3"); + sna->dri3.override = + !sna->dri3.available || + xf86IsOptionSet(sna->Options, OPTION_DRI); + if (level >= 3 && sna->kgem.gen >= 040) + sna->dri3.enable = sna->dri3.available; #endif #if HAVE_DRI2 + sna->dri2.available = !!xf86LoadSubModule(sna->scrn, "dri2"); if (level >= 2) - sna->dri2.available = !!xf86LoadSubModule(sna->scrn, "dri2"); + sna->dri2.enable = sna->dri2.available; #endif } @@ -498,13 +469,13 @@ static bool enable_tear_free(struct sna *sna) return ENABLE_TEAR_FREE; } -static void setup_tear_free(struct sna *sna) +static bool setup_tear_free(struct sna *sna) { MessageType from; Bool enable; if (sna->flags & SNA_LINEAR_FB) - return; + return false; if ((sna->flags & SNA_HAS_FLIP) == 0) { from = X_PROBED; @@ -518,11 +489,12 @@ static void setup_tear_free(struct sna *sna) from = X_CONFIG; if (enable) - sna->flags |= SNA_TEAR_FREE; + sna->flags |= SNA_WANT_TEAR_FREE | SNA_TEAR_FREE; done: xf86DrvMsg(sna->scrn->scrnIndex, from, "TearFree %sabled\n", sna->flags & SNA_TEAR_FREE ? 
"en" : "dis"); + return sna->flags & SNA_TEAR_FREE; } /** @@ -612,8 +584,10 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int probe) } intel_detect_chipset(scrn, sna->dev); - xf86DrvMsg(scrn->scrnIndex, X_PROBED, "CPU: %s\n", - sna_cpu_features_to_string(sna->cpu_features, buf)); + xf86DrvMsg(scrn->scrnIndex, X_PROBED, + "CPU: %s; using a maximum of %d threads\n", + sna_cpu_features_to_string(sna->cpu_features, buf), + sna_use_threads(64*1024, 64*1024, 1)); if (!xf86SetDepthBpp(scrn, 24, 0, 0, Support32bppFb | @@ -651,18 +625,11 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int probe) kgem_init(&sna->kgem, fd, xf86GetPciInfoForEntity(pEnt->index), sna->info->gen); - if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE) || - !sna_option_cast_to_bool(sna, OPTION_ACCEL_METHOD, TRUE)) { - xf86DrvMsg(sna->scrn->scrnIndex, X_CONFIG, - "Disabling hardware acceleration.\n"); - sna->kgem.wedged = true; - } if (xf86ReturnOptValBool(sna->Options, OPTION_TILING_FB, FALSE)) sna->flags |= SNA_LINEAR_FB; - - if (xf86ReturnOptValBool(sna->Options, OPTION_DELETE_DP12, FALSE)) - sna->flags |= SNA_REMOVE_OUTPUTS; + if (!sna->kgem.can_fence) + sna->flags |= SNA_LINEAR_FB; if (!xf86ReturnOptValBool(sna->Options, OPTION_SWAPBUFFERS_WAIT, TRUE)) sna->flags |= SNA_NO_WAIT; @@ -695,7 +662,8 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int probe) } scrn->currentMode = scrn->modes; - setup_tear_free(sna); + if (!setup_tear_free(sna) && sna_mode_wants_tear_free(sna)) + sna->kgem.needs_dirtyfb = sna->kgem.has_dirtyfb; xf86SetGamma(scrn, zeros); xf86SetDpi(scrn, 0, 0); @@ -721,11 +689,13 @@ cleanup: return FALSE; } +#if !HAVE_NOTIFY_FD static bool has_shadow(struct sna *sna) { - if (!sna->mode.shadow_damage) + if (!sna->mode.shadow_enabled) return false; + assert(sna->mode.shadow_damage); if (RegionNil(DamageRegion(sna->mode.shadow_damage))) return false; @@ -748,7 +718,7 @@ sna_block_handler(BLOCKHANDLER_ARGS_DECL) sna->BlockHandler(BLOCKHANDLER_ARGS); if (*tv == NULL || 
((*tv)->tv_usec | (*tv)->tv_sec) || has_shadow(sna)) - sna_accel_block_handler(sna, tv); + sna_accel_block(sna, tv); } static void @@ -770,52 +740,102 @@ sna_wakeup_handler(WAKEUPHANDLER_ARGS_DECL) sna->WakeupHandler(WAKEUPHANDLER_ARGS); - sna_accel_wakeup_handler(sna); - if (FD_ISSET(sna->kgem.fd, (fd_set*)read_mask)) { sna_mode_wakeup(sna); /* Clear the flag so that subsequent ZaphodHeads don't block */ FD_CLR(sna->kgem.fd, (fd_set*)read_mask); } } +#else +static void +sna_block_handler(void *data, void *_timeout) +{ + struct sna *sna = data; + int *timeout = _timeout; + struct timeval tv, *tvp; + + DBG(("%s (timeout=%d)\n", __FUNCTION__, *timeout)); + if (*timeout == 0) + return; + + if (*timeout < 0) { + tvp = NULL; + } else { + tv.tv_sec = *timeout / 1000; + tv.tv_usec = (*timeout % 1000) * 1000; + tvp = &tv; + } + + sna_accel_block(sna, &tvp); + if (tvp) + *timeout = tvp->tv_sec * 1000 + tvp->tv_usec / 1000; +} +#endif #if HAVE_UDEV +#include + static void sna_handle_uevents(int fd, void *closure) { struct sna *sna = closure; - struct udev_device *dev; - const char *str; struct stat s; - dev_t udev_devnum; + struct pollfd pfd; + bool hotplug = false; DBG(("%s\n", __FUNCTION__)); - dev = udev_monitor_receive_device(sna->uevent_monitor); - if (!dev) - return; + pfd.fd = udev_monitor_get_fd(sna->uevent_monitor); + pfd.events = POLLIN; + + if (fstat(sna->kgem.fd, &s)) + memset(&s, 0, sizeof(s)); + + while (poll(&pfd, 1, 0) > 0) { + struct udev_device *dev; + dev_t devnum; + + dev = udev_monitor_receive_device(sna->uevent_monitor); + if (dev == NULL) + break; + + devnum = udev_device_get_devnum(dev); + if (memcmp(&s.st_rdev, &devnum, sizeof(dev_t)) == 0) { + const char *str; + + str = udev_device_get_property_value(dev, "HOTPLUG"); + if (str && atoi(str) == 1) { + str = udev_device_get_property_value(dev, "CONNECTOR"); + if (str) { + hotplug |= sna_mode_find_hotplug_connector(sna, atoi(str)); + } else { + sna->flags |= SNA_REPROBE; + hotplug = true; + } + } + } - 
udev_devnum = udev_device_get_devnum(dev); - if (fstat(sna->kgem.fd, &s) || memcmp(&s.st_rdev, &udev_devnum, sizeof (dev_t))) { udev_device_unref(dev); - return; } - str = udev_device_get_property_value(dev, "HOTPLUG"); - if (str && atoi(str) == 1) { - ScrnInfoPtr scrn = sna->scrn; - - DBG(("%s: hotplug event (vtSema?=%d)\n", __FUNCTION__, scrn->vtSema)); + if (hotplug) { + DBG(("%s: hotplug event (vtSema?=%d)\n", + __FUNCTION__, sna->scrn->vtSema)); - if (scrn->vtSema) { - sna_mode_discover(sna); - sna_mode_check(sna); - RRGetInfo(xf86ScrnToScreen(scrn), TRUE); - } else + if (sna->scrn->vtSema) + sna_mode_discover(sna, true); + else sna->flags |= SNA_REPROBE; } +} - udev_device_unref(dev); +static bool has_randr(void) +{ +#if HAS_DIXREGISTERPRIVATEKEY + return dixPrivateKeyRegistered(rrPrivKey); +#else + return *rrPrivKey; +#endif } static void @@ -833,7 +853,7 @@ sna_uevent_init(struct sna *sna) /* RandR will be disabled if Xinerama is active, and so generating * RR hotplug events is then verboten. */ - if (!dixPrivateKeyRegistered(rrPrivKey)) + if (!has_randr()) goto out; u = NULL; @@ -861,7 +881,8 @@ sna_uevent_init(struct sna *sna) sna->uevent_monitor = mon; out: - xf86DrvMsg(sna->scrn->scrnIndex, from, "display hotplug detection %s\n", + xf86DrvMsg(sna->scrn->scrnIndex, from, + "Display hotplug detection %s\n", sna->uevent_monitor ? 
"enabled" : "disabled"); return; @@ -874,17 +895,10 @@ err_dev: static bool sna_uevent_poll(struct sna *sna) { - struct pollfd pfd; - if (sna->uevent_monitor == NULL) return false; - pfd.fd = udev_monitor_get_fd(sna->uevent_monitor); - pfd.events = POLLIN; - - while (poll(&pfd, 1, 0) > 0) - sna_handle_uevents(pfd.fd, sna); - + sna_handle_uevents(udev_monitor_get_fd(sna->uevent_monitor), sna); return true; } @@ -918,8 +932,10 @@ sna_randr_getinfo(ScreenPtr screen, Rotation *rotations) { struct sna *sna = to_sna_from_screen(screen); + DBG(("%s()\n", __FUNCTION__)); + if (!sna_uevent_poll(sna)) - sna_mode_discover(sna); + sna_mode_discover(sna, false); return sna->mode.rrGetInfo(screen, rotations); } @@ -931,8 +947,8 @@ static void sna_leave_vt(VT_FUNC_ARGS_DECL) DBG(("%s\n", __FUNCTION__)); - sna_accel_leave(sna); sna_mode_reset(sna); + sna_accel_leave(sna); if (intel_put_master(sna->dev)) xf86DrvMsg(scrn->scrnIndex, X_WARNING, @@ -948,6 +964,12 @@ static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL) /* XXX Note that we will leak kernel resources if !vtSema */ +#if HAVE_NOTIFY_FD + RemoveBlockAndWakeupHandlers(sna_block_handler, + (ServerWakeupHandlerProcPtr)NoopDDA, + sna); +#endif + sna_uevent_fini(sna); sna_mode_close(sna); @@ -1047,12 +1069,13 @@ static void sna_dri_init(struct sna *sna, ScreenPtr screen) { char str[128] = ""; - if (sna->dri2.available) + if (sna->dri2.enable) sna->dri2.open = sna_dri2_open(sna, screen); if (sna->dri2.open) strcat(str, "DRI2 "); - if (sna->dri3.available) + /* Load DRI3 in case DRI2 doesn't work, e.g. 
vgaarb */ + if (sna->dri3.enable || (!sna->dri2.open && !sna->dri3.override)) sna->dri3.open = sna_dri3_open(sna, screen); if (sna->dri3.open) strcat(str, "DRI3 "); @@ -1098,7 +1121,8 @@ sna_screen_init(SCREEN_INIT_ARGS_DECL) DBG(("%s\n", __FUNCTION__)); assert(sna->scrn == scrn); - assert(scrn->pScreen == NULL); /* set afterwards */ + assert(to_screen_from_sna(sna) == NULL || /* set afterwards */ + to_screen_from_sna(sna) == screen); assert(sna->freed_pixmap == NULL); @@ -1166,11 +1190,17 @@ sna_screen_init(SCREEN_INIT_ARGS_DECL) * later memory should be bound when allocating, e.g rotate_mem */ scrn->vtSema = TRUE; +#if !HAVE_NOTIFY_FD sna->BlockHandler = screen->BlockHandler; screen->BlockHandler = sna_block_handler; sna->WakeupHandler = screen->WakeupHandler; screen->WakeupHandler = sna_wakeup_handler; +#else + RegisterBlockAndWakeupHandlers(sna_block_handler, + (ServerWakeupHandlerProcPtr)NoopDDA, + sna); +#endif screen->SaveScreen = sna_save_screen; screen->CreateScreenResources = sna_create_screen_resources; @@ -1190,6 +1220,8 @@ sna_screen_init(SCREEN_INIT_ARGS_DECL) CMAP_PALETTED_TRUECOLOR)) return FALSE; + if (!xf86CheckBoolOption(scrn->options, "dpms", TRUE)) + sna->flags |= SNA_NO_DPMS; xf86DPMSInit(screen, sna_dpms_set, 0); sna_uevent_init(sna); @@ -1244,20 +1276,15 @@ static Bool sna_enter_vt(VT_FUNC_ARGS_DECL) if (intel_get_master(sna->dev)) return FALSE; + sna_accel_enter(sna); + if (sna->flags & SNA_REPROBE) { - DBG(("%s: reporting deferred hotplug event\n", - __FUNCTION__)); - sna_mode_discover(sna); - RRGetInfo(xf86ScrnToScreen(scrn), TRUE); - sna->flags &= ~SNA_REPROBE; + DBG(("%s: reporting deferred hotplug event\n", __FUNCTION__)); + sna_mode_discover(sna, true); } - if (!sna_set_desired_mode(sna)) { - intel_put_master(sna->dev); - return FALSE; - } + sna_set_desired_mode(sna); - sna_accel_enter(sna); return TRUE; } @@ -1379,6 +1406,9 @@ static void describe_sna(ScrnInfoPtr scrn) xf86DrvMsg(scrn->scrnIndex, X_INFO, "SNA compiled: %s\n", 
BUILDER_DESCRIPTION); #endif +#if HAS_DEBUG_FULL + ErrorF("SNA compiled with full debug logging; expect to run slowly\n"); +#endif #if !NDEBUG xf86DrvMsg(scrn->scrnIndex, X_INFO, "SNA compiled with assertions enabled\n"); @@ -1400,6 +1430,7 @@ static void describe_sna(ScrnInfoPtr scrn) "SNA compiled for use with valgrind\n"); VALGRIND_PRINTF("SNA compiled for use with valgrind\n"); #endif + DBG(("xf86-video-intel version: %s\n", git_version)); DBG(("pixman version: %s\n", pixman_version_string())); } diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c index a5dfb06b..6ee40336 100644 --- a/src/sna/sna_glyphs.c +++ b/src/sna/sna_glyphs.c @@ -74,7 +74,7 @@ #define NO_GLYPHS_VIA_MASK 0 #define FORCE_SMALL_MASK 0 /* -1 = never, 1 = always */ #define NO_GLYPHS_SLOW 0 -#define NO_DISCARD_MASK 0 +#define DISCARD_MASK 0 /* -1 = never, 1 = always */ #define CACHE_PICTURE_SIZE 1024 #define GLYPH_MIN_SIZE 8 @@ -185,7 +185,7 @@ void sna_glyphs_close(struct sna *sna) */ bool sna_glyphs_create(struct sna *sna) { - ScreenPtr screen = sna->scrn->pScreen; + ScreenPtr screen = to_screen_from_sna(sna); pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff }; unsigned int formats[] = { PIXMAN_a8, @@ -1094,6 +1094,9 @@ sna_glyph_get_image(GlyphPtr g, ScreenPtr s) static inline bool use_small_mask(struct sna *sna, int16_t width, int16_t height, int depth) { + if (depth < 8) + return true; + if (FORCE_SMALL_MASK) return FORCE_SMALL_MASK > 0; @@ -1156,12 +1159,6 @@ glyphs_via_mask(struct sna *sna, src_x += box.x1 - list->xOff; src_y += box.y1 - list->yOff; - if (format->depth < 8) { - format = PictureMatchFormat(screen, 8, PICT_a8); - if (!format) - return false; - } - component_alpha = NeedsComponent(format->format); if (use_small_mask(sna, width, height, format->depth)) { pixman_image_t *mask_image; @@ -1179,7 +1176,7 @@ use_small_mask: return false; mask_image = - pixman_image_create_bits(format->depth << 24 | format->format, + 
pixman_image_create_bits(pixmap->drawable.bitsPerPixel << 24 | format->format, width, height, pixmap->devPrivate.ptr, pixmap->devKind); @@ -1386,10 +1383,11 @@ next_image: DBG(("%s: atlas format=%08x, mask format=%08x\n", __FUNCTION__, (int)p->atlas->format, - (int)(format->depth << 24 | format->format))); + (int)mask->format)); memset(&tmp, 0, sizeof(tmp)); - if (p->atlas->format == (format->depth << 24 | format->format)) { + if (p->atlas->format == mask->format || + alphaless(p->atlas->format) == mask->format) { ok = sna->render.composite(sna, PictOpAdd, p->atlas, NULL, mask, 0, 0, 0, 0, 0, 0, @@ -1561,6 +1559,9 @@ skip_glyph: } } + assert(format); + DBG(("%s: format=%08d, depth=%d\n", + __FUNCTION__, format->format, format->depth)); out: if (list_extents != stack_extents) free(list_extents); @@ -1573,24 +1574,34 @@ static bool can_discard_mask(uint8_t op, PicturePtr src, PictFormatPtr mask, PictFormatPtr g; uint32_t color; - if (NO_DISCARD_MASK) - return false; + if (DISCARD_MASK) + return DISCARD_MASK > 0; DBG(("%s: nlist=%d, mask=%08x, depth %d, op=%d (bounded? %d)\n", __FUNCTION__, nlist, mask ? (unsigned)mask->format : 0, mask ? mask->depth : 0, op, op_is_bounded(op))); - if (nlist == 1 && list->len == 1) - return true; + if (nlist == 1 && list->len == 1) { + if (mask == list->format) + return true; + + g = list->format; + goto skip; + } - if (!op_is_bounded(op)) + if (!op_is_bounded(op)) { + DBG(("%s: unbounded op, not discarding\n", __FUNCTION__)); return false; + } /* No glyphs overlap and we are not performing a mask conversion. */ g = glyphs_format(nlist, list, glyphs); - if (mask == g) + if (mask == g) { + DBG(("%s: mask matches glyphs format, no conversion, so discard mask\n", + __FUNCTION__)); return true; + } DBG(("%s: preferred mask format %08x, depth %d\n", __FUNCTION__, g ? (unsigned)g->format : 0, g ? 
g->depth : 0)); @@ -1605,18 +1616,41 @@ static bool can_discard_mask(uint8_t op, PicturePtr src, PictFormatPtr mask, list++; } + + if (!sna_picture_is_solid(src, &color)) + return false; + + return color >> 24 == 0xff; } else { - if (PICT_FORMAT_A(mask->format) >= PICT_FORMAT_A(g->format)) +skip: + if (mask->format == g->format) return true; - if (g->depth != 1) - return false; - } + if (mask->format == alphaless(g->format)) + return true; + + if (PICT_FORMAT_TYPE(g->format) == PICT_TYPE_A && + PICT_FORMAT_TYPE(mask->format) != PICT_TYPE_A) + return true; - if (!sna_picture_is_solid(src, &color)) return false; + } +} - return color >> 24 == 0xff; +static uint32_t pixman_format(PictFormatPtr short_format) +{ + uint32_t bpp; + + bpp = short_format->depth; + if (bpp <= 1) + bpp = 1; + else if (bpp <= 8) + bpp = 8; + else if (bpp <= 16) + bpp = 16; + else + bpp = 32; + return bpp << 24 | short_format->format; } static void @@ -1756,7 +1790,7 @@ next: if (sigtrap_get() == 0) { if (mask_format) { pixman_composite_glyphs(op, src_image, dst_image, - mask_format->format | (mask_format->depth << 24), + pixman_format(mask_format), src_x + src_dx + region.extents.x1 - dst_x, src_y + src_dy + region.extents.y1 - dst_y, region.extents.x1, region.extents.y1, @@ -1815,10 +1849,10 @@ out: x, y, mask_format->depth, (long)mask_format->format, - (long)(mask_format->depth << 24 | mask_format->format), + (long)pixman_format(mask_format), NeedsComponent(mask_format->format))); mask_image = - pixman_image_create_bits(mask_format->depth << 24 | mask_format->format, + pixman_image_create_bits(pixman_format(mask_format), region.extents.x2 - region.extents.x1, region.extents.y2 - region.extents.y1, NULL, 0); @@ -2086,12 +2120,6 @@ glyphs_via_image(struct sna *sna, src_x += box.x1 - list->xOff; src_y += box.y1 - list->yOff; - if (format->depth < 8) { - format = PictureMatchFormat(screen, 8, PICT_a8); - if (!format) - return false; - } - DBG(("%s: small mask [format=%lx, depth=%d, size=%d], 
rendering glyphs to upload buffer\n", __FUNCTION__, (unsigned long)format->format, format->depth, (uint32_t)width*height*format->depth)); @@ -2104,7 +2132,7 @@ glyphs_via_image(struct sna *sna, return false; mask_image = - pixman_image_create_bits(format->depth << 24 | format->format, + pixman_image_create_bits(pixmap->drawable.bitsPerPixel << 24 | format->format, width, height, pixmap->devPrivate.ptr, pixmap->devKind); diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c index d6aa1294..d32bd583 100644 --- a/src/sna/sna_io.c +++ b/src/sna/sna_io.c @@ -105,8 +105,10 @@ read_boxes_inplace__cpu(struct kgem *kgem, if (!download_inplace__cpu(kgem, dst, bo, box, n)) return false; + if (bo->tiling == I915_TILING_Y) + return false; + assert(kgem_bo_can_map__cpu(kgem, bo, false)); - assert(bo->tiling != I915_TILING_Y); src = kgem_bo_map__cpu(kgem, bo); if (src == NULL) @@ -281,6 +283,9 @@ fallback: if (box[n].y2 > extents.y2) extents.y2 = box[n].y2; } + if (!can_blt && sna->render.max_3d_size == 0) + goto fallback; + if (kgem_bo_can_map(kgem, src_bo)) { /* Is it worth detiling? 
*/ if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096) @@ -477,6 +482,7 @@ fallback: goto fallback; _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL); tmp_nbox = nbox; tmp_box = box; @@ -539,6 +545,7 @@ fallback: break; _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL); tmp_box += nbox_this_time; } while (1); } else { @@ -597,6 +604,7 @@ fallback: break; _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL); tmp_box += nbox_this_time; } while (1); } @@ -666,8 +674,10 @@ write_boxes_inplace__tiled(struct kgem *kgem, { uint8_t *dst; + if (bo->tiling == I915_TILING_Y) + return false; + assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true)); - assert(bo->tiling != I915_TILING_Y); if (kgem_bo_can_map__cpu(kgem, bo, true)) { dst = kgem_bo_map__cpu(kgem, bo); @@ -778,6 +788,15 @@ static bool __upload_inplace(struct kgem *kgem, if (FORCE_INPLACE) return FORCE_INPLACE > 0; + if (bo->exec) + return false; + + if (bo->flush) + return true; + + if (kgem_bo_can_map__cpu(kgem, bo, true)) + return true; + /* If we are writing through the GTT, check first if we might be * able to almagamate a series of small writes into a single * operation. @@ -849,6 +868,8 @@ bool sna_write_boxes(struct sna *sna, PixmapPtr dst, if (box[n].y2 > extents.y2) extents.y2 = box[n].y2; } + if (!can_blt && sna->render.max_3d_size == 0) + goto fallback; /* Try to avoid switching rings... 
*/ if (!can_blt || kgem->ring == KGEM_RENDER || @@ -1038,6 +1059,7 @@ tile: goto fallback; _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); if (kgem->gen >= 0100) { cmd |= 8; @@ -1129,6 +1151,7 @@ tile: if (nbox) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); } kgem_bo_destroy(kgem, src_bo); @@ -1224,6 +1247,7 @@ tile: if (nbox) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); } kgem_bo_destroy(kgem, src_bo); @@ -1541,6 +1565,7 @@ tile: goto fallback; _kgem_set_mode(kgem, KGEM_BLT); } + kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); if (sna->kgem.gen >= 0100) { cmd |= 8; @@ -1636,6 +1661,7 @@ tile: if (nbox) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); } kgem_bo_destroy(kgem, src_bo); @@ -1732,6 +1758,7 @@ tile: if (nbox) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); + kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); } kgem_bo_destroy(kgem, src_bo); diff --git a/src/sna/sna_present.c b/src/sna/sna_present.c index 6dd6fe88..2796d972 100644 --- a/src/sna/sna_present.c +++ b/src/sna/sna_present.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -38,21 +39,73 @@ static present_screen_info_rec present_info; struct sna_present_event { - uint64_t event_id; xf86CrtcPtr crtc; + struct sna *sna; + struct list link; + uint64_t *event_id; + uint64_t target_msc; + int n_event_id; + bool queued; }; +static void sna_present_unflip(ScreenPtr screen, uint64_t event_id); +static bool sna_present_queue(struct sna_present_event *info, + uint64_t last_msc); + static inline struct sna_present_event * to_present_event(uintptr_t data) { return (struct sna_present_event *)(data & ~3); } +static struct sna_present_event *info_alloc(struct sna *sna) +{ + struct sna_present_event *info; + + info = sna->present.freed_info; + if (info) { + sna->present.freed_info 
= NULL; + return info; + } + + return malloc(sizeof(struct sna_present_event) + sizeof(uint64_t)); +} + +static void info_free(struct sna_present_event *info) +{ + struct sna *sna = info->sna; + + if (sna->present.freed_info) + free(sna->present.freed_info); + + sna->present.freed_info = info; +} + +static inline bool msc_before(uint64_t msc, uint64_t target) +{ + return (int64_t)(msc - target) < 0; +} + #define MARK_PRESENT(x) ((void *)((uintptr_t)(x) | 2)) -static int pipe_from_crtc(RRCrtcPtr crtc) +static inline xf86CrtcPtr unmask_crtc(xf86CrtcPtr crtc) +{ + return (xf86CrtcPtr)((uintptr_t)crtc & ~1); +} + +static inline xf86CrtcPtr mark_crtc(xf86CrtcPtr crtc) +{ + return (xf86CrtcPtr)((uintptr_t)crtc | 1); +} + +static inline bool has_vblank(xf86CrtcPtr crtc) +{ + return (uintptr_t)crtc & 1; +} + +static inline int pipe_from_crtc(RRCrtcPtr crtc) { - return crtc ? sna_crtc_to_pipe(crtc->devPrivate) : -1; + return crtc ? sna_crtc_pipe(crtc->devPrivate) : -1; } static uint32_t pipe_select(int pipe) @@ -74,6 +127,215 @@ static inline int sna_wait_vblank(struct sna *sna, union drm_wait_vblank *vbl, i return drmIoctl(sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, vbl); } +static uint64_t gettime_ust64(void) +{ + struct timespec tv; + + if (clock_gettime(CLOCK_MONOTONIC, &tv)) + return GetTimeInMicros(); + + return ust64(tv.tv_sec, tv.tv_nsec / 1000); +} + +static void vblank_complete(struct sna_present_event *info, + uint64_t ust, uint64_t msc) +{ + int n; + + if (msc_before(msc, info->target_msc)) { + DBG(("%s: event=%d too early, now %lld, expected %lld\n", + __FUNCTION__, + info->event_id[0], + (long long)msc, (long long)info->target_msc)); + if (sna_present_queue(info, msc)) + return; + } + + DBG(("%s: %d events complete\n", __FUNCTION__, info->n_event_id)); + for (n = 0; n < info->n_event_id; n++) { + DBG(("%s: pipe=%d tv=%d.%06d msc=%lld (target=%lld), event=%lld complete%s\n", __FUNCTION__, + sna_crtc_pipe(info->crtc), + (int)(ust / 1000000), (int)(ust % 1000000), + 
(long long)msc, (long long)info->target_msc, + (long long)info->event_id[n], + info->target_msc && msc == (uint32_t)info->target_msc ? "" : ": MISS")); + present_event_notify(info->event_id[n], ust, msc); + } + if (info->n_event_id > 1) + free(info->event_id); + list_del(&info->link); + info_free(info); +} + +static uint32_t msc_to_delay(xf86CrtcPtr crtc, uint64_t target) +{ + const DisplayModeRec *mode = &crtc->desiredMode; + const struct ust_msc *swap = sna_crtc_last_swap(crtc); + int64_t delay, subframe; + + assert(mode->Clock); + + delay = target - swap->msc; + assert(delay >= 0); + if (delay > 1) { /* try to use the hw vblank for the last frame */ + delay--; + subframe = 0; + } else { + subframe = gettime_ust64() - swap_ust(swap); + subframe += 500; + subframe /= 1000; + } + delay *= mode->VTotal * mode->HTotal / mode->Clock; + if (subframe < delay) + delay -= subframe; + else + delay = 0; + + DBG(("%s: sleep %d frames, %llu ms\n", __FUNCTION__, + (int)(target - swap->msc), (long long)delay)); + assert(delay >= 0); + return MIN(delay, INT32_MAX); +} + +static CARD32 sna_fake_vblank_handler(OsTimerPtr timer, CARD32 now, void *data) +{ + struct sna_present_event *info = data; + union drm_wait_vblank vbl; + uint64_t msc, ust; + + DBG(("%s(event=%lldx%d, now=%d)\n", __FUNCTION__, (long long)info->event_id[0], info->n_event_id, now)); + + VG_CLEAR(vbl); + vbl.request.type = DRM_VBLANK_RELATIVE; + vbl.request.sequence = 0; + if (sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc)) == 0) { + ust = ust64(vbl.reply.tval_sec, vbl.reply.tval_usec); + msc = sna_crtc_record_vblank(info->crtc, &vbl); + DBG(("%s: event=%lld, target msc=%lld, now %lld\n", + __FUNCTION__, (long long)info->event_id[0], (long long)info->target_msc, (long long)msc)); + if (msc_before(msc, info->target_msc)) { + int delta = info->target_msc - msc; + uint32_t delay; + + DBG(("%s: too early, requeuing delta=%d\n", __FUNCTION__, delta)); + assert(info->target_msc - msc < 1ull<<31); + if (delta 
<= 2) { + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + vbl.request.sequence = info->target_msc; + vbl.request.signal = (uintptr_t)MARK_PRESENT(info); + if (sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc)) == 0) { + DBG(("%s: scheduled new vblank event for %lld\n", __FUNCTION__, (long long)info->target_msc)); + info->queued = true; + if (delta == 1) { + sna_crtc_set_vblank(info->crtc); + info->crtc = mark_crtc(info->crtc); + } + free(timer); + return 0; + } + } + + delay = msc_to_delay(info->crtc, info->target_msc); + if (delay) { + DBG(("%s: requeueing timer for %dms delay\n", __FUNCTION__, delay)); + return delay; + } + + /* As a last resort use a blocking wait. + * Less than a millisecond for (hopefully) a rare case. + */ + DBG(("%s: blocking wait!\n", __FUNCTION__)); + vbl.request.type = DRM_VBLANK_ABSOLUTE; + vbl.request.sequence = info->target_msc; + if (sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc)) == 0) { + ust = ust64(vbl.reply.tval_sec, vbl.reply.tval_usec); + msc = sna_crtc_record_vblank(info->crtc, &vbl); + } else { + DBG(("%s: blocking wait failed, fudging\n", + __FUNCTION__)); + goto fixup; + } + } + } else { +fixup: + ust = gettime_ust64(); + msc = info->target_msc; + DBG(("%s: event=%lld, CRTC OFF, target msc=%lld, was %lld (off)\n", + __FUNCTION__, (long long)info->event_id[0], (long long)info->target_msc, (long long)sna_crtc_last_swap(info->crtc)->msc)); + } + + vblank_complete(info, ust, msc); + free(timer); + return 0; +} + +static bool sna_fake_vblank(struct sna_present_event *info) +{ + const struct ust_msc *swap = sna_crtc_last_swap(info->crtc); + uint32_t delay; + + if (msc_before(swap->msc, info->target_msc)) + delay = msc_to_delay(info->crtc, info->target_msc); + else + delay = 0; + + DBG(("%s(event=%lldx%d, target_msc=%lld, msc=%lld, delay=%ums)\n", + __FUNCTION__, (long long)info->event_id[0], info->n_event_id, + (long long)info->target_msc, (long long)swap->msc, delay)); + if (delay == 0) { + 
uint64_t ust, msc; + + if (msc_before(swap->msc, info->target_msc)) { + /* Fixup and pretend it completed immediately */ + msc = info->target_msc; + ust = gettime_ust64(); + } else { + msc = swap->msc; + ust = swap_ust(swap); + } + + vblank_complete(info, ust, msc); + return true; + } + + return TimerSet(NULL, 0, delay, sna_fake_vblank_handler, info); +} + +static bool sna_present_queue(struct sna_present_event *info, + uint64_t last_msc) +{ + union drm_wait_vblank vbl; + int delta = info->target_msc - last_msc; + + DBG(("%s: target msc=%llu, seq=%u (last_msc=%llu), delta=%d\n", + __FUNCTION__, + (long long)info->target_msc, + (unsigned)info->target_msc, + (long long)last_msc, + delta)); + assert(info->target_msc - last_msc < 1ull<<31); + assert(delta >= 0); + + VG_CLEAR(vbl); + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + vbl.request.sequence = info->target_msc; + vbl.request.signal = (uintptr_t)MARK_PRESENT(info); + if (delta > 2 || + sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc))) { + DBG(("%s: vblank enqueue failed, faking delta=%d\n", __FUNCTION__, delta)); + if (!sna_fake_vblank(info)) + return false; + } else { + info->queued = true; + if (delta == 1) { + sna_crtc_set_vblank(info->crtc); + info->crtc = mark_crtc(info->crtc); + } + } + + return true; +} + static RRCrtcPtr sna_present_get_crtc(WindowPtr window) { @@ -81,7 +343,10 @@ sna_present_get_crtc(WindowPtr window) BoxRec box; xf86CrtcPtr crtc; - DBG(("%s\n", __FUNCTION__)); + DBG(("%s: window=%ld (pixmap=%ld), box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, window->drawable.id, get_window_pixmap(window)->drawable.serialNumber, + window->drawable.x, window->drawable.y, + window->drawable.width, window->drawable.height)); box.x1 = window->drawable.x; box.y1 = window->drawable.y; @@ -99,26 +364,59 @@ static int sna_present_get_ust_msc(RRCrtcPtr crtc, CARD64 *ust, CARD64 *msc) { struct sna *sna = to_sna_from_screen(crtc->pScreen); - int pipe = pipe_from_crtc(crtc); union 
drm_wait_vblank vbl; - DBG(("%s(pipe=%d)\n", __FUNCTION__, pipe)); + DBG(("%s(pipe=%d)\n", __FUNCTION__, sna_crtc_pipe(crtc->devPrivate))); + if (sna_crtc_has_vblank(crtc->devPrivate)) { + DBG(("%s: vblank active, reusing last swap msc/ust\n", + __FUNCTION__)); + goto last; + } VG_CLEAR(vbl); vbl.request.type = DRM_VBLANK_RELATIVE; vbl.request.sequence = 0; - if (sna_wait_vblank(sna, &vbl, pipe) == 0) { + if (sna_wait_vblank(sna, &vbl, sna_crtc_pipe(crtc->devPrivate)) == 0) { + struct sna_present_event *info; + *ust = ust64(vbl.reply.tval_sec, vbl.reply.tval_usec); *msc = sna_crtc_record_vblank(crtc->devPrivate, &vbl); + + info = info_alloc(sna); + if (info) { + info->crtc = crtc->devPrivate; + info->sna = sna; + info->target_msc = *msc + 1; + info->event_id = (uint64_t *)(info + 1); + info->n_event_id = 0; + + vbl.request.type = + DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + vbl.request.sequence = info->target_msc; + vbl.request.signal = (uintptr_t)MARK_PRESENT(info); + + if (sna_wait_vblank(info->sna, &vbl, + sna_crtc_pipe(info->crtc)) == 0) { + list_add(&info->link, + &sna->present.vblank_queue); + info->queued = true; + sna_crtc_set_vblank(info->crtc); + info->crtc = mark_crtc(info->crtc); + } else + info_free(info); + } } else { - const struct ust_msc *swap = sna_crtc_last_swap(crtc->devPrivate); - *ust = ust64(swap->tv_sec, swap->tv_usec); + const struct ust_msc *swap; +last: + swap = sna_crtc_last_swap(crtc->devPrivate); + *ust = swap_ust(swap); *msc = swap->msc; } - DBG(("%s: pipe=%d, tv=%d.%06d msc=%lld\n", __FUNCTION__, pipe, + DBG(("%s: pipe=%d, tv=%d.%06d seq=%d msc=%lld\n", __FUNCTION__, + sna_crtc_pipe(crtc->devPrivate), (int)(*ust / 1000000), (int)(*ust % 1000000), - (long long)*msc)); + vbl.reply.sequence, (long long)*msc)); return Success; } @@ -127,43 +425,106 @@ void sna_present_vblank_handler(struct drm_event_vblank *event) { struct sna_present_event *info = to_present_event(event->user_data); + uint64_t msc; - DBG(("%s: pipe=%d tv=%d.%06d msc=%d, 
event %lld complete\n", __FUNCTION__, - sna_crtc_to_pipe(info->crtc), - event->tv_sec, event->tv_usec, event->sequence, - (long long)info->event_id)); - present_event_notify(info->event_id, - ust64(event->tv_sec, event->tv_usec), - sna_crtc_record_event(info->crtc, event)); - free(info); + if (!info->queued) { + DBG(("%s: arrived unexpectedly early (not queued)\n", __FUNCTION__)); + assert(!has_vblank(info->crtc)); + return; + } + + if (has_vblank(info->crtc)) { + DBG(("%s: clearing immediate flag\n", __FUNCTION__)); + info->crtc = unmask_crtc(info->crtc); + sna_crtc_clear_vblank(info->crtc); + } + + msc = sna_crtc_record_event(info->crtc, event); + + if (info->sna->mode.shadow_wait) { + DBG(("%s: recursed from TearFree\n", __FUNCTION__)); + if (TimerSet(NULL, 0, 1, sna_fake_vblank_handler, info)) + return; + } + + vblank_complete(info, ust64(event->tv_sec, event->tv_usec), msc); } static int sna_present_queue_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc) { struct sna *sna = to_sna_from_screen(crtc->pScreen); - struct sna_present_event *event; - union drm_wait_vblank vbl; - - DBG(("%s(pipe=%d, event=%lld, msc=%lld)\n", - __FUNCTION__, pipe_from_crtc(crtc), - (long long)event_id, (long long)msc)); + struct sna_present_event *info, *tmp; + const struct ust_msc *swap; - event = malloc(sizeof(struct sna_present_event)); - if (event == NULL) + if (!sna_crtc_is_on(crtc->devPrivate)) return BadAlloc; - event->event_id = event_id; - event->crtc = crtc->devPrivate; + swap = sna_crtc_last_swap(crtc->devPrivate); + DBG(("%s(pipe=%d, event=%lld, msc=%lld, last swap=%lld)\n", + __FUNCTION__, sna_crtc_pipe(crtc->devPrivate), + (long long)event_id, (long long)msc, (long long)swap->msc)); - VG_CLEAR(vbl); - vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; - vbl.request.sequence = msc; - vbl.request.signal = (uintptr_t)MARK_PRESENT(event); - if (sna_wait_vblank(sna, &vbl, sna_crtc_to_pipe(event->crtc))) { - DBG(("%s: vblank enqueue failed\n", __FUNCTION__)); - 
free(event); - return BadMatch; + if (warn_unless((int64_t)(msc - swap->msc) >= 0)) { + DBG(("%s: pipe=%d tv=%d.%06d msc=%lld (target=%lld), event=%lld complete\n", __FUNCTION__, + sna_crtc_pipe(crtc->devPrivate), + swap->tv_sec, swap->tv_usec, + (long long)swap->msc, (long long)msc, + (long long)event_id)); + present_event_notify(event_id, swap_ust(swap), swap->msc); + return Success; + } + if (warn_unless(msc - swap->msc < 1ull<<31)) + return BadValue; + + list_for_each_entry(tmp, &sna->present.vblank_queue, link) { + if (tmp->target_msc == msc && + unmask_crtc(tmp->crtc) == crtc->devPrivate) { + uint64_t *events = tmp->event_id; + + if (tmp->n_event_id && + is_power_of_two(tmp->n_event_id)) { + events = malloc(2*sizeof(uint64_t)*tmp->n_event_id); + if (events == NULL) + return BadAlloc; + + memcpy(events, + tmp->event_id, + tmp->n_event_id*sizeof(uint64_t)); + if (tmp->n_event_id != 1) + free(tmp->event_id); + tmp->event_id = events; + } + + DBG(("%s: appending event=%lld to vblank %lld x %d\n", + __FUNCTION__, (long long)event_id, (long long)msc, tmp->n_event_id+1)); + events[tmp->n_event_id++] = event_id; + return Success; + } + if ((int64_t)(tmp->target_msc - msc) > 0) { + DBG(("%s: previous target_msc=%lld invalid for coalescing\n", + __FUNCTION__, (long long)tmp->target_msc)); + break; + } + } + + info = info_alloc(sna); + if (info == NULL) + return BadAlloc; + + info->crtc = crtc->devPrivate; + info->sna = sna; + info->target_msc = msc; + info->event_id = (uint64_t *)(info + 1); + info->event_id[0] = event_id; + info->n_event_id = 1; + list_add_tail(&info->link, &tmp->link); + info->queued = false; + + if (!sna_present_queue(info, swap->msc)) { + list_del(&info->link); + info_free(info); + return BadAlloc; } return Success; @@ -180,14 +541,6 @@ sna_present_abort_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc) static void sna_present_flush(WindowPtr window) { - PixmapPtr pixmap = get_window_pixmap(window); - struct sna_pixmap *priv; - - 
DBG(("%s(pixmap=%ld)\n", __FUNCTION__, pixmap->drawable.serialNumber)); - - priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT | __MOVE_FORCE); - if (priv && priv->gpu_bo) - kgem_scanout_flush(&to_sna_from_pixmap(pixmap)->kgem, priv->gpu_bo); } static bool @@ -201,8 +554,13 @@ check_flip__crtc(struct sna *sna, assert(sna->scrn->vtSema); - if (sna->mode.shadow_active) { - DBG(("%s: shadow buffer active\n", __FUNCTION__)); + if (!sna->mode.front_active) { + DBG(("%s: DPMS off, no flips\n", __FUNCTION__)); + return FALSE; + } + + if (sna->mode.rr_active) { + DBG(("%s: RandR transformation active\n", __FUNCTION__)); return false; } @@ -224,6 +582,11 @@ sna_present_check_flip(RRCrtcPtr crtc, pixmap->drawable.serialNumber, sync_flip)); + if (!sna->scrn->vtSema) { + DBG(("%s: VT switched away, no flips\n", __FUNCTION__)); + return FALSE; + } + if (sna->flags & SNA_NO_FLIP) { DBG(("%s: flips not suported\n", __FUNCTION__)); return FALSE; @@ -231,7 +594,7 @@ sna_present_check_flip(RRCrtcPtr crtc, if (sync_flip) { if ((sna->flags & SNA_HAS_FLIP) == 0) { - DBG(("%s: async flips not suported\n", __FUNCTION__)); + DBG(("%s: sync flips not suported\n", __FUNCTION__)); return FALSE; } } else { @@ -257,24 +620,39 @@ sna_present_check_flip(RRCrtcPtr crtc, return FALSE; } - return TRUE; -} - -static uint64_t gettime_ust64(void) -{ - struct timespec tv; + if (flip->pinned) { + assert(flip->gpu_bo); + if (sna->flags & SNA_LINEAR_FB) { + if (flip->gpu_bo->tiling != I915_TILING_NONE) { + DBG(("%s: pined bo, tilng=%d needs NONE\n", + __FUNCTION__, flip->gpu_bo->tiling)); + return FALSE; + } + } else { + if (!sna->kgem.can_scanout_y && + flip->gpu_bo->tiling == I915_TILING_Y) { + DBG(("%s: pined bo, tilng=%d and can't scanout Y\n", + __FUNCTION__, flip->gpu_bo->tiling)); + return FALSE; + } + } - if (clock_gettime(CLOCK_MONOTONIC, &tv)) - return 0; + if (flip->gpu_bo->pitch & 63) { + DBG(("%s: pined bo, bad pitch=%d\n", + __FUNCTION__, flip->gpu_bo->pitch)); + return FALSE; 
+ } + } - return ust64(tv.tv_sec, tv.tv_nsec / 1000); + return TRUE; } static Bool -page_flip__async(RRCrtcPtr crtc, - uint64_t event_id, - uint64_t target_msc, - struct kgem_bo *bo) +flip__async(struct sna *sna, + RRCrtcPtr crtc, + uint64_t event_id, + uint64_t target_msc, + struct kgem_bo *bo) { DBG(("%s(pipe=%d, event=%lld, handle=%d)\n", __FUNCTION__, @@ -282,17 +660,17 @@ page_flip__async(RRCrtcPtr crtc, (long long)event_id, bo->handle)); - if (!sna_page_flip(to_sna_from_screen(crtc->pScreen), bo, NULL, NULL)) { + if (!sna_page_flip(sna, bo, NULL, NULL)) { DBG(("%s: async pageflip failed\n", __FUNCTION__)); present_info.capabilities &= ~PresentCapabilityAsync; return FALSE; } - DBG(("%s: pipe=%d tv=%d.%06d msc=%d, event %lld complete\n", __FUNCTION__, + DBG(("%s: pipe=%d tv=%ld.%06d msc=%lld (target=%lld), event=%lld complete\n", __FUNCTION__, pipe_from_crtc(crtc), - gettime_ust64() / 1000000, gettime_ust64() % 1000000, - sna_crtc_last_swap(crtc->devPrivate)->msc, - (long long)event_id)); + (long)(gettime_ust64() / 1000000), (int)(gettime_ust64() % 1000000), + crtc ? 
(long long)sna_crtc_last_swap(crtc->devPrivate)->msc : 0LL, + (long long)target_msc, (long long)event_id)); present_event_notify(event_id, gettime_ust64(), target_msc); return TRUE; } @@ -303,7 +681,12 @@ present_flip_handler(struct drm_event_vblank *event, void *data) struct sna_present_event *info = data; struct ust_msc swap; - DBG(("%s(sequence=%d)\n", __FUNCTION__, event->sequence)); + DBG(("%s(sequence=%d): event=%lld\n", __FUNCTION__, event->sequence, (long long)info->event_id[0])); + assert(info->n_event_id == 1); + if (!info->queued) { + DBG(("%s: arrived unexpectedly early (not queued)\n", __FUNCTION__)); + return; + } if (info->crtc == NULL) { swap.tv_sec = event->tv_sec; @@ -312,22 +695,33 @@ present_flip_handler(struct drm_event_vblank *event, void *data) } else swap = *sna_crtc_last_swap(info->crtc); - DBG(("%s: pipe=%d, tv=%d.%06d msc %lld, event %lld complete\n", __FUNCTION__, - info->crtc ? sna_crtc_to_pipe(info->crtc) : -1, + DBG(("%s: pipe=%d, tv=%d.%06d msc=%lld (target %lld), event=%lld complete%s\n", __FUNCTION__, + info->crtc ? sna_crtc_pipe(info->crtc) : -1, swap.tv_sec, swap.tv_usec, (long long)swap.msc, - (long long)info->event_id)); - present_event_notify(info->event_id, ust64(swap.tv_sec, swap.tv_usec), swap.msc); - free(info); + (long long)info->target_msc, + (long long)info->event_id[0], + info->target_msc && info->target_msc == swap.msc ? 
"" : ": MISS")); + present_event_notify(info->event_id[0], swap_ust(&swap), swap.msc); + if (info->crtc) + sna_crtc_clear_vblank(info->crtc); + + if (info->sna->present.unflip) { + DBG(("%s: executing queued unflip (event=%lld)\n", __FUNCTION__, (long long)info->sna->present.unflip)); + sna_present_unflip(xf86ScrnToScreen(info->sna->scrn), + info->sna->present.unflip); + info->sna->present.unflip = 0; + } + info_free(info); } static Bool -page_flip(ScreenPtr screen, - RRCrtcPtr crtc, - uint64_t event_id, - struct kgem_bo *bo) +flip(struct sna *sna, + RRCrtcPtr crtc, + uint64_t event_id, + uint64_t target_msc, + struct kgem_bo *bo) { - struct sna *sna = to_sna_from_screen(screen); - struct sna_present_event *event; + struct sna_present_event *info; DBG(("%s(pipe=%d, event=%lld, handle=%d)\n", __FUNCTION__, @@ -335,18 +729,27 @@ page_flip(ScreenPtr screen, (long long)event_id, bo->handle)); - event = malloc(sizeof(struct sna_present_event)); - if (event == NULL) + info = info_alloc(sna); + if (info == NULL) return FALSE; - event->event_id = event_id; - event->crtc = crtc ? crtc->devPrivate : NULL; - if (!sna_page_flip(sna, bo, present_flip_handler, event)) { + info->crtc = crtc ? 
crtc->devPrivate : NULL; + info->sna = sna; + info->event_id = (uint64_t *)(info + 1); + info->event_id[0] = event_id; + info->n_event_id = 1; + info->target_msc = target_msc; + info->queued = false; + + if (!sna_page_flip(sna, bo, present_flip_handler, info)) { DBG(("%s: pageflip failed\n", __FUNCTION__)); - free(event); + info_free(info); return FALSE; } + info->queued = true; + if (info->crtc) + sna_crtc_set_vblank(info->crtc); return TRUE; } @@ -358,12 +761,48 @@ get_flip_bo(PixmapPtr pixmap) DBG(("%s(pixmap=%ld)\n", __FUNCTION__, pixmap->drawable.serialNumber)); - priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | __MOVE_FORCE); + priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | __MOVE_SCANOUT | __MOVE_FORCE); if (priv == NULL) { DBG(("%s: cannot force pixmap to the GPU\n", __FUNCTION__)); return NULL; } + if (priv->gpu_bo->scanout) + return priv->gpu_bo; + + if (sna->kgem.has_llc && !wedged(sna) && !priv->pinned) { + struct kgem_bo *bo; + uint32_t tiling; + + tiling = I915_TILING_NONE; + if ((sna->flags & SNA_LINEAR_FB) == 0) + tiling = I915_TILING_X; + + bo = kgem_create_2d(&sna->kgem, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel, + tiling, CREATE_SCANOUT | CREATE_CACHED); + if (bo) { + BoxRec box; + + box.x1 = box.y1 = 0; + box.x2 = pixmap->drawable.width; + box.y2 = pixmap->drawable.height; + + if (sna->render.copy_boxes(sna, GXcopy, + &pixmap->drawable, priv->gpu_bo, 0, 0, + &pixmap->drawable, bo, 0, 0, + &box, 1, 0)) { + sna_pixmap_unmap(pixmap, priv); + kgem_bo_destroy(&sna->kgem, priv->gpu_bo); + + priv->gpu_bo = bo; + } else + kgem_bo_destroy(&sna->kgem, bo); + } + } + if (sna->flags & SNA_LINEAR_FB && priv->gpu_bo->tiling && !sna_pixmap_change_tiling(pixmap, I915_TILING_NONE)) { @@ -372,12 +811,17 @@ get_flip_bo(PixmapPtr pixmap) } if (priv->gpu_bo->tiling == I915_TILING_Y && + !sna->kgem.can_scanout_y && !sna_pixmap_change_tiling(pixmap, I915_TILING_X)) { DBG(("%s: invalid Y-tiling, cannot convert\n", 
__FUNCTION__)); return NULL; } - priv->pinned |= PIN_SCANOUT; + if (priv->gpu_bo->pitch & 63) { + DBG(("%s: invalid pitch, no conversion\n", __FUNCTION__)); + return NULL; + } + return priv->gpu_bo; } @@ -388,6 +832,7 @@ sna_present_flip(RRCrtcPtr crtc, PixmapPtr pixmap, Bool sync_flip) { + struct sna *sna = to_sna_from_pixmap(pixmap); struct kgem_bo *bo; DBG(("%s(pipe=%d, event=%lld, msc=%lld, pixmap=%ld, sync?=%d)\n", @@ -397,11 +842,32 @@ sna_present_flip(RRCrtcPtr crtc, (long long)target_msc, pixmap->drawable.serialNumber, sync_flip)); - if (!check_flip__crtc(to_sna_from_pixmap(pixmap), crtc)) { + if (!check_flip__crtc(sna, crtc)) { DBG(("%s: flip invalid for CRTC\n", __FUNCTION__)); return FALSE; } + assert(sna->present.unflip == 0); + + if (sna->flags & SNA_TEAR_FREE) { + DBG(("%s: disabling TearFree (was %s) in favour of Present flips\n", + __FUNCTION__, sna->mode.shadow_enabled ? "enabled" : "disabled")); + sna->mode.shadow_enabled = false; + } + assert(!sna->mode.shadow_enabled); + + if (sna->mode.flip_active) { + struct pollfd pfd; + + DBG(("%s: flips still pending, stalling\n", __FUNCTION__)); + pfd.fd = sna->kgem.fd; + pfd.events = POLLIN; + while (poll(&pfd, 1, 0) == 1) + sna_mode_wakeup(sna); + if (sna->mode.flip_active) + return FALSE; + } + bo = get_flip_bo(pixmap); if (bo == NULL) { DBG(("%s: flip invalid bo\n", __FUNCTION__)); @@ -409,9 +875,9 @@ sna_present_flip(RRCrtcPtr crtc, } if (sync_flip) - return page_flip(crtc->pScreen, crtc, event_id, bo); + return flip(sna, crtc, event_id, target_msc, bo); else - return page_flip__async(crtc, event_id, target_msc, bo); + return flip__async(sna, crtc, event_id, target_msc, bo); } static void @@ -421,29 +887,70 @@ sna_present_unflip(ScreenPtr screen, uint64_t event_id) struct kgem_bo *bo; DBG(("%s(event=%lld)\n", __FUNCTION__, (long long)event_id)); - if (sna->mode.front_active == 0 || sna->mode.shadow_active) { + if (sna->mode.front_active == 0 || sna->mode.rr_active) { const struct ust_msc *swap; 
DBG(("%s: no CRTC active, perform no-op flip\n", __FUNCTION__)); notify: - swap = sna_crtc_last_swap(sna_mode_first_crtc(sna)); - DBG(("%s: pipe=%d, tv=%d.%06d msc %lld, event %lld complete\n", __FUNCTION__, + swap = sna_crtc_last_swap(sna_primary_crtc(sna)); + DBG(("%s: pipe=%d, tv=%d.%06d msc=%lld, event=%lld complete\n", __FUNCTION__, -1, swap->tv_sec, swap->tv_usec, (long long)swap->msc, (long long)event_id)); - present_event_notify(event_id, - ust64(swap->tv_sec, swap->tv_usec), - swap->msc); + present_event_notify(event_id, swap_ust(swap), swap->msc); + return; + } + + assert(!sna->mode.shadow_enabled); + if (sna->mode.flip_active) { + DBG(("%s: %d outstanding flips, queueing unflip\n", __FUNCTION__, sna->mode.flip_active)); + assert(sna->present.unflip == 0); + sna->present.unflip = event_id; return; } + if (sna->flags & SNA_TEAR_FREE) { + DBG(("%s: %s TearFree after Present flips\n", + __FUNCTION__, sna->mode.shadow_damage != NULL ? "enabling" : "disabling")); + sna->mode.shadow_enabled = sna->mode.shadow_damage != NULL; + } + bo = get_flip_bo(screen->GetScreenPixmap(screen)); - if (bo == NULL || !page_flip(screen, NULL, event_id, bo)) { + if (bo == NULL) { +reset_mode: DBG(("%s: failed, trying to restore original mode\n", __FUNCTION__)); xf86SetDesiredModes(sna->scrn); goto notify; } + + /* Are we unflipping after a failure that left our ScreenP in place? 
*/ + if (!sna_needs_page_flip(sna, bo)) + goto notify; + + assert(sna_pixmap(screen->GetScreenPixmap(screen))->pinned & PIN_SCANOUT); + + if (sna->flags & SNA_HAS_ASYNC_FLIP) { + DBG(("%s: trying async flip restore\n", __FUNCTION__)); + if (flip__async(sna, NULL, event_id, 0, bo)) + return; + } + + if (!flip(sna, NULL, event_id, 0, bo)) + goto reset_mode; +} + +void sna_present_cancel_flip(struct sna *sna) +{ + if (sna->present.unflip) { + const struct ust_msc *swap; + + swap = sna_crtc_last_swap(sna_primary_crtc(sna)); + present_event_notify(sna->present.unflip, + swap_ust(swap), swap->msc); + + sna->present.unflip = 0; + } } static present_screen_info_rec present_info = { @@ -463,10 +970,13 @@ static present_screen_info_rec present_info = { bool sna_present_open(struct sna *sna, ScreenPtr screen) { + DBG(("%s(num_crtc=%d)\n", __FUNCTION__, sna->mode.num_real_crtc)); + if (sna->mode.num_real_crtc == 0) return false; sna_present_update(sna); + list_init(&sna->present.vblank_queue); return present_screen_init(screen, &present_info); } diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c index 3fbb9ecb..3e935d57 100644 --- a/src/sna/sna_render.c +++ b/src/sna/sna_render.c @@ -54,7 +54,7 @@ sna_format_for_depth(int depth) { switch (depth) { case 1: return PICT_a1; - case 4: return PICT_a4; + case 4: return PICT_x4a4; case 8: return PICT_a8; case 15: return PICT_x1r5g5b5; case 16: return PICT_r5g6b5; @@ -272,18 +272,6 @@ no_render_context_switch(struct kgem *kgem, } static void -no_render_retire(struct kgem *kgem) -{ - (void)kgem; -} - -static void -no_render_expire(struct kgem *kgem) -{ - (void)kgem; -} - -static void no_render_fini(struct sna *sna) { (void)sna; @@ -316,8 +304,6 @@ const char *no_render_init(struct sna *sna) render->fini = no_render_fini; sna->kgem.context_switch = no_render_context_switch; - sna->kgem.retire = no_render_retire; - sna->kgem.expire = no_render_expire; if (sna->kgem.has_blt) sna->kgem.ring = KGEM_BLT; @@ -407,10 +393,7 @@ 
use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt) } } - if (priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } + add_shm_flush(sna, priv); DBG(("%s for box=(%d, %d), (%d, %d)\n", __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); @@ -567,6 +550,7 @@ static struct kgem_bo *upload(struct sna *sna, assert(priv->gpu_damage == NULL); assert(priv->gpu_bo == NULL); assert(bo->proxy != NULL); + sna_damage_all(&priv->cpu_damage, pixmap); kgem_proxy_bo_attach(bo, &priv->gpu_bo); } } @@ -627,10 +611,7 @@ sna_render_pixmap_bo(struct sna *sna, !priv->cpu_bo->snoop && priv->cpu_bo->pitch < 4096) { DBG(("%s: CPU all damaged\n", __FUNCTION__)); channel->bo = priv->cpu_bo; - if (priv->shm) { - assert(!priv->flush); - sna_add_flush_pixmap(sna, priv, priv->cpu_bo); - } + add_shm_flush(sna, priv); goto done; } } @@ -1275,6 +1256,7 @@ sna_render_picture_extract(struct sna *sna, assert(priv->gpu_damage == NULL); assert(priv->gpu_bo == NULL); assert(bo->proxy != NULL); + sna_damage_all(&priv->cpu_damage, pixmap); kgem_proxy_bo_attach(bo, &priv->gpu_bo); } } @@ -1338,6 +1320,8 @@ sna_render_picture_convolve(struct sna *sna, */ DBG(("%s: origin=(%d,%d) kernel=%dx%d, size=%dx%d\n", __FUNCTION__, x_off, y_off, cw, ch, w, h)); + if (cw*ch > 32) /* too much loss of precision from quantization! 
*/ + return -1; assert(picture->pDrawable); assert(picture->filter == PictFilterConvolution); @@ -1388,9 +1372,9 @@ sna_render_picture_convolve(struct sna *sna, alpha = CreateSolidPicture(0, &color, &error); if (alpha) { sna_composite(PictOpAdd, picture, alpha, tmp, - x, y, + x-(x_off+i), y-(y_off+j), + 0, 0, 0, 0, - x_off+i, y_off+j, w, h); FreePicture(alpha, 0); } @@ -2183,11 +2167,11 @@ copy_overlap(struct sna *sna, uint8_t alu, ret = (sna->render.copy_boxes(sna, GXcopy, draw, bo, src_dx, src_dy, &tmp->drawable, tmp_bo, -extents->x1, -extents->y1, - box, n , 0) && + box, n, 0) && sna->render.copy_boxes(sna, alu, &tmp->drawable, tmp_bo, -extents->x1, -extents->y1, draw, bo, dst_dx, dst_dy, - box, n , 0)); + box, n, 0)); screen->DestroyPixmap(tmp); return ret; @@ -2308,16 +2292,22 @@ static bool can_copy_cpu(struct sna *sna, struct kgem_bo *src, struct kgem_bo *dst) { - if (src->tiling != dst->tiling) - return false; + DBG(("%s: tiling=%d:%d, pitch=%d:%d, can_map=%d:%d[%d]\n", + __FUNCTION__, + src->tiling, dst->tiling, + src->pitch, dst->pitch, + kgem_bo_can_map__cpu(&sna->kgem, src, false), + kgem_bo_can_map__cpu(&sna->kgem, dst, true), + sna->kgem.has_wc_mmap)); - if (src->pitch != dst->pitch) + if (src->tiling != dst->tiling) return false; if (!kgem_bo_can_map__cpu(&sna->kgem, src, false)) return false; - if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true)) + if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true) && + !sna->kgem.has_wc_mmap) return false; DBG(("%s -- yes, src handle=%d, dst handle=%d\n", __FUNCTION__, src->handle, dst->handle)); @@ -2330,31 +2320,62 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op, const DrawableRec *dst_draw, struct kgem_bo *dst_bo, int16_t dx, int16_t dy, const BoxRec *box, int n, unsigned flags) { + memcpy_box_func detile = NULL; void *dst, *src; - bool clipped; if (op != GXcopy) return false; - clipped = (n > 1 || - box->x1 + dx > 0 || - box->y1 + dy > 0 || - box->x2 + dx < dst_draw->width || - box->y2 + dy < dst_draw->height); + 
if (src_draw->depth != dst_draw->depth) + return false; dst = src = NULL; - if (!clipped && can_copy_cpu(sna, src_bo, dst_bo)) { - dst = kgem_bo_map__cpu(&sna->kgem, dst_bo); + if (can_copy_cpu(sna, src_bo, dst_bo)) { + if (src_bo->pitch != dst_bo->pitch || + dx != sx || dy != sy || n > 1 || + box->x1 + dx > 0 || + box->y1 + dy > 0 || + box->x2 + dx < dst_draw->width || + box->y2 + dy < dst_draw->height) { + if (dx != sx) /* not implemented in memcpy yet */ + goto use_gtt; + + switch (dst_bo->tiling) { + default: + case I915_TILING_Y: + goto use_gtt; + + case I915_TILING_X: + detile = sna->kgem.memcpy_between_tiled_x; + if (detile == NULL) + goto use_gtt; + break; + + case I915_TILING_NONE: + break; + } + } + + if (kgem_bo_can_map__cpu(&sna->kgem, dst_bo, true)) + dst = kgem_bo_map__cpu(&sna->kgem, dst_bo); + else + dst = kgem_bo_map__wc(&sna->kgem, dst_bo); src = kgem_bo_map__cpu(&sna->kgem, src_bo); } if (dst == NULL || src == NULL) { +use_gtt: dst = kgem_bo_map__gtt(&sna->kgem, dst_bo); src = kgem_bo_map__gtt(&sna->kgem, src_bo); if (dst == NULL || src == NULL) return false; + + detile = NULL; } else { - kgem_bo_sync__cpu_full(&sna->kgem, dst_bo, true); + if (dst == dst_bo->map__wc) + kgem_bo_sync__gtt(&sna->kgem, dst_bo); + else + kgem_bo_sync__cpu_full(&sna->kgem, dst_bo, true); kgem_bo_sync__cpu_full(&sna->kgem, src_bo, false); } @@ -2362,7 +2383,16 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op, __FUNCTION__, sx, sy, dx, dy, n)); if (sigtrap_get() == 0) { - do { + if (detile) { + do { + detile(src, dst, dst_draw->bitsPerPixel, + src_bo->pitch, dst_bo->pitch, + box->x1 + sx, box->y1 + sy, + box->x1 + dx, box->y1 + dy, + box->x2 - box->x1, box->y2 - box->y1); + box++; + } while (--n); + } else do { memcpy_blt(src, dst, dst_draw->bitsPerPixel, src_bo->pitch, dst_bo->pitch, box->x1 + sx, box->y1 + sy, @@ -2380,4 +2410,5 @@ void sna_render_mark_wedged(struct sna *sna) { sna->render.copy_boxes = memcpy_copy_boxes; + sna->render.prefer_gpu = 0; } diff --git 
a/src/sna/sna_render.h b/src/sna/sna_render.h index 6e1fa480..4ba345a7 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -148,6 +148,10 @@ struct sna_composite_op { struct { uint32_t flags; } gen8; + + struct { + uint32_t flags; + } gen9; } u; void *priv; @@ -238,8 +242,9 @@ struct sna_render { int16_t w, int16_t h, unsigned flags, struct sna_composite_op *tmp); -#define COMPOSITE_PARTIAL 0x1 -#define COMPOSITE_FALLBACK 0x80000000 +#define COMPOSITE_PARTIAL 0x1 +#define COMPOSITE_UPLOAD 0x40000000 +#define COMPOSITE_FALLBACK 0x80000000 bool (*check_composite_spans)(struct sna *sna, uint8_t op, PicturePtr dst, PicturePtr src, @@ -286,6 +291,8 @@ struct sna_render { #define COPY_LAST 0x1 #define COPY_SYNC 0x2 #define COPY_NO_OVERLAP 0x4 +#define COPY_SMALL 0x8 +#define COPY_DRI 0x10 bool (*copy)(struct sna *sna, uint8_t alu, PixmapPtr src, struct kgem_bo *src_bo, @@ -481,6 +488,7 @@ enum { GEN7_WM_KERNEL_VIDEO_PLANAR, GEN7_WM_KERNEL_VIDEO_PACKED, + GEN7_WM_KERNEL_VIDEO_RGB, GEN7_WM_KERNEL_COUNT }; @@ -533,12 +541,13 @@ enum { GEN8_WM_KERNEL_VIDEO_PLANAR, GEN8_WM_KERNEL_VIDEO_PACKED, + GEN8_WM_KERNEL_VIDEO_RGB, GEN8_WM_KERNEL_COUNT }; struct gen8_render_state { unsigned gt; - + const struct gt_info *info; struct kgem_bo *general_bo; uint32_t vs_state; @@ -565,6 +574,58 @@ struct gen8_render_state { bool emit_flush; }; +enum { + GEN9_WM_KERNEL_NOMASK = 0, + GEN9_WM_KERNEL_NOMASK_P, + + GEN9_WM_KERNEL_MASK, + GEN9_WM_KERNEL_MASK_P, + + GEN9_WM_KERNEL_MASKCA, + GEN9_WM_KERNEL_MASKCA_P, + + GEN9_WM_KERNEL_MASKSA, + GEN9_WM_KERNEL_MASKSA_P, + + GEN9_WM_KERNEL_OPACITY, + GEN9_WM_KERNEL_OPACITY_P, + + GEN9_WM_KERNEL_VIDEO_PLANAR, + GEN9_WM_KERNEL_VIDEO_PACKED, + GEN9_WM_KERNEL_VIDEO_RGB, + GEN9_WM_KERNEL_COUNT +}; + +struct gen9_render_state { + unsigned gt; + const struct gt_info *info; + struct kgem_bo *general_bo; + + uint32_t vs_state; + uint32_t sf_state; + uint32_t sf_mask_state; + uint32_t wm_state; + uint32_t wm_kernel[GEN9_WM_KERNEL_COUNT][3]; + + 
uint32_t cc_blend; + + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t blend; + uint32_t samplers; + uint32_t kernel; + + uint16_t num_sf_outputs; + uint16_t ve_id; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool emit_flush; + bool ve_dirty; +}; + struct sna_static_stream { uint32_t size, used; uint8_t *data; @@ -620,6 +681,7 @@ const char *gen5_render_init(struct sna *sna, const char *backend); const char *gen6_render_init(struct sna *sna, const char *backend); const char *gen7_render_init(struct sna *sna, const char *backend); const char *gen8_render_init(struct sna *sna, const char *backend); +const char *gen9_render_init(struct sna *sna, const char *backend); void sna_render_mark_wedged(struct sna *sna); diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h index 10fbbfe2..e162e37f 100644 --- a/src/sna/sna_render_inline.h +++ b/src/sna/sna_render_inline.h @@ -304,6 +304,12 @@ color_convert(uint32_t pixel, return pixel; } +inline static uint32_t +solid_color(uint32_t format, uint32_t pixel) +{ + return color_convert(pixel, format, PICT_a8r8g8b8); +} + inline static bool dst_use_gpu(PixmapPtr pixmap) { struct sna_pixmap *priv = sna_pixmap(pixmap); diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c index 308efc0a..8e2627f7 100644 --- a/src/sna/sna_tiling.c +++ b/src/sna/sna_tiling.c @@ -369,8 +369,7 @@ sna_tiling_composite_spans_boxes(struct sna *sna, const BoxRec *box, int nbox, float opacity) { while (nbox--) - sna_tiling_composite_spans_box(sna, op->base.priv, box++, opacity); - (void)sna; + sna_tiling_composite_spans_box(sna, op, box++, opacity); } fastcall static void @@ -581,6 +580,7 @@ sna_tiling_composite_spans(uint32_t op, tile->rects = tile->rects_embedded; tile->rect_count = 0; tile->rect_size = ARRAY_SIZE(tile->rects_embedded); + COMPILE_TIME_ASSERT(sizeof(tile->rects_embedded[0]) >= sizeof(struct sna_tile_span)); tmp->box = 
sna_tiling_composite_spans_box; tmp->boxes = sna_tiling_composite_spans_boxes; diff --git a/src/sna/sna_trapezoids_boxes.c b/src/sna/sna_trapezoids_boxes.c index 9900e3f0..bbf83759 100644 --- a/src/sna/sna_trapezoids_boxes.c +++ b/src/sna/sna_trapezoids_boxes.c @@ -198,7 +198,7 @@ composite_aligned_boxes(struct sna *sna, if (op == PictOpClear && sna->clear) src = sna->clear; - DBG(("%s: clipped extents (%d, %d), (%d, %d); now offset by (%d, %d), orgin (%d, %d)\n", + DBG(("%s: clipped extents (%d, %d), (%d, %d); now offset by (%d, %d), origin (%d, %d)\n", __FUNCTION__, clip.extents.x1, clip.extents.y1, clip.extents.x2, clip.extents.y2, @@ -592,6 +592,8 @@ lerp32_opacity(PixmapPtr scratch, uint32_t *ptr; int stride, i; + sigtrap_assert_active(); + ptr = (uint32_t*)((uint8_t *)scratch->devPrivate.ptr + scratch->devKind * y); ptr += x; stride = scratch->devKind / 4; diff --git a/src/sna/sna_trapezoids_imprecise.c b/src/sna/sna_trapezoids_imprecise.c index 37def2f9..8bc7c8a8 100644 --- a/src/sna/sna_trapezoids_imprecise.c +++ b/src/sna/sna_trapezoids_imprecise.c @@ -962,6 +962,16 @@ tor_add_trapezoid(struct tor *tor, const xTrapezoid *t, int dx, int dy) { + if (!xTrapezoidValid(t)) { + __DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n", + __FUNCTION__, + t->top, t->bottom, + t->left.p1.x, t->left.p1.y, + t->left.p2.x, t->left.p2.y, + t->right.p1.x, t->right.p1.y, + t->right.p2.x, t->right.p2.y)); + return; + } polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy); polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy); } @@ -1687,31 +1697,27 @@ struct span_thread { #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box)) struct span_thread_boxes { const struct sna_composite_spans_op *op; + const BoxRec *clip_start, *clip_end; int num_boxes; struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES]; }; -static void span_thread_add_boxes(struct sna *sna, void *data, - const BoxRec *box, int count, float 
alpha) +static void span_thread_add_box(struct sna *sna, void *data, + const BoxRec *box, float alpha) { struct span_thread_boxes *b = data; - __DBG(("%s: adding %d boxes with alpha=%f\n", - __FUNCTION__, count, alpha)); + __DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha)); - assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES); - if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) { - DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count)); - assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); + if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) { + DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes)); b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes); b->num_boxes = 0; } - do { - b->boxes[b->num_boxes].box = *box++; - b->boxes[b->num_boxes].alpha = alpha; - b->num_boxes++; - } while (--count); + b->boxes[b->num_boxes].box = *box++; + b->boxes[b->num_boxes].alpha = alpha; + b->num_boxes++; assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); } @@ -1722,8 +1728,22 @@ span_thread_box(struct sna *sna, const BoxRec *box, int coverage) { + struct span_thread_boxes *b = (struct span_thread_boxes *)op; + __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); - span_thread_add_boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage)); + if (b->num_boxes) { + struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1]; + if (bb->box.x1 == box->x1 && + bb->box.x2 == box->x2 && + bb->box.y2 == box->y1 && + bb->alpha == AREA_TO_ALPHA(coverage)) { + bb->box.y2 = box->y2; + __DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2)); + return; + } + } + + span_thread_add_box(sna, op, box, AREA_TO_ALPHA(coverage)); } static void @@ -1733,20 +1753,28 @@ span_thread_clipped_box(struct sna *sna, const BoxRec *box, int coverage) { - pixman_region16_t region; + struct span_thread_boxes *b = (struct span_thread_boxes *)op; + const BoxRec *c; __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, 
AREA_TO_ALPHA(coverage))); - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - if (region_num_rects(®ion)) { - span_thread_add_boxes(sna, op, - region_rects(®ion), - region_num_rects(®ion), - AREA_TO_ALPHA(coverage)); + b->clip_start = + find_clip_box_for_y(b->clip_start, b->clip_end, box->y1); + + c = b->clip_start; + while (c != b->clip_end) { + BoxRec clipped; + + if (box->y2 <= c->y1) + break; + + clipped = *box; + if (!box_intersect(&clipped, c++)) + continue; + + span_thread_add_box(sna, op, &clipped, AREA_TO_ALPHA(coverage)); } - pixman_region_fini(®ion); } static span_func_t @@ -1777,6 +1805,16 @@ thread_choose_span(struct sna_composite_spans_op *tmp, return span; } +inline static void +span_thread_boxes_init(struct span_thread_boxes *boxes, + const struct sna_composite_spans_op *op, + const RegionRec *clip) +{ + boxes->op = op; + region_get_boxes(clip, &boxes->clip_start, &boxes->clip_end); + boxes->num_boxes = 0; +} + static void span_thread(void *arg) { @@ -1789,8 +1827,7 @@ span_thread(void *arg) if (!tor_init(&tor, &thread->extents, 2*thread->ntrap)) return; - boxes.op = thread->op; - boxes.num_boxes = 0; + span_thread_boxes_init(&boxes, thread->op, thread->clip); y1 = thread->extents.y1 - thread->draw_y; y2 = thread->extents.y2 - thread->draw_y; @@ -2190,6 +2227,52 @@ static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v) } while (--h); } +struct clipped_span { + span_func_t span; + const BoxRec *clip_start, *clip_end; +}; + +static void +tor_blt_clipped(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + struct clipped_span *cs = (struct clipped_span *)clip; + const BoxRec *c; + + cs->clip_start = + find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1); + + c = cs->clip_start; + while (c != cs->clip_end) { + BoxRec clipped; + + if (box->y2 <= c->y1) + break; + + clipped = *box; + if (!box_intersect(&clipped, c++)) + continue; + 
+ cs->span(sna, op, NULL, &clipped, coverage); + } +} + +inline static span_func_t +clipped_span(struct clipped_span *cs, + span_func_t span, + const RegionRec *clip) +{ + if (clip->data) { + cs->span = span; + region_get_boxes(clip, &cs->clip_start, &cs->clip_end); + span = tor_blt_clipped; + } + return span; +} + static void tor_blt_src(struct sna *sna, struct sna_composite_spans_op *op, @@ -2203,25 +2286,6 @@ tor_blt_src(struct sna *sna, } static void -tor_blt_src_clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - tor_blt_src(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} - -static void tor_blt_in(struct sna *sna, struct sna_composite_spans_op *op, pixman_region16_t *clip, @@ -2253,25 +2317,6 @@ tor_blt_in(struct sna *sna, } static void -tor_blt_in_clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - tor_blt_in(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} - -static void tor_blt_add(struct sna *sna, struct sna_composite_spans_op *op, pixman_region16_t *clip, @@ -2310,25 +2355,6 @@ tor_blt_add(struct sna *sna, } static void -tor_blt_add_clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - tor_blt_add(sna, op, NULL, box++, coverage); - 
pixman_region_fini(®ion); -} - -static void tor_blt_lerp32(struct sna *sna, struct sna_composite_spans_op *op, pixman_region16_t *clip, @@ -2343,6 +2369,7 @@ tor_blt_lerp32(struct sna *sna, if (coverage == 0) return; + sigtrap_assert_active(); ptr += box->y1 * stride + box->x1; h = box->y2 - box->y1; @@ -2383,25 +2410,6 @@ tor_blt_lerp32(struct sna *sna, } } -static void -tor_blt_lerp32_clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - tor_blt_lerp32(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} - struct pixman_inplace { pixman_image_t *image, *source, *mask; uint32_t color; @@ -2431,24 +2439,6 @@ pixmask_span_solid(struct sna *sna, pi->dx + box->x1, pi->dy + box->y1, box->x2 - box->x1, box->y2 - box->y1); } -static void -pixmask_span_solid__clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - pixmask_span_solid(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} static void pixmask_span(struct sna *sna, @@ -2471,24 +2461,6 @@ pixmask_span(struct sna *sna, pi->dx + box->x1, pi->dy + box->y1, box->x2 - box->x1, box->y2 - box->y1); } -static void -pixmask_span__clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - pixmask_span(sna, op, NULL, 
box++, coverage); - pixman_region_fini(®ion); -} struct inplace_x8r8g8b8_thread { xTrapezoid *traps; @@ -2507,6 +2479,7 @@ static void inplace_x8r8g8b8_thread(void *arg) struct inplace_x8r8g8b8_thread *thread = arg; struct tor tor; span_func_t span; + struct clipped_span clipped; RegionPtr clip; int y1, y2, n; @@ -2537,12 +2510,11 @@ static void inplace_x8r8g8b8_thread(void *arg) inplace.stride = pixmap->devKind; inplace.color = thread->color; - if (clip->data) - span = tor_blt_lerp32_clipped; - else - span = tor_blt_lerp32; + span = clipped_span(&clipped, tor_blt_lerp32, clip); - tor_render(NULL, &tor, (void*)&inplace, clip, span, false); + tor_render(NULL, &tor, + (void*)&inplace, (void*)&clipped, + span, false); } else if (thread->is_solid) { struct pixman_inplace pi; @@ -2555,12 +2527,11 @@ static void inplace_x8r8g8b8_thread(void *arg) 1, 1, pi.bits, 0); pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); - if (clip->data) - span = pixmask_span_solid__clipped; - else - span = pixmask_span_solid; + span = clipped_span(&clipped, pixmask_span_solid, clip); - tor_render(NULL, &tor, (void*)&pi, clip, span, false); + tor_render(NULL, &tor, + (void*)&pi, (void *)&clipped, + span, false); pixman_image_unref(pi.source); pixman_image_unref(pi.image); @@ -2579,12 +2550,11 @@ static void inplace_x8r8g8b8_thread(void *arg) pi.bits = pixman_image_get_data(pi.mask); pi.op = thread->op; - if (clip->data) - span = pixmask_span__clipped; - else - span = pixmask_span; + span = clipped_span(&clipped, pixmask_span, clip); - tor_render(NULL, &tor, (void*)&pi, clip, span, false); + tor_render(NULL, &tor, + (void*)&pi, (void *)&clipped, + span, false); pixman_image_unref(pi.mask); pixman_image_unref(pi.source); @@ -2698,6 +2668,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, if (num_threads == 1) { struct tor tor; span_func_t span; + struct clipped_span clipped; if (!tor_init(&tor, ®ion.extents, 2*ntrap)) return true; @@ -2723,17 +2694,15 @@ 
trapezoid_span_inplace__x8r8g8b8(CARD8 op, inplace.stride = pixmap->devKind; inplace.color = color; - if (dst->pCompositeClip->data) - span = tor_blt_lerp32_clipped; - else - span = tor_blt_lerp32; + span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip); DBG(("%s: render inplace op=%d, color=%08x\n", __FUNCTION__, op, color)); if (sigtrap_get() == 0) { - tor_render(NULL, &tor, (void*)&inplace, - dst->pCompositeClip, span, false); + tor_render(NULL, &tor, + (void*)&inplace, (void*)&clipped, + span, false); sigtrap_put(); } } else if (is_solid) { @@ -2748,15 +2717,12 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, 1, 1, pi.bits, 0); pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); - if (dst->pCompositeClip->data) - span = pixmask_span_solid__clipped; - else - span = pixmask_span_solid; + span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip); if (sigtrap_get() == 0) { - tor_render(NULL, &tor, (void*)&pi, - dst->pCompositeClip, span, - false); + tor_render(NULL, &tor, + (void*)&pi, (void*)&clipped, + span, false); sigtrap_put(); } @@ -2777,15 +2743,12 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, pi.bits = pixman_image_get_data(pi.mask); pi.op = op; - if (dst->pCompositeClip->data) - span = pixmask_span__clipped; - else - span = pixmask_span; + span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip); if (sigtrap_get() == 0) { - tor_render(NULL, &tor, (void*)&pi, - dst->pCompositeClip, span, - false); + tor_render(NULL, &tor, + (void*)&pi, (void*)&clipped, + span, false); sigtrap_put(); } @@ -2847,9 +2810,9 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, struct inplace_thread { xTrapezoid *traps; - RegionPtr clip; span_func_t span; struct inplace inplace; + struct clipped_span clipped; BoxRec extents; int dx, dy; int draw_x, draw_y; @@ -2874,8 +2837,9 @@ static void inplace_thread(void *arg) tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy); } - tor_render(NULL, &tor, (void*)&thread->inplace, - thread->clip, 
thread->span, thread->unbounded); + tor_render(NULL, &tor, + (void*)&thread->inplace, (void*)&thread->clipped, + thread->span, thread->unbounded); tor_fini(&tor); } @@ -2889,6 +2853,7 @@ imprecise_trapezoid_span_inplace(struct sna *sna, bool fallback) { struct inplace inplace; + struct clipped_span clipped; span_func_t span; PixmapPtr pixmap; struct sna_pixmap *priv; @@ -3005,21 +2970,12 @@ imprecise_trapezoid_span_inplace(struct sna *sna, region.extents.x2, region.extents.y2)); if (op == PictOpSrc) { - if (dst->pCompositeClip->data) - span = tor_blt_src_clipped; - else - span = tor_blt_src; + span = tor_blt_src; } else if (op == PictOpIn) { - if (dst->pCompositeClip->data) - span = tor_blt_in_clipped; - else - span = tor_blt_in; + span = tor_blt_in; } else { assert(op == PictOpAdd); - if (dst->pCompositeClip->data) - span = tor_blt_add_clipped; - else - span = tor_blt_add; + span = tor_blt_add; } DBG(("%s: move-to-cpu\n", __FUNCTION__)); @@ -3037,6 +2993,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna, inplace.stride = pixmap->devKind; inplace.opacity = color >> 24; + span = clipped_span(&clipped, span, dst->pCompositeClip); + num_threads = 1; if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) num_threads = sna_use_threads(region.extents.x2 - region.extents.x1, @@ -3057,8 +3015,9 @@ imprecise_trapezoid_span_inplace(struct sna *sna, } if (sigtrap_get() == 0) { - tor_render(NULL, &tor, (void*)&inplace, - dst->pCompositeClip, span, unbounded); + tor_render(NULL, &tor, + (void*)&inplace, (void *)&clipped, + span, unbounded); sigtrap_put(); } @@ -3075,8 +3034,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna, threads[0].traps = traps; threads[0].ntrap = ntrap; threads[0].inplace = inplace; + threads[0].clipped = clipped; threads[0].extents = region.extents; - threads[0].clip = dst->pCompositeClip; threads[0].span = span; threads[0].unbounded = unbounded; threads[0].dx = dx; @@ -3707,8 +3666,7 @@ tristrip_thread(void *arg) if (!tor_init(&tor, &thread->extents, 
2*thread->count)) return; - boxes.op = thread->op; - boxes.num_boxes = 0; + span_thread_boxes_init(&boxes, thread->op, thread->clip); cw = 0; ccw = 1; polygon_add_line(tor.polygon, @@ -3874,7 +3832,7 @@ imprecise_tristrip_span_converter(struct sna *sna, break; } while (1); polygon_add_line(tor.polygon, - &points[cw], &points[2+ccw], + &points[cw], &points[ccw], dx, dy); assert(tor.polygon->num_edges <= 2*count); diff --git a/src/sna/sna_trapezoids_mono.c b/src/sna/sna_trapezoids_mono.c index 808703a9..07a7867d 100644 --- a/src/sna/sna_trapezoids_mono.c +++ b/src/sna/sna_trapezoids_mono.c @@ -72,13 +72,14 @@ struct mono { struct sna *sna; struct sna_composite_op op; pixman_region16_t clip; + const BoxRec *clip_start, *clip_end; fastcall void (*span)(struct mono *, int, int, BoxPtr); struct mono_polygon polygon; }; -#define I(x) pixman_fixed_to_int ((x) + pixman_fixed_1_minus_e/2) +#define I(x) pixman_fixed_to_int((x) + pixman_fixed_1_minus_e/2) static struct quorem floored_muldivrem(int32_t x, int32_t a, int32_t b) @@ -249,22 +250,22 @@ mono_add_line(struct mono *mono, e->dxdy = floored_muldivrem(dx, pixman_fixed_1, dy); - e->x = floored_muldivrem((ytop - dst_y) * pixman_fixed_1 + pixman_fixed_1_minus_e/2 - p1->y, + e->x = floored_muldivrem((ytop - dst_y) * pixman_fixed_1 + pixman_fixed_1/2 - p1->y, dx, dy); e->x.quo += p1->x; e->x.rem -= dy; e->dy = dy; - - __DBG(("%s: initial x=%d [%d.%d/%d] + dxdy=%d.%d/%d\n", - __FUNCTION__, - I(e->x.quo), e->x.quo, e->x.rem, e->dy, - e->dxdy.quo, e->dxdy.rem, e->dy)); } e->x.quo += dst_x*pixman_fixed_1; + __DBG(("%s: initial x=%d [%d.%d/%d] + dxdy=%d.%d/%d\n", + __FUNCTION__, + I(e->x.quo), e->x.quo, e->x.rem, e->dy, + e->dxdy.quo, e->dxdy.rem, e->dy)); { struct mono_edge **ptail = &polygon->y_buckets[ytop - mono->clip.extents.y1]; + assert(ytop - mono->clip.extents.y1 < mono->clip.extents.y2 - mono->clip.extents.y1); if (*ptail) (*ptail)->prev = e; e->next = *ptail; @@ -368,6 +369,10 @@ static struct mono_edge 
*mono_filter(struct mono_edge *edges) e->x.rem == n->x.rem && e->dxdy.quo == n->dxdy.quo && e->dxdy.rem == n->dxdy.rem) { + assert(e->dy == n->dy); + __DBG(("%s: discarding cancellation pair (%d.%d) + (%d.%d)\n", + __FUNCTION__, e->x.quo, e->x.rem, e->dxdy.quo, e->dxdy.rem)); + if (e->prev) e->prev->next = n->next; else @@ -378,8 +383,11 @@ static struct mono_edge *mono_filter(struct mono_edge *edges) break; e = n->next; - } else + } else { + __DBG(("%s: adding edge (%d.%d) + (%d.%d)/%d, height=%d\n", + __FUNCTION__, n->x.quo, n->x.rem, n->dxdy.quo, n->dxdy.rem, n->dy, n->height_left)); e = n; + } } return edges; @@ -474,6 +482,34 @@ mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box) c->op.box(c->sna, &c->op, box); } +fastcall static void +mono_span__clipped(struct mono *c, int x1, int x2, BoxPtr box) +{ + const BoxRec *b; + + __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); + + c->clip_start = + find_clip_box_for_y(c->clip_start, c->clip_end, box->y1); + + b = c->clip_start; + while (b != c->clip_end) { + BoxRec clipped; + + if (box->y2 <= b->y1) + break; + + clipped.x1 = x1; + clipped.x2 = x2; + clipped.y1 = box->y1; + clipped.y2 = box->y2; + if (!box_intersect(&clipped, b++)) + continue; + + c->op.box(c->sna, &c->op, &clipped); + } +} + struct mono_span_thread_boxes { const struct sna_composite_op *op; #define MONO_SPAN_MAX_BOXES (8192/sizeof(BoxRec)) @@ -482,40 +518,45 @@ struct mono_span_thread_boxes { }; inline static void -thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count) +thread_mono_span_add_box(struct mono *c, const BoxRec *box) { struct mono_span_thread_boxes *b = c->op.priv; - assert(count > 0 && count <= MONO_SPAN_MAX_BOXES); - if (unlikely(b->num_boxes + count > MONO_SPAN_MAX_BOXES)) { + if (unlikely(b->num_boxes == MONO_SPAN_MAX_BOXES)) { b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes); b->num_boxes = 0; } - memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec)); - b->num_boxes += count; + 
b->boxes[b->num_boxes++] = *box; assert(b->num_boxes <= MONO_SPAN_MAX_BOXES); } fastcall static void thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box) { - pixman_region16_t region; + const BoxRec *b; __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); - box->x1 = x1; - box->x2 = x2; + c->clip_start = + find_clip_box_for_y(c->clip_start, c->clip_end, box->y1); - assert(c->clip.data); + b = c->clip_start; + while (b != c->clip_end) { + BoxRec clipped; + + if (box->y2 <= b->y1) + break; + + clipped.x1 = x1; + clipped.x2 = x2; + clipped.y1 = box->y1; + clipped.y2 = box->y2; + if (!box_intersect(&clipped, b++)) + continue; - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, &c->clip); - if (region_num_rects(®ion)) - thread_mono_span_add_boxes(c, - region_rects(®ion), - region_num_rects(®ion)); - pixman_region_fini(®ion); + thread_mono_span_add_box(c, &clipped); + } } fastcall static void @@ -525,7 +566,7 @@ thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box) box->x1 = x1; box->x2 = x2; - thread_mono_span_add_boxes(c, box, 1); + thread_mono_span_add_box(c, box); } inline static void @@ -537,6 +578,8 @@ mono_row(struct mono *c, int16_t y, int16_t h) int winding = 0; BoxRec box; + __DBG(("%s: y=%d, h=%d\n", __FUNCTION__, y, h)); + DBG_MONO_EDGES(edge); VALIDATE_MONO_EDGES(&c->head); @@ -547,6 +590,8 @@ mono_row(struct mono *c, int16_t y, int16_t h) struct mono_edge *next = edge->next; int16_t xend = I(edge->x.quo); + __DBG(("%s: adding edge dir=%d [winding=%d], x=%d [%d]\n", + __FUNCTION__, edge->dir, winding + edge->dir, xend, edge->x.quo)); if (--edge->height_left) { if (edge->dy) { edge->x.quo += edge->dxdy.quo; @@ -555,6 +600,8 @@ mono_row(struct mono *c, int16_t y, int16_t h) ++edge->x.quo; edge->x.rem -= edge->dy; } + __DBG(("%s: stepped edge (%d.%d) + (%d.%d)/%d, height=%d, prev_x=%d\n", + __FUNCTION__, edge->x.quo, edge->x.rem, edge->dxdy.quo, edge->dxdy.rem, edge->dy, edge->height_left, edge->x.quo)); } if (edge->x.quo < 
prev_x) { @@ -578,17 +625,22 @@ mono_row(struct mono *c, int16_t y, int16_t h) winding += edge->dir; if (winding == 0) { assert(I(next->x.quo) >= xend); - if (I(next->x.quo) > xend + 1) { + if (I(next->x.quo) > xend) { + __DBG(("%s: end span: %d\n", __FUNCTION__, xend)); if (xstart < c->clip.extents.x1) xstart = c->clip.extents.x1; if (xend > c->clip.extents.x2) xend = c->clip.extents.x2; - if (xend > xstart) + if (xend > xstart) { + __DBG(("%s: emit span [%d, %d]\n", __FUNCTION__, xstart, xend)); c->span(c, xstart, xend, &box); + } xstart = INT16_MIN; } - } else if (xstart == INT16_MIN) + } else if (xstart == INT16_MIN) { + __DBG(("%s: starting new span: %d\n", __FUNCTION__, xend)); xstart = xend; + } edge = next; } @@ -650,9 +702,14 @@ mono_render(struct mono *mono) for (i = 0; i < h; i = j) { j = i + 1; + __DBG(("%s: row=%d, new edges? %d\n", __FUNCTION__, + i, polygon->y_buckets[i] != NULL)); + if (polygon->y_buckets[i]) mono_merge_edges(mono, polygon->y_buckets[i]); + __DBG(("%s: row=%d, vertical? 
%d\n", __FUNCTION__, + i, mono->is_vertical)); if (mono->is_vertical) { struct mono_edge *e = mono->head.next; int min_height = h - i; @@ -667,6 +724,7 @@ mono_render(struct mono *mono) j++; if (j != i + 1) mono_step_edges(mono, j - (i + 1)); + __DBG(("%s: %d vertical rows\n", __FUNCTION__, j-i)); } mono_row(mono, i, j-i); @@ -717,6 +775,7 @@ mono_span_thread(void *arg) if (RegionNil(&mono.clip)) return; } + region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end); boxes.op = thread->op; boxes.num_boxes = 0; @@ -891,9 +950,12 @@ mono_trapezoids_span_converter(struct sna *sna, if (mono.clip.data == NULL && mono.op.damage == NULL) mono.span = mono_span__fast; + else if (mono.clip.data != NULL && mono.op.damage == NULL) + mono.span = mono_span__clipped; else mono.span = mono_span; + region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end); mono_render(&mono); mono.op.done(mono.sna, &mono.op); mono_fini(&mono); @@ -939,6 +1001,7 @@ mono_trapezoids_span_converter(struct sna *sna, mono.clip.extents.x2 - mono.clip.extents.x1, mono.clip.extents.y2 - mono.clip.extents.y1, COMPOSITE_PARTIAL, memset(&mono.op, 0, sizeof(mono.op)))) { + region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end); mono_render(&mono); mono.op.done(mono.sna, &mono.op); } @@ -974,6 +1037,7 @@ mono_inplace_fill_box(struct sna *sna, box->x2 - box->x1, box->y2 - box->y1, fill->color)); + sigtrap_assert_active(); pixman_fill(fill->data, fill->stride, fill->bpp, box->x1, box->y1, box->x2 - box->x1, @@ -995,6 +1059,7 @@ mono_inplace_fill_boxes(struct sna *sna, box->x2 - box->x1, box->y2 - box->y1, fill->color)); + sigtrap_assert_active(); pixman_fill(fill->data, fill->stride, fill->bpp, box->x1, box->y1, box->x2 - box->x1, @@ -1382,10 +1447,13 @@ mono_triangles_span_converter(struct sna *sna, mono_render(&mono); mono.op.done(mono.sna, &mono.op); } + mono_fini(&mono); if (!was_clear && !operator_is_bounded(op)) { xPointFixed p1, p2; + DBG(("%s: performing unbounded clear\n", __FUNCTION__)); 
+ if (!mono_init(&mono, 2+3*count)) return false; @@ -1431,7 +1499,6 @@ mono_triangles_span_converter(struct sna *sna, mono_fini(&mono); } - mono_fini(&mono); REGION_UNINIT(NULL, &mono.clip); return true; } diff --git a/src/sna/sna_trapezoids_precise.c b/src/sna/sna_trapezoids_precise.c index 9187ab48..242b4acb 100644 --- a/src/sna/sna_trapezoids_precise.c +++ b/src/sna/sna_trapezoids_precise.c @@ -1023,6 +1023,16 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges) static void tor_add_trapezoid(struct tor *tor, const xTrapezoid *t, int dx, int dy) { + if (!xTrapezoidValid(t)) { + __DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n", + __FUNCTION__, + t->top, t->bottom, + t->left.p1.x, t->left.p1.y, + t->left.p2.x, t->left.p2.y, + t->right.p1.x, t->right.p1.y, + t->right.p2.x, t->right.p2.y)); + return; + } polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy); polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy); } @@ -1635,31 +1645,27 @@ struct span_thread { #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box)) struct span_thread_boxes { const struct sna_composite_spans_op *op; + const BoxRec *clip_start, *clip_end; int num_boxes; struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES]; }; -static void span_thread_add_boxes(struct sna *sna, void *data, - const BoxRec *box, int count, float alpha) +static void span_thread_add_box(struct sna *sna, void *data, + const BoxRec *box, float alpha) { struct span_thread_boxes *b = data; - __DBG(("%s: adding %d boxes with alpha=%f\n", - __FUNCTION__, count, alpha)); + __DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha)); - assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES); - if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) { - DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count)); - assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); + if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) { + 
DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes)); b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes); b->num_boxes = 0; } - do { - b->boxes[b->num_boxes].box = *box++; - b->boxes[b->num_boxes].alpha = alpha; - b->num_boxes++; - } while (--count); + b->boxes[b->num_boxes].box = *box++; + b->boxes[b->num_boxes].alpha = alpha; + b->num_boxes++; assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); } @@ -1670,8 +1676,22 @@ span_thread_box(struct sna *sna, const BoxRec *box, int coverage) { + struct span_thread_boxes *b = (struct span_thread_boxes *)op; + __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); - span_thread_add_boxes(sna, op, box, 1, AREA_TO_FLOAT(coverage)); + if (b->num_boxes) { + struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1]; + if (bb->box.x1 == box->x1 && + bb->box.x2 == box->x2 && + bb->box.y2 == box->y1 && + bb->alpha == AREA_TO_FLOAT(coverage)) { + bb->box.y2 = box->y2; + __DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2)); + return; + } + } + + span_thread_add_box(sna, op, box, AREA_TO_FLOAT(coverage)); } static void @@ -1681,20 +1701,28 @@ span_thread_clipped_box(struct sna *sna, const BoxRec *box, int coverage) { - pixman_region16_t region; + struct span_thread_boxes *b = (struct span_thread_boxes *)op; + const BoxRec *c; __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, AREA_TO_FLOAT(coverage))); - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - if (region_num_rects(®ion)) { - span_thread_add_boxes(sna, op, - region_rects(®ion), - region_num_rects(®ion), - AREA_TO_FLOAT(coverage)); + b->clip_start = + find_clip_box_for_y(b->clip_start, b->clip_end, box->y1); + + c = b->clip_start; + while (c != b->clip_end) { + BoxRec clipped; + + if (box->y2 <= c->y1) + break; + + clipped = *box; + if (!box_intersect(&clipped, c++)) + continue; + + span_thread_add_box(sna, op, &clipped, AREA_TO_FLOAT(coverage)); } - pixman_region_fini(®ion); } 
static span_func_t @@ -1712,7 +1740,7 @@ thread_choose_span(struct sna_composite_spans_op *tmp, assert(!is_mono(dst, maskFormat)); assert(tmp->thread_boxes); - DBG(("%s: clipped? %d\n", __FUNCTION__, clip->data != NULL)); + DBG(("%s: clipped? %d x %d\n", __FUNCTION__, clip->data != NULL, region_num_rects(clip))); if (clip->data) span = span_thread_clipped_box; else @@ -1721,6 +1749,17 @@ thread_choose_span(struct sna_composite_spans_op *tmp, return span; } +inline static void +span_thread_boxes_init(struct span_thread_boxes *boxes, + const struct sna_composite_spans_op *op, + const RegionRec *clip) +{ + boxes->op = op; + boxes->clip_start = region_rects(clip); + boxes->clip_end = boxes->clip_start + region_num_rects(clip); + boxes->num_boxes = 0; +} + static void span_thread(void *arg) { @@ -1733,8 +1772,7 @@ span_thread(void *arg) if (!tor_init(&tor, &thread->extents, 2*thread->ntrap)) return; - boxes.op = thread->op; - boxes.num_boxes = 0; + span_thread_boxes_init(&boxes, thread->op, thread->clip); y1 = thread->extents.y1 - thread->draw_y; y2 = thread->extents.y2 - thread->draw_y; @@ -2183,6 +2221,52 @@ static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity) return opacity == 255 ? 
coverage : mul_8_8(coverage, opacity); } +struct clipped_span { + span_func_t span; + const BoxRec *clip_start, *clip_end; +}; + +static void +tor_blt_clipped(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + struct clipped_span *cs = (struct clipped_span *)clip; + const BoxRec *c; + + cs->clip_start = + find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1); + + c = cs->clip_start; + while (c != cs->clip_end) { + BoxRec clipped; + + if (box->y2 <= c->y1) + break; + + clipped = *box; + if (!box_intersect(&clipped, c++)) + continue; + + cs->span(sna, op, NULL, &clipped, coverage); + } +} + +inline static span_func_t +clipped_span(struct clipped_span *cs, + span_func_t span, + const RegionRec *clip) +{ + if (clip->data) { + cs->span = span; + region_get_boxes(clip, &cs->clip_start, &cs->clip_end); + span = tor_blt_clipped; + } + return span; +} + static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v) { uint8_t *ptr = in->ptr; @@ -2218,25 +2302,6 @@ tor_blt_src(struct sna *sna, } static void -tor_blt_src_clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - tor_blt_src(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} - -static void tor_blt_in(struct sna *sna, struct sna_composite_spans_op *op, pixman_region16_t *clip, @@ -2268,25 +2333,6 @@ tor_blt_in(struct sna *sna, } static void -tor_blt_in_clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - 
while (n--) - tor_blt_in(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} - -static void tor_blt_add(struct sna *sna, struct sna_composite_spans_op *op, pixman_region16_t *clip, @@ -2325,25 +2371,6 @@ tor_blt_add(struct sna *sna, } static void -tor_blt_add_clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - tor_blt_add(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} - -static void tor_blt_lerp32(struct sna *sna, struct sna_composite_spans_op *op, pixman_region16_t *clip, @@ -2358,6 +2385,7 @@ tor_blt_lerp32(struct sna *sna, if (coverage == 0) return; + sigtrap_assert_active(); ptr += box->y1 * stride + box->x1; h = box->y2 - box->y1; @@ -2396,25 +2424,6 @@ tor_blt_lerp32(struct sna *sna, } } -static void -tor_blt_lerp32_clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - tor_blt_lerp32(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} - struct pixman_inplace { pixman_image_t *image, *source, *mask; uint32_t color; @@ -2442,24 +2451,6 @@ pixmask_span_solid(struct sna *sna, pi->dx + box->x1, pi->dy + box->y1, box->x2 - box->x1, box->y2 - box->y1); } -static void -pixmask_span_solid__clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) 
- pixmask_span_solid(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} static void pixmask_span(struct sna *sna, @@ -2480,24 +2471,6 @@ pixmask_span(struct sna *sna, pi->dx + box->x1, pi->dy + box->y1, box->x2 - box->x1, box->y2 - box->y1); } -static void -pixmask_span__clipped(struct sna *sna, - struct sna_composite_spans_op *op, - pixman_region16_t *clip, - const BoxRec *box, - int coverage) -{ - pixman_region16_t region; - int n; - - pixman_region_init_rects(®ion, box, 1); - RegionIntersect(®ion, ®ion, clip); - n = region_num_rects(®ion); - box = region_rects(®ion); - while (n--) - pixmask_span(sna, op, NULL, box++, coverage); - pixman_region_fini(®ion); -} struct inplace_x8r8g8b8_thread { xTrapezoid *traps; @@ -2516,6 +2489,7 @@ static void inplace_x8r8g8b8_thread(void *arg) struct inplace_x8r8g8b8_thread *thread = arg; struct tor tor; span_func_t span; + struct clipped_span clipped; RegionPtr clip; int y1, y2, n; @@ -2546,12 +2520,11 @@ static void inplace_x8r8g8b8_thread(void *arg) inplace.stride = pixmap->devKind; inplace.color = thread->color; - if (clip->data) - span = tor_blt_lerp32_clipped; - else - span = tor_blt_lerp32; + span = clipped_span(&clipped, tor_blt_lerp32, clip); - tor_render(NULL, &tor, (void*)&inplace, clip, span, false); + tor_render(NULL, &tor, + (void*)&inplace, (void *)&clipped, + span, false); } else if (thread->is_solid) { struct pixman_inplace pi; @@ -2564,10 +2537,7 @@ static void inplace_x8r8g8b8_thread(void *arg) 1, 1, pi.bits, 0); pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); - if (clip->data) - span = pixmask_span_solid__clipped; - else - span = pixmask_span_solid; + span = clipped_span(&clipped, pixmask_span_solid, clip); tor_render(NULL, &tor, (void*)&pi, clip, span, false); @@ -2588,12 +2558,11 @@ static void inplace_x8r8g8b8_thread(void *arg) pi.bits = pixman_image_get_data(pi.mask); pi.op = thread->op; - if (clip->data) - span = pixmask_span__clipped; - else - span = pixmask_span; + span = 
clipped_span(&clipped, pixmask_span, clip); - tor_render(NULL, &tor, (void*)&pi, clip, span, false); + tor_render(NULL, &tor, + (void*)&pi, (void *)&clipped, + span, false); pixman_image_unref(pi.mask); pixman_image_unref(pi.source); @@ -2712,6 +2681,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, if (num_threads == 1) { struct tor tor; span_func_t span; + struct clipped_span clipped; if (!tor_init(&tor, ®ion.extents, 2*ntrap)) return true; @@ -2737,17 +2707,14 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, inplace.stride = pixmap->devKind; inplace.color = color; - if (dst->pCompositeClip->data) - span = tor_blt_lerp32_clipped; - else - span = tor_blt_lerp32; - + span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip); DBG(("%s: render inplace op=%d, color=%08x\n", __FUNCTION__, op, color)); if (sigtrap_get() == 0) { - tor_render(NULL, &tor, (void*)&inplace, - dst->pCompositeClip, span, false); + tor_render(NULL, &tor, + (void*)&inplace, (void*)&clipped, + span, false); sigtrap_put(); } } else if (is_solid) { @@ -2762,15 +2729,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, 1, 1, pi.bits, 0); pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); - if (dst->pCompositeClip->data) - span = pixmask_span_solid__clipped; - else - span = pixmask_span_solid; - + span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip); if (sigtrap_get() == 0) { - tor_render(NULL, &tor, (void*)&pi, - dst->pCompositeClip, span, - false); + tor_render(NULL, &tor, + (void*)&pi, (void*)&clipped, + span, false); sigtrap_put(); } @@ -2791,15 +2754,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, pi.bits = pixman_image_get_data(pi.mask); pi.op = op; - if (dst->pCompositeClip->data) - span = pixmask_span__clipped; - else - span = pixmask_span; - + span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip); if (sigtrap_get() == 0) { - tor_render(NULL, &tor, (void*)&pi, - dst->pCompositeClip, span, - false); + tor_render(NULL, &tor, + (void*)&pi, (void *)&clipped, + 
span, false); sigtrap_put(); } @@ -2861,9 +2820,9 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, struct inplace_thread { xTrapezoid *traps; - RegionPtr clip; span_func_t span; struct inplace inplace; + struct clipped_span clipped; BoxRec extents; int dx, dy; int draw_x, draw_y; @@ -2888,8 +2847,9 @@ static void inplace_thread(void *arg) tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy); } - tor_render(NULL, &tor, (void*)&thread->inplace, - thread->clip, thread->span, thread->unbounded); + tor_render(NULL, &tor, + (void*)&thread->inplace, (void*)&thread->clipped, + thread->span, thread->unbounded); tor_fini(&tor); } @@ -2903,6 +2863,7 @@ precise_trapezoid_span_inplace(struct sna *sna, bool fallback) { struct inplace inplace; + struct clipped_span clipped; span_func_t span; PixmapPtr pixmap; struct sna_pixmap *priv; @@ -3020,21 +2981,12 @@ precise_trapezoid_span_inplace(struct sna *sna, dst->pCompositeClip->data != NULL)); if (op == PictOpSrc) { - if (dst->pCompositeClip->data) - span = tor_blt_src_clipped; - else - span = tor_blt_src; + span = tor_blt_src; } else if (op == PictOpIn) { - if (dst->pCompositeClip->data) - span = tor_blt_in_clipped; - else - span = tor_blt_in; + span = tor_blt_in; } else { assert(op == PictOpAdd); - if (dst->pCompositeClip->data) - span = tor_blt_add_clipped; - else - span = tor_blt_add; + span = tor_blt_add; } DBG(("%s: move-to-cpu(dst)\n", __FUNCTION__)); @@ -3052,6 +3004,8 @@ precise_trapezoid_span_inplace(struct sna *sna, inplace.stride = pixmap->devKind; inplace.opacity = color >> 24; + span = clipped_span(&clipped, span, dst->pCompositeClip); + num_threads = 1; if (!NO_GPU_THREADS && (flags & COMPOSITE_SPANS_RECTILINEAR) == 0) @@ -3074,8 +3028,9 @@ precise_trapezoid_span_inplace(struct sna *sna, } if (sigtrap_get() == 0) { - tor_render(NULL, &tor, (void*)&inplace, - dst->pCompositeClip, span, unbounded); + tor_render(NULL, &tor, + (void*)&inplace, (void *)&clipped, + span, unbounded); sigtrap_put(); } @@ -3093,7 
+3048,7 @@ precise_trapezoid_span_inplace(struct sna *sna, threads[0].ntrap = ntrap; threads[0].inplace = inplace; threads[0].extents = region.extents; - threads[0].clip = dst->pCompositeClip; + threads[0].clipped = clipped; threads[0].span = span; threads[0].unbounded = unbounded; threads[0].dx = dx; @@ -3316,8 +3271,7 @@ tristrip_thread(void *arg) if (!tor_init(&tor, &thread->extents, 2*thread->count)) return; - boxes.op = thread->op; - boxes.num_boxes = 0; + span_thread_boxes_init(&boxes, thread->op, thread->clip); cw = 0; ccw = 1; polygon_add_line(tor.polygon, diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c index ed0e7b31..e2b11c31 100644 --- a/src/sna/sna_video.c +++ b/src/sna/sna_video.c @@ -591,6 +591,72 @@ use_gtt: /* copy data, must use GTT so that we keep the overlay uncached */ return true; } +void sna_video_fill_colorkey(struct sna_video *video, + const RegionRec *clip) +{ + struct sna *sna = video->sna; + PixmapPtr front = sna->front; + struct kgem_bo *bo = __sna_pixmap_get_bo(front); + uint8_t *dst, *tmp; + int w, width; + + if (video->AlwaysOnTop || RegionEqual(&video->clip, (RegionPtr)clip)) + return; + + assert(bo); + if (!wedged(sna) && + sna_blt_fill_boxes(sna, GXcopy, bo, + front->drawable.bitsPerPixel, + video->color_key, + region_rects(clip), + region_num_rects(clip))) { + RegionCopy(&video->clip, (RegionPtr)clip); + return; + } + + dst = kgem_bo_map__gtt(&sna->kgem, bo); + if (dst == NULL) + return; + + w = front->drawable.bitsPerPixel/8; + width = (clip->extents.x2 - clip->extents.x1) * w; + tmp = malloc(width); + if (tmp == NULL) + return; + + memcpy(tmp, &video->color_key, w); + while (2 * w < width) { + memcpy(tmp + w, tmp, w); + w *= 2; + } + if (w < width) + memcpy(tmp + w, tmp, width - w); + + if (sigtrap_get() == 0) { + const BoxRec *box = region_rects(clip); + int n = region_num_rects(clip); + + w = front->drawable.bitsPerPixel/8; + do { + int y = box->y1; + uint8_t *row = dst + y*bo->pitch + w*box->x1; + + width = (box->x2 - 
box->x1) * w; + while (y < box->y2) { + memcpy(row, tmp, width); + row += bo->pitch; + y++; + } + box++; + } while (--n); + sigtrap_put(); + + RegionCopy(&video->clip, (RegionPtr)clip); + } + + free(tmp); +} + XvAdaptorPtr sna_xv_adaptor_alloc(struct sna *sna) { XvAdaptorPtr new_adaptors; diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h index f21605fc..39cb725f 100644 --- a/src/sna/sna_video.h +++ b/src/sna/sna_video.h @@ -72,6 +72,8 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE. struct sna_video { struct sna *sna; + int idx; /* XXX expose struct plane instead? */ + int brightness; int contrast; int saturation; @@ -193,6 +195,9 @@ bool sna_video_copy_data(struct sna_video *video, struct sna_video_frame *frame, const uint8_t *buf); +void +sna_video_fill_colorkey(struct sna_video *video, + const RegionRec *clip); void sna_video_buffer_fini(struct sna_video *video); @@ -210,4 +215,26 @@ sna_window_set_port(WindowPtr window, XvPortPtr port) ((void **)__get_private(window, sna_window_key))[2] = port; } +static inline int offset_and_clip(int x, int dx) +{ + x += dx; + if (x <= 0) + return 0; + if (x >= MAXSHORT) + return MAXSHORT; + return x; +} + +static inline void init_video_region(RegionRec *region, + DrawablePtr draw, + int drw_x, int drw_y, + int drw_w, int drw_h) +{ + region->extents.x1 = offset_and_clip(draw->x, drw_x); + region->extents.y1 = offset_and_clip(draw->y, drw_y); + region->extents.x2 = offset_and_clip(draw->x, drw_x + drw_w); + region->extents.y2 = offset_and_clip(draw->y, drw_y + drw_h); + region->data = NULL; +} + #endif /* SNA_VIDEO_H */ diff --git a/src/sna/sna_video_overlay.c b/src/sna/sna_video_overlay.c index ac81f1a0..9bc5ce40 100644 --- a/src/sna/sna_video_overlay.c +++ b/src/sna/sna_video_overlay.c @@ -130,7 +130,7 @@ static int sna_video_overlay_stop(ddStopVideo_ARGS) DBG(("%s()\n", __FUNCTION__)); - REGION_EMPTY(scrn->pScreen, &video->clip); + REGION_EMPTY(to_screen_from_sna(sna), &video->clip); request.flags = 0; 
(void)drmIoctl(sna->kgem.fd, @@ -474,15 +474,13 @@ sna_video_overlay_put_image(ddPutImage_ARGS) if (src_h >= (drw_h * 8)) drw_h = src_h / 7; - clip.extents.x1 = draw->x + drw_x; - clip.extents.y1 = draw->y + drw_y; - clip.extents.x2 = clip.extents.x1 + drw_w; - clip.extents.y2 = clip.extents.y1 + drw_h; - clip.data = NULL; + init_video_region(&clip, draw, drw_x, drw_y, drw_w, drw_h); DBG(("%s: always_on_top=%d\n", __FUNCTION__, video->AlwaysOnTop)); - if (!video->AlwaysOnTop) + if (!video->AlwaysOnTop) { + ValidateGC(draw, gc); RegionIntersect(&clip, &clip, gc->pCompositeClip); + } if (box_empty(&clip.extents)) goto invisible; @@ -551,15 +549,7 @@ sna_video_overlay_put_image(ddPutImage_ARGS) ret = Success; if (sna_video_overlay_show (sna, video, &frame, crtc, &dstBox, src_w, src_h, drw_w, drw_h)) { - //xf86XVFillKeyHelperDrawable(draw, video->color_key, &clip); - if (!video->AlwaysOnTop && !RegionEqual(&video->clip, &clip) && - sna_blt_fill_boxes(sna, GXcopy, - __sna_pixmap_get_bo(sna->front), - sna->front->drawable.bitsPerPixel, - video->color_key, - region_rects(&clip), - region_num_rects(&clip))) - RegionCopy(&video->clip, &clip); + sna_video_fill_colorkey(video, &clip); sna_window_set_port((WindowPtr)draw, port); } else { DBG(("%s: failed to show video frame\n", __FUNCTION__)); diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c index 92230f97..69bfdfd2 100644 --- a/src/sna/sna_video_sprite.c +++ b/src/sna/sna_video_sprite.c @@ -47,6 +47,8 @@ #define DRM_FORMAT_YUYV fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */ #define DRM_FORMAT_UYVY fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */ +#define has_hw_scaling(sna) ((sna)->kgem.gen < 071) + #define LOCAL_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct local_mode_set_plane) struct local_mode_set_plane { uint32_t plane_id; @@ -81,19 +83,17 @@ static int sna_video_sprite_stop(ddStopVideo_ARGS) xf86CrtcConfigPtr config = 
XF86_CRTC_CONFIG_PTR(video->sna->scrn); int i; - for (i = 0; i < config->num_crtc; i++) { + for (i = 0; i < video->sna->mode.num_real_crtc; i++) { xf86CrtcPtr crtc = config->crtc[i]; int pipe; - if (sna_crtc_id(crtc) == 0) - break; - - pipe = sna_crtc_to_pipe(crtc); + pipe = sna_crtc_pipe(crtc); + assert(pipe < ARRAY_SIZE(video->bo)); if (video->bo[pipe] == NULL) continue; memset(&s, 0, sizeof(s)); - s.plane_id = sna_crtc_to_sprite(crtc); + s.plane_id = sna_crtc_to_sprite(crtc, video->idx); if (drmIoctl(video->sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s)) xf86DrvMsg(video->sna->scrn->scrnIndex, X_ERROR, "failed to disable plane\n"); @@ -153,7 +153,7 @@ static int sna_video_sprite_best_size(ddQueryBestSize_ARGS) struct sna_video *video = port->devPriv.ptr; struct sna *sna = video->sna; - if (sna->kgem.gen >= 075) { + if (!has_hw_scaling(sna) && !sna->render.video) { *p_w = vid_w; *p_h = vid_h; } else { @@ -221,12 +221,12 @@ sna_video_sprite_show(struct sna *sna, BoxPtr dstBox) { struct local_mode_set_plane s; - int pipe = sna_crtc_to_pipe(crtc); + int pipe = sna_crtc_pipe(crtc); /* XXX handle video spanning multiple CRTC */ VG_CLEAR(s); - s.plane_id = sna_crtc_to_sprite(crtc); + s.plane_id = sna_crtc_to_sprite(crtc, video->idx); #define DRM_I915_SET_SPRITE_COLORKEY 0x2b #define LOCAL_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct local_intel_sprite_colorkey) @@ -263,9 +263,6 @@ sna_video_sprite_show(struct sna *sna, video->color_key_changed &= ~(1 << pipe); } - if (video->bo[pipe] == frame->bo) - return true; - update_dst_box_to_crtc_coords(sna, crtc, dstBox); if (frame->rotation & (RR_Rotate_90 | RR_Rotate_270)) { int tmp = frame->width; @@ -283,15 +280,30 @@ sna_video_sprite_show(struct sna *sna, uint32_t handles[4]; uint32_t pitches[4]; /* pitch for each plane */ uint32_t offsets[4]; /* offset of each plane */ + uint64_t modifiers[4]; } f; bool purged = true; memset(&f, 0, sizeof(f)); f.width = frame->width; 
f.height = frame->height; + f.flags = 1 << 1; /* +modifiers */ + f.handles[0] = frame->bo->handle; + f.pitches[0] = frame->pitch[0]; + switch (frame->bo->tiling) { + case I915_TILING_NONE: + break; + case I915_TILING_X: + /* I915_FORMAT_MOD_X_TILED */ + f.modifiers[0] = (uint64_t)1 << 56 | 1; + break; + case I915_TILING_Y: + /* I915_FORMAT_MOD_Y_TILED */ + f.modifiers[0] = (uint64_t)1 << 56 | 2; + break; + } + switch (frame->id) { case FOURCC_RGB565: f.pixel_format = DRM_FORMAT_RGB565; @@ -360,7 +372,7 @@ sna_video_sprite_show(struct sna *sna, return false; } - frame->bo->domain = DOMAIN_NONE; + __kgem_bo_clear_dirty(frame->bo); if (video->bo[pipe]) kgem_bo_destroy(&sna->kgem, video->bo[pipe]); @@ -374,17 +386,17 @@ static int sna_video_sprite_put_image(ddPutImage_ARGS) struct sna *sna = video->sna; xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); RegionRec clip; + BoxRec draw_extents; int ret, i; - clip.extents.x1 = draw->x + drw_x; - clip.extents.y1 = draw->y + drw_y; - clip.extents.x2 = clip.extents.x1 + drw_w; - clip.extents.y2 = clip.extents.y1 + drw_h; - clip.data = NULL; + init_video_region(&clip, draw, drw_x, drw_y, drw_w, drw_h); + draw_extents = clip.extents; DBG(("%s: always_on_top=%d\n", __FUNCTION__, video->AlwaysOnTop)); - if (!video->AlwaysOnTop) + if (!video->AlwaysOnTop) { + ValidateGC(draw, gc); RegionIntersect(&clip, &clip, gc->pCompositeClip); + } DBG(("%s: src=(%d, %d),(%d, %d), dst=(%d, %d),(%d, %d), id=%d, sizep=%dx%d, sync?=%d\n", __FUNCTION__, @@ -402,19 +414,17 @@ static int sna_video_sprite_put_image(ddPutImage_ARGS) goto err; } - for (i = 0; i < config->num_crtc; i++) { + for (i = 0; i < video->sna->mode.num_real_crtc; i++) { xf86CrtcPtr crtc = config->crtc[i]; struct sna_video_frame frame; + BoxRec dst = draw_extents; int pipe; INT32 x1, x2, y1, y2; - BoxRec dst; RegionRec reg; Rotation rotation; + bool cache_bo; - if (sna_crtc_id(crtc) == 0) - break; - - pipe = sna_crtc_to_pipe(crtc); + pipe = sna_crtc_pipe(crtc); 
sna_video_frame_init(video, format->id, width, height, &frame); @@ -423,10 +433,11 @@ static int sna_video_sprite_put_image(ddPutImage_ARGS) RegionIntersect(®, ®, &clip); if (RegionNil(®)) { off: + assert(pipe < ARRAY_SIZE(video->bo)); if (video->bo[pipe]) { struct local_mode_set_plane s; memset(&s, 0, sizeof(s)); - s.plane_id = sna_crtc_to_sprite(crtc); + s.plane_id = sna_crtc_to_sprite(crtc, video->idx); if (drmIoctl(video->sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s)) xf86DrvMsg(video->sna->scrn->scrnIndex, X_ERROR, "failed to disable plane\n"); @@ -440,8 +451,6 @@ off: y1 = src_y; y2 = src_y + src_h; - dst = clip.extents; - ret = xf86XVClipVideoHelper(&dst, &x1, &x2, &y1, &y2, ®, frame.width, frame.height); RegionUninit(®); @@ -465,8 +474,8 @@ off: /* if sprite can't handle rotation natively, store it for the copy func */ rotation = RR_Rotate_0; - if (!sna_crtc_set_sprite_rotation(crtc, crtc->rotation)) { - sna_crtc_set_sprite_rotation(crtc, RR_Rotate_0); + if (!sna_crtc_set_sprite_rotation(crtc, video->idx, crtc->rotation)) { + sna_crtc_set_sprite_rotation(crtc, video->idx, RR_Rotate_0); rotation = crtc->rotation; } sna_video_frame_set_rotation(video, &frame, rotation); @@ -496,6 +505,8 @@ off: frame.image.y1 = 0; frame.image.x2 = frame.width; frame.image.y2 = frame.height; + + cache_bo = false; } else { frame.bo = sna_video_buffer(video, &frame); if (frame.bo == NULL) { @@ -509,6 +520,60 @@ off: ret = BadAlloc; goto err; } + + cache_bo = true; + } + + if (!has_hw_scaling(sna) && sna->render.video && + !((frame.src.x2 - frame.src.x1) == (dst.x2 - dst.x1) && + (frame.src.y2 - frame.src.y1) == (dst.y2 - dst.y1))) { + ScreenPtr screen = to_screen_from_sna(sna); + PixmapPtr scaled; + RegionRec r; + + r.extents.x1 = r.extents.y1 = 0; + r.extents.x2 = dst.x2 - dst.x1; + r.extents.y2 = dst.y2 - dst.y1; + r.data = NULL; + + DBG(("%s: scaling from (%d, %d) to (%d, %d)\n", + __FUNCTION__, + frame.src.x2 - frame.src.x1, + frame.src.y2 - frame.src.y1, + r.extents.x2, 
r.extents.y2)); + + scaled = screen->CreatePixmap(screen, + r.extents.x2, + r.extents.y2, + 24, + CREATE_PIXMAP_USAGE_SCRATCH); + if (scaled == NULL) { + ret = BadAlloc; + goto err; + } + + if (!sna->render.video(sna, video, &frame, &r, scaled)) { + screen->DestroyPixmap(scaled); + ret = BadAlloc; + goto err; + } + + if (cache_bo) + sna_video_buffer_fini(video); + else + kgem_bo_destroy(&sna->kgem, frame.bo); + + frame.bo = kgem_bo_reference(__sna_pixmap_get_bo(scaled)); + kgem_bo_submit(&sna->kgem, frame.bo); + + frame.id = FOURCC_RGB888; + frame.src = frame.image = r.extents; + frame.width = frame.image.x2; + frame.height = frame.image.y2; + frame.pitch[0] = frame.bo->pitch; + + screen->DestroyPixmap(scaled); + cache_bo = false; } ret = Success; @@ -517,24 +582,16 @@ off: ret = BadAlloc; } - frame.bo->domain = DOMAIN_NONE; - if (xvmc_passthrough(format->id)) - kgem_bo_destroy(&sna->kgem, frame.bo); - else + if (cache_bo) sna_video_buffer_fini(video); + else + kgem_bo_destroy(&sna->kgem, frame.bo); if (ret != Success) goto err; } - if (!video->AlwaysOnTop && !RegionEqual(&video->clip, &clip) && - sna_blt_fill_boxes(sna, GXcopy, - __sna_pixmap_get_bo(sna->front), - sna->front->drawable.bitsPerPixel, - video->color_key, - region_rects(&clip), - region_num_rects(&clip))) - RegionCopy(&video->clip, &clip); + sna_video_fill_colorkey(video, &clip); sna_window_set_port((WindowPtr)draw, port); return Success; @@ -606,25 +663,28 @@ static int sna_video_sprite_color_key(struct sna *sna) return color_key & ((1 << scrn->depth) - 1); } -static bool sna_video_has_sprites(struct sna *sna) +static int sna_video_has_sprites(struct sna *sna) { xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + unsigned min; int i; DBG(("%s: num_crtc=%d\n", __FUNCTION__, sna->mode.num_real_crtc)); if (sna->mode.num_real_crtc == 0) - return false; + return 0; + min = -1; for (i = 0; i < sna->mode.num_real_crtc; i++) { - if (!sna_crtc_to_sprite(config->crtc[i])) { - DBG(("%s: no sprite 
found on pipe %d\n", __FUNCTION__, sna_crtc_to_pipe(config->crtc[i]))); - return false; - } + unsigned count = sna_crtc_count_sprites(config->crtc[i]); + DBG(("%s: %d sprites found on pipe %d\n", __FUNCTION__, + count, sna_crtc_pipe(config->crtc[i]))); + if (count < min) + min = count; } - DBG(("%s: yes\n", __FUNCTION__)); - return true; + DBG(("%s: min=%d\n", __FUNCTION__, min)); + return min; } void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) @@ -632,16 +692,18 @@ void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) XvAdaptorPtr adaptor; struct sna_video *video; XvPortPtr port; + int count, i; - if (!sna_video_has_sprites(sna)) + count = sna_video_has_sprites(sna); + if (!count) return; adaptor = sna_xv_adaptor_alloc(sna); if (!adaptor) return; - video = calloc(1, sizeof(*video)); - port = calloc(1, sizeof(*port)); + video = calloc(count, sizeof(*video)); + port = calloc(count, sizeof(*port)); if (video == NULL || port == NULL) { free(video); free(port); @@ -686,36 +748,43 @@ void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) adaptor->ddPutImage = sna_video_sprite_put_image; adaptor->ddQueryImageAttributes = sna_video_sprite_query; - adaptor->nPorts = 1; + adaptor->nPorts = count; adaptor->pPorts = port; - adaptor->base_id = port->id = FakeClientID(0); - AddResource(port->id, XvGetRTPort(), port); - port->pAdaptor = adaptor; - port->pNotify = NULL; - port->pDraw = NULL; - port->client = NULL; - port->grab.client = NULL; - port->time = currentTime; - port->devPriv.ptr = video; - - video->sna = sna; - video->alignment = 64; - video->color_key = sna_video_sprite_color_key(sna); - video->color_key_changed = ~0; - video->has_color_key = true; - video->brightness = -19; /* (255/219) * -16 */ - video->contrast = 75; /* 255/219 * 64 */ - video->saturation = 146; /* 128/112 * 128 */ - video->desired_crtc = NULL; - video->gamma5 = 0xc0c0c0; - video->gamma4 = 0x808080; - video->gamma3 = 0x404040; - video->gamma2 = 0x202020; - 
video->gamma1 = 0x101010; - video->gamma0 = 0x080808; - RegionNull(&video->clip); - video->SyncToVblank = 1; + for (i = 0; i < count; i++) { + port->id = FakeClientID(0); + AddResource(port->id, XvGetRTPort(), port); + port->pAdaptor = adaptor; + port->pNotify = NULL; + port->pDraw = NULL; + port->client = NULL; + port->grab.client = NULL; + port->time = currentTime; + port->devPriv.ptr = video; + + video->sna = sna; + video->idx = i; + video->alignment = 64; + video->color_key = sna_video_sprite_color_key(sna); + video->color_key_changed = ~0; + video->has_color_key = true; + video->brightness = -19; /* (255/219) * -16 */ + video->contrast = 75; /* 255/219 * 64 */ + video->saturation = 146; /* 128/112 * 128 */ + video->desired_crtc = NULL; + video->gamma5 = 0xc0c0c0; + video->gamma4 = 0x808080; + video->gamma3 = 0x404040; + video->gamma2 = 0x202020; + video->gamma1 = 0x101010; + video->gamma0 = 0x080808; + RegionNull(&video->clip); + video->SyncToVblank = 1; + + port++; + video++; + } + adaptor->base_id = adaptor->pPorts[0].id; xvColorKey = MAKE_ATOM("XV_COLORKEY"); xvAlwaysOnTop = MAKE_ATOM("XV_ALWAYS_ON_TOP"); diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c index 95011939..3cce5cf1 100644 --- a/src/sna/sna_video_textured.c +++ b/src/sna/sna_video_textured.c @@ -48,7 +48,12 @@ static const XvAttributeRec Attributes[] = { //{XvSettable | XvGettable, 0, 255, (char *)"XV_CONTRAST"}, }; -static const XvImageRec Images[] = { +static const XvImageRec gen2_Images[] = { + XVIMAGE_YUY2, + XVIMAGE_UYVY, +}; + +static const XvImageRec gen3_Images[] = { XVIMAGE_YUY2, XVIMAGE_YV12, XVIMAGE_I420, @@ -149,15 +154,16 @@ sna_video_textured_put_image(ddPutImage_ARGS) BoxRec dstBox; RegionRec clip; xf86CrtcPtr crtc; + int16_t dx, dy; bool flush = false; bool ret; - clip.extents.x1 = draw->x + drw_x; - clip.extents.y1 = draw->y + drw_y; - clip.extents.x2 = clip.extents.x1 + drw_w; - clip.extents.y2 = clip.extents.y1 + drw_h; - clip.data = NULL; + if 
(wedged(sna)) + return BadAlloc; + init_video_region(&clip, draw, drw_x, drw_y, drw_w, drw_h); + + ValidateGC(draw, gc); RegionIntersect(&clip, &clip, gc->pCompositeClip); if (!RegionNotEmpty(&clip)) return Success; @@ -181,6 +187,9 @@ sna_video_textured_put_image(ddPutImage_ARGS) &clip)) return Success; + if (get_drawable_deltas(draw, pixmap, &dx, &dy)) + RegionTranslate(&clip, dx, dy); + flags = MOVE_WRITE | __MOVE_FORCE; if (clip.data) flags |= MOVE_READ; @@ -234,7 +243,7 @@ sna_video_textured_put_image(ddPutImage_ARGS) DBG(("%s: failed to render video\n", __FUNCTION__)); ret = BadAlloc; } else - DamageDamageRegion(draw, &clip); + DamageDamageRegion(&pixmap->drawable, &clip); kgem_bo_destroy(&sna->kgem, frame.bo); @@ -316,7 +325,7 @@ void sna_video_textured_setup(struct sna *sna, ScreenPtr screen) if (!sna->render.video) { xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, - "Textured video not supported on this hardware\n"); + "Textured video not supported on this hardware or backend\n"); return; } @@ -362,8 +371,13 @@ void sna_video_textured_setup(struct sna *sna, ScreenPtr screen) ARRAY_SIZE(Formats)); adaptor->nAttributes = ARRAY_SIZE(Attributes); adaptor->pAttributes = (XvAttributeRec *)Attributes; - adaptor->nImages = ARRAY_SIZE(Images); - adaptor->pImages = (XvImageRec *)Images; + if (sna->kgem.gen < 030) { + adaptor->nImages = ARRAY_SIZE(gen2_Images); + adaptor->pImages = (XvImageRec *)gen2_Images; + } else { + adaptor->nImages = ARRAY_SIZE(gen3_Images); + adaptor->pImages = (XvImageRec *)gen3_Images; + } #if XORG_XV_VERSION < 2 adaptor->ddAllocatePort = sna_xv_alloc_port; adaptor->ddFreePort = sna_xv_free_port; diff --git a/src/sna/xassert.h b/src/sna/xassert.h index 1bcfd080..e648e4bc 100644 --- a/src/sna/xassert.h +++ b/src/sna/xassert.h @@ -43,6 +43,28 @@ xorg_backtrace(); \ FatalError("%s:%d assertion '%s' failed\n", __func__, __LINE__, #E); \ } while (0) + +#define warn_unless(E) \ +({ \ + bool fail = !(E); \ + if (unlikely(fail)) { \ + static int 
__warn_once__; \ + if (!__warn_once__) { \ + xorg_backtrace(); \ + ErrorF("%s:%d assertion '%s' failed\n", __func__, __LINE__, #E); \ + __warn_once__ = 1; \ + } \ + } \ + unlikely(fail); \ +}) + +#define dbg(EXPR) EXPR + +#else + +#define warn_unless(E) ({ bool fail = !(E); unlikely(fail); }) +#define dbg(EXPR) + #endif #endif /* __XASSERT_H__ */ diff --git a/src/uxa/i830_reg.h b/src/uxa/i830_reg.h index d8306bcd..ba39d82c 100644 --- a/src/uxa/i830_reg.h +++ b/src/uxa/i830_reg.h @@ -65,6 +65,12 @@ #define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0) #define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20) +#define MI_LOAD_REGISTER_IMM (0x22<<23 | (3-2)) + +#define BCS_SWCTRL 0x22200 +# define BCS_SWCTRL_SRC_Y (1 << 0) +# define BCS_SWCTRL_DST_Y (1 << 1) + /* BLT commands */ #define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) #define COLOR_BLT_WRITE_ALPHA (1<<21) diff --git a/src/uxa/i965_video.c b/src/uxa/i965_video.c index 68e6fd38..438ab909 100644 --- a/src/uxa/i965_video.c +++ b/src/uxa/i965_video.c @@ -37,7 +37,6 @@ #include "fourcc.h" #include "intel.h" -#include "intel_xvmc.h" #include "intel_uxa.h" #include "i830_reg.h" #include "i965_reg.h" diff --git a/src/uxa/intel.h b/src/uxa/intel.h index 1b7e5339..a5e77af4 100644 --- a/src/uxa/intel.h +++ b/src/uxa/intel.h @@ -121,7 +121,6 @@ typedef struct intel_screen_private { void *modes; drm_intel_bo *front_buffer, *back_buffer; - unsigned int back_name; long front_pitch, front_tiling; dri_bufmgr *bufmgr; @@ -169,6 +168,7 @@ typedef struct intel_screen_private { const struct intel_device_info *info; unsigned int BR[20]; + unsigned int BR_tiling[2]; CloseScreenProcPtr CloseScreen; @@ -196,7 +196,9 @@ typedef struct intel_screen_private { int colorKey; XF86VideoAdaptorPtr adaptor; +#if !HAVE_NOTIFY_FD ScreenBlockHandlerProcPtr BlockHandler; +#endif Bool overlayOn; struct { @@ -285,8 +287,6 @@ typedef struct intel_screen_private { Bool has_kernel_flush; Bool needs_flush; - struct _DRI2FrameEvent *pending_flip[MAX_PIPES]; - /* 
Broken-out options. */ OptionInfoPtr Options; @@ -368,6 +368,7 @@ typedef void (*intel_drm_abort_proc)(ScrnInfoPtr scrn, extern uint32_t intel_drm_queue_alloc(ScrnInfoPtr scrn, xf86CrtcPtr crtc, void *data, intel_drm_handler_proc handler, intel_drm_abort_proc abort); extern void intel_drm_abort(ScrnInfoPtr scrn, Bool (*match)(void *data, void *match_data), void *match_data); +extern void intel_drm_abort_seq(ScrnInfoPtr scrn, uint32_t seq); extern int intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, xf86CrtcPtr crtc); extern int intel_crtc_id(xf86CrtcPtr crtc); @@ -408,7 +409,6 @@ typedef struct _DRI2FrameEvent { ClientPtr client; enum DRI2FrameEventType type; int frame; - int pipe; struct list drawable_resource, client_resource; @@ -418,7 +418,12 @@ typedef struct _DRI2FrameEvent { DRI2BufferPtr front; DRI2BufferPtr back; - struct _DRI2FrameEvent *chain; + /* current scanout for triple buffer */ + int old_width; + int old_height; + int old_pitch; + int old_tiling; + dri_bo *old_buffer; } DRI2FrameEventRec, *DRI2FrameEventPtr; extern Bool intel_do_pageflip(intel_screen_private *intel, @@ -456,10 +461,6 @@ extern xf86CrtcPtr intel_covering_crtc(ScrnInfoPtr scrn, BoxPtr box, Bool I830DRI2ScreenInit(ScreenPtr pScreen); void I830DRI2CloseScreen(ScreenPtr pScreen); -void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, - unsigned int tv_usec, DRI2FrameEventPtr flip_info); -void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, - unsigned int tv_usec, DRI2FrameEventPtr flip_info); /* intel_dri3.c */ Bool intel_dri3_screen_init(ScreenPtr screen); diff --git a/src/uxa/intel_batchbuffer.c b/src/uxa/intel_batchbuffer.c index a29e4434..114c6026 100644 --- a/src/uxa/intel_batchbuffer.c +++ b/src/uxa/intel_batchbuffer.c @@ -245,6 +245,17 @@ void intel_batch_submit(ScrnInfoPtr scrn) if (intel->batch_used == 0) return; + if (intel->current_batch == I915_EXEC_BLT && + INTEL_INFO(intel)->gen >= 060) { + OUT_BATCH(MI_FLUSH_DW); + OUT_BATCH(0); + 
OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(MI_LOAD_REGISTER_IMM); + OUT_BATCH(BCS_SWCTRL); + OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16); + } + /* Mark the end of the batchbuffer. */ OUT_BATCH(MI_BATCH_BUFFER_END); /* Emit a padding dword if we aren't going to be quad-word aligned. */ diff --git a/src/uxa/intel_batchbuffer.h b/src/uxa/intel_batchbuffer.h index e5fb8d08..e71ffd19 100644 --- a/src/uxa/intel_batchbuffer.h +++ b/src/uxa/intel_batchbuffer.h @@ -30,7 +30,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef _INTEL_BATCHBUFFER_H #define _INTEL_BATCHBUFFER_H -#define BATCH_RESERVED 16 +#define BATCH_RESERVED 64 void intel_batch_init(ScrnInfoPtr scrn); @@ -202,6 +202,23 @@ do { \ #define BEGIN_BATCH(n) __BEGIN_BATCH(n,RENDER_BATCH) #define BEGIN_BATCH_BLT(n) __BEGIN_BATCH(n,BLT_BATCH) +#define BEGIN_BATCH_BLT_TILED(n) \ +do { \ + if (INTEL_INFO(intel)->gen < 060) { \ + __BEGIN_BATCH(n, BLT_BATCH); \ + } else { \ + __BEGIN_BATCH(n+7, BLT_BATCH); \ + OUT_BATCH(MI_FLUSH_DW); \ + OUT_BATCH(0); \ + OUT_BATCH(0); \ + OUT_BATCH(0); \ + OUT_BATCH(MI_LOAD_REGISTER_IMM); \ + OUT_BATCH(BCS_SWCTRL); \ + OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 | \ + ((intel->BR_tiling[0] == I915_TILING_Y) ? BCS_SWCTRL_DST_Y : 0) | \ + ((intel->BR_tiling[1] == I915_TILING_Y) ? 
BCS_SWCTRL_SRC_Y : 0)); \ + } \ +} while (0) #define ADVANCE_BATCH() do { \ if (intel->batch_emitting == 0) \ diff --git a/src/uxa/intel_display.c b/src/uxa/intel_display.c index 7b4d4e0c..809cda1d 100644 --- a/src/uxa/intel_display.c +++ b/src/uxa/intel_display.c @@ -89,11 +89,11 @@ struct intel_mode { struct list outputs; struct list crtcs; - void *pageflip_data; - intel_pageflip_handler_proc pageflip_handler; - intel_pageflip_abort_proc pageflip_abort; - - Bool delete_dp_12_displays; + struct { + intel_pageflip_handler_proc handler; + intel_pageflip_abort_proc abort; + void *data; + } pageflip; }; struct intel_pageflip { @@ -114,7 +114,6 @@ struct intel_crtc { struct list link; PixmapPtr scanout_pixmap; uint32_t scanout_fb_id; - int32_t vblank_offset; uint32_t msc_prev; uint64_t msc_high; }; @@ -193,7 +192,7 @@ intel_output_backlight_init(xf86OutputPtr output) str = xf86GetOptValString(intel->Options, OPTION_BACKLIGHT); if (str != NULL) { - if (backlight_exists(str) != BL_NONE) { + if (backlight_exists(str)) { intel_output->backlight_active_level = backlight_open(&intel_output->backlight, strdup(str)); @@ -689,9 +688,11 @@ intel_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix) } bo = intel_get_pixmap_bo(ppix); - if (intel->front_buffer) { - ErrorF("have front buffer\n"); - } + if (!bo) + return FALSE; + + if (intel->front_buffer) + return FALSE; drm_intel_bo_disable_reuse(bo); @@ -867,6 +868,48 @@ intel_output_attach_edid(xf86OutputPtr output) xf86OutputSetEDID(output, mon); } +static void +intel_output_attach_tile(xf86OutputPtr output) +{ +#if XF86_OUTPUT_VERSION >= 3 + struct intel_output *intel_output = output->driver_private; + drmModeConnectorPtr koutput = intel_output->mode_output; + struct intel_mode *mode = intel_output->mode; + drmModePropertyBlobPtr blob = NULL; + struct xf86CrtcTileInfo tile_info, *set = NULL; + int i; + + for (i = 0; koutput && i < koutput->count_props; i++) { + drmModePropertyPtr props; + + props = drmModeGetProperty(mode->fd, 
koutput->props[i]); + if (!props) + continue; + + if (!(props->flags & DRM_MODE_PROP_BLOB)) { + drmModeFreeProperty(props); + continue; + } + + if (!strcmp(props->name, "TILE")) { + blob = drmModeGetPropertyBlob(mode->fd, + koutput->prop_values[i]); + } + drmModeFreeProperty(props); + } + + if (blob) { + if (xf86OutputParseKMSTile(blob->data, + blob->length, + &tile_info)) + set = &tile_info; + drmModeFreePropertyBlob(blob); + } + + xf86OutputSetTile(output, set); +#endif +} + static DisplayModePtr intel_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) { @@ -922,6 +965,7 @@ intel_output_get_modes(xf86OutputPtr output) int i; intel_output_attach_edid(output); + intel_output_attach_tile(output); if (!koutput) return Modes; @@ -1492,6 +1536,7 @@ intel_output_init(ScrnInfoPtr scrn, struct intel_mode *mode, drmModeResPtr mode_ intel_output = output->driver_private; intel_output->output_id = mode_res->connectors[num]; intel_output->mode_output = koutput; + RROutputChanged(output->randr_output, TRUE); return; } } @@ -1650,9 +1695,6 @@ intel_pageflip_abort(ScrnInfoPtr scrn, xf86CrtcPtr crtc, void *data); static void intel_pageflip_complete(struct intel_mode *mode); -static void -intel_drm_abort_seq (ScrnInfoPtr scrn, uint32_t seq); - Bool intel_do_pageflip(intel_screen_private *intel, dri_bo *new_front, @@ -1671,23 +1713,30 @@ intel_do_pageflip(intel_screen_private *intel, uint32_t new_fb_id; uint32_t flags; uint32_t seq; + int err = 0; int i; /* + * We only have a single length queue in the kernel, so any + * attempts to schedule a second flip before processing the first + * is a bug. Punt it back to the caller. 
+ */ + if (mode->flip_count) + return FALSE; + + /* * Create a new handle for the back buffer */ if (drmModeAddFB(mode->fd, scrn->virtualX, scrn->virtualY, scrn->depth, scrn->bitsPerPixel, pitch, - new_front->handle, &new_fb_id)) + new_front->handle, &new_fb_id)) { + err = errno; goto error_out; + } drm_intel_bo_disable_reuse(new_front); intel_flush(intel); - mode->pageflip_data = pageflip_data; - mode->pageflip_handler = pageflip_handler; - mode->pageflip_abort = pageflip_abort; - /* * Queue flips on all enabled CRTCs * Note that if/when we get per-CRTC buffers, we'll have to update this. @@ -1699,6 +1748,7 @@ intel_do_pageflip(intel_screen_private *intel, */ mode->fe_msc = 0; mode->fe_usec = 0; + memset(&mode->pageflip, 0, sizeof(mode->pageflip)); flags = DRM_MODE_PAGE_FLIP_EVENT; if (async) @@ -1711,8 +1761,7 @@ intel_do_pageflip(intel_screen_private *intel, flip = calloc(1, sizeof(struct intel_pageflip)); if (flip == NULL) { - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "flip queue: carrier alloc failed.\n"); + err = errno; goto error_undo; } @@ -1724,33 +1773,30 @@ intel_do_pageflip(intel_screen_private *intel, seq = intel_drm_queue_alloc(scrn, config->crtc[i], flip, intel_pageflip_handler, intel_pageflip_abort); if (!seq) { + err = errno; free(flip); goto error_undo; } -again: + mode->flip_count++; + if (drmModePageFlip(mode->fd, crtc_id(crtc), new_fb_id, flags, (void *)(uintptr_t)seq)) { - if (intel_mode_read_drm_events(intel)) { - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "flip queue retry\n"); - goto again; - } - xf86DrvMsg(scrn->scrnIndex, X_WARNING, - "flip queue failed: %s\n", strerror(errno)); - if (seq) - intel_drm_abort_seq(scrn, seq); - free(flip); + err = errno; + intel_drm_abort_seq(scrn, seq); goto error_undo; } - mode->flip_count++; } mode->old_fb_id = mode->fb_id; mode->fb_id = new_fb_id; + mode->pageflip.data = pageflip_data; + mode->pageflip.handler = pageflip_handler; + mode->pageflip.abort = pageflip_abort; + if (!mode->flip_count) 
intel_pageflip_complete(mode); @@ -1765,7 +1811,7 @@ error_undo: error_out: xf86DrvMsg(scrn->scrnIndex, X_WARNING, "Page flip failed: %s\n", - strerror(errno)); + strerror(err)); mode->flip_count = 0; return FALSE; @@ -1839,7 +1885,7 @@ intel_drm_abort(ScrnInfoPtr scrn, Bool (*match)(void *data, void *match_data), v /* * Abort by drm queue sequence number */ -static void +void intel_drm_abort_seq(ScrnInfoPtr scrn, uint32_t seq) { struct intel_drm_queue *q; @@ -1911,7 +1957,6 @@ intel_sequence_to_crtc_msc(xf86CrtcPtr crtc, uint32_t sequence) { struct intel_crtc *intel_crtc = crtc->driver_private; - sequence += intel_crtc->vblank_offset; if ((int32_t) (sequence - intel_crtc->msc_prev) < -0x40000000) intel_crtc->msc_high += 0x100000000L; intel_crtc->msc_prev = sequence; @@ -1935,37 +1980,10 @@ intel_get_crtc_msc_ust(ScrnInfoPtr scrn, xf86CrtcPtr crtc, uint64_t *msc, uint64 return 0; } -/* - * Convert a 64-bit adjusted MSC value into a 32-bit kernel sequence number, - * removing the high 32 bits and subtracting out the vblank_offset term. - * - * This also updates the vblank_offset when it notices that the value should - * change. 
- */ - -#define MAX_VBLANK_OFFSET 1000 - uint32_t intel_crtc_msc_to_sequence(ScrnInfoPtr scrn, xf86CrtcPtr crtc, uint64_t expect) { - struct intel_crtc *intel_crtc = crtc->driver_private; - uint64_t msc, ust; - - if (intel_get_crtc_msc_ust(scrn, crtc, &msc, &ust) == 0) { - int64_t diff = expect - msc; - - /* We're way off here, assume that the kernel has lost its mind - * and smack the vblank back to something sensible - */ - if (diff < -MAX_VBLANK_OFFSET || diff > MAX_VBLANK_OFFSET) { - intel_crtc->vblank_offset += (int32_t) diff; - if (intel_crtc->vblank_offset > -MAX_VBLANK_OFFSET && - intel_crtc->vblank_offset < MAX_VBLANK_OFFSET) - intel_crtc->vblank_offset = 0; - } - } - - return (uint32_t) (expect - intel_crtc->vblank_offset); + return (uint32_t)expect; } /* @@ -1998,14 +2016,13 @@ intel_drm_handler(int fd, uint32_t frame, uint32_t sec, uint32_t usec, void *use static void intel_pageflip_complete(struct intel_mode *mode) { - /* Release framebuffer */ - drmModeRmFB(mode->fd, mode->old_fb_id); - - if (!mode->pageflip_handler) + if (!mode->pageflip.handler) return; - mode->pageflip_handler(mode->fe_msc, mode->fe_usec, - mode->pageflip_data); + /* Release framebuffer */ + drmModeRmFB(mode->fd, mode->old_fb_id); + mode->pageflip.handler(mode->fe_msc, mode->fe_usec, + mode->pageflip.data); } /* @@ -2045,6 +2062,7 @@ intel_pageflip_handler(ScrnInfoPtr scrn, xf86CrtcPtr crtc, if (!mode) return; + intel_pageflip_complete(mode); } @@ -2060,18 +2078,18 @@ intel_pageflip_abort(ScrnInfoPtr scrn, xf86CrtcPtr crtc, void *data) if (!mode) return; - /* Release framebuffer */ - drmModeRmFB(mode->fd, mode->old_fb_id); - - if (!mode->pageflip_abort) + if (!mode->pageflip.abort) return; - mode->pageflip_abort(mode->pageflip_data); + /* Release framebuffer */ + drmModeRmFB(mode->fd, mode->old_fb_id); + mode->pageflip.abort(mode->pageflip.data); } /* * Check for pending DRM events and process them. 
*/ +#if !HAVE_NOTIFY_FD static void drm_wakeup_handler(pointer data, int err, pointer p) { @@ -2086,6 +2104,14 @@ drm_wakeup_handler(pointer data, int err, pointer p) if (FD_ISSET(mode->fd, read_mask)) drmHandleEvent(mode->fd, &mode->event_context); } +#else +static void +drm_notify_fd(int fd, int ready, void *data) +{ + struct intel_mode *mode = data; + drmHandleEvent(mode->fd, &mode->event_context); +} +#endif /* * If there are any available, read drm_events @@ -2231,10 +2257,6 @@ Bool intel_mode_pre_init(ScrnInfoPtr scrn, int fd, int cpp) intel->use_pageflipping = TRUE; } - if (xf86ReturnOptValBool(intel->Options, OPTION_DELETE_DP12, FALSE)) { - mode->delete_dp_12_displays = TRUE; - } - intel->modes = mode; drmModeFreeResources(mode_res); return TRUE; @@ -2250,9 +2272,11 @@ intel_mode_init(struct intel_screen_private *intel) * registration within ScreenInit and not PreInit. */ mode->flip_count = 0; - AddGeneralSocket(mode->fd); + SetNotifyFd(mode->fd, drm_notify_fd, X_NOTIFY_READ, mode); +#if !HAVE_NOTIFY_FD RegisterBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, drm_wakeup_handler, mode); +#endif } void @@ -2276,9 +2300,11 @@ intel_mode_close(intel_screen_private *intel) intel_drm_abort_scrn(intel->scrn); +#if !HAVE_NOTIFY_FD RemoveBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, drm_wakeup_handler, mode); - RemoveGeneralSocket(mode->fd); +#endif + RemoveNotifyFd(mode->fd); } void @@ -2498,12 +2524,11 @@ intel_mode_hotplug(struct intel_screen_private *intel) int i, j; Bool found; Bool changed = FALSE; - struct intel_mode *mode = intel->modes; + mode_res = drmModeGetResources(intel->drmSubFD); if (!mode_res) goto out; -restart_destroy: for (i = 0; i < config->num_output; i++) { xf86OutputPtr output = config->output[i]; struct intel_output *intel_output; @@ -2522,13 +2547,9 @@ restart_destroy: drmModeFreeConnector(intel_output->mode_output); intel_output->mode_output = NULL; intel_output->output_id = -1; + RROutputChanged(output->randr_output, TRUE); 
changed = TRUE; - if (mode->delete_dp_12_displays) { - RROutputDestroy(output->randr_output); - xf86OutputDestroy(output); - goto restart_destroy; - } } /* find new output ids we don't have outputs for */ @@ -2552,10 +2573,8 @@ restart_destroy: intel_output_init(scrn, intel->modes, mode_res, i, 1); } - if (changed) { - RRSetChanged(xf86ScrnToScreen(scrn)); + if (changed) RRTellChanged(xf86ScrnToScreen(scrn)); - } drmModeFreeResources(mode_res); out: diff --git a/src/uxa/intel_dri.c b/src/uxa/intel_dri.c index f61c6210..524826d2 100644 --- a/src/uxa/intel_dri.c +++ b/src/uxa/intel_dri.c @@ -81,6 +81,47 @@ static DevPrivateKeyRec i830_client_key; static int i830_client_key; #endif +static void I830DRI2FlipEventHandler(unsigned int frame, + unsigned int tv_sec, + unsigned int tv_usec, + DRI2FrameEventPtr flip_info); + +static void I830DRI2FrameEventHandler(unsigned int frame, + unsigned int tv_sec, + unsigned int tv_usec, + DRI2FrameEventPtr swap_info); + +static void +i830_dri2_del_frame_event(DRI2FrameEventPtr info); + +static uint32_t pipe_select(int pipe) +{ + if (pipe > 1) + return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT; + else if (pipe > 0) + return DRM_VBLANK_SECONDARY; + else + return 0; +} + +static void +intel_dri2_vblank_handler(ScrnInfoPtr scrn, + xf86CrtcPtr crtc, + uint64_t msc, + uint64_t usec, + void *data) +{ + I830DRI2FrameEventHandler((uint32_t) msc, usec / 1000000, usec % 1000000, data); +} + +static void +intel_dri2_vblank_abort(ScrnInfoPtr scrn, + xf86CrtcPtr crtc, + void *data) +{ + i830_dri2_del_frame_event(data); +} + static uint32_t pixmap_flink(PixmapPtr pixmap) { struct intel_uxa_pixmap *priv = intel_uxa_get_pixmap_private(pixmap); @@ -135,9 +176,6 @@ I830DRI2CreateBuffers(DrawablePtr drawable, unsigned int *attachments, pixmap = NULL; if (attachments[i] == DRI2BufferFrontLeft) { pixmap = get_front_buffer(drawable); - - if (pixmap == NULL) - drawable = &(get_drawable_pixmap(drawable)->drawable); } else if (attachments[i] == DRI2BufferStencil && 
pDepthPixmap) { pixmap = pDepthPixmap; pixmap->refcnt++; @@ -246,11 +284,8 @@ I830DRI2CreateBuffer(DrawablePtr drawable, unsigned int attachment, } pixmap = NULL; - if (attachment == DRI2BufferFrontLeft) { + if (attachment == DRI2BufferFrontLeft) pixmap = get_front_buffer(drawable); - if (pixmap == NULL) - drawable = &(get_drawable_pixmap(drawable)->drawable); - } if (pixmap == NULL) { unsigned int hint = INTEL_CREATE_PIXMAP_DRI2; @@ -673,6 +708,20 @@ i830_dri2_del_frame_event(DRI2FrameEventPtr info) if (info->back) I830DRI2DestroyBuffer(NULL, info->back); + if (info->old_buffer) { + /* Check that the old buffer still matches the front buffer + * in case a mode change occurred before we woke up. + */ + if (info->intel->back_buffer == NULL && + info->old_width == info->intel->scrn->virtualX && + info->old_height == info->intel->scrn->virtualY && + info->old_pitch == info->intel->front_pitch && + info->old_tiling == info->intel->front_tiling) + info->intel->back_buffer = info->old_buffer; + else + dri_bo_unreference(info->old_buffer); + } + free(info); } @@ -708,16 +757,14 @@ static void I830DRI2ExchangeBuffers(struct intel_screen_private *intel, DRI2BufferPtr front, DRI2BufferPtr back) { I830DRI2BufferPrivatePtr front_priv, back_priv; - int tmp; struct intel_uxa_pixmap *new_front; front_priv = front->driverPrivate; back_priv = back->driverPrivate; /* Swap BO names so DRI works */ - tmp = front->name; front->name = back->name; - back->name = tmp; + back->name = pixmap_flink(front_priv->pixmap); /* Swap pixmap bos */ new_front = intel_exchange_pixmap_buffers(intel, @@ -753,87 +800,30 @@ I830DRI2FlipAbort(void *pageflip_data) i830_dri2_del_frame_event(info); } -/* - * Our internal swap routine takes care of actually exchanging, blitting, or - * flipping buffers as necessary. 
- */ static Bool -I830DRI2ScheduleFlip(struct intel_screen_private *intel, - DrawablePtr draw, - DRI2FrameEventPtr info) +allocate_back_buffer(struct intel_screen_private *intel) { - I830DRI2BufferPrivatePtr priv = info->back->driverPrivate; - drm_intel_bo *new_back, *old_back; - int tmp_name; - - if (!intel->use_triple_buffer) { - info->type = DRI2_SWAP; - if (!intel_do_pageflip(intel, - get_pixmap_bo(priv), - info->pipe, FALSE, info, - I830DRI2FlipComplete, - I830DRI2FlipAbort)) - return FALSE; - - I830DRI2ExchangeBuffers(intel, info->front, info->back); - return TRUE; - } + drm_intel_bo *bo; + int pitch; + uint32_t tiling; - if (intel->pending_flip[info->pipe]) { - assert(intel->pending_flip[info->pipe]->chain == NULL); - intel->pending_flip[info->pipe]->chain = info; + if (intel->back_buffer) return TRUE; - } - if (intel->back_buffer == NULL) { - new_back = drm_intel_bo_alloc(intel->bufmgr, "front buffer", - intel->front_buffer->size, 0); - if (new_back == NULL) - return FALSE; - - if (intel->front_tiling != I915_TILING_NONE) { - uint32_t tiling = intel->front_tiling; - drm_intel_bo_set_tiling(new_back, &tiling, intel->front_pitch); - if (tiling != intel->front_tiling) { - drm_intel_bo_unreference(new_back); - return FALSE; - } - } - - drm_intel_bo_disable_reuse(new_back); - dri_bo_flink(new_back, &intel->back_name); - } else { - new_back = intel->back_buffer; - intel->back_buffer = NULL; - } + bo = intel_allocate_framebuffer(intel->scrn, + intel->scrn->virtualX, + intel->scrn->virtualY, + intel->cpp, + &pitch, &tiling); + if (bo == NULL) + return FALSE; - old_back = get_pixmap_bo(priv); - if (!intel_do_pageflip(intel, old_back, info->pipe, FALSE, info, I830DRI2FlipComplete, I830DRI2FlipAbort)) { - intel->back_buffer = new_back; + if (pitch != intel->front_pitch || tiling != intel->front_tiling) { + drm_intel_bo_unreference(bo); return FALSE; } - info->type = DRI2_SWAP_CHAIN; - intel->pending_flip[info->pipe] = info; - - priv = info->front->driverPrivate; - - 
/* Exchange the current front-buffer with the fresh bo */ - - intel->back_buffer = intel->front_buffer; - drm_intel_bo_reference(intel->back_buffer); - intel_set_pixmap_bo(priv->pixmap, new_back); - drm_intel_bo_unreference(new_back); - - tmp_name = info->front->name; - info->front->name = intel->back_name; - intel->back_name = tmp_name; - /* Then flip DRI2 pointers and update the screen pixmap */ - I830DRI2ExchangeBuffers(intel, info->front, info->back); - DRI2SwapComplete(info->client, draw, 0, 0, 0, - DRI2_EXCHANGE_COMPLETE, - info->event_complete, - info->event_data); + intel->back_buffer = bo; return TRUE; } @@ -889,8 +879,88 @@ can_exchange(DrawablePtr drawable, DRI2BufferPtr front, DRI2BufferPtr back) return TRUE; } -void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, - unsigned int tv_usec, DRI2FrameEventPtr swap_info) +static Bool +queue_flip(struct intel_screen_private *intel, + DrawablePtr draw, + DRI2FrameEventPtr info) +{ + xf86CrtcPtr crtc = I830DRI2DrawableCrtc(draw); + I830DRI2BufferPrivatePtr priv = info->back->driverPrivate; + drm_intel_bo *old_back = get_pixmap_bo(priv); + + if (crtc == NULL) + return FALSE; + + if (!can_exchange(draw, info->front, info->back)) + return FALSE; + + if (!intel_do_pageflip(intel, old_back, + intel_crtc_to_pipe(crtc), + FALSE, info, + I830DRI2FlipComplete, I830DRI2FlipAbort)) + return FALSE; + +#if DRI2INFOREC_VERSION >= 6 + if (intel->use_triple_buffer && allocate_back_buffer(intel)) { + info->old_width = intel->scrn->virtualX; + info->old_height = intel->scrn->virtualY; + info->old_pitch = intel->front_pitch; + info->old_tiling = intel->front_tiling; + info->old_buffer = intel->front_buffer; + dri_bo_reference(info->old_buffer); + + priv = info->front->driverPrivate; + intel_set_pixmap_bo(priv->pixmap, intel->back_buffer); + + dri_bo_unreference(intel->back_buffer); + intel->back_buffer = NULL; + + DRI2SwapLimit(draw, 2); + } else + DRI2SwapLimit(draw, 1); +#endif + + /* Then flip DRI2 pointers 
and update the screen pixmap */ + I830DRI2ExchangeBuffers(intel, info->front, info->back); + return TRUE; +} + +static Bool +queue_swap(struct intel_screen_private *intel, + DrawablePtr draw, + DRI2FrameEventPtr info) +{ + xf86CrtcPtr crtc = I830DRI2DrawableCrtc(draw); + drmVBlank vbl; + + if (crtc == NULL) + return FALSE; + + vbl.request.type = + DRM_VBLANK_RELATIVE | + DRM_VBLANK_EVENT | + pipe_select(intel_crtc_to_pipe(crtc)); + vbl.request.sequence = 1; + vbl.request.signal = + intel_drm_queue_alloc(intel->scrn, crtc, info, + intel_dri2_vblank_handler, + intel_dri2_vblank_abort); + if (vbl.request.signal == 0) + return FALSE; + + info->type = DRI2_SWAP; + if (drmWaitVBlank(intel->drmSubFD, &vbl)) { + intel_drm_abort_seq(intel->scrn, vbl.request.signal); + return FALSE; + } + + return TRUE; +} + +static void I830DRI2FrameEventHandler(unsigned int frame, + unsigned int tv_sec, + unsigned int tv_usec, + DRI2FrameEventPtr swap_info) { intel_screen_private *intel = swap_info->intel; DrawablePtr drawable; @@ -906,24 +976,22 @@ void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, return; } - switch (swap_info->type) { case DRI2_FLIP: /* If we can still flip... */ - if (can_exchange(drawable, swap_info->front, swap_info->back) && - I830DRI2ScheduleFlip(intel, drawable, swap_info)) - return; - - /* else fall through to exchange/blit */ - case DRI2_SWAP: { - I830DRI2FallbackBlitSwap(drawable, - swap_info->front, swap_info->back); - DRI2SwapComplete(swap_info->client, drawable, frame, tv_sec, tv_usec, - DRI2_BLIT_COMPLETE, - swap_info->client ? swap_info->event_complete : NULL, - swap_info->event_data); - break; - } + if (!queue_flip(intel, drawable, swap_info) && + !queue_swap(intel, drawable, swap_info)) { + case DRI2_SWAP: + I830DRI2FallbackBlitSwap(drawable, + swap_info->front, swap_info->back); + DRI2SwapComplete(swap_info->client, drawable, frame, tv_sec, tv_usec, + DRI2_BLIT_COMPLETE, + swap_info->client ? 
swap_info->event_complete : NULL, + swap_info->event_data); + break; + } + return; + case DRI2_WAITMSC: if (swap_info->client) DRI2WaitMSCComplete(swap_info->client, drawable, @@ -939,12 +1007,13 @@ void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, i830_dri2_del_frame_event(swap_info); } -void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, - unsigned int tv_usec, DRI2FrameEventPtr flip_info) +static void I830DRI2FlipEventHandler(unsigned int frame, + unsigned int tv_sec, + unsigned int tv_usec,