diff options
author | Natanael Copa <ncopa@alpinelinux.org> | 2017-04-04 11:46:24 +0000 |
---|---|---|
committer | Natanael Copa <ncopa@alpinelinux.org> | 2017-04-04 13:23:04 +0000 |
commit | 3a329ef78935857e218db6e81db502df98759feb (patch) | |
tree | ae898e99ff821b0e33eadbc0ffc9e3ef5dcbefb5 /main/xf86-video-intel | |
parent | 8d3e4978f411d125eda59125dab752a4d11aacdf (diff) | |
download | aports-3a329ef78935857e218db6e81db502df98759feb.tar.bz2 aports-3a329ef78935857e218db6e81db502df98759feb.tar.xz |
main/xf86-video-intel: upgrade to git snapshot
Diffstat (limited to 'main/xf86-video-intel')
-rw-r--r-- | main/xf86-video-intel/APKBUILD | 48 | ||||
-rw-r--r-- | main/xf86-video-intel/O_CLOEXEC.patch | 10 | ||||
-rw-r--r-- | main/xf86-video-intel/gcc5-workaround.patch | 22 | ||||
-rw-r--r-- | main/xf86-video-intel/git.patch | 43164 | ||||
-rw-r--r-- | main/xf86-video-intel/xf86-video-intel-2.99.917-libdrm-kernel-4_0-crash.patch | 65 | ||||
-rw-r--r-- | main/xf86-video-intel/xorg-1.18.patch | 24 |
6 files changed, 43180 insertions, 153 deletions
diff --git a/main/xf86-video-intel/APKBUILD b/main/xf86-video-intel/APKBUILD index 183bc3a433..154a763c6f 100644 --- a/main/xf86-video-intel/APKBUILD +++ b/main/xf86-video-intel/APKBUILD @@ -1,36 +1,32 @@ # Maintainer: Natanael Copa <ncopa@alpinelinux.org> pkgname=xf86-video-intel -pkgver=2.99.917 -pkgrel=4 +pkgver=2.99.917_git20170325 +pkgrel=0 pkgdesc="X.Org driver for Intel cards" url="http://xorg.freedesktop.org/" arch="x86 x86_64" -license="custom" +license="MIT" subpackages="$pkgname-doc" depends="mesa-dri-intel" makedepends="xorg-server-dev libxi-dev fontsproto randrproto videoproto renderproto glproto xineramaproto libdrm-dev xf86driproto - mesa-dev libxvmc-dev xcb-util-dev eudev-dev" + mesa-dev libxvmc-dev xcb-util-dev eudev-dev + util-macros autoconf automake libtool" -source="http://xorg.freedesktop.org/releases/individual/driver/$pkgname-$pkgver.tar.bz2 - gcc5-workaround.patch - xf86-video-intel-2.99.917-libdrm-kernel-4_0-crash.patch - O_CLOEXEC.patch - xorg-1.18.patch +_ver=${pkgver%_git*} +source="http://xorg.freedesktop.org/releases/individual/driver/$pkgname-$_ver.tar.bz2 + git.patch " -_builddir="$srcdir"/$pkgname-$pkgver +builddir="$srcdir"/$pkgname-$_ver prepare() { - cd "$_builddir" - for i in $source; do - case $i in - *.patch) msg $i; patch -p1 -i "$srcdir"/$i || return 1;; - esac - done + cd "$builddir" + default_prepare + autoreconf -vif } build() { - cd "$srcdir"/$pkgname-$pkgver + cd "$builddir" export LDFLAGS="$LDFLAGS -Wl,-z,lazy" ./configure \ --build=$CBUILD \ @@ -38,30 +34,18 @@ build() { --prefix=/usr \ --enable-xvmc \ --disable-selective-werror \ + --with-default-dri=3 \ || return 1 make || return 1 } package() { - cd "$srcdir"/$pkgname-$pkgver + cd "$builddir" make DESTDIR="$pkgdir" install || return 1 install -Dm644 COPYING "$pkgdir"/usr/share/licenses/$pkgname/COPYING # http://bugs.alpinelinux.org/issues/3312 chmod o-x "$pkgdir"/usr/libexec/xf86-video-intel-backlight-helper } -md5sums="fa196a66e52c0c624fe5d350af7a5e7b 
xf86-video-intel-2.99.917.tar.bz2 -2e9c5ee749f0a255d2b10ce18b3512fa gcc5-workaround.patch -2fa815b66eb6896b3962074731b0b4bb xf86-video-intel-2.99.917-libdrm-kernel-4_0-crash.patch -d5c410d504c58aa641658a19e4950ea5 O_CLOEXEC.patch -d64095af23cf26c3559bba6f739e371e xorg-1.18.patch" -sha256sums="00b781eea055582820a123c47b62411bdf6aabf4f03dc0568faec55faf9667c9 xf86-video-intel-2.99.917.tar.bz2 -55367cd8dbe58d1097e2cf6cee11895acadc1a5ef527b8d39361e3975a6943e5 gcc5-workaround.patch -54298cb4a59016be0451e3ea72b2c2c6b2a97cb9ec2c8f45d62c12447d14b361 xf86-video-intel-2.99.917-libdrm-kernel-4_0-crash.patch -79f6c0bf8eb56d631857a0064e6c4ba1582acfb12c467f29c211e4fc4e628b98 O_CLOEXEC.patch -f45ad7f6d8a78f282faf283057cc7fa82d2b1c99979dba563c1a734a25e4fb7a xorg-1.18.patch" sha512sums="cbf4d46ad1ad5e5587c0f1f620ff534ef0645270517b60056b9f03e83d8216e2f456de46352a06c37c0c46963cc4ed20b71b815b20ec1bf680ff046e535f580f xf86-video-intel-2.99.917.tar.bz2 -b208508d229f53f18cf3aa8de2c3637964d8b22f8a615fc4759a2bb58cbe9db4dca7a79129a7b59fd138980c90bdcaf1aec142e1f13954c4cf25a817a2125998 gcc5-workaround.patch -003fc22a9446cdfcb8d51cbface096187f93a0c54b024ee34b160ca41a491c35e8b387caabc3c3f6411b93663c5119f48dc2adae0d76878723c02483306972ac xf86-video-intel-2.99.917-libdrm-kernel-4_0-crash.patch -1054d8e4f314b061209d74d05037abefec64ab0c4a1efcf82e512ea8db9022c56cf7891ca4ed08af88f560e986ea0e726144f5bde11212e938cc741c40da5348 O_CLOEXEC.patch -f9c22684d50e4bd567efcb38f93b40b3bad2d56d8c7ae96bf768064dffbff1f9147e40ae415a7bbef61d8f3140ef6746a372059cea11861bc61b4b94acfa91a4 xorg-1.18.patch" +0fe4e455dcbc4ae6622dca483ef3ddc765c43009fdb0fef82bdaa835a737796a6caf8afa9c6630919f43c977a6f736770c3779f04d8c823da4fc9cee17d16f19 git.patch" diff --git a/main/xf86-video-intel/O_CLOEXEC.patch b/main/xf86-video-intel/O_CLOEXEC.patch deleted file mode 100644 index 9dbe933588..0000000000 --- a/main/xf86-video-intel/O_CLOEXEC.patch +++ /dev/null @@ -1,10 +0,0 @@ ---- ./src/sna/kgem.c.orig -+++ ./src/sna/kgem.c -@@ -37,6 +37,7 
@@ - #include <sys/mman.h> - #include <time.h> - #include <errno.h> -+#define __USE_GNU - #include <fcntl.h> - - #include <xf86drm.h> diff --git a/main/xf86-video-intel/gcc5-workaround.patch b/main/xf86-video-intel/gcc5-workaround.patch deleted file mode 100644 index dd832546e4..0000000000 --- a/main/xf86-video-intel/gcc5-workaround.patch +++ /dev/null @@ -1,22 +0,0 @@ ---- ./src/sna/compiler.h.orig -+++ ./src/sna/compiler.h -@@ -65,16 +65,14 @@ - #define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse"))) - #endif - --#if HAS_GCC(4, 6) && defined(__OPTIMIZE__) -+#if HAS_GCC(4, 6) && !HAS_GCC(5,0) && defined(__OPTIMIZE__) - #define fast __attribute__((optimize("Ofast"))) - #else - #define fast - #endif - --#if HAS_GCC(4, 6) && defined(__OPTIMIZE__) --#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops"))) --#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__) --#define fast_memcpy __attribute__((target("inline-all-stringops"))) -+#if HAS_GCC(4, 5) && defined(__OPTIMIZE__) -+#define fast_memcpy fast __attribute__((target("inline-all-stringops"))) - #else - #define fast_memcpy - #endif diff --git a/main/xf86-video-intel/git.patch b/main/xf86-video-intel/git.patch new file mode 100644 index 0000000000..20084425a4 --- /dev/null +++ b/main/xf86-video-intel/git.patch @@ -0,0 +1,43164 @@ +diff --git a/Makefile.am b/Makefile.am +index 418fdc92..de5fbe12 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -18,14 +18,16 @@ + # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +-ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4 ++#Having problems passing through user flags as libtool complains ++#ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4 ++ACLOCAL_AMFLAGS = -I m4 + + SUBDIRS = man libobj xvmc src tools + + MAINTAINERCLEANFILES = ChangeLog INSTALL + + if HAVE_X11 +-SUBDIRS += test ++SUBDIRS += test benchmarks + endif + + .PHONY: ChangeLog INSTALL +diff --git a/NEWS b/NEWS +index 604b9cce..0e200332 100644 +--- a/NEWS ++++ b/NEWS +@@ -21,7 +21,7 @@ should make one more snapshot before an imminent release. + Before kernel 3.19, O_NONBLOCK support is broken and so we must avoid + reading if we are not expecting an event. + +- * Backwards compatibilty fix for fake triple buffering with PRIME and ++ * Backwards compatibility fix for fake triple buffering with PRIME and + Xorg-1.15 + https://bugs.freedesktop.org/show_bug.cgi?id=85144#c12 + +@@ -51,7 +51,7 @@ should make one more snapshot before an imminent release. + Snapshot 2.99.916 (2014-09-08) + ============================== + Quick update for MST in UXA - we need to hook up the RandR outputs for +-dynamicaly added connectors. ++dynamically added connectors. + + + Snapshot 2.99.915 (2014-09-08) +@@ -503,7 +503,7 @@ release. + backlight property is queried whilst the connector is disabled + https://bugs.freedesktop.org/show_bug.cgi?id=70406 + +- * Pad GETCONNECTOR ioctl for compatability between 32/64-bit userspace ++ * Pad GETCONNECTOR ioctl for compatibility between 32/64-bit userspace + and kernel + + * Handle long glyph runs correctly +@@ -523,7 +523,7 @@ snapshot beforehand to push out the bug fixes from the last week. 
+ + * Fix video output using sprites when changing the image size + +- * Apply more restrictive tile constaints for 915g class devices ++ * Apply more restrictive tile constraints for 915g class devices + https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1232546 + + * Ensure all overlapping rectangles are drawn for XRenderFillRectangles +@@ -1132,7 +1132,7 @@ operation. + * Explicitly prevent ring-switching for synchronized rendering to + scanouts (for vsync). + +- * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusuable) ++ * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusable) + https://bugs.freedesktop.org/show_bug.cgi?id=59539 + + +@@ -1226,7 +1226,7 @@ Release 2.20.15 (2012-12-03) + ============================ + And lo, enabling more of the common acceleration paths for gen4 revealed + another lurking bug - something is wrong with how we prepare Y-tiling +-surfaces for rendering. For the time being, we can surreptiously disable ++surfaces for rendering. For the time being, we can surreptitiously disable + them for gen4 and avoid hitting GPU hangs. + + * Avoid clobbering the render state after failing to convert the +@@ -1515,7 +1515,7 @@ Release 2.20.5 (2012-08-26) + Another silly bug found, another small bugfix release. The goal was for + the driver to bind to all Intel devices supported by the kernel. + Unfortunately we were too successful and started claiming Pouslbo, +-Medfield and Cedarview devices which are still encumbered by propietary ++Medfield and Cedarview devices which are still encumbered by proprietary + IP and not supported by this driver. 
+ + Bugs fixed since 2.20.4: +diff --git a/README b/README +index cf4d88d8..348983b4 100644 +--- a/README ++++ b/README +@@ -15,9 +15,9 @@ Intel graphics chipsets including: + G/Q33,G/Q35,G41,G/Q43,G/GM/Q45 + PineView-M (Atom N400 series) + PineView-D (Atom D400/D500 series) +- Intel(R) HD Graphics: 2000-6000, +- Intel(R) Iris(TM) Graphics: 5100/6100, and +- Intel(R) Iris(TM) Pro Graphics: 5200/6200/P6300. ++ Intel(R) HD Graphics, ++ Intel(R) Iris(TM) Graphics, ++ Intel(R) Iris(TM) Pro Graphics. + + Where to get more information about the driver + ---------------------------------------------- +diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore +new file mode 100644 +index 00000000..301c0129 +--- /dev/null ++++ b/benchmarks/.gitignore +@@ -0,0 +1,2 @@ ++dri2-swap ++dri3-swap +diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am +new file mode 100644 +index 00000000..4976e8a3 +--- /dev/null ++++ b/benchmarks/Makefile.am +@@ -0,0 +1,14 @@ ++AM_CFLAGS = @CWARNFLAGS@ $(X11_CFLAGS) $(DRM_CFLAGS) ++LDADD = $(X11_LIBS) $(DRM_LIBS) $(CLOCK_GETTIME_LIBS) ++ ++check_PROGRAMS = ++ ++if DRI2 ++check_PROGRAMS += dri2-swap ++endif ++ ++if DRI3 ++check_PROGRAMS += dri3-swap ++AM_CFLAGS += $(X11_DRI3_CFLAGS) ++LDADD += $(X11_DRI3_LIBS) ++endif +diff --git a/benchmarks/dri2-swap.c b/benchmarks/dri2-swap.c +new file mode 100644 +index 00000000..3d9d30aa +--- /dev/null ++++ b/benchmarks/dri2-swap.c +@@ -0,0 +1,588 @@ ++/* ++ * Copyright (c) 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this 
permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include <X11/Xlib.h> ++#include <X11/Xatom.h> ++#include <X11/Xlib-xcb.h> ++#include <X11/Xutil.h> ++#include <X11/Xlibint.h> ++#include <X11/extensions/dpms.h> ++#include <X11/extensions/randr.h> ++#include <X11/extensions/Xcomposite.h> ++#include <X11/extensions/Xdamage.h> ++#include <X11/extensions/Xrandr.h> ++#include <xcb/xcb.h> ++#include <xcb/dri2.h> ++#include <xf86drm.h> ++ ++#include <stdio.h> ++#include <string.h> ++#include <fcntl.h> ++#include <unistd.h> ++#include <assert.h> ++#include <errno.h> ++#include <setjmp.h> ++#include <signal.h> ++ ++#include <X11/Xlibint.h> ++#include <X11/extensions/Xext.h> ++#include <X11/extensions/extutil.h> ++#include <X11/extensions/dri2proto.h> ++#include <X11/extensions/dri2tokens.h> ++#include <X11/extensions/Xfixes.h> ++ ++static char dri2ExtensionName[] = DRI2_NAME; ++static XExtensionInfo *dri2Info; ++static XEXT_GENERATE_CLOSE_DISPLAY (DRI2CloseDisplay, dri2Info) ++ ++static Bool ++DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire); ++static Status ++DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire); ++static int ++DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code); ++ ++static /* const */ XExtensionHooks dri2ExtensionHooks = { ++ NULL, /* create_gc */ ++ NULL, /* copy_gc */ ++ 
NULL, /* flush_gc */ ++ NULL, /* free_gc */ ++ NULL, /* create_font */ ++ NULL, /* free_font */ ++ DRI2CloseDisplay, /* close_display */ ++ DRI2WireToEvent, /* wire_to_event */ ++ DRI2EventToWire, /* event_to_wire */ ++ DRI2Error, /* error */ ++ NULL, /* error_string */ ++}; ++ ++static XEXT_GENERATE_FIND_DISPLAY (DRI2FindDisplay, ++ dri2Info, ++ dri2ExtensionName, ++ &dri2ExtensionHooks, ++ 0, NULL) ++ ++static Bool ++DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire) ++{ ++ XExtDisplayInfo *info = DRI2FindDisplay(dpy); ++ ++ XextCheckExtension(dpy, info, dri2ExtensionName, False); ++ ++ switch ((wire->u.u.type & 0x7f) - info->codes->first_event) { ++#ifdef X_DRI2SwapBuffers ++ case DRI2_BufferSwapComplete: ++ return False; ++#endif ++#ifdef DRI2_InvalidateBuffers ++ case DRI2_InvalidateBuffers: ++ return False; ++#endif ++ default: ++ /* client doesn't support server event */ ++ break; ++ } ++ ++ return False; ++} ++ ++/* We don't actually support this. It doesn't make sense for clients to ++ * send each other DRI2 events. ++ */ ++static Status ++DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire) ++{ ++ XExtDisplayInfo *info = DRI2FindDisplay(dpy); ++ ++ XextCheckExtension(dpy, info, dri2ExtensionName, False); ++ ++ switch (event->type) { ++ default: ++ /* client doesn't support server event */ ++ break; ++ } ++ ++ return Success; ++} ++ ++static int ++DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code) ++{ ++ if (err->majorCode == codes->major_opcode && ++ err->errorCode == BadDrawable && ++ err->minorCode == X_DRI2CopyRegion) ++ return True; ++ ++ /* If the X drawable was destroyed before the GLX drawable, the ++ * DRI2 drawble will be gone by the time we call ++ * DRI2DestroyDrawable. So just ignore BadDrawable here. 
*/ ++ if (err->majorCode == codes->major_opcode && ++ err->errorCode == BadDrawable && ++ err->minorCode == X_DRI2DestroyDrawable) ++ return True; ++ ++ /* If the server is non-local DRI2Connect will raise BadRequest. ++ * Swallow this so that DRI2Connect can signal this in its return code */ ++ if (err->majorCode == codes->major_opcode && ++ err->minorCode == X_DRI2Connect && ++ err->errorCode == BadRequest) { ++ *ret_code = False; ++ return True; ++ } ++ ++ return False; ++} ++ ++static Bool ++DRI2QueryExtension(Display * dpy, int *eventBase, int *errorBase) ++{ ++ XExtDisplayInfo *info = DRI2FindDisplay(dpy); ++ ++ if (XextHasExtension(info)) { ++ *eventBase = info->codes->first_event; ++ *errorBase = info->codes->first_error; ++ return True; ++ } ++ ++ return False; ++} ++ ++static Bool ++DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName) ++{ ++ XExtDisplayInfo *info = DRI2FindDisplay(dpy); ++ xDRI2ConnectReply rep; ++ xDRI2ConnectReq *req; ++ ++ XextCheckExtension(dpy, info, dri2ExtensionName, False); ++ ++ LockDisplay(dpy); ++ GetReq(DRI2Connect, req); ++ req->reqType = info->codes->major_opcode; ++ req->dri2ReqType = X_DRI2Connect; ++ req->window = window; ++ req->driverType = DRI2DriverDRI; ++ if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) { ++ UnlockDisplay(dpy); ++ SyncHandle(); ++ return False; ++ } ++ ++ if (rep.driverNameLength == 0 && rep.deviceNameLength == 0) { ++ UnlockDisplay(dpy); ++ SyncHandle(); ++ return False; ++ } ++ ++ *driverName = Xmalloc(rep.driverNameLength + 1); ++ if (*driverName == NULL) { ++ _XEatData(dpy, ++ ((rep.driverNameLength + 3) & ~3) + ++ ((rep.deviceNameLength + 3) & ~3)); ++ UnlockDisplay(dpy); ++ SyncHandle(); ++ return False; ++ } ++ _XReadPad(dpy, *driverName, rep.driverNameLength); ++ (*driverName)[rep.driverNameLength] = '\0'; ++ ++ *deviceName = Xmalloc(rep.deviceNameLength + 1); ++ if (*deviceName == NULL) { ++ Xfree(*driverName); ++ _XEatData(dpy, ((rep.deviceNameLength + 3) & ~3)); ++ 
UnlockDisplay(dpy); ++ SyncHandle(); ++ return False; ++ } ++ _XReadPad(dpy, *deviceName, rep.deviceNameLength); ++ (*deviceName)[rep.deviceNameLength] = '\0'; ++ ++ UnlockDisplay(dpy); ++ SyncHandle(); ++ ++ return True; ++} ++ ++static Bool ++DRI2Authenticate(Display * dpy, XID window, unsigned int magic) ++{ ++ XExtDisplayInfo *info = DRI2FindDisplay(dpy); ++ xDRI2AuthenticateReq *req; ++ xDRI2AuthenticateReply rep; ++ ++ XextCheckExtension(dpy, info, dri2ExtensionName, False); ++ ++ LockDisplay(dpy); ++ GetReq(DRI2Authenticate, req); ++ req->reqType = info->codes->major_opcode; ++ req->dri2ReqType = X_DRI2Authenticate; ++ req->window = window; ++ req->magic = magic; ++ ++ if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) { ++ UnlockDisplay(dpy); ++ SyncHandle(); ++ return False; ++ } ++ ++ UnlockDisplay(dpy); ++ SyncHandle(); ++ ++ return rep.authenticated; ++} ++ ++static void ++DRI2CreateDrawable(Display * dpy, XID drawable) ++{ ++ XExtDisplayInfo *info = DRI2FindDisplay(dpy); ++ xDRI2CreateDrawableReq *req; ++ ++ XextSimpleCheckExtension(dpy, info, dri2ExtensionName); ++ ++ LockDisplay(dpy); ++ GetReq(DRI2CreateDrawable, req); ++ req->reqType = info->codes->major_opcode; ++ req->dri2ReqType = X_DRI2CreateDrawable; ++ req->drawable = drawable; ++ UnlockDisplay(dpy); ++ SyncHandle(); ++} ++ ++static void DRI2SwapInterval(Display *dpy, XID drawable, int interval) ++{ ++ XExtDisplayInfo *info = DRI2FindDisplay(dpy); ++ xDRI2SwapIntervalReq *req; ++ ++ XextSimpleCheckExtension (dpy, info, dri2ExtensionName); ++ ++ LockDisplay(dpy); ++ GetReq(DRI2SwapInterval, req); ++ req->reqType = info->codes->major_opcode; ++ req->dri2ReqType = X_DRI2SwapInterval; ++ req->drawable = drawable; ++ req->interval = interval; ++ UnlockDisplay(dpy); ++ SyncHandle(); ++} ++ ++static int _x_error_occurred; ++ ++static int ++_check_error_handler(Display *display, ++ XErrorEvent *event) ++{ ++ fprintf(stderr, ++ "X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", ++ 
DisplayString(display), ++ event->serial, ++ event->error_code, ++ event->request_code, ++ event->minor_code); ++ _x_error_occurred++; ++ return False; /* ignored */ ++} ++ ++static double elapsed(const struct timespec *start, ++ const struct timespec *end) ++{ ++ return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; ++} ++ ++static void run(Display *dpy, Window win) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ struct timespec start, end; ++ int n, completed = 0; ++ ++ clock_gettime(CLOCK_MONOTONIC, &start); ++ do { ++ for (n = 0; n < 1000; n++) { ++ unsigned int attachments[] = { DRI2BufferBackLeft }; ++ unsigned int seq[2]; ++ ++ seq[0] = xcb_dri2_swap_buffers_unchecked(c, win, ++ 0, 0, 0, 0, 0, 0).sequence; ++ ++ ++ seq[1] = xcb_dri2_get_buffers_unchecked(c, win, ++ 1, 1, attachments).sequence; ++ ++ xcb_flush(c); ++ xcb_discard_reply(c, seq[0]); ++ xcb_discard_reply(c, seq[1]); ++ completed++; ++ } ++ clock_gettime(CLOCK_MONOTONIC, &end); ++ } while (end.tv_sec < start.tv_sec + 10); ++ ++ printf("%f\n", completed / (elapsed(&start, &end) / 1000000)); ++} ++ ++static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) ++{ ++ XRRScreenResources *res; ++ ++ res = XRRGetScreenResourcesCurrent(dpy, window); ++ if (res == NULL) ++ res = XRRGetScreenResources(dpy, window); ++ ++ return res; ++} ++ ++static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) ++{ ++ int i; ++ ++ for (i = 0; i < res->nmode; i++) { ++ if (res->modes[i].id == id) ++ return &res->modes[i]; ++ } ++ ++ return NULL; ++} ++ ++static int dri2_open(Display *dpy) ++{ ++ drm_auth_t auth; ++ char *driver, *device; ++ int fd; ++ ++ if (!DRI2QueryExtension(dpy, &fd, &fd)) ++ return -1; ++ ++ if (!DRI2Connect(dpy, DefaultRootWindow(dpy), &driver, &device)) ++ return -1; ++ ++ fd = open(device, O_RDWR); ++ if (fd < 0) ++ return -1; ++ ++ if (drmIoctl(fd, DRM_IOCTL_GET_MAGIC, &auth)) ++ return -1; ++ ++ if 
(!DRI2Authenticate(dpy, DefaultRootWindow(dpy), auth.magic)) ++ return -1; ++ ++ return fd; ++} ++ ++static void fullscreen(Display *dpy, Window win) ++{ ++ Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); ++ XChangeProperty(dpy, win, ++ XInternAtom(dpy, "_NET_WM_STATE", False), ++ XA_ATOM, 32, PropModeReplace, ++ (unsigned char *)&atom, 1); ++} ++ ++static int has_composite(Display *dpy) ++{ ++ int event, error; ++ int major, minor; ++ ++ if (!XDamageQueryExtension (dpy, &event, &error)) ++ return 0; ++ ++ if (!XCompositeQueryExtension(dpy, &event, &error)) ++ return 0; ++ ++ XCompositeQueryVersion(dpy, &major, &minor); ++ ++ return major > 0 || minor >= 4; ++} ++ ++int main(int argc, char **argv) ++{ ++ Display *dpy; ++ Window root, win; ++ XRRScreenResources *res; ++ XRRCrtcInfo **original_crtc; ++ XSetWindowAttributes attr; ++ enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN; ++ enum visible {REDIRECTED, NORMAL } v = NORMAL; ++ enum display { OFF, ON } d = OFF; ++ int width, height; ++ int i, fd; ++ int c; ++ ++ while ((c = getopt(argc, argv, "d:v:w:")) != -1) { ++ switch (c) { ++ case 'd': ++ if (strcmp(optarg, "off") == 0) ++ d = OFF; ++ else if (strcmp(optarg, "on") == 0) ++ d = ON; ++ else ++ abort(); ++ break; ++ ++ case 'v': ++ if (strcmp(optarg, "redirected") == 0) ++ v = REDIRECTED; ++ else if (strcmp(optarg, "normal") == 0) ++ v = NORMAL; ++ else ++ abort(); ++ break; ++ ++ case 'w': ++ if (strcmp(optarg, "fullscreen") == 0) ++ w = FULLSCREEN; ++ else if (strcmp(optarg, "window") == 0) ++ w = WINDOW; ++ else if (strcmp(optarg, "root") == 0) ++ w = ROOT; ++ else ++ abort(); ++ break; ++ } ++ } ++ ++ attr.override_redirect = 1; ++ ++ dpy = XOpenDisplay(NULL); ++ if (dpy == NULL) ++ return 77; ++ ++ width = DisplayWidth(dpy, DefaultScreen(dpy)); ++ height = DisplayHeight(dpy, DefaultScreen(dpy)); ++ ++ fd = dri2_open(dpy); ++ if (fd < 0) ++ return 77; ++ ++ if (DPMSQueryExtension(dpy, &i, &i)) ++ DPMSDisable(dpy); ++ ++ root = 
DefaultRootWindow(dpy); ++ ++ signal(SIGALRM, SIG_IGN); ++ XSetErrorHandler(_check_error_handler); ++ ++ res = NULL; ++ if (XRRQueryVersion(dpy, &i, &i)) ++ res = _XRRGetScreenResourcesCurrent(dpy, root); ++ if (res == NULL) ++ return 77; ++ ++ if (v == REDIRECTED && !has_composite(dpy)) ++ return 77; ++ ++ original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); ++ for (i = 0; i < res->ncrtc; i++) ++ original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); ++ ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ 0, 0, None, RR_Rotate_0, NULL, 0); ++ ++ DRI2CreateDrawable(dpy, root); ++ DRI2SwapInterval(dpy, root, 0); ++ ++ if (d != OFF) { ++ for (i = 0; i < res->noutput; i++) { ++ XRROutputInfo *output; ++ XRRModeInfo *mode; ++ ++ output = XRRGetOutputInfo(dpy, res, res->outputs[i]); ++ if (output == NULL) ++ continue; ++ ++ mode = NULL; ++ if (res->nmode) ++ mode = lookup_mode(res, output->modes[0]); ++ if (mode == NULL) ++ continue; ++ ++ XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime, ++ 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); ++ width = mode->width; ++ height = mode->height; ++ break; ++ } ++ if (i == res->noutput) { ++ _x_error_occurred = 77; ++ goto restore; ++ } ++ } ++ ++ if (w == ROOT) { ++ run(dpy, root); ++ } else if (w == FULLSCREEN) { ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ DRI2CreateDrawable(dpy, win); ++ DRI2SwapInterval(dpy, win, 0); ++ if (v == REDIRECTED) { ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XDamageCreate(dpy, win, XDamageReportRawRectangles); ++ } else ++ fullscreen(dpy, win); ++ XMapWindow(dpy, win); ++ run(dpy, win); ++ } else if (w == WINDOW) { ++ win = XCreateWindow(dpy, root, ++ 0, 0, width/2, height/2, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ 
DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ DRI2CreateDrawable(dpy, win); ++ DRI2SwapInterval(dpy, win, 0); ++ if (v == REDIRECTED) { ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XDamageCreate(dpy, win, XDamageReportRawRectangles); ++ } ++ XMapWindow(dpy, win); ++ run(dpy, win); ++ } ++ ++restore: ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ 0, 0, None, RR_Rotate_0, NULL, 0); ++ ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ original_crtc[i]->x, ++ original_crtc[i]->y, ++ original_crtc[i]->mode, ++ original_crtc[i]->rotation, ++ original_crtc[i]->outputs, ++ original_crtc[i]->noutput); ++ ++ if (DPMSQueryExtension(dpy, &i, &i)) ++ DPMSEnable(dpy); ++ ++ XSync(dpy, True); ++ return _x_error_occurred; ++} +diff --git a/benchmarks/dri3-swap.c b/benchmarks/dri3-swap.c +new file mode 100644 +index 00000000..4dd423b3 +--- /dev/null ++++ b/benchmarks/dri3-swap.c +@@ -0,0 +1,595 @@ ++/* ++ * Copyright (c) 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include <X11/Xlib.h> ++#include <X11/Xatom.h> ++#include <X11/Xlib-xcb.h> ++#include <X11/xshmfence.h> ++#include <X11/Xutil.h> ++#include <X11/Xlibint.h> ++#include <X11/extensions/Xcomposite.h> ++#include <X11/extensions/Xdamage.h> ++#include <X11/extensions/dpms.h> ++#include <X11/extensions/randr.h> ++#include <X11/extensions/Xrandr.h> ++#include <xcb/xcb.h> ++#include <xcb/present.h> ++#include <xcb/dri3.h> ++#include <xcb/xfixes.h> ++#include <xf86drm.h> ++#include <i915_drm.h> ++ ++#include <stdio.h> ++#include <string.h> ++#include <fcntl.h> ++#include <unistd.h> ++#include <assert.h> ++#include <errno.h> ++#include <setjmp.h> ++#include <signal.h> ++ ++struct dri3_fence { ++ XID xid; ++ void *addr; ++}; ++ ++static int _x_error_occurred; ++static uint32_t stamp; ++ ++struct list { ++ struct list *next, *prev; ++}; ++ ++static void ++list_init(struct list *list) ++{ ++ list->next = list->prev = list; ++} ++ ++static inline void ++__list_add(struct list *entry, ++ struct list *prev, ++ struct list *next) ++{ ++ next->prev = entry; ++ entry->next = next; ++ entry->prev = prev; ++ prev->next = entry; ++} ++ ++static inline void ++list_add(struct list *entry, struct list *head) ++{ ++ __list_add(entry, head, head->next); ++} ++ ++static inline void ++__list_del(struct list *prev, struct list *next) ++{ ++ next->prev = prev; ++ prev->next = next; ++} ++ ++static inline void ++_list_del(struct list *entry) ++{ ++ __list_del(entry->prev, entry->next); ++} ++ ++static inline void ++list_move(struct list *list, struct list *head) ++{ ++ if (list->prev != head) { ++ _list_del(list); ++ list_add(list, head); ++ } ++} ++ 
++#define __container_of(ptr, sample, member) \ ++ (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample))) ++ ++#define list_for_each_entry(pos, head, member) \ ++ for (pos = __container_of((head)->next, pos, member); \ ++ &pos->member != (head); \ ++ pos = __container_of(pos->member.next, pos, member)) ++ ++static int ++_check_error_handler(Display *display, ++ XErrorEvent *event) ++{ ++ printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", ++ DisplayString(display), ++ event->serial, ++ event->error_code, ++ event->request_code, ++ event->minor_code); ++ _x_error_occurred++; ++ return False; /* ignored */ ++} ++ ++static int dri3_create_fence(Display *dpy, ++ Pixmap pixmap, ++ struct dri3_fence *fence) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ struct dri3_fence f; ++ int fd; ++ ++ fd = xshmfence_alloc_shm(); ++ if (fd < 0) ++ return -1; ++ ++ f.addr = xshmfence_map_shm(fd); ++ if (f.addr == NULL) { ++ close(fd); ++ return -1; ++ } ++ ++ f.xid = xcb_generate_id(c); ++ xcb_dri3_fence_from_fd(c, pixmap, f.xid, 0, fd); ++ ++ *fence = f; ++ return 0; ++} ++ ++static double elapsed(const struct timespec *start, ++ const struct timespec *end) ++{ ++ return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; ++} ++ ++struct buffer { ++ struct list link; ++ Pixmap pixmap; ++ struct dri3_fence fence; ++ int fd; ++ int busy; ++}; ++ ++static void run(Display *dpy, Window win) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ struct timespec start, end; ++#define N_BACK 8 ++ struct buffer buffer[N_BACK]; ++ struct list mru; ++ Window root; ++ unsigned int width, height; ++ unsigned border, depth; ++ unsigned present_flags = XCB_PRESENT_OPTION_ASYNC; ++ xcb_xfixes_region_t update = 0; ++ int completed = 0; ++ int queued = 0; ++ uint32_t eid; ++ void *Q; ++ int i, n; ++ ++ list_init(&mru); ++ ++ XGetGeometry(dpy, win, ++ &root, &i, &n, &width, &height, &border, &depth); ++ ++ _x_error_occurred = 0; ++ 
++ for (n = 0; n < N_BACK; n++) { ++ xcb_dri3_buffer_from_pixmap_reply_t *reply; ++ int *fds; ++ ++ buffer[n].pixmap = ++ XCreatePixmap(dpy, win, width, height, depth); ++ buffer[n].fence.xid = 0; ++ buffer[n].fd = -1; ++ ++ if (dri3_create_fence(dpy, win, &buffer[n].fence)) ++ return; ++ ++ reply = xcb_dri3_buffer_from_pixmap_reply (c, ++ xcb_dri3_buffer_from_pixmap(c, buffer[n].pixmap), ++ NULL); ++ if (reply == NULL) ++ return; ++ ++ fds = xcb_dri3_buffer_from_pixmap_reply_fds (c, reply); ++ buffer[n].fd = fds[0]; ++ free(reply); ++ ++ /* start idle */ ++ xshmfence_trigger(buffer[n].fence.addr); ++ buffer[n].busy = 0; ++ list_add(&buffer[n].link, &mru); ++ } ++ ++ eid = xcb_generate_id(c); ++ xcb_present_select_input(c, eid, win, ++ XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY | ++ XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY); ++ Q = xcb_register_for_special_xge(c, &xcb_present_id, eid, &stamp); ++ ++ clock_gettime(CLOCK_MONOTONIC, &start); ++ do { ++ for (n = 0; n < 1000; n++) { ++ struct buffer *tmp, *b = NULL; ++ list_for_each_entry(tmp, &mru, link) { ++ if (!tmp->busy) { ++ b = tmp; ++ break; ++ } ++ } ++ while (b == NULL) { ++ xcb_present_generic_event_t *ev; ++ ++ ev = (xcb_present_generic_event_t *) ++ xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ abort(); ++ ++ do { ++ switch (ev->evtype) { ++ case XCB_PRESENT_COMPLETE_NOTIFY: ++ completed++; ++ queued--; ++ break; ++ ++ case XCB_PRESENT_EVENT_IDLE_NOTIFY: ++ { ++ xcb_present_idle_notify_event_t *ie = (xcb_present_idle_notify_event_t *)ev; ++ assert(ie->serial < N_BACK); ++ buffer[ie->serial].busy = 0; ++ if (b == NULL) ++ b = &buffer[ie->serial]; ++ break; ++ } ++ } ++ free(ev); ++ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); ++ } ++ ++ b->busy = 1; ++ if (b->fence.xid) { ++ xshmfence_await(b->fence.addr); ++ xshmfence_reset(b->fence.addr); ++ } ++ xcb_present_pixmap(c, win, b->pixmap, b - buffer, ++ 0, /* valid */ ++ update, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off 
*/ ++ None, ++ None, /* wait fence */ ++ b->fence.xid, ++ present_flags, ++ 0, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ list_move(&b->link, &mru); ++ queued++; ++ xcb_flush(c); ++ } ++ clock_gettime(CLOCK_MONOTONIC, &end); ++ } while (end.tv_sec < start.tv_sec + 10); ++ ++ while (queued) { ++ xcb_present_generic_event_t *ev; ++ ++ ev = (xcb_present_generic_event_t *) ++ xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ abort(); ++ ++ do { ++ switch (ev->evtype) { ++ case XCB_PRESENT_COMPLETE_NOTIFY: ++ completed++; ++ queued--; ++ break; ++ ++ case XCB_PRESENT_EVENT_IDLE_NOTIFY: ++ break; ++ } ++ free(ev); ++ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); ++ } ++ clock_gettime(CLOCK_MONOTONIC, &end); ++ ++ printf("%f\n", completed / (elapsed(&start, &end) / 1000000)); ++} ++ ++static int has_present(Display *dpy) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ xcb_generic_error_t *error = NULL; ++ void *reply; ++ ++ reply = xcb_present_query_version_reply(c, ++ xcb_present_query_version(c, ++ XCB_PRESENT_MAJOR_VERSION, ++ XCB_PRESENT_MINOR_VERSION), ++ &error); ++ ++ free(reply); ++ free(error); ++ if (reply == NULL) { ++ fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy)); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int has_composite(Display *dpy) ++{ ++ int event, error; ++ int major, minor; ++ ++ if (!XDamageQueryExtension (dpy, &event, &error)) ++ return 0; ++ ++ if (!XCompositeQueryExtension(dpy, &event, &error)) ++ return 0; ++ ++ XCompositeQueryVersion(dpy, &major, &minor); ++ ++ return major > 0 || minor >= 4; ++} ++ ++static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) ++{ ++ XRRScreenResources *res; ++ ++ res = XRRGetScreenResourcesCurrent(dpy, window); ++ if (res == NULL) ++ res = XRRGetScreenResources(dpy, window); ++ ++ return res; ++} ++ ++static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) ++{ 
++ int i; ++ ++ for (i = 0; i < res->nmode; i++) { ++ if (res->modes[i].id == id) ++ return &res->modes[i]; ++ } ++ ++ return NULL; ++} ++ ++static void fullscreen(Display *dpy, Window win) ++{ ++ Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); ++ XChangeProperty(dpy, win, ++ XInternAtom(dpy, "_NET_WM_STATE", False), ++ XA_ATOM, 32, PropModeReplace, ++ (unsigned char *)&atom, 1); ++} ++ ++static int dri3_query_version(Display *dpy, int *major, int *minor) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ xcb_dri3_query_version_reply_t *reply; ++ xcb_generic_error_t *error; ++ ++ *major = *minor = -1; ++ ++ reply = xcb_dri3_query_version_reply(c, ++ xcb_dri3_query_version(c, ++ XCB_DRI3_MAJOR_VERSION, ++ XCB_DRI3_MINOR_VERSION), ++ &error); ++ free(error); ++ if (reply == NULL) ++ return -1; ++ ++ *major = reply->major_version; ++ *minor = reply->minor_version; ++ free(reply); ++ ++ return 0; ++} ++ ++static int has_dri3(Display *dpy) ++{ ++ const xcb_query_extension_reply_t *ext; ++ int major, minor; ++ ++ ext = xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id); ++ if (ext == NULL || !ext->present) ++ return 0; ++ ++ if (dri3_query_version(dpy, &major, &minor) < 0) ++ return 0; ++ ++ return major >= 0; ++} ++ ++int main(int argc, char **argv) ++{ ++ Display *dpy; ++ Window root, win; ++ XRRScreenResources *res; ++ XRRCrtcInfo **original_crtc; ++ XSetWindowAttributes attr; ++ enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN; ++ enum visible {REDIRECTED, NORMAL } v = NORMAL; ++ enum display { OFF, ON } d = OFF; ++ int width, height; ++ int i; ++ ++ while ((i = getopt(argc, argv, "d:v:w:")) != -1) { ++ switch (i) { ++ case 'd': ++ if (strcmp(optarg, "off") == 0) ++ d = OFF; ++ else if (strcmp(optarg, "on") == 0) ++ d = ON; ++ else ++ abort(); ++ break; ++ ++ case 'v': ++ if (strcmp(optarg, "redirected") == 0) ++ v = REDIRECTED; ++ else if (strcmp(optarg, "normal") == 0) ++ v = NORMAL; ++ else ++ abort(); ++ break; ++ ++ case 
'w': ++ if (strcmp(optarg, "fullscreen") == 0) ++ w = FULLSCREEN; ++ else if (strcmp(optarg, "window") == 0) ++ w = WINDOW; ++ else if (strcmp(optarg, "root") == 0) ++ w = ROOT; ++ else ++ abort(); ++ break; ++ } ++ } ++ ++ attr.override_redirect = 1; ++ ++ dpy = XOpenDisplay(NULL); ++ if (dpy == NULL) ++ return 77; ++ ++ width = DisplayWidth(dpy, DefaultScreen(dpy)); ++ height = DisplayHeight(dpy, DefaultScreen(dpy)); ++ ++ if (!has_present(dpy)) ++ return 77; ++ ++ if (!has_dri3(dpy)) ++ return 77; ++ ++ if (DPMSQueryExtension(dpy, &i, &i)) ++ DPMSDisable(dpy); ++ ++ root = DefaultRootWindow(dpy); ++ ++ signal(SIGALRM, SIG_IGN); ++ XSetErrorHandler(_check_error_handler); ++ ++ res = NULL; ++ if (XRRQueryVersion(dpy, &i, &i)) ++ res = _XRRGetScreenResourcesCurrent(dpy, root); ++ if (res == NULL) ++ return 77; ++ ++ if (v == REDIRECTED && !has_composite(dpy)) ++ return 77; ++ ++ original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); ++ for (i = 0; i < res->ncrtc; i++) ++ original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); ++ ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ 0, 0, None, RR_Rotate_0, NULL, 0); ++ ++ if (d != OFF) { ++ for (i = 0; i < res->noutput; i++) { ++ XRROutputInfo *output; ++ XRRModeInfo *mode; ++ ++ output = XRRGetOutputInfo(dpy, res, res->outputs[i]); ++ if (output == NULL) ++ continue; ++ ++ mode = NULL; ++ if (res->nmode) ++ mode = lookup_mode(res, output->modes[0]); ++ if (mode == NULL) ++ continue; ++ ++ XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime, ++ 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); ++ width = mode->width; ++ height = mode->height; ++ break; ++ } ++ if (i == res->noutput) { ++ _x_error_occurred = 77; ++ goto restore; ++ } ++ } ++ ++ if (w == ROOT) { ++ run(dpy, root); ++ } else if (w == FULLSCREEN) { ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, 
DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (v == REDIRECTED) { ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XDamageCreate(dpy, win, XDamageReportRawRectangles); ++ } else ++ fullscreen(dpy, win); ++ XMapWindow(dpy, win); ++ run(dpy, win); ++ } else if (w == WINDOW) { ++ win = XCreateWindow(dpy, root, ++ 0, 0, width/2, height/2, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (v == REDIRECTED) { ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XDamageCreate(dpy, win, XDamageReportRawRectangles); ++ } ++ XMapWindow(dpy, win); ++ run(dpy, win); ++ } ++ ++restore: ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ 0, 0, None, RR_Rotate_0, NULL, 0); ++ ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ original_crtc[i]->x, ++ original_crtc[i]->y, ++ original_crtc[i]->mode, ++ original_crtc[i]->rotation, ++ original_crtc[i]->outputs, ++ original_crtc[i]->noutput); ++ ++ if (DPMSQueryExtension(dpy, &i, &i)) ++ DPMSEnable(dpy); ++ ++ XSync(dpy, True); ++ return _x_error_occurred; ++} +diff --git a/configure.ac b/configure.ac +index 61bea435..d13917ec 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -195,18 +195,24 @@ AC_ARG_ENABLE(udev, + [UDEV="$enableval"], + [UDEV=auto]) + ++udev_msg=" disabled" + if test "x$UDEV" != "xno"; then + PKG_CHECK_MODULES(UDEV, [libudev], [udev="yes"], [udev="no"]) ++ AC_CHECK_HEADERS([sys/stat.h], [], [udev="no"]) + if test "x$UDEV" = "xyes" -a "x$udev" != "xyes"; then + AC_MSG_ERROR([udev support requested but not found (libudev)]) + fi + if test "x$udev" = "xyes"; then + AC_DEFINE(HAVE_UDEV,1,[Enable udev-based monitor hotplug detection]) ++ udev_msg=" yes" ++ else ++ udev_msg=" no" + fi + fi + +-PKG_CHECK_MODULES(X11, [x11 xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], 
[x11="yes"], [x11="no"]) ++PKG_CHECK_MODULES(X11, [x11 x11-xcb xcb-dri2 xcomposite xdamage xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], [x11="yes"], [x11="no"]) + AM_CONDITIONAL(HAVE_X11, test "x$x11" = "xyes") ++echo X11_CLFAGS="$X11_CLFAGS" X11_LIBS="$X11_LIBS" + + cpuid="yes" + AC_TRY_LINK([ +@@ -270,10 +276,13 @@ if test "x$shm" = "xyes"; then + AC_DEFINE([HAVE_MIT_SHM], 1, [Define to 1 if MIT-SHM is available]) + fi + +-PKG_CHECK_MODULES(X11_DRI3, [xcb-dri3 xcb-sync xcb-present x11-xcb xshmfence x11 xrender xext libdrm], [x11_dri3="yes"], [x11_dri3="no"]) ++PKG_CHECK_MODULES(X11_DRI3, [xcb-dri3 xcb-sync xcb-xfixes xcb-present x11-xcb xshmfence x11 xcomposite xdamage xrender xrandr xxf86vm xext libdrm], [x11_dri3="yes"], [x11_dri3="no"]) + AM_CONDITIONAL(X11_DRI3, test "x$x11_dri3" = "xyes" -a "x$shm" = "xyes") + AM_CONDITIONAL(X11_SHM, test "x$shm" = "xyes") + ++PKG_CHECK_MODULES(X11_VM, [xxf86vm], [x11_vm="yes"], [x11_vm="no"]) ++AM_CONDITIONAL(X11_VM, test "x$x11_vm" = "xyes") ++ + AC_ARG_ENABLE(tools, + AS_HELP_STRING([--disable-tools], + [Enable building and installing the miscellaneous tools [default=auto]]), +@@ -285,7 +294,7 @@ if test "x$shm" != "xyes"; then + tools="no" + fi + if test "x$tools" != "xno"; then +- ivo_requires="xrandr xdamage xfixes xcursor xtst xrender xext x11 pixman-1" ++ ivo_requires="xrandr xdamage xfixes xcursor xtst xrender xscrnsaver xext x11 pixman-1" + extra_cflags="" + + ignore="xinerama" +@@ -307,6 +316,8 @@ if test "x$tools" != "xno"; then + tools="no" + fi + ++ PKG_CHECK_MODULES(TOOL_CURSOR, [xfixes x11 libpng], [cursor="yes"], [ivo="no"]) ++ + IVO_CFLAGS="$IVO_CFLAGS $extra_cflags" + fi + if test "x$tools" != "xno"; then +@@ -315,6 +326,7 @@ fi + AC_MSG_CHECKING([whether to build additional tools]) + AC_MSG_RESULT([$tools]) + AM_CONDITIONAL(BUILD_TOOLS, test "x$tools" != "xno") ++AM_CONDITIONAL(BUILD_TOOL_CURSOR, test "x$cursor" = "xyes") + + # Define a configure option for an alternate module 
directory + AC_ARG_WITH(xorg-module-dir, +@@ -339,10 +351,20 @@ AC_ARG_ENABLE(dri2, + [DRI2=$enableval], + [DRI2=yes]) + AC_ARG_ENABLE(dri3, +- AS_HELP_STRING([--enable-dri3], +- [Enable DRI3 support [[default=no]]]), ++ AS_HELP_STRING([--disable-dri3], ++ [Disable DRI3 support [[default=yes]]]), + [DRI3=$enableval], +- [DRI3=no]) ++ [DRI3=yes]) ++AC_ARG_WITH(default-dri, ++ AS_HELP_STRING([--with-default-dri], ++ [Select the default maximum DRI level [default 2]]), ++ [DRI_DEFAULT=$withval], ++ [DRI_DEFAULT=2]) ++if test "x$DRI_DEFAULT" = "x0"; then ++ AC_DEFINE(DEFAULT_DRI_LEVEL, 0,[Default DRI level]) ++else ++ AC_DEFINE(DEFAULT_DRI_LEVEL, ~0, [Default DRI level]) ++fi + + AC_ARG_ENABLE(xvmc, AS_HELP_STRING([--disable-xvmc], + [Disable XvMC support [[default=yes]]]), +@@ -375,14 +397,12 @@ AC_ARG_ENABLE(ums-only, + required_xorg_server_version=1.6 + required_pixman_version=0.16 + +-if pkg-config --exists 'pixman-1 >= 0.27.1'; then +- AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache]) +-fi +- +-if pkg-config --exists 'pixman-1 >= 0.24.0'; then +- AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation]) +-fi +- ++PKG_CHECK_EXISTS([pixman-1 >= 0.24.0], ++ AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation]) ++ []) ++PKG_CHECK_EXISTS([pixman-1 >= 0.27.1], ++ [AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache])], ++ []) + # Store the list of server defined optional extensions in REQUIRED_MODULES + XORG_DRIVER_CHECK_EXT(RANDR, randrproto) + XORG_DRIVER_CHECK_EXT(RENDER, renderproto) +@@ -398,24 +418,25 @@ AC_ARG_ENABLE(sna, + [SNA="$enableval"], + [SNA=auto]) + ++AC_CHECK_HEADERS([dev/wscons/wsconsio.h]) ++AC_FUNC_ALLOCA ++AC_HEADER_MAJOR ++ + if test "x$SNA" != "xno"; then + AC_DEFINE(USE_SNA, 1, [Enable SNA support]) + AC_CHECK_HEADERS([sys/sysinfo.h], AC_CHECK_MEMBERS([struct sysinfo.totalram], [], [], [[#include <sys/sysinfo.h>]])) + fi + + uxa_requires_libdrm=2.4.52 ++uxa_requires_pixman=0.24.0 ++ 
+ AC_ARG_ENABLE(uxa, + AS_HELP_STRING([--enable-uxa], + [Enable Unified Acceleration Architecture (UXA) [default=auto]]), + [UXA="$enableval"], + [UXA=auto]) + if test "x$UXA" = "xauto"; then +- if ! pkg-config --exists "libdrm_intel >= $uxa_requires_libdrm"; then +- UXA=no +- fi +- if ! pkg-config --exists 'pixman-1 >= 0.24.0'; then +- UXA=no +- fi ++ PKG_CHECK_EXISTS([libdrm_intel >= $uxa_requires_libdrm pixman-1 >= $uxa_requires_pixman], [], [UXA=no]) + fi + if test "x$UXA" != "xno"; then + AC_DEFINE(USE_UXA, 1, [Enable UXA support]) +@@ -424,8 +445,10 @@ if test "x$UXA" != "xno"; then + UXA=yes + fi + +-PKG_CHECK_MODULES(XORG, [xorg-server >= $required_xorg_server_version xproto fontsproto pixman-1 >= $required_pixman_version $REQUIRED_MODULES]) ++PKG_CHECK_MODULES(XORG, [xorg-server >= $required_xorg_server_version xproto fontsproto damageproto pixman-1 >= $required_pixman_version $REQUIRED_MODULES]) + ABI_VERSION=`$PKG_CONFIG --variable=abi_videodrv xorg-server` ++XSERVER_VERSION=`$PKG_CONFIG --modversion xorg-server` ++PIXMAN_VERSION=`$PKG_CONFIG --modversion pixman-1` + + if test "x$ONLY_UMS" = "xyes"; then + UMS="yes" +@@ -519,7 +542,12 @@ AC_MSG_RESULT([$have_dri1]) + AM_CONDITIONAL(DRI1, test "x$have_dri1" != "xno") + if test "x$have_dri1" != "xno"; then + AC_DEFINE(HAVE_DRI1,1,[Enable DRI1 driver support]) +- dri_msg="$dri_msg DRI1" ++ str="DRI1" ++ if test "x$DRI_DEFAULT" = "x1"; then ++ AC_DEFINE(DEFAULT_DRI_LEVEL,1,[Default DRI level]) ++ str="*$str" ++ fi ++ dri_msg="$dri_msg $str" + else + DRI1_CFLAGS="" + DRI1_LIBS="" +@@ -576,7 +604,12 @@ AM_CONDITIONAL(DRI2, test "x$have_dri2" != "xno") + AC_MSG_RESULT([$have_dri2]) + if test "x$have_dri2" != "xno"; then + AC_DEFINE(HAVE_DRI2,1,[Enable DRI2 driver support]) +- dri_msg="$dri_msg DRI2" ++ str="DRI2" ++ if test "x$DRI_DEFAULT" = "x2"; then ++ AC_DEFINE(DEFAULT_DRI_LEVEL,2,[Default DRI level]) ++ str="*$str" ++ fi ++ dri_msg="$dri_msg $str" + else + if test "x$DRI" = "xyes" -a "x$DRI2" != "xno" -a 
"x$KMS" = "xyes"; then + AC_MSG_ERROR([DRI2 requested but prerequisites not found]) +@@ -591,13 +624,21 @@ AM_CONDITIONAL(DRI3, test "x$have_dri3" != "xno") + AC_MSG_RESULT([$have_dri3]) + if test "x$have_dri3" != "xno"; then + AC_DEFINE(HAVE_DRI3,1,[Enable DRI3 driver support]) +- dri_msg="$dri_msg DRI3" ++ str="DRI3" ++ if test "x$DRI_DEFAULT" = "x3"; then ++ AC_DEFINE(DEFAULT_DRI_LEVEL,3,[Default DRI level]) ++ str="*$str" ++ fi ++ dri_msg="$dri_msg $str" + else + if test "x$DRI" = "xyes" -a "x$DRI3" != "xno" -a "x$KMS" = "xyes"; then + AC_MSG_ERROR([DRI3 requested but prerequisites not found]) + fi + fi + ++AC_MSG_CHECKING([default DRI support]) ++AC_MSG_RESULT([$DEFAULT_DRI_DEFAULT]) ++ + AC_CHECK_HEADERS([X11/extensions/dpmsconst.h]) + + PRESENT="no" +@@ -711,27 +752,6 @@ if test "x$TEARFREE" = "xyes"; then + xp_msg="$xp_msg TearFree" + fi + +-AC_ARG_ENABLE(rendernode, +- AS_HELP_STRING([--enable-rendernode], +- [Enable use of render nodes (experimental) [default=no]]), +- [RENDERNODE="$enableval"], +- [RENDERNODE="no"]) +-AM_CONDITIONAL(USE_RENDERNODE, test "x$RENDERNODE" = "xyes") +-if test "x$RENDERNODE" = "xyes"; then +- AC_DEFINE(USE_RENDERNODE,1,[Assume "rendernode" support]) +- xp_msg="$xp_msg rendernode" +-fi +- +-AC_ARG_ENABLE(wc-mmap, +- AS_HELP_STRING([--enable-wc-mmap], +- [Enable use of WriteCombining mmaps [default=no]]), +- [WC_MMAP="$enableval"], +- [WC_MMAP="no"]) +-if test "x$WC_MMAP" = "xyes"; then +- AC_DEFINE(USE_WC_MMAP,1,[Enable use of WriteCombining mmaps]) +- xp_msg="$xp_msg mmap(wc)" +-fi +- + AC_ARG_ENABLE(create2, + AS_HELP_STRING([--enable-create2], + [Enable use of create2 ioctl (experimental) [default=no]]), +@@ -848,6 +868,7 @@ AC_CONFIG_FILES([ + xvmc/shader/mc/Makefile + xvmc/shader/vld/Makefile + test/Makefile ++ benchmarks/Makefile + tools/Makefile + tools/org.x.xf86-video-intel.backlight-helper.policy + ]) +@@ -855,7 +876,7 @@ AC_OUTPUT + + echo "" + echo "" +-test -e `pwd $0`/README && cat `pwd $0`/README ++cat 
$srcdir/README + + accel_msg="" + if test "x$SNA" != "xno"; then +@@ -895,13 +916,15 @@ fi + + echo "" + echo "AC_PACKAGE_STRING will be compiled with:" +-echo " Xorg Video ABI version: $ABI_VERSION" ++echo " Xorg Video ABI version: $ABI_VERSION (xorg-server-$XSERVER_VERSION)" ++echo " pixman version: pixman-1-$PIXMAN_VERSION" + echo " Acceleration backends:$accel_msg" + echo " Additional debugging support?$debug_msg" + echo " Support for Kernel Mode Setting? $KMS" + echo " Support for legacy User Mode Setting (for i810)? $UMS" + echo " Support for Direct Rendering Infrastructure:$dri_msg" + echo " Support for Xv motion compensation (XvMC and libXvMC):$xvmc_msg" ++echo " Support for display hotplug notifications (udev):$udev_msg" + echo " Build additional tools and utilities?$tools_msg" + if test -n "$xp_msg"; then + echo " Experimental support:$xp_msg" +diff --git a/libobj/alloca.c b/libobj/alloca.c +new file mode 100644 +index 00000000..883e1e9f +--- /dev/null ++++ b/libobj/alloca.c +@@ -0,0 +1,4 @@ ++void *alloca(size_t sz) ++{ ++ return NULL; ++} +diff --git a/man/intel.man b/man/intel.man +index 17515206..be398fbe 100644 +--- a/man/intel.man ++++ b/man/intel.man +@@ -27,9 +27,9 @@ supports the i810, i810-DC100, i810e, i815, i830M, 845G, 852GM, 855GM, + 865G, 915G, 915GM, 945G, 945GM, 965G, 965Q, 946GZ, 965GM, 945GME, + G33, Q33, Q35, G35, GM45, G45, Q45, G43, G41 chipsets, Pineview-M in + Atom N400 series, Pineview-D in Atom D400/D500 series, +-Intel(R) HD Graphics: 2000-6000, +-Intel(R) Iris(TM) Graphics: 5100/6100, and +-Intel(R) Iris(TM) Pro Graphics: 5200/6200/P6300. ++Intel(R) HD Graphics, ++Intel(R) Iris(TM) Graphics, ++Intel(R) Iris(TM) Pro Graphics. + + .SH CONFIGURATION DETAILS + Please refer to __xconfigfile__(__filemansuffix__) for general configuration +@@ -112,8 +112,8 @@ The default is 8192 if AGP allocable memory is < 128 MB, 16384 if < 192 MB, + 24576 if higher. DRI require at least a value of 16384. 
Higher values may give + better 3D performance, at expense of available system memory. + .TP +-.BI "Option \*qNoAccel\*q \*q" boolean \*q +-Disable or enable acceleration. ++.BI "Option \*qAccel\*q \*q" boolean \*q ++Enable or disable acceleration. + .IP + Default: acceleration is enabled. + +@@ -122,8 +122,8 @@ The following driver + .B Options + are supported for the 830M and later chipsets: + .TP +-.BI "Option \*qNoAccel\*q \*q" boolean \*q +-Disable or enable acceleration. ++.BI "Option \*qAccel\*q \*q" boolean \*q ++Enable or disable acceleration. + .IP + Default: acceleration is enabled. + .TP +@@ -201,6 +201,16 @@ that choice by specifying the entry under /sys/class/backlight to use. + .IP + Default: Automatic selection. + .TP ++.BI "Option \*qCustomEDID\*q \*q" string \*q ++Override the probed EDID on particular outputs. Sometimes the manufacturer ++supplied EDID is corrupt or lacking a few usable modes and supplying a ++corrected EDID may be easier than specifying every modeline. This option ++allows to pass the path to load an EDID from per output. The format is a ++comma separated string of output:path pairs, e.g. ++DP1:/path/to/dp1.edid,DP2:/path/to/dp2.edid ++.IP ++Default: No override, use manufacturer supplied EDIDs. ++.TP + .BI "Option \*qFallbackDebug\*q \*q" boolean \*q + Enable printing of debugging information on acceleration fallbacks to the + server log. +@@ -225,6 +235,15 @@ i.e. perform synchronous rendering. + .IP + Default: Disabled + .TP ++.BI "Option \*qHWRotation\*q \*q" boolean \*q ++Override the use of native hardware rotation and force the use of software, ++but GPU accelerated where possible, rotation. On some platforms the hardware ++can scanout directly into a rotated output bypassing the intermediate rendering ++and extra allocations required for software implemented rotation (i.e. native ++rotation uses less resources, is quicker and uses less power). This allows you ++to disable the native rotation in case of errors. 
++.IP ++Default: Enabled (use hardware rotation) + .TP + .BI "Option \*qVSync\*q \*q" boolean \*q + This option controls the use of commands to synchronise rendering with the +@@ -324,13 +343,29 @@ Default: 0 + .BI "Option \*qZaphodHeads\*q \*q" string \*q + .IP + Specify the randr output(s) to use with zaphod mode for a particular driver +-instance. If you this option you must use it with all instances of the +-driver ++instance. If you set this option you must use it with all instances of the ++driver. By default, each head is assigned only one CRTC (which limits ++using multiple outputs with that head to cloned mode). CRTC can be manually ++assigned to individual heads by preceding the output names with a comma ++delimited list of pipe numbers followed by a colon. Note that different pipes ++may be limited in their functionality and some outputs may only work with ++different pipes. + .br + For example: ++ ++.RS + .B + Option \*qZaphodHeads\*q \*qLVDS1,VGA1\*q +-will assign xrandr outputs LVDS1 and VGA0 to this instance of the driver. ++ ++will assign xrandr outputs LVDS1 and VGA1 to this instance of the driver. ++.RE ++ ++.RS ++.B ++Option \*qZaphodHeads\*q \*q0,2:HDMI1,DP2\*q ++ ++will assign xrandr outputs HDMI1 and DP2 and CRTCs 0 and 2 to this instance of the driver. ++.RE + + .SH OUTPUT CONFIGURATION + On 830M and better chipsets, the driver supports runtime configuration of +@@ -431,11 +466,11 @@ First DVI SDVO output + Second DVI SDVO output + + .SS "TMDS-1", "TMDS-2", "HDMI-1", "HDMI-2" +-DVI/HDMI outputs. Avaliable common properties include: ++DVI/HDMI outputs. Available common properties include: + .TP + \fBBROADCAST_RGB\fP - method used to set RGB color range + Adjusting this property allows you to set RGB color range on each +-channel in order to match HDTV requirment(default 0 for full ++channel in order to match HDTV requirement(default 0 for full + range). 
Setting 1 means RGB color range is 16-235, 0 means RGB color + range is 0-255 on each channel. (Full range is 0-255, not 16-235) + +diff --git a/src/backlight.c b/src/backlight.c +index 9f239867..fcbb279f 100644 +--- a/src/backlight.c ++++ b/src/backlight.c +@@ -34,6 +34,12 @@ + #include <sys/stat.h> + #include <sys/ioctl.h> + ++#if MAJOR_IN_MKDEV ++#include <sys/mkdev.h> ++#elif MAJOR_IN_SYSMACROS ++#include <sys/sysmacros.h> ++#endif ++ + #include <stdio.h> + #include <stdlib.h> + #include <string.h> +@@ -42,6 +48,7 @@ + #include <fcntl.h> + #include <unistd.h> + #include <dirent.h> ++#include <errno.h> + + #include <xorg-server.h> + #include <xf86.h> +@@ -84,7 +91,7 @@ void backlight_init(struct backlight *b) + b->has_power = 0; + } + +-#ifdef __OpenBSD__ ++#ifdef HAVE_DEV_WSCONS_WSCONSIO_H + + #include <dev/wscons/wsconsio.h> + #include <xf86Priv.h> +@@ -122,6 +129,11 @@ int backlight_get(struct backlight *b) + return param.curval; + } + ++char *backlight_find_for_device(struct pci_device *pci) ++{ ++ return NULL; ++} ++ + int backlight_open(struct backlight *b, char *iface) + { + struct wsdisplay_param param; +@@ -146,12 +158,9 @@ int backlight_open(struct backlight *b, char *iface) + return param.curval; + } + +-enum backlight_type backlight_exists(const char *iface) ++int backlight_exists(const char *iface) + { +- if (iface != NULL) +- return BL_NONE; +- +- return BL_PLATFORM; ++ return iface == NULL; + } + + int backlight_on(struct backlight *b) +@@ -163,6 +172,7 @@ int backlight_off(struct backlight *b) + { + return 0; + } ++ + #else + + static int +@@ -213,6 +223,24 @@ __backlight_read(const char *iface, const char *file) + } + + static int ++writen(int fd, const char *value, int len) ++{ ++ int ret; ++ ++ do { ++ ret = write(fd, value, len); ++ if (ret < 0) { ++ if (errno == EAGAIN || errno == EINTR) ++ continue; ++ ++ return ret; ++ } ++ } while (value += ret, len -= ret); ++ ++ return 0; ++} ++ ++static int + __backlight_write(const char *iface, const 
char *file, const char *value) + { + int fd, ret; +@@ -221,7 +249,7 @@ __backlight_write(const char *iface, const char *file, const char *value) + if (fd < 0) + return -1; + +- ret = write(fd, value, strlen(value)+1); ++ ret = writen(fd, value, strlen(value)+1); + close(fd); + + return ret; +@@ -244,10 +272,10 @@ static const char *known_interfaces[] = { + "intel_backlight", + }; + +-static enum backlight_type __backlight_type(const char *iface) ++static int __backlight_type(const char *iface) + { + char buf[1024]; +- int fd, v; ++ int fd, v, i; + + v = -1; + fd = __backlight_open(iface, "type", O_RDONLY); +@@ -261,39 +289,41 @@ static enum backlight_type __backlight_type(const char *iface) + buf[v] = '\0'; + + if (strcmp(buf, "raw") == 0) +- v = BL_RAW; ++ v = BL_RAW << 8; + else if (strcmp(buf, "platform") == 0) +- v = BL_PLATFORM; ++ v = BL_PLATFORM << 8; + else if (strcmp(buf, "firmware") == 0) +- v = BL_FIRMWARE; ++ v = BL_FIRMWARE << 8; + else +- v = BL_NAMED; ++ v = BL_NAMED << 8; + } else +- v = BL_NAMED; ++ v = BL_NAMED << 8; + +- if (v == BL_NAMED) { +- int i; +- for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) { +- if (strcmp(iface, known_interfaces[i]) == 0) +- break; +- } +- v += i; ++ for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) { ++ if (strcmp(iface, known_interfaces[i]) == 0) ++ break; + } ++ v += i; + + return v; + } + +-enum backlight_type backlight_exists(const char *iface) ++static int __backlight_exists(const char *iface) + { + if (__backlight_read(iface, "brightness") < 0) +- return BL_NONE; ++ return -1; + + if (__backlight_read(iface, "max_brightness") <= 0) +- return BL_NONE; ++ return -1; + + return __backlight_type(iface); + } + ++int backlight_exists(const char *iface) ++{ ++ return __backlight_exists(iface) != -1; ++} ++ + static int __backlight_init(struct backlight *b, char *iface, int fd) + { + b->fd = fd_move_cloexec(fd_set_nonblock(fd)); +@@ -399,7 +429,50 @@ __backlight_find(void) + continue; + + /* Fallback to priority 
list of known iface for old kernels */ +- v = backlight_exists(de->d_name); ++ v = __backlight_exists(de->d_name); ++ if (v < 0) ++ continue; ++ ++ if (v < best_type) { ++ char *copy = strdup(de->d_name); ++ if (copy) { ++ free(best_iface); ++ best_iface = copy; ++ best_type = v; ++ } ++ } ++ } ++ closedir(dir); ++ ++ return best_iface; ++} ++ ++char *backlight_find_for_device(struct pci_device *pci) ++{ ++ char path[200]; ++ unsigned best_type = INT_MAX; ++ char *best_iface = NULL; ++ DIR *dir; ++ struct dirent *de; ++ ++ snprintf(path, sizeof(path), ++ "/sys/bus/pci/devices/%04x:%02x:%02x.%d/backlight", ++ pci->domain, pci->bus, pci->dev, pci->func); ++ ++ dir = opendir(path); ++ if (dir == NULL) ++ return NULL; ++ ++ while ((de = readdir(dir))) { ++ int v; ++ ++ if (*de->d_name == '.') ++ continue; ++ ++ v = __backlight_exists(de->d_name); ++ if (v < 0) ++ continue; ++ + if (v < best_type) { + char *copy = strdup(de->d_name); + if (copy) { +@@ -416,14 +489,17 @@ __backlight_find(void) + + int backlight_open(struct backlight *b, char *iface) + { +- int level; ++ int level, type; + + if (iface == NULL) + iface = __backlight_find(); + if (iface == NULL) + goto err; + +- b->type = __backlight_type(iface); ++ type = __backlight_type(iface); ++ if (type < 0) ++ goto err; ++ b->type = type >> 8; + + b->max = __backlight_read(iface, "max_brightness"); + if (b->max <= 0) +@@ -447,7 +523,7 @@ err: + int backlight_set(struct backlight *b, int level) + { + char val[BACKLIGHT_VALUE_LEN]; +- int len, ret = 0; ++ int len; + + if (b->iface == NULL) + return 0; +@@ -456,10 +532,7 @@ int backlight_set(struct backlight *b, int level) + level = b->max; + + len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level); +- if (write(b->fd, val, len) != len) +- ret = -1; +- +- return ret; ++ return writen(b->fd, val, len); + } + + int backlight_get(struct backlight *b) +@@ -517,43 +590,6 @@ void backlight_disable(struct backlight *b) + void backlight_close(struct backlight *b) + { + 
backlight_disable(b); +- if (b->pid) ++ if (b->pid > 0) + waitpid(b->pid, NULL, 0); + } +- +-char *backlight_find_for_device(struct pci_device *pci) +-{ +- char path[200]; +- unsigned best_type = INT_MAX; +- char *best_iface = NULL; +- DIR *dir; +- struct dirent *de; +- +- snprintf(path, sizeof(path), +- "/sys/bus/pci/devices/%04x:%02x:%02x.%d/backlight", +- pci->domain, pci->bus, pci->dev, pci->func); +- +- dir = opendir(path); +- if (dir == NULL) +- return NULL; +- +- while ((de = readdir(dir))) { +- int v; +- +- if (*de->d_name == '.') +- continue; +- +- v = backlight_exists(de->d_name); +- if (v < best_type) { +- char *copy = strdup(de->d_name); +- if (copy) { +- free(best_iface); +- best_iface = copy; +- best_type = v; +- } +- } +- } +- closedir(dir); +- +- return best_iface; +-} +diff --git a/src/backlight.h b/src/backlight.h +index bb0e28bc..ba17755b 100644 +--- a/src/backlight.h ++++ b/src/backlight.h +@@ -43,7 +43,7 @@ struct backlight { + int pid, fd; + }; + +-enum backlight_type backlight_exists(const char *iface); ++int backlight_exists(const char *iface); + + void backlight_init(struct backlight *backlight); + int backlight_open(struct backlight *backlight, char *iface); +diff --git a/src/compat-api.h b/src/compat-api.h +index d09e1fb3..05797a08 100644 +--- a/src/compat-api.h ++++ b/src/compat-api.h +@@ -30,6 +30,7 @@ + + #include <xorg-server.h> + #include <xorgVersion.h> ++#include <xf86Module.h> + + #include <picturestr.h> + #ifndef GLYPH_HAS_GLYPH_PICTURE_ACCESSOR +@@ -39,7 +40,17 @@ + + #ifndef XF86_HAS_SCRN_CONV + #define xf86ScreenToScrn(s) xf86Screens[(s)->myNum] ++#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,1,0,0,0) + #define xf86ScrnToScreen(s) screenInfo.screens[(s)->scrnIndex] ++#else ++#define xf86ScrnToScreen(s) ((s)->pScreen) ++#endif ++#else ++#define xf86ScrnToScreen(s) ((s)->pScreen) ++#endif ++ ++#if GET_ABI_MAJOR(ABI_VIDEODRV_VERSION) >= 22 ++#define HAVE_NOTIFY_FD 1 + #endif + + #ifndef XF86_SCRN_INTERFACE +@@ -131,6 +142,17 
@@ region_rects(const RegionRec *r) + return r->data ? (const BoxRec *)(r->data + 1) : &r->extents; + } + ++inline static void ++region_get_boxes(const RegionRec *r, const BoxRec **s, const BoxRec **e) ++{ ++ int n; ++ if (r->data) ++ *s = region_boxptr(r), n = r->data->numRects; ++ else ++ *s = &r->extents, n = 1; ++ *e = *s + n; ++} ++ + #ifndef INCLUDE_LEGACY_REGION_DEFINES + #define RegionCreate(r, s) REGION_CREATE(NULL, r, s) + #define RegionBreak(r) REGION_BREAK(NULL, r) +@@ -223,4 +245,19 @@ static inline void FreePixmap(PixmapPtr pixmap) + dstx, dsty) + #endif + ++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0) ++#define isGPU(S) (S)->is_gpu ++#else ++#define isGPU(S) 0 ++#endif ++ ++#if HAS_DIRTYTRACKING_ROTATION ++#define PixmapSyncDirtyHelper(d, dd) PixmapSyncDirtyHelper(d) ++#endif ++ ++#if !HAVE_NOTIFY_FD ++#define SetNotifyFd(fd, cb, mode, data) AddGeneralSocket(fd); ++#define RemoveNotifyFd(fd) RemoveGeneralSocket(fd) ++#endif ++ + #endif +diff --git a/src/i915_pciids.h b/src/i915_pciids.h +index 180ad0e6..466c7159 100644 +--- a/src/i915_pciids.h ++++ b/src/i915_pciids.h +@@ -134,7 +134,7 @@ + #define INTEL_IVB_Q_IDS(info) \ + INTEL_QUANTA_VGA_DEVICE(info) /* Quanta transcode */ + +-#define INTEL_HSW_D_IDS(info) \ ++#define INTEL_HSW_IDS(info) \ + INTEL_VGA_DEVICE(0x0402, info), /* GT1 desktop */ \ + INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \ +@@ -179,9 +179,7 @@ + INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0D0E, info), /* CRW GT1 reserved */ \ + INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \ +- INTEL_VGA_DEVICE(0x0D2E, info) /* CRW GT3 reserved */ \ +- +-#define INTEL_HSW_M_IDS(info) \ ++ INTEL_VGA_DEVICE(0x0D2E, info), /* CRW GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0406, info), /* GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0416, info), /* GT2 mobile */ \ + INTEL_VGA_DEVICE(0x0426, info), /* GT2 mobile */ \ +@@ -198,60 +196,48 @@ + 
INTEL_VGA_DEVICE(0x0D16, info), /* CRW GT2 mobile */ \ + INTEL_VGA_DEVICE(0x0D26, info) /* CRW GT3 mobile */ + +-#define INTEL_VLV_M_IDS(info) \ ++#define INTEL_VLV_IDS(info) \ + INTEL_VGA_DEVICE(0x0f30, info), \ + INTEL_VGA_DEVICE(0x0f31, info), \ + INTEL_VGA_DEVICE(0x0f32, info), \ + INTEL_VGA_DEVICE(0x0f33, info), \ +- INTEL_VGA_DEVICE(0x0157, info) +- +-#define INTEL_VLV_D_IDS(info) \ ++ INTEL_VGA_DEVICE(0x0157, info), \ + INTEL_VGA_DEVICE(0x0155, info) + +-#define _INTEL_BDW_M(gt, id, info) \ +- INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info) +-#define _INTEL_BDW_D(gt, id, info) \ +- INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info) +- +-#define _INTEL_BDW_M_IDS(gt, info) \ +- _INTEL_BDW_M(gt, 0x1602, info), /* ULT */ \ +- _INTEL_BDW_M(gt, 0x1606, info), /* ULT */ \ +- _INTEL_BDW_M(gt, 0x160B, info), /* Iris */ \ +- _INTEL_BDW_M(gt, 0x160E, info) /* ULX */ +- +-#define _INTEL_BDW_D_IDS(gt, info) \ +- _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ +- _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ +- +-#define INTEL_BDW_GT12M_IDS(info) \ +- _INTEL_BDW_M_IDS(1, info), \ +- _INTEL_BDW_M_IDS(2, info) +- +-#define INTEL_BDW_GT12D_IDS(info) \ +- _INTEL_BDW_D_IDS(1, info), \ +- _INTEL_BDW_D_IDS(2, info) +- +-#define INTEL_BDW_GT3M_IDS(info) \ +- _INTEL_BDW_M_IDS(3, info) +- +-#define INTEL_BDW_GT3D_IDS(info) \ +- _INTEL_BDW_D_IDS(3, info) +- +-#define INTEL_BDW_RSVDM_IDS(info) \ +- _INTEL_BDW_M_IDS(4, info) +- +-#define INTEL_BDW_RSVDD_IDS(info) \ +- _INTEL_BDW_D_IDS(4, info) +- +-#define INTEL_BDW_M_IDS(info) \ +- INTEL_BDW_GT12M_IDS(info), \ +- INTEL_BDW_GT3M_IDS(info), \ +- INTEL_BDW_RSVDM_IDS(info) +- +-#define INTEL_BDW_D_IDS(info) \ +- INTEL_BDW_GT12D_IDS(info), \ +- INTEL_BDW_GT3D_IDS(info), \ +- INTEL_BDW_RSVDD_IDS(info) ++#define INTEL_BDW_GT12_IDS(info) \ ++ INTEL_VGA_DEVICE(0x1602, info), /* GT1 ULT */ \ ++ INTEL_VGA_DEVICE(0x1606, info), /* GT1 ULT */ \ ++ INTEL_VGA_DEVICE(0x160B, info), /* GT1 Iris */ \ ++ INTEL_VGA_DEVICE(0x160E, info), /* GT1 ULX */ \ 
++ INTEL_VGA_DEVICE(0x1612, info), /* GT2 Halo */ \ ++ INTEL_VGA_DEVICE(0x1616, info), /* GT2 ULT */ \ ++ INTEL_VGA_DEVICE(0x161B, info), /* GT2 ULT */ \ ++ INTEL_VGA_DEVICE(0x161E, info), /* GT2 ULX */ \ ++ INTEL_VGA_DEVICE(0x160A, info), /* GT1 Server */ \ ++ INTEL_VGA_DEVICE(0x160D, info), /* GT1 Workstation */ \ ++ INTEL_VGA_DEVICE(0x161A, info), /* GT2 Server */ \ ++ INTEL_VGA_DEVICE(0x161D, info) /* GT2 Workstation */ ++ ++#define INTEL_BDW_GT3_IDS(info) \ ++ INTEL_VGA_DEVICE(0x1622, info), /* ULT */ \ ++ INTEL_VGA_DEVICE(0x1626, info), /* ULT */ \ ++ INTEL_VGA_DEVICE(0x162B, info), /* Iris */ \ ++ INTEL_VGA_DEVICE(0x162E, info), /* ULX */\ ++ INTEL_VGA_DEVICE(0x162A, info), /* Server */ \ ++ INTEL_VGA_DEVICE(0x162D, info) /* Workstation */ ++ ++#define INTEL_BDW_RSVD_IDS(info) \ ++ INTEL_VGA_DEVICE(0x1632, info), /* ULT */ \ ++ INTEL_VGA_DEVICE(0x1636, info), /* ULT */ \ ++ INTEL_VGA_DEVICE(0x163B, info), /* Iris */ \ ++ INTEL_VGA_DEVICE(0x163E, info), /* ULX */ \ ++ INTEL_VGA_DEVICE(0x163A, info), /* Server */ \ ++ INTEL_VGA_DEVICE(0x163D, info) /* Workstation */ ++ ++#define INTEL_BDW_IDS(info) \ ++ INTEL_BDW_GT12_IDS(info), \ ++ INTEL_BDW_GT3_IDS(info), \ ++ INTEL_BDW_RSVD_IDS(info) + + #define INTEL_CHV_IDS(info) \ + INTEL_VGA_DEVICE(0x22b0, info), \ +@@ -259,21 +245,85 @@ + INTEL_VGA_DEVICE(0x22b2, info), \ + INTEL_VGA_DEVICE(0x22b3, info) + +-#define INTEL_SKL_IDS(info) \ +- INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \ ++#define INTEL_SKL_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x1906, info), /* ULT GT1 */ \ +- INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \ +- INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \ + INTEL_VGA_DEVICE(0x190E, info), /* ULX GT1 */ \ ++ INTEL_VGA_DEVICE(0x1902, info), /* DT GT1 */ \ ++ INTEL_VGA_DEVICE(0x190B, info), /* Halo GT1 */ \ ++ INTEL_VGA_DEVICE(0x190A, info) /* SRV GT1 */ ++ ++#define INTEL_SKL_GT2_IDS(info) \ ++ INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \ ++ INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \ + 
INTEL_VGA_DEVICE(0x191E, info), /* ULX GT2 */ \ + INTEL_VGA_DEVICE(0x1912, info), /* DT GT2 */ \ +- INTEL_VGA_DEVICE(0x1902, info), /* DT GT1 */ \ + INTEL_VGA_DEVICE(0x191B, info), /* Halo GT2 */ \ +- INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \ +- INTEL_VGA_DEVICE(0x190B, info), /* Halo GT1 */ \ + INTEL_VGA_DEVICE(0x191A, info), /* SRV GT2 */ \ +- INTEL_VGA_DEVICE(0x192A, info), /* SRV GT3 */ \ +- INTEL_VGA_DEVICE(0x190A, info), /* SRV GT1 */ \ + INTEL_VGA_DEVICE(0x191D, info) /* WKS GT2 */ + ++#define INTEL_SKL_GT3_IDS(info) \ ++ INTEL_VGA_DEVICE(0x1923, info), /* ULT GT3 */ \ ++ INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \ ++ INTEL_VGA_DEVICE(0x1927, info), /* ULT GT3 */ \ ++ INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \ ++ INTEL_VGA_DEVICE(0x192D, info) /* SRV GT3 */ ++ ++#define INTEL_SKL_GT4_IDS(info) \ ++ INTEL_VGA_DEVICE(0x1932, info), /* DT GT4 */ \ ++ INTEL_VGA_DEVICE(0x193B, info), /* Halo GT4 */ \ ++ INTEL_VGA_DEVICE(0x193D, info), /* WKS GT4 */ \ ++ INTEL_VGA_DEVICE(0x192A, info), /* SRV GT4 */ \ ++ INTEL_VGA_DEVICE(0x193A, info) /* SRV GT4e */ ++ ++#define INTEL_SKL_IDS(info) \ ++ INTEL_SKL_GT1_IDS(info), \ ++ INTEL_SKL_GT2_IDS(info), \ ++ INTEL_SKL_GT3_IDS(info), \ ++ INTEL_SKL_GT4_IDS(info) ++ ++#define INTEL_BXT_IDS(info) \ ++ INTEL_VGA_DEVICE(0x0A84, info), \ ++ INTEL_VGA_DEVICE(0x1A84, info), \ ++ INTEL_VGA_DEVICE(0x1A85, info), \ ++ INTEL_VGA_DEVICE(0x5A84, info), /* APL HD Graphics 505 */ \ ++ INTEL_VGA_DEVICE(0x5A85, info) /* APL HD Graphics 500 */ ++ ++#define INTEL_GLK_IDS(info) \ ++ INTEL_VGA_DEVICE(0x3184, info), \ ++ INTEL_VGA_DEVICE(0x3185, info) ++ ++#define INTEL_KBL_GT1_IDS(info) \ ++ INTEL_VGA_DEVICE(0x5913, info), /* ULT GT1.5 */ \ ++ INTEL_VGA_DEVICE(0x5915, info), /* ULX GT1.5 */ \ ++ INTEL_VGA_DEVICE(0x5917, info), /* DT GT1.5 */ \ ++ INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \ ++ INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \ ++ INTEL_VGA_DEVICE(0x5902, info), /* DT GT1 */ \ ++ INTEL_VGA_DEVICE(0x5908, info), /* 
Halo GT1 */ \ ++ INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \ ++ INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */ ++ ++#define INTEL_KBL_GT2_IDS(info) \ ++ INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \ ++ INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \ ++ INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \ ++ INTEL_VGA_DEVICE(0x5912, info), /* DT GT2 */ \ ++ INTEL_VGA_DEVICE(0x591B, info), /* Halo GT2 */ \ ++ INTEL_VGA_DEVICE(0x591A, info), /* SRV GT2 */ \ ++ INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */ ++ ++#define INTEL_KBL_GT3_IDS(info) \ ++ INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \ ++ INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \ ++ INTEL_VGA_DEVICE(0x5927, info) /* ULT GT3 */ ++ ++#define INTEL_KBL_GT4_IDS(info) \ ++ INTEL_VGA_DEVICE(0x593B, info) /* Halo GT4 */ ++ ++#define INTEL_KBL_IDS(info) \ ++ INTEL_KBL_GT1_IDS(info), \ ++ INTEL_KBL_GT2_IDS(info), \ ++ INTEL_KBL_GT3_IDS(info), \ ++ INTEL_KBL_GT4_IDS(info) ++ + #endif /* _I915_PCIIDS_H */ +diff --git a/src/intel_device.c b/src/intel_device.c +index 140e1536..c4910cd8 100644 +--- a/src/intel_device.c ++++ b/src/intel_device.c +@@ -38,6 +38,12 @@ + #include <dirent.h> + #include <errno.h> + ++#if MAJOR_IN_MKDEV ++#include <sys/mkdev.h> ++#elif MAJOR_IN_SYSMACROS ++#include <sys/sysmacros.h> ++#endif ++ + #include <pciaccess.h> + + #include <xorg-server.h> +@@ -197,9 +203,15 @@ static inline struct intel_device *intel_device(ScrnInfoPtr scrn) + return xf86GetEntityPrivate(scrn->entityList[0], intel_device_key)->ptr; + } + ++static const char *kernel_module_names[] ={ ++ "i915", ++ NULL, ++}; ++ + static int is_i915_device(int fd) + { + drm_version_t version; ++ const char **kn; + char name[5] = ""; + + memset(&version, 0, sizeof(version)); +@@ -209,7 +221,22 @@ static int is_i915_device(int fd) + if (drmIoctl(fd, DRM_IOCTL_VERSION, &version)) + return 0; + +- return strcmp("i915", name) == 0; ++ for (kn = kernel_module_names; *kn; kn++) ++ if (strcmp(*kn, name) == 0) ++ return 1; ++ ++ return 0; ++} 
++ ++static int load_i915_kernel_module(void) ++{ ++ const char **kn; ++ ++ for (kn = kernel_module_names; *kn; kn++) ++ if (xf86LoadKernelModule(*kn)) ++ return 0; ++ ++ return -1; + } + + static int is_i915_gem(int fd) +@@ -336,7 +363,7 @@ static int __intel_open_device__pci(const struct pci_device *pci) + + sprintf(path + base, "driver"); + if (stat(path, &st)) { +- if (xf86LoadKernelModule("i915")) ++ if (load_i915_kernel_module()) + return -1; + (void)xf86LoadKernelModule("fbcon"); + } +@@ -399,7 +426,7 @@ static int __intel_open_device__legacy(const struct pci_device *pci) + + ret = drmCheckModesettingSupported(id); + if (ret) { +- if (xf86LoadKernelModule("i915")) ++ if (load_i915_kernel_module() == 0) + ret = drmCheckModesettingSupported(id); + if (ret) + return -1; +@@ -461,9 +488,9 @@ static int is_render_node(int fd, struct stat *st) + + static char *find_render_node(int fd) + { +-#if defined(USE_RENDERNODE) + struct stat master, render; + char buf[128]; ++ int i; + + /* Are we a render-node ourselves? 
*/ + if (is_render_node(fd, &master)) +@@ -472,9 +499,17 @@ static char *find_render_node(int fd) + sprintf(buf, "/dev/dri/renderD%d", (int)((master.st_rdev | 0x80) & 0xbf)); + if (stat(buf, &render) == 0 && + master.st_mode == render.st_mode && +- render.st_rdev == ((master.st_rdev | 0x80) & 0xbf)) ++ render.st_rdev == (master.st_rdev | 0x80)) + return strdup(buf); +-#endif ++ ++ /* Misaligned card <-> renderD, do a full search */ ++ for (i = 0; i < 16; i++) { ++ sprintf(buf, "/dev/dri/renderD%d", i + 128); ++ if (stat(buf, &render) == 0 && ++ master.st_mode == render.st_mode && ++ render.st_rdev == (master.st_rdev | 0x80)) ++ return strdup(buf); ++ } + + return NULL; + } +@@ -608,6 +643,27 @@ err_path: + return -1; + } + ++void intel_close_device(int entity_num) ++{ ++ struct intel_device *dev; ++ ++ if (intel_device_key == -1) ++ return; ++ ++ dev = xf86GetEntityPrivate(entity_num, intel_device_key)->ptr; ++ xf86GetEntityPrivate(entity_num, intel_device_key)->ptr = NULL; ++ if (!dev) ++ return; ++ ++ if (dev->master_count == 0) /* Don't close server-fds */ ++ close(dev->fd); ++ ++ if (dev->render_node != dev->master_node) ++ free(dev->render_node); ++ free(dev->master_node); ++ free(dev); ++} ++ + int __intel_peek_fd(ScrnInfoPtr scrn) + { + struct intel_device *dev; +@@ -672,6 +728,12 @@ struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd) + return dev; + } + ++const char *intel_get_master_name(struct intel_device *dev) ++{ ++ assert(dev && dev->master_node); ++ return dev->master_node; ++} ++ + const char *intel_get_client_name(struct intel_device *dev) + { + assert(dev && dev->render_node); +diff --git a/src/intel_driver.h b/src/intel_driver.h +index 28ed1a0e..bece88a0 100644 +--- a/src/intel_driver.h ++++ b/src/intel_driver.h +@@ -124,9 +124,11 @@ int intel_entity_get_devid(int index); + int intel_open_device(int entity_num, + const struct pci_device *pci, + struct xf86_platform_device *dev); ++void intel_close_device(int entity_num); + int 
__intel_peek_fd(ScrnInfoPtr scrn); + struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd); + int intel_has_render_node(struct intel_device *dev); ++const char *intel_get_master_name(struct intel_device *dev); + const char *intel_get_client_name(struct intel_device *dev); + int intel_get_client_fd(struct intel_device *dev); + int intel_get_device_id(struct intel_device *dev); +diff --git a/src/intel_list.h b/src/intel_list.h +index 51af825d..c8a3187a 100644 +--- a/src/intel_list.h ++++ b/src/intel_list.h +@@ -306,8 +306,7 @@ list_is_empty(const struct list *head) + list_entry((ptr)->prev, type, member) + + #define __container_of(ptr, sample, member) \ +- (void *)((char *)(ptr) \ +- - ((char *)&(sample)->member - (char *)(sample))) ++ (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample))) + /** + * Loop through the list given by head and set pos to struct in the list. + * +@@ -392,17 +391,50 @@ static inline void list_move_tail(struct list *list, struct list *head) + #define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +-#define list_for_each_entry_reverse(pos, head, member) \ ++#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = __container_of((head)->prev, pos, member); \ + &pos->member != (head); \ + pos = __container_of(pos->member.prev, pos, member)) + + #endif + ++#define list_for_each_entry_safe_from(pos, tmp, head, member) \ ++ for (tmp = __container_of(pos->member.next, pos, member); \ ++ &pos->member != (head); \ ++ pos = tmp, tmp = __container_of(tmp->member.next, tmp, member)) ++ + #undef container_of + #define container_of(ptr, type, member) \ + ((type *)((char *)(ptr) - (char *) &((type *)0)->member)) + ++static inline void __list_splice(const struct list *list, ++ struct list *prev, ++ struct list *next) ++{ ++ struct list *first = list->next; ++ struct list *last = list->prev; ++ ++ first->prev = prev; ++ prev->next = first; ++ ++ last->next = next; ++ next->prev = last; 
++} ++ ++static inline void list_splice(const struct list *list, ++ struct list *head) ++{ ++ if (!list_is_empty(list)) ++ __list_splice(list, head, head->next); ++} ++ ++static inline void list_splice_tail(const struct list *list, ++ struct list *head) ++{ ++ if (!list_is_empty(list)) ++ __list_splice(list, head->prev, head); ++} ++ + static inline int list_is_singular(const struct list *list) + { + return list->next == list->prev; +diff --git a/src/intel_module.c b/src/intel_module.c +index 102d52aa..2e97b5ea 100644 +--- a/src/intel_module.c ++++ b/src/intel_module.c +@@ -126,6 +126,17 @@ static const struct intel_device_info intel_skylake_info = { + .gen = 0110, + }; + ++static const struct intel_device_info intel_broxton_info = { ++ .gen = 0111, ++}; ++ ++static const struct intel_device_info intel_kabylake_info = { ++ .gen = 0112, ++}; ++ ++static const struct intel_device_info intel_geminilake_info = { ++ .gen = 0113, ++}; + + static const SymTabRec intel_chipsets[] = { + {PCI_CHIP_I810, "i810"}, +@@ -234,30 +245,63 @@ static const SymTabRec intel_chipsets[] = { + {0x0157, "HD Graphics"}, + + /* Broadwell Marketing names */ +- {0x1602, "HD graphics"}, +- {0x1606, "HD graphics"}, +- {0x160B, "HD graphics"}, +- {0x160A, "HD graphics"}, +- {0x160D, "HD graphics"}, +- {0x160E, "HD graphics"}, +- {0x1612, "HD graphics 5600"}, +- {0x1616, "HD graphics 5500"}, +- {0x161B, "HD graphics"}, +- {0x161A, "HD graphics"}, +- {0x161D, "HD graphics"}, +- {0x161E, "HD graphics 5300"}, +- {0x1622, "Iris Pro graphics 6200"}, +- {0x1626, "HD graphics 6000"}, +- {0x162B, "Iris graphics 6100"}, +- {0x162A, "Iris Pro graphics P6300"}, +- {0x162D, "HD graphics"}, +- {0x162E, "HD graphics"}, +- {0x1632, "HD graphics"}, +- {0x1636, "HD graphics"}, +- {0x163B, "HD graphics"}, +- {0x163A, "HD graphics"}, +- {0x163D, "HD graphics"}, +- {0x163E, "HD graphics"}, ++ {0x1602, "HD Graphics"}, ++ {0x1606, "HD Graphics"}, ++ {0x160B, "HD Graphics"}, ++ {0x160A, "HD Graphics"}, ++ {0x160D, "HD 
Graphics"}, ++ {0x160E, "HD Graphics"}, ++ {0x1612, "HD Graphics 5600"}, ++ {0x1616, "HD Graphics 5500"}, ++ {0x161B, "HD Graphics"}, ++ {0x161A, "HD Graphics"}, ++ {0x161D, "HD Graphics"}, ++ {0x161E, "HD Graphics 5300"}, ++ {0x1622, "Iris Pro Graphics 6200"}, ++ {0x1626, "HD Graphics 6000"}, ++ {0x162B, "Iris Graphics 6100"}, ++ {0x162A, "Iris Pro Graphics P6300"}, ++ {0x162D, "HD Graphics"}, ++ {0x162E, "HD Graphics"}, ++ {0x1632, "HD Graphics"}, ++ {0x1636, "HD Graphics"}, ++ {0x163B, "HD Graphics"}, ++ {0x163A, "HD Graphics"}, ++ {0x163D, "HD Graphics"}, ++ {0x163E, "HD Graphics"}, ++ ++ /* Cherryview (Cherrytrail/Braswell) */ ++ {0x22b0, "HD Graphics"}, ++ {0x22b1, "HD Graphics"}, ++ {0x22b2, "HD Graphics"}, ++ {0x22b3, "HD Graphics"}, ++ ++ /* Skylake */ ++ {0x1902, "HD Graphics 510"}, ++ {0x1906, "HD Graphics 510"}, ++ {0x190B, "HD Graphics 510"}, ++ {0x1912, "HD Graphics 530"}, ++ {0x1916, "HD Graphics 520"}, ++ {0x191B, "HD Graphics 530"}, ++ {0x191D, "HD Graphics P530"}, ++ {0x191E, "HD Graphics 515"}, ++ {0x1921, "HD Graphics 520"}, ++ {0x1926, "Iris Graphics 540"}, ++ {0x1927, "Iris Graphics 550"}, ++ {0x192B, "Iris Graphics 555"}, ++ {0x192D, "Iris Graphics P555"}, ++ {0x1932, "Iris Pro Graphics 580"}, ++ {0x193A, "Iris Pro Graphics P580"}, ++ {0x193B, "Iris Pro Graphics 580"}, ++ {0x193D, "Iris Pro Graphics P580"}, ++ ++ /* Broxton (Apollolake) */ ++ {0x5A84, "HD Graphics 505"}, ++ {0x5A85, "HD Graphics 500"}, ++ ++ /* Kabylake */ ++ {0x5916, "HD Graphics 620"}, ++ {0x591E, "HD Graphics 615"}, + + /* When adding new identifiers, also update: + * 1. 
intel_identify() +@@ -305,18 +349,14 @@ static const struct pci_id_match intel_device_match[] = { + INTEL_IVB_D_IDS(&intel_ivybridge_info), + INTEL_IVB_M_IDS(&intel_ivybridge_info), + +- INTEL_HSW_D_IDS(&intel_haswell_info), +- INTEL_HSW_M_IDS(&intel_haswell_info), +- +- INTEL_VLV_D_IDS(&intel_valleyview_info), +- INTEL_VLV_M_IDS(&intel_valleyview_info), +- +- INTEL_BDW_D_IDS(&intel_broadwell_info), +- INTEL_BDW_M_IDS(&intel_broadwell_info), +- ++ INTEL_HSW_IDS(&intel_haswell_info), ++ INTEL_VLV_IDS(&intel_valleyview_info), ++ INTEL_BDW_IDS(&intel_broadwell_info), + INTEL_CHV_IDS(&intel_cherryview_info), +- + INTEL_SKL_IDS(&intel_skylake_info), ++ INTEL_BXT_IDS(&intel_broxton_info), ++ INTEL_KBL_IDS(&intel_kabylake_info), ++ INTEL_GLK_IDS(&intel_geminilake_info), + + INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info), + #endif +@@ -448,9 +488,9 @@ static void intel_identify(int flags) + if (unique != stack) + free(unique); + +- xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) HD Graphics: 2000-6000\n"); +- xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Graphics: 5100, 6100\n"); +- xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Pro Graphics: 5200, 6200, P6300\n"); ++ xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) HD Graphics\n"); ++ xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Graphics\n"); ++ xf86Msg(X_INFO, INTEL_NAME ": Driver for Intel(R) Iris(TM) Pro Graphics\n"); + } + + static Bool intel_driver_func(ScrnInfoPtr pScrn, +@@ -508,6 +548,9 @@ static enum accel_method { NOACCEL, SNA, UXA } get_accel_method(void) + if (hosted()) + return SNA; + ++ if (xf86configptr == NULL) /* X -configure */ ++ return SNA; ++ + dev = _xf86findDriver("intel", xf86configptr->conf_device_lst); + if (dev && dev->dev_option_lst) { + const char *s; +@@ -582,10 +625,17 @@ intel_scrn_create(DriverPtr driver, + case NOACCEL: + #endif + case UXA: +- return intel_init_scrn(scrn); ++ return intel_init_scrn(scrn); + #endif + +- default: break; ++ 
default: ++#if USE_SNA ++ return sna_init_scrn(scrn, entity_num); ++#elif USE_UXA ++ return intel_init_scrn(scrn); ++#else ++ break; ++#endif + } + #endif + +@@ -604,6 +654,8 @@ static Bool intel_pci_probe(DriverPtr driver, + struct pci_device *pci, + intptr_t match_data) + { ++ Bool ret; ++ + if (intel_open_device(entity_num, pci, NULL) == -1) { + #if UMS + switch (pci->device_id) { +@@ -621,7 +673,11 @@ static Bool intel_pci_probe(DriverPtr driver, + #endif + } + +- return intel_scrn_create(driver, entity_num, match_data, 0); ++ ret = intel_scrn_create(driver, entity_num, match_data, 0); ++ if (!ret) ++ intel_close_device(entity_num); ++ ++ return ret; + } + + #ifdef XSERVER_PLATFORM_BUS +@@ -644,9 +700,16 @@ intel_platform_probe(DriverPtr driver, + + /* if we get any flags we don't understand fail to probe for now */ + if (flags) +- return FALSE; ++ goto err; ++ ++ if (!intel_scrn_create(driver, entity_num, match_data, scrn_flags)) ++ goto err; + +- return intel_scrn_create(driver, entity_num, match_data, scrn_flags); ++ return TRUE; ++ ++err: ++ intel_close_device(entity_num); ++ return FALSE; + } + #endif + +diff --git a/src/intel_options.c b/src/intel_options.c +index ff8541a4..7f253ac1 100644 +--- a/src/intel_options.c ++++ b/src/intel_options.c +@@ -2,18 +2,24 @@ + #include "config.h" + #endif + ++#include <xorg-server.h> ++#include <xorgVersion.h> ++#include <xf86Parser.h> ++ + #include "intel_options.h" + + const OptionInfoRec intel_options[] = { +- {OPTION_ACCEL_DISABLE, "NoAccel", OPTV_BOOLEAN, {0}, 0}, ++ {OPTION_ACCEL_ENABLE, "Accel", OPTV_BOOLEAN, {0}, 0}, + {OPTION_ACCEL_METHOD, "AccelMethod", OPTV_STRING, {0}, 0}, + {OPTION_BACKLIGHT, "Backlight", OPTV_STRING, {0}, 0}, ++ {OPTION_EDID, "CustomEDID", OPTV_STRING, {0}, 0}, + {OPTION_DRI, "DRI", OPTV_STRING, {0}, 0}, + {OPTION_PRESENT, "Present", OPTV_BOOLEAN, {0}, 1}, + {OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, 0}, + {OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, 0}, + {OPTION_TILING_2D, 
"Tiling", OPTV_BOOLEAN, {0}, 1}, + {OPTION_TILING_FB, "LinearFramebuffer", OPTV_BOOLEAN, {0}, 0}, ++ {OPTION_ROTATION, "HWRotation", OPTV_BOOLEAN, {0}, 1}, + {OPTION_VSYNC, "VSync", OPTV_BOOLEAN, {0}, 1}, + {OPTION_PAGEFLIP, "PageFlip", OPTV_BOOLEAN, {0}, 1}, + {OPTION_SWAPBUFFERS_WAIT, "SwapbuffersWait", OPTV_BOOLEAN, {0}, 1}, +@@ -21,7 +27,6 @@ const OptionInfoRec intel_options[] = { + {OPTION_PREFER_OVERLAY, "XvPreferOverlay", OPTV_BOOLEAN, {0}, 0}, + {OPTION_HOTPLUG, "HotPlug", OPTV_BOOLEAN, {0}, 1}, + {OPTION_REPROBE, "ReprobeOutputs", OPTV_BOOLEAN, {0}, 0}, +- {OPTION_DELETE_DP12, "DeleteUnusedDP12Displays", OPTV_BOOLEAN, {0}, 0}, + #ifdef INTEL_XVMC + {OPTION_XVMC, "XvMC", OPTV_BOOLEAN, {0}, 1}, + #endif +@@ -54,3 +59,85 @@ OptionInfoPtr intel_options_get(ScrnInfoPtr scrn) + + return options; + } ++ ++Bool intel_option_cast_to_bool(OptionInfoPtr options, int id, Bool val) ++{ ++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) ++ xf86getBoolValue(&val, xf86GetOptValString(options, id)); ++#endif ++ return val; ++} ++ ++static int ++namecmp(const char *s1, const char *s2) ++{ ++ char c1, c2; ++ ++ if (!s1 || *s1 == 0) { ++ if (!s2 || *s2 == 0) ++ return 0; ++ else ++ return 1; ++ } ++ ++ while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') ++ s1++; ++ ++ while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') ++ s2++; ++ ++ c1 = isupper(*s1) ? tolower(*s1) : *s1; ++ c2 = isupper(*s2) ? tolower(*s2) : *s2; ++ while (c1 == c2) { ++ if (c1 == '\0') ++ return 0; ++ ++ s1++; ++ while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') ++ s1++; ++ ++ s2++; ++ while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') ++ s2++; ++ ++ c1 = isupper(*s1) ? tolower(*s1) : *s1; ++ c2 = isupper(*s2) ? 
tolower(*s2) : *s2; ++ } ++ ++ return c1 - c2; ++} ++ ++unsigned intel_option_cast_to_unsigned(OptionInfoPtr options, int id, unsigned val) ++{ ++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) ++ const char *str = xf86GetOptValString(options, id); ++#else ++ const char *str = NULL; ++#endif ++ unsigned v; ++ ++ if (str == NULL || *str == '\0') ++ return val; ++ ++ if (namecmp(str, "on") == 0) ++ return val; ++ if (namecmp(str, "true") == 0) ++ return val; ++ if (namecmp(str, "yes") == 0) ++ return val; ++ ++ if (namecmp(str, "0") == 0) ++ return 0; ++ if (namecmp(str, "off") == 0) ++ return 0; ++ if (namecmp(str, "false") == 0) ++ return 0; ++ if (namecmp(str, "no") == 0) ++ return 0; ++ ++ v = atoi(str); ++ if (v) ++ return v; ++ ++ return val; ++} +diff --git a/src/intel_options.h b/src/intel_options.h +index 7e2cbd9b..43635f1f 100644 +--- a/src/intel_options.h ++++ b/src/intel_options.h +@@ -12,15 +12,17 @@ + */ + + enum intel_options { +- OPTION_ACCEL_DISABLE, ++ OPTION_ACCEL_ENABLE, + OPTION_ACCEL_METHOD, + OPTION_BACKLIGHT, ++ OPTION_EDID, + OPTION_DRI, + OPTION_PRESENT, + OPTION_VIDEO_KEY, + OPTION_COLOR_KEY, + OPTION_TILING_2D, + OPTION_TILING_FB, ++ OPTION_ROTATION, + OPTION_VSYNC, + OPTION_PAGEFLIP, + OPTION_SWAPBUFFERS_WAIT, +@@ -28,7 +30,6 @@ enum intel_options { + OPTION_PREFER_OVERLAY, + OPTION_HOTPLUG, + OPTION_REPROBE, +- OPTION_DELETE_DP12, + #if defined(XvMCExtension) && defined(ENABLE_XVMC) + OPTION_XVMC, + #define INTEL_XVMC 1 +@@ -51,5 +52,7 @@ enum intel_options { + + extern const OptionInfoRec intel_options[]; + OptionInfoPtr intel_options_get(ScrnInfoPtr scrn); ++unsigned intel_option_cast_to_unsigned(OptionInfoPtr, int id, unsigned val); ++Bool intel_option_cast_to_bool(OptionInfoPtr, int id, Bool val); + + #endif /* INTEL_OPTIONS_H */ +diff --git a/src/legacy/i810/i810_common.h b/src/legacy/i810/i810_common.h +index 4cc10e8b..8355708c 100644 +--- a/src/legacy/i810/i810_common.h ++++ b/src/legacy/i810/i810_common.h +@@ -52,7 
+52,7 @@ + + #define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1)) + +-/* Using usleep() makes things noticably slow. */ ++/* Using usleep() makes things noticeably slow. */ + #if 0 + #define DELAY(x) usleep(x) + #else +@@ -185,7 +185,7 @@ enum { + * - zbuffer linear offset and pitch -- also invarient + * - drawing origin in back and depth buffers. + * +- * Keep the depth/back buffer state here to acommodate private buffers ++ * Keep the depth/back buffer state here to accommodate private buffers + * in the future. + */ + #define I810_DESTREG_DI0 0 /* CMD_OP_DESTBUFFER_INFO (2 dwords) */ +diff --git a/src/legacy/i810/i810_hwmc.c b/src/legacy/i810/i810_hwmc.c +index 7cb9c1ab..58661b0a 100644 +--- a/src/legacy/i810/i810_hwmc.c ++++ b/src/legacy/i810/i810_hwmc.c +@@ -171,7 +171,7 @@ static XF86MCAdaptorPtr ppAdapt[1] = + * + * I810InitMC + * +- * Initialize the hardware motion compenstation extention for this ++ * Initialize the hardware motion compensation extension for this + * hardware. The initialization routines want the address of the pointers + * to the structures, not the address of the structures. This means we + * allocate (or create static?) the pointer memory and pass that +diff --git a/src/legacy/i810/i810_memory.c b/src/legacy/i810/i810_memory.c +index c3de2777..6f274836 100644 +--- a/src/legacy/i810/i810_memory.c ++++ b/src/legacy/i810/i810_memory.c +@@ -76,7 +76,7 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn) + unsigned long size = pScrn->videoRam * 1024UL; + I810Ptr pI810 = I810PTR(pScrn); + int key; +- long tom = 0; ++ unsigned long tom = 0; + unsigned long physical; + + if (!xf86AgpGARTSupported() || !xf86AcquireGART(pScrn->scrnIndex)) { +@@ -132,8 +132,8 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn) + * Keep it 512K aligned for the sake of tiled regions. 
+ */ + +- tom += 0x7ffff; +- tom &= ~0x7ffff; ++ tom += 0x7ffffUL; ++ tom &= ~0x7ffffUL; + + if ((key = xf86AllocateGARTMemory(pScrn->scrnIndex, size, 1, NULL)) != -1) { + pI810->DcacheOffset = tom; +diff --git a/src/legacy/i810/i810_reg.h b/src/legacy/i810/i810_reg.h +index 54faeb3d..fa091c5b 100644 +--- a/src/legacy/i810/i810_reg.h ++++ b/src/legacy/i810/i810_reg.h +@@ -245,7 +245,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * not sure they refer to local (graphics) memory. + * + * These details are for the local memory control registers, +- * (pp301-310). The test machines are not equiped with local memory, ++ * (pp301-310). The test machines are not equipped with local memory, + * so nothing is tested. Only a single row seems to be supported. + */ + #define DRAM_ROW_TYPE 0x3000 +diff --git a/src/legacy/i810/i810_video.c b/src/legacy/i810/i810_video.c +index be49b91d..af683c81 100644 +--- a/src/legacy/i810/i810_video.c ++++ b/src/legacy/i810/i810_video.c +@@ -77,7 +77,11 @@ static int I810PutImage( ScrnInfoPtr, + static int I810QueryImageAttributes(ScrnInfoPtr, + int, unsigned short *, unsigned short *, int *, int *); + ++#if !HAVE_NOTIFY_FD + static void I810BlockHandler(BLOCKHANDLER_ARGS_DECL); ++#else ++static void I810BlockHandler(void *data, void *_timeout); ++#endif + + #define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE) + +@@ -418,8 +422,14 @@ I810SetupImageVideo(ScreenPtr screen) + + pI810->adaptor = adapt; + ++#if !HAVE_NOTIFY_FD + pI810->BlockHandler = screen->BlockHandler; + screen->BlockHandler = I810BlockHandler; ++#else ++ RegisterBlockAndWakeupHandlers(I810BlockHandler, ++ (ServerWakeupHandlerProcPtr)NoopDDA, ++ pScrn); ++#endif + + xvBrightness = MAKE_ATOM("XV_BRIGHTNESS"); + xvContrast = MAKE_ATOM("XV_CONTRAST"); +@@ -1135,6 +1145,7 @@ I810QueryImageAttributes( + return size; + } + ++#if !HAVE_NOTIFY_FD + static void + I810BlockHandler (BLOCKHANDLER_ARGS_DECL) + { +@@ -1172,6 +1183,38 @@ I810BlockHandler 
(BLOCKHANDLER_ARGS_DECL) + } + } + } ++#else ++static void ++I810BlockHandler(void *data, void *_timeout) ++{ ++ ScrnInfoPtr pScrn = data; ++ I810Ptr pI810 = I810PTR(pScrn); ++ I810PortPrivPtr pPriv = GET_PORT_PRIVATE(pScrn); ++ I810OverlayRegPtr overlay = (I810OverlayRegPtr) (pI810->FbBase + pI810->OverlayStart); ++ ++ if(pPriv->videoStatus & TIMER_MASK) { ++ UpdateCurrentTime(); ++ if(pPriv->videoStatus & OFF_TIMER) { ++ if(pPriv->offTime < currentTime.milliseconds) { ++ /* Turn off the overlay */ ++ overlay->OV0CMD &= 0xFFFFFFFE; ++ OVERLAY_UPDATE(pI810->OverlayPhysical); ++ ++ pPriv->videoStatus = FREE_TIMER; ++ pPriv->freeTime = currentTime.milliseconds + FREE_DELAY; ++ } ++ } else { /* FREE_TIMER */ ++ if(pPriv->freeTime < currentTime.milliseconds) { ++ if(pPriv->linear) { ++ xf86FreeOffscreenLinear(pPriv->linear); ++ pPriv->linear = NULL; ++ } ++ pPriv->videoStatus = 0; ++ } ++ } ++ } ++} ++#endif + + + /*************************************************************************** +@@ -1373,7 +1416,6 @@ I810DisplaySurface( + UpdateCurrentTime(); + pI810Priv->videoStatus = FREE_TIMER; + pI810Priv->freeTime = currentTime.milliseconds + FREE_DELAY; +- pScrn->pScreen->BlockHandler = I810BlockHandler; + } + + return Success; +diff --git a/src/legacy/i810/xvmc/I810XvMC.c b/src/legacy/i810/xvmc/I810XvMC.c +index e6b63d30..a538e999 100644 +--- a/src/legacy/i810/xvmc/I810XvMC.c ++++ b/src/legacy/i810/xvmc/I810XvMC.c +@@ -61,7 +61,7 @@ static int event_base; + // Arguments: pI810XvMC private data structure from the current context. + // Notes: We faked the drmMapBufs for the i810's security so now we have + // to insert an allocated page into the correct spot in the faked +-// list to keep up appearences. ++// list to keep up appearances. + // Concept for this function was taken from Mesa sources. + // Returns: drmBufPtr containing the information about the allocated page. 
+ ***************************************************************************/ +@@ -188,7 +188,7 @@ _X_EXPORT Status XvMCCreateContext(Display *display, XvPortID port, + + /* Check for drm */ + if(! drmAvailable()) { +- printf("Direct Rendering is not avilable on this system!\n"); ++ printf("Direct Rendering is not available on this system!\n"); + return BadAlloc; + } + +@@ -3279,7 +3279,7 @@ _X_EXPORT Status XvMCSyncSurface(Display *display,XvMCSurface *surface) { + // display - Connection to X server + // surface - Surface to flush + // Info: +-// This command is a noop for i810 becuase we always dispatch buffers in ++// This command is a noop for i810 because we always dispatch buffers in + // render. There is little gain to be had with 4k buffers. + // Returns: Status + ***************************************************************************/ +diff --git a/src/render_program/exa_wm.g4i b/src/render_program/exa_wm.g4i +index 5d3d45b1..587b581c 100644 +--- a/src/render_program/exa_wm.g4i ++++ b/src/render_program/exa_wm.g4i +@@ -57,7 +57,7 @@ define(`mask_dw_dy', `g6.4<0,1,0>F') + define(`mask_wo', `g6.12<0,1,0>F') + + /* +- * Local variables. Pairs must be aligned on even reg boundry ++ * Local variables. 
Pairs must be aligned on even reg boundary + */ + + /* this holds the X dest coordinates */ +diff --git a/src/render_program/exa_wm_yuv_rgb.g8a b/src/render_program/exa_wm_yuv_rgb.g8a +index 7def0930..34973ba8 100644 +--- a/src/render_program/exa_wm_yuv_rgb.g8a ++++ b/src/render_program/exa_wm_yuv_rgb.g8a +@@ -76,7 +76,7 @@ add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; + /* + * R = Y + Cr * 1.596 + */ +-mov (8) acc0<1>F Yn<8,8,1>F { compr align1 }; ++mov (8) acc0<1>F Yn_01<8,8,1>F { compr align1 }; + mac.sat(8) src_sample_r_01<1>F Crn_01<8,8,1>F 1.596F { compr align1 }; + + mov (8) acc0<1>F Yn_23<8,8,1>F { compr align1 }; +@@ -84,7 +84,7 @@ mac.sat(8) src_sample_r_23<1>F Crn_23<8,8,1>F 1.596F { compr align1 }; + /* + * G = Crn * -0.813 + Cbn * -0.392 + Y + */ +-mov (8) acc0<1>F Yn_23<8,8,1>F { compr align1 }; ++mov (8) acc0<1>F Yn_01<8,8,1>F { compr align1 }; + mac (8) acc0<1>F Crn_01<8,8,1>F -0.813F { compr align1 }; + mac.sat(8) src_sample_g_01<1>F Cbn_01<8,8,1>F -0.392F { compr align1 }; + +diff --git a/src/render_program/exa_wm_yuv_rgb.g8b b/src/render_program/exa_wm_yuv_rgb.g8b +index 44949538..2cd6fc44 100644 +--- a/src/render_program/exa_wm_yuv_rgb.g8b ++++ b/src/render_program/exa_wm_yuv_rgb.g8b +@@ -6,7 +6,7 @@ + { 0x80600048, 0x21c03ae8, 0x3e8d02c0, 0x3fcc49ba }, + { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 }, + { 0x80600048, 0x21e03ae8, 0x3e8d02e0, 0x3fcc49ba }, +- { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 }, ++ { 0x00600001, 0x24003ae0, 0x008d0300, 0x00000000 }, + { 0x00600048, 0x24003ae0, 0x3e8d02c0, 0xbf5020c5 }, + { 0x80600048, 0x22003ae8, 0x3e8d0340, 0xbec8b439 }, + { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 }, +diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am +index e09a8d49..adf13963 100644 +--- a/src/sna/Makefile.am ++++ b/src/sna/Makefile.am +@@ -107,6 +107,8 @@ libsna_la_SOURCES = \ + gen8_render.h \ + gen8_vertex.c \ + gen8_vertex.h \ ++ gen9_render.c \ ++ gen9_render.h \ + xassert.h \ + $(NULL) + +diff 
--git a/src/sna/blt.c b/src/sna/blt.c +index b5bfee69..cb90437a 100644 +--- a/src/sna/blt.c ++++ b/src/sna/blt.c +@@ -30,112 +30,608 @@ + #endif + + #include "sna.h" ++#include <pixman.h> + +-#if __x86_64__ +-#define USE_SSE2 1 +-#endif +- +-#if USE_SSE2 ++#if defined(sse2) ++#pragma GCC push_options ++#pragma GCC target("sse2,inline-all-stringops,fpmath=sse") ++#pragma GCC optimize("Ofast") + #include <xmmintrin.h> + + #if __x86_64__ + #define have_sse2() 1 + #else +-enum { +- MMX = 0x1, +- MMX_EXTENSIONS = 0x2, +- SSE = 0x6, +- SSE2 = 0x8, +- CMOV = 0x10 +-}; +- +-#ifdef __GNUC__ +-static unsigned int +-detect_cpu_features(void) +-{ +- unsigned int features; +- unsigned int result = 0; +- +- char vendor[13]; +- vendor[0] = 0; +- vendor[12] = 0; +- +- asm ( +- "pushf\n" +- "pop %%eax\n" +- "mov %%eax, %%ecx\n" +- "xor $0x00200000, %%eax\n" +- "push %%eax\n" +- "popf\n" +- "pushf\n" +- "pop %%eax\n" +- "mov $0x0, %%edx\n" +- "xor %%ecx, %%eax\n" +- "jz 1f\n" +- +- "mov $0x00000000, %%eax\n" +- "push %%ebx\n" +- "cpuid\n" +- "mov %%ebx, %%eax\n" +- "pop %%ebx\n" +- "mov %%eax, %1\n" +- "mov %%edx, %2\n" +- "mov %%ecx, %3\n" +- "mov $0x00000001, %%eax\n" +- "push %%ebx\n" +- "cpuid\n" +- "pop %%ebx\n" +- "1:\n" +- "mov %%edx, %0\n" +- : "=r" (result), "=m" (vendor[0]), "=m" (vendor[4]), "=m" (vendor[8]) +- :: "%eax", "%ecx", "%edx"); +- +- features = 0; +- if (result) { +- /* result now contains the standard feature bits */ +- if (result & (1 << 15)) +- features |= CMOV; +- if (result & (1 << 23)) +- features |= MMX; +- if (result & (1 << 25)) +- features |= SSE; +- if (result & (1 << 26)) +- features |= SSE2; +- } +- return features; +-} +-#else +-static unsigned int detect_cpu_features(void) { return 0; } +-#endif +- + static bool have_sse2(void) + { + static int sse2_present = -1; + + if (sse2_present == -1) +- sse2_present = detect_cpu_features() & SSE2; ++ sse2_present = sna_cpu_detect() & SSE2; + + return sse2_present; + } + #endif + +-static inline __m128i 
++static force_inline __m128i + xmm_create_mask_32(uint32_t mask) + { + return _mm_set_epi32(mask, mask, mask, mask); + } + +-static inline __m128i ++static force_inline __m128i ++xmm_load_128(const __m128i *src) ++{ ++ return _mm_load_si128(src); ++} ++ ++static force_inline __m128i + xmm_load_128u(const __m128i *src) + { + return _mm_loadu_si128(src); + } + +-static inline void ++static force_inline void + xmm_save_128(__m128i *dst, __m128i data) + { + _mm_store_si128(dst, data); + } ++ ++static force_inline void ++xmm_save_128u(__m128i *dst, __m128i data) ++{ ++ _mm_storeu_si128(dst, data); ++} ++ ++static force_inline void ++to_sse128xN(uint8_t *dst, const uint8_t *src, int bytes) ++{ ++ int i; ++ ++ for (i = 0; i < bytes / 128; i++) { ++ __m128i xmm0, xmm1, xmm2, xmm3; ++ __m128i xmm4, xmm5, xmm6, xmm7; ++ ++ xmm0 = xmm_load_128u((const __m128i*)src + 0); ++ xmm1 = xmm_load_128u((const __m128i*)src + 1); ++ xmm2 = xmm_load_128u((const __m128i*)src + 2); ++ xmm3 = xmm_load_128u((const __m128i*)src + 3); ++ xmm4 = xmm_load_128u((const __m128i*)src + 4); ++ xmm5 = xmm_load_128u((const __m128i*)src + 5); ++ xmm6 = xmm_load_128u((const __m128i*)src + 6); ++ xmm7 = xmm_load_128u((const __m128i*)src + 7); ++ ++ xmm_save_128((__m128i*)dst + 0, xmm0); ++ xmm_save_128((__m128i*)dst + 1, xmm1); ++ xmm_save_128((__m128i*)dst + 2, xmm2); ++ xmm_save_128((__m128i*)dst + 3, xmm3); ++ xmm_save_128((__m128i*)dst + 4, xmm4); ++ xmm_save_128((__m128i*)dst + 5, xmm5); ++ xmm_save_128((__m128i*)dst + 6, xmm6); ++ xmm_save_128((__m128i*)dst + 7, xmm7); ++ ++ dst += 128; ++ src += 128; ++ } ++} ++ ++static force_inline void ++to_sse64(uint8_t *dst, const uint8_t *src) ++{ ++ __m128i xmm1, xmm2, xmm3, xmm4; ++ ++ xmm1 = xmm_load_128u((const __m128i*)src + 0); ++ xmm2 = xmm_load_128u((const __m128i*)src + 1); ++ xmm3 = xmm_load_128u((const __m128i*)src + 2); ++ xmm4 = xmm_load_128u((const __m128i*)src + 3); ++ ++ xmm_save_128((__m128i*)dst + 0, xmm1); ++ xmm_save_128((__m128i*)dst + 
1, xmm2); ++ xmm_save_128((__m128i*)dst + 2, xmm3); ++ xmm_save_128((__m128i*)dst + 3, xmm4); ++} ++ ++static force_inline void ++to_sse32(uint8_t *dst, const uint8_t *src) ++{ ++ __m128i xmm1, xmm2; ++ ++ xmm1 = xmm_load_128u((const __m128i*)src + 0); ++ xmm2 = xmm_load_128u((const __m128i*)src + 1); ++ ++ xmm_save_128((__m128i*)dst + 0, xmm1); ++ xmm_save_128((__m128i*)dst + 1, xmm2); ++} ++ ++static force_inline void ++to_sse16(uint8_t *dst, const uint8_t *src) ++{ ++ xmm_save_128((__m128i*)dst, xmm_load_128u((const __m128i*)src)); ++} ++ ++static void to_memcpy(uint8_t *dst, const uint8_t *src, unsigned len) ++{ ++ assert(len); ++ if ((uintptr_t)dst & 15) { ++ if (len <= 16 - ((uintptr_t)dst & 15)) { ++ memcpy(dst, src, len); ++ return; ++ } ++ ++ if ((uintptr_t)dst & 1) { ++ assert(len >= 1); ++ *dst++ = *src++; ++ len--; ++ } ++ if ((uintptr_t)dst & 2) { ++ assert(((uintptr_t)dst & 1) == 0); ++ assert(len >= 2); ++ *(uint16_t *)dst = *(const uint16_t *)src; ++ dst += 2; ++ src += 2; ++ len -= 2; ++ } ++ if ((uintptr_t)dst & 4) { ++ assert(((uintptr_t)dst & 3) == 0); ++ assert(len >= 4); ++ *(uint32_t *)dst = *(const uint32_t *)src; ++ dst += 4; ++ src += 4; ++ len -= 4; ++ } ++ if ((uintptr_t)dst & 8) { ++ assert(((uintptr_t)dst & 7) == 0); ++ assert(len >= 8); ++ *(uint64_t *)dst = *(const uint64_t *)src; ++ dst += 8; ++ src += 8; ++ len -= 8; ++ } ++ } ++ ++ assert(((uintptr_t)dst & 15) == 0); ++ while (len >= 64) { ++ to_sse64(dst, src); ++ dst += 64; ++ src += 64; ++ len -= 64; ++ } ++ if (len == 0) ++ return; ++ ++ if (len & 32) { ++ to_sse32(dst, src); ++ dst += 32; ++ src += 32; ++ } ++ if (len & 16) { ++ to_sse16(dst, src); ++ dst += 16; ++ src += 16; ++ } ++ if (len & 8) { ++ *(uint64_t *)dst = *(uint64_t *)src; ++ dst += 8; ++ src += 8; ++ } ++ if (len & 4) { ++ *(uint32_t *)dst = *(uint32_t *)src; ++ dst += 4; ++ src += 4; ++ } ++ memcpy(dst, src, len & 3); ++} ++ ++static void ++memcpy_to_tiled_x__swizzle_0__sse2(const void *src, void *dst, int 
bpp, ++ int32_t src_stride, int32_t dst_stride, ++ int16_t src_x, int16_t src_y, ++ int16_t dst_x, int16_t dst_y, ++ uint16_t width, uint16_t height) ++{ ++ const unsigned tile_width = 512; ++ const unsigned tile_height = 8; ++ const unsigned tile_size = 4096; ++ ++ const unsigned cpp = bpp / 8; ++ const unsigned tile_pixels = tile_width / cpp; ++ const unsigned tile_shift = ffs(tile_pixels) - 1; ++ const unsigned tile_mask = tile_pixels - 1; ++ ++ unsigned offset_x, length_x; ++ ++ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", ++ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); ++ assert(src != dst); ++ ++ if (src_x | src_y) ++ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; ++ width *= cpp; ++ assert(src_stride >= width); ++ ++ if (dst_x & tile_mask) { ++ offset_x = (dst_x & tile_mask) * cpp; ++ length_x = min(tile_width - offset_x, width); ++ } else ++ length_x = 0; ++ dst = (uint8_t *)dst + (dst_x >> tile_shift) * tile_size; ++ ++ while (height--) { ++ unsigned w = width; ++ const uint8_t *src_row = src; ++ uint8_t *tile_row = dst; ++ ++ src = (const uint8_t *)src + src_stride; ++ ++ tile_row += dst_y / tile_height * dst_stride * tile_height; ++ tile_row += (dst_y & (tile_height-1)) * tile_width; ++ dst_y++; ++ ++ if (length_x) { ++ to_memcpy(tile_row + offset_x, src_row, length_x); ++ ++ tile_row += tile_size; ++ src_row = (const uint8_t *)src_row + length_x; ++ w -= length_x; ++ } ++ while (w >= tile_width) { ++ assert(((uintptr_t)tile_row & (tile_width - 1)) == 0); ++ to_sse128xN(assume_aligned(tile_row, tile_width), ++ src_row, tile_width); ++ tile_row += tile_size; ++ src_row = (const uint8_t *)src_row + tile_width; ++ w -= tile_width; ++ } ++ if (w) { ++ assert(((uintptr_t)tile_row & (tile_width - 1)) == 0); ++ to_memcpy(assume_aligned(tile_row, tile_width), ++ src_row, w); ++ } ++ } ++} ++ ++static force_inline void ++from_sse128xNu(uint8_t *dst, const uint8_t *src, int 
bytes) ++{ ++ int i; ++ ++ assert(((uintptr_t)src & 15) == 0); ++ ++ for (i = 0; i < bytes / 128; i++) { ++ __m128i xmm0, xmm1, xmm2, xmm3; ++ __m128i xmm4, xmm5, xmm6, xmm7; ++ ++ xmm0 = xmm_load_128((const __m128i*)src + 0); ++ xmm1 = xmm_load_128((const __m128i*)src + 1); ++ xmm2 = xmm_load_128((const __m128i*)src + 2); ++ xmm3 = xmm_load_128((const __m128i*)src + 3); ++ xmm4 = xmm_load_128((const __m128i*)src + 4); ++ xmm5 = xmm_load_128((const __m128i*)src + 5); ++ xmm6 = xmm_load_128((const __m128i*)src + 6); ++ xmm7 = xmm_load_128((const __m128i*)src + 7); ++ ++ xmm_save_128u((__m128i*)dst + 0, xmm0); ++ xmm_save_128u((__m128i*)dst + 1, xmm1); ++ xmm_save_128u((__m128i*)dst + 2, xmm2); ++ xmm_save_128u((__m128i*)dst + 3, xmm3); ++ xmm_save_128u((__m128i*)dst + 4, xmm4); ++ xmm_save_128u((__m128i*)dst + 5, xmm5); ++ xmm_save_128u((__m128i*)dst + 6, xmm6); ++ xmm_save_128u((__m128i*)dst + 7, xmm7); ++ ++ dst += 128; ++ src += 128; ++ } ++} ++ ++static force_inline void ++from_sse128xNa(uint8_t *dst, const uint8_t *src, int bytes) ++{ ++ int i; ++ ++ assert(((uintptr_t)dst & 15) == 0); ++ assert(((uintptr_t)src & 15) == 0); ++ ++ for (i = 0; i < bytes / 128; i++) { ++ __m128i xmm0, xmm1, xmm2, xmm3; ++ __m128i xmm4, xmm5, xmm6, xmm7; ++ ++ xmm0 = xmm_load_128((const __m128i*)src + 0); ++ xmm1 = xmm_load_128((const __m128i*)src + 1); ++ xmm2 = xmm_load_128((const __m128i*)src + 2); ++ xmm3 = xmm_load_128((const __m128i*)src + 3); ++ xmm4 = xmm_load_128((const __m128i*)src + 4); ++ xmm5 = xmm_load_128((const __m128i*)src + 5); ++ xmm6 = xmm_load_128((const __m128i*)src + 6); ++ xmm7 = xmm_load_128((const __m128i*)src + 7); ++ ++ xmm_save_128((__m128i*)dst + 0, xmm0); ++ xmm_save_128((__m128i*)dst + 1, xmm1); ++ xmm_save_128((__m128i*)dst + 2, xmm2); ++ xmm_save_128((__m128i*)dst + 3, xmm3); ++ xmm_save_128((__m128i*)dst + 4, xmm4); ++ xmm_save_128((__m128i*)dst + 5, xmm5); ++ xmm_save_128((__m128i*)dst + 6, xmm6); ++ xmm_save_128((__m128i*)dst + 7, xmm7); ++ ++ 
dst += 128; ++ src += 128; ++ } ++} ++ ++static force_inline void ++from_sse64u(uint8_t *dst, const uint8_t *src) ++{ ++ __m128i xmm1, xmm2, xmm3, xmm4; ++ ++ assert(((uintptr_t)src & 15) == 0); ++ ++ xmm1 = xmm_load_128((const __m128i*)src + 0); ++ xmm2 = xmm_load_128((const __m128i*)src + 1); ++ xmm3 = xmm_load_128((const __m128i*)src + 2); ++ xmm4 = xmm_load_128((const __m128i*)src + 3); ++ ++ xmm_save_128u((__m128i*)dst + 0, xmm1); ++ xmm_save_128u((__m128i*)dst + 1, xmm2); ++ xmm_save_128u((__m128i*)dst + 2, xmm3); ++ xmm_save_128u((__m128i*)dst + 3, xmm4); ++} ++ ++static force_inline void ++from_sse64a(uint8_t *dst, const uint8_t *src) ++{ ++ __m128i xmm1, xmm2, xmm3, xmm4; ++ ++ assert(((uintptr_t)dst & 15) == 0); ++ assert(((uintptr_t)src & 15) == 0); ++ ++ xmm1 = xmm_load_128((const __m128i*)src + 0); ++ xmm2 = xmm_load_128((const __m128i*)src + 1); ++ xmm3 = xmm_load_128((const __m128i*)src + 2); ++ xmm4 = xmm_load_128((const __m128i*)src + 3); ++ ++ xmm_save_128((__m128i*)dst + 0, xmm1); ++ xmm_save_128((__m128i*)dst + 1, xmm2); ++ xmm_save_128((__m128i*)dst + 2, xmm3); ++ xmm_save_128((__m128i*)dst + 3, xmm4); ++} ++ ++static force_inline void ++from_sse32u(uint8_t *dst, const uint8_t *src) ++{ ++ __m128i xmm1, xmm2; ++ ++ xmm1 = xmm_load_128((const __m128i*)src + 0); ++ xmm2 = xmm_load_128((const __m128i*)src + 1); ++ ++ xmm_save_128u((__m128i*)dst + 0, xmm1); ++ xmm_save_128u((__m128i*)dst + 1, xmm2); ++} ++ ++static force_inline void ++from_sse32a(uint8_t *dst, const uint8_t *src) ++{ ++ __m128i xmm1, xmm2; ++ ++ assert(((uintptr_t)dst & 15) == 0); ++ assert(((uintptr_t)src & 15) == 0); ++ ++ xmm1 = xmm_load_128((const __m128i*)src + 0); ++ xmm2 = xmm_load_128((const __m128i*)src + 1); ++ ++ xmm_save_128((__m128i*)dst + 0, xmm1); ++ xmm_save_128((__m128i*)dst + 1, xmm2); ++} ++ ++static force_inline void ++from_sse16u(uint8_t *dst, const uint8_t *src) ++{ ++ assert(((uintptr_t)src & 15) == 0); ++ ++ xmm_save_128u((__m128i*)dst, xmm_load_128((const 
__m128i*)src)); ++} ++ ++static force_inline void ++from_sse16a(uint8_t *dst, const uint8_t *src) ++{ ++ assert(((uintptr_t)dst & 15) == 0); ++ assert(((uintptr_t)src & 15) == 0); ++ ++ xmm_save_128((__m128i*)dst, xmm_load_128((const __m128i*)src)); ++} ++ ++static void ++memcpy_from_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, ++ int32_t src_stride, int32_t dst_stride, ++ int16_t src_x, int16_t src_y, ++ int16_t dst_x, int16_t dst_y, ++ uint16_t width, uint16_t height) ++{ ++ const unsigned tile_width = 512; ++ const unsigned tile_height = 8; ++ const unsigned tile_size = 4096; ++ ++ const unsigned cpp = bpp / 8; ++ const unsigned tile_pixels = tile_width / cpp; ++ const unsigned tile_shift = ffs(tile_pixels) - 1; ++ const unsigned tile_mask = tile_pixels - 1; ++ ++ unsigned length_x, offset_x; ++ ++ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", ++ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); ++ assert(src != dst); ++ ++ if (dst_x | dst_y) ++ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; ++ width *= cpp; ++ assert(dst_stride >= width); ++ if (src_x & tile_mask) { ++ offset_x = (src_x & tile_mask) * cpp; ++ length_x = min(tile_width - offset_x, width); ++ dst_stride -= width; ++ dst_stride += (width - length_x) & 15; ++ } else { ++ offset_x = 0; ++ dst_stride -= width & ~15; ++ } ++ assert(dst_stride >= 0); ++ src = (const uint8_t *)src + (src_x >> tile_shift) * tile_size; ++ ++ while (height--) { ++ unsigned w = width; ++ const uint8_t *tile_row = src; ++ ++ tile_row += src_y / tile_height * src_stride * tile_height; ++ tile_row += (src_y & (tile_height-1)) * tile_width; ++ src_y++; ++ ++ if (offset_x) { ++ memcpy(dst, tile_row + offset_x, length_x); ++ tile_row += tile_size; ++ dst = (uint8_t *)dst + length_x; ++ w -= length_x; ++ } ++ ++ if ((uintptr_t)dst & 15) { ++ while (w >= tile_width) { ++ from_sse128xNu(dst, ++ assume_aligned(tile_row, tile_width), ++ 
tile_width); ++ tile_row += tile_size; ++ dst = (uint8_t *)dst + tile_width; ++ w -= tile_width; ++ } ++ while (w >= 64) { ++ from_sse64u(dst, tile_row); ++ tile_row += 64; ++ dst = (uint8_t *)dst + 64; ++ w -= 64; ++ } ++ if (w & 32) { ++ from_sse32u(dst, tile_row); ++ tile_row += 32; ++ dst = (uint8_t *)dst + 32; ++ } ++ if (w & 16) { ++ from_sse16u(dst, tile_row); ++ tile_row += 16; ++ dst = (uint8_t *)dst + 16; ++ } ++ memcpy(dst, assume_aligned(tile_row, 16), w & 15); ++ } else { ++ while (w >= tile_width) { ++ from_sse128xNa(assume_aligned(dst, 16), ++ assume_aligned(tile_row, tile_width), ++ tile_width); ++ tile_row += tile_size; ++ dst = (uint8_t *)dst + tile_width; ++ w -= tile_width; ++ } ++ while (w >= 64) { ++ from_sse64a(dst, tile_row); ++ tile_row += 64; ++ dst = (uint8_t *)dst + 64; ++ w -= 64; ++ } ++ if (w & 32) { ++ from_sse32a(dst, tile_row); ++ tile_row += 32; ++ dst = (uint8_t *)dst + 32; ++ } ++ if (w & 16) { ++ from_sse16a(dst, tile_row); ++ tile_row += 16; ++ dst = (uint8_t *)dst + 16; ++ } ++ memcpy(assume_aligned(dst, 16), ++ assume_aligned(tile_row, 16), ++ w & 15); ++ } ++ dst = (uint8_t *)dst + dst_stride; ++ } ++} ++ ++static void ++memcpy_between_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp, ++ int32_t src_stride, int32_t dst_stride, ++ int16_t src_x, int16_t src_y, ++ int16_t dst_x, int16_t dst_y, ++ uint16_t width, uint16_t height) ++{ ++ const unsigned tile_width = 512; ++ const unsigned tile_height = 8; ++ const unsigned tile_size = 4096; ++ ++ const unsigned cpp = bpp / 8; ++ const unsigned tile_pixels = tile_width / cpp; ++ const unsigned tile_shift = ffs(tile_pixels) - 1; ++ const unsigned tile_mask = tile_pixels - 1; ++ ++ unsigned ox, lx; ++ ++ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", ++ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); ++ assert(src != dst); ++ ++ width *= cpp; ++ dst_stride *= tile_height; ++ src_stride *= tile_height; ++ 
++ assert((dst_x & tile_mask) == (src_x & tile_mask)); ++ if (dst_x & tile_mask) { ++ ox = (dst_x & tile_mask) * cpp; ++ lx = min(tile_width - ox, width); ++ assert(lx != 0); ++ } else ++ lx = 0; ++ ++ if (dst_x) ++ dst = (uint8_t *)dst + (dst_x >> tile_shift) * tile_size; ++ if (src_x) ++ src = (const uint8_t *)src + (src_x >> tile_shift) * tile_size; ++ ++ while (height--) { ++ const uint8_t *src_row; ++ uint8_t *dst_row; ++ unsigned w = width; ++ ++ dst_row = dst; ++ dst_row += dst_y / tile_height * dst_stride; ++ dst_row += (dst_y & (tile_height-1)) * tile_width; ++ dst_y++; ++ ++ src_row = src; ++ src_row += src_y / tile_height * src_stride; ++ src_row += (src_y & (tile_height-1)) * tile_width; ++ src_y++; ++ ++ if (lx) { ++ to_memcpy(dst_row + ox, src_row + ox, lx); ++ dst_row += tile_size; ++ src_row += tile_size; ++ w -= lx; ++ } ++ while (w >= tile_width) { ++ assert(((uintptr_t)dst_row & (tile_width - 1)) == 0); ++ assert(((uintptr_t)src_row & (tile_width - 1)) == 0); ++ to_sse128xN(assume_aligned(dst_row, tile_width), ++ assume_aligned(src_row, tile_width), ++ tile_width); ++ dst_row += tile_size; ++ src_row += tile_size; ++ w -= tile_width; ++ } ++ if (w) { ++ assert(((uintptr_t)dst_row & (tile_width - 1)) == 0); ++ assert(((uintptr_t)src_row & (tile_width - 1)) == 0); ++ to_memcpy(assume_aligned(dst_row, tile_width), ++ assume_aligned(src_row, tile_width), ++ w); ++ } ++ } ++} ++ ++#pragma GCC push_options + #endif + + fast void +@@ -257,7 +753,8 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp, + if (dst_x & tile_mask) { + const unsigned x = (dst_x & tile_mask) * cpp; + const unsigned len = min(tile_width - x, w); +- memcpy(tile_row + x, src, len); ++ memcpy(assume_misaligned(tile_row + x, tile_width, x), ++ src, len); + + tile_row += tile_size; + src = (const uint8_t *)src + len; +@@ -265,13 +762,13 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp, + } + } + while (w >= tile_width) { +- memcpy(tile_row, src, 
tile_width); +- ++ memcpy(assume_aligned(tile_row, tile_width), ++ src, tile_width); + tile_row += tile_size; + src = (const uint8_t *)src + tile_width; + w -= tile_width; + } +- memcpy(tile_row, src, w); ++ memcpy(assume_aligned(tile_row, tile_width), src, w); + src = (const uint8_t *)src + src_stride + w; + dst_y++; + } +@@ -313,7 +810,7 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp, + if (src_x & tile_mask) { + const unsigned x = (src_x & tile_mask) * cpp; + const unsigned len = min(tile_width - x, w); +- memcpy(dst, tile_row + x, len); ++ memcpy(dst, assume_misaligned(tile_row + x, tile_width, x), len); + + tile_row += tile_size; + dst = (uint8_t *)dst + len; +@@ -321,440 +818,371 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp, + } + } + while (w >= tile_width) { +- memcpy(dst, tile_row, tile_width); ++ memcpy(dst, ++ assume_aligned(tile_row, tile_width), ++ tile_width); + + tile_row += tile_size; + dst = (uint8_t *)dst + tile_width; + w -= tile_width; + } +- memcpy(dst, tile_row, w); ++ memcpy(dst, assume_aligned(tile_row, tile_width), w); + dst = (uint8_t *)dst + dst_stride + w; + src_y++; + } + } + +-fast_memcpy static void +-memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp, +- int32_t src_stride, int32_t dst_stride, +- int16_t src_x, int16_t src_y, +- int16_t dst_x, int16_t dst_y, +- uint16_t width, uint16_t height) ++static fast_memcpy void ++memcpy_between_tiled_x__swizzle_0(const void *src, void *dst, int bpp, ++ int32_t src_stride, int32_t dst_stride, ++ int16_t src_x, int16_t src_y, ++ int16_t dst_x, int16_t dst_y, ++ uint16_t width, uint16_t height) + { + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; +- const unsigned stride_tiles = dst_stride / tile_width; +- const unsigned swizzle_pixels = 64 / cpp; +- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; +- const unsigned tile_mask = (1 << 
tile_pixels) - 1; +- +- unsigned x, y; ++ const unsigned tile_pixels = tile_width / cpp; ++ const unsigned tile_shift = ffs(tile_pixels) - 1; ++ const unsigned tile_mask = tile_pixels - 1; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); ++ assert(src != dst); ++ assert((dst_x & tile_mask) == (src_x & tile_mask)); + +- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; +- +- for (y = 0; y < height; ++y) { +- const uint32_t dy = y + dst_y; +- const uint32_t tile_row = +- (dy / tile_height * stride_tiles * tile_size + +- (dy & (tile_height-1)) * tile_width); +- const uint8_t *src_row = (const uint8_t *)src + src_stride * y; +- uint32_t dx = dst_x, offset; +- +- x = width * cpp; +- if (dx & (swizzle_pixels - 1)) { +- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); +- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; +- offset = tile_row + +- (dx >> tile_pixels) * tile_size + +- (dx & tile_mask) * cpp; +- offset ^= (offset >> 3) & 64; +- +- memcpy((char *)dst + offset, src_row, length * cpp); +- +- src_row += length * cpp; +- x -= length * cpp; +- dx += length; +- } +- while (x >= 64) { +- offset = tile_row + +- (dx >> tile_pixels) * tile_size + +- (dx & tile_mask) * cpp; +- offset ^= (offset >> 3) & 64; +- +- memcpy((char *)dst + offset, src_row, 64); +- +- src_row += 64; +- x -= 64; +- dx += swizzle_pixels; +- } +- if (x) { +- offset = tile_row + +- (dx >> tile_pixels) * tile_size + +- (dx & tile_mask) * cpp; +- offset ^= (offset >> 3) & 64; +- memcpy((char *)dst + offset, src_row, x); +- } +- } +-} ++ while (height--) { ++ unsigned w = width * cpp; ++ uint8_t *dst_row = dst; ++ const uint8_t *src_row = src; + +-fast_memcpy static void +-memcpy_from_tiled_x__swizzle_9(const void *src, void *dst, int bpp, +- int32_t src_stride, int32_t dst_stride, +- int16_t src_x, int16_t src_y, +- int16_t dst_x, 
int16_t dst_y, +- uint16_t width, uint16_t height) +-{ +- const unsigned tile_width = 512; +- const unsigned tile_height = 8; +- const unsigned tile_size = 4096; ++ dst_row += dst_y / tile_height * dst_stride * tile_height; ++ dst_row += (dst_y & (tile_height-1)) * tile_width; ++ if (dst_x) ++ dst_row += (dst_x >> tile_shift) * tile_size; ++ dst_y++; + +- const unsigned cpp = bpp / 8; +- const unsigned stride_tiles = src_stride / tile_width; +- const unsigned swizzle_pixels = 64 / cpp; +- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; +- const unsigned tile_mask = (1 << tile_pixels) - 1; ++ src_row += src_y / tile_height * src_stride * tile_height; ++ src_row += (src_y & (tile_height-1)) * tile_width; ++ if (src_x) ++ src_row += (src_x >> tile_shift) * tile_size; ++ src_y++; + +- unsigned x, y; ++ if (dst_x & tile_mask) { ++ const unsigned x = (dst_x & tile_mask) * cpp; ++ const unsigned len = min(tile_width - x, w); + +- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", +- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); ++ memcpy(assume_misaligned(dst_row + x, tile_width, x), ++ assume_misaligned(src_row + x, tile_width, x), ++ len); + +- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; +- +- for (y = 0; y < height; ++y) { +- const uint32_t sy = y + src_y; +- const uint32_t tile_row = +- (sy / tile_height * stride_tiles * tile_size + +- (sy & (tile_height-1)) * tile_width); +- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; +- uint32_t sx = src_x, offset; +- +- x = width * cpp; +- if (sx & (swizzle_pixels - 1)) { +- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); +- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; +- offset = tile_row + +- (sx >> tile_pixels) * tile_size + +- (sx & tile_mask) * cpp; +- offset ^= (offset >> 3) & 64; +- +- memcpy(dst_row, (const char *)src + offset, length * cpp); +- +- dst_row += length * cpp; +- x -= 
length * cpp; +- sx += length; ++ dst_row += tile_size; ++ src_row += tile_size; ++ w -= len; + } +- while (x >= 64) { +- offset = tile_row + +- (sx >> tile_pixels) * tile_size + +- (sx & tile_mask) * cpp; +- offset ^= (offset >> 3) & 64; + +- memcpy(dst_row, (const char *)src + offset, 64); +- +- dst_row += 64; +- x -= 64; +- sx += swizzle_pixels; +- } +- if (x) { +- offset = tile_row + +- (sx >> tile_pixels) * tile_size + +- (sx & tile_mask) * cpp; +- offset ^= (offset >> 3) & 64; +- memcpy(dst_row, (const char *)src + offset, x); ++ while (w >= tile_width) { ++ memcpy(assume_aligned(dst_row, tile_width), ++ assume_aligned(src_row, tile_width), ++ tile_width); ++ dst_row += tile_size; ++ src_row += tile_size; ++ w -= tile_width; + } ++ memcpy(assume_aligned(dst_row, tile_width), ++ assume_aligned(src_row, tile_width), ++ w); + } + } + +-fast_memcpy static void +-memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp, +- int32_t src_stride, int32_t dst_stride, +- int16_t src_x, int16_t src_y, +- int16_t dst_x, int16_t dst_y, +- uint16_t width, uint16_t height) +-{ +- const unsigned tile_width = 512; +- const unsigned tile_height = 8; +- const unsigned tile_size = 4096; +- +- const unsigned cpp = bpp / 8; +- const unsigned stride_tiles = dst_stride / tile_width; +- const unsigned swizzle_pixels = 64 / cpp; +- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; +- const unsigned tile_mask = (1 << tile_pixels) - 1; ++#define memcpy_to_tiled_x(swizzle) \ ++fast_memcpy static void \ ++memcpy_to_tiled_x__##swizzle (const void *src, void *dst, int bpp, \ ++ int32_t src_stride, int32_t dst_stride, \ ++ int16_t src_x, int16_t src_y, \ ++ int16_t dst_x, int16_t dst_y, \ ++ uint16_t width, uint16_t height) \ ++{ \ ++ const unsigned tile_width = 512; \ ++ const unsigned tile_height = 8; \ ++ const unsigned tile_size = 4096; \ ++ const unsigned cpp = bpp / 8; \ ++ const unsigned stride_tiles = dst_stride / tile_width; \ ++ const unsigned swizzle_pixels = 64 / 
cpp; \ ++ const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \ ++ const unsigned tile_mask = (1 << tile_pixels) - 1; \ ++ unsigned x, y; \ ++ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \ ++ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \ ++ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; \ ++ for (y = 0; y < height; ++y) { \ ++ const uint32_t dy = y + dst_y; \ ++ const uint32_t tile_row = \ ++ (dy / tile_height * stride_tiles * tile_size + \ ++ (dy & (tile_height-1)) * tile_width); \ ++ const uint8_t *src_row = (const uint8_t *)src + src_stride * y; \ ++ uint32_t dx = dst_x; \ ++ x = width * cpp; \ ++ if (dx & (swizzle_pixels - 1)) { \ ++ const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); \ ++ const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; \ ++ uint32_t offset = \ ++ tile_row + \ ++ (dx >> tile_pixels) * tile_size + \ ++ (dx & tile_mask) * cpp; \ ++ memcpy((char *)dst + swizzle(offset), src_row, length * cpp); \ ++ src_row += length * cpp; \ ++ x -= length * cpp; \ ++ dx += length; \ ++ } \ ++ while (x >= 64) { \ ++ uint32_t offset = \ ++ tile_row + \ ++ (dx >> tile_pixels) * tile_size + \ ++ (dx & tile_mask) * cpp; \ ++ memcpy(assume_aligned((char *)dst+swizzle(offset),64), \ ++ src_row, 64); \ ++ src_row += 64; \ ++ x -= 64; \ ++ dx += swizzle_pixels; \ ++ } \ ++ if (x) { \ ++ uint32_t offset = \ ++ tile_row + \ ++ (dx >> tile_pixels) * tile_size + \ ++ (dx & tile_mask) * cpp; \ ++ memcpy(assume_aligned((char *)dst + swizzle(offset), 64), src_row, x); \ ++ } \ ++ } \ ++} + +- unsigned x, y; ++#define memcpy_from_tiled_x(swizzle) \ ++fast_memcpy static void \ ++memcpy_from_tiled_x__##swizzle (const void *src, void *dst, int bpp, \ ++ int32_t src_stride, int32_t dst_stride, \ ++ int16_t src_x, int16_t src_y, \ ++ int16_t dst_x, int16_t dst_y, \ ++ uint16_t width, uint16_t height) \ ++{ \ ++ const unsigned tile_width = 512; \ ++ 
const unsigned tile_height = 8; \ ++ const unsigned tile_size = 4096; \ ++ const unsigned cpp = bpp / 8; \ ++ const unsigned stride_tiles = src_stride / tile_width; \ ++ const unsigned swizzle_pixels = 64 / cpp; \ ++ const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \ ++ const unsigned tile_mask = (1 << tile_pixels) - 1; \ ++ unsigned x, y; \ ++ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \ ++ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \ ++ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; \ ++ for (y = 0; y < height; ++y) { \ ++ const uint32_t sy = y + src_y; \ ++ const uint32_t tile_row = \ ++ (sy / tile_height * stride_tiles * tile_size + \ ++ (sy & (tile_height-1)) * tile_width); \ ++ uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; \ ++ uint32_t sx = src_x; \ ++ x = width * cpp; \ ++ if (sx & (swizzle_pixels - 1)) { \ ++ const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); \ ++ const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; \ ++ uint32_t offset = \ ++ tile_row + \ ++ (sx >> tile_pixels) * tile_size + \ ++ (sx & tile_mask) * cpp; \ ++ memcpy(dst_row, (const char *)src + swizzle(offset), length * cpp); \ ++ dst_row += length * cpp; \ ++ x -= length * cpp; \ ++ sx += length; \ ++ } \ ++ while (x >= 64) { \ ++ uint32_t offset = \ ++ tile_row + \ ++ (sx >> tile_pixels) * tile_size + \ ++ (sx & tile_mask) * cpp; \ ++ memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), 64); \ ++ dst_row += 64; \ ++ x -= 64; \ ++ sx += swizzle_pixels; \ ++ } \ ++ if (x) { \ ++ uint32_t offset = \ ++ tile_row + \ ++ (sx >> tile_pixels) * tile_size + \ ++ (sx & tile_mask) * cpp; \ ++ memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), x); \ ++ } \ ++ } \ ++} + +- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", +- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, 
dst_stride)); ++#define swizzle_9(X) ((X) ^ (((X) >> 3) & 64)) ++memcpy_to_tiled_x(swizzle_9) ++memcpy_from_tiled_x(swizzle_9) ++#undef swizzle_9 + +- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; +- +- for (y = 0; y < height; ++y) { +- const uint32_t dy = y + dst_y; +- const uint32_t tile_row = +- (dy / tile_height * stride_tiles * tile_size + +- (dy & (tile_height-1)) * tile_width); +- const uint8_t *src_row = (const uint8_t *)src + src_stride * y; +- uint32_t dx = dst_x, offset; +- +- x = width * cpp; +- if (dx & (swizzle_pixels - 1)) { +- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); +- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; +- offset = tile_row + +- (dx >> tile_pixels) * tile_size + +- (dx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; +- +- memcpy((char *)dst + offset, src_row, length * cpp); +- +- src_row += length * cpp; +- x -= length * cpp; +- dx += length; +- } +- while (x >= 64) { +- offset = tile_row + +- (dx >> tile_pixels) * tile_size + +- (dx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; ++#define swizzle_9_10(X) ((X) ^ ((((X) ^ ((X) >> 1)) >> 3) & 64)) ++memcpy_to_tiled_x(swizzle_9_10) ++memcpy_from_tiled_x(swizzle_9_10) ++#undef swizzle_9_10 + +- memcpy((char *)dst + offset, src_row, 64); ++#define swizzle_9_11(X) ((X) ^ ((((X) ^ ((X) >> 2)) >> 3) & 64)) ++memcpy_to_tiled_x(swizzle_9_11) ++memcpy_from_tiled_x(swizzle_9_11) ++#undef swizzle_9_11 + +- src_row += 64; +- x -= 64; +- dx += swizzle_pixels; +- } +- if (x) { +- offset = tile_row + +- (dx >> tile_pixels) * tile_size + +- (dx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; +- memcpy((char *)dst + offset, src_row, x); +- } +- } +-} ++#define swizzle_9_10_11(X) ((X) ^ ((((X) ^ ((X) >> 1) ^ ((X) >> 2)) >> 3) & 64)) ++memcpy_to_tiled_x(swizzle_9_10_11) ++memcpy_from_tiled_x(swizzle_9_10_11) ++#undef swizzle_9_10_11 + +-fast_memcpy static void 
+-memcpy_from_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp, +- int32_t src_stride, int32_t dst_stride, +- int16_t src_x, int16_t src_y, +- int16_t dst_x, int16_t dst_y, +- uint16_t width, uint16_t height) ++static fast_memcpy void ++memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp, ++ int32_t src_stride, int32_t dst_stride, ++ int16_t src_x, int16_t src_y, ++ int16_t dst_x, int16_t dst_y, ++ uint16_t width, uint16_t height) + { +- const unsigned tile_width = 512; +- const unsigned tile_height = 8; +- const unsigned tile_size = 4096; ++ const unsigned tile_width = 128; ++ const unsigned tile_height = 16; ++ const unsigned tile_size = 2048; + + const unsigned cpp = bpp / 8; +- const unsigned stride_tiles = src_stride / tile_width; +- const unsigned swizzle_pixels = 64 / cpp; +- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; +- const unsigned tile_mask = (1 << tile_pixels) - 1; +- +- unsigned x, y; ++ const unsigned tile_pixels = tile_width / cpp; ++ const unsigned tile_shift = ffs(tile_pixels) - 1; ++ const unsigned tile_mask = tile_pixels - 1; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); ++ assert(src != dst); + +- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; +- +- for (y = 0; y < height; ++y) { +- const uint32_t sy = y + src_y; +- const uint32_t tile_row = +- (sy / tile_height * stride_tiles * tile_size + +- (sy & (tile_height-1)) * tile_width); +- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; +- uint32_t sx = src_x, offset; +- +- x = width * cpp; +- if (sx & (swizzle_pixels - 1)) { +- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); +- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; +- offset = tile_row + +- (sx >> tile_pixels) * tile_size + +- (sx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; +- +- memcpy(dst_row, (const char 
*)src + offset, length * cpp); +- +- dst_row += length * cpp; +- x -= length * cpp; +- sx += length; +- } +- while (x >= 64) { +- offset = tile_row + +- (sx >> tile_pixels) * tile_size + +- (sx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; +- +- memcpy(dst_row, (const char *)src + offset, 64); +- +- dst_row += 64; +- x -= 64; +- sx += swizzle_pixels; +- } +- if (x) { +- offset = tile_row + +- (sx >> tile_pixels) * tile_size + +- (sx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; +- memcpy(dst_row, (const char *)src + offset, x); +- } +- } +-} +- +-fast_memcpy static void +-memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp, +- int32_t src_stride, int32_t dst_stride, +- int16_t src_x, int16_t src_y, +- int16_t dst_x, int16_t dst_y, +- uint16_t width, uint16_t height) +-{ +- const unsigned tile_width = 512; +- const unsigned tile_height = 8; +- const unsigned tile_size = 4096; +- +- const unsigned cpp = bpp / 8; +- const unsigned stride_tiles = dst_stride / tile_width; +- const unsigned swizzle_pixels = 64 / cpp; +- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; +- const unsigned tile_mask = (1 << tile_pixels) - 1; ++ if (src_x | src_y) ++ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; ++ assert(src_stride >= width * cpp); ++ src_stride -= width * cpp; + +- unsigned x, y; ++ while (height--) { ++ unsigned w = width * cpp; ++ uint8_t *tile_row = dst; + +- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", +- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); ++ tile_row += dst_y / tile_height * dst_stride * tile_height; ++ tile_row += (dst_y & (tile_height-1)) * tile_width; ++ if (dst_x) { ++ tile_row += (dst_x >> tile_shift) * tile_size; ++ if (dst_x & tile_mask) { ++ const unsigned x = (dst_x & tile_mask) * cpp; ++ const unsigned len = min(tile_width - x, w); ++ memcpy(assume_misaligned(tile_row + x, tile_width, x), 
src, len); + +- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; +- +- for (y = 0; y < height; ++y) { +- const uint32_t dy = y + dst_y; +- const uint32_t tile_row = +- (dy / tile_height * stride_tiles * tile_size + +- (dy & (tile_height-1)) * tile_width); +- const uint8_t *src_row = (const uint8_t *)src + src_stride * y; +- uint32_t dx = dst_x, offset; +- +- x = width * cpp; +- if (dx & (swizzle_pixels - 1)) { +- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); +- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; +- offset = tile_row + +- (dx >> tile_pixels) * tile_size + +- (dx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; +- memcpy((char *)dst + offset, src_row, length * cpp); +- +- src_row += length * cpp; +- x -= length * cpp; +- dx += length; ++ tile_row += tile_size; ++ src = (const uint8_t *)src + len; ++ w -= len; ++ } + } +- while (x >= 64) { +- offset = tile_row + +- (dx >> tile_pixels) * tile_size + +- (dx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; +- +- memcpy((char *)dst + offset, src_row, 64); ++ while (w >= tile_width) { ++ memcpy(assume_aligned(tile_row, tile_width), ++ src, tile_width); + +- src_row += 64; +- x -= 64; +- dx += swizzle_pixels; +- } +- if (x) { +- offset = tile_row + +- (dx >> tile_pixels) * tile_size + +- (dx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; +- memcpy((char *)dst + offset, src_row, x); ++ tile_row += tile_size; ++ src = (const uint8_t *)src + tile_width; ++ w -= tile_width; + } ++ memcpy(assume_aligned(tile_row, tile_width), src, w); ++ src = (const uint8_t *)src + src_stride + w; ++ dst_y++; + } + } + +-fast_memcpy static void +-memcpy_from_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp, +- int32_t src_stride, int32_t dst_stride, +- int16_t src_x, int16_t src_y, +- int16_t dst_x, int16_t dst_y, +- uint16_t width, uint16_t height) ++static fast_memcpy void 
++memcpy_from_tiled_x__gen2(const void *src, void *dst, int bpp, ++ int32_t src_stride, int32_t dst_stride, ++ int16_t src_x, int16_t src_y, ++ int16_t dst_x, int16_t dst_y, ++ uint16_t width, uint16_t height) + { +- const unsigned tile_width = 512; +- const unsigned tile_height = 8; +- const unsigned tile_size = 4096; ++ const unsigned tile_width = 128; ++ const unsigned tile_height = 16; ++ const unsigned tile_size = 2048; + + const unsigned cpp = bpp / 8; +- const unsigned stride_tiles = src_stride / tile_width; +- const unsigned swizzle_pixels = 64 / cpp; +- const unsigned tile_pixels = ffs(tile_width / cpp) - 1; +- const unsigned tile_mask = (1 << tile_pixels) - 1; +- +- unsigned x, y; ++ const unsigned tile_pixels = tile_width / cpp; ++ const unsigned tile_shift = ffs(tile_pixels) - 1; ++ const unsigned tile_mask = tile_pixels - 1; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); ++ assert(src != dst); + +- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; +- +- for (y = 0; y < height; ++y) { +- const uint32_t sy = y + src_y; +- const uint32_t tile_row = +- (sy / tile_height * stride_tiles * tile_size + +- (sy & (tile_height-1)) * tile_width); +- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; +- uint32_t sx = src_x, offset; +- +- x = width * cpp; +- if (sx & (swizzle_pixels - 1)) { +- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); +- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; +- offset = tile_row + +- (sx >> tile_pixels) * tile_size + +- (sx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; +- memcpy(dst_row, (const char *)src + offset, length * cpp); +- +- dst_row += length * cpp; +- x -= length * cpp; +- sx += length; +- } +- while (x >= 64) { +- offset = tile_row + +- (sx >> tile_pixels) * tile_size + +- (sx & tile_mask) * cpp; +- offset ^= ((offset ^ 
(offset >> 2)) >> 3) & 64; ++ if (dst_x | dst_y) ++ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; ++ assert(dst_stride >= width * cpp); ++ dst_stride -= width * cpp; ++ ++ while (height--) { ++ unsigned w = width * cpp; ++ const uint8_t *tile_row = src; + +- memcpy(dst_row, (const char *)src + offset, 64); ++ tile_row += src_y / tile_height * src_stride * tile_height; ++ tile_row += (src_y & (tile_height-1)) * tile_width; ++ if (src_x) { ++ tile_row += (src_x >> tile_shift) * tile_size; ++ if (src_x & tile_mask) { ++ const unsigned x = (src_x & tile_mask) * cpp; ++ const unsigned len = min(tile_width - x, w); ++ memcpy(dst, assume_misaligned(tile_row + x, tile_width, x), len); + +- dst_row += 64; +- x -= 64; +- sx += swizzle_pixels; ++ tile_row += tile_size; ++ dst = (uint8_t *)dst + len; ++ w -= len; ++ } + } +- if (x) { +- offset = tile_row + +- (sx >> tile_pixels) * tile_size + +- (sx & tile_mask) * cpp; +- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; +- memcpy(dst_row, (const char *)src + offset, x); ++ while (w >= tile_width) { ++ memcpy(dst, ++ assume_aligned(tile_row, tile_width), ++ tile_width); ++ ++ tile_row += tile_size; ++ dst = (uint8_t *)dst + tile_width; ++ w -= tile_width; + } ++ memcpy(dst, assume_aligned(tile_row, tile_width), w); ++ dst = (uint8_t *)dst + dst_stride + w; ++ src_y++; + } + } + +-void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling) ++void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling, unsigned cpu) + { ++ if (kgem->gen < 030) { ++ if (swizzling == I915_BIT_6_SWIZZLE_NONE) { ++ DBG(("%s: gen2, no swizzling\n", __FUNCTION__)); ++ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__gen2; ++ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__gen2; ++ } else ++ DBG(("%s: no detiling with swizzle functions for gen2\n", __FUNCTION__)); ++ return; ++ } ++ + switch (swizzling) { + default: + DBG(("%s: unknown swizzling, %d\n", __FUNCTION__, swizzling)); + break; + case I915_BIT_6_SWIZZLE_NONE: + DBG(("%s: no 
swizzling\n", __FUNCTION__)); +- kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0; +- kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0; ++#if defined(sse2) ++ if (cpu & SSE2) { ++ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0__sse2; ++ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0__sse2; ++ kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0__sse2; ++ } else ++#endif ++ { ++ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0; ++ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0; ++ kgem->memcpy_between_tiled_x = memcpy_between_tiled_x__swizzle_0; ++ } + break; + case I915_BIT_6_SWIZZLE_9: + DBG(("%s: 6^9 swizzling\n", __FUNCTION__)); +@@ -771,6 +1199,11 @@ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling) + kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11; + kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_11; + break; ++ case I915_BIT_6_SWIZZLE_9_10_11: ++ DBG(("%s: 6^9^10^11 swizzling\n", __FUNCTION__)); ++ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10_11; ++ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_10_11; ++ break; + } + } + +@@ -995,7 +1428,7 @@ memcpy_xor(const void *src, void *dst, int bpp, + height = 1; + } + +-#if USE_SSE2 ++#if defined(sse2) && __x86_64__ + if (have_sse2()) { + do { + uint32_t *d = (uint32_t *)dst_bytes; +@@ -1118,3 +1551,241 @@ memcpy_xor(const void *src, void *dst, int bpp, + } + } + } ++ ++#define BILINEAR_INTERPOLATION_BITS 4 ++static inline int ++bilinear_weight(pixman_fixed_t x) ++{ ++ return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) & ++ ((1 << BILINEAR_INTERPOLATION_BITS) - 1); ++} ++ ++#if BILINEAR_INTERPOLATION_BITS <= 4 ++/* Inspired by Filter_32_opaque from Skia */ ++static inline uint32_t ++bilinear_interpolation(uint32_t tl, uint32_t tr, ++ uint32_t bl, uint32_t br, ++ int distx, int disty) ++{ ++ int distxy, distxiy, distixy, distixiy; ++ uint32_t lo, hi; ++ ++ distx <<= (4 - 
BILINEAR_INTERPOLATION_BITS); ++ disty <<= (4 - BILINEAR_INTERPOLATION_BITS); ++ ++ distxy = distx * disty; ++ distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ ++ distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ ++ distixiy = ++ 16 * 16 - (disty << 4) - ++ (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ ++ ++ lo = (tl & 0xff00ff) * distixiy; ++ hi = ((tl >> 8) & 0xff00ff) * distixiy; ++ ++ lo += (tr & 0xff00ff) * distxiy; ++ hi += ((tr >> 8) & 0xff00ff) * distxiy; ++ ++ lo += (bl & 0xff00ff) * distixy; ++ hi += ((bl >> 8) & 0xff00ff) * distixy; ++ ++ lo += (br & 0xff00ff) * distxy; ++ hi += ((br >> 8) & 0xff00ff) * distxy; ++ ++ return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); ++} ++#elif SIZEOF_LONG > 4 ++static inline uint32_t ++bilinear_interpolation(uint32_t tl, uint32_t tr, ++ uint32_t bl, uint32_t br, ++ int distx, int disty) ++{ ++ uint64_t distxy, distxiy, distixy, distixiy; ++ uint64_t tl64, tr64, bl64, br64; ++ uint64_t f, r; ++ ++ distx <<= (8 - BILINEAR_INTERPOLATION_BITS); ++ disty <<= (8 - BILINEAR_INTERPOLATION_BITS); ++ ++ distxy = distx * disty; ++ distxiy = distx * (256 - disty); ++ distixy = (256 - distx) * disty; ++ distixiy = (256 - distx) * (256 - disty); ++ ++ /* Alpha and Blue */ ++ tl64 = tl & 0xff0000ff; ++ tr64 = tr & 0xff0000ff; ++ bl64 = bl & 0xff0000ff; ++ br64 = br & 0xff0000ff; ++ ++ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; ++ r = f & 0x0000ff0000ff0000ull; ++ ++ /* Red and Green */ ++ tl64 = tl; ++ tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); ++ ++ tr64 = tr; ++ tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); ++ ++ bl64 = bl; ++ bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); ++ ++ br64 = br; ++ br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); ++ ++ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; ++ r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 
0xff000000ull); ++ ++ return (uint32_t)(r >> 16); ++} ++#else ++static inline uint32_t ++bilinear_interpolation(uint32_t tl, uint32_t tr, ++ uint32_t bl, uint32_t br, ++ int distx, int disty) ++{ ++ int distxy, distxiy, distixy, distixiy; ++ uint32_t f, r; ++ ++ distx <<= (8 - BILINEAR_INTERPOLATION_BITS); ++ disty <<= (8 - BILINEAR_INTERPOLATION_BITS); ++ ++ distxy = distx * disty; ++ distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ ++ distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ ++ distixiy = ++ 256 * 256 - (disty << 8) - ++ (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ ++ ++ /* Blue */ ++ r = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + ++ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy); ++ ++ /* Green */ ++ f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + ++ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy); ++ r |= f & 0xff000000; ++ ++ tl >>= 16; ++ tr >>= 16; ++ bl >>= 16; ++ br >>= 16; ++ r >>= 16; ++ ++ /* Red */ ++ f = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + ++ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy); ++ r |= f & 0x00ff0000; ++ ++ /* Alpha */ ++ f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + ++ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy); ++ r |= f & 0xff000000; ++ ++ return r; ++} ++#endif ++ ++static inline uint32_t convert_pixel(const uint8_t *p, int x) ++{ ++ return ((uint32_t *)p)[x]; ++} ++ ++fast void ++affine_blt(const void *src, void *dst, int bpp, ++ int16_t src_x, int16_t src_y, ++ int16_t src_width, int16_t src_height, ++ int32_t src_stride, ++ int16_t dst_x, int16_t dst_y, ++ uint16_t dst_width, uint16_t dst_height, ++ int32_t dst_stride, ++ const struct pixman_f_transform *t) ++{ ++ static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; ++ const pixman_fixed_t ux = pixman_double_to_fixed(t->m[0][0]); ++ const pixman_fixed_t uy = pixman_double_to_fixed(t->m[1][0]); ++ int i, 
j; ++ ++ assert(bpp == 32); ++ ++ for (j = 0; j < dst_height; j++) { ++ pixman_fixed_t x, y; ++ struct pixman_f_vector v; ++ uint32_t *b; ++ ++ /* reference point is the center of the pixel */ ++ v.v[0] = dst_x + 0.5; ++ v.v[1] = dst_y + j + 0.5; ++ v.v[2] = 1.0; ++ ++ pixman_f_transform_point_3d(t, &v); ++ ++ x = pixman_double_to_fixed(v.v[0]); ++ x += pixman_int_to_fixed(src_x - dst_x); ++ y = pixman_double_to_fixed(v.v[1]); ++ y += pixman_int_to_fixed(src_y - dst_y); ++ ++ b = (uint32_t*)((uint8_t *)dst + (dst_y + j) * dst_stride + dst_x * bpp / 8); ++ for (i = 0; i < dst_width; i++) { ++ const uint8_t *row1; ++ const uint8_t *row2; ++ int x1, y1, x2, y2; ++ uint32_t tl, tr, bl, br; ++ int32_t fx, fy; ++ ++ x1 = x - pixman_fixed_1/2; ++ y1 = y - pixman_fixed_1/2; ++ ++ fx = bilinear_weight(x1); ++ fy = bilinear_weight(y1); ++ ++ x1 = pixman_fixed_to_int(x1); ++ x2 = x1 + 1; ++ y1 = pixman_fixed_to_int(y1); ++ y2 = y1 + 1; ++ ++ if (x1 >= src_width || x2 < 0 || ++ y1 >= src_height || y2 < 0) { ++ b[i] = 0; ++ goto next; ++ } ++ ++ if (y2 == 0) { ++ row1 = zero; ++ } else { ++ row1 = (uint8_t *)src + src_stride * y1; ++ row1 += bpp / 8 * x1; ++ } ++ ++ if (y1 == src_height - 1) { ++ row2 = zero; ++ } else { ++ row2 = (uint8_t *)src + src_stride * y2; ++ row2 += bpp / 8 * x1; ++ } ++ ++ if (x2 == 0) { ++ tl = 0; ++ bl = 0; ++ } else { ++ tl = convert_pixel(row1, 0); ++ bl = convert_pixel(row2, 0); ++ } ++ ++ if (x1 == src_width - 1) { ++ tr = 0; ++ br = 0; ++ } else { ++ tr = convert_pixel(row1, 1); ++ br = convert_pixel(row2, 1); ++ } ++ ++ b[i] = bilinear_interpolation(tl, tr, bl, br, fx, fy); ++ ++next: ++ x += ux; ++ y += uy; ++ } ++ } ++} +diff --git a/src/sna/brw/brw_eu_emit.c b/src/sna/brw/brw_eu_emit.c +index 00c984d9..154f939a 100644 +--- a/src/sna/brw/brw_eu_emit.c ++++ b/src/sna/brw/brw_eu_emit.c +@@ -178,7 +178,7 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) + } + + if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && +- reg.file == 
BRW_ARF_NULL) ++ reg.nr == BRW_ARF_NULL) + return; + + assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg)); +@@ -700,7 +700,7 @@ push_if_stack(struct brw_compile *p, struct brw_instruction *inst) + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF +- * functions), the relevent flags are inverted. ++ * functions), the relevant flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. +diff --git a/src/sna/compiler.h b/src/sna/compiler.h +index ff412179..0f3775ec 100644 +--- a/src/sna/compiler.h ++++ b/src/sna/compiler.h +@@ -39,6 +39,7 @@ + #define pure __attribute__((pure)) + #define tightly_packed __attribute__((__packed__)) + #define flatten __attribute__((flatten)) ++#define nonnull __attribute__((nonnull)) + #define page_aligned __attribute__((aligned(4096))) + #else + #define likely(expr) (expr) +@@ -51,18 +52,15 @@ + #define pure + #define tighly_packed + #define flatten ++#define nonnull + #define page_aligned + #endif + + #define HAS_GCC(major, minor) defined(__GNUC__) && (__GNUC__ > (major) || __GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) + + #if HAS_GCC(4, 5) +-#define sse2 __attribute__((target("sse2,fpmath=sse"))) +-#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse"))) +-#endif +- +-#if HAS_GCC(4, 7) +-#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse"))) ++#define sse2 fast __attribute__((target("sse2,fpmath=sse"))) ++#define sse4_2 fast __attribute__((target("sse4.2,sse2,fpmath=sse"))) + #endif + + #if HAS_GCC(4, 6) && defined(__OPTIMIZE__) +@@ -71,10 +69,17 @@ + #define fast + #endif + +-#if HAS_GCC(4, 6) && defined(__OPTIMIZE__) +-#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops"))) +-#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__) +-#define fast_memcpy 
__attribute__((target("inline-all-stringops"))) ++#if HAS_GCC(4, 7) ++#define avx2 fast __attribute__((target("avx2,avx,sse4.2,sse2,fpmath=sse"))) ++#define assume_aligned(ptr, align) __builtin_assume_aligned((ptr), (align)) ++#define assume_misaligned(ptr, align, offset) __builtin_assume_aligned((ptr), (align), (offset)) ++#else ++#define assume_aligned(ptr, align) (ptr) ++#define assume_misaligned(ptr, align, offset) (ptr) ++#endif ++ ++#if HAS_GCC(4, 5) && defined(__OPTIMIZE__) ++#define fast_memcpy fast __attribute__((target("inline-all-stringops"))) + #else + #define fast_memcpy + #endif +diff --git a/src/sna/fb/fb.h b/src/sna/fb/fb.h +index 8bf9008a..90431747 100644 +--- a/src/sna/fb/fb.h ++++ b/src/sna/fb/fb.h +@@ -24,10 +24,6 @@ + #ifndef FB_H + #define FB_H + +-#ifdef HAVE_CONFIG_H +-#include "config.h" +-#endif +- + #include <xorg-server.h> + #include <servermd.h> + #include <gcstruct.h> +diff --git a/src/sna/fb/fbimage.c b/src/sna/fb/fbimage.c +index 5af23890..cc81c85b 100644 +--- a/src/sna/fb/fbimage.c ++++ b/src/sna/fb/fbimage.c +@@ -229,13 +229,19 @@ fbGetImage(DrawablePtr drawable, + FbBits pm; + + pm = fbReplicatePixel(planeMask, srcBpp); ++ + dstStride = PixmapBytePad(w, drawable->depth); +- if (pm != FB_ALLONES) +- memset(d, 0, dstStride * h); + dstStride /= sizeof(FbStip); ++ + fbBltStip((FbStip *)(src + (y + srcYoff) * srcStride), srcStride, + (x + srcXoff) * srcBpp, +- dst, dstStride, 0, w * srcBpp, h, GXcopy, pm, srcBpp); ++ dst, dstStride, 0, w * srcBpp, h, GXcopy, FB_ALLONES, srcBpp); ++ ++ if (pm != FB_ALLONES) { ++ int i = dstStride * h; ++ while (i--) ++ *dst++ &= pm; ++ } + } else { + dstStride = BitmapBytePad(w) / sizeof(FbStip); + fbBltPlane(src + (y + srcYoff) * srcStride, +diff --git a/src/sna/fb/fbpict.h b/src/sna/fb/fbpict.h +index 932032f9..20877777 100644 +--- a/src/sna/fb/fbpict.h ++++ b/src/sna/fb/fbpict.h +@@ -24,10 +24,6 @@ + #ifndef FBPICT_H + #define FBPICT_H + +-#ifdef HAVE_CONFIG_H +-#include "config.h" +-#endif +- + 
#include <xorg-server.h> + #include <picturestr.h> + +diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c +index 1104f462..49ad16a3 100644 +--- a/src/sna/gen2_render.c ++++ b/src/sna/gen2_render.c +@@ -35,6 +35,7 @@ + #include "sna_reg.h" + #include "sna_render.h" + #include "sna_render_inline.h" ++#include "sna_video.h" + + #include "gen2_render.h" + +@@ -48,6 +49,7 @@ + + #define MAX_3D_SIZE 2048 + #define MAX_3D_PITCH 8192 ++#define MAX_INLINE (1 << 18) + + #define BATCH(v) batch_emit(sna, v) + #define BATCH_F(v) batch_emit_float(sna, v) +@@ -596,39 +598,43 @@ gen2_get_batch(struct sna *sna, const struct sna_composite_op *op) + gen2_emit_invariant(sna); + } + +-static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op) ++static void gen2_emit_target(struct sna *sna, ++ struct kgem_bo *bo, ++ int width, ++ int height, ++ int format) + { +- assert(!too_large(op->dst.width, op->dst.height)); +- assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH); ++ assert(!too_large(width, height)); ++ assert(bo->pitch >= 8 && bo->pitch <= MAX_3D_PITCH); + assert(sna->render.vertex_offset == 0); + +- assert(op->dst.bo->unique_id); +- if (sna->render_state.gen2.target == op->dst.bo->unique_id) { +- kgem_bo_mark_dirty(op->dst.bo); ++ assert(bo->unique_id); ++ if (sna->render_state.gen2.target == bo->unique_id) { ++ kgem_bo_mark_dirty(bo); + return; + } + + BATCH(_3DSTATE_BUF_INFO_CMD); + BATCH(BUF_3D_ID_COLOR_BACK | +- gen2_buf_tiling(op->dst.bo->tiling) | +- BUF_3D_PITCH(op->dst.bo->pitch)); ++ gen2_buf_tiling(bo->tiling) | ++ BUF_3D_PITCH(bo->pitch)); + BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, +- op->dst.bo, ++ bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER, + 0)); + + BATCH(_3DSTATE_DST_BUF_VARS_CMD); +- BATCH(gen2_get_dst_format(op->dst.format)); ++ BATCH(gen2_get_dst_format(format)); + + BATCH(_3DSTATE_DRAW_RECT_CMD); + BATCH(0); + BATCH(0); /* ymin, xmin */ +- BATCH(DRAW_YMAX(op->dst.height - 1) | +- 
DRAW_XMAX(op->dst.width - 1)); ++ BATCH(DRAW_YMAX(height - 1) | ++ DRAW_XMAX(width - 1)); + BATCH(0); /* yorig, xorig */ + +- sna->render_state.gen2.target = op->dst.bo->unique_id; ++ sna->render_state.gen2.target = bo->unique_id; + } + + static void gen2_disable_logic_op(struct sna *sna) +@@ -701,7 +707,11 @@ static void gen2_emit_composite_state(struct sna *sna, + kgem_clear_dirty(&sna->kgem); + } + +- gen2_emit_target(sna, op); ++ gen2_emit_target(sna, ++ op->dst.bo, ++ op->dst.width, ++ op->dst.height, ++ op->dst.format); + + unwind = sna->kgem.nbatch; + BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | +@@ -1190,7 +1200,13 @@ inline static int gen2_get_rectangles(struct sna *sna, + sna->render.vertex_offset = sna->kgem.nbatch; + BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST); + } +- } ++ ++ need = 0; ++ } else ++ need = sna->kgem.nbatch - sna->render.vertex_offset; ++ ++ if (rem > MAX_INLINE - need) ++ rem = MAX_INLINE -need; + + if (want > 1 && want * size > rem) + want = rem / size; +@@ -1572,12 +1588,12 @@ gen2_composite_picture(struct sna *sna, + if (channel->repeat && + (x >= 0 && + y >= 0 && +- x + w < pixmap->drawable.width && +- y + h < pixmap->drawable.height)) { ++ x + w <= pixmap->drawable.width && ++ y + h <= pixmap->drawable.height)) { + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv && priv->clear) { + DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); +- return gen2_composite_solid_init(sna, channel, priv->clear_color); ++ return gen2_composite_solid_init(sna, channel, solid_color(picture->format, priv->clear_color)); + } + } + } else +@@ -1619,7 +1635,9 @@ gen2_composite_set_target(struct sna *sna, + } else + sna_render_picture_extents(dst, &box); + +- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; ++ hint = PREFER_GPU | RENDER_GPU; ++ if (!need_tiling(sna, op->dst.width, op->dst.height)) ++ hint |= FORCE_GPU; + if (!partial) { + hint |= IGNORE_DAMAGE; + if (w == op->dst.width && h == op->dst.height) +@@ 
-2423,7 +2441,11 @@ static void gen2_emit_composite_spans_state(struct sna *sna, + uint32_t unwind; + + gen2_get_batch(sna, &op->base); +- gen2_emit_target(sna, &op->base); ++ gen2_emit_target(sna, ++ op->base.dst.bo, ++ op->base.dst.width, ++ op->base.dst.height, ++ op->base.dst.format); + + unwind = sna->kgem.nbatch; + BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | +@@ -2706,7 +2728,11 @@ static void gen2_emit_fill_composite_state(struct sna *sna, + uint32_t ls1; + + gen2_get_batch(sna, op); +- gen2_emit_target(sna, op); ++ gen2_emit_target(sna, ++ op->dst.bo, ++ op->dst.width, ++ op->dst.height, ++ op->dst.format); + + ls1 = sna->kgem.nbatch; + BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | +@@ -2868,7 +2894,11 @@ static void gen2_emit_fill_state(struct sna *sna, + uint32_t ls1; + + gen2_get_batch(sna, op); +- gen2_emit_target(sna, op); ++ gen2_emit_target(sna, ++ op->dst.bo, ++ op->dst.width, ++ op->dst.height, ++ op->dst.format); + + ls1 = sna->kgem.nbatch; + BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | +@@ -3102,6 +3132,276 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, + } + + static void ++gen2_emit_video_state(struct sna *sna, ++ struct sna_video *video, ++ struct sna_video_frame *frame, ++ PixmapPtr pixmap, ++ struct kgem_bo *dst_bo, ++ int width, int height, ++ bool bilinear) ++{ ++ uint32_t ms1, v, unwind; ++ ++ gen2_emit_target(sna, dst_bo, width, height, ++ sna_format_for_depth(pixmap->drawable.depth)); ++ ++ unwind = sna->kgem.nbatch; ++ BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | ++ I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); ++ BATCH(1 << 12); ++ BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); ++ BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); ++ if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, ++ sna->kgem.batch + unwind + 1, ++ 3 * sizeof(uint32_t)) == 0) ++ sna->kgem.nbatch = unwind; ++ else ++ sna->render_state.gen2.ls1 = unwind; ++ ++ gen2_disable_logic_op(sna); ++ ++ unwind = sna->kgem.nbatch; ++ BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 
++ LOAD_TEXTURE_BLEND_STAGE(0) | 1); ++ BATCH(TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT | ++ TB0C_OP_ARG1 | TB0C_ARG1_SEL_TEXEL0); ++ BATCH(TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT | ++ TB0A_OP_ARG1 | TB0A_ARG1_SEL_ONE); ++ if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, ++ sna->kgem.batch + unwind + 1, ++ 2 * sizeof(uint32_t)) == 0) ++ sna->kgem.nbatch = unwind; ++ else ++ sna->render_state.gen2.ls2 = unwind; ++ ++ BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(0) | 4); ++ BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, ++ frame->bo, ++ I915_GEM_DOMAIN_SAMPLER << 16, ++ 0)); ++ ms1 = MAPSURF_422 | TM0S1_COLORSPACE_CONVERSION; ++ switch (frame->id) { ++ case FOURCC_YUY2: ++ ms1 |= MT_422_YCRCB_NORMAL; ++ break; ++ case FOURCC_UYVY: ++ ms1 |= MT_422_YCRCB_SWAPY; ++ break; ++ } ++ BATCH(((frame->height - 1) << TM0S1_HEIGHT_SHIFT) | ++ ((frame->width - 1) << TM0S1_WIDTH_SHIFT) | ++ ms1 | ++ gen2_sampler_tiling_bits(frame->bo->tiling)); ++ BATCH((frame->pitch[0] / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); ++ if (bilinear) ++ BATCH(FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT | ++ FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT | ++ MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); ++ else ++ BATCH(FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT | ++ FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT | ++ MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); ++ BATCH(0); /* default color */ ++ ++ BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(0) | ++ ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | TEXCOORDTYPE_CARTESIAN | ++ ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP) | ++ ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP)); ++ ++ v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D; ++ if (sna->render_state.gen2.vft != v) { ++ BATCH(v); ++ sna->render_state.gen2.vft = v; ++ } ++} ++ ++static void ++gen2_video_get_batch(struct sna *sna, struct kgem_bo *bo) ++{ ++ kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); ++ ++ if 
(!kgem_check_batch(&sna->kgem, 120) || ++ !kgem_check_reloc(&sna->kgem, 4) || ++ !kgem_check_exec(&sna->kgem, 2)) { ++ _kgem_submit(&sna->kgem); ++ _kgem_set_mode(&sna->kgem, KGEM_RENDER); ++ } ++ ++ if (sna->render_state.gen2.need_invariant) ++ gen2_emit_invariant(sna); ++} ++ ++static int ++gen2_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex) ++{ ++ int size = floats_per_vertex * 3; ++ int rem = batch_space(sna) - 1; ++ ++ if (rem > MAX_INLINE) ++ rem = MAX_INLINE; ++ ++ if (size * want > rem) ++ want = rem / size; ++ ++ return want; ++} ++ ++static bool ++gen2_render_video(struct sna *sna, ++ struct sna_video *video, ++ struct sna_video_frame *frame, ++ RegionPtr dstRegion, ++ PixmapPtr pixmap) ++{ ++ struct sna_pixmap *priv = sna_pixmap(pixmap); ++ const BoxRec *pbox = region_rects(dstRegion); ++ int nbox = region_num_rects(dstRegion); ++ int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; ++ int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; ++ int src_width = frame->src.x2 - frame->src.x1; ++ int src_height = frame->src.y2 - frame->src.y1; ++ float src_offset_x, src_offset_y; ++ float src_scale_x, src_scale_y; ++ int pix_xoff, pix_yoff; ++ struct kgem_bo *dst_bo; ++ bool bilinear; ++ int copy = 0; ++ ++ DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__, ++ src_width, src_height, frame->width, frame->height, dst_width, dst_height)); ++ ++ assert(priv->gpu_bo); ++ dst_bo = priv->gpu_bo; ++ ++ bilinear = src_width != dst_width || src_height != dst_height; ++ ++ src_scale_x = (float)src_width / dst_width / frame->width; ++ src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; ++ ++ src_scale_y = (float)src_height / dst_height / frame->height; ++ src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; ++ DBG(("%s: src offset (%f, %f), scale (%f, %f)\n", ++ __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y)); ++ ++ if 
(too_large(pixmap->drawable.width, pixmap->drawable.height) || ++ dst_bo->pitch > MAX_3D_PITCH) { ++ int bpp = pixmap->drawable.bitsPerPixel; ++ ++ if (too_large(dst_width, dst_height)) ++ return false; ++ ++ dst_bo = kgem_create_2d(&sna->kgem, ++ dst_width, dst_height, bpp, ++ kgem_choose_tiling(&sna->kgem, ++ I915_TILING_X, ++ dst_width, dst_height, bpp), ++ 0); ++ if (!dst_bo) ++ return false; ++ ++ pix_xoff = -dstRegion->extents.x1; ++ pix_yoff = -dstRegion->extents.y1; ++ copy = 1; ++ } else { ++ /* Set up the offset for translating from the given region ++ * (in screen coordinates) to the backing pixmap. ++ */ ++#ifdef COMPOSITE ++ pix_xoff = -pixmap->screen_x + pixmap->drawable.x; ++ pix_yoff = -pixmap->screen_y + pixmap->drawable.y; ++#else ++ pix_xoff = 0; ++ pix_yoff = 0; ++#endif ++ ++ dst_width = pixmap->drawable.width; ++ dst_height = pixmap->drawable.height; ++ } ++ ++ gen2_video_get_batch(sna, dst_bo); ++ gen2_emit_video_state(sna, video, frame, pixmap, ++ dst_bo, dst_width, dst_height, bilinear); ++ do { ++ int nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4); ++ if (nbox_this_time == 0) { ++ gen2_video_get_batch(sna, dst_bo); ++ gen2_emit_video_state(sna, video, frame, pixmap, ++ dst_bo, dst_width, dst_height, bilinear); ++ nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4); ++ assert(nbox_this_time); ++ } ++ nbox -= nbox_this_time; ++ ++ BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | ++ ((12 * nbox_this_time) - 1)); ++ do { ++ int box_x1 = pbox->x1; ++ int box_y1 = pbox->y1; ++ int box_x2 = pbox->x2; ++ int box_y2 = pbox->y2; ++ ++ pbox++; ++ ++ DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n", ++ __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff, ++ box_x1 * src_scale_x + src_offset_x, ++ box_y1 * src_scale_y + src_offset_y, ++ box_x2 * src_scale_x + src_offset_x, ++ box_y2 * src_scale_y + src_offset_y)); ++ ++ /* bottom right */ ++ BATCH_F(box_x2 + pix_xoff); ++ BATCH_F(box_y2 + pix_yoff); ++ 
BATCH_F(box_x2 * src_scale_x + src_offset_x); ++ BATCH_F(box_y2 * src_scale_y + src_offset_y); ++ ++ /* bottom left */ ++ BATCH_F(box_x1 + pix_xoff); ++ BATCH_F(box_y2 + pix_yoff); ++ BATCH_F(box_x1 * src_scale_x + src_offset_x); ++ BATCH_F(box_y2 * src_scale_y + src_offset_y); ++ ++ /* top left */ ++ BATCH_F(box_x1 + pix_xoff); ++ BATCH_F(box_y1 + pix_yoff); ++ BATCH_F(box_x1 * src_scale_x + src_offset_x); ++ BATCH_F(box_y1 * src_scale_y + src_offset_y); ++ } while (--nbox_this_time); ++ } while (nbox); ++ ++ if (copy) { ++#ifdef COMPOSITE ++ pix_xoff = -pixmap->screen_x + pixmap->drawable.x; ++ pix_yoff = -pixmap->screen_y + pixmap->drawable.y; ++#else ++ pix_xoff = 0; ++ pix_yoff = 0; ++#endif ++ sna_blt_copy_boxes(sna, GXcopy, ++ dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, ++ priv->gpu_bo, pix_xoff, pix_yoff, ++ pixmap->drawable.bitsPerPixel, ++ region_rects(dstRegion), ++ region_num_rects(dstRegion)); ++ ++ kgem_bo_destroy(&sna->kgem, dst_bo); ++ } ++ ++ if (!DAMAGE_IS_ALL(priv->gpu_damage)) { ++ if ((pix_xoff | pix_yoff) == 0) { ++ sna_damage_add(&priv->gpu_damage, dstRegion); ++ } else { ++ sna_damage_add_boxes(&priv->gpu_damage, ++ region_rects(dstRegion), ++ region_num_rects(dstRegion), ++ pix_xoff, pix_yoff); ++ } ++ } ++ ++ return true; ++} ++ ++static void + gen2_render_copy_setup_source(struct sna_composite_channel *channel, + const DrawableRec *draw, + struct kgem_bo *bo) +@@ -3176,7 +3476,11 @@ static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op + PIPELINE_FLUSH_TEXTURE_CACHE); + kgem_clear_dirty(&sna->kgem); + } +- gen2_emit_target(sna, op); ++ gen2_emit_target(sna, ++ op->dst.bo, ++ op->dst.width, ++ op->dst.height, ++ op->dst.format); + + ls1 = sna->kgem.nbatch; + BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | +@@ -3511,7 +3815,7 @@ const char *gen2_render_init(struct sna *sna, const char *backend) + render->copy = gen2_render_copy; + render->copy_boxes = gen2_render_copy_boxes; + +- /* XXX YUV color space 
conversion for video? */ ++ render->video = gen2_render_video; + + render->reset = gen2_render_reset; + render->flush = gen2_render_flush; +diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c +index 78289f00..4459a562 100644 +--- a/src/sna/gen3_render.c ++++ b/src/sna/gen3_render.c +@@ -448,14 +448,14 @@ gen3_emit_composite_boxes_constant(const struct sna_composite_op *op, + float *v) + { + do { +- v[0] = box->x2; +- v[1] = box->y2; ++ v[0] = box->x2 + op->dst.x; ++ v[1] = box->y2 + op->dst.y; + +- v[2] = box->x1; +- v[3] = box->y2; ++ v[2] = box->x1 + op->dst.x; ++ v[3] = box->y2 + op->dst.y; + +- v[4] = box->x1; +- v[5] = box->y1; ++ v[4] = box->x1 + op->dst.x; ++ v[5] = box->y1 + op->dst.y; + + box++; + v += 6; +@@ -494,18 +494,18 @@ gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op, + float *v) + { + do { +- v[0] = box->x2; +- v[1] = box->y2; ++ v[0] = box->x2 + op->dst.x; ++ v[1] = box->y2 + op->dst.y; + v[2] = box->x2 + op->src.offset[0]; + v[3] = box->y2 + op->src.offset[1]; + +- v[4] = box->x1; +- v[5] = box->y2; ++ v[4] = box->x1 + op->dst.x; ++ v[5] = box->y2 + op->dst.y; + v[6] = box->x1 + op->src.offset[0]; + v[7] = box->y2 + op->src.offset[1]; + +- v[8] = box->x1; +- v[9] = box->y1; ++ v[8] = box->x1 + op->dst.x; ++ v[9] = box->y1 + op->dst.y; + v[10] = box->x1 + op->src.offset[0]; + v[11] = box->y1 + op->src.offset[1]; + +@@ -531,6 +531,7 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst_x + r->width; + v[1] = dst_y + r->height; +@@ -559,22 +560,22 @@ gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op, + const PictTransform *transform = op->src.transform; + + do { +- v[0] = box->x2; +- v[1] = box->y2; ++ v[0] = box->x2 + op->dst.x; ++ v[1] = box->y2 + op->dst.y; + _sna_get_transformed_scaled(box->x2 + 
op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[2], &v[3]); + +- v[4] = box->x1; +- v[5] = box->y2; ++ v[4] = box->x1 + op->dst.x; ++ v[5] = box->y2 + op->dst.y; + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[6], &v[7]); + +- v[8] = box->x1; +- v[9] = box->y1; ++ v[8] = box->x1 + op->dst.x; ++ v[9] = box->y1 + op->dst.y; + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y1 + op->src.offset[1], + transform, op->src.scale, +@@ -596,6 +597,7 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; +@@ -643,6 +645,7 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[8] = v[4] = r->dst.x; + v[9] = r->dst.y; +@@ -693,6 +696,7 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst_x + r->width; + v[5] = v[1] = dst_y + r->height; +@@ -720,10 +724,10 @@ gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op, + const PictTransform *transform = op->src.transform; + + do { +- v[0] = box->x2; +- v[5] = v[1] = box->y2; +- v[8] = v[4] = box->x1; +- v[9] = box->y1; ++ v[0] = box->x2 + op->dst.x; ++ v[5] = v[1] = box->y2 + op->dst.y; ++ v[8] = v[4] = box->x1 + op->dst.x; ++ v[9] = box->y1 + op->dst.y; + + _sna_get_transformed_scaled(box->x2 + op->src.offset[0], + box->y2 + op->src.offset[1], +@@ -756,6 +760,7 @@ gen3_emit_composite_primitive_constant_identity_mask(struct 
sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; +@@ -781,6 +786,7 @@ gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[8] = v[4] = r->dst.x; + v[9] = r->dst.y; +@@ -817,6 +823,7 @@ gen3_emit_composite_primitive_identity_source_mask(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 18; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst_x + w; + v[1] = dst_y + h; +@@ -862,6 +869,7 @@ gen3_emit_composite_primitive_affine_source_mask(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 18; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst_x + w; + v[1] = dst_y + h; +@@ -978,6 +986,7 @@ gen3_emit_composite_primitive_constant__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[4] = v[2] = r->dst.x + op->dst.x; + v[5] = r->dst.y + op->dst.y; +@@ -993,10 +1002,10 @@ gen3_emit_composite_boxes_constant__sse2(const struct sna_composite_op *op, + float *v) + { + do { +- v[0] = box->x2; +- v[3] = v[1] = box->y2; +- v[4] = v[2] = box->x1; +- v[5] = box->y1; ++ v[0] = box->x2 + op->dst.x; ++ v[3] = v[1] = box->y2 + op->dst.y; ++ v[4] = v[2] = box->x1 + op->dst.x; ++ v[5] = box->y1 + op->dst.y; + + box++; + v += 6; +@@ -1013,6 +1022,7 @@ gen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= 
sna->render.vertex_size); + + x = r->dst.x + op->dst.x; + y = r->dst.y + op->dst.y; +@@ -1035,10 +1045,10 @@ gen3_emit_composite_boxes_identity_gradient__sse2(const struct sna_composite_op + float *v) + { + do { +- v[0] = box->x2; +- v[5] = v[1] = box->y2; +- v[8] = v[4] = box->x1; +- v[9] = box->y1; ++ v[0] = box->x2 + op->dst.x; ++ v[5] = v[1] = box->y2 + op->dst.y; ++ v[8] = v[4] = box->x1 + op->dst.x; ++ v[9] = box->y1 + op->dst.y; + + v[2] = box->x2 + op->src.offset[0]; + v[7] = v[3] = box->y2 + op->src.offset[1]; +@@ -1067,6 +1077,7 @@ gen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst_x + r->width; + v[1] = dst_y + r->height; +@@ -1095,22 +1106,22 @@ gen3_emit_composite_boxes_affine_gradient__sse2(const struct sna_composite_op *o + const PictTransform *transform = op->src.transform; + + do { +- v[0] = box->x2; +- v[1] = box->y2; ++ v[0] = box->x2 + op->dst.x; ++ v[1] = box->y2 + op->dst.y; + _sna_get_transformed_scaled(box->x2 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[2], &v[3]); + +- v[4] = box->x1; +- v[5] = box->y2; ++ v[4] = box->x1 + op->dst.x; ++ v[5] = box->y2 + op->dst.y; + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[6], &v[7]); + +- v[8] = box->x1; +- v[9] = box->y1; ++ v[8] = box->x1 + op->dst.x; ++ v[9] = box->y1 + op->dst.y; + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y1 + op->src.offset[1], + transform, op->src.scale, +@@ -1132,6 +1143,7 @@ gen3_emit_composite_primitive_identity_source__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; +@@ 
-1179,6 +1191,7 @@ gen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[8] = v[4] = r->dst.x; + v[9] = r->dst.y; +@@ -1227,8 +1240,12 @@ gen3_emit_composite_primitive_affine_source__sse2(struct sna *sna, + int src_y = r->src.y + (int)op->src.offset[1]; + float *v; + ++ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d\n", ++ __FUNCTION__, src_x, src_y, dst_x, dst_y, r->width, r->height)); ++ + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst_x + r->width; + v[5] = v[1] = dst_y + r->height; +@@ -1256,10 +1273,13 @@ gen3_emit_composite_boxes_affine_source__sse2(const struct sna_composite_op *op, + const PictTransform *transform = op->src.transform; + + do { +- v[0] = box->x2; +- v[5] = v[1] = box->y2; +- v[8] = v[4] = box->x1; +- v[9] = box->y1; ++ DBG(("%s: box=(%d, %d), (%d, %d), src.offset=(%d, %d)\n", ++ __FUNCTION__, box->x1, box->y1, box->x2, box->y2, op->src.offset[0], op->src.offset[1])); ++ ++ v[0] = box->x2 + op->dst.x; ++ v[5] = v[1] = box->y2 + op->dst.y; ++ v[8] = v[4] = box->x1 + op->dst.x; ++ v[9] = box->y1 + op->dst.y; + + _sna_get_transformed_scaled(box->x2 + op->src.offset[0], + box->y2 + op->src.offset[1], +@@ -1292,6 +1312,7 @@ gen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; +@@ -1317,6 +1338,7 @@ gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; ++ assert(sna->render.vertex_used <= 
sna->render.vertex_size); + + v[8] = v[4] = r->dst.x; + v[9] = r->dst.y; +@@ -1353,6 +1375,7 @@ gen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 18; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst_x + w; + v[1] = dst_y + h; +@@ -1398,6 +1421,7 @@ gen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 18; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst_x + w; + v[1] = dst_y + h; +@@ -2233,6 +2257,7 @@ static void gen3_vertex_flush(struct sna *sna) + static int gen3_vertex_finish(struct sna *sna) + { + struct kgem_bo *bo; ++ unsigned hint, size; + + DBG(("%s: used=%d/%d, vbo active? %d\n", + __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size, +@@ -2243,6 +2268,7 @@ static int gen3_vertex_finish(struct sna *sna) + + sna_vertex_wait__locked(&sna->render); + ++ hint = CREATE_GTT_MAP; + bo = sna->render.vbo; + if (bo) { + DBG(("%s: reloc = %d\n", __FUNCTION__, +@@ -2251,7 +2277,7 @@ static int gen3_vertex_finish(struct sna *sna) + if (sna->render.vertex_reloc[0]) { + sna->kgem.batch[sna->render.vertex_reloc[0]] = + kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], +- bo, I915_GEM_DOMAIN_VERTEX << 16, 0); ++ bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, 0); + + sna->render.vertex_reloc[0] = 0; + } +@@ -2260,17 +2286,29 @@ static int gen3_vertex_finish(struct sna *sna) + sna->render.vbo = NULL; + + kgem_bo_destroy(&sna->kgem, bo); ++ hint |= CREATE_CACHED | CREATE_NO_THROTTLE; + } + ++ size = 256*1024; + sna->render.vertices = NULL; +- sna->render.vbo = kgem_create_linear(&sna->kgem, +- 256*1024, CREATE_GTT_MAP); +- if (sna->render.vbo) ++ sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); ++ while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) { ++ 
size /= 2; ++ sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); ++ } ++ if (sna->render.vbo == NULL) ++ sna->render.vbo = kgem_create_linear(&sna->kgem, ++ 256*1024, CREATE_GTT_MAP); ++ if (sna->render.vbo && ++ kgem_check_bo(&sna->kgem, sna->render.vbo, NULL)) + sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); + if (sna->render.vertices == NULL) { +- if (sna->render.vbo) ++ if (sna->render.vbo) { + kgem_bo_destroy(&sna->kgem, sna->render.vbo); +- sna->render.vbo = NULL; ++ sna->render.vbo = NULL; ++ } ++ sna->render.vertices = sna->render.vertex_data; ++ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + return 0; + } + assert(sna->render.vbo->snoop == false); +@@ -2280,8 +2318,14 @@ static int gen3_vertex_finish(struct sna *sna) + sna->render.vertex_data, + sizeof(float)*sna->render.vertex_used); + } +- sna->render.vertex_size = 64 * 1024 - 1; +- return sna->render.vertex_size - sna->render.vertex_used; ++ ++ size = __kgem_bo_size(sna->render.vbo)/4; ++ if (size >= UINT16_MAX) ++ size = UINT16_MAX - 1; ++ assert(size > sna->render.vertex_used); ++ ++ sna->render.vertex_size = size; ++ return size - sna->render.vertex_used; + } + + static void gen3_vertex_close(struct sna *sna) +@@ -2345,7 +2389,7 @@ static void gen3_vertex_close(struct sna *sna) + DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0])); + sna->kgem.batch[sna->render.vertex_reloc[0]] = + kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0], +- bo, I915_GEM_DOMAIN_VERTEX << 16, delta); ++ bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, delta); + sna->render.vertex_reloc[0] = 0; + + if (sna->render.vbo == NULL) { +@@ -2580,6 +2624,7 @@ gen3_render_composite_boxes(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; +@@ 
-2604,6 +2649,7 @@ gen3_render_composite_boxes__thread(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); +@@ -3065,7 +3111,7 @@ gen3_composite_picture(struct sna *sna, + + if (sna_picture_is_clear(picture, x, y, w, h, &color)) { + DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color)); +- return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8)); ++ return gen3_init_solid(channel, solid_color(picture->format, color)); + } + + if (!gen3_check_repeat(picture)) +@@ -3097,12 +3143,12 @@ gen3_composite_picture(struct sna *sna, + if (channel->repeat || + (x >= 0 && + y >= 0 && +- x + w < pixmap->drawable.width && +- y + h < pixmap->drawable.height)) { ++ x + w <= pixmap->drawable.width && ++ y + h <= pixmap->drawable.height)) { + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv && priv->clear) { + DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); +- return gen3_init_solid(channel, priv->clear_color); ++ return gen3_init_solid(channel, solid_color(picture->format, priv->clear_color)); + } + } + } else { +@@ -3182,7 +3228,9 @@ gen3_composite_set_target(struct sna *sna, + } else + sna_render_picture_extents(dst, &box); + +- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; ++ hint = PREFER_GPU | RENDER_GPU; ++ if (!need_tiling(sna, op->dst.width, op->dst.height)) ++ hint |= FORCE_GPU; + if (!partial) { + hint |= IGNORE_DAMAGE; + if (w == op->dst.width && h == op->dst.height) +@@ -3645,8 +3693,11 @@ gen3_render_composite(struct sna *sna, + } + } + } +- DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__, ++ DBG(("%s: final src/mask type=%d/%d [constant? %d/%d], transform? 
%d/%d, affine=%d/%d\n", __FUNCTION__, + tmp->src.u.gen3.type, tmp->mask.u.gen3.type, ++ is_constant_ps(tmp->src.u.gen3.type), ++ is_constant_ps(tmp->mask.u.gen3.type), ++ !!tmp->src.transform, !!tmp->mask.transform, + tmp->src.is_affine, tmp->mask.is_affine)); + + tmp->prim_emit = gen3_emit_composite_primitive; +@@ -3862,6 +3913,7 @@ gen3_emit_composite_spans_primitive_zero(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; +@@ -3901,6 +3953,7 @@ gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = box->x2; + v[3] = v[1] = box->y2; +@@ -3932,6 +3985,7 @@ gen3_emit_composite_spans_primitive_constant(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[6] = v[3] = op->base.dst.x + box->x1; +@@ -3966,6 +4020,7 @@ gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = box->x2; + v[6] = v[3] = box->x1; +@@ -3999,6 +4054,7 @@ gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; +@@ -4060,6 +4116,7 @@ gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, + + v = sna->render.vertices + 
sna->render.vertex_used; + sna->render.vertex_used += 15; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[6] = v[1] = op->base.dst.y + box->y2; +@@ -4125,6 +4182,7 @@ gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; +@@ -4184,6 +4242,7 @@ gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[6] = v[3] = op->base.dst.x + box->x1; +@@ -4229,6 +4288,7 @@ gen3_render_composite_spans_constant_box__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = box->x2; + v[6] = v[3] = box->x1; +@@ -4259,6 +4319,7 @@ gen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * 9; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); +@@ -4287,6 +4348,7 @@ gen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = box->x2; + v[6] = v[3] = box->x1; +@@ -4320,6 +4382,7 @@ gen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; ++ 
assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; +@@ -4380,6 +4443,7 @@ gen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[6] = v[1] = op->base.dst.y + box->y2; +@@ -4445,6 +4509,7 @@ gen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna, + { + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; +@@ -4504,6 +4569,7 @@ gen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna, + PictTransform *transform = op->base.src.transform; + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; +@@ -4577,6 +4643,7 @@ gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, + PictTransform *transform = op->base.src.transform; + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; +@@ -4676,6 +4743,7 @@ gen3_render_composite_spans_constant_box(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = box->x2; + v[6] = v[3] = box->x1; +@@ -4706,6 +4774,7 @@ gen3_render_composite_spans_constant_thread_boxes(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 
nbox_this_time * 9; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); +@@ -4795,6 +4864,7 @@ gen3_render_composite_spans_boxes__thread(struct sna *sna, + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); +@@ -5436,17 +5506,7 @@ gen3_render_video(struct sna *sna, + pix_yoff = -dstRegion->extents.y1; + copy = 1; + } else { +- /* Set up the offset for translating from the given region +- * (in screen coordinates) to the backing pixmap. +- */ +-#ifdef COMPOSITE +- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; +- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +-#else +- pix_xoff = 0; +- pix_yoff = 0; +-#endif +- ++ pix_xoff = pix_yoff = 0; + dst_width = pixmap->drawable.width; + dst_height = pixmap->drawable.height; + } +@@ -5502,16 +5562,9 @@ gen3_render_video(struct sna *sna, + } while (nbox); + + if (copy) { +-#ifdef COMPOSITE +- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; +- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +-#else +- pix_xoff = 0; +- pix_yoff = 0; +-#endif + sna_blt_copy_boxes(sna, GXcopy, + dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, +- priv->gpu_bo, pix_xoff, pix_yoff, ++ priv->gpu_bo, 0, 0, + pixmap->drawable.bitsPerPixel, + region_rects(dstRegion), + region_num_rects(dstRegion)); +@@ -5519,21 +5572,8 @@ gen3_render_video(struct sna *sna, + kgem_bo_destroy(&sna->kgem, dst_bo); + } + +- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { +- if ((pix_xoff | pix_yoff) == 0) { +- sna_damage_add(&priv->gpu_damage, dstRegion); +- sna_damage_subtract(&priv->cpu_damage, dstRegion); +- } else { +- sna_damage_add_boxes(&priv->gpu_damage, +- region_rects(dstRegion), +- region_num_rects(dstRegion), +- pix_xoff, pix_yoff); +- 
sna_damage_subtract_boxes(&priv->cpu_damage, +- region_rects(dstRegion), +- region_num_rects(dstRegion), +- pix_xoff, pix_yoff); +- } +- } ++ if (!DAMAGE_IS_ALL(priv->gpu_damage)) ++ sna_damage_add(&priv->gpu_damage, dstRegion); + + return true; + } +diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c +index 6c2d3808..72a98aee 100644 +--- a/src/sna/gen4_render.c ++++ b/src/sna/gen4_render.c +@@ -1405,8 +1405,8 @@ gen4_render_video(struct sna *sna, + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; +- int nbox, pix_xoff, pix_yoff; + const BoxRec *box; ++ int nbox; + + DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, + src_width, src_height, dst_width, dst_height)); +@@ -1445,17 +1445,6 @@ gen4_render_video(struct sna *sna, + gen4_align_vertex(sna, &tmp); + gen4_video_bind_surfaces(sna, &tmp); + +- /* Set up the offset for translating from the given region (in screen +- * coordinates) to the backing pixmap. +- */ +-#ifdef COMPOSITE +- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; +- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +-#else +- pix_xoff = 0; +- pix_yoff = 0; +-#endif +- + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + +@@ -1473,34 +1462,26 @@ gen4_render_video(struct sna *sna, + nbox -= n; + + do { +- BoxRec r; +- +- r.x1 = box->x1 + pix_xoff; +- r.x2 = box->x2 + pix_xoff; +- r.y1 = box->y1 + pix_yoff; +- r.y2 = box->y2 + pix_yoff; +- +- OUT_VERTEX(r.x2, r.y2); ++ OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y2); ++ OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y1); ++ OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(box->x1 * src_scale_x + 
src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + +- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { +- sna_damage_add_box(&priv->gpu_damage, &r); +- sna_damage_subtract_box(&priv->cpu_damage, &r); +- } + box++; + } while (--n); + } while (nbox); + gen4_vertex_flush(sna); + ++ if (!DAMAGE_IS_ALL(priv->gpu_damage)) ++ sna_damage_add(&priv->gpu_damage, dstRegion); ++ + return true; + } + +@@ -1585,12 +1566,14 @@ gen4_composite_picture(struct sna *sna, + if (channel->repeat && + (x >= 0 && + y >= 0 && +- x + w < pixmap->drawable.width && +- y + h < pixmap->drawable.height)) { ++ x + w <= pixmap->drawable.width && ++ y + h <= pixmap->drawable.height)) { + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv && priv->clear) { + DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); +- return gen4_channel_init_solid(sna, channel, priv->clear_color); ++ return gen4_channel_init_solid(sna, channel, ++ solid_color(picture->format, ++ priv->clear_color)); + } + } + } else +@@ -1664,7 +1647,9 @@ gen4_composite_set_target(struct sna *sna, + } else + sna_render_picture_extents(dst, &box); + +- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; ++ hint = PREFER_GPU | RENDER_GPU; ++ if (!need_tiling(sna, op->dst.width, op->dst.height)) ++ hint |= FORCE_GPU; + if (!partial) { + hint |= IGNORE_DAMAGE; + if (w == op->dst.width && h == op->dst.height) +@@ -2738,6 +2723,20 @@ gen4_render_fill_boxes(struct sna *sna, + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + ++ sna_render_composite_redirect_init(&tmp); ++ if (too_large(dst->width, dst->height)) { ++ BoxRec extents; ++ ++ boxes_extents(box, n, &extents); ++ if (!sna_render_composite_redirect(sna, &tmp, ++ extents.x1, extents.y1, ++ extents.x2 - extents.x1, ++ extents.y2 - extents.y1, ++ n > 1)) ++ return sna_tiling_fill_boxes(sna, op, format, color, ++ dst, dst_bo, box, n); ++ } ++ + gen4_channel_init_solid(sna, &tmp.src, pixel); + + tmp.is_affine = true; +@@ -2748,8 +2747,10 
@@ gen4_render_fill_boxes(struct sna *sna, + + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_submit(&sna->kgem); +- if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) ++ if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { ++ kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; ++ } + } + + gen4_align_vertex(sna, &tmp); +@@ -2765,6 +2766,7 @@ gen4_render_fill_boxes(struct sna *sna, + + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); ++ sna_render_composite_redirect_done(sna, &tmp); + return true; + } + +diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c +index 37cf1ff9..fb3e79bf 100644 +--- a/src/sna/gen5_render.c ++++ b/src/sna/gen5_render.c +@@ -1355,8 +1355,8 @@ gen5_render_video(struct sna *sna, + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; +- int nbox, pix_xoff, pix_yoff; + const BoxRec *box; ++ int nbox; + + DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, + src_width, src_height, dst_width, dst_height)); +@@ -1395,17 +1395,6 @@ gen5_render_video(struct sna *sna, + gen5_align_vertex(sna, &tmp); + gen5_video_bind_surfaces(sna, &tmp); + +- /* Set up the offset for translating from the given region (in screen +- * coordinates) to the backing pixmap. 
+- */ +-#ifdef COMPOSITE +- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; +- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +-#else +- pix_xoff = 0; +- pix_yoff = 0; +-#endif +- + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + +@@ -1415,35 +1404,27 @@ gen5_render_video(struct sna *sna, + box = region_rects(dstRegion); + nbox = region_num_rects(dstRegion); + while (nbox--) { +- BoxRec r; +- +- r.x1 = box->x1 + pix_xoff; +- r.x2 = box->x2 + pix_xoff; +- r.y1 = box->y1 + pix_yoff; +- r.y2 = box->y2 + pix_yoff; +- + gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces); + +- OUT_VERTEX(r.x2, r.y2); ++ OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y2); ++ OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y1); ++ OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + +- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { +- sna_damage_add_box(&priv->gpu_damage, &r); +- sna_damage_subtract_box(&priv->cpu_damage, &r); +- } + box++; + } +- + gen4_vertex_flush(sna); ++ ++ if (!DAMAGE_IS_ALL(priv->gpu_damage)) ++ sna_damage_add(&priv->gpu_damage, dstRegion); ++ + return true; + } + +@@ -1524,12 +1505,12 @@ gen5_composite_picture(struct sna *sna, + if (channel->repeat || + (x >= 0 && + y >= 0 && +- x + w < pixmap->drawable.width && +- y + h < pixmap->drawable.height)) { ++ x + w <= pixmap->drawable.width && ++ y + h <= pixmap->drawable.height)) { + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv && priv->clear) { + DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); +- return gen4_channel_init_solid(sna, 
channel, priv->clear_color); ++ return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); + } + } + } else +@@ -1618,7 +1599,9 @@ gen5_composite_set_target(struct sna *sna, + } else + sna_render_picture_extents(dst, &box); + +- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; ++ hint = PREFER_GPU | RENDER_GPU; ++ if (!need_tiling(sna, op->dst.width, op->dst.height)) ++ hint |= FORCE_GPU; + if (!partial) { + hint |= IGNORE_DAMAGE; + if (w == op->dst.width && h == op->dst.height) +@@ -2734,6 +2717,19 @@ gen5_render_fill_boxes(struct sna *sna, + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + ++ if (too_large(dst->width, dst->height)) { ++ BoxRec extents; ++ ++ boxes_extents(box, n, &extents); ++ if (!sna_render_composite_redirect(sna, &tmp, ++ extents.x1, extents.y1, ++ extents.x2 - extents.x1, ++ extents.y2 - extents.y1, ++ n > 1)) ++ return sna_tiling_fill_boxes(sna, op, format, color, ++ dst, dst_bo, box, n); ++ } ++ + tmp.src.bo = sna_render_get_solid(sna, pixel); + tmp.src.filter = SAMPLER_FILTER_NEAREST; + tmp.src.repeat = SAMPLER_EXTEND_REPEAT; +@@ -2780,6 +2776,7 @@ gen5_render_fill_boxes(struct sna *sna, + + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); ++ sna_render_composite_redirect_done(sna, &tmp); + return true; + } + +diff --git a/src/sna/gen6_common.h b/src/sna/gen6_common.h +index 6668620b..b53ec0c9 100644 +--- a/src/sna/gen6_common.h ++++ b/src/sna/gen6_common.h +@@ -30,8 +30,8 @@ + + #include "sna.h" + +-#define NO_RING_SWITCH 0 +-#define PREFER_RENDER 0 ++#define NO_RING_SWITCH(sna) (!(sna)->kgem.has_semaphores) ++#define PREFER_RENDER 0 /* -1 -> BLT, 1 -> RENDER */ + + static inline bool is_uncached(struct sna *sna, + struct kgem_bo *bo) +@@ -46,40 +46,28 @@ inline static bool can_switch_to_blt(struct sna *sna, + if (sna->kgem.ring != KGEM_RENDER) + return true; + +- if (NO_RING_SWITCH) +- return false; +- +- if (!sna->kgem.has_semaphores) +- return false; +- +- if (flags & COPY_LAST) +- 
return true; +- + if (bo && RQ_IS_BLT(bo->rq)) + return true; + +- if (sna->render_state.gt < 2) +- return true; ++ if (bo && bo->tiling == I915_TILING_Y) ++ return false; + +- return kgem_ring_is_idle(&sna->kgem, KGEM_BLT); +-} ++ if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) ++ return false; + +-inline static bool can_switch_to_render(struct sna *sna, +- struct kgem_bo *bo) +-{ +- if (sna->kgem.ring == KGEM_RENDER) ++ if (sna->render_state.gt < 2) + return true; + +- if (NO_RING_SWITCH) ++ if (bo && RQ_IS_RENDER(bo->rq)) + return false; + +- if (!sna->kgem.has_semaphores) ++ if (NO_RING_SWITCH(sna)) + return false; + +- if (bo && !RQ_IS_BLT(bo->rq) && !is_uncached(sna, bo)) ++ if (flags & COPY_LAST) + return true; + +- return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER); ++ return kgem_ring_is_idle(&sna->kgem, KGEM_BLT); + } + + static inline bool untiled_tlb_miss(struct kgem_bo *bo) +@@ -90,57 +78,95 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo) + return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; + } + +-static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) ++static int prefer_blt_bo(struct sna *sna, ++ struct kgem_bo *src, ++ struct kgem_bo *dst) + { ++ assert(dst != NULL); ++ + if (PREFER_RENDER) + return PREFER_RENDER < 0; + +- if (bo->rq) +- return RQ_IS_BLT(bo->rq); ++ if (dst->rq) ++ return RQ_IS_BLT(dst->rq); + + if (sna->flags & SNA_POWERSAVE) + return true; + +- return bo->tiling == I915_TILING_NONE || is_uncached(sna, bo); +-} ++ if (src) { ++ if (sna->render_state.gt > 1) ++ return false; + +-inline static bool force_blt_ring(struct sna *sna) +-{ +- if (sna->flags & SNA_POWERSAVE) ++ if (src->rq) ++ return RQ_IS_BLT(src->rq); ++ ++ if (src->tiling == I915_TILING_Y) ++ return false; ++ } else { ++ if (sna->render_state.gt > 2) ++ return false; ++ } ++ ++ if (sna->render_state.gt < 2) + return true; + ++ return dst->tiling == I915_TILING_NONE || is_uncached(sna, dst); ++} ++ ++inline static bool force_blt_ring(struct sna 
*sna, struct kgem_bo *bo) ++{ + if (sna->kgem.mode == KGEM_RENDER) + return false; + ++ if (NO_RING_SWITCH(sna)) ++ return sna->kgem.ring == KGEM_BLT; ++ ++ if (bo->tiling == I915_TILING_Y) ++ return false; ++ ++ if (sna->flags & SNA_POWERSAVE) ++ return true; ++ + if (sna->render_state.gt < 2) + return true; + + return false; + } + +-inline static bool prefer_blt_ring(struct sna *sna, +- struct kgem_bo *bo, +- unsigned flags) ++nonnull inline static bool ++prefer_blt_ring(struct sna *sna, struct kgem_bo *bo, unsigned flags) + { + if (PREFER_RENDER) + return PREFER_RENDER < 0; + +- assert(!force_blt_ring(sna)); +- assert(!kgem_bo_is_render(bo)); ++ assert(!force_blt_ring(sna, bo)); ++ assert(!kgem_bo_is_render(bo) || NO_RING_SWITCH(sna)); ++ ++ if (kgem_bo_is_blt(bo)) ++ return true; + + return can_switch_to_blt(sna, bo, flags); + } + +-inline static bool prefer_render_ring(struct sna *sna, +- struct kgem_bo *bo) ++nonnull inline static bool ++prefer_render_ring(struct sna *sna, struct kgem_bo *bo) + { ++ if (sna->kgem.ring == KGEM_RENDER) ++ return true; ++ ++ if (sna->kgem.ring != KGEM_NONE && NO_RING_SWITCH(sna)) ++ return false; ++ ++ if (kgem_bo_is_render(bo)) ++ return true; ++ + if (sna->flags & SNA_POWERSAVE) + return false; + +- if (sna->render_state.gt < 2) +- return false; ++ if (!prefer_blt_bo(sna, NULL, bo)) ++ return true; + +- return can_switch_to_render(sna, bo); ++ return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER); + } + + inline static bool +@@ -153,25 +179,20 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) + untiled_tlb_miss(tmp->src.bo)) + return true; + +- if (force_blt_ring(sna)) ++ if (force_blt_ring(sna, tmp->dst.bo)) + return true; + +- if (kgem_bo_is_render(tmp->dst.bo) || +- kgem_bo_is_render(tmp->src.bo)) +- return false; +- + if (prefer_render_ring(sna, tmp->dst.bo)) + return false; + + if (!prefer_blt_ring(sna, tmp->dst.bo, 0)) + return false; + +- return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, 
tmp->src.bo); ++ return prefer_blt_bo(sna, tmp->src.bo, tmp->dst.bo); + } + +-static inline bool prefer_blt_fill(struct sna *sna, +- struct kgem_bo *bo, +- unsigned flags) ++nonnull static inline bool ++prefer_blt_fill(struct sna *sna, struct kgem_bo *bo, unsigned flags) + { + if (PREFER_RENDER) + return PREFER_RENDER < 0; +@@ -179,24 +200,21 @@ static inline bool prefer_blt_fill(struct sna *sna, + if (untiled_tlb_miss(bo)) + return true; + +- if (force_blt_ring(sna)) ++ if (force_blt_ring(sna, bo)) + return true; + + if ((flags & (FILL_POINTS | FILL_SPANS)) == 0) { +- if (kgem_bo_is_render(bo)) +- return false; +- + if (prefer_render_ring(sna, bo)) + return false; + + if (!prefer_blt_ring(sna, bo, 0)) + return false; + } else { +- if (can_switch_to_blt(sna, bo, 0)) ++ if (can_switch_to_blt(sna, bo, COPY_LAST)) + return true; + } + +- return prefer_blt_bo(sna, bo); ++ return prefer_blt_bo(sna, NULL, bo); + } + + void gen6_render_context_switch(struct kgem *kgem, int new_mode); +diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c +index 25044685..6b69f216 100644 +--- a/src/sna/gen6_render.c ++++ b/src/sna/gen6_render.c +@@ -1633,9 +1633,9 @@ gen6_render_video(struct sna *sna, + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; +- int nbox, pix_xoff, pix_yoff; + unsigned filter; + const BoxRec *box; ++ int nbox; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", + __FUNCTION__, +@@ -1686,17 +1686,6 @@ gen6_render_video(struct sna *sna, + gen6_align_vertex(sna, &tmp); + gen6_emit_video_state(sna, &tmp); + +- /* Set up the offset for translating from the given region (in screen +- * coordinates) to the backing pixmap. 
+- */ +-#ifdef COMPOSITE +- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; +- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +-#else +- pix_xoff = 0; +- pix_yoff = 0; +-#endif +- + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + +@@ -1706,35 +1695,27 @@ gen6_render_video(struct sna *sna, + box = region_rects(dstRegion); + nbox = region_num_rects(dstRegion); + while (nbox--) { +- BoxRec r; +- +- r.x1 = box->x1 + pix_xoff; +- r.x2 = box->x2 + pix_xoff; +- r.y1 = box->y1 + pix_yoff; +- r.y2 = box->y2 + pix_yoff; +- + gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); + +- OUT_VERTEX(r.x2, r.y2); ++ OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y2); ++ OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y1); ++ OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + +- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { +- sna_damage_add_box(&priv->gpu_damage, &r); +- sna_damage_subtract_box(&priv->cpu_damage, &r); +- } + box++; + } +- + gen4_vertex_flush(sna); ++ ++ if (!DAMAGE_IS_ALL(priv->gpu_damage)) ++ sna_damage_add(&priv->gpu_damage, dstRegion); ++ + return true; + } + +@@ -1815,12 +1796,12 @@ gen6_composite_picture(struct sna *sna, + if (channel->repeat && + (x >= 0 && + y >= 0 && +- x + w < pixmap->drawable.width && +- y + h < pixmap->drawable.height)) { ++ x + w <= pixmap->drawable.width && ++ y + h <= pixmap->drawable.height)) { + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv && priv->clear) { + DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); +- return gen4_channel_init_solid(sna, 
channel, priv->clear_color); ++ return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); + } + } + } else +@@ -1927,7 +1908,9 @@ gen6_composite_set_target(struct sna *sna, + } else + sna_render_picture_extents(dst, &box); + +- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; ++ hint = PREFER_GPU | RENDER_GPU; ++ if (!need_tiling(sna, op->dst.width, op->dst.height)) ++ hint |= FORCE_GPU; + if (!partial) { + hint |= IGNORE_DAMAGE; + if (w == op->dst.width && h == op->dst.height) +@@ -1965,46 +1948,77 @@ gen6_composite_set_target(struct sna *sna, + + static bool + try_blt(struct sna *sna, +- PicturePtr dst, PicturePtr src, +- int width, int height) ++ uint8_t op, ++ PicturePtr src, ++ PicturePtr mask, ++ PicturePtr dst, ++ int16_t src_x, int16_t src_y, ++ int16_t msk_x, int16_t msk_y, ++ int16_t dst_x, int16_t dst_y, ++ int16_t width, int16_t height, ++ unsigned flags, ++ struct sna_composite_op *tmp) + { + struct kgem_bo *bo; + + if (sna->kgem.mode == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); +- return true; ++ goto execute; + } + + if (too_large(width, height)) { + DBG(("%s: operation too large for 3D pipe (%d, %d)\n", + __FUNCTION__, width, height)); +- return true; ++ goto execute; + } + + bo = __sna_drawable_peek_bo(dst->pDrawable); + if (bo == NULL) +- return true; +- if (bo->rq) +- return RQ_IS_BLT(bo->rq); ++ goto execute; ++ ++ if (untiled_tlb_miss(bo)) ++ goto execute; ++ ++ if (bo->rq) { ++ if (RQ_IS_BLT(bo->rq)) ++ goto execute; ++ ++ return false; ++ } ++ ++ if (bo->tiling == I915_TILING_Y) ++ goto upload; ++ ++ if (src->pDrawable == dst->pDrawable && ++ can_switch_to_blt(sna, bo, 0)) ++ goto execute; + + if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) +- return true; ++ goto execute; + + if (src->pDrawable) { +- bo = __sna_drawable_peek_bo(src->pDrawable); +- if (bo == NULL) +- return true; ++ struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); ++ if (s == NULL) ++ 
goto execute; + +- if (prefer_blt_bo(sna, bo)) +- return true; ++ if (prefer_blt_bo(sna, s, bo)) ++ goto execute; + } + + if (sna->kgem.ring == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); +- return true; ++ goto execute; + } + +- return false; ++upload: ++ flags |= COMPOSITE_UPLOAD; ++execute: ++ return sna_blt_composite(sna, op, ++ src, dst, ++ src_x, src_y, ++ dst_x, dst_y, ++ width, height, ++ flags, tmp); + } + + static bool +@@ -2234,13 +2248,13 @@ gen6_render_composite(struct sna *sna, + width, height, sna->kgem.ring)); + + if (mask == NULL && +- try_blt(sna, dst, src, width, height) && +- sna_blt_composite(sna, op, +- src, dst, +- src_x, src_y, +- dst_x, dst_y, +- width, height, +- flags, tmp)) ++ try_blt(sna, op, ++ src, mask, dst, ++ src_x, src_y, ++ msk_x, msk_y, ++ dst_x, dst_y, ++ width, height, ++ flags, tmp)) + return true; + + if (gen6_composite_fallback(sna, src, mask, dst)) +@@ -2676,27 +2690,35 @@ static inline bool prefer_blt_copy(struct sna *sna, + if (sna->kgem.ring == KGEM_BLT) + return true; + +- if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) ++ if (flags & COPY_DRI && !sna->kgem.has_semaphores) ++ return false; ++ ++ if ((flags & COPY_SMALL || src_bo == dst_bo) && ++ can_switch_to_blt(sna, dst_bo, flags)) + return true; + + if (untiled_tlb_miss(src_bo) || + untiled_tlb_miss(dst_bo)) + return true; + +- if (force_blt_ring(sna)) ++ if (force_blt_ring(sna, dst_bo)) + return true; + + if (kgem_bo_is_render(dst_bo) || + kgem_bo_is_render(src_bo)) + return false; + ++ if (flags & COPY_LAST && ++ can_switch_to_blt(sna, dst_bo, flags)) ++ return true; ++ + if (prefer_render_ring(sna, dst_bo)) + return false; + + if (!prefer_blt_ring(sna, dst_bo, flags)) + return false; + +- return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); ++ return prefer_blt_bo(sna, src_bo, dst_bo); + } + + static bool +@@ -2758,8 +2780,7 @@ fallback_blt: + assert(src->depth == dst->depth); + assert(src->width == dst->width); + 
assert(src->height == dst->height); +- return sna_render_copy_boxes__overlap(sna, alu, +- src, src_bo, ++ return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, + src_dx, src_dy, + dst_dx, dst_dy, + box, n, &extents); +diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c +index 2ecfd641..aabb8693 100644 +--- a/src/sna/gen7_render.c ++++ b/src/sna/gen7_render.c +@@ -60,8 +60,6 @@ + #define NO_FILL_ONE 0 + #define NO_FILL_CLEAR 0 + +-#define NO_RING_SWITCH 0 +- + #define USE_8_PIXEL_DISPATCH 1 + #define USE_16_PIXEL_DISPATCH 1 + #define USE_32_PIXEL_DISPATCH 0 +@@ -149,7 +147,7 @@ static const struct gt_info hsw_gt1_info = { + .max_vs_threads = 70, + .max_gs_threads = 70, + .max_wm_threads = +- (102 - 1) << HSW_PS_MAX_THREADS_SHIFT | ++ (70 - 1) << HSW_PS_MAX_THREADS_SHIFT | + 1 << HSW_PS_SAMPLE_MASK_SHIFT, + .urb = { 128, 640, 256, 8 }, + .gt = 1, +@@ -209,6 +207,12 @@ static const uint32_t ps_kernel_planar[][4] = { + #include "exa_wm_write.g7b" + }; + ++static const uint32_t ps_kernel_rgb[][4] = { ++#include "exa_wm_src_affine.g7b" ++#include "exa_wm_src_sample_argb.g7b" ++#include "exa_wm_write.g7b" ++}; ++ + #define KERNEL(kernel_enum, kernel, num_surfaces) \ + [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} + #define NOKERNEL(kernel_enum, func, num_surfaces) \ +@@ -218,7 +222,7 @@ static const struct wm_kernel_info { + const void *data; + unsigned int size; + int num_surfaces; +-} wm_kernels[] = { ++} wm_kernels[GEN7_WM_KERNEL_COUNT] = { + NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), + NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), + +@@ -236,6 +240,7 @@ static const struct wm_kernel_info { + + KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), + KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), ++ KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), + }; + #undef KERNEL + +@@ -810,7 +815,7 @@ gen7_emit_cc(struct sna *sna, uint32_t blend_offset) + + DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset)); + +- /* XXX can have upto 8 blend 
states preload, selectable via ++ /* XXX can have up to 8 blend states preload, selectable via + * Render Target Index. What other side-effects of Render Target Index? + */ + +@@ -1792,7 +1797,9 @@ static void gen7_emit_video_state(struct sna *sna, + frame->pitch[0]; + n_src = 6; + } else { +- if (frame->id == FOURCC_UYVY) ++ if (frame->id == FOURCC_RGB888) ++ src_surf_format = GEN7_SURFACEFORMAT_B8G8R8X8_UNORM; ++ else if (frame->id == FOURCC_UYVY) + src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL; +@@ -1826,6 +1833,23 @@ static void gen7_emit_video_state(struct sna *sna, + gen7_emit_state(sna, op, offset | dirty); + } + ++static unsigned select_video_kernel(const struct sna_video_frame *frame) ++{ ++ switch (frame->id) { ++ case FOURCC_YV12: ++ case FOURCC_I420: ++ case FOURCC_XVMC: ++ return GEN7_WM_KERNEL_VIDEO_PLANAR; ++ ++ case FOURCC_RGB888: ++ case FOURCC_RGB565: ++ return GEN7_WM_KERNEL_VIDEO_RGB; ++ ++ default: ++ return GEN7_WM_KERNEL_VIDEO_PACKED; ++ } ++} ++ + static bool + gen7_render_video(struct sna *sna, + struct sna_video *video, +@@ -1841,9 +1865,9 @@ gen7_render_video(struct sna *sna, + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; +- int nbox, pix_xoff, pix_yoff; + unsigned filter; + const BoxRec *box; ++ int nbox; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", + __FUNCTION__, +@@ -1878,9 +1902,7 @@ gen7_render_video(struct sna *sna, + GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, + SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), + NO_BLEND, +- is_planar_fourcc(frame->id) ? 
+- GEN7_WM_KERNEL_VIDEO_PLANAR : +- GEN7_WM_KERNEL_VIDEO_PACKED, ++ select_video_kernel(frame), + 2); + tmp.priv = frame; + +@@ -1896,17 +1918,6 @@ gen7_render_video(struct sna *sna, + gen7_align_vertex(sna, &tmp); + gen7_emit_video_state(sna, &tmp); + +- /* Set up the offset for translating from the given region (in screen +- * coordinates) to the backing pixmap. +- */ +-#ifdef COMPOSITE +- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; +- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +-#else +- pix_xoff = 0; +- pix_yoff = 0; +-#endif +- + DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", + __FUNCTION__, + frame->src.x1, frame->src.y1, +@@ -1928,45 +1939,36 @@ gen7_render_video(struct sna *sna, + box = region_rects(dstRegion); + nbox = region_num_rects(dstRegion); + while (nbox--) { +- BoxRec r; +- +- DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", ++ DBG(("%s: dst=(%d, %d), (%d, %d); src=(%f, %f), (%f, %f)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2, box->y2, +- pix_xoff, pix_yoff, + box->x1 * src_scale_x + src_offset_x, + box->y1 * src_scale_y + src_offset_y, + box->x2 * src_scale_x + src_offset_x, + box->y2 * src_scale_y + src_offset_y)); + +- r.x1 = box->x1 + pix_xoff; +- r.x2 = box->x2 + pix_xoff; +- r.y1 = box->y1 + pix_yoff; +- r.y2 = box->y2 + pix_yoff; +- + gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state); + +- OUT_VERTEX(r.x2, r.y2); ++ OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y2); ++ OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y1); ++ OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + +- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { +- sna_damage_add_box(&priv->gpu_damage, &r); 
+- sna_damage_subtract_box(&priv->cpu_damage, &r); +- } + box++; + } +- + gen4_vertex_flush(sna); ++ ++ if (!DAMAGE_IS_ALL(priv->gpu_damage)) ++ sna_damage_add(&priv->gpu_damage, dstRegion); ++ + return true; + } + +@@ -2048,12 +2050,13 @@ gen7_composite_picture(struct sna *sna, + if (channel->repeat || + (x >= 0 && + y >= 0 && +- x + w < pixmap->drawable.width && +- y + h < pixmap->drawable.height)) { ++ x + w <= pixmap->drawable.width && ++ y + h <= pixmap->drawable.height)) { + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv && priv->clear) { + DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); +- return gen4_channel_init_solid(sna, channel, priv->clear_color); ++ return gen4_channel_init_solid(sna, channel, ++ solid_color(picture->format, priv->clear_color)); + } + } + } else +@@ -2147,7 +2150,9 @@ gen7_composite_set_target(struct sna *sna, + } else + sna_render_picture_extents(dst, &box); + +- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; ++ hint = PREFER_GPU | RENDER_GPU; ++ if (!need_tiling(sna, op->dst.width, op->dst.height)) ++ hint |= FORCE_GPU; + if (!partial) { + hint |= IGNORE_DAMAGE; + if (w == op->dst.width && h == op->dst.height) +@@ -2185,46 +2190,78 @@ gen7_composite_set_target(struct sna *sna, + + static bool + try_blt(struct sna *sna, +- PicturePtr dst, PicturePtr src, +- int width, int height) ++ uint8_t op, ++ PicturePtr src, ++ PicturePtr mask, ++ PicturePtr dst, ++ int16_t src_x, int16_t src_y, ++ int16_t msk_x, int16_t msk_y, ++ int16_t dst_x, int16_t dst_y, ++ int16_t width, int16_t height, ++ unsigned flags, ++ struct sna_composite_op *tmp) + { + struct kgem_bo *bo; + + if (sna->kgem.mode == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); +- return true; ++ goto execute; + } + + if (too_large(width, height)) { + DBG(("%s: operation too large for 3D pipe (%d, %d)\n", + __FUNCTION__, width, height)); +- return true; ++ goto execute; + } + + bo = 
__sna_drawable_peek_bo(dst->pDrawable); + if (bo == NULL) +- return true; +- if (bo->rq) +- return RQ_IS_BLT(bo->rq); ++ goto execute; ++ ++ if (untiled_tlb_miss(bo)) ++ goto execute; ++ ++ if (bo->rq) { ++ if (RQ_IS_BLT(bo->rq)) ++ goto execute; ++ ++ return false; ++ } ++ ++ if (bo->tiling == I915_TILING_Y) ++ goto upload; ++ ++ if (src->pDrawable == dst->pDrawable && ++ (sna->render_state.gt < 3 || width*height < 1024) && ++ can_switch_to_blt(sna, bo, 0)) ++ goto execute; + + if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) +- return true; ++ goto execute; + + if (src->pDrawable) { +- bo = __sna_drawable_peek_bo(src->pDrawable); +- if (bo == NULL) +- return true; ++ struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); ++ if (s == NULL) ++ goto upload; + +- if (prefer_blt_bo(sna, bo)) +- return true; ++ if (prefer_blt_bo(sna, s, bo)) ++ goto execute; + } + + if (sna->kgem.ring == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); +- return true; ++ goto execute; + } + +- return false; ++upload: ++ flags |= COMPOSITE_UPLOAD; ++execute: ++ return sna_blt_composite(sna, op, ++ src, dst, ++ src_x, src_y, ++ dst_x, dst_y, ++ width, height, ++ flags, tmp); + } + + static bool +@@ -2454,13 +2491,13 @@ gen7_render_composite(struct sna *sna, + width, height, sna->kgem.mode, sna->kgem.ring)); + + if (mask == NULL && +- try_blt(sna, dst, src, width, height) && +- sna_blt_composite(sna, op, +- src, dst, +- src_x, src_y, +- dst_x, dst_y, +- width, height, +- flags, tmp)) ++ try_blt(sna, op, ++ src, mask, dst, ++ src_x, src_y, ++ msk_x, msk_y, ++ dst_x, dst_y, ++ width, height, ++ flags, tmp)) + return true; + + if (gen7_composite_fallback(sna, src, mask, dst)) +@@ -2878,27 +2915,37 @@ prefer_blt_copy(struct sna *sna, + + assert((flags & COPY_SYNC) == 0); + +- if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) +- return true; +- + if (untiled_tlb_miss(src_bo) || + untiled_tlb_miss(dst_bo)) + return true; + +- if 
(force_blt_ring(sna)) ++ if (flags & COPY_DRI && !sna->kgem.has_semaphores) ++ return false; ++ ++ if (force_blt_ring(sna, dst_bo)) ++ return true; ++ ++ if ((flags & COPY_SMALL || ++ (sna->render_state.gt < 3 && src_bo == dst_bo)) && ++ can_switch_to_blt(sna, dst_bo, flags)) + return true; + + if (kgem_bo_is_render(dst_bo) || + kgem_bo_is_render(src_bo)) + return false; + ++ if (flags & COPY_LAST && ++ sna->render_state.gt < 3 && ++ can_switch_to_blt(sna, dst_bo, flags)) ++ return true; ++ + if (prefer_render_ring(sna, dst_bo)) + return false; + + if (!prefer_blt_ring(sna, dst_bo, flags)) + return false; + +- return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); ++ return prefer_blt_bo(sna, src_bo, dst_bo); + } + + static bool +@@ -2946,7 +2993,7 @@ fallback_blt: + &extents)) { + bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); + +- if ((big || can_switch_to_blt(sna, dst_bo, flags)) && ++ if ((big || !prefer_render_ring(sna, dst_bo)) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, +@@ -2961,8 +3008,7 @@ fallback_blt: + assert(src->depth == dst->depth); + assert(src->width == dst->width); + assert(src->height == dst->height); +- return sna_render_copy_boxes__overlap(sna, alu, +- src, src_bo, ++ return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, + src_dx, src_dy, + dst_dx, dst_dy, + box, n, &extents); +diff --git a/src/sna/gen8_render.c b/src/sna/gen8_render.c +index 6eb11452..445983b1 100644 +--- a/src/sna/gen8_render.c ++++ b/src/sna/gen8_render.c +@@ -106,6 +106,12 @@ static const uint32_t ps_kernel_planar[][4] = { + #include "exa_wm_yuv_rgb.g8b" + #include "exa_wm_write.g8b" + }; ++ ++static const uint32_t ps_kernel_rgb[][4] = { ++#include "exa_wm_src_affine.g8b" ++#include "exa_wm_src_sample_argb.g8b" ++#include "exa_wm_write.g8b" ++}; + #endif + + #define SURFACE_DW (64 / sizeof(uint32_t)); +@@ -119,7 +125,7 @@ static const struct wm_kernel_info { + const void *data; + unsigned int 
size; + int num_surfaces; +-} wm_kernels[] = { ++} wm_kernels[GEN8_WM_KERNEL_COUNT] = { + NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2), + NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2), + +@@ -138,6 +144,7 @@ static const struct wm_kernel_info { + #if !NO_VIDEO + KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), + KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), ++ KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), + #endif + }; + #undef KERNEL +@@ -205,6 +212,33 @@ static const struct blendinfo { + #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) + #define OUT_VERTEX_F(v) vertex_emit(sna, v) + ++struct gt_info { ++ const char *name; ++ struct { ++ int max_vs_entries; ++ } urb; ++}; ++ ++static const struct gt_info bdw_gt_info = { ++ .name = "Broadwell (gen8)", ++ .urb = { .max_vs_entries = 960 }, ++}; ++ ++static bool is_bdw(struct sna *sna) ++{ ++ return sna->kgem.gen == 0100; ++} ++ ++static const struct gt_info chv_gt_info = { ++ .name = "Cherryview (gen8)", ++ .urb = { .max_vs_entries = 640 }, ++}; ++ ++static bool is_chv(struct sna *sna) ++{ ++ return sna->kgem.gen == 0101; ++} ++ + static inline bool too_large(int width, int height) + { + return width > GEN8_MAX_SIZE || height > GEN8_MAX_SIZE; +@@ -462,7 +496,7 @@ gen8_emit_urb(struct sna *sna) + { + /* num of VS entries must be divisible by 8 if size < 9 */ + OUT_BATCH(GEN8_3DSTATE_URB_VS | (2 - 2)); +- OUT_BATCH(960 << URB_ENTRY_NUMBER_SHIFT | ++ OUT_BATCH(sna->render_state.gen8.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT | + (2 - 1) << URB_ENTRY_SIZE_SHIFT | + 4 << URB_STARTING_ADDRESS_SHIFT); + +@@ -873,7 +907,7 @@ gen8_emit_cc(struct sna *sna, uint32_t blend) + assert(blend / GEN8_BLENDFACTOR_COUNT > 0); + assert(blend % GEN8_BLENDFACTOR_COUNT > 0); + +- /* XXX can have upto 8 blend states preload, selectable via ++ /* XXX can have up to 8 blend states preload, selectable via + * Render Target Index. What other side-effects of Render Target Index? 
+ */ + +@@ -1167,6 +1201,7 @@ gen8_emit_pipe_stall(struct sna *sna) + { + OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(PIPE_CONTROL_CS_STALL | ++ PIPE_CONTROL_FLUSH | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH64(0); + OUT_BATCH64(0); +@@ -1876,12 +1911,12 @@ gen8_composite_picture(struct sna *sna, + if (channel->repeat || + (x >= 0 && + y >= 0 && +- x + w < pixmap->drawable.width && +- y + h < pixmap->drawable.height)) { ++ x + w <= pixmap->drawable.width && ++ y + h <= pixmap->drawable.height)) { + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv && priv->clear) { + DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); +- return gen4_channel_init_solid(sna, channel, priv->clear_color); ++ return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); + } + } + } else +@@ -1961,7 +1996,9 @@ gen8_composite_set_target(struct sna *sna, + } else + sna_render_picture_extents(dst, &box); + +- hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; ++ hint = PREFER_GPU | RENDER_GPU; ++ if (!need_tiling(sna, op->dst.width, op->dst.height)) ++ hint |= FORCE_GPU; + if (!partial) { + hint |= IGNORE_DAMAGE; + if (w == op->dst.width && h == op->dst.height) +@@ -2002,46 +2039,78 @@ gen8_composite_set_target(struct sna *sna, + + static bool + try_blt(struct sna *sna, +- PicturePtr dst, PicturePtr src, +- int width, int height) ++ uint8_t op, ++ PicturePtr src, ++ PicturePtr mask, ++ PicturePtr dst, ++ int16_t src_x, int16_t src_y, ++ int16_t msk_x, int16_t msk_y, ++ int16_t dst_x, int16_t dst_y, ++ int16_t width, int16_t height, ++ unsigned flags, ++ struct sna_composite_op *tmp) + { + struct kgem_bo *bo; + + if (sna->kgem.mode == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); +- return true; ++ goto execute; + } + + if (too_large(width, height)) { + DBG(("%s: operation too large for 3D pipe (%d, %d)\n", + __FUNCTION__, width, height)); +- return true; ++ goto execute; + } + + 
bo = __sna_drawable_peek_bo(dst->pDrawable); + if (bo == NULL) +- return true; +- if (bo->rq) +- return RQ_IS_BLT(bo->rq); ++ goto execute; ++ ++ if (untiled_tlb_miss(bo)) ++ goto execute; ++ ++ if (bo->rq) { ++ if (RQ_IS_BLT(bo->rq)) ++ goto execute; ++ ++ return false; ++ } ++ ++ if (bo->tiling == I915_TILING_Y) ++ goto upload; + + if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) +- return true; ++ goto execute; ++ ++ if (src->pDrawable == dst->pDrawable && ++ (sna->render_state.gt < 3 || width*height < 1024) && ++ can_switch_to_blt(sna, bo, 0)) ++ goto execute; + + if (src->pDrawable) { +- bo = __sna_drawable_peek_bo(src->pDrawable); +- if (bo == NULL) +- return true; ++ struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); ++ if (s == NULL) ++ goto upload; + +- if (prefer_blt_bo(sna, bo)) +- return RQ_IS_BLT(bo->rq); ++ if (prefer_blt_bo(sna, s, bo)) ++ goto execute; + } + + if (sna->kgem.ring == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); +- return true; ++ goto execute; + } + +- return false; ++upload: ++ flags |= COMPOSITE_UPLOAD; ++execute: ++ return sna_blt_composite(sna, op, ++ src, dst, ++ src_x, src_y, ++ dst_x, dst_y, ++ width, height, ++ flags, tmp); + } + + static bool +@@ -2271,13 +2340,13 @@ gen8_render_composite(struct sna *sna, + width, height, sna->kgem.mode, sna->kgem.ring)); + + if (mask == NULL && +- try_blt(sna, dst, src, width, height) && +- sna_blt_composite(sna, op, +- src, dst, +- src_x, src_y, +- dst_x, dst_y, +- width, height, +- flags, tmp)) ++ try_blt(sna, op, ++ src, mask, dst, ++ src_x, src_y, ++ msk_x, msk_y, ++ dst_x, dst_y, ++ width, height, ++ flags, tmp)) + return true; + + if (gen8_composite_fallback(sna, src, mask, dst)) +@@ -2700,27 +2769,37 @@ prefer_blt_copy(struct sna *sna, + + assert((flags & COPY_SYNC) == 0); + +- if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) +- return true; +- + if (untiled_tlb_miss(src_bo) || + untiled_tlb_miss(dst_bo)) + return true; 
+ +- if (force_blt_ring(sna)) ++ if (flags & COPY_DRI && !sna->kgem.has_semaphores) ++ return false; ++ ++ if (force_blt_ring(sna, dst_bo)) ++ return true; ++ ++ if ((flags & COPY_SMALL || ++ (sna->render_state.gt < 3 && src_bo == dst_bo)) && ++ can_switch_to_blt(sna, dst_bo, flags)) + return true; + + if (kgem_bo_is_render(dst_bo) || + kgem_bo_is_render(src_bo)) + return false; + ++ if (flags & COPY_LAST && ++ sna->render_state.gt < 3 && ++ can_switch_to_blt(sna, dst_bo, flags)) ++ return true; ++ + if (prefer_render_ring(sna, dst_bo)) + return false; + + if (!prefer_blt_ring(sna, dst_bo, flags)) + return false; + +- return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); ++ return prefer_blt_bo(sna, src_bo, dst_bo); + } + + static bool +@@ -2770,7 +2849,7 @@ fallback_blt: + &extents)) { + bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); + +- if ((big || can_switch_to_blt(sna, dst_bo, flags)) && ++ if ((big || !prefer_render_ring(sna, dst_bo)) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, +@@ -2785,8 +2864,7 @@ fallback_blt: + assert(src->depth == dst->depth); + assert(src->width == dst->width); + assert(src->height == dst->height); +- return sna_render_copy_boxes__overlap(sna, alu, +- src, src_bo, ++ return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, + src_dx, src_dy, + dst_dx, dst_dy, + box, n, &extents); +@@ -3665,7 +3743,9 @@ static void gen8_emit_video_state(struct sna *sna, + frame->pitch[0]; + n_src = 6; + } else { +- if (frame->id == FOURCC_UYVY) ++ if (frame->id == FOURCC_RGB888) ++ src_surf_format = SURFACEFORMAT_B8G8R8X8_UNORM; ++ else if (frame->id == FOURCC_UYVY) + src_surf_format = SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = SURFACEFORMAT_YCRCB_NORMAL; +@@ -3697,6 +3777,23 @@ static void gen8_emit_video_state(struct sna *sna, + gen8_emit_state(sna, op, offset); + } + ++static unsigned select_video_kernel(const struct sna_video_frame *frame) ++{ ++ switch 
(frame->id) { ++ case FOURCC_YV12: ++ case FOURCC_I420: ++ case FOURCC_XVMC: ++ return GEN8_WM_KERNEL_VIDEO_PLANAR; ++ ++ case FOURCC_RGB888: ++ case FOURCC_RGB565: ++ return GEN8_WM_KERNEL_VIDEO_RGB; ++ ++ default: ++ return GEN8_WM_KERNEL_VIDEO_PACKED; ++ } ++} ++ + static bool + gen8_render_video(struct sna *sna, + struct sna_video *video, +@@ -3712,9 +3809,9 @@ gen8_render_video(struct sna *sna, + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; +- int nbox, pix_xoff, pix_yoff; + unsigned filter; + const BoxRec *box; ++ int nbox; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", + __FUNCTION__, +@@ -3743,6 +3840,11 @@ gen8_render_video(struct sna *sna, + tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; + ++ DBG(("%s: scaling?=%d, planar?=%d [%x]\n", ++ __FUNCTION__, ++ src_width != dst_width || src_height != dst_height, ++ is_planar_fourcc(frame->id), frame->id)); ++ + if (src_width == dst_width && src_height == dst_height) + filter = SAMPLER_FILTER_NEAREST; + else +@@ -3752,9 +3854,7 @@ gen8_render_video(struct sna *sna, + GEN8_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, + SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), + NO_BLEND, +- is_planar_fourcc(frame->id) ? +- GEN8_WM_KERNEL_VIDEO_PLANAR : +- GEN8_WM_KERNEL_VIDEO_PACKED, ++ select_video_kernel(frame), + 2); + tmp.priv = frame; + +@@ -3770,17 +3870,6 @@ gen8_render_video(struct sna *sna, + gen8_align_vertex(sna, &tmp); + gen8_emit_video_state(sna, &tmp); + +- /* Set up the offset for translating from the given region (in screen +- * coordinates) to the backing pixmap. 
+- */ +-#ifdef COMPOSITE +- pix_xoff = -pixmap->screen_x + pixmap->drawable.x; +- pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +-#else +- pix_xoff = 0; +- pix_yoff = 0; +-#endif +- + DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", + __FUNCTION__, + frame->src.x1, frame->src.y1, +@@ -3802,45 +3891,36 @@ gen8_render_video(struct sna *sna, + box = region_rects(dstRegion); + nbox = region_num_rects(dstRegion); + while (nbox--) { +- BoxRec r; +- + DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2, box->y2, +- pix_xoff, pix_yoff, + box->x1 * src_scale_x + src_offset_x, + box->y1 * src_scale_y + src_offset_y, + box->x2 * src_scale_x + src_offset_x, + box->y2 * src_scale_y + src_offset_y)); + +- r.x1 = box->x1 + pix_xoff; +- r.x2 = box->x2 + pix_xoff; +- r.y1 = box->y1 + pix_yoff; +- r.y2 = box->y2 + pix_yoff; +- + gen8_get_rectangles(sna, &tmp, 1, gen8_emit_video_state); + +- OUT_VERTEX(r.x2, r.y2); ++ OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y2); ++ OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + +- OUT_VERTEX(r.x1, r.y1); ++ OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + +- if (!DAMAGE_IS_ALL(priv->gpu_damage)) { +- sna_damage_add_box(&priv->gpu_damage, &r); +- sna_damage_subtract_box(&priv->cpu_damage, &r); +- } + box++; + } +- + gen8_vertex_flush(sna); ++ ++ if (!DAMAGE_IS_ALL(priv->gpu_damage)) ++ sna_damage_add(&priv->gpu_damage, dstRegion); ++ + return true; + } + #endif +@@ -3896,6 +3976,13 @@ static bool gen8_render_setup(struct sna *sna) + state->gt = ((devid >> 4) & 0xf) + 1; + DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); + ++ if (is_bdw(sna)) ++ state->info = 
&bdw_gt_info; ++ else if (is_chv(sna)) ++ state->info = &chv_gt_info; ++ else ++ return false; ++ + sna_static_stream_init(&general); + + /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer +@@ -4007,5 +4094,5 @@ const char *gen8_render_init(struct sna *sna, const char *backend) + + sna->render.max_3d_size = GEN8_MAX_SIZE; + sna->render.max_3d_pitch = 1 << 18; +- return "Broadwell"; ++ return sna->render_state.gen8.info->name; + } +diff --git a/src/sna/gen8_render.h b/src/sna/gen8_render.h +index eb4928e7..e6a8dc55 100644 +--- a/src/sna/gen8_render.h ++++ b/src/sna/gen8_render.h +@@ -335,6 +335,7 @@ + #define PIPE_CONTROL_IS_FLUSH (1 << 11) + #define PIPE_CONTROL_TC_FLUSH (1 << 10) + #define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) ++#define PIPE_CONTROL_FLUSH (1 << 7) + #define PIPE_CONTROL_GLOBAL_GTT (1 << 2) + #define PIPE_CONTROL_LOCAL_PGTT (0 << 2) + #define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) +diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c +new file mode 100644 +index 00000000..e5f12c72 +--- /dev/null ++++ b/src/sna/gen9_render.c +@@ -0,0 +1,4156 @@ ++/* ++ * Copyright © 2012,2013 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ * ++ * Authors: ++ * Chris Wilson <chris@chris-wilson.co.uk> ++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include "sna.h" ++#include "sna_reg.h" ++#include "sna_render.h" ++#include "sna_render_inline.h" ++#include "sna_video.h" ++ ++#include "gen9_render.h" ++#include "gen8_eu.h" ++#include "gen4_common.h" ++#include "gen4_source.h" ++#include "gen4_vertex.h" ++#include "gen6_common.h" ++#include "gen8_vertex.h" ++ ++#define SIM 1 ++ ++#define ALWAYS_INVALIDATE 0 ++#define ALWAYS_FLUSH 0 ++#define ALWAYS_STALL 0 ++ ++#define NO_COMPOSITE 0 ++#define NO_COMPOSITE_SPANS 0 ++#define NO_COPY 0 ++#define NO_COPY_BOXES 0 ++#define NO_FILL 0 ++#define NO_FILL_BOXES 0 ++#define NO_FILL_ONE 0 ++#define NO_FILL_CLEAR 0 ++#define NO_VIDEO 0 ++ ++#define USE_8_PIXEL_DISPATCH 1 ++#define USE_16_PIXEL_DISPATCH 1 ++#define USE_32_PIXEL_DISPATCH 0 ++ ++#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH ++#error "Must select at least 8, 16 or 32 pixel dispatch" ++#endif ++ ++#define GEN9_MAX_SIZE 16384 ++#define GEN9_GT_BIAS 1 /* Each GT is bigger than previous gen */ ++ ++/* XXX Todo ++ * ++ * STR (software tiled rendering) mode. No, really. ++ * 64x32 pixel blocks align with the rendering cache. Worth considering. ++ */ ++ ++#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) ++ ++/* Pipeline stages: ++ * 1. Command Streamer (CS) ++ * 2. Vertex Fetch (VF) ++ * 3. Vertex Shader (VS) ++ * 4. Hull Shader (HS) ++ * 5. 
Tesselation Engine (TE) ++ * 6. Domain Shader (DS) ++ * 7. Geometry Shader (GS) ++ * 8. Stream Output Logic (SOL) ++ * 9. Clipper (CLIP) ++ * 10. Strip/Fan (SF) ++ * 11. Windower/Masker (WM) ++ * 12. Color Calculator (CC) ++ */ ++ ++#if !NO_VIDEO ++static const uint32_t ps_kernel_packed[][4] = { ++#include "exa_wm_src_affine.g8b" ++#include "exa_wm_src_sample_argb.g8b" ++#include "exa_wm_yuv_rgb.g8b" ++#include "exa_wm_write.g8b" ++}; ++ ++static const uint32_t ps_kernel_planar[][4] = { ++#include "exa_wm_src_affine.g8b" ++#include "exa_wm_src_sample_planar.g8b" ++#include "exa_wm_yuv_rgb.g8b" ++#include "exa_wm_write.g8b" ++}; ++ ++static const uint32_t ps_kernel_rgb[][4] = { ++#include "exa_wm_src_affine.g8b" ++#include "exa_wm_src_sample_argb.g8b" ++#include "exa_wm_write.g8b" ++}; ++#endif ++ ++#define SURFACE_DW (64 / sizeof(uint32_t)); ++ ++#define KERNEL(kernel_enum, kernel, num_surfaces) \ ++ [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} ++#define NOKERNEL(kernel_enum, func, num_surfaces) \ ++ [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} ++static const struct wm_kernel_info { ++ const char *name; ++ const void *data; ++ unsigned int size; ++ int num_surfaces; ++} wm_kernels[] = { ++ NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2), ++ NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2), ++ ++ NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3), ++ NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3), ++ ++ NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3), ++ NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3), ++ ++ NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3), ++ NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3), ++ ++ NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2), ++ NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2), ++ ++#if !NO_VIDEO ++ KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), ++ KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), ++ KERNEL(VIDEO_RGB, 
ps_kernel_rgb, 2), ++#endif ++}; ++#undef KERNEL ++ ++static const struct blendinfo { ++ uint8_t src_alpha; ++ uint8_t src_blend; ++ uint8_t dst_blend; ++} gen9_blend_op[] = { ++ /* Clear */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, ++ /* Src */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, ++ /* Dst */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, ++ /* Over */ {1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, ++ /* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, ++ /* In */ {0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, ++ /* InReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, ++ /* Out */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, ++ /* OutReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, ++ /* Atop */ {1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, ++ /* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, ++ /* Xor */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, ++ /* Add */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, ++}; ++ ++/** ++ * Highest-valued BLENDFACTOR used in gen9_blend_op. ++ * ++ * This leaves out GEN9_BLENDFACTOR_INV_DST_COLOR, ++ * GEN9_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, ++ * GEN9_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} ++ */ ++#define GEN9_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1) ++ ++#define GEN9_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen9_blend_state), 64) ++ ++#define BLEND_OFFSET(s, d) \ ++ ((d != BLENDFACTOR_ZERO) << 15 | ((s) * GEN9_BLENDFACTOR_COUNT + (d)) << 4) ++ ++#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO) ++#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO) ++ ++#define SAMPLER_OFFSET(sf, se, mf, me) \ ++ (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) ++ ++#define VERTEX_2s2s 0 ++ ++#define COPY_SAMPLER 0 ++#define COPY_VERTEX VERTEX_2s2s ++#define COPY_FLAGS(a) GEN9_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? 
NO_BLEND : CLEAR, GEN9_WM_KERNEL_NOMASK, COPY_VERTEX) ++ ++#define FILL_SAMPLER 1 ++#define FILL_VERTEX VERTEX_2s2s ++#define FILL_FLAGS(op, format) GEN9_SET_FLAGS(FILL_SAMPLER, gen9_get_blend((op), false, (format)), GEN9_WM_KERNEL_NOMASK, FILL_VERTEX) ++#define FILL_FLAGS_NOBLEND GEN9_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN9_WM_KERNEL_NOMASK, FILL_VERTEX) ++ ++#define GEN9_SAMPLER(f) (((f) >> 20) & 0xfff) ++#define GEN9_BLEND(f) (((f) >> 4) & 0x7ff) ++#define GEN9_READS_DST(f) (((f) >> 15) & 1) ++#define GEN9_KERNEL(f) (((f) >> 16) & 0xf) ++#define GEN9_VERTEX(f) (((f) >> 0) & 0xf) ++#define GEN9_SET_FLAGS(S, B, K, V) ((S) << 20 | (K) << 16 | (B) | (V)) ++ ++#define OUT_BATCH(v) batch_emit(sna, v) ++#define OUT_BATCH64(v) batch_emit64(sna, v) ++#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) ++#define OUT_VERTEX_F(v) vertex_emit(sna, v) ++ ++struct gt_info { ++ const char *name; ++ struct { ++ int max_vs_entries; ++ } urb; ++}; ++ ++static const struct gt_info min_gt_info = { ++ .name = "Skylake (gen9)", ++ .urb = { .max_vs_entries = 240 }, ++}; ++ ++static const struct gt_info skl_gt_info = { ++ .name = "Skylake (gen9)", ++ .urb = { .max_vs_entries = 960 }, ++}; ++ ++static const struct gt_info bxt_gt_info = { ++ .name = "Broxton (gen9)", ++ .urb = { .max_vs_entries = 320 }, ++}; ++ ++static const struct gt_info kbl_gt_info = { ++ .name = "Kabylake (gen9)", ++ .urb = { .max_vs_entries = 960 }, ++}; ++ ++static const struct gt_info glk_gt_info = { ++ .name = "Geminilake (gen9)", ++ .urb = { .max_vs_entries = 320 }, ++}; ++ ++static bool is_skl(struct sna *sna) ++{ ++ return sna->kgem.gen == 0110; ++} ++ ++static bool is_bxt(struct sna *sna) ++{ ++ return sna->kgem.gen == 0111; ++} ++ ++static bool is_kbl(struct sna *sna) ++{ ++ return sna->kgem.gen == 0112; ++} ++ ++static bool is_glk(struct sna *sna) ++{ ++ return sna->kgem.gen == 0113; ++} ++ ++ ++static inline bool too_large(int width, int height) ++{ ++ return width > GEN9_MAX_SIZE || height > GEN9_MAX_SIZE; 
++} ++ ++static inline bool unaligned(struct kgem_bo *bo, int bpp) ++{ ++ /* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */ ++#if 0 ++ int x, y; ++ ++ if (bo->proxy == NULL) ++ return false; ++ ++ /* Assume that all tiled proxies are constructed correctly. */ ++ if (bo->tiling) ++ return false; ++ ++ DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n", ++ __FUNCTION__, bo->delta, bo->pitch, bpp, ++ 8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch)); ++ ++ /* This may be a random userptr map, check that it meets the ++ * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4. ++ */ ++ y = bo->delta / bo->pitch; ++ if (y & 3) ++ return true; ++ ++ x = 8 * (bo->delta - y * bo->pitch); ++ if (x & (4*bpp - 1)) ++ return true; ++ ++ return false; ++#else ++ return false; ++#endif ++} ++ ++static uint32_t gen9_get_blend(int op, ++ bool has_component_alpha, ++ uint32_t dst_format) ++{ ++ uint32_t src, dst; ++ ++ COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN9_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff); ++ ++ src = gen9_blend_op[op].src_blend; ++ dst = gen9_blend_op[op].dst_blend; ++ ++ /* If there's no dst alpha channel, adjust the blend op so that ++ * we'll treat it always as 1. ++ */ ++ if (PICT_FORMAT_A(dst_format) == 0) { ++ if (src == BLENDFACTOR_DST_ALPHA) ++ src = BLENDFACTOR_ONE; ++ else if (src == BLENDFACTOR_INV_DST_ALPHA) ++ src = BLENDFACTOR_ZERO; ++ } ++ ++ /* If the source alpha is being used, then we should only be in a ++ * case where the source blend factor is 0, and the source blend ++ * value is the mask channels multiplied by the source picture's alpha. 
++ */ ++ if (has_component_alpha && gen9_blend_op[op].src_alpha) { ++ if (dst == BLENDFACTOR_SRC_ALPHA) ++ dst = BLENDFACTOR_SRC_COLOR; ++ else if (dst == BLENDFACTOR_INV_SRC_ALPHA) ++ dst = BLENDFACTOR_INV_SRC_COLOR; ++ } ++ ++ DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", ++ op, dst_format, PICT_FORMAT_A(dst_format), ++ src, dst, (int)(BLEND_OFFSET(src, dst)>>4))); ++ assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff); ++ return BLEND_OFFSET(src, dst); ++} ++ ++static uint32_t gen9_get_card_format(PictFormat format) ++{ ++ switch (format) { ++ default: ++ return -1; ++ case PICT_a8r8g8b8: ++ return SURFACEFORMAT_B8G8R8A8_UNORM; ++ case PICT_x8r8g8b8: ++ return SURFACEFORMAT_B8G8R8X8_UNORM; ++ case PICT_a8b8g8r8: ++ return SURFACEFORMAT_R8G8B8A8_UNORM; ++ case PICT_x8b8g8r8: ++ return SURFACEFORMAT_R8G8B8X8_UNORM; ++#ifdef PICT_a2r10g10b10 ++ case PICT_a2r10g10b10: ++ return SURFACEFORMAT_B10G10R10A2_UNORM; ++ case PICT_x2r10g10b10: ++ return SURFACEFORMAT_B10G10R10X2_UNORM; ++#endif ++ case PICT_r8g8b8: ++ return SURFACEFORMAT_R8G8B8_UNORM; ++ case PICT_r5g6b5: ++ return SURFACEFORMAT_B5G6R5_UNORM; ++ case PICT_a1r5g5b5: ++ return SURFACEFORMAT_B5G5R5A1_UNORM; ++ case PICT_a8: ++ return SURFACEFORMAT_A8_UNORM; ++ case PICT_a4r4g4b4: ++ return SURFACEFORMAT_B4G4R4A4_UNORM; ++ } ++} ++ ++static uint32_t gen9_get_dest_format(PictFormat format) ++{ ++ switch (format) { ++ default: ++ return -1; ++ case PICT_a8r8g8b8: ++ case PICT_x8r8g8b8: ++ return SURFACEFORMAT_B8G8R8A8_UNORM; ++ case PICT_a8b8g8r8: ++ case PICT_x8b8g8r8: ++ return SURFACEFORMAT_R8G8B8A8_UNORM; ++#ifdef PICT_a2r10g10b10 ++ case PICT_a2r10g10b10: ++ case PICT_x2r10g10b10: ++ return SURFACEFORMAT_B10G10R10A2_UNORM; ++#endif ++ case PICT_r5g6b5: ++ return SURFACEFORMAT_B5G6R5_UNORM; ++ case PICT_x1r5g5b5: ++ case PICT_a1r5g5b5: ++ return SURFACEFORMAT_B5G5R5A1_UNORM; ++ case PICT_a8: ++ return SURFACEFORMAT_A8_UNORM; ++ case PICT_a4r4g4b4: ++ case PICT_x4r4g4b4: ++ return 
SURFACEFORMAT_B4G4R4A4_UNORM; ++ } ++} ++ ++static bool gen9_check_dst_format(PictFormat format) ++{ ++ if (gen9_get_dest_format(format) != -1) ++ return true; ++ ++ DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); ++ return false; ++} ++ ++static bool gen9_check_format(uint32_t format) ++{ ++ if (gen9_get_card_format(format) != -1) ++ return true; ++ ++ DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); ++ return false; ++} ++ ++static uint32_t gen9_filter(uint32_t filter) ++{ ++ switch (filter) { ++ default: ++ assert(0); ++ case PictFilterNearest: ++ return SAMPLER_FILTER_NEAREST; ++ case PictFilterBilinear: ++ return SAMPLER_FILTER_BILINEAR; ++ } ++} ++ ++static uint32_t gen9_check_filter(PicturePtr picture) ++{ ++ switch (picture->filter) { ++ case PictFilterNearest: ++ case PictFilterBilinear: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static uint32_t gen9_repeat(uint32_t repeat) ++{ ++ switch (repeat) { ++ default: ++ assert(0); ++ case RepeatNone: ++ return SAMPLER_EXTEND_NONE; ++ case RepeatNormal: ++ return SAMPLER_EXTEND_REPEAT; ++ case RepeatPad: ++ return SAMPLER_EXTEND_PAD; ++ case RepeatReflect: ++ return SAMPLER_EXTEND_REFLECT; ++ } ++} ++ ++static bool gen9_check_repeat(PicturePtr picture) ++{ ++ if (!picture->repeat) ++ return true; ++ ++ switch (picture->repeatType) { ++ case RepeatNone: ++ case RepeatNormal: ++ case RepeatPad: ++ case RepeatReflect: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static int ++gen9_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) ++{ ++ int base; ++ ++ if (has_mask) { ++ if (is_ca) { ++ if (gen9_blend_op[op].src_alpha) ++ base = GEN9_WM_KERNEL_MASKSA; ++ else ++ base = GEN9_WM_KERNEL_MASKCA; ++ } else ++ base = GEN9_WM_KERNEL_MASK; ++ } else ++ base = GEN9_WM_KERNEL_NOMASK; ++ ++ return base + !is_affine; ++} ++ ++static void ++gen9_emit_push_constants(struct sna *sna) ++{ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS | 
(2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); ++ OUT_BATCH(0); ++#endif ++} ++ ++static void ++gen9_emit_urb(struct sna *sna) ++{ ++ /* num of VS entries must be divisible by 8 if size < 9 */ ++ OUT_BATCH(GEN9_3DSTATE_URB_VS | (2 - 2)); ++ OUT_BATCH(sna->render_state.gen9.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT | ++ (2 - 1) << URB_ENTRY_SIZE_SHIFT | ++ 4 << URB_STARTING_ADDRESS_SHIFT); ++ ++ OUT_BATCH(GEN9_3DSTATE_URB_HS | (2 - 2)); ++ OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | ++ 4 << URB_STARTING_ADDRESS_SHIFT); ++ ++ OUT_BATCH(GEN9_3DSTATE_URB_DS | (2 - 2)); ++ OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | ++ 4 << URB_STARTING_ADDRESS_SHIFT); ++ ++ OUT_BATCH(GEN9_3DSTATE_URB_GS | (2 - 2)); ++ OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | ++ 4 << URB_STARTING_ADDRESS_SHIFT); ++} ++ ++static void ++gen9_emit_state_base_address(struct sna *sna) ++{ ++ uint32_t num_pages; ++ ++ assert(sna->kgem.surface - sna->kgem.nbatch <= 16384); ++ ++ /* WaBindlessSurfaceStateModifyEnable:skl,bxt */ ++ OUT_BATCH(GEN9_STATE_BASE_ADDRESS | (19 - 1 - 2)); ++ OUT_BATCH64(0); /* general */ ++ OUT_BATCH(0); /* stateless dataport */ ++ OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */ ++ sna->kgem.nbatch, ++ NULL, ++ I915_GEM_DOMAIN_INSTRUCTION << 16, ++ BASE_ADDRESS_MODIFY)); ++ OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */ ++ sna->kgem.nbatch, ++ sna->render_state.gen9.general_bo, ++ I915_GEM_DOMAIN_INSTRUCTION << 16, ++ BASE_ADDRESS_MODIFY)); ++ OUT_BATCH64(0); /* indirect */ ++ OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */ ++ sna->kgem.nbatch, ++ sna->render_state.gen9.general_bo, ++ I915_GEM_DOMAIN_INSTRUCTION << 16, ++ BASE_ADDRESS_MODIFY)); ++ /* upper bounds */ ++ num_pages = 
sna->render_state.gen9.general_bo->size.pages.count; ++ OUT_BATCH(0); /* general */ ++ OUT_BATCH(num_pages << 12 | 1); /* dynamic */ ++ OUT_BATCH(0); /* indirect */ ++ OUT_BATCH(num_pages << 12 | 1); /* instruction */ ++ ++ /* Bindless */ ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++} ++ ++static void ++gen9_emit_vs_invariant(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_3DSTATE_VS | (9 - 2)); ++ OUT_BATCH64(0); /* no VS kernel */ ++ OUT_BATCH(0); ++ OUT_BATCH64(0); /* scratch */ ++ OUT_BATCH(0); ++ OUT_BATCH(1 << 1); /* pass-through */ ++ OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */ ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_CONSTANT_VS | (11 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); ++ OUT_BATCH(0); ++#endif ++} ++ ++static void ++gen9_emit_hs_invariant(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_3DSTATE_HS | (9 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH64(0); /* no HS kernel */ ++ OUT_BATCH64(0); /* scratch */ ++ OUT_BATCH(0); ++ OUT_BATCH(0); /* pass-through */ ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_CONSTANT_HS | (11 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ ++#if 1 ++ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); ++ OUT_BATCH(0); ++#endif ++#endif ++} ++ ++static void ++gen9_emit_te_invariant(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_3DSTATE_TE | (4 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++} ++ ++static void ++gen9_emit_ds_invariant(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_3DSTATE_DS | (11 - 2)); ++ OUT_BATCH64(0); /* no kernel */ ++ OUT_BATCH(0); ++ OUT_BATCH64(0); /* scratch */ ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ 
OUT_BATCH(0); ++ OUT_BATCH(0); ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_CONSTANT_DS | (11 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ ++#if 1 ++ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); ++ OUT_BATCH(0); ++#endif ++#endif ++} ++ ++static void ++gen9_emit_gs_invariant(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_3DSTATE_GS | (10 - 2)); ++ OUT_BATCH64(0); /* no GS kernel */ ++ OUT_BATCH(0); ++ OUT_BATCH64(0); /* scratch */ ++ OUT_BATCH(0); ++ OUT_BATCH(0); /* pass-through */ ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_CONSTANT_GS | (11 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ ++#if 1 ++ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); ++ OUT_BATCH(0); ++#endif ++#endif ++} ++ ++static void ++gen9_emit_sol_invariant(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_3DSTATE_STREAMOUT | (5 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++} ++ ++static void ++gen9_emit_sf_invariant(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_3DSTATE_SF | (4 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++} ++ ++static void ++gen9_emit_clip_invariant(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_3DSTATE_CLIP | (4 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); /* pass-through */ ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); ++ OUT_BATCH(0); ++} ++ ++static void ++gen9_emit_null_depth_buffer(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_3DSTATE_DEPTH_BUFFER | (8 - 2)); ++#if 1 ++ OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT | ++ DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT); 
++#else ++ OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT | ++ DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT); ++#endif ++ OUT_BATCH64(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH64(0); ++ OUT_BATCH(0); ++#endif ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_STENCIL_BUFFER | (5 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH64(0); ++ OUT_BATCH(0); ++#endif ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_WM_DEPTH_STENCIL | (4 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++#endif ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_CLEAR_PARAMS | (3 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++#endif ++} ++ ++static void ++gen9_emit_wm_invariant(struct sna *sna) ++{ ++ gen9_emit_null_depth_buffer(sna); ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2)); ++ OUT_BATCH(0); ++#endif ++ ++ OUT_BATCH(GEN9_3DSTATE_WM | (2 - 2)); ++ //OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */ ++ OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC); ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_WM_CHROMAKEY | (2 - 2)); ++ OUT_BATCH(0); ++#endif ++ ++#if 0 ++ OUT_BATCH(GEN9_3DSTATE_WM_HZ_OP | (5 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++#endif ++ ++ OUT_BATCH(GEN9_3DSTATE_PS_EXTRA | (2 - 2)); ++ OUT_BATCH(PSX_PIXEL_SHADER_VALID | ++ PSX_ATTRIBUTE_ENABLE); ++ ++ OUT_BATCH(GEN9_3DSTATE_RASTER | (5 - 2)); ++ OUT_BATCH(RASTER_FRONT_WINDING_CCW | ++ RASTER_CULL_NONE); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_SBE_SWIZ | (11 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_CONSTANT_PS | (11 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++#endif ++} ++ ++static void 
++gen9_emit_cc_invariant(struct sna *sna) ++{ ++} ++ ++static void ++gen9_emit_vf_invariant(struct sna *sna) ++{ ++ int n; ++ ++#if 1 ++ OUT_BATCH(GEN9_3DSTATE_VF | (2 - 2)); ++ OUT_BATCH(0); ++#endif ++ ++ OUT_BATCH(GEN9_3DSTATE_VF_SGVS | (2 - 2)); ++ OUT_BATCH(0); ++ ++ OUT_BATCH(GEN9_3DSTATE_VF_TOPOLOGY | (2 - 2)); ++ OUT_BATCH(RECTLIST); ++ ++ OUT_BATCH(GEN9_3DSTATE_VF_STATISTICS | 0); ++ ++ for (n = 1; n <= 3; n++) { ++ OUT_BATCH(GEN9_3DSTATE_VF_INSTANCING | (3 - 2)); ++ OUT_BATCH(n); ++ OUT_BATCH(0); ++ } ++} ++ ++static void ++gen9_emit_invariant(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_PIPELINE_SELECT | ++ PIPELINE_SELECTION_MASK | ++ PIPELINE_SELECT_3D); ++ ++#if SIM ++ OUT_BATCH(GEN9_STATE_SIP | (3 - 2)); ++ OUT_BATCH64(0); ++#endif ++ ++ OUT_BATCH(GEN9_3DSTATE_MULTISAMPLE | (2 - 2)); ++ OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER | ++ MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ ++ ++ OUT_BATCH(GEN9_3DSTATE_SAMPLE_MASK | (2 - 2)); ++ OUT_BATCH(1); ++ ++#if SIM ++ OUT_BATCH(GEN9_3DSTATE_SAMPLE_PATTERN | (5 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ //OUT_BATCH(8<<20 | 8<<16); ++ OUT_BATCH(0); ++#endif ++ ++ gen9_emit_push_constants(sna); ++ gen9_emit_urb(sna); ++ ++ gen9_emit_state_base_address(sna); ++ ++ gen9_emit_vf_invariant(sna); ++ gen9_emit_vs_invariant(sna); ++ gen9_emit_hs_invariant(sna); ++ gen9_emit_te_invariant(sna); ++ gen9_emit_ds_invariant(sna); ++ gen9_emit_gs_invariant(sna); ++ gen9_emit_sol_invariant(sna); ++ gen9_emit_clip_invariant(sna); ++ gen9_emit_sf_invariant(sna); ++ gen9_emit_wm_invariant(sna); ++ gen9_emit_cc_invariant(sna); ++ ++ sna->render_state.gen9.needs_invariant = false; ++} ++ ++static void ++gen9_emit_cc(struct sna *sna, uint32_t blend) ++{ ++ struct gen9_render_state *render = &sna->render_state.gen9; ++ ++ if (render->blend == blend) ++ return; ++ ++ DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n", ++ __FUNCTION__, blend, render->blend, ++ blend / GEN9_BLENDFACTOR_COUNT, ++ blend % 
GEN9_BLENDFACTOR_COUNT)); ++ ++ assert(blend < GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT); ++ assert(blend / GEN9_BLENDFACTOR_COUNT > 0); ++ assert(blend % GEN9_BLENDFACTOR_COUNT > 0); ++ ++ /* XXX can have up to 8 blend states preload, selectable via ++ * Render Target Index. What other side-effects of Render Target Index? ++ */ ++ ++ OUT_BATCH(GEN9_3DSTATE_PS_BLEND | (2 - 2)); ++ if (blend != GEN9_BLEND(NO_BLEND)) { ++ uint32_t src = blend / GEN9_BLENDFACTOR_COUNT; ++ uint32_t dst = blend % GEN9_BLENDFACTOR_COUNT; ++ OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT | ++ PS_BLEND_COLOR_BLEND_ENABLE | ++ src << PS_BLEND_SRC_ALPHA_SHIFT | ++ dst << PS_BLEND_DST_ALPHA_SHIFT | ++ src << PS_BLEND_SRC_SHIFT | ++ dst << PS_BLEND_DST_SHIFT); ++ } else ++ OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT); ++ ++ assert(is_aligned(render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE, 64)); ++ OUT_BATCH(GEN9_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); ++ OUT_BATCH((render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE) | 1); ++ ++ /* Force a CC_STATE pointer change to improve blend performance */ ++ OUT_BATCH(GEN9_3DSTATE_CC_STATE_POINTERS | (2 - 2)); ++ OUT_BATCH(0); ++ ++ render->blend = blend; ++} ++ ++static void ++gen9_emit_sampler(struct sna *sna, uint32_t state) ++{ ++ if (sna->render_state.gen9.samplers == state) ++ return; ++ ++ sna->render_state.gen9.samplers = state; ++ ++ DBG(("%s: sampler = %x\n", __FUNCTION__, state)); ++ ++ assert(2 * sizeof(struct gen9_sampler_state) == 32); ++ OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); ++ OUT_BATCH(sna->render_state.gen9.wm_state + state * 2 * sizeof(struct gen9_sampler_state)); ++} ++ ++static void ++gen9_emit_sf(struct sna *sna, bool has_mask) ++{ ++ int num_sf_outputs = has_mask ? 
2 : 1; ++ ++ if (sna->render_state.gen9.num_sf_outputs == num_sf_outputs) ++ return; ++ ++ DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs)); ++ ++ sna->render_state.gen9.num_sf_outputs = num_sf_outputs; ++ ++ OUT_BATCH(GEN9_3DSTATE_SBE | (6 - 2)); ++ OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT | ++ SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */ ++ SBE_FORCE_VERTEX_URB_READ_OFFSET | ++ 1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT | ++ 1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(SBE_ACTIVE_COMPONENT_XYZW << 0 | ++ SBE_ACTIVE_COMPONENT_XYZW << 1); ++ OUT_BATCH(0); ++} ++ ++static void ++gen9_emit_wm(struct sna *sna, int kernel) ++{ ++ const uint32_t *kernels; ++ ++ assert(kernel < ARRAY_SIZE(wm_kernels)); ++ if (sna->render_state.gen9.kernel == kernel) ++ return; ++ ++ sna->render_state.gen9.kernel = kernel; ++ kernels = sna->render_state.gen9.wm_kernel[kernel]; ++ ++ DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n", ++ __FUNCTION__, ++ wm_kernels[kernel].name, ++ wm_kernels[kernel].num_surfaces, ++ kernels[0], kernels[1], kernels[2])); ++ assert(is_aligned(kernels[0], 64)); ++ assert(is_aligned(kernels[1], 64)); ++ assert(is_aligned(kernels[2], 64)); ++ ++ OUT_BATCH(GEN9_3DSTATE_PS | (12 - 2)); ++ OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]); ++ OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT | ++ PS_VECTOR_MASK_ENABLE | ++ wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); ++ OUT_BATCH64(0); /* scratch address */ ++ OUT_BATCH(PS_MAX_THREADS | ++ (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) | ++ (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) | ++ (kernels[2] ? PS_32_DISPATCH_ENABLE : 0)); ++ OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 
6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 | ++ 8 << PS_DISPATCH_START_GRF_SHIFT_1 | ++ 6 << PS_DISPATCH_START_GRF_SHIFT_2); ++ OUT_BATCH64(kernels[2]); ++ OUT_BATCH64(kernels[1]); ++} ++ ++static bool ++gen9_emit_binding_table(struct sna *sna, uint16_t offset) ++{ ++ if (sna->render_state.gen9.surface_table == offset) ++ return false; ++ ++ /* Binding table pointers */ ++ assert(is_aligned(4*offset, 32)); ++ OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); ++ OUT_BATCH(offset*4); ++ ++ sna->render_state.gen9.surface_table = offset; ++ return true; ++} ++ ++static bool ++gen9_emit_drawing_rectangle(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); ++ uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; ++ ++ assert(!too_large(abs(op->dst.x), abs(op->dst.y))); ++ assert(!too_large(op->dst.width, op->dst.height)); ++ ++ if (sna->render_state.gen9.drawrect_limit == limit && ++ sna->render_state.gen9.drawrect_offset == offset) ++ return true; ++ ++ sna->render_state.gen9.drawrect_offset = offset; ++ sna->render_state.gen9.drawrect_limit = limit; ++ ++ OUT_BATCH(GEN9_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); ++ OUT_BATCH(0); ++ OUT_BATCH(limit); ++ OUT_BATCH(offset); ++ return false; ++} ++ ++static void ++gen9_emit_vertex_elements(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ /* ++ * vertex data in vertex buffer ++ * position: (x, y) ++ * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) ++ * texture coordinate 1 if (has_mask is true): same as above ++ */ ++ struct gen9_render_state *render = &sna->render_state.gen9; ++ uint32_t src_format, dw; ++ int id = GEN9_VERTEX(op->u.gen9.flags); ++ bool has_mask; ++ ++ DBG(("%s: setup id=%d\n", __FUNCTION__, id)); ++ ++ if (render->ve_id == id) ++ return; ++ render->ve_id = id; ++ ++ if (render->ve_dirty) { ++ /* dummy primitive to flush vertex before change? 
*/ ++ OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); ++ OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(1); /* single instance */ ++ OUT_BATCH(0); /* start instance location */ ++ OUT_BATCH(0); /* index buffer offset, ignored */ ++ } ++ ++ /* The VUE layout ++ * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) ++ * dword 4-7: position (x, y, 1.0, 1.0), ++ * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) ++ * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) ++ * ++ * dword 4-15 are fetched from vertex buffer ++ */ ++ has_mask = (id >> 2) != 0; ++ OUT_BATCH(GEN9_3DSTATE_VERTEX_ELEMENTS | ++ ((2 * (3 + has_mask)) + 1 - 2)); ++ ++ OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | ++ SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT | ++ 0 << VE_OFFSET_SHIFT); ++ OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT | ++ COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT | ++ COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | ++ COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT); ++ ++ /* x,y */ ++ OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | ++ SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT | ++ 0 << VE_OFFSET_SHIFT); ++ OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT | ++ COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT | ++ COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | ++ COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT); ++ ++ /* u0, v0, w0 */ ++ DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3)); ++ dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; ++ switch (id & 3) { ++ default: ++ assert(0); ++ case 0: ++ src_format = SURFACEFORMAT_R16G16_SSCALED; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; ++ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; ++ break; ++ case 1: ++ src_format = SURFACEFORMAT_R32_FLOAT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; ++ dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; ++ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; ++ break; ++ case 2: ++ 
src_format = SURFACEFORMAT_R32G32_FLOAT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; ++ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; ++ break; ++ case 3: ++ src_format = SURFACEFORMAT_R32G32B32_FLOAT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; ++ break; ++ } ++ OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | ++ src_format << VE_FORMAT_SHIFT | ++ 4 << VE_OFFSET_SHIFT); ++ OUT_BATCH(dw); ++ ++ /* u1, v1, w1 */ ++ if (has_mask) { ++ unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); ++ DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); ++ dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; ++ switch (id >> 2) { ++ case 1: ++ src_format = SURFACEFORMAT_R32_FLOAT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; ++ dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; ++ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; ++ break; ++ default: ++ assert(0); ++ case 2: ++ src_format = SURFACEFORMAT_R32G32_FLOAT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; ++ dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; ++ break; ++ case 3: ++ src_format = SURFACEFORMAT_R32G32B32_FLOAT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; ++ dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; ++ break; ++ } ++ OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | ++ src_format << VE_FORMAT_SHIFT | ++ offset << VE_OFFSET_SHIFT); ++ OUT_BATCH(dw); ++ } ++ ++ render->ve_dirty = true; ++} ++ ++inline static void ++gen9_emit_pipe_invalidate(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); ++ OUT_BATCH(PIPE_CONTROL_WC_FLUSH | ++ PIPE_CONTROL_TC_FLUSH | ++ PIPE_CONTROL_CS_STALL); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++} ++ ++inline static void 
++gen9_emit_pipe_flush(struct sna *sna, bool need_stall) ++{ ++ unsigned stall; ++ ++ stall = 0; ++ if (need_stall) ++ stall = (PIPE_CONTROL_CS_STALL | ++ PIPE_CONTROL_STALL_AT_SCOREBOARD); ++ ++ OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); ++ OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++} ++ ++inline static void ++gen9_emit_pipe_stall(struct sna *sna) ++{ ++ OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); ++ OUT_BATCH(PIPE_CONTROL_CS_STALL | ++ PIPE_CONTROL_FLUSH | ++ PIPE_CONTROL_STALL_AT_SCOREBOARD); ++ OUT_BATCH64(0); ++ OUT_BATCH64(0); ++} ++ ++static void ++gen9_emit_state(struct sna *sna, ++ const struct sna_composite_op *op, ++ uint16_t wm_binding_table) ++{ ++ bool need_invalidate; ++ bool need_flush; ++ bool need_stall; ++ ++ assert(op->dst.bo->exec); ++ ++ need_flush = wm_binding_table & 1 || ++ (sna->render_state.gen9.emit_flush && GEN9_READS_DST(op->u.gen9.flags)); ++ if (ALWAYS_FLUSH) ++ need_flush = true; ++ ++ wm_binding_table &= ~1; ++ ++ need_stall = sna->render_state.gen9.surface_table != wm_binding_table; ++ ++ need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); ++ if (ALWAYS_INVALIDATE) ++ need_invalidate = true; ++ ++ need_stall &= gen9_emit_drawing_rectangle(sna, op); ++ if (ALWAYS_STALL) ++ need_stall = true; ++ ++ if (need_invalidate) { ++ gen9_emit_pipe_invalidate(sna); ++ kgem_clear_dirty(&sna->kgem); ++ assert(op->dst.bo->exec); ++ kgem_bo_mark_dirty(op->dst.bo); ++ ++ need_flush = false; ++ need_stall = false; ++ } ++ if (need_flush) { ++ gen9_emit_pipe_flush(sna, need_stall); ++ need_stall = false; ++ } ++ if (need_stall) ++ gen9_emit_pipe_stall(sna); ++ ++ gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); ++ gen9_emit_sampler(sna, GEN9_SAMPLER(op->u.gen9.flags)); ++ gen9_emit_sf(sna, GEN9_VERTEX(op->u.gen9.flags) >> 2); ++ gen9_emit_wm(sna, GEN9_KERNEL(op->u.gen9.flags)); ++ gen9_emit_vertex_elements(sna, op); ++ gen9_emit_binding_table(sna, wm_binding_table); ++ ++ 
sna->render_state.gen9.emit_flush = GEN9_READS_DST(op->u.gen9.flags); ++} ++ ++static bool gen9_magic_ca_pass(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ struct gen9_render_state *state = &sna->render_state.gen9; ++ ++ if (!op->need_magic_ca_pass) ++ return false; ++ ++ DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, ++ sna->render.vertex_start, sna->render.vertex_index)); ++ ++ gen9_emit_pipe_stall(sna); ++ ++ gen9_emit_cc(sna, ++ GEN9_BLEND(gen9_get_blend(PictOpAdd, true, ++ op->dst.format))); ++ gen9_emit_wm(sna, ++ gen9_choose_composite_kernel(PictOpAdd, ++ true, true, ++ op->is_affine)); ++ ++ OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); ++ OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ ++ OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); ++ OUT_BATCH(sna->render.vertex_start); ++ OUT_BATCH(1); /* single instance */ ++ OUT_BATCH(0); /* start instance location */ ++ OUT_BATCH(0); /* index buffer offset, ignored */ ++ ++ state->last_primitive = sna->kgem.nbatch; ++ state->ve_dirty = false; ++ return true; ++} ++ ++static void null_create(struct sna_static_stream *stream) ++{ ++ /* A bunch of zeros useful for legacy border color and depth-stencil */ ++ sna_static_stream_map(stream, 64, 64); ++} ++ ++static void ++sampler_state_init(struct gen9_sampler_state *sampler_state, ++ sampler_filter_t filter, ++ sampler_extend_t extend) ++{ ++ COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t)); ++ ++ sampler_state->ss0.lod_preclamp = 2; /* GL mode */ ++ sampler_state->ss0.default_color_mode = 1; ++ ++ switch (filter) { ++ default: ++ case SAMPLER_FILTER_NEAREST: ++ sampler_state->ss0.min_filter = MAPFILTER_NEAREST; ++ sampler_state->ss0.mag_filter = MAPFILTER_NEAREST; ++ break; ++ case SAMPLER_FILTER_BILINEAR: ++ sampler_state->ss0.min_filter = MAPFILTER_LINEAR; ++ sampler_state->ss0.mag_filter = MAPFILTER_LINEAR; ++ break; ++ } ++ ++ /* XXX bicubic filter using MAPFILTER_FLEXIBLE */ ++ ++ switch (extend) { ++ default: ++ case 
SAMPLER_EXTEND_NONE: ++ sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; ++ sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; ++ sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; ++ break; ++ case SAMPLER_EXTEND_REPEAT: ++ sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP; ++ sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP; ++ sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP; ++ break; ++ case SAMPLER_EXTEND_PAD: ++ sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP; ++ sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP; ++ sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP; ++ break; ++ case SAMPLER_EXTEND_REFLECT: ++ sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR; ++ sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR; ++ sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR; ++ break; ++ } ++} ++ ++static void ++sampler_copy_init(struct gen9_sampler_state *ss) ++{ ++ sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); ++ ss->ss3.non_normalized_coord = 1; ++ ++ sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); ++} ++ ++static void ++sampler_fill_init(struct gen9_sampler_state *ss) ++{ ++ sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); ++ ss->ss3.non_normalized_coord = 1; ++ ++ sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); ++} ++ ++static uint32_t ++gen9_tiling_bits(uint32_t tiling) ++{ ++ switch (tiling) { ++ default: assert(0); ++ case I915_TILING_NONE: return 0; ++ case I915_TILING_X: return SURFACE_TILED; ++ case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y; ++ } ++} ++ ++#define MOCS_PTE (1 << 1) ++#define MOCS_WB (2 << 1) ++ ++/** ++ * Sets up the common fields for a surface state buffer for the given ++ * picture in the given surface state buffer. 
++ */ ++static uint32_t ++gen9_bind_bo(struct sna *sna, ++ struct kgem_bo *bo, ++ uint32_t width, ++ uint32_t height, ++ uint32_t format, ++ bool is_dst) ++{ ++ uint32_t *ss; ++ uint32_t domains; ++ int offset; ++ uint32_t is_scanout = is_dst && bo->scanout; ++ ++ /* After the first bind, we manage the cache domains within the batch */ ++ offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); ++ if (offset) { ++ if (is_dst) ++ kgem_bo_mark_dirty(bo); ++ assert(offset >= sna->kgem.surface); ++ return offset * sizeof(uint32_t); ++ } ++ ++ offset = sna->kgem.surface -= SURFACE_DW; ++ ss = sna->kgem.batch + offset; ++ ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | ++ gen9_tiling_bits(bo->tiling) | ++ format << SURFACE_FORMAT_SHIFT | ++ SURFACE_VALIGN_4 | SURFACE_HALIGN_4); ++ if (is_dst) { ++ ss[0] |= SURFACE_RC_READ_WRITE; ++ domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; ++ } else ++ domains = I915_GEM_DOMAIN_SAMPLER << 16; ++ ss[1] = (is_scanout || (is_dst && is_uncached(sna, bo))) ? MOCS_PTE << 24 : MOCS_WB << 24; ++ ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | ++ (height - 1) << SURFACE_HEIGHT_SHIFT); ++ ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT; ++ ss[4] = 0; ++ ss[5] = 0; ++ ss[6] = 0; ++ ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); ++ *(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0); ++ ss[10] = 0; ++ ss[11] = 0; ++ ss[12] = 0; ++ ss[13] = 0; ++ ss[14] = 0; ++ ss[15] = 0; ++ ++ kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); ++ ++ DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", ++ offset, bo->handle, *(uint64_t *)(ss+8), ++ format, width, height, bo->pitch, bo->tiling, ++ domains & 0xffff ? 
"render" : "sampler")); ++ ++ return offset * sizeof(uint32_t); ++} ++ ++static void gen9_emit_vertex_buffer(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ int id = GEN9_VERTEX(op->u.gen9.flags); ++ ++ OUT_BATCH(GEN9_3DSTATE_VERTEX_BUFFERS | (5 - 2)); ++ OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE | ++ 4*op->floats_per_vertex); ++ sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; ++ OUT_BATCH64(0); ++ OUT_BATCH(~0); /* buffer size: disabled */ ++ ++ sna->render.vb_id |= 1 << id; ++} ++ ++static void gen9_emit_primitive(struct sna *sna) ++{ ++ if (sna->kgem.nbatch == sna->render_state.gen9.last_primitive) { ++ sna->render.vertex_offset = sna->kgem.nbatch - 5; ++ return; ++ } ++ ++ OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); ++ OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ ++ sna->render.vertex_offset = sna->kgem.nbatch; ++ OUT_BATCH(0); /* vertex count, to be filled in later */ ++ OUT_BATCH(sna->render.vertex_index); ++ OUT_BATCH(1); /* single instance */ ++ OUT_BATCH(0); /* start instance location */ ++ OUT_BATCH(0); /* index buffer offset, ignored */ ++ sna->render.vertex_start = sna->render.vertex_index; ++ ++ sna->render_state.gen9.last_primitive = sna->kgem.nbatch; ++ sna->render_state.gen9.ve_dirty = false; ++} ++ ++static bool gen9_rectangle_begin(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ int id = 1 << GEN9_VERTEX(op->u.gen9.flags); ++ int ndwords; ++ ++ if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) ++ return true; ++ ++ ndwords = op->need_magic_ca_pass ? 
60 : 6; ++ if ((sna->render.vb_id & id) == 0) ++ ndwords += 5; ++ if (!kgem_check_batch(&sna->kgem, ndwords)) ++ return false; ++ ++ if ((sna->render.vb_id & id) == 0) ++ gen9_emit_vertex_buffer(sna, op); ++ ++ gen9_emit_primitive(sna); ++ return true; ++} ++ ++static int gen9_get_rectangles__flush(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ /* Preventing discarding new vbo after lock contention */ ++ if (sna_vertex_wait__locked(&sna->render)) { ++ int rem = vertex_space(sna); ++ if (rem > op->floats_per_rect) ++ return rem; ++ } ++ ++ if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6)) ++ return 0; ++ if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) ++ return 0; ++ ++ if (sna->render.vertex_offset) { ++ gen8_vertex_flush(sna); ++ if (gen9_magic_ca_pass(sna, op)) { ++ gen9_emit_pipe_invalidate(sna); ++ gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); ++ gen9_emit_wm(sna, GEN9_KERNEL(op->u.gen9.flags)); ++ } ++ } ++ ++ return gen8_vertex_finish(sna); ++} ++ ++inline static int gen9_get_rectangles(struct sna *sna, ++ const struct sna_composite_op *op, ++ int want, ++ void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) ++{ ++ int rem; ++ ++ assert(want); ++ ++start: ++ rem = vertex_space(sna); ++ if (unlikely(rem < op->floats_per_rect)) { ++ DBG(("flushing vbo for %s: %d < %d\n", ++ __FUNCTION__, rem, op->floats_per_rect)); ++ rem = gen9_get_rectangles__flush(sna, op); ++ if (unlikely(rem == 0)) ++ goto flush; ++ } ++ ++ if (unlikely(sna->render.vertex_offset == 0)) { ++ if (!gen9_rectangle_begin(sna, op)) ++ goto flush; ++ else ++ goto start; ++ } ++ ++ assert(rem <= vertex_space(sna)); ++ assert(op->floats_per_rect <= rem); ++ if (want > 1 && want * op->floats_per_rect > rem) ++ want = rem / op->floats_per_rect; ++ ++ assert(want > 0); ++ sna->render.vertex_index += 3*want; ++ return want; ++ ++flush: ++ if (sna->render.vertex_offset) { ++ gen8_vertex_flush(sna); ++ gen9_magic_ca_pass(sna, op); ++ } ++ 
sna_vertex_wait__locked(&sna->render); ++ _kgem_submit(&sna->kgem); ++ emit_state(sna, op); ++ goto start; ++} ++ ++inline static uint32_t *gen9_composite_get_binding_table(struct sna *sna, ++ uint16_t *offset) ++{ ++ uint32_t *table; ++ ++ assert(sna->kgem.surface <= 16384); ++ sna->kgem.surface -= SURFACE_DW; ++ /* Clear all surplus entries to zero in case of prefetch */ ++ table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64); ++ ++ DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); ++ ++ *offset = sna->kgem.surface; ++ return table; ++} ++ ++static void ++gen9_get_batch(struct sna *sna, const struct sna_composite_op *op) ++{ ++ kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); ++ ++ if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) { ++ DBG(("%s: flushing batch: %d < %d+%d\n", ++ __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, ++ 150, 4*8*2)); ++ _kgem_submit(&sna->kgem); ++ _kgem_set_mode(&sna->kgem, KGEM_RENDER); ++ } ++ ++ assert(sna->kgem.mode == KGEM_RENDER); ++ assert(sna->kgem.ring == KGEM_RENDER); ++ ++ if (sna->render_state.gen9.needs_invariant) ++ gen9_emit_invariant(sna); ++} ++ ++static void gen9_emit_composite_state(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ uint32_t *binding_table; ++ uint16_t offset, dirty; ++ ++ gen9_get_batch(sna, op); ++ ++ binding_table = gen9_composite_get_binding_table(sna, &offset); ++ ++ dirty = kgem_bo_is_dirty(op->dst.bo); ++ ++ binding_table[0] = ++ gen9_bind_bo(sna, ++ op->dst.bo, op->dst.width, op->dst.height, ++ gen9_get_dest_format(op->dst.format), ++ true); ++ binding_table[1] = ++ gen9_bind_bo(sna, ++ op->src.bo, op->src.width, op->src.height, ++ op->src.card_format, ++ false); ++ if (op->mask.bo) { ++ binding_table[2] = ++ gen9_bind_bo(sna, ++ op->mask.bo, ++ op->mask.width, ++ op->mask.height, ++ op->mask.card_format, ++ false); ++ } ++ ++ if (sna->kgem.surface == offset && ++ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == 
*(uint64_t*)binding_table && ++ (op->mask.bo == NULL || ++ sna->kgem.batch[sna->render_state.gen9.surface_table+2] == binding_table[2])) { ++ sna->kgem.surface += SURFACE_DW; ++ offset = sna->render_state.gen9.surface_table; ++ } ++ ++ if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) ++ dirty = 0; ++ ++ gen9_emit_state(sna, op, offset | dirty); ++} ++ ++static void ++gen9_align_vertex(struct sna *sna, const struct sna_composite_op *op) ++{ ++ if (op->floats_per_vertex != sna->render_state.gen9.floats_per_vertex) { ++ DBG(("aligning vertex: was %d, now %d floats per vertex\n", ++ sna->render_state.gen9.floats_per_vertex, op->floats_per_vertex)); ++ gen8_vertex_align(sna, op); ++ sna->render_state.gen9.floats_per_vertex = op->floats_per_vertex; ++ } ++} ++ ++fastcall static void ++gen9_render_composite_blt(struct sna *sna, ++ const struct sna_composite_op *op, ++ const struct sna_composite_rectangles *r) ++{ ++ gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); ++ op->prim_emit(sna, op, r); ++} ++ ++fastcall static void ++gen9_render_composite_box(struct sna *sna, ++ const struct sna_composite_op *op, ++ const BoxRec *box) ++{ ++ struct sna_composite_rectangles r; ++ ++ gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); ++ ++ DBG((" %s: (%d, %d), (%d, %d)\n", ++ __FUNCTION__, ++ box->x1, box->y1, box->x2, box->y2)); ++ ++ r.dst.x = box->x1; ++ r.dst.y = box->y1; ++ r.width = box->x2 - box->x1; ++ r.height = box->y2 - box->y1; ++ r.src = r.mask = r.dst; ++ ++ op->prim_emit(sna, op, &r); ++} ++ ++static void ++gen9_render_composite_boxes__blt(struct sna *sna, ++ const struct sna_composite_op *op, ++ const BoxRec *box, int nbox) ++{ ++ DBG(("composite_boxes(%d)\n", nbox)); ++ ++ do { ++ int nbox_this_time; ++ ++ nbox_this_time = gen9_get_rectangles(sna, op, nbox, ++ gen9_emit_composite_state); ++ nbox -= nbox_this_time; ++ ++ do { ++ struct sna_composite_rectangles r; ++ ++ DBG((" %s: (%d, %d), (%d, %d)\n", ++ 
__FUNCTION__, ++ box->x1, box->y1, box->x2, box->y2)); ++ ++ r.dst.x = box->x1; ++ r.dst.y = box->y1; ++ r.width = box->x2 - box->x1; ++ r.height = box->y2 - box->y1; ++ r.src = r.mask = r.dst; ++ ++ op->prim_emit(sna, op, &r); ++ box++; ++ } while (--nbox_this_time); ++ } while (nbox); ++} ++ ++static void ++gen9_render_composite_boxes(struct sna *sna, ++ const struct sna_composite_op *op, ++ const BoxRec *box, int nbox) ++{ ++ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); ++ ++ do { ++ int nbox_this_time; ++ float *v; ++ ++ nbox_this_time = gen9_get_rectangles(sna, op, nbox, ++ gen9_emit_composite_state); ++ assert(nbox_this_time); ++ nbox -= nbox_this_time; ++ ++ v = sna->render.vertices + sna->render.vertex_used; ++ sna->render.vertex_used += nbox_this_time * op->floats_per_rect; ++ ++ op->emit_boxes(op, box, nbox_this_time, v); ++ box += nbox_this_time; ++ } while (nbox); ++} ++ ++static void ++gen9_render_composite_boxes__thread(struct sna *sna, ++ const struct sna_composite_op *op, ++ const BoxRec *box, int nbox) ++{ ++ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); ++ ++ sna_vertex_lock(&sna->render); ++ do { ++ int nbox_this_time; ++ float *v; ++ ++ nbox_this_time = gen9_get_rectangles(sna, op, nbox, ++ gen9_emit_composite_state); ++ assert(nbox_this_time); ++ nbox -= nbox_this_time; ++ ++ v = sna->render.vertices + sna->render.vertex_used; ++ sna->render.vertex_used += nbox_this_time * op->floats_per_rect; ++ ++ sna_vertex_acquire__locked(&sna->render); ++ sna_vertex_unlock(&sna->render); ++ ++ op->emit_boxes(op, box, nbox_this_time, v); ++ box += nbox_this_time; ++ ++ sna_vertex_lock(&sna->render); ++ sna_vertex_release__locked(&sna->render); ++ } while (nbox); ++ sna_vertex_unlock(&sna->render); ++} ++ ++static uint32_t ++gen9_create_blend_state(struct sna_static_stream *stream) ++{ ++ char *base, *ptr; ++ int src, dst; ++ ++ COMPILE_TIME_ASSERT(((GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0); ++ ++ base = 
sna_static_stream_map(stream, ++ GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT * GEN9_BLEND_STATE_PADDED_SIZE, ++ 64); ++ ++ ptr = base; ++ for (src = 0; src < GEN9_BLENDFACTOR_COUNT; src++) { ++ for (dst = 0; dst < GEN9_BLENDFACTOR_COUNT; dst++) { ++ struct gen9_blend_state *blend = ++ (struct gen9_blend_state *)ptr; ++ ++ assert(((ptr - base) & 63) == 0); ++ COMPILE_TIME_ASSERT(sizeof(blend->common) == 4); ++ COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8); ++ COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4); ++ ++ blend->rt.post_blend_clamp = 1; ++ blend->rt.pre_blend_clamp = 1; ++ ++ blend->rt.color_blend = ++ !(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE); ++ blend->rt.dest_blend_factor = dst; ++ blend->rt.source_blend_factor = src; ++ blend->rt.color_blend_function = BLENDFUNCTION_ADD; ++ ++ blend->rt.dest_alpha_blend_factor = dst; ++ blend->rt.source_alpha_blend_factor = src; ++ blend->rt.alpha_blend_function = BLENDFUNCTION_ADD; ++ ++ ptr += GEN9_BLEND_STATE_PADDED_SIZE; ++ } ++ } ++ ++ return sna_static_stream_offsetof(stream, base); ++} ++ ++static int ++gen9_composite_picture(struct sna *sna, ++ PicturePtr picture, ++ struct sna_composite_channel *channel, ++ int x, int y, ++ int w, int h, ++ int dst_x, int dst_y, ++ bool precise) ++{ ++ PixmapPtr pixmap; ++ uint32_t color; ++ int16_t dx, dy; ++ ++ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", ++ __FUNCTION__, x, y, w, h, dst_x, dst_y)); ++ ++ channel->is_solid = false; ++ channel->card_format = -1; ++ ++ if (sna_picture_is_solid(picture, &color)) ++ return gen4_channel_init_solid(sna, channel, color); ++ ++ if (picture->pDrawable == NULL) { ++ int ret; ++ ++ if (picture->pSourcePict->type == SourcePictTypeLinear) ++ return gen4_channel_init_linear(sna, picture, channel, ++ x, y, ++ w, h, ++ dst_x, dst_y); ++ ++ DBG(("%s -- fixup, gradient\n", __FUNCTION__)); ++ ret = -1; ++ if (!precise) ++ ret = sna_render_picture_approximate_gradient(sna, picture, channel, ++ x, y, w, h, dst_x, dst_y); 
++ if (ret == -1) ++ ret = sna_render_picture_fixup(sna, picture, channel, ++ x, y, w, h, dst_x, dst_y); ++ return ret; ++ } ++ ++ if (picture->alphaMap) { ++ DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); ++ return sna_render_picture_fixup(sna, picture, channel, ++ x, y, w, h, dst_x, dst_y); ++ } ++ ++ if (!gen9_check_repeat(picture)) ++ return sna_render_picture_fixup(sna, picture, channel, ++ x, y, w, h, dst_x, dst_y); ++ ++ if (!gen9_check_filter(picture)) ++ return sna_render_picture_fixup(sna, picture, channel, ++ x, y, w, h, dst_x, dst_y); ++ ++ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; ++ channel->filter = picture->filter; ++ ++ pixmap = get_drawable_pixmap(picture->pDrawable); ++ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); ++ ++ x += dx + picture->pDrawable->x; ++ y += dy + picture->pDrawable->y; ++ ++ channel->is_affine = sna_transform_is_affine(picture->transform); ++ if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { ++ DBG(("%s: integer translation (%d, %d), removing\n", ++ __FUNCTION__, dx, dy)); ++ x += dx; ++ y += dy; ++ channel->transform = NULL; ++ channel->filter = PictFilterNearest; ++ ++ if (channel->repeat || ++ (x >= 0 && ++ y >= 0 && ++ x + w <= pixmap->drawable.width && ++ y + h <= pixmap->drawable.height)) { ++ struct sna_pixmap *priv = sna_pixmap(pixmap); ++ if (priv && priv->clear) { ++ DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); ++ return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); ++ } ++ } ++ } else ++ channel->transform = picture->transform; ++ ++ channel->pict_format = picture->format; ++ channel->card_format = gen9_get_card_format(picture->format); ++ if (channel->card_format == (unsigned)-1) ++ return sna_render_picture_convert(sna, picture, channel, pixmap, ++ x, y, w, h, dst_x, dst_y, ++ false); ++ ++ if 
(too_large(pixmap->drawable.width, pixmap->drawable.height)) { ++ DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, ++ pixmap->drawable.width, pixmap->drawable.height)); ++ return sna_render_picture_extract(sna, picture, channel, ++ x, y, w, h, dst_x, dst_y); ++ } ++ ++ return sna_render_pixmap_bo(sna, channel, pixmap, ++ x, y, w, h, dst_x, dst_y); ++} ++ ++inline static bool gen9_composite_channel_convert(struct sna_composite_channel *channel) ++{ ++ if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format))) ++ return false; ++ ++ channel->repeat = gen9_repeat(channel->repeat); ++ channel->filter = gen9_filter(channel->filter); ++ if (channel->card_format == (unsigned)-1) ++ channel->card_format = gen9_get_card_format(channel->pict_format); ++ assert(channel->card_format != (unsigned)-1); ++ ++ return true; ++} ++ ++static void gen9_render_composite_done(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ if (sna->render.vertex_offset) { ++ gen8_vertex_flush(sna); ++ gen9_magic_ca_pass(sna, op); ++ } ++ ++ if (op->mask.bo) ++ kgem_bo_destroy(&sna->kgem, op->mask.bo); ++ if (op->src.bo) ++ kgem_bo_destroy(&sna->kgem, op->src.bo); ++ ++ sna_render_composite_redirect_done(sna, op); ++} ++ ++inline static bool ++gen9_composite_set_target(struct sna *sna, ++ struct sna_composite_op *op, ++ PicturePtr dst, ++ int x, int y, int w, int h, ++ bool partial) ++{ ++ BoxRec box; ++ unsigned int hint; ++ ++ DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); ++ ++ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); ++ op->dst.format = dst->format; ++ op->dst.width = op->dst.pixmap->drawable.width; ++ op->dst.height = op->dst.pixmap->drawable.height; ++ ++ if (w | h) { ++ assert(w && h); ++ box.x1 = x; ++ box.y1 = y; ++ box.x2 = x + w; ++ box.y2 = y + h; ++ } else ++ sna_render_picture_extents(dst, &box); ++ ++ hint = PREFER_GPU | RENDER_GPU; ++ if (!need_tiling(sna, op->dst.width, op->dst.height)) ++ hint |= FORCE_GPU; ++ 
if (!partial) { ++ hint |= IGNORE_DAMAGE; ++ if (w == op->dst.width && h == op->dst.height) ++ hint |= REPLACES; ++ } ++ ++ op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); ++ if (op->dst.bo == NULL) ++ return false; ++ ++ assert(!op->damage || !DAMAGE_IS_ALL(*op->damage)); ++ ++ if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel)) ++ return false; ++ ++ if (hint & REPLACES) { ++ struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); ++ kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); ++ } ++ ++ get_drawable_deltas(dst->pDrawable, op->dst.pixmap, ++ &op->dst.x, &op->dst.y); ++ ++ DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", ++ __FUNCTION__, ++ op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, ++ op->dst.width, op->dst.height, ++ op->dst.bo->pitch, ++ op->dst.x, op->dst.y, ++ op->damage ? *op->damage : (void *)-1)); ++ ++ assert(op->dst.bo->proxy == NULL); ++ ++ if (too_large(op->dst.width, op->dst.height) && ++ !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) ++ return false; ++ ++ return true; ++} ++ ++static bool ++try_blt(struct sna *sna, ++ uint8_t op, ++ PicturePtr src, ++ PicturePtr mask, ++ PicturePtr dst, ++ int16_t src_x, int16_t src_y, ++ int16_t msk_x, int16_t msk_y, ++ int16_t dst_x, int16_t dst_y, ++ int16_t width, int16_t height, ++ unsigned flags, ++ struct sna_composite_op *tmp) ++{ ++ struct kgem_bo *bo; ++ ++ if (sna->kgem.mode == KGEM_BLT) { ++ DBG(("%s: already performing BLT\n", __FUNCTION__)); ++ goto execute; ++ } ++ ++ if (too_large(width, height)) { ++ DBG(("%s: operation too large for 3D pipe (%d, %d)\n", ++ __FUNCTION__, width, height)); ++ goto execute; ++ } ++ ++ bo = __sna_drawable_peek_bo(dst->pDrawable); ++ if (bo == NULL) ++ goto execute; ++ ++ if (untiled_tlb_miss(bo)) ++ goto execute; ++ ++ if (bo->rq) { ++ if (RQ_IS_BLT(bo->rq)) ++ goto execute; ++ ++ return false; ++ } ++ ++ if (bo->tiling == I915_TILING_Y) ++ goto upload; ++ ++ 
if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) ++ goto execute; ++ ++ if (src->pDrawable == dst->pDrawable && ++ (sna->render_state.gt < 3 || width*height < 1024) && ++ can_switch_to_blt(sna, bo, 0)) ++ goto execute; ++ ++ if (src->pDrawable) { ++ struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); ++ if (s == NULL) ++ goto upload; ++ ++ if (prefer_blt_bo(sna, s, bo)) ++ goto execute; ++ } ++ ++ if (sna->kgem.ring == KGEM_BLT) { ++ DBG(("%s: already performing BLT\n", __FUNCTION__)); ++ goto execute; ++ } ++ ++upload: ++ flags |= COMPOSITE_UPLOAD; ++execute: ++ return sna_blt_composite(sna, op, ++ src, dst, ++ src_x, src_y, ++ dst_x, dst_y, ++ width, height, ++ flags, tmp); ++} ++ ++static bool ++check_gradient(PicturePtr picture, bool precise) ++{ ++ if (picture->pDrawable) ++ return false; ++ ++ switch (picture->pSourcePict->type) { ++ case SourcePictTypeSolidFill: ++ case SourcePictTypeLinear: ++ return false; ++ default: ++ return precise; ++ } ++} ++ ++static bool ++has_alphamap(PicturePtr p) ++{ ++ return p->alphaMap != NULL; ++} ++ ++static bool ++need_upload(PicturePtr p) ++{ ++ return p->pDrawable && unattached(p->pDrawable) && untransformed(p); ++} ++ ++static bool ++source_is_busy(PixmapPtr pixmap) ++{ ++ struct sna_pixmap *priv = sna_pixmap(pixmap); ++ if (priv == NULL || priv->clear) ++ return false; ++ ++ if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) ++ return true; ++ ++ if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) ++ return true; ++ ++ return priv->gpu_damage && !priv->cpu_damage; ++} ++ ++static bool ++source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) ++{ ++ if (sna_picture_is_solid(p, NULL)) ++ return false; ++ ++ if (p->pSourcePict) ++ return check_gradient(p, precise); ++ ++ if (!gen9_check_repeat(p) || !gen9_check_format(p->format)) ++ return true; ++ ++ if (pixmap && source_is_busy(pixmap)) ++ return false; ++ ++ return has_alphamap(p) || !gen9_check_filter(p) || need_upload(p); ++} ++ 
++static bool ++gen9_composite_fallback(struct sna *sna, ++ PicturePtr src, ++ PicturePtr mask, ++ PicturePtr dst) ++{ ++ PixmapPtr src_pixmap; ++ PixmapPtr mask_pixmap; ++ PixmapPtr dst_pixmap; ++ bool src_fallback, mask_fallback; ++ ++ if (!gen9_check_dst_format(dst->format)) { ++ DBG(("%s: unknown destination format: %d\n", ++ __FUNCTION__, dst->format)); ++ return true; ++ } ++ ++ dst_pixmap = get_drawable_pixmap(dst->pDrawable); ++ ++ src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; ++ src_fallback = source_fallback(src, src_pixmap, ++ dst->polyMode == PolyModePrecise); ++ ++ if (mask) { ++ mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; ++ mask_fallback = source_fallback(mask, mask_pixmap, ++ dst->polyMode == PolyModePrecise); ++ } else { ++ mask_pixmap = NULL; ++ mask_fallback = false; ++ } ++ ++ /* If we are using the destination as a source and need to ++ * readback in order to upload the source, do it all ++ * on the cpu. ++ */ ++ if (src_pixmap == dst_pixmap && src_fallback) { ++ DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); ++ return true; ++ } ++ if (mask_pixmap == dst_pixmap && mask_fallback) { ++ DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); ++ return true; ++ } ++ ++ /* If anything is on the GPU, push everything out to the GPU */ ++ if (dst_use_gpu(dst_pixmap)) { ++ DBG(("%s: dst is already on the GPU, try to use GPU\n", ++ __FUNCTION__)); ++ return false; ++ } ++ ++ if (src_pixmap && !src_fallback) { ++ DBG(("%s: src is already on the GPU, try to use GPU\n", ++ __FUNCTION__)); ++ return false; ++ } ++ if (mask_pixmap && !mask_fallback) { ++ DBG(("%s: mask is already on the GPU, try to use GPU\n", ++ __FUNCTION__)); ++ return false; ++ } ++ ++ /* However if the dst is not on the GPU and we need to ++ * render one of the sources using the CPU, we may ++ * as well do the entire operation in place onthe CPU. 
++ */ ++ if (src_fallback) { ++ DBG(("%s: dst is on the CPU and src will fallback\n", ++ __FUNCTION__)); ++ return true; ++ } ++ ++ if (mask && mask_fallback) { ++ DBG(("%s: dst is on the CPU and mask will fallback\n", ++ __FUNCTION__)); ++ return true; ++ } ++ ++ if (too_large(dst_pixmap->drawable.width, ++ dst_pixmap->drawable.height) && ++ dst_is_cpu(dst_pixmap)) { ++ DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); ++ return true; ++ } ++ ++ DBG(("%s: dst is not on the GPU and the operation should not fallback\n", ++ __FUNCTION__)); ++ return dst_use_cpu(dst_pixmap); ++} ++ ++static int ++reuse_source(struct sna *sna, ++ PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, ++ PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) ++{ ++ uint32_t color; ++ ++ if (src_x != msk_x || src_y != msk_y) ++ return false; ++ ++ if (src == mask) { ++ DBG(("%s: mask is source\n", __FUNCTION__)); ++ *mc = *sc; ++ mc->bo = kgem_bo_reference(mc->bo); ++ return true; ++ } ++ ++ if (sna_picture_is_solid(mask, &color)) ++ return gen4_channel_init_solid(sna, mc, color); ++ ++ if (sc->is_solid) ++ return false; ++ ++ if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) ++ return false; ++ ++ DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); ++ ++ if (!sna_transform_equal(src->transform, mask->transform)) ++ return false; ++ ++ if (!sna_picture_alphamap_equal(src, mask)) ++ return false; ++ ++ if (!gen9_check_repeat(mask)) ++ return false; ++ ++ if (!gen9_check_filter(mask)) ++ return false; ++ ++ if (!gen9_check_format(mask->format)) ++ return false; ++ ++ DBG(("%s: reusing source channel for mask with a twist\n", ++ __FUNCTION__)); ++ ++ *mc = *sc; ++ mc->repeat = gen9_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); ++ mc->filter = gen9_filter(mask->filter); ++ mc->pict_format = mask->format; ++ mc->card_format = gen9_get_card_format(mask->format); ++ mc->bo = kgem_bo_reference(mc->bo); ++ return true; ++} ++ ++static bool ++gen9_render_composite(struct sna *sna, ++ uint8_t op, ++ PicturePtr src, ++ PicturePtr mask, ++ PicturePtr dst, ++ int16_t src_x, int16_t src_y, ++ int16_t msk_x, int16_t msk_y, ++ int16_t dst_x, int16_t dst_y, ++ int16_t width, int16_t height, ++ unsigned flags, ++ struct sna_composite_op *tmp) ++{ ++ if (op >= ARRAY_SIZE(gen9_blend_op)) ++ return false; ++ ++ DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__, ++ width, height, sna->kgem.mode, sna->kgem.ring)); ++ ++ if (mask == NULL && ++ try_blt(sna, op, ++ src, mask, dst, ++ src_x, src_y, ++ msk_x, msk_y, ++ dst_x, dst_y, ++ width, height, ++ flags, tmp)) ++ return true; ++ ++ if (gen9_composite_fallback(sna, src, mask, dst)) ++ goto fallback; ++ ++ if (need_tiling(sna, width, height)) ++ return sna_tiling_composite(op, src, mask, dst, ++ src_x, src_y, ++ msk_x, msk_y, ++ dst_x, dst_y, ++ width, height, ++ tmp); ++ ++ if (op == PictOpClear && src == sna->clear) ++ op = PictOpSrc; ++ tmp->op = op; ++ if (!gen9_composite_set_target(sna, tmp, dst, ++ dst_x, dst_y, width, height, ++ flags & COMPOSITE_PARTIAL || op > PictOpSrc)) ++ goto fallback; ++ ++ switch (gen9_composite_picture(sna, src, &tmp->src, ++ src_x, src_y, ++ width, height, ++ dst_x, dst_y, ++ dst->polyMode == PolyModePrecise)) { ++ case -1: ++ goto cleanup_dst; ++ case 0: ++ if (!gen4_channel_init_solid(sna, &tmp->src, 0)) ++ goto cleanup_dst; ++ /* fall through to fixup */ ++ case 1: ++ /* Did we just switch rings to prepare the source? 
*/ ++ if (mask == NULL && ++ (prefer_blt_composite(sna, tmp) || ++ unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) && ++ sna_blt_composite__convert(sna, ++ dst_x, dst_y, width, height, ++ tmp)) ++ return true; ++ ++ if (!gen9_composite_channel_convert(&tmp->src)) ++ goto cleanup_src; ++ ++ break; ++ } ++ ++ tmp->is_affine = tmp->src.is_affine; ++ tmp->has_component_alpha = false; ++ tmp->need_magic_ca_pass = false; ++ ++ tmp->mask.bo = NULL; ++ tmp->mask.filter = SAMPLER_FILTER_NEAREST; ++ tmp->mask.repeat = SAMPLER_EXTEND_NONE; ++ ++ if (mask) { ++ if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { ++ tmp->has_component_alpha = true; ++ ++ /* Check if it's component alpha that relies on a source alpha and on ++ * the source value. We can only get one of those into the single ++ * source value that we get to blend with. ++ */ ++ if (gen9_blend_op[op].src_alpha && ++ (gen9_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { ++ if (op != PictOpOver) ++ goto cleanup_src; ++ ++ tmp->need_magic_ca_pass = true; ++ tmp->op = PictOpOutReverse; ++ } ++ } ++ ++ if (!reuse_source(sna, ++ src, &tmp->src, src_x, src_y, ++ mask, &tmp->mask, msk_x, msk_y)) { ++ switch (gen9_composite_picture(sna, mask, &tmp->mask, ++ msk_x, msk_y, ++ width, height, ++ dst_x, dst_y, ++ dst->polyMode == PolyModePrecise)) { ++ case -1: ++ goto cleanup_src; ++ case 0: ++ if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) ++ goto cleanup_src; ++ /* fall through to fixup */ ++ case 1: ++ if (!gen9_composite_channel_convert(&tmp->mask)) ++ goto cleanup_mask; ++ break; ++ } ++ } ++ ++ tmp->is_affine &= tmp->mask.is_affine; ++ } ++ ++ tmp->u.gen9.flags = ++ GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, ++ tmp->src.repeat, ++ tmp->mask.filter, ++ tmp->mask.repeat), ++ gen9_get_blend(tmp->op, ++ tmp->has_component_alpha, ++ tmp->dst.format), ++ gen9_choose_composite_kernel(tmp->op, ++ tmp->mask.bo != NULL, ++ tmp->has_component_alpha, ++ tmp->is_affine), ++ 
gen4_choose_composite_emitter(sna, tmp)); ++ ++ tmp->blt = gen9_render_composite_blt; ++ tmp->box = gen9_render_composite_box; ++ tmp->boxes = gen9_render_composite_boxes__blt; ++ if (tmp->emit_boxes){ ++ tmp->boxes = gen9_render_composite_boxes; ++ tmp->thread_boxes = gen9_render_composite_boxes__thread; ++ } ++ tmp->done = gen9_render_composite_done; ++ ++ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); ++ if (!kgem_check_bo(&sna->kgem, ++ tmp->dst.bo, tmp->src.bo, tmp->mask.bo, ++ NULL)) { ++ kgem_submit(&sna->kgem); ++ if (!kgem_check_bo(&sna->kgem, ++ tmp->dst.bo, tmp->src.bo, tmp->mask.bo, ++ NULL)) ++ goto cleanup_mask; ++ _kgem_set_mode(&sna->kgem, KGEM_RENDER); ++ } ++ ++ gen9_align_vertex(sna, tmp); ++ gen9_emit_composite_state(sna, tmp); ++ return true; ++ ++cleanup_mask: ++ if (tmp->mask.bo) { ++ kgem_bo_destroy(&sna->kgem, tmp->mask.bo); ++ tmp->mask.bo = NULL; ++ } ++cleanup_src: ++ if (tmp->src.bo) { ++ kgem_bo_destroy(&sna->kgem, tmp->src.bo); ++ tmp->src.bo = NULL; ++ } ++cleanup_dst: ++ if (tmp->redirect.real_bo) { ++ kgem_bo_destroy(&sna->kgem, tmp->dst.bo); ++ tmp->redirect.real_bo = NULL; ++ } ++fallback: ++ return (mask == NULL && ++ sna_blt_composite(sna, op, ++ src, dst, ++ src_x, src_y, ++ dst_x, dst_y, ++ width, height, ++ flags | COMPOSITE_FALLBACK, tmp)); ++} ++ ++#if !NO_COMPOSITE_SPANS ++fastcall static void ++gen9_render_composite_spans_box(struct sna *sna, ++ const struct sna_composite_spans_op *op, ++ const BoxRec *box, float opacity) ++{ ++ DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", ++ __FUNCTION__, ++ op->base.src.offset[0], op->base.src.offset[1], ++ opacity, ++ op->base.dst.x, op->base.dst.y, ++ box->x1, box->y1, ++ box->x2 - box->x1, ++ box->y2 - box->y1)); ++ ++ gen9_get_rectangles(sna, &op->base, 1, gen9_emit_composite_state); ++ op->prim_emit(sna, op, box, opacity); ++} ++ ++static void ++gen9_render_composite_spans_boxes(struct sna *sna, ++ const struct sna_composite_spans_op *op, 
++ const BoxRec *box, int nbox, ++ float opacity) ++{ ++ DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", ++ __FUNCTION__, nbox, ++ op->base.src.offset[0], op->base.src.offset[1], ++ opacity, ++ op->base.dst.x, op->base.dst.y)); ++ ++ do { ++ int nbox_this_time; ++ ++ nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, ++ gen9_emit_composite_state); ++ nbox -= nbox_this_time; ++ ++ do { ++ DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, ++ box->x1, box->y1, ++ box->x2 - box->x1, ++ box->y2 - box->y1)); ++ ++ op->prim_emit(sna, op, box++, opacity); ++ } while (--nbox_this_time); ++ } while (nbox); ++} ++ ++fastcall static void ++gen9_render_composite_spans_boxes__thread(struct sna *sna, ++ const struct sna_composite_spans_op *op, ++ const struct sna_opacity_box *box, ++ int nbox) ++{ ++ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", ++ __FUNCTION__, nbox, ++ op->base.src.offset[0], op->base.src.offset[1], ++ op->base.dst.x, op->base.dst.y)); ++ ++ sna_vertex_lock(&sna->render); ++ do { ++ int nbox_this_time; ++ float *v; ++ ++ nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, ++ gen9_emit_composite_state); ++ assert(nbox_this_time); ++ nbox -= nbox_this_time; ++ ++ v = sna->render.vertices + sna->render.vertex_used; ++ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; ++ ++ sna_vertex_acquire__locked(&sna->render); ++ sna_vertex_unlock(&sna->render); ++ ++ op->emit_boxes(op, box, nbox_this_time, v); ++ box += nbox_this_time; ++ ++ sna_vertex_lock(&sna->render); ++ sna_vertex_release__locked(&sna->render); ++ } while (nbox); ++ sna_vertex_unlock(&sna->render); ++} ++ ++fastcall static void ++gen9_render_composite_spans_done(struct sna *sna, ++ const struct sna_composite_spans_op *op) ++{ ++ if (sna->render.vertex_offset) ++ gen8_vertex_flush(sna); ++ ++ DBG(("%s()\n", __FUNCTION__)); ++ ++ if (op->base.src.bo) ++ kgem_bo_destroy(&sna->kgem, op->base.src.bo); ++ ++ sna_render_composite_redirect_done(sna, 
&op->base); ++} ++ ++static bool ++gen9_check_composite_spans(struct sna *sna, ++ uint8_t op, PicturePtr src, PicturePtr dst, ++ int16_t width, int16_t height, unsigned flags) ++{ ++ if (op >= ARRAY_SIZE(gen9_blend_op)) ++ return false; ++ ++ if (gen9_composite_fallback(sna, src, NULL, dst)) ++ return false; ++ ++ if (need_tiling(sna, width, height) && ++ !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { ++ DBG(("%s: fallback, tiled operation not on GPU\n", ++ __FUNCTION__)); ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool ++gen9_render_composite_spans(struct sna *sna, ++ uint8_t op, ++ PicturePtr src, ++ PicturePtr dst, ++ int16_t src_x, int16_t src_y, ++ int16_t dst_x, int16_t dst_y, ++ int16_t width, int16_t height, ++ unsigned flags, ++ struct sna_composite_spans_op *tmp) ++{ ++ DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, ++ width, height, flags, sna->kgem.ring)); ++ ++ assert(gen9_check_composite_spans(sna, op, src, dst, width, height, flags)); ++ ++ if (need_tiling(sna, width, height)) { ++ DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", ++ __FUNCTION__, width, height)); ++ return sna_tiling_composite_spans(op, src, dst, ++ src_x, src_y, dst_x, dst_y, ++ width, height, flags, tmp); ++ } ++ ++ tmp->base.op = op; ++ if (!gen9_composite_set_target(sna, &tmp->base, dst, ++ dst_x, dst_y, width, height, true)) ++ return false; ++ ++ switch (gen9_composite_picture(sna, src, &tmp->base.src, ++ src_x, src_y, ++ width, height, ++ dst_x, dst_y, ++ dst->polyMode == PolyModePrecise)) { ++ case -1: ++ goto cleanup_dst; ++ case 0: ++ if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) ++ goto cleanup_dst; ++ /* fall through to fixup */ ++ case 1: ++ if (!gen9_composite_channel_convert(&tmp->base.src)) ++ goto cleanup_src; ++ break; ++ } ++ tmp->base.mask.bo = NULL; ++ ++ tmp->base.is_affine = tmp->base.src.is_affine; ++ tmp->base.need_magic_ca_pass = false; ++ ++ tmp->base.u.gen9.flags = ++ 
GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, ++ tmp->base.src.repeat, ++ SAMPLER_FILTER_NEAREST, ++ SAMPLER_EXTEND_PAD), ++ gen9_get_blend(tmp->base.op, false, tmp->base.dst.format), ++ GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine, ++ gen4_choose_spans_emitter(sna, tmp)); ++ ++ tmp->box = gen9_render_composite_spans_box; ++ tmp->boxes = gen9_render_composite_spans_boxes; ++ if (tmp->emit_boxes) ++ tmp->thread_boxes = gen9_render_composite_spans_boxes__thread; ++ tmp->done = gen9_render_composite_spans_done; ++ ++ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); ++ if (!kgem_check_bo(&sna->kgem, ++ tmp->base.dst.bo, tmp->base.src.bo, ++ NULL)) { ++ kgem_submit(&sna->kgem); ++ if (!kgem_check_bo(&sna->kgem, ++ tmp->base.dst.bo, tmp->base.src.bo, ++ NULL)) ++ goto cleanup_src; ++ _kgem_set_mode(&sna->kgem, KGEM_RENDER); ++ } ++ ++ gen9_align_vertex(sna, &tmp->base); ++ gen9_emit_composite_state(sna, &tmp->base); ++ return true; ++ ++cleanup_src: ++ if (tmp->base.src.bo) ++ kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); ++cleanup_dst: ++ if (tmp->base.redirect.real_bo) ++ kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); ++ return false; ++} ++#endif ++ ++static void ++gen9_emit_copy_state(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ uint32_t *binding_table; ++ uint16_t offset, dirty; ++ ++ gen9_get_batch(sna, op); ++ ++ binding_table = gen9_composite_get_binding_table(sna, &offset); ++ ++ dirty = kgem_bo_is_dirty(op->dst.bo); ++ ++ binding_table[0] = ++ gen9_bind_bo(sna, ++ op->dst.bo, op->dst.width, op->dst.height, ++ gen9_get_dest_format(op->dst.format), ++ true); ++ binding_table[1] = ++ gen9_bind_bo(sna, ++ op->src.bo, op->src.width, op->src.height, ++ op->src.card_format, ++ false); ++ ++ if (sna->kgem.surface == offset && ++ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) { ++ sna->kgem.surface += SURFACE_DW; ++ offset = sna->render_state.gen9.surface_table; ++ } ++ ++ if 
(sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) ++ dirty = 0; ++ ++ assert(!GEN9_READS_DST(op->u.gen9.flags)); ++ gen9_emit_state(sna, op, offset | dirty); ++} ++ ++static inline bool ++prefer_blt_copy(struct sna *sna, ++ struct kgem_bo *src_bo, ++ struct kgem_bo *dst_bo, ++ unsigned flags) ++{ ++ if (sna->kgem.mode == KGEM_BLT) ++ return true; ++ ++ assert((flags & COPY_SYNC) == 0); ++ ++ if (untiled_tlb_miss(src_bo) || ++ untiled_tlb_miss(dst_bo)) ++ return true; ++ ++ if (flags & COPY_DRI && !sna->kgem.has_semaphores) ++ return false; ++ ++ if (force_blt_ring(sna, dst_bo)) ++ return true; ++ ++ if ((flags & COPY_SMALL || ++ (sna->render_state.gt < 3 && src_bo == dst_bo)) && ++ can_switch_to_blt(sna, dst_bo, flags)) ++ return true; ++ ++ if (kgem_bo_is_render(dst_bo) || ++ kgem_bo_is_render(src_bo)) ++ return false; ++ ++ if (flags & COPY_LAST && ++ sna->render_state.gt < 3 && ++ can_switch_to_blt(sna, dst_bo, flags)) ++ return true; ++ ++ if (prefer_render_ring(sna, dst_bo)) ++ return false; ++ ++ if (!prefer_blt_ring(sna, dst_bo, flags)) ++ return false; ++ ++ return prefer_blt_bo(sna, src_bo, dst_bo); ++} ++ ++static bool ++gen9_render_copy_boxes(struct sna *sna, uint8_t alu, ++ const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, ++ const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, ++ const BoxRec *box, int n, unsigned flags) ++{ ++ struct sna_composite_op tmp; ++ BoxRec extents; ++ ++ DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? 
%d\n", ++ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, ++ src_bo == dst_bo, ++ overlaps(sna, ++ src_bo, src_dx, src_dy, ++ dst_bo, dst_dx, dst_dy, ++ box, n, flags, &extents))); ++ ++ if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && ++ sna_blt_compare_depth(src, dst) && ++ sna_blt_copy_boxes(sna, alu, ++ src_bo, src_dx, src_dy, ++ dst_bo, dst_dx, dst_dy, ++ dst->bitsPerPixel, ++ box, n)) ++ return true; ++ ++ if (!(alu == GXcopy || alu == GXclear) || ++ unaligned(src_bo, src->bitsPerPixel) || ++ unaligned(dst_bo, dst->bitsPerPixel)) { ++fallback_blt: ++ DBG(("%s: fallback blt\n", __FUNCTION__)); ++ if (!sna_blt_compare_depth(src, dst)) ++ return false; ++ ++ return sna_blt_copy_boxes_fallback(sna, alu, ++ src, src_bo, src_dx, src_dy, ++ dst, dst_bo, dst_dx, dst_dy, ++ box, n); ++ } ++ ++ if (overlaps(sna, ++ src_bo, src_dx, src_dy, ++ dst_bo, dst_dx, dst_dy, ++ box, n, flags, ++ &extents)) { ++ bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); ++ ++ if ((big || !prefer_render_ring(sna, dst_bo)) && ++ sna_blt_copy_boxes(sna, alu, ++ src_bo, src_dx, src_dy, ++ dst_bo, dst_dx, dst_dy, ++ dst->bitsPerPixel, ++ box, n)) ++ return true; ++ ++ if (big) ++ goto fallback_blt; ++ ++ assert(src_bo == dst_bo); ++ assert(src->depth == dst->depth); ++ assert(src->width == dst->width); ++ assert(src->height == dst->height); ++ return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, ++ src_dx, src_dy, ++ dst_dx, dst_dy, ++ box, n, &extents); ++ } ++ ++ if (dst->depth == src->depth) { ++ tmp.dst.format = sna_render_format_for_depth(dst->depth); ++ tmp.src.pict_format = tmp.dst.format; ++ } else { ++ tmp.dst.format = sna_format_for_depth(dst->depth); ++ tmp.src.pict_format = sna_format_for_depth(src->depth); ++ } ++ if (!gen9_check_format(tmp.src.pict_format)) ++ goto fallback_blt; ++ ++ tmp.dst.pixmap = (PixmapPtr)dst; ++ tmp.dst.width = dst->width; ++ tmp.dst.height = dst->height; ++ tmp.dst.bo = dst_bo; ++ tmp.dst.x = tmp.dst.y = 0; ++ 
tmp.damage = NULL; ++ ++ sna_render_composite_redirect_init(&tmp); ++ if (too_large(tmp.dst.width, tmp.dst.height)) { ++ int i; ++ ++ extents = box[0]; ++ for (i = 1; i < n; i++) { ++ if (box[i].x1 < extents.x1) ++ extents.x1 = box[i].x1; ++ if (box[i].y1 < extents.y1) ++ extents.y1 = box[i].y1; ++ ++ if (box[i].x2 > extents.x2) ++ extents.x2 = box[i].x2; ++ if (box[i].y2 > extents.y2) ++ extents.y2 = box[i].y2; ++ } ++ ++ if (!sna_render_composite_redirect(sna, &tmp, ++ extents.x1 + dst_dx, ++ extents.y1 + dst_dy, ++ extents.x2 - extents.x1, ++ extents.y2 - extents.y1, ++ n > 1)) ++ goto fallback_tiled; ++ } ++ ++ tmp.src.card_format = gen9_get_card_format(tmp.src.pict_format); ++ if (too_large(src->width, src->height)) { ++ int i; ++ ++ extents = box[0]; ++ for (i = 1; i < n; i++) { ++ if (box[i].x1 < extents.x1) ++ extents.x1 = box[i].x1; ++ if (box[i].y1 < extents.y1) ++ extents.y1 = box[i].y1; ++ ++ if (box[i].x2 > extents.x2) ++ extents.x2 = box[i].x2; ++ if (box[i].y2 > extents.y2) ++ extents.y2 = box[i].y2; ++ } ++ ++ if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, ++ extents.x1 + src_dx, ++ extents.y1 + src_dy, ++ extents.x2 - extents.x1, ++ extents.y2 - extents.y1)) ++ goto fallback_tiled_dst; ++ } else { ++ tmp.src.bo = src_bo; ++ tmp.src.width = src->width; ++ tmp.src.height = src->height; ++ tmp.src.offset[0] = tmp.src.offset[1] = 0; ++ } ++ ++ tmp.mask.bo = NULL; ++ ++ tmp.floats_per_vertex = 2; ++ tmp.floats_per_rect = 6; ++ tmp.need_magic_ca_pass = 0; ++ ++ tmp.u.gen9.flags = COPY_FLAGS(alu); ++ ++ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); ++ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { ++ kgem_submit(&sna->kgem); ++ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { ++ if (tmp.src.bo != src_bo) ++ kgem_bo_destroy(&sna->kgem, tmp.src.bo); ++ if (tmp.redirect.real_bo) ++ kgem_bo_destroy(&sna->kgem, tmp.dst.bo); ++ goto fallback_blt; ++ } ++ _kgem_set_mode(&sna->kgem, KGEM_RENDER); ++ } ++ ++ src_dx 
+= tmp.src.offset[0]; ++ src_dy += tmp.src.offset[1]; ++ ++ dst_dx += tmp.dst.x; ++ dst_dy += tmp.dst.y; ++ ++ tmp.dst.x = tmp.dst.y = 0; ++ ++ gen9_align_vertex(sna, &tmp); ++ gen9_emit_copy_state(sna, &tmp); ++ ++ do { ++ int16_t *v; ++ int n_this_time; ++ ++ n_this_time = gen9_get_rectangles(sna, &tmp, n, ++ gen9_emit_copy_state); ++ n -= n_this_time; ++ ++ v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); ++ sna->render.vertex_used += 6 * n_this_time; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); ++ do { ++ ++ DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", ++ box->x1 + src_dx, box->y1 + src_dy, ++ box->x1 + dst_dx, box->y1 + dst_dy, ++ box->x2 - box->x1, box->y2 - box->y1)); ++ v[0] = box->x2 + dst_dx; ++ v[2] = box->x2 + src_dx; ++ v[1] = v[5] = box->y2 + dst_dy; ++ v[3] = v[7] = box->y2 + src_dy; ++ v[8] = v[4] = box->x1 + dst_dx; ++ v[10] = v[6] = box->x1 + src_dx; ++ v[9] = box->y1 + dst_dy; ++ v[11] = box->y1 + src_dy; ++ v += 12; box++; ++ } while (--n_this_time); ++ } while (n); ++ ++ gen8_vertex_flush(sna); ++ sna_render_composite_redirect_done(sna, &tmp); ++ if (tmp.src.bo != src_bo) ++ kgem_bo_destroy(&sna->kgem, tmp.src.bo); ++ return true; ++ ++fallback_tiled_dst: ++ if (tmp.redirect.real_bo) ++ kgem_bo_destroy(&sna->kgem, tmp.dst.bo); ++fallback_tiled: ++ DBG(("%s: fallback tiled\n", __FUNCTION__)); ++ if (sna_blt_compare_depth(src, dst) && ++ sna_blt_copy_boxes(sna, alu, ++ src_bo, src_dx, src_dy, ++ dst_bo, dst_dx, dst_dy, ++ dst->bitsPerPixel, ++ box, n)) ++ return true; ++ ++ return sna_tiling_copy_boxes(sna, alu, ++ src, src_bo, src_dx, src_dy, ++ dst, dst_bo, dst_dx, dst_dy, ++ box, n); ++} ++ ++static void ++gen9_render_copy_blt(struct sna *sna, ++ const struct sna_copy_op *op, ++ int16_t sx, int16_t sy, ++ int16_t w, int16_t h, ++ int16_t dx, int16_t dy) ++{ ++ int16_t *v; ++ ++ gen9_get_rectangles(sna, &op->base, 1, gen9_emit_copy_state); ++ ++ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; ++ 
sna->render.vertex_used += 6; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); ++ ++ v[0] = dx+w; v[1] = dy+h; ++ v[2] = sx+w; v[3] = sy+h; ++ v[4] = dx; v[5] = dy+h; ++ v[6] = sx; v[7] = sy+h; ++ v[8] = dx; v[9] = dy; ++ v[10] = sx; v[11] = sy; ++} ++ ++static void ++gen9_render_copy_done(struct sna *sna, const struct sna_copy_op *op) ++{ ++ if (sna->render.vertex_offset) ++ gen8_vertex_flush(sna); ++} ++ ++static bool ++gen9_render_copy(struct sna *sna, uint8_t alu, ++ PixmapPtr src, struct kgem_bo *src_bo, ++ PixmapPtr dst, struct kgem_bo *dst_bo, ++ struct sna_copy_op *op) ++{ ++ DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", ++ __FUNCTION__, alu, ++ src->drawable.width, src->drawable.height, ++ dst->drawable.width, dst->drawable.height)); ++ ++ if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && ++ sna_blt_compare_depth(&src->drawable, &dst->drawable) && ++ sna_blt_copy(sna, alu, ++ src_bo, dst_bo, ++ dst->drawable.bitsPerPixel, ++ op)) ++ return true; ++ ++ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || ++ too_large(src->drawable.width, src->drawable.height) || ++ too_large(dst->drawable.width, dst->drawable.height) || ++ unaligned(src_bo, src->drawable.bitsPerPixel) || ++ unaligned(dst_bo, dst->drawable.bitsPerPixel)) { ++fallback: ++ if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) ++ return false; ++ ++ return sna_blt_copy(sna, alu, src_bo, dst_bo, ++ dst->drawable.bitsPerPixel, ++ op); ++ } ++ ++ if (dst->drawable.depth == src->drawable.depth) { ++ op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); ++ op->base.src.pict_format = op->base.dst.format; ++ } else { ++ op->base.dst.format = sna_format_for_depth(dst->drawable.depth); ++ op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); ++ } ++ if (!gen9_check_format(op->base.src.pict_format)) ++ goto fallback; ++ ++ op->base.dst.pixmap = dst; ++ op->base.dst.width = dst->drawable.width; ++ op->base.dst.height = dst->drawable.height; ++ 
op->base.dst.bo = dst_bo; ++ ++ op->base.src.bo = src_bo; ++ op->base.src.card_format = ++ gen9_get_card_format(op->base.src.pict_format); ++ op->base.src.width = src->drawable.width; ++ op->base.src.height = src->drawable.height; ++ ++ op->base.mask.bo = NULL; ++ ++ op->base.floats_per_vertex = 2; ++ op->base.floats_per_rect = 6; ++ ++ op->base.u.gen9.flags = COPY_FLAGS(alu); ++ ++ kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); ++ if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { ++ kgem_submit(&sna->kgem); ++ if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) ++ goto fallback; ++ _kgem_set_mode(&sna->kgem, KGEM_RENDER); ++ } ++ ++ gen9_align_vertex(sna, &op->base); ++ gen9_emit_copy_state(sna, &op->base); ++ ++ op->blt = gen9_render_copy_blt; ++ op->done = gen9_render_copy_done; ++ return true; ++} ++ ++static void ++gen9_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) ++{ ++ uint32_t *binding_table; ++ uint16_t offset, dirty; ++ ++ /* XXX Render Target Fast Clear ++ * Set RTFC Enable in PS and render a rectangle. ++ * Limited to a clearing the full MSC surface only with a ++ * specific kernel. 
++ */ ++ ++ gen9_get_batch(sna, op); ++ ++ binding_table = gen9_composite_get_binding_table(sna, &offset); ++ ++ dirty = kgem_bo_is_dirty(op->dst.bo); ++ ++ binding_table[0] = ++ gen9_bind_bo(sna, ++ op->dst.bo, op->dst.width, op->dst.height, ++ gen9_get_dest_format(op->dst.format), ++ true); ++ binding_table[1] = ++ gen9_bind_bo(sna, ++ op->src.bo, 1, 1, ++ SURFACEFORMAT_B8G8R8A8_UNORM, ++ false); ++ ++ if (sna->kgem.surface == offset && ++ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) { ++ sna->kgem.surface += SURFACE_DW; ++ offset = sna->render_state.gen9.surface_table; ++ } ++ ++ if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) ++ dirty = 0; ++ ++ gen9_emit_state(sna, op, offset | dirty); ++} ++ ++static bool ++gen9_render_fill_boxes(struct sna *sna, ++ CARD8 op, ++ PictFormat format, ++ const xRenderColor *color, ++ const DrawableRec *dst, struct kgem_bo *dst_bo, ++ const BoxRec *box, int n) ++{ ++ struct sna_composite_op tmp; ++ uint32_t pixel; ++ ++ DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", ++ __FUNCTION__, op, ++ color->red, color->green, color->blue, color->alpha, (int)format)); ++ ++ if (op >= ARRAY_SIZE(gen9_blend_op)) { ++ DBG(("%s: fallback due to unhandled blend op: %d\n", ++ __FUNCTION__, op)); ++ return false; ++ } ++ ++ if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || ++ !gen9_check_dst_format(format) || ++ unaligned(dst_bo, PICT_FORMAT_BPP(format))) { ++ uint8_t alu = GXinvalid; ++ ++ if (op <= PictOpSrc) { ++ pixel = 0; ++ if (op == PictOpClear) ++ alu = GXclear; ++ else if (sna_get_pixel_from_rgba(&pixel, ++ color->red, ++ color->green, ++ color->blue, ++ color->alpha, ++ format)) ++ alu = GXcopy; ++ } ++ ++ if (alu != GXinvalid && ++ sna_blt_fill_boxes(sna, alu, ++ dst_bo, dst->bitsPerPixel, ++ pixel, box, n)) ++ return true; ++ ++ if (!gen9_check_dst_format(format)) ++ return false; ++ } ++ ++ if (op == PictOpClear) { ++ pixel = 0; ++ op = 
PictOpSrc; ++ } else if (!sna_get_pixel_from_rgba(&pixel, ++ color->red, ++ color->green, ++ color->blue, ++ color->alpha, ++ PICT_a8r8g8b8)) ++ return false; ++ ++ DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", ++ __FUNCTION__, pixel, n, ++ box[0].x1, box[0].y1, box[0].x2, box[0].y2)); ++ ++ tmp.dst.pixmap = (PixmapPtr)dst; ++ tmp.dst.width = dst->width; ++ tmp.dst.height = dst->height; ++ tmp.dst.format = format; ++ tmp.dst.bo = dst_bo; ++ tmp.dst.x = tmp.dst.y = 0; ++ tmp.damage = NULL; ++ ++ sna_render_composite_redirect_init(&tmp); ++ if (too_large(dst->width, dst->height)) { ++ BoxRec extents; ++ ++ boxes_extents(box, n, &extents); ++ if (!sna_render_composite_redirect(sna, &tmp, ++ extents.x1, extents.y1, ++ extents.x2 - extents.x1, ++ extents.y2 - extents.y1, ++ n > 1)) ++ return sna_tiling_fill_boxes(sna, op, format, color, ++ dst, dst_bo, box, n); ++ } ++ ++ tmp.src.bo = sna_render_get_solid(sna, pixel); ++ tmp.mask.bo = NULL; ++ ++ tmp.floats_per_vertex = 2; ++ tmp.floats_per_rect = 6; ++ tmp.need_magic_ca_pass = false; ++ ++ tmp.u.gen9.flags = FILL_FLAGS(op, format); ++ ++ kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); ++ if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { ++ kgem_submit(&sna->kgem); ++ if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { ++ kgem_bo_destroy(&sna->kgem, tmp.src.bo); ++ tmp.src.bo = NULL; ++ ++ if (tmp.redirect.real_bo) { ++ kgem_bo_destroy(&sna->kgem, tmp.dst.bo); ++ tmp.redirect.real_bo = NULL; ++ } ++ ++ return false; ++ } ++ _kgem_set_mode(&sna->kgem, KGEM_RENDER); ++ } ++ ++ gen9_align_vertex(sna, &tmp); ++ gen9_emit_fill_state(sna, &tmp); ++ ++ do { ++ int n_this_time; ++ int16_t *v; ++ ++ n_this_time = gen9_get_rectangles(sna, &tmp, n, ++ gen9_emit_fill_state); ++ n -= n_this_time; ++ ++ v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); ++ sna->render.vertex_used += 6 * n_this_time; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); ++ do { ++ DBG((" (%d, %d), (%d, %d)\n", ++ box->x1, box->y1, 
box->x2, box->y2)); ++ ++ v[0] = box->x2; ++ v[5] = v[1] = box->y2; ++ v[8] = v[4] = box->x1; ++ v[9] = box->y1; ++ v[2] = v[3] = v[7] = 1; ++ v[6] = v[10] = v[11] = 0; ++ v += 12; box++; ++ } while (--n_this_time); ++ } while (n); ++ ++ gen8_vertex_flush(sna); ++ kgem_bo_destroy(&sna->kgem, tmp.src.bo); ++ sna_render_composite_redirect_done(sna, &tmp); ++ return true; ++} ++ ++static void ++gen9_render_fill_op_blt(struct sna *sna, ++ const struct sna_fill_op *op, ++ int16_t x, int16_t y, int16_t w, int16_t h) ++{ ++ int16_t *v; ++ ++ DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); ++ ++ gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state); ++ ++ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; ++ sna->render.vertex_used += 6; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); ++ ++ v[0] = x+w; ++ v[4] = v[8] = x; ++ v[1] = v[5] = y+h; ++ v[9] = y; ++ ++ v[2] = v[3] = v[7] = 1; ++ v[6] = v[10] = v[11] = 0; ++} ++ ++fastcall static void ++gen9_render_fill_op_box(struct sna *sna, ++ const struct sna_fill_op *op, ++ const BoxRec *box) ++{ ++ int16_t *v; ++ ++ DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, ++ box->x1, box->y1, box->x2, box->y2)); ++ ++ gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state); ++ ++ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; ++ sna->render.vertex_used += 6; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); ++ ++ v[0] = box->x2; ++ v[8] = v[4] = box->x1; ++ v[5] = v[1] = box->y2; ++ v[9] = box->y1; ++ ++ v[7] = v[2] = v[3] = 1; ++ v[6] = v[10] = v[11] = 0; ++} ++ ++fastcall static void ++gen9_render_fill_op_boxes(struct sna *sna, ++ const struct sna_fill_op *op, ++ const BoxRec *box, ++ int nbox) ++{ ++ DBG(("%s: (%d, %d),(%d, %d)... 
x %d\n", __FUNCTION__, ++ box->x1, box->y1, box->x2, box->y2, nbox)); ++ ++ do { ++ int nbox_this_time; ++ int16_t *v; ++ ++ nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, ++ gen9_emit_fill_state); ++ nbox -= nbox_this_time; ++ ++ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; ++ sna->render.vertex_used += 6 * nbox_this_time; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); ++ ++ do { ++ v[0] = box->x2; ++ v[8] = v[4] = box->x1; ++ v[5] = v[1] = box->y2; ++ v[9] = box->y1; ++ v[7] = v[2] = v[3] = 1; ++ v[6] = v[10] = v[11] = 0; ++ box++; v += 12; ++ } while (--nbox_this_time); ++ } while (nbox); ++} ++ ++static void ++gen9_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) ++{ ++ if (sna->render.vertex_offset) ++ gen8_vertex_flush(sna); ++ kgem_bo_destroy(&sna->kgem, op->base.src.bo); ++} ++ ++static bool ++gen9_render_fill(struct sna *sna, uint8_t alu, ++ PixmapPtr dst, struct kgem_bo *dst_bo, ++ uint32_t color, unsigned flags, ++ struct sna_fill_op *op) ++{ ++ DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); ++ ++ if (prefer_blt_fill(sna, dst_bo, flags) && ++ sna_blt_fill(sna, alu, ++ dst_bo, dst->drawable.bitsPerPixel, ++ color, ++ op)) ++ return true; ++ ++ if (!(alu == GXcopy || alu == GXclear) || ++ too_large(dst->drawable.width, dst->drawable.height) || ++ unaligned(dst_bo, dst->drawable.bitsPerPixel)) ++ return sna_blt_fill(sna, alu, ++ dst_bo, dst->drawable.bitsPerPixel, ++ color, ++ op); ++ ++ if (alu == GXclear) ++ color = 0; ++ ++ op->base.dst.pixmap = dst; ++ op->base.dst.width = dst->drawable.width; ++ op->base.dst.height = dst->drawable.height; ++ op->base.dst.format = sna_format_for_depth(dst->drawable.depth); ++ op->base.dst.bo = dst_bo; ++ op->base.dst.x = op->base.dst.y = 0; ++ ++ op->base.src.bo = ++ sna_render_get_solid(sna, ++ sna_rgba_for_color(color, ++ dst->drawable.depth)); ++ op->base.mask.bo = NULL; ++ ++ op->base.need_magic_ca_pass = false; ++ 
op->base.floats_per_vertex = 2;
++	op->base.floats_per_rect = 6;
++
++	op->base.u.gen9.flags = FILL_FLAGS_NOBLEND;
++
++	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
++	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
++		kgem_submit(&sna->kgem);
++		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
++			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
++			return false;
++		}
++
++		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
++	}
++
++	gen9_align_vertex(sna, &op->base);
++	gen9_emit_fill_state(sna, &op->base);
++
++	op->blt = gen9_render_fill_op_blt;
++	op->box = gen9_render_fill_op_box;
++	op->boxes = gen9_render_fill_op_boxes;
++	op->points = NULL;
++	op->done = gen9_render_fill_op_done;
++	return true;
++}
++
++/*
++ * Fill the single rectangle (x1, y1)-(x2, y2) through sna_blt_fill_boxes().
++ * The corners are packed into a one-element BoxRec; the helper's result is
++ * returned unchanged.
++ */
++static bool
++gen9_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
++			     uint32_t color,
++			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
++			     uint8_t alu)
++{
++	BoxRec box;
++
++	box.x1 = x1;
++	box.y1 = y1;
++	box.x2 = x2;
++	box.y2 = y2;
++
++	return sna_blt_fill_boxes(sna, alu,
++				  bo, dst->drawable.bitsPerPixel,
++				  color, &box, 1);
++}
++
++/*
++ * Fill one rectangle (x1, y1)-(x2, y2) of dst/bo with a solid color.
++ *
++ * The BLT helper is tried first when prefer_blt_fill() asks for it, and is
++ * the only option when alu is neither GXcopy nor GXclear, when the drawable
++ * is too_large(), or when bo is unaligned().  Otherwise the rectangle is
++ * emitted through the render path with a solid source bo.
++ *
++ * Returns true when the fill was emitted (or the BLT helper succeeded),
++ * false when bo does not fit in the batch even after submitting it.
++ */
++static bool
++gen9_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
++		     uint32_t color,
++		     int16_t x1, int16_t y1,
++		     int16_t x2, int16_t y2,
++		     uint8_t alu)
++{
++	struct sna_composite_op tmp;
++	int16_t *v;
++
++	/* Prefer to use the BLT if already engaged */
++	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
++	    gen9_render_fill_one_try_blt(sna, dst, bo, color,
++					 x1, y1, x2, y2, alu))
++		return true;
++
++	/* Must use the BLT if we can't RENDER... */
++	if (!(alu == GXcopy || alu == GXclear) ||
++	    too_large(dst->drawable.width, dst->drawable.height) ||
++	    unaligned(bo, dst->drawable.bitsPerPixel))
++		return gen9_render_fill_one_try_blt(sna, dst, bo, color,
++						    x1, y1, x2, y2, alu);
++
++	/* GXclear is handled as a copy of colour 0 */
++	if (alu == GXclear)
++		color = 0;
++
++	tmp.dst.pixmap = dst;
++	tmp.dst.width = dst->drawable.width;
++	tmp.dst.height = dst->drawable.height;
++	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
++	tmp.dst.bo = bo;
++	tmp.dst.x = tmp.dst.y = 0;
++
++	tmp.src.bo =
++		sna_render_get_solid(sna,
++				     sna_rgba_for_color(color,
++							dst->drawable.depth));
++	tmp.mask.bo = NULL;
++
++	tmp.floats_per_vertex = 2;
++	tmp.floats_per_rect = 6;
++	tmp.need_magic_ca_pass = false;
++
++	tmp.u.gen9.flags = FILL_FLAGS_NOBLEND;
++
++	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
++	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
++		kgem_submit(&sna->kgem);
++		/*
++		 * Give up only if the bo still does not fit once the batch
++		 * has been flushed.  The test used to be inverted, which
++		 * failed the fill exactly when the retry had succeeded and
++		 * carried on emitting when it had not.
++		 */
++		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
++			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
++			return false;
++		}
++		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
++	}
++
++	gen9_align_vertex(sna, &tmp);
++	gen9_emit_fill_state(sna, &tmp);
++
++	gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state);
++
++	DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));
++
++	/* One rectangle: 6 vertex slots, written as 12 int16 values */
++	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
++	sna->render.vertex_used += 6;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
++
++	v[0] = x2;
++	v[8] = v[4] = x1;
++	v[5] = v[1] = y2;
++	v[9] = y1;
++	v[7] = v[2] = v[3] = 1;
++	v[6] = v[10] = v[11] = 0;
++
++	gen8_vertex_flush(sna);
++	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
++
++	return true;
++}
++
++/*
++ * Clear the whole drawable through sna_blt_fill_boxes(): one box spanning
++ * (0, 0)-(width, height), GXclear, pixel value 0.
++ */
++static bool
++gen9_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
++{
++	BoxRec box;
++
++	box.x1 = 0;
++	box.y1 = 0;
++	box.x2 = dst->drawable.width;
++	box.y2 = dst->drawable.height;
++
++	return sna_blt_fill_boxes(sna, GXclear,
++				  bo, dst->drawable.bitsPerPixel,
++				  0, &box, 1);
++}
++
++static bool
++gen9_render_clear(struct sna
*sna, PixmapPtr dst, struct kgem_bo *bo) ++{ ++ struct sna_composite_op tmp; ++ int16_t *v; ++ ++ DBG(("%s: %dx%d\n", ++ __FUNCTION__, ++ dst->drawable.width, ++ dst->drawable.height)); ++ ++ /* Prefer to use the BLT if already engaged */ ++ if (sna->kgem.mode == KGEM_BLT && ++ gen9_render_clear_try_blt(sna, dst, bo)) ++ return true; ++ ++ /* Must use the BLT if we can't RENDER... */ ++ if (too_large(dst->drawable.width, dst->drawable.height) || ++ unaligned(bo, dst->drawable.bitsPerPixel)) ++ return gen9_render_clear_try_blt(sna, dst, bo); ++ ++ tmp.dst.pixmap = dst; ++ tmp.dst.width = dst->drawable.width; ++ tmp.dst.height = dst->drawable.height; ++ tmp.dst.format = sna_format_for_depth(dst->drawable.depth); ++ tmp.dst.bo = bo; ++ tmp.dst.x = tmp.dst.y = 0; ++ ++ tmp.src.bo = sna_render_get_solid(sna, 0); ++ tmp.mask.bo = NULL; ++ ++ tmp.floats_per_vertex = 2; ++ tmp.floats_per_rect = 6; ++ tmp.need_magic_ca_pass = false; ++ ++ tmp.u.gen9.flags = FILL_FLAGS_NOBLEND; ++ ++ kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); ++ if (!kgem_check_bo(&sna->kgem, bo, NULL)) { ++ kgem_submit(&sna->kgem); ++ if (!kgem_check_bo(&sna->kgem, bo, NULL)) { ++ kgem_bo_destroy(&sna->kgem, tmp.src.bo); ++ return false; ++ } ++ _kgem_set_mode(&sna->kgem, KGEM_RENDER); ++ } ++ ++ gen9_align_vertex(sna, &tmp); ++ gen9_emit_fill_state(sna, &tmp); ++ ++ gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state); ++ ++ v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; ++ sna->render.vertex_used += 6; ++ assert(sna->render.vertex_used <= sna->render.vertex_size); ++ ++ v[0] = dst->drawable.width; ++ v[5] = v[1] = dst->drawable.height; ++ v[8] = v[4] = 0; ++ v[9] = 0; ++ ++ v[7] = v[2] = v[3] = 1; ++ v[6] = v[10] = v[11] = 0; ++ ++ gen8_vertex_flush(sna); ++ kgem_bo_destroy(&sna->kgem, tmp.src.bo); ++ ++ return true; ++} ++ ++#if !NO_VIDEO ++static uint32_t gen9_bind_video_source(struct sna *sna, ++ struct kgem_bo *bo, ++ uint32_t delta, ++ int width, ++ int height, ++ int pitch, ++ 
uint32_t format) ++{ ++ uint32_t *ss; ++ int offset; ++ ++ offset = sna->kgem.surface -= SURFACE_DW; ++ ss = sna->kgem.batch + offset; ++ ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | ++ gen9_tiling_bits(bo->tiling) | ++ format << SURFACE_FORMAT_SHIFT | ++ SURFACE_VALIGN_4 | SURFACE_HALIGN_4); ++ ss[1] = 0; ++ ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | ++ (height - 1) << SURFACE_HEIGHT_SHIFT); ++ ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT; ++ ss[4] = 0; ++ ss[5] = 0; ++ ss[6] = 0; ++ ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); ++ *(uint64_t *)(ss+8) = ++ kgem_add_reloc64(&sna->kgem, offset + 8, bo, ++ I915_GEM_DOMAIN_SAMPLER << 16, ++ delta); ++ ss[10] = 0; ++ ss[11] = 0; ++ ss[12] = 0; ++ ss[13] = 0; ++ ss[14] = 0; ++ ss[15] = 0; ++ ++ DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n", ++ offset, bo->handle, ss[1], ++ format, width, height, bo->pitch, bo->tiling)); ++ ++ return offset * sizeof(uint32_t); ++} ++ ++static void gen9_emit_video_state(struct sna *sna, ++ const struct sna_composite_op *op) ++{ ++ struct sna_video_frame *frame = op->priv; ++ uint32_t src_surf_format; ++ uint32_t src_surf_base[6]; ++ int src_width[6]; ++ int src_height[6]; ++ int src_pitch[6]; ++ uint32_t *binding_table; ++ uint16_t offset; ++ int n_src, n; ++ ++ /* XXX VeBox, bicubic */ ++ ++ gen9_get_batch(sna, op); ++ ++ src_surf_base[0] = 0; ++ src_surf_base[1] = 0; ++ src_surf_base[2] = frame->VBufOffset; ++ src_surf_base[3] = frame->VBufOffset; ++ src_surf_base[4] = frame->UBufOffset; ++ src_surf_base[5] = frame->UBufOffset; ++ ++ if (is_planar_fourcc(frame->id)) { ++ src_surf_format = SURFACEFORMAT_R8_UNORM; ++ src_width[1] = src_width[0] = frame->width; ++ src_height[1] = src_height[0] = frame->height; ++ src_pitch[1] = src_pitch[0] = frame->pitch[1]; ++ src_width[4] = src_width[5] = src_width[2] = src_width[3] = ++ frame->width / 2; ++ src_height[4] = src_height[5] = src_height[2] = src_height[3] = ++ frame->height / 
2; ++ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = ++ frame->pitch[0]; ++ n_src = 6; ++ } else { ++ if (frame->id == FOURCC_RGB888) ++ src_surf_format = SURFACEFORMAT_B8G8R8X8_UNORM; ++ else if (frame->id == FOURCC_UYVY) ++ src_surf_format = SURFACEFORMAT_YCRCB_SWAPY; ++ else ++ src_surf_format = SURFACEFORMAT_YCRCB_NORMAL; ++ ++ src_width[0] = frame->width; ++ src_height[0] = frame->height; ++ src_pitch[0] = frame->pitch[0]; ++ n_src = 1; ++ } ++ ++ binding_table = gen9_composite_get_binding_table(sna, &offset); ++ ++ binding_table[0] = ++ gen9_bind_bo(sna, ++ op->dst.bo, op->dst.width, op->dst.height, ++ gen9_get_dest_format(op->dst.format), ++ true); ++ for (n = 0; n < n_src; n++) { ++ binding_table[1+n] = ++ gen9_bind_video_source(sna, ++ frame->bo, ++ src_surf_base[n], ++ src_width[n], ++ src_height[n], ++ src_pitch[n], ++ src_surf_format); ++ } ++ ++ gen9_emit_state(sna, op, offset); ++} ++ ++static unsigned select_video_kernel(const struct sna_video_frame *frame) ++{ ++ switch (frame->id) { ++ case FOURCC_YV12: ++ case FOURCC_I420: ++ case FOURCC_XVMC: ++ return GEN9_WM_KERNEL_VIDEO_PLANAR; ++ ++ case FOURCC_RGB888: ++ case FOURCC_RGB565: ++ return GEN9_WM_KERNEL_VIDEO_RGB; ++ ++ default: ++ return GEN9_WM_KERNEL_VIDEO_PACKED; ++ } ++} ++ ++static bool ++gen9_render_video(struct sna *sna, ++ struct sna_video *video, ++ struct sna_video_frame *frame, ++ RegionPtr dstRegion, ++ PixmapPtr pixmap) ++{ ++ struct sna_composite_op tmp; ++ struct sna_pixmap *priv = sna_pixmap(pixmap); ++ int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; ++ int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; ++ int src_width = frame->src.x2 - frame->src.x1; ++ int src_height = frame->src.y2 - frame->src.y1; ++ float src_offset_x, src_offset_y; ++ float src_scale_x, src_scale_y; ++ unsigned filter; ++ const BoxRec *box; ++ int nbox; ++ ++ DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", ++ __FUNCTION__, ++ src_width, 
src_height, dst_width, dst_height, ++ region_num_rects(dstRegion), ++ REGION_EXTENTS(NULL, dstRegion)->x1, ++ REGION_EXTENTS(NULL, dstRegion)->y1, ++ REGION_EXTENTS(NULL, dstRegion)->x2, ++ REGION_EXTENTS(NULL, dstRegion)->y2)); ++ ++ assert(priv->gpu_bo); ++ assert(!too_large(pixmap->drawable.width, pixmap->drawable.height)); ++ assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel)); ++ ++ memset(&tmp, 0, sizeof(tmp)); ++ ++ tmp.dst.pixmap = pixmap; ++ tmp.dst.width = pixmap->drawable.width; ++ tmp.dst.height = pixmap->drawable.height; ++ tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); ++ tmp.dst.bo = priv->gpu_bo; ++ ++ tmp.src.bo = frame->bo; ++ tmp.mask.bo = NULL; ++ ++ tmp.floats_per_vertex = 3; ++ tmp.floats_per_rect = 9; ++ ++ DBG(("%s: scaling?=%d, planar?=%d [%x]\n", ++ __FUNCTION__, ++ src_width != dst_width || src_height != dst_height, ++ is_planar_fourcc(frame->id), frame->id)); ++ ++ if (src_width == dst_width && src_height == dst_height) ++ filter = SAMPLER_FILTER_NEAREST; ++ else ++ filter = SAMPLER_FILTER_BILINEAR; ++ ++ tmp.u.gen9.flags = ++ GEN9_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, ++ SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), ++ NO_BLEND, ++ select_video_kernel(frame), ++ 2); ++ tmp.priv = frame; ++ ++ kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); ++ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { ++ kgem_submit(&sna->kgem); ++ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) ++ return false; ++ ++ _kgem_set_mode(&sna->kgem, KGEM_RENDER); ++ } ++ ++ gen9_align_vertex(sna, &tmp); ++ gen9_emit_video_state(sna, &tmp); ++ ++ DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", ++ __FUNCTION__, ++ frame->src.x1, frame->src.y1, ++ src_width, src_height, ++ dst_width, dst_height, ++ frame->width, frame->height)); ++ ++ src_scale_x = (float)src_width / dst_width / frame->width; ++ src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * 
src_scale_x; ++ ++ src_scale_y = (float)src_height / dst_height / frame->height; ++ src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; ++ ++ DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", ++ __FUNCTION__, ++ src_scale_x, src_scale_y, ++ src_offset_x, src_offset_y)); ++ ++ box = region_rects(dstRegion); ++ nbox = region_num_rects(dstRegion); ++ while (nbox--) { ++ DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", ++ __FUNCTION__, ++ box->x1, box->y1, ++ box->x2, box->y2, ++ box->x1 * src_scale_x + src_offset_x, ++ box->y1 * src_scale_y + src_offset_y, ++ box->x2 * src_scale_x + src_offset_x, ++ box->y2 * src_scale_y + src_offset_y)); ++ ++ gen9_get_rectangles(sna, &tmp, 1, gen9_emit_video_state); ++ ++ OUT_VERTEX(box->x2, box->y2); ++ OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); ++ OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); ++ ++ OUT_VERTEX(box->x1, box->y2); ++ OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); ++ OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); ++ ++ OUT_VERTEX(box->x1, box->y1); ++ OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); ++ OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); ++ ++ box++; ++ } ++ gen8_vertex_flush(sna); ++ ++ if (!DAMAGE_IS_ALL(priv->gpu_damage)) ++ sna_damage_add(&priv->gpu_damage, dstRegion); ++ ++ return true; ++} ++#endif ++ ++static void gen9_render_flush(struct sna *sna) ++{ ++ gen8_vertex_close(sna); ++ ++ assert(sna->render.vb_id == 0); ++ assert(sna->render.vertex_offset == 0); ++} ++ ++static void gen9_render_reset(struct sna *sna) ++{ ++ sna->render_state.gen9.emit_flush = false; ++ sna->render_state.gen9.needs_invariant = true; ++ sna->render_state.gen9.ve_id = 3 << 2; ++ sna->render_state.gen9.ve_dirty = false; ++ sna->render_state.gen9.last_primitive = -1; ++ ++ sna->render_state.gen9.num_sf_outputs = 0; ++ sna->render_state.gen9.samplers = -1; ++ sna->render_state.gen9.blend = -1; ++ sna->render_state.gen9.kernel = -1; ++ 
sna->render_state.gen9.drawrect_offset = -1; ++ sna->render_state.gen9.drawrect_limit = -1; ++ sna->render_state.gen9.surface_table = 0; ++ ++ if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { ++ DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); ++ discard_vbo(sna); ++ } ++ ++ sna->render.vertex_offset = 0; ++ sna->render.nvertex_reloc = 0; ++ sna->render.vb_id = 0; ++} ++ ++static void gen9_render_fini(struct sna *sna) ++{ ++ kgem_bo_destroy(&sna->kgem, sna->render_state.gen9.general_bo); ++} ++ ++static bool gen9_render_setup(struct sna *sna) ++{ ++ struct gen9_render_state *state = &sna->render_state.gen9; ++ struct sna_static_stream general; ++ struct gen9_sampler_state *ss; ++ int i, j, k, l, m; ++ uint32_t devid; ++ ++ devid = intel_get_device_id(sna->dev); ++ if (devid & 0xf) ++ state->gt = GEN9_GT_BIAS + ((devid >> 4) & 0xf) + 1; ++ DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); ++ ++ state->info = &min_gt_info; ++ if (is_skl(sna)) ++ state->info = &skl_gt_info; ++ if (is_bxt(sna)) ++ state->info = &bxt_gt_info; ++ if (is_kbl(sna)) ++ state->info = &kbl_gt_info; ++ if (is_glk(sna)) ++ state->info = &glk_gt_info; ++ ++ sna_static_stream_init(&general); ++ ++ /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer ++ * dumps, you know it points to zero. 
++ */ ++ null_create(&general); ++ ++ for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) { ++ if (wm_kernels[m].size) { ++ state->wm_kernel[m][1] = ++ sna_static_stream_add(&general, ++ wm_kernels[m].data, ++ wm_kernels[m].size, ++ 64); ++ } else { ++ if (USE_8_PIXEL_DISPATCH) { ++ state->wm_kernel[m][0] = ++ sna_static_stream_compile_wm(sna, &general, ++ wm_kernels[m].data, 8); ++ } ++ ++ if (USE_16_PIXEL_DISPATCH) { ++ state->wm_kernel[m][1] = ++ sna_static_stream_compile_wm(sna, &general, ++ wm_kernels[m].data, 16); ++ } ++ ++ if (USE_32_PIXEL_DISPATCH) { ++ state->wm_kernel[m][2] = ++ sna_static_stream_compile_wm(sna, &general, ++ wm_kernels[m].data, 32); ++ } ++ } ++ assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); ++ } ++ ++ COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff); ++ ss = sna_static_stream_map(&general, ++ 2 * sizeof(*ss) * ++ (2 + ++ FILTER_COUNT * EXTEND_COUNT * ++ FILTER_COUNT * EXTEND_COUNT), ++ 32); ++ state->wm_state = sna_static_stream_offsetof(&general, ss); ++ sampler_copy_init(ss); ss += 2; ++ sampler_fill_init(ss); ss += 2; ++ for (i = 0; i < FILTER_COUNT; i++) { ++ for (j = 0; j < EXTEND_COUNT; j++) { ++ for (k = 0; k < FILTER_COUNT; k++) { ++ for (l = 0; l < EXTEND_COUNT; l++) { ++ sampler_state_init(ss++, i, j); ++ sampler_state_init(ss++, k, l); ++ } ++ } ++ } ++ } ++ ++ state->cc_blend = gen9_create_blend_state(&general); ++ ++ state->general_bo = sna_static_stream_fini(sna, &general); ++ return state->general_bo != NULL; ++} ++ ++const char *gen9_render_init(struct sna *sna, const char *backend) ++{ ++ if (!gen9_render_setup(sna)) ++ return backend; ++ ++ sna->kgem.context_switch = gen6_render_context_switch; ++ sna->kgem.retire = gen6_render_retire; ++ sna->kgem.expire = gen4_render_expire; ++ ++#if !NO_COMPOSITE ++ sna->render.composite = gen9_render_composite; ++ sna->render.prefer_gpu |= PREFER_GPU_RENDER; ++#endif ++#if !NO_COMPOSITE_SPANS ++ 
sna->render.check_composite_spans = gen9_check_composite_spans;
++	sna->render.composite_spans = gen9_render_composite_spans;
++	sna->render.prefer_gpu |= PREFER_GPU_SPANS;
++#endif
++#if !NO_VIDEO
++	sna->render.video = gen9_render_video;
++#endif
++
++#if !NO_COPY_BOXES
++	sna->render.copy_boxes = gen9_render_copy_boxes;
++#endif
++#if !NO_COPY
++	sna->render.copy = gen9_render_copy;
++#endif
++
++#if !NO_FILL_BOXES
++	sna->render.fill_boxes = gen9_render_fill_boxes;
++#endif
++#if !NO_FILL
++	sna->render.fill = gen9_render_fill;
++#endif
++#if !NO_FILL_ONE
++	sna->render.fill_one = gen9_render_fill_one;
++#endif
++#if !NO_FILL_CLEAR
++	sna->render.clear = gen9_render_clear;
++#endif
++
++	sna->render.flush = gen9_render_flush;
++	sna->render.reset = gen9_render_reset;
++	sna->render.fini = gen9_render_fini;
++
++	sna->render.max_3d_size = GEN9_MAX_SIZE;
++	sna->render.max_3d_pitch = 1 << 18;
++	return sna->render_state.gen9.info->name;
++}
+diff --git a/src/sna/gen9_render.h b/src/sna/gen9_render.h
+new file mode 100644
+index 00000000..e3cb3f93
+--- /dev/null
++++ b/src/sna/gen9_render.h
+@@ -0,0 +1,1130 @@
++#ifndef GEN9_RENDER_H
++#define GEN9_RENDER_H
++
++#define INTEL_MASK(high, low) (((1u << ((high) - (low) + 1)) - 1) << (low)) /* mask of bits low..high inclusive; 1u keeps masks reaching bit 31 out of signed-shift overflow; not valid for a full 32-bit field */
++
++#define GEN9_3D(pipeline,op,sub) \
++	((3 << 29) | ((pipeline) << 27) | ((op) << 24) | ((sub) << 16))
++
++#define GEN9_STATE_BASE_ADDRESS GEN9_3D(0, 1, 1)
++# define BASE_ADDRESS_MODIFY (1 << 0)
++
++#define GEN9_STATE_SIP GEN9_3D(0, 1, 2)
++
++#define GEN9_3DSTATE_VF_STATISTICS GEN9_3D(1, 0, 0xb)
++#define GEN9_PIPELINE_SELECT GEN9_3D(1, 1, 4)
++# define PIPELINE_SELECT_3D 0
++# define PIPELINE_SELECT_MEDIA 1
++#define PIPELINE_SELECTION_MASK (3 << 8)
++
++#define GEN9_MEDIA_STATE_POINTERS GEN9_3D(2, 0, 0)
++#define GEN9_MEDIA_OBJECT GEN9_3D(2, 1, 0)
++
++#define GEN9_3DSTATE_CLEAR_PARAMS GEN9_3D(3, 0, 0x04)
++#define GEN9_3DSTATE_DEPTH_BUFFER GEN9_3D(3, 0, 0x05)
++# define DEPTH_BUFFER_TYPE_SHIFT 29
++# define 
DEPTH_BUFFER_FORMAT_SHIFT 18 ++ ++#define GEN9_3DSTATE_STENCIL_BUFFER GEN9_3D(3, 0, 0x06) ++#define GEN9_3DSTATE_HIER_DEPTH_BUFFER GEN9_3D(3, 0, 0x07) ++#define GEN9_3DSTATE_VERTEX_BUFFERS GEN9_3D(3, 0, 0x08) ++# define VB_INDEX_SHIFT 26 ++# define VB_MODIFY_ENABLE (1 << 14) ++#define GEN9_3DSTATE_VERTEX_ELEMENTS GEN9_3D(3, 0, 0x09) ++# define VE_INDEX_SHIFT 26 ++# define VE_VALID (1 << 25) ++# define VE_FORMAT_SHIFT 16 ++# define VE_OFFSET_SHIFT 0 ++# define VE_COMPONENT_0_SHIFT 28 ++# define VE_COMPONENT_1_SHIFT 24 ++# define VE_COMPONENT_2_SHIFT 20 ++# define VE_COMPONENT_3_SHIFT 16 ++#define GEN9_3DSTATE_INDEX_BUFFER GEN9_3D(3, 0, 0x0a) ++#define GEN9_3DSTATE_VF GEN9_3D(3, 0, 0x0c) ++ ++#define GEN9_3DSTATE_MULTISAMPLE GEN9_3D(3, 0, 0x0d) ++/* DW1 */ ++# define MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) ++# define MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) ++# define MULTISAMPLE_NUMSAMPLES_1 (0 << 1) ++# define MULTISAMPLE_NUMSAMPLES_4 (2 << 1) ++# define MULTISAMPLE_NUMSAMPLES_8 (3 << 1) ++ ++#define GEN9_3DSTATE_CC_STATE_POINTERS GEN9_3D(3, 0, 0x0e) ++#define GEN9_3DSTATE_SCISSOR_STATE_POINTERS GEN9_3D(3, 0, 0x0f) ++ ++#define GEN9_3DSTATE_VS GEN9_3D(3, 0, 0x10) ++#define GEN9_3DSTATE_GS GEN9_3D(3, 0, 0x11) ++#define GEN9_3DSTATE_CLIP GEN9_3D(3, 0, 0x12) ++#define GEN9_3DSTATE_SF GEN9_3D(3, 0, 0x13) ++# define SF_TRI_PROVOKE_SHIFT 29 ++# define SF_LINE_PROVOKE_SHIFT 27 ++# define SF_FAN_PROVOKE_SHIFT 25 ++ ++#define GEN9_3DSTATE_WM GEN9_3D(3, 0, 0x14) ++/* DW1 */ ++# define WM_STATISTICS_ENABLE (1 << 31) ++# define WM_DEPTH_CLEAR (1 << 30) ++# define WM_DEPTH_RESOLVE (1 << 28) ++# define WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) ++# define WM_KILL_ENABLE (1 << 25) ++# define WM_POSITION_ZW_PIXEL (0 << 17) ++# define WM_POSITION_ZW_CENTROID (2 << 17) ++# define WM_POSITION_ZW_SAMPLE (3 << 17) ++# define WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 16) ++# define WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 15) ++# define WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 
<< 14) ++# define WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 13) ++# define WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 12) ++# define WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11) ++# define WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) ++# define WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) ++# define WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) ++# define WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) ++# define WM_LINE_AA_WIDTH_0_5 (0 << 6) ++# define WM_LINE_AA_WIDTH_1_0 (1 << 6) ++# define WM_LINE_AA_WIDTH_2_0 (2 << 6) ++# define WM_LINE_AA_WIDTH_4_0 (3 << 6) ++# define WM_POLYGON_STIPPLE_ENABLE (1 << 4) ++# define WM_LINE_STIPPLE_ENABLE (1 << 3) ++# define WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) ++# define WM_MSRAST_OFF_PIXEL (0 << 0) ++# define WM_MSRAST_OFF_PATTERN (1 << 0) ++# define WM_MSRAST_ON_PIXEL (2 << 0) ++# define WM_MSRAST_ON_PATTERN (3 << 0) ++ ++#define GEN9_3DSTATE_CONSTANT_VS GEN9_3D(3, 0, 0x15) ++#define GEN9_3DSTATE_CONSTANT_GS GEN9_3D(3, 0, 0x16) ++#define GEN9_3DSTATE_CONSTANT_PS GEN9_3D(3, 0, 0x17) ++ ++#define GEN9_3DSTATE_SAMPLE_MASK GEN9_3D(3, 0, 0x18) ++ ++#define GEN9_3DSTATE_CONSTANT_HS GEN9_3D(3, 0, 0x19) ++#define GEN9_3DSTATE_CONSTANT_DS GEN9_3D(3, 0, 0x1a) ++ ++#define GEN9_3DSTATE_HS GEN9_3D(3, 0, 0x1b) ++#define GEN9_3DSTATE_TE GEN9_3D(3, 0, 0x1c) ++#define GEN9_3DSTATE_DS GEN9_3D(3, 0, 0x1d) ++#define GEN9_3DSTATE_STREAMOUT GEN9_3D(3, 0, 0x1e) ++ ++#define GEN9_3DSTATE_SBE GEN9_3D(3, 0, 0x1f) ++/* DW1 */ ++# define SBE_FORCE_VERTEX_URB_READ_LENGTH (1<<29) ++# define SBE_FORCE_VERTEX_URB_READ_OFFSET (1<<28) ++# define SBE_NUM_OUTPUTS_SHIFT 22 ++# define SBE_SWIZZLE_ENABLE (1 << 21) ++# define SBE_POINT_SPRITE_LOWERLEFT (1 << 20) ++# define SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 ++# define SBE_URB_ENTRY_READ_OFFSET_SHIFT 5 ++#define SBE_ACTIVE_COMPONENT_NONE 0 ++#define SBE_ACTIVE_COMPONENT_XY 1 ++#define SBE_ACTIVE_COMPONENT_XYZ 2 ++#define SBE_ACTIVE_COMPONENT_XYZW 3 ++ ++ ++#define GEN9_3DSTATE_PS GEN9_3D(3, 0, 0x20) ++/* DW1:DW2 kernel pointer */ ++/* DW3 */ ++# 
define PS_SPF_MODE (1 << 31) ++# define PS_VECTOR_MASK_ENABLE (1 << 30) ++# define PS_SAMPLER_COUNT_SHIFT 27 ++# define PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 ++# define PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) ++# define PS_FLOATING_POINT_MODE_ALT (1 << 16) ++/* DW4:DW5: scratch space */ ++/* DW6 */ ++# define PS_MAX_THREADS_SHIFT 23 ++# define PS_MAX_THREADS (63 << PS_MAX_THREADS_SHIFT) ++# define PS_PUSH_CONSTANT_ENABLE (1 << 11) ++# define PS_RENDER_TARGET_CLEAR (1 << 8) ++# define PS_RENDER_TARGET_RESOLVE (1 << 6) ++# define PS_POSOFFSET_NONE (0 << 3) ++# define PS_POSOFFSET_CENTROID (2 << 3) ++# define PS_POSOFFSET_SAMPLE (3 << 3) ++# define PS_32_DISPATCH_ENABLE (1 << 2) ++# define PS_16_DISPATCH_ENABLE (1 << 1) ++# define PS_8_DISPATCH_ENABLE (1 << 0) ++/* DW7 */ ++# define PS_DISPATCH_START_GRF_SHIFT_0 16 ++# define PS_DISPATCH_START_GRF_SHIFT_1 8 ++# define PS_DISPATCH_START_GRF_SHIFT_2 0 ++/* DW8:D9: kernel 1 pointer */ ++/* DW10:D11: kernel 2 pointer */ ++ ++#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP GEN9_3D(3, 0, 0x21) ++#define GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC GEN9_3D(3, 0, 0x23) ++ ++#define GEN9_3DSTATE_BLEND_STATE_POINTERS GEN9_3D(3, 0, 0x24) ++ ++#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS GEN9_3D(3, 0, 0x26) ++#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS GEN9_3D(3, 0, 0x27) ++#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS GEN9_3D(3, 0, 0x28) ++#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS GEN9_3D(3, 0, 0x29) ++#define GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS GEN9_3D(3, 0, 0x2a) ++ ++#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS GEN9_3D(3, 0, 0x2b) ++#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS GEN9_3D(3, 0, 0x2c) ++#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS GEN9_3D(3, 0, 0x2d) ++#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS GEN9_3D(3, 0, 0x2e) ++#define GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS GEN9_3D(3, 0, 0x2f) ++ ++#define GEN9_3DSTATE_URB_VS GEN9_3D(3, 0, 0x30) ++#define GEN9_3DSTATE_URB_HS GEN9_3D(3, 0, 0x31) 
++#define GEN9_3DSTATE_URB_DS GEN9_3D(3, 0, 0x32) ++#define GEN9_3DSTATE_URB_GS GEN9_3D(3, 0, 0x33) ++/* DW1 */ ++# define URB_ENTRY_NUMBER_SHIFT 0 ++# define URB_ENTRY_SIZE_SHIFT 16 ++# define URB_STARTING_ADDRESS_SHIFT 25 ++ ++#define GEN9_3DSTATE_GATHER_CONSTANT_VS GEN9_3D(3, 0, 0x34) ++#define GEN9_3DSTATE_GATHER_CONSTANT_GS GEN9_3D(3, 0, 0x35) ++#define GEN9_3DSTATE_GATHER_CONSTANT_HS GEN9_3D(3, 0, 0x36) ++#define GEN9_3DSTATE_GATHER_CONSTANT_DS GEN9_3D(3, 0, 0x37) ++#define GEN9_3DSTATE_GATHER_CONSTANT_PS GEN9_3D(3, 0, 0x38) ++ ++#define GEN9_3DSTATE_DX9_CONSTANTF_VS GEN9_3D(3, 0, 0x39) ++#define GEN9_3DSTATE_DX9_CONSTANTF_PS GEN9_3D(3, 0, 0x3a) ++#define GEN9_3DSTATE_DX9_CONSTANTI_VS GEN9_3D(3, 0, 0x3b) ++#define GEN9_3DSTATE_DX9_CONSTANTI_PS GEN9_3D(3, 0, 0x3c) ++#define GEN9_3DSTATE_DX9_CONSTANTB_VS GEN9_3D(3, 0, 0x3d) ++#define GEN9_3DSTATE_DX9_CONSTANTB_PS GEN9_3D(3, 0, 0x3e) ++#define GEN9_3DSTATE_DX9_LOCAL_VALID_VS GEN9_3D(3, 0, 0x3f) ++#define GEN9_3DSTATE_DX9_LOCAL_VALID_PS GEN9_3D(3, 0, 0x40) ++#define GEN9_3DSTATE_DX9_GENERATE_ACTIVE_VS GEN9_3D(3, 0, 0x41) ++#define GEN9_3DSTATE_DX9_GENERATE_ACTIVE_PS GEN9_3D(3, 0, 0x42) ++ ++#define GEN9_3DSTATE_BINDING_TABLE_EDIT_VS GEN9_3D(3, 0, 0x43) ++#define GEN9_3DSTATE_BINDING_TABLE_EDIT_GS GEN9_3D(3, 0, 0x44) ++#define GEN9_3DSTATE_BINDING_TABLE_EDIT_HS GEN9_3D(3, 0, 0x45) ++#define GEN9_3DSTATE_BINDING_TABLE_EDIT_DS GEN9_3D(3, 0, 0x46) ++#define GEN9_3DSTATE_BINDING_TABLE_EDIT_PS GEN9_3D(3, 0, 0x47) ++ ++#define GEN9_3DSTATE_VF_INSTANCING GEN9_3D(3, 0, 0x49) ++#define GEN9_3DSTATE_VF_SGVS GEN9_3D(3, 0, 0x4a) ++# define SGVS_ENABLE_INSTANCE_ID (1 << 31) ++# define SGVS_INSTANCE_ID_COMPONENT_SHIFT 29 ++# define SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT 16 ++# define SGVS_ENABLE_VERTEX_ID (1 << 15) ++# define SGVS_VERTEX_ID_COMPONENT_SHIFT 13 ++# define SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT 0 ++#define GEN9_3DSTATE_VF_TOPOLOGY GEN9_3D(3, 0, 0x4b) ++# define POINTLIST 0x01 ++# define LINELIST 0x02 ++# define 
LINESTRIP 0x03 ++# define TRILIST 0x04 ++# define TRISTRIP 0x05 ++# define TRIFAN 0x06 ++# define QUADLIST 0x07 ++# define QUADSTRIP 0x08 ++# define LINELIST_ADJ 0x09 ++# define LINESTRIP_ADJ 0x0A ++# define TRILIST_ADJ 0x0B ++# define TRISTRIP_ADJ 0x0C ++# define TRISTRIP_REVERSE 0x0D ++# define POLYGON 0x0E ++# define RECTLIST 0x0F ++# define LINELOOP 0x10 ++# define POINTLIST_BF 0x11 ++# define LINESTRIP_CONT 0x12 ++# define LINESTRIP_BF 0x13 ++# define LINESTRIP_CONT_BF 0x14 ++# define TRIFAN_NOSTIPPLE 0x15 ++ ++#define GEN9_3DSTATE_WM_CHROMAKEY GEN9_3D(3, 0, 0x4c) ++ ++#define GEN9_3DSTATE_PS_BLEND GEN9_3D(3, 0, 0x4d) ++# define PS_BLEND_ALPHA_TO_COVERAGE_ENABLE (1 << 31) ++# define PS_BLEND_HAS_WRITEABLE_RT (1 << 30) ++# define PS_BLEND_COLOR_BLEND_ENABLE (1 << 29) ++# define PS_BLEND_SRC_ALPHA_SHIFT 24 ++# define PS_BLEND_DST_ALPHA_SHIFT 19 ++# define PS_BLEND_SRC_SHIFT 14 ++# define PS_BLEND_DST_SHIFT 9 ++# define PS_BLEND_ALPHA_TEST_ENABLE (1 << 8) ++# define PS_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE (1 << 7) ++ ++#define GEN9_3DSTATE_WM_DEPTH_STENCIL GEN9_3D(3, 0, 0x4e) ++/* DW1 */ ++# define WM_DS_STENCIL_TEST_MASK_MASK INTEL_MASK(31, 24) ++# define WM_DS_STENCIL_TEST_MASK_SHIFT 24 ++# define WM_DS_STENCIL_WRITE_MASK_MASK INTEL_MASK(23, 16) ++# define WM_DS_STENCIL_WRITE_MASK_SHIFT 16 ++# define WM_DS_BF_STENCIL_TEST_MASK_MASK INTEL_MASK(15, 8) ++# define WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8 ++# define WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0) ++# define WM_DS_DEPTH_FUNC_SHIFT 5 ++# define WM_DS_DOUBLE_SIDED_STENCIL_ENABLE (1 << 4) ++# define WM_DS_STENCIL_TEST_ENABLE (1 << 3) ++# define WM_DS_STENCIL_BUFFER_WRITE_ENABLE (1 << 2) ++# define WM_DS_DEPTH_TEST_ENABLE (1 << 1) ++# define WM_DS_DEPTH_BUFFER_WRITE_ENABLE (1 << 0) ++/* DW2 */ ++# define WM_DS_STENCIL_TEST_MASK_MASK INTEL_MASK(31, 24) ++# define WM_DS_STENCIL_TEST_MASK_SHIFT 24 ++# define WM_DS_STENCIL_WRITE_MASK_MASK INTEL_MASK(23, 16) ++# define WM_DS_STENCIL_WRITE_MASK_SHIFT 16 ++# define 
WM_DS_BF_STENCIL_TEST_MASK_MASK INTEL_MASK(15, 8) ++# define WM_DS_BF_STENCIL_TEST_MASK_SHIFT 8 ++# define WM_DS_BF_STENCIL_WRITE_MASK_MASK INTEL_MASK(7, 0) ++# define WM_DS_BF_STENCIL_WRITE_MASK_SHIFT 0 ++ ++#define GEN9_3DSTATE_PS_EXTRA GEN9_3D(3, 0, 0x4f) ++# define PSX_PIXEL_SHADER_VALID (1 << 31) ++# define PSX_PIXEL_SHADER_NO_RT_WRITE (1 << 30) ++# define PSX_OMASK_TO_RENDER_TARGET (1 << 29) ++# define PSX_KILL_ENABLE (1 << 28) ++# define PSX_PSCDEPTH_OFF (0 << 26) ++# define PSX_PSCDEPTH_ON (1 << 26) ++# define PSX_PSCDEPTH_ON_GE (2 << 26) ++# define PSX_PSCDEPTH_ON_LE (3 << 26) ++# define PSX_FORCE_COMPUTED_DEPTH (1 << 25) ++# define PSX_USES_SOURCE_DEPTH (1 << 24) ++# define PSX_USES_SOURCE_W (1 << 23) ++# define PSX_ATTRIBUTE_ENABLE (1 << 8) ++# define PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7) ++# define PSX_SHADER_IS_PER_SAMPLE (1 << 6) ++# define PSX_SHADER_HAS_UAV (1 << 2) ++# define PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1) ++ ++#define GEN9_3DSTATE_RASTER GEN9_3D(3, 0, 0x50) ++/* DW1 */ ++# define RASTER_FRONT_WINDING_CCW (1 << 21) ++# define RASTER_CULL_BOTH (0 << 16) ++# define RASTER_CULL_NONE (1 << 16) ++# define RASTER_CULL_FRONT (2 << 16) ++# define RASTER_CULL_BACK (3 << 16) ++# define RASTER_SMOOTH_POINT_ENABLE (1 << 13) ++# define RASTER_LINE_AA_ENABLE (1 << 2) ++# define RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE (1 << 0) ++ ++#define GEN9_3DSTATE_SBE_SWIZ GEN9_3D(3, 0, 0x51) ++#define GEN9_3DSTATE_WM_HZ_OP GEN9_3D(3, 0, 0x52) ++ ++#define GEN9_3DSTATE_COMPONENT_PACKING GEN6_3D(3, 0, 0x55) ++ ++ ++ ++#define GEN9_3DSTATE_DRAWING_RECTANGLE GEN9_3D(3, 1, 0x00) ++#define GEN9_3DSTATE_SAMPLER_PALETTE_LOAD GEN9_3D(3, 1, 0x02) ++#define GEN9_3DSTATE_CHROMA_KEY GEN9_3D(3, 1, 0x04) ++ ++#define GEN9_3DSTATE_POLY_STIPPLE_OFFSET GEN9_3D(3, 1, 0x06) ++#define GEN9_3DSTATE_POLY_STIPPLE_PATTERN GEN9_3D(3, 1, 0x07) ++#define GEN9_3DSTATE_LINE_STIPPLE GEN9_3D(3, 1, 0x08) ++#define GEN9_3DSTATE_AA_LINE_PARAMS GEN9_3D(3, 1, 0x0a) ++#define 
GEN9_3DSTATE_SAMPLER_PALETTE_LOAD1 GEN9_3D(3, 1, 0x0c) ++#define GEN9_3DSTATE_MONOFILTER_SIZE GEN9_3D(3, 1, 0x11) ++#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS GEN9_3D(3, 1, 0x12) ++#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS GEN9_3D(3, 1, 0x13) ++#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS GEN9_3D(3, 1, 0x14) ++#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS GEN9_3D(3, 1, 0x15) ++#define GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS GEN9_3D(3, 1, 0x16) ++/* DW1 */ ++# define PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 ++# define PUSH_CONSTANT_BUFFER_SIZE_SHIFT 0 ++ ++#define GEN9_3DSTATE_SO_DECL_LIST GEN9_3D(3, 1, 0x17) ++#define GEN9_3DSTATE_SO_BUFFER GEN9_3D(3, 1, 0x18) ++#define GEN9_3DSTATE_BINDING_TABLE_POOL_ALLOC GEN9_3D(3, 1, 0x19) ++#define GEN9_3DSTATE_GATHER_BUFFER_POOL_ALLOC GEN9_3D(3, 1, 0x1a) ++#define GEN9_3DSTATE_DX9_CONSTANT_BUFFER_POOL_ALLOC GEN9_3D(3, 1, 0x1b) ++#define GEN9_3DSTATE_SAMPLE_PATTERN GEN9_3D(3, 1, 0x1c) ++ ++ ++/* for GEN9_PIPE_CONTROL */ ++#define GEN9_PIPE_CONTROL GEN9_3D(3, 2, 0) ++#define PIPE_CONTROL_CS_STALL (1 << 20) ++#define PIPE_CONTROL_NOWRITE (0 << 14) ++#define PIPE_CONTROL_WRITE_QWORD (1 << 14) ++#define PIPE_CONTROL_WRITE_DEPTH (2 << 14) ++#define PIPE_CONTROL_WRITE_TIME (3 << 14) ++#define PIPE_CONTROL_DEPTH_STALL (1 << 13) ++#define PIPE_CONTROL_WC_FLUSH (1 << 12) ++#define PIPE_CONTROL_IS_FLUSH (1 << 11) ++#define PIPE_CONTROL_TC_FLUSH (1 << 10) ++#define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) ++#define PIPE_CONTROL_FLUSH (1 << 7) ++#define PIPE_CONTROL_GLOBAL_GTT (1 << 2) ++#define PIPE_CONTROL_LOCAL_PGTT (0 << 2) ++#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) ++#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) ++ ++ ++#define GEN9_3DPRIMITIVE GEN9_3D(3, 3, 0) ++ ++/* 3DPRIMITIVE bits */ ++#define VERTEX_SEQUENTIAL (0 << 15) ++#define VERTEX_RANDOM (1 << 15) ++ ++#define ANISORATIO_2 0 ++#define ANISORATIO_4 1 ++#define ANISORATIO_6 2 ++#define ANISORATIO_8 3 ++#define ANISORATIO_10 4 ++#define ANISORATIO_12 5 ++#define ANISORATIO_14 
6 ++#define ANISORATIO_16 7 ++ ++#define BLENDFACTOR_ONE 0x1 ++#define BLENDFACTOR_SRC_COLOR 0x2 ++#define BLENDFACTOR_SRC_ALPHA 0x3 ++#define BLENDFACTOR_DST_ALPHA 0x4 ++#define BLENDFACTOR_DST_COLOR 0x5 ++#define BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 ++#define BLENDFACTOR_CONST_COLOR 0x7 ++#define BLENDFACTOR_CONST_ALPHA 0x8 ++#define BLENDFACTOR_SRC1_COLOR 0x9 ++#define BLENDFACTOR_SRC1_ALPHA 0x0A ++#define BLENDFACTOR_ZERO 0x11 ++#define BLENDFACTOR_INV_SRC_COLOR 0x12 ++#define BLENDFACTOR_INV_SRC_ALPHA 0x13 ++#define BLENDFACTOR_INV_DST_ALPHA 0x14 ++#define BLENDFACTOR_INV_DST_COLOR 0x15 ++#define BLENDFACTOR_INV_CONST_COLOR 0x17 ++#define BLENDFACTOR_INV_CONST_ALPHA 0x18 ++#define BLENDFACTOR_INV_SRC1_COLOR 0x19 ++#define BLENDFACTOR_INV_SRC1_ALPHA 0x1A ++ ++#define BLENDFUNCTION_ADD 0 ++#define BLENDFUNCTION_SUBTRACT 1 ++#define BLENDFUNCTION_REVERSE_SUBTRACT 2 ++#define GEN9_BLENDFUNCTION_MIN 3 ++#define BLENDFUNCTION_MAX 4 ++ ++#define ALPHATEST_FORMAT_UNORM8 0 ++#define ALPHATEST_FORMAT_FLOAT32 1 ++ ++#define CHROMAKEY_KILL_ON_ANY_MATCH 0 ++#define CHROMAKEY_REPLACE_BLACK 1 ++ ++#define CLIP_API_OGL 0 ++#define CLIP_API_DX 1 ++ ++#define CLIPMODE_NORMAL 0 ++#define CLIPMODE_CLIP_ALL 1 ++#define CLIPMODE_CLIP_NON_REJECTED 2 ++#define CLIPMODE_REJECT_ALL 3 ++#define CLIPMODE_ACCEPT_ALL 4 ++ ++#define CLIP_NDCSPACE 0 ++#define CLIP_SCREENSPACE 1 ++ ++#define COMPAREFUNCTION_ALWAYS 0 ++#define COMPAREFUNCTION_NEVER 1 ++#define COMPAREFUNCTION_LESS 2 ++#define COMPAREFUNCTION_EQUAL 3 ++#define COMPAREFUNCTION_LEQUAL 4 ++#define COMPAREFUNCTION_GREATER 5 ++#define COMPAREFUNCTION_NOTEQUAL 6 ++#define COMPAREFUNCTION_GEQUAL 7 ++ ++#define COVERAGE_PIXELS_HALF 0 ++#define COVERAGE_PIXELS_1 1 ++#define COVERAGE_PIXELS_2 2 ++#define COVERAGE_PIXELS_4 3 ++ ++#define DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 ++#define DEPTHFORMAT_D32_FLOAT 1 ++#define DEPTHFORMAT_D24_UNORM_S8_UINT 2 ++#define DEPTHFORMAT_D16_UNORM 5 ++ ++#define FLOATING_POINT_IEEE_754 0 ++#define 
FLOATING_POINT_NON_IEEE_754 1 ++ ++#define INDEX_BYTE 0 ++#define INDEX_WORD 1 ++#define INDEX_DWORD 2 ++ ++#define LOGICOPFUNCTION_CLEAR 0 ++#define LOGICOPFUNCTION_NOR 1 ++#define LOGICOPFUNCTION_AND_INVERTED 2 ++#define LOGICOPFUNCTION_COPY_INVERTED 3 ++#define LOGICOPFUNCTION_AND_REVERSE 4 ++#define LOGICOPFUNCTION_INVERT 5 ++#define LOGICOPFUNCTION_XOR 6 ++#define LOGICOPFUNCTION_NAND 7 ++#define LOGICOPFUNCTION_AND 8 ++#define LOGICOPFUNCTION_EQUIV 9 ++#define LOGICOPFUNCTION_NOOP 10 ++#define LOGICOPFUNCTION_OR_INVERTED 11 ++#define LOGICOPFUNCTION_COPY 12 ++#define LOGICOPFUNCTION_OR_REVERSE 13 ++#define LOGICOPFUNCTION_OR 14 ++#define LOGICOPFUNCTION_SET 15 ++ ++#define MAPFILTER_NEAREST 0x0 ++#define MAPFILTER_LINEAR 0x1 ++#define MAPFILTER_ANISOTROPIC 0x2 ++#define MAPFILTER_FLEXIBLE 0x3 ++#define MAPFILTER_MONO 0x6 ++ ++#define MIPFILTER_NONE 0 ++#define MIPFILTER_NEAREST 1 ++#define MIPFILTER_LINEAR 3 ++ ++#define POLYGON_FRONT_FACING 0 ++#define POLYGON_BACK_FACING 1 ++ ++#define PREFILTER_ALWAYS 0x0 ++#define PREFILTER_NEVER 0x1 ++#define PREFILTER_LESS 0x2 ++#define PREFILTER_EQUAL 0x3 ++#define PREFILTER_LEQUAL 0x4 ++#define PREFILTER_GREATER 0x5 ++#define PREFILTER_NOTEQUAL 0x6 ++#define PREFILTER_GEQUAL 0x7 ++ ++#define RASTRULE_UPPER_LEFT 0 ++#define RASTRULE_UPPER_RIGHT 1 ++ ++#define STENCILOP_KEEP 0 ++#define STENCILOP_ZERO 1 ++#define STENCILOP_REPLACE 2 ++#define STENCILOP_INCRSAT 3 ++#define STENCILOP_DECRSAT 4 ++#define STENCILOP_INCR 5 ++#define STENCILOP_DECR 6 ++#define STENCILOP_INVERT 7 ++ ++#define SURFACE_MIPMAPLAYOUT_BELOW 0 ++#define SURFACE_MIPMAPLAYOUT_RIGHT 1 ++ ++#define SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 ++#define SURFACEFORMAT_R32G32B32A32_SINT 0x001 ++#define SURFACEFORMAT_R32G32B32A32_UINT 0x002 ++#define SURFACEFORMAT_R32G32B32A32_UNORM 0x003 ++#define SURFACEFORMAT_R32G32B32A32_SNORM 0x004 ++#define SURFACEFORMAT_R64G64_FLOAT 0x005 ++#define SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 ++#define 
SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 ++#define SURFACEFORMAT_R32G32B32A32_USCALED 0x008 ++#define SURFACEFORMAT_R32G32B32_FLOAT 0x040 ++#define SURFACEFORMAT_R32G32B32_SINT 0x041 ++#define SURFACEFORMAT_R32G32B32_UINT 0x042 ++#define SURFACEFORMAT_R32G32B32_UNORM 0x043 ++#define SURFACEFORMAT_R32G32B32_SNORM 0x044 ++#define SURFACEFORMAT_R32G32B32_SSCALED 0x045 ++#define SURFACEFORMAT_R32G32B32_USCALED 0x046 ++#define SURFACEFORMAT_R16G16B16A16_UNORM 0x080 ++#define SURFACEFORMAT_R16G16B16A16_SNORM 0x081 ++#define SURFACEFORMAT_R16G16B16A16_SINT 0x082 ++#define SURFACEFORMAT_R16G16B16A16_UINT 0x083 ++#define SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 ++#define SURFACEFORMAT_R32G32_FLOAT 0x085 ++#define SURFACEFORMAT_R32G32_SINT 0x086 ++#define SURFACEFORMAT_R32G32_UINT 0x087 ++#define SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 ++#define SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 ++#define SURFACEFORMAT_L32A32_FLOAT 0x08A ++#define SURFACEFORMAT_R32G32_UNORM 0x08B ++#define SURFACEFORMAT_R32G32_SNORM 0x08C ++#define SURFACEFORMAT_R64_FLOAT 0x08D ++#define SURFACEFORMAT_R16G16B16X16_UNORM 0x08E ++#define SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F ++#define SURFACEFORMAT_A32X32_FLOAT 0x090 ++#define SURFACEFORMAT_L32X32_FLOAT 0x091 ++#define SURFACEFORMAT_I32X32_FLOAT 0x092 ++#define SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 ++#define SURFACEFORMAT_R16G16B16A16_USCALED 0x094 ++#define SURFACEFORMAT_R32G32_SSCALED 0x095 ++#define SURFACEFORMAT_R32G32_USCALED 0x096 ++#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 ++#define SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 ++#define SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 ++#define SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 ++#define SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 ++#define SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 ++#define SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 ++#define SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 ++#define SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 ++#define SURFACEFORMAT_R8G8B8A8_SINT 0x0CA ++#define 
SURFACEFORMAT_R8G8B8A8_UINT 0x0CB ++#define SURFACEFORMAT_R16G16_UNORM 0x0CC ++#define SURFACEFORMAT_R16G16_SNORM 0x0CD ++#define SURFACEFORMAT_R16G16_SINT 0x0CE ++#define SURFACEFORMAT_R16G16_UINT 0x0CF ++#define SURFACEFORMAT_R16G16_FLOAT 0x0D0 ++#define SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 ++#define SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 ++#define SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 ++#define SURFACEFORMAT_R32_SINT 0x0D6 ++#define SURFACEFORMAT_R32_UINT 0x0D7 ++#define SURFACEFORMAT_R32_FLOAT 0x0D8 ++#define SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 ++#define SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA ++#define SURFACEFORMAT_L16A16_UNORM 0x0DF ++#define SURFACEFORMAT_I24X8_UNORM 0x0E0 ++#define SURFACEFORMAT_L24X8_UNORM 0x0E1 ++#define SURFACEFORMAT_A24X8_UNORM 0x0E2 ++#define SURFACEFORMAT_I32_FLOAT 0x0E3 ++#define SURFACEFORMAT_L32_FLOAT 0x0E4 ++#define SURFACEFORMAT_A32_FLOAT 0x0E5 ++#define SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 ++#define SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA ++#define SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB ++#define SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC ++#define SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED ++#define SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE ++#define SURFACEFORMAT_L16A16_FLOAT 0x0F0 ++#define SURFACEFORMAT_R32_UNORM 0x0F1 ++#define SURFACEFORMAT_R32_SNORM 0x0F2 ++#define SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 ++#define SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 ++#define SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 ++#define SURFACEFORMAT_R16G16_SSCALED 0x0F6 ++#define SURFACEFORMAT_R16G16_USCALED 0x0F7 ++#define SURFACEFORMAT_R32_SSCALED 0x0F8 ++#define SURFACEFORMAT_R32_USCALED 0x0F9 ++#define SURFACEFORMAT_B5G6R5_UNORM 0x100 ++#define SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 ++#define SURFACEFORMAT_B5G5R5A1_UNORM 0x102 ++#define SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 ++#define SURFACEFORMAT_B4G4R4A4_UNORM 0x104 ++#define SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 ++#define SURFACEFORMAT_R8G8_UNORM 0x106 ++#define SURFACEFORMAT_R8G8_SNORM 0x107 
++#define SURFACEFORMAT_R8G8_SINT 0x108 ++#define SURFACEFORMAT_R8G8_UINT 0x109 ++#define SURFACEFORMAT_R16_UNORM 0x10A ++#define SURFACEFORMAT_R16_SNORM 0x10B ++#define SURFACEFORMAT_R16_SINT 0x10C ++#define SURFACEFORMAT_R16_UINT 0x10D ++#define SURFACEFORMAT_R16_FLOAT 0x10E ++#define SURFACEFORMAT_I16_UNORM 0x111 ++#define SURFACEFORMAT_L16_UNORM 0x112 ++#define SURFACEFORMAT_A16_UNORM 0x113 ++#define SURFACEFORMAT_L8A8_UNORM 0x114 ++#define SURFACEFORMAT_I16_FLOAT 0x115 ++#define SURFACEFORMAT_L16_FLOAT 0x116 ++#define SURFACEFORMAT_A16_FLOAT 0x117 ++#define SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 ++#define SURFACEFORMAT_B5G5R5X1_UNORM 0x11A ++#define SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B ++#define SURFACEFORMAT_R8G8_SSCALED 0x11C ++#define SURFACEFORMAT_R8G8_USCALED 0x11D ++#define SURFACEFORMAT_R16_SSCALED 0x11E ++#define SURFACEFORMAT_R16_USCALED 0x11F ++#define SURFACEFORMAT_R8_UNORM 0x140 ++#define SURFACEFORMAT_R8_SNORM 0x141 ++#define SURFACEFORMAT_R8_SINT 0x142 ++#define SURFACEFORMAT_R8_UINT 0x143 ++#define SURFACEFORMAT_A8_UNORM 0x144 ++#define SURFACEFORMAT_I8_UNORM 0x145 ++#define SURFACEFORMAT_L8_UNORM 0x146 ++#define SURFACEFORMAT_P4A4_UNORM 0x147 ++#define SURFACEFORMAT_A4P4_UNORM 0x148 ++#define SURFACEFORMAT_R8_SSCALED 0x149 ++#define SURFACEFORMAT_R8_USCALED 0x14A ++#define SURFACEFORMAT_R1_UINT 0x181 ++#define SURFACEFORMAT_YCRCB_NORMAL 0x182 ++#define SURFACEFORMAT_YCRCB_SWAPUVY 0x183 ++#define SURFACEFORMAT_BC1_UNORM 0x186 ++#define SURFACEFORMAT_BC2_UNORM 0x187 ++#define SURFACEFORMAT_BC3_UNORM 0x188 ++#define SURFACEFORMAT_BC4_UNORM 0x189 ++#define SURFACEFORMAT_BC5_UNORM 0x18A ++#define SURFACEFORMAT_BC1_UNORM_SRGB 0x18B ++#define SURFACEFORMAT_BC2_UNORM_SRGB 0x18C ++#define SURFACEFORMAT_BC3_UNORM_SRGB 0x18D ++#define SURFACEFORMAT_MONO8 0x18E ++#define SURFACEFORMAT_YCRCB_SWAPUV 0x18F ++#define SURFACEFORMAT_YCRCB_SWAPY 0x190 ++#define SURFACEFORMAT_DXT1_RGB 0x191 ++#define SURFACEFORMAT_FXT1 0x192 ++#define 
SURFACEFORMAT_R8G8B8_UNORM 0x193 ++#define SURFACEFORMAT_R8G8B8_SNORM 0x194 ++#define SURFACEFORMAT_R8G8B8_SSCALED 0x195 ++#define SURFACEFORMAT_R8G8B8_USCALED 0x196 ++#define SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 ++#define SURFACEFORMAT_R64G64B64_FLOAT 0x198 ++#define SURFACEFORMAT_BC4_SNORM 0x199 ++#define SURFACEFORMAT_BC5_SNORM 0x19A ++#define SURFACEFORMAT_R16G16B16_UNORM 0x19C ++#define SURFACEFORMAT_R16G16B16_SNORM 0x19D ++#define SURFACEFORMAT_R16G16B16_SSCALED 0x19E ++#define SURFACEFORMAT_R16G16B16_USCALED 0x19F ++ ++#define SURFACE_1D 0 ++#define SURFACE_2D 1 ++#define SURFACE_3D 2 ++#define SURFACE_CUBE 3 ++#define SURFACE_BUFFER 4 ++#define SURFACE_NULL 7 ++ ++#define TEXCOORDMODE_WRAP 0 ++#define TEXCOORDMODE_MIRROR 1 ++#define TEXCOORDMODE_CLAMP 2 ++#define TEXCOORDMODE_CUBE 3 ++#define TEXCOORDMODE_CLAMP_BORDER 4 ++#define TEXCOORDMODE_MIRROR_ONCE 5 ++ ++#define THREAD_PRIORITY_NORMAL 0 ++#define THREAD_PRIORITY_HIGH 1 ++ ++#define VERTEX_SUBPIXEL_PRECISION_8BITS 0 ++#define VERTEX_SUBPIXEL_PRECISION_4BITS 1 ++ ++#define COMPONENT_NOSTORE 0 ++#define COMPONENT_STORE_SRC 1 ++#define COMPONENT_STORE_0 2 ++#define COMPONENT_STORE_1_FLT 3 ++#define COMPONENT_STORE_1_INT 4 ++#define COMPONENT_STORE_VID 5 ++#define COMPONENT_STORE_IID 6 ++#define COMPONENT_STORE_PID 7 ++ ++/* Execution Unit (EU) defines ++ */ ++ ++#define GEN9_ALIGN_1 0 ++#define GEN9_ALIGN_16 1 ++ ++#define GEN9_ADDRESS_DIRECT 0 ++#define GEN9_ADDRESS_REGISTER_INDIRECT_REGISTER 1 ++ ++#define GEN9_CHANNEL_X 0 ++#define GEN9_CHANNEL_Y 1 ++#define GEN9_CHANNEL_Z 2 ++#define GEN9_CHANNEL_W 3 ++ ++#define GEN9_COMPRESSION_NONE 0 ++#define GEN9_COMPRESSION_2NDHALF 1 ++#define GEN9_COMPRESSION_COMPRESSED 2 ++ ++#define GEN9_CONDITIONAL_NONE 0 ++#define GEN9_CONDITIONAL_Z 1 ++#define GEN9_CONDITIONAL_NZ 2 ++#define GEN9_CONDITIONAL_EQ 1 /* Z */ ++#define GEN9_CONDITIONAL_NEQ 2 /* NZ */ ++#define GEN9_CONDITIONAL_G 3 ++#define GEN9_CONDITIONAL_GE 4 ++#define GEN9_CONDITIONAL_L 5 ++#define 
GEN9_CONDITIONAL_LE 6 ++#define GEN9_CONDITIONAL_C 7 ++#define GEN9_CONDITIONAL_O 8 ++ ++#define GEN9_DEBUG_NONE 0 ++#define GEN9_DEBUG_BREAKPOINT 1 ++ ++#define GEN9_DEPENDENCY_NORMAL 0 ++#define GEN9_DEPENDENCY_NOTCLEARED 1 ++#define GEN9_DEPENDENCY_NOTCHECKED 2 ++#define GEN9_DEPENDENCY_DISABLE 3 ++ ++#define GEN9_EXECUTE_1 0 ++#define GEN9_EXECUTE_2 1 ++#define GEN9_EXECUTE_4 2 ++#define GEN9_EXECUTE_8 3 ++#define GEN9_EXECUTE_16 4 ++#define GEN9_EXECUTE_32 5 ++ ++#define GEN9_HORIZONTAL_STRIDE_0 0 ++#define GEN9_HORIZONTAL_STRIDE_1 1 ++#define GEN9_HORIZONTAL_STRIDE_2 2 ++#define GEN9_HORIZONTAL_STRIDE_4 3 ++ ++#define GEN9_INSTRUCTION_NORMAL 0 ++#define GEN9_INSTRUCTION_SATURATE 1 ++ ++#define GEN9_OPCODE_MOV 1 ++#define GEN9_OPCODE_SEL 2 ++#define GEN9_OPCODE_NOT 4 ++#define GEN9_OPCODE_AND 5 ++#define GEN9_OPCODE_OR 6 ++#define GEN9_OPCODE_XOR 7 ++#define GEN9_OPCODE_SHR 8 ++#define GEN9_OPCODE_SHL 9 ++#define GEN9_OPCODE_RSR 10 ++#define GEN9_OPCODE_RSL 11 ++#define GEN9_OPCODE_ASR 12 ++#define GEN9_OPCODE_CMP 16 ++#define GEN9_OPCODE_JMPI 32 ++#define GEN9_OPCODE_IF 34 ++#define GEN9_OPCODE_IFF 35 ++#define GEN9_OPCODE_ELSE 36 ++#define GEN9_OPCODE_ENDIF 37 ++#define GEN9_OPCODE_DO 38 ++#define GEN9_OPCODE_WHILE 39 ++#define GEN9_OPCODE_BREAK 40 ++#define GEN9_OPCODE_CONTINUE 41 ++#define GEN9_OPCODE_HALT 42 ++#define GEN9_OPCODE_MSAVE 44 ++#define GEN9_OPCODE_MRESTORE 45 ++#define GEN9_OPCODE_PUSH 46 ++#define GEN9_OPCODE_POP 47 ++#define GEN9_OPCODE_WAIT 48 ++#define GEN9_OPCODE_SEND 49 ++#define GEN9_OPCODE_ADD 64 ++#define GEN9_OPCODE_MUL 65 ++#define GEN9_OPCODE_AVG 66 ++#define GEN9_OPCODE_FRC 67 ++#define GEN9_OPCODE_RNDU 68 ++#define GEN9_OPCODE_RNDD 69 ++#define GEN9_OPCODE_RNDE 70 ++#define GEN9_OPCODE_RNDZ 71 ++#define GEN9_OPCODE_MAC 72 ++#define GEN9_OPCODE_MACH 73 ++#define GEN9_OPCODE_LZD 74 ++#define GEN9_OPCODE_SAD2 80 ++#define GEN9_OPCODE_SADA2 81 ++#define GEN9_OPCODE_DP4 84 ++#define GEN9_OPCODE_DPH 85 ++#define GEN9_OPCODE_DP3 86 
++#define GEN9_OPCODE_DP2 87 ++#define GEN9_OPCODE_DPA2 88 ++#define GEN9_OPCODE_LINE 89 ++#define GEN9_OPCODE_NOP 126 ++ ++#define GEN9_PREDICATE_NONE 0 ++#define GEN9_PREDICATE_NORMAL 1 ++#define GEN9_PREDICATE_ALIGN1_ANYV 2 ++#define GEN9_PREDICATE_ALIGN1_ALLV 3 ++#define GEN9_PREDICATE_ALIGN1_ANY2H 4 ++#define GEN9_PREDICATE_ALIGN1_ALL2H 5 ++#define GEN9_PREDICATE_ALIGN1_ANY4H 6 ++#define GEN9_PREDICATE_ALIGN1_ALL4H 7 ++#define GEN9_PREDICATE_ALIGN1_ANY8H 8 ++#define GEN9_PREDICATE_ALIGN1_ALL8H 9 ++#define GEN9_PREDICATE_ALIGN1_ANY16H 10 ++#define GEN9_PREDICATE_ALIGN1_ALL16H 11 ++#define GEN9_PREDICATE_ALIGN16_REPLICATE_X 2 ++#define GEN9_PREDICATE_ALIGN16_REPLICATE_Y 3 ++#define GEN9_PREDICATE_ALIGN16_REPLICATE_Z 4 ++#define GEN9_PREDICATE_ALIGN16_REPLICATE_W 5 ++#define GEN9_PREDICATE_ALIGN16_ANY4H 6 ++#define GEN9_PREDICATE_ALIGN16_ALL4H 7 ++ ++#define GEN9_ARCHITECTURE_REGISTER_FILE 0 ++#define GEN9_GENERAL_REGISTER_FILE 1 ++#define GEN9_MESSAGE_REGISTER_FILE 2 ++#define GEN9_IMMEDIATE_VALUE 3 ++ ++#define GEN9_REGISTER_TYPE_UD 0 ++#define GEN9_REGISTER_TYPE_D 1 ++#define GEN9_REGISTER_TYPE_UW 2 ++#define GEN9_REGISTER_TYPE_W 3 ++#define GEN9_REGISTER_TYPE_UB 4 ++#define GEN9_REGISTER_TYPE_B 5 ++#define GEN9_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? 
*/ ++#define GEN9_REGISTER_TYPE_HF 6 ++#define GEN9_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ ++#define GEN9_REGISTER_TYPE_F 7 ++ ++#define GEN9_ARF_NULL 0x00 ++#define GEN9_ARF_ADDRESS 0x10 ++#define GEN9_ARF_ACCUMULATOR 0x20 ++#define GEN9_ARF_FLAG 0x30 ++#define GEN9_ARF_MASK 0x40 ++#define GEN9_ARF_MASK_STACK 0x50 ++#define GEN9_ARF_MASK_STACK_DEPTH 0x60 ++#define GEN9_ARF_STATE 0x70 ++#define GEN9_ARF_CONTROL 0x80 ++#define GEN9_ARF_NOTIFICATION_COUNT 0x90 ++#define GEN9_ARF_IP 0xA0 ++ ++#define GEN9_AMASK 0 ++#define GEN9_IMASK 1 ++#define GEN9_LMASK 2 ++#define GEN9_CMASK 3 ++ ++#define GEN9_THREAD_NORMAL 0 ++#define GEN9_THREAD_ATOMIC 1 ++#define GEN9_THREAD_SWITCH 2 ++ ++#define GEN9_VERTICAL_STRIDE_0 0 ++#define GEN9_VERTICAL_STRIDE_1 1 ++#define GEN9_VERTICAL_STRIDE_2 2 ++#define GEN9_VERTICAL_STRIDE_4 3 ++#define GEN9_VERTICAL_STRIDE_8 4 ++#define GEN9_VERTICAL_STRIDE_16 5 ++#define GEN9_VERTICAL_STRIDE_32 6 ++#define GEN9_VERTICAL_STRIDE_64 7 ++#define GEN9_VERTICAL_STRIDE_128 8 ++#define GEN9_VERTICAL_STRIDE_256 9 ++#define GEN9_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF ++ ++#define GEN9_WIDTH_1 0 ++#define GEN9_WIDTH_2 1 ++#define GEN9_WIDTH_4 2 ++#define GEN9_WIDTH_8 3 ++#define GEN9_WIDTH_16 4 ++ ++#define GEN9_STATELESS_BUFFER_BOUNDARY_1K 0 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_2K 1 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_4K 2 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_8K 3 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_16K 4 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_32K 5 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_64K 6 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_128K 7 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_256K 8 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_512K 9 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_1M 10 ++#define GEN9_STATELESS_BUFFER_BOUNDARY_2M 11 ++ ++#define GEN9_POLYGON_FACING_FRONT 0 ++#define GEN9_POLYGON_FACING_BACK 1 ++ ++#define GEN9_MESSAGE_TARGET_NULL 0 ++#define GEN9_MESSAGE_TARGET_MATH 1 ++#define 
GEN9_MESSAGE_TARGET_SAMPLER 2 ++#define GEN9_MESSAGE_TARGET_GATEWAY 3 ++#define GEN9_MESSAGE_TARGET_DATAPORT_READ 4 ++#define GEN9_MESSAGE_TARGET_DATAPORT_WRITE 5 ++#define GEN9_MESSAGE_TARGET_URB 6 ++#define GEN9_MESSAGE_TARGET_THREAD_SPAWNER 7 ++ ++#define GEN9_SAMPLER_RETURN_FORMAT_FLOAT32 0 ++#define GEN9_SAMPLER_RETURN_FORMAT_UINT32 2 ++#define GEN9_SAMPLER_RETURN_FORMAT_SINT32 3 ++ ++#define GEN9_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 ++#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 ++#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 ++#define GEN9_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 ++#define GEN9_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 ++#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 ++#define GEN9_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 ++#define GEN9_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 ++#define GEN9_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 ++#define GEN9_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 ++#define GEN9_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 ++#define GEN9_SAMPLER_MESSAGE_SIMD8_RESINFO 2 ++#define GEN9_SAMPLER_MESSAGE_SIMD16_RESINFO 2 ++#define GEN9_SAMPLER_MESSAGE_SIMD4X2_LD 3 ++#define GEN9_SAMPLER_MESSAGE_SIMD8_LD 3 ++#define GEN9_SAMPLER_MESSAGE_SIMD16_LD 3 ++ ++#define GEN9_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 ++#define GEN9_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 ++#define GEN9_DATAPORT_OWORD_BLOCK_2_OWORDS 2 ++#define GEN9_DATAPORT_OWORD_BLOCK_4_OWORDS 3 ++#define GEN9_DATAPORT_OWORD_BLOCK_8_OWORDS 4 ++ ++#define GEN9_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 ++#define GEN9_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 ++ ++#define GEN9_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 ++#define GEN9_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 ++ ++#define GEN9_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 ++#define GEN9_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 ++#define GEN9_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 ++#define GEN9_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 ++ ++#define GEN9_DATAPORT_READ_TARGET_DATA_CACHE 0 ++#define GEN9_DATAPORT_READ_TARGET_RENDER_CACHE 1 ++#define 
GEN9_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 ++ ++#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 ++#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 ++#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 ++#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 ++#define GEN9_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 ++ ++#define GEN9_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 ++#define GEN9_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 ++#define GEN9_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 ++#define GEN9_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 ++#define GEN9_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 ++#define GEN9_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 ++#define GEN9_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 ++ ++#define GEN9_MATH_FUNCTION_INV 1 ++#define GEN9_MATH_FUNCTION_LOG 2 ++#define GEN9_MATH_FUNCTION_EXP 3 ++#define GEN9_MATH_FUNCTION_SQRT 4 ++#define GEN9_MATH_FUNCTION_RSQ 5 ++#define GEN9_MATH_FUNCTION_SIN 6 /* was 7 */ ++#define GEN9_MATH_FUNCTION_COS 7 /* was 8 */ ++#define GEN9_MATH_FUNCTION_SINCOS 8 /* was 6 */ ++#define GEN9_MATH_FUNCTION_TAN 9 ++#define GEN9_MATH_FUNCTION_POW 10 ++#define GEN9_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 ++#define GEN9_MATH_FUNCTION_INT_DIV_QUOTIENT 12 ++#define GEN9_MATH_FUNCTION_INT_DIV_REMAINDER 13 ++ ++#define GEN9_MATH_INTEGER_UNSIGNED 0 ++#define GEN9_MATH_INTEGER_SIGNED 1 ++ ++#define GEN9_MATH_PRECISION_FULL 0 ++#define GEN9_MATH_PRECISION_PARTIAL 1 ++ ++#define GEN9_MATH_SATURATE_NONE 0 ++#define GEN9_MATH_SATURATE_SATURATE 1 ++ ++#define GEN9_MATH_DATA_VECTOR 0 ++#define GEN9_MATH_DATA_SCALAR 1 ++ ++#define GEN9_URB_OPCODE_WRITE 0 ++ ++#define GEN9_URB_SWIZZLE_NONE 0 ++#define GEN9_URB_SWIZZLE_INTERLEAVE 1 ++#define GEN9_URB_SWIZZLE_TRANSPOSE 2 ++ ++#define GEN9_SCRATCH_SPACE_SIZE_1K 0 ++#define GEN9_SCRATCH_SPACE_SIZE_2K 1 ++#define GEN9_SCRATCH_SPACE_SIZE_4K 2 ++#define 
GEN9_SCRATCH_SPACE_SIZE_8K 3 ++#define GEN9_SCRATCH_SPACE_SIZE_16K 4 ++#define GEN9_SCRATCH_SPACE_SIZE_32K 5 ++#define GEN9_SCRATCH_SPACE_SIZE_64K 6 ++#define GEN9_SCRATCH_SPACE_SIZE_128K 7 ++#define GEN9_SCRATCH_SPACE_SIZE_256K 8 ++#define GEN9_SCRATCH_SPACE_SIZE_512K 9 ++#define GEN9_SCRATCH_SPACE_SIZE_1M 10 ++#define GEN9_SCRATCH_SPACE_SIZE_2M 11 ++ ++struct gen9_blend_state { ++ struct { ++ /* 00 */ uint32_t pad:19; ++ /* 19 */ uint32_t y_dither_offset:2; ++ /* 21 */ uint32_t x_dither_offset:2; ++ /* 23 */ uint32_t color_dither_enable:1; ++ /* 24 */ uint32_t alpha_test_function:3; ++ /* 27 */ uint32_t alpha_test:1; ++ /* 28 */ uint32_t alpha_to_coverage_dither:1; ++ /* 29 */ uint32_t alpha_to_one:1; ++ /* 30 */ uint32_t ia_blend:1; ++ /* 31 */ uint32_t alpha_to_coverage:1; ++ } common; ++ ++ struct { ++ /* 00 */ uint32_t write_disable_blue:1; ++ /* 01 */ uint32_t write_disable_green:1; ++ /* 02 */ uint32_t write_disable_red:1; ++ /* 03 */ uint32_t write_disable_alpha:1; ++ /* 04 */ uint32_t pad0:1; ++ /* 05 */ uint32_t alpha_blend_function:3; ++ /* 08 */ uint32_t dest_alpha_blend_factor:5; ++ /* 13 */ uint32_t source_alpha_blend_factor:5; ++ /* 18 */ uint32_t color_blend_function:3; ++ /* 21 */ uint32_t dest_blend_factor:5; ++ /* 26 */ uint32_t source_blend_factor:5; ++ /* 31 */ uint32_t color_blend:1; ++ /* 32 */ uint32_t post_blend_clamp:1; ++ /* 33 */ uint32_t pre_blend_clamp:1; ++ /* 34 */ uint32_t color_clamp_range:2; ++ /* 36 */ uint32_t pre_blend_source_only_clamp:1; ++ /* 37 */ uint32_t pad1:22; ++ /* 59 */ uint32_t logic_op_function:4; ++ /* 63 */ uint32_t logic_op:1; ++ } rt; ++}; ++ ++struct gen9_color_calc_state { ++ struct { ++ /* 00 */ uint32_t alpha_test_format:1; ++ /* 01 */ uint32_t pad0:14; ++ /* 15 */ uint32_t round_disable:1; ++ /* 16 */ uint32_t bf_stencil_ref:8; ++ /* 24 */ uint32_t stencil_ref:8; ++ } cc0; ++ ++ union { ++ float alpha_ref_f; ++ struct { ++ uint32_t ui:8; ++ uint32_t pad0:24; ++ } alpha_ref_fi; ++ } cc1; ++ ++ float 
constant_r; ++ float constant_g; ++ float constant_b; ++ float constant_a; ++}; ++ ++struct gen9_sampler_state { ++ struct { ++ /* 00 */ unsigned int aniso_algorithm:1; ++ /* 01 */ unsigned int lod_bias:13; ++ /* 14 */ unsigned int min_filter:3; ++ /* 17 */ unsigned int mag_filter:3; ++ /* 20 */ unsigned int mip_filter:2; ++ /* 22 */ unsigned int base_level:5; ++ /* 27 */ unsigned int lod_preclamp:2; ++ /* 29 */ unsigned int default_color_mode:1; ++ /* 30 */ unsigned int flexible_filter_clamp:1; ++ /* 31 */ unsigned int disable:1; ++ } ss0; ++ ++ struct { ++ /* 00 */ unsigned int cube_control_mode:1; ++ /* 01 */ unsigned int shadow_function:3; ++ /* 04 */ unsigned int chroma_key_mode:1; ++ /* 05 */ unsigned int chroma_key_index:2; ++ /* 07 */ unsigned int chroma_key_enable:1; ++ /* 08 */ unsigned int max_lod:12; ++ /* 20 */ unsigned int min_lod:12; ++ } ss1; ++ ++ struct { ++ unsigned int pad:6; ++ unsigned int default_color_pointer:26; ++ } ss2; ++ ++ struct { ++ /* 00 */ unsigned int r_wrap_mode:3; ++ /* 03 */ unsigned int t_wrap_mode:3; ++ /* 06 */ unsigned int s_wrap_mode:3; ++ /* 09 */ unsigned int pad:1; ++ /* 10 */ unsigned int non_normalized_coord:1; ++ /* 11 */ unsigned int trilinear_quality:2; ++ /* 13 */ unsigned int address_round:6; ++ /* 19 */ unsigned int max_aniso:3; ++ /* 22 */ unsigned int pad0:2; ++ /* 24 */ unsigned int non_separable_filter:8; ++ } ss3; ++}; ++ ++/* Surface state DW0 */ ++#define SURFACE_RC_READ_WRITE (1 << 8) ++#define SURFACE_TILED (1 << 13) ++#define SURFACE_TILED_Y (1 << 12) ++#define SURFACE_FORMAT_SHIFT 18 ++#define SURFACE_VALIGN_1 (0 << 16) /* reserved! */ ++#define SURFACE_VALIGN_4 (1 << 16) ++#define SURFACE_VALIGN_8 (2 << 16) ++#define SURFACE_VALIGN_16 (3 << 16) ++#define SURFACE_HALIGN_1 (0 << 14) /* reserved! 
*/ ++#define SURFACE_HALIGN_4 (1 << 14) ++#define SURFACE_HALIGN_8 (2 << 14) ++#define SURFACE_HALIGN_16 (3 << 14) ++#define SURFACE_TYPE_SHIFT 29 ++ ++/* Surface state DW2 */ ++#define SURFACE_HEIGHT_SHIFT 16 ++#define SURFACE_WIDTH_SHIFT 0 ++ ++/* Surface state DW3 */ ++#define SURFACE_DEPTH_SHIFT 21 ++#define SURFACE_PITCH_SHIFT 0 ++ ++#define SWIZZLE_ZERO 0 ++#define SWIZZLE_ONE 1 ++#define SWIZZLE_RED 4 ++#define SWIZZLE_GREEN 5 ++#define SWIZZLE_BLUE 6 ++#define SWIZZLE_ALPHA 7 ++#define __SURFACE_SWIZZLE(r,g,b,a) \ ++ ((a) << 16 | (b) << 19 | (g) << 22 | (r) << 25) ++#define SURFACE_SWIZZLE(r,g,b,a) \ ++ __SURFACE_SWIZZLE(SWIZZLE_##r, SWIZZLE_##g, SWIZZLE_##b, SWIZZLE_##a) ++ ++typedef enum { ++ SAMPLER_FILTER_NEAREST = 0, ++ SAMPLER_FILTER_BILINEAR, ++ FILTER_COUNT ++} sampler_filter_t; ++ ++typedef enum { ++ SAMPLER_EXTEND_NONE = 0, ++ SAMPLER_EXTEND_REPEAT, ++ SAMPLER_EXTEND_PAD, ++ SAMPLER_EXTEND_REFLECT, ++ EXTEND_COUNT ++} sampler_extend_t; ++ ++#endif +diff --git a/src/sna/kgem.c b/src/sna/kgem.c +index 78ed5407..f0d171ac 100644 +--- a/src/sna/kgem.c ++++ b/src/sna/kgem.c +@@ -84,6 +84,10 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); + #define DBG_NO_HANDLE_LUT 0 + #define DBG_NO_WT 0 + #define DBG_NO_WC_MMAP 0 ++#define DBG_NO_BLT_Y 0 ++#define DBG_NO_SCANOUT_Y 0 ++#define DBG_NO_DIRTYFB 0 ++#define DBG_NO_DETILING 0 + #define DBG_DUMP 0 + #define DBG_NO_MALLOC_CACHE 0 + +@@ -96,11 +100,6 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); + #define SHOW_BATCH_BEFORE 0 + #define SHOW_BATCH_AFTER 0 + +-#if !USE_WC_MMAP +-#undef DBG_NO_WC_MMAP +-#define DBG_NO_WC_MMAP 1 +-#endif +- + #if 0 + #define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__)) + #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__)) +@@ -187,6 +186,15 @@ struct local_i915_gem_caching { + #define LOCAL_IOCTL_I915_GEM_SET_CACHING 
DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching) + #define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching) + ++struct local_i915_gem_mmap { ++ uint32_t handle; ++ uint32_t pad; ++ uint64_t offset; ++ uint64_t size; ++ uint64_t addr_ptr; ++}; ++#define LOCAL_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap) ++ + struct local_i915_gem_mmap2 { + uint32_t handle; + uint32_t pad; +@@ -216,6 +224,12 @@ static struct kgem_bo *__kgem_freed_bo; + static struct kgem_request *__kgem_freed_request; + static struct drm_i915_gem_exec_object2 _kgem_dummy_exec; + ++static inline struct sna *__to_sna(struct kgem *kgem) ++{ ++ /* minor layering violations */ ++ return container_of(kgem, struct sna, kgem); ++} ++ + static inline int bytes(struct kgem_bo *bo) + { + return __kgem_bo_size(bo); +@@ -224,25 +238,31 @@ static inline int bytes(struct kgem_bo *bo) + #define bucket(B) (B)->size.pages.bucket + #define num_pages(B) (B)->size.pages.count + +-static int do_ioctl(int fd, unsigned long req, void *arg) ++static int __do_ioctl(int fd, unsigned long req, void *arg) + { +- int err; +- +-restart: +- if (ioctl(fd, req, arg) == 0) +- return 0; ++ do { ++ int err; + +- err = errno; ++ switch ((err = errno)) { ++ case EAGAIN: ++ sched_yield(); ++ case EINTR: ++ break; ++ default: ++ return -err; ++ } + +- if (err == EINTR) +- goto restart; ++ if (likely(ioctl(fd, req, arg) == 0)) ++ return 0; ++ } while (1); ++} + +- if (err == EAGAIN) { +- sched_yield(); +- goto restart; +- } ++inline static int do_ioctl(int fd, unsigned long req, void *arg) ++{ ++ if (likely(ioctl(fd, req, arg) == 0)) ++ return 0; + +- return -err; ++ return __do_ioctl(fd, req, arg); + } + + #ifdef DEBUG_MEMORY +@@ -266,6 +286,9 @@ static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) + + assert(bo); + ++ if (!kgem->can_fence && kgem->gen >= 040 && bo->tiling) 
++ return; /* lies */ ++ + VG_CLEAR(tiling); + tiling.handle = bo->handle; + tiling.tiling_mode = bo->tiling; +@@ -273,7 +296,7 @@ static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) + assert(tiling.tiling_mode == bo->tiling); + } + +-static void assert_cacheing(struct kgem *kgem, struct kgem_bo *bo) ++static void assert_caching(struct kgem *kgem, struct kgem_bo *bo) + { + struct local_i915_gem_caching arg; + int expect = kgem->has_llc ? SNOOPED : UNCACHED; +@@ -294,24 +317,117 @@ static void assert_bo_retired(struct kgem_bo *bo) + assert(bo->refcnt); + assert(bo->rq == NULL); + assert(bo->exec == NULL); ++ assert(!bo->needs_flush); + assert(list_is_empty(&bo->request)); + } + #else + #define assert_tiling(kgem, bo) +-#define assert_cacheing(kgem, bo) ++#define assert_caching(kgem, bo) + #define assert_bo_retired(bo) + #endif + ++static int __find_debugfs(struct kgem *kgem) ++{ ++ int i; ++ ++ for (i = 0; i < DRM_MAX_MINOR; i++) { ++ char path[80]; ++ ++ sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i); ++ if (access(path, R_OK) == 0) ++ return i; ++ ++ sprintf(path, "/debug/dri/%d/i915_wedged", i); ++ if (access(path, R_OK) == 0) ++ return i; ++ } ++ ++ return -1; ++} ++ ++static int kgem_get_minor(struct kgem *kgem) ++{ ++ struct stat st; ++ ++ if (fstat(kgem->fd, &st)) ++ return __find_debugfs(kgem); ++ ++ if (!S_ISCHR(st.st_mode)) ++ return __find_debugfs(kgem); ++ ++ return st.st_rdev & 0x63; ++} ++ ++static bool find_hang_state(struct kgem *kgem, char *path, int maxlen) ++{ ++ int minor = kgem_get_minor(kgem); ++ ++ /* Search for our hang state in a few canonical locations. ++ * In the unlikely event of having multiple devices, we ++ * will need to check which minor actually corresponds to ours. 
++ */ ++ ++ snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor); ++ if (access(path, R_OK) == 0) ++ return true; ++ ++ snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor); ++ if (access(path, R_OK) == 0) ++ return true; ++ ++ snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor); ++ if (access(path, R_OK) == 0) ++ return true; ++ ++ path[0] = '\0'; ++ return false; ++} ++ ++static bool has_error_state(struct kgem *kgem, char *path) ++{ ++ bool ret = false; ++ char no; ++ int fd; ++ ++ fd = open(path, O_RDONLY); ++ if (fd >= 0) { ++ ret = read(fd, &no, 1) == 1 && no != 'N'; ++ close(fd); ++ } ++ ++ return ret; ++} ++ ++static int kgem_get_screen_index(struct kgem *kgem) ++{ ++ return __to_sna(kgem)->scrn->scrnIndex; ++} ++ + static void + __kgem_set_wedged(struct kgem *kgem) + { ++ static int once; ++ char path[256]; ++ ++ if (kgem->wedged) ++ return; ++ ++ if (!once && ++ find_hang_state(kgem, path, sizeof(path)) && ++ has_error_state(kgem, path)) { ++ xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, ++ "When reporting this, please include %s and the full dmesg.\n", ++ path); ++ once = 1; ++ } ++ + kgem->wedged = true; +- sna_render_mark_wedged(container_of(kgem, struct sna, kgem)); ++ sna_render_mark_wedged(__to_sna(kgem)); + } + + static void kgem_sna_reset(struct kgem *kgem) + { +- struct sna *sna = container_of(kgem, struct sna, kgem); ++ struct sna *sna = __to_sna(kgem); + + sna->render.reset(sna); + sna->blt_state.fill_bo = 0; +@@ -319,7 +435,7 @@ static void kgem_sna_reset(struct kgem *kgem) + + static void kgem_sna_flush(struct kgem *kgem) + { +- struct sna *sna = container_of(kgem, struct sna, kgem); ++ struct sna *sna = __to_sna(kgem); + + sna->render.flush(sna); + +@@ -327,22 +443,53 @@ static void kgem_sna_flush(struct kgem *kgem) + sna_render_flush_solid(sna); + } + +-static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) ++static bool kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo) ++{ 
++ if (bo->scanout && bo->delta) { ++ DBG(("%s: releasing fb=%d for handle=%d\n", ++ __FUNCTION__, bo->delta, bo->handle)); ++ /* XXX will leak if we are not DRM_MASTER. *shrug* */ ++ do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta); ++ bo->delta = 0; ++ return true; ++ } else ++ return false; ++} ++ ++static bool kgem_set_tiling(struct kgem *kgem, struct kgem_bo *bo, ++ int tiling, int stride) + { + struct drm_i915_gem_set_tiling set_tiling; + int err; + ++ if (tiling == bo->tiling) { ++ if (tiling == I915_TILING_NONE) { ++ bo->pitch = stride; ++ return true; ++ } ++ if (stride == bo->pitch) ++ return true; ++ } ++ + if (DBG_NO_TILING) + return false; + + VG_CLEAR(set_tiling); + restart: +- set_tiling.handle = handle; ++ set_tiling.handle = bo->handle; + set_tiling.tiling_mode = tiling; +- set_tiling.stride = stride; ++ set_tiling.stride = tiling ? stride : 0; + +- if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) +- return true; ++ if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) { ++ bo->tiling = set_tiling.tiling_mode; ++ bo->pitch = set_tiling.tiling_mode ? 
set_tiling.stride : stride; ++ DBG(("%s: handle=%d, tiling=%d [%d], pitch=%d [%d]: %d\n", ++ __FUNCTION__, bo->handle, ++ bo->tiling, tiling, ++ bo->pitch, stride, ++ set_tiling.tiling_mode == tiling)); ++ return set_tiling.tiling_mode == tiling; ++ } + + err = errno; + if (err == EINTR) +@@ -353,6 +500,11 @@ restart: + goto restart; + } + ++ if (err == EBUSY && kgem_bo_rmfb(kgem, bo)) ++ goto restart; ++ ++ ERR(("%s: failed to set-tiling(tiling=%d, pitch=%d) for handle=%d: %d\n", ++ __FUNCTION__, tiling, stride, bo->handle, err)); + return false; + } + +@@ -437,10 +589,15 @@ static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) + DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, + bo->handle, bytes(bo))); + ++ if (bo->tiling && !kgem->can_fence) ++ return NULL; ++ + VG_CLEAR(gtt); + retry_gtt: + gtt.handle = bo->handle; + if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, >t))) { ++ DBG(("%s: failed %d, throttling/cleaning caches\n", ++ __FUNCTION__, err)); + assert(err != EINVAL); + + (void)__kgem_throttle_retire(kgem, 0); +@@ -460,6 +617,8 @@ retry_mmap: + kgem->fd, gtt.offset); + if (ptr == MAP_FAILED) { + err = errno; ++ DBG(("%s: failed %d, throttling/cleaning caches\n", ++ __FUNCTION__, err)); + assert(err != EINVAL); + + if (__kgem_throttle_retire(kgem, 0)) +@@ -498,6 +657,8 @@ retry_wc: + wc.size = bytes(bo); + wc.flags = I915_MMAP_WC; + if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) { ++ DBG(("%s: failed %d, throttling/cleaning caches\n", ++ __FUNCTION__, err)); + assert(err != EINVAL); + + if (__kgem_throttle_retire(kgem, 0)) +@@ -519,16 +680,19 @@ retry_wc: + + static void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) + { +- struct drm_i915_gem_mmap mmap_arg; ++ struct local_i915_gem_mmap arg; + int err; + ++ VG_CLEAR(arg); ++ arg.offset = 0; ++ + retry: +- VG_CLEAR(mmap_arg); +- mmap_arg.handle = bo->handle; +- mmap_arg.offset = 0; +- mmap_arg.size = bytes(bo); +- if ((err = do_ioctl(kgem->fd, 
DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) { +- assert(err != EINVAL); ++ arg.handle = bo->handle; ++ arg.size = bytes(bo); ++ if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP, &arg))) { ++ DBG(("%s: failed %d, throttling/cleaning caches\n", ++ __FUNCTION__, err)); ++ assert(err != -EINVAL || bo->prime); + + if (__kgem_throttle_retire(kgem, 0)) + goto retry; +@@ -536,15 +700,16 @@ retry: + if (kgem_cleanup_cache(kgem)) + goto retry; + +- ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n", +- __FUNCTION__, bo->handle, bytes(bo), -err)); ++ ERR(("%s: failed to mmap handle=%d (prime? %d), %d bytes, into CPU domain: %d\n", ++ __FUNCTION__, bo->handle, bo->prime, bytes(bo), -err)); ++ bo->purged = 1; + return NULL; + } + +- VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); ++ VG(VALGRIND_MAKE_MEM_DEFINED(arg.addr_ptr, bytes(bo))); + + DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); +- return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr; ++ return bo->map__cpu = (void *)(uintptr_t)arg.addr_ptr; + } + + static int gem_write(int fd, uint32_t handle, +@@ -634,16 +799,10 @@ static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo) + assert(bo->exec == NULL); + assert(list_is_empty(&bo->vma)); + +- if (bo->rq) { +- __kgem_bo_clear_busy(bo); +- kgem_retire(kgem); +- assert_bo_retired(bo); +- } else { +- assert(bo->exec == NULL); +- assert(list_is_empty(&bo->request)); +- assert(!bo->needs_flush); +- ASSERT_IDLE(kgem, bo->handle); +- } ++ if (bo->rq) ++ __kgem_retire_requests_upto(kgem, bo); ++ ASSERT_IDLE(kgem, bo->handle); ++ assert_bo_retired(bo); + } + + static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo) +@@ -655,10 +814,8 @@ static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo) + assert(list_is_empty(&bo->vma)); + + if (bo->rq) { +- if (!__kgem_busy(kgem, bo->handle)) { +- __kgem_bo_clear_busy(bo); +- kgem_retire(kgem); +- } ++ if (!__kgem_busy(kgem, bo->handle)) ++ 
__kgem_retire_requests_upto(kgem, bo); + } else { + assert(!bo->needs_flush); + ASSERT_IDLE(kgem, bo->handle); +@@ -694,6 +851,8 @@ retry: + } + + if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) { ++ DBG(("%s: failed %d, throttling/cleaning caches\n", ++ __FUNCTION__, err)); + assert(err != EINVAL); + + (void)__kgem_throttle_retire(kgem, 0); +@@ -728,27 +887,21 @@ static uint32_t gem_create(int fd, int num_pages) + return create.handle; + } + +-static bool ++static void + kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo) + { +-#if DBG_NO_MADV +- return true; +-#else ++#if !DBG_NO_MADV + struct drm_i915_gem_madvise madv; + + assert(bo->exec == NULL); +- assert(!bo->purged); + + VG_CLEAR(madv); + madv.handle = bo->handle; + madv.madv = I915_MADV_DONTNEED; + if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { +- bo->purged = 1; +- kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; +- return madv.retained; ++ bo->purged = true; ++ kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU; + } +- +- return true; + #endif + } + +@@ -788,7 +941,7 @@ kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo) + madv.madv = I915_MADV_WILLNEED; + if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) { + bo->purged = !madv.retained; +- kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU; ++ kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU; + return madv.retained; + } + +@@ -869,13 +1022,17 @@ static struct kgem_request *__kgem_request_alloc(struct kgem *kgem) + { + struct kgem_request *rq; + +- rq = __kgem_freed_request; +- if (rq) { +- __kgem_freed_request = *(struct kgem_request **)rq; ++ if (unlikely(kgem->wedged)) { ++ rq = &kgem->static_request; + } else { +- rq = malloc(sizeof(*rq)); +- if (rq == NULL) +- rq = &kgem->static_request; ++ rq = __kgem_freed_request; ++ if (rq) { ++ __kgem_freed_request = *(struct kgem_request **)rq; ++ } else { ++ rq = malloc(sizeof(*rq)); ++ if 
(rq == NULL) ++ rq = &kgem->static_request; ++ } + } + + list_init(&rq->buffers); +@@ -925,11 +1082,11 @@ total_ram_size(void) + #ifdef HAVE_STRUCT_SYSINFO_TOTALRAM + struct sysinfo info; + if (sysinfo(&info) == 0) +- return info.totalram * info.mem_unit; ++ return (size_t)info.totalram * info.mem_unit; + #endif + + #ifdef _SC_PHYS_PAGES +- return sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); ++ return (size_t)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); + #endif + + return 0; +@@ -1150,6 +1307,10 @@ static bool test_has_wc_mmap(struct kgem *kgem) + if (DBG_NO_WC_MMAP) + return false; + ++ /* XXX See https://bugs.freedesktop.org/show_bug.cgi?id=90841 */ ++ if (kgem->gen < 033) ++ return false; ++ + if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1) + return false; + +@@ -1187,7 +1348,7 @@ static bool test_has_caching(struct kgem *kgem) + + static bool test_has_userptr(struct kgem *kgem) + { +- uint32_t handle; ++ struct local_i915_gem_userptr arg; + void *ptr; + + if (DBG_NO_USERPTR) +@@ -1200,11 +1361,23 @@ static bool test_has_userptr(struct kgem *kgem) + if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE)) + return false; + +- handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false); +- gem_close(kgem->fd, handle); +- free(ptr); ++ VG_CLEAR(arg); ++ arg.user_ptr = (uintptr_t)ptr; ++ arg.user_size = PAGE_SIZE; ++ arg.flags = I915_USERPTR_UNSYNCHRONIZED; + +- return handle != 0; ++ if (DBG_NO_UNSYNCHRONIZED_USERPTR || ++ do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) { ++ arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED; ++ if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) ++ arg.handle = 0; ++ /* Leak the userptr bo to keep the mmu_notifier alive */ ++ } else { ++ gem_close(kgem->fd, arg.handle); ++ free(ptr); ++ } ++ ++ return arg.handle != 0; + } + + static bool test_has_create2(struct kgem *kgem) +@@ -1227,67 +1400,187 @@ static bool test_has_create2(struct kgem *kgem) + #endif + } + +-static bool test_has_secure_batches(struct kgem *kgem) 
++static bool test_can_blt_y(struct kgem *kgem) + { +- if (DBG_NO_SECURE_BATCHES) ++ struct drm_i915_gem_exec_object2 object; ++ uint32_t batch[] = { ++#define MI_LOAD_REGISTER_IMM (0x22<<23 | (3-2)) ++#define BCS_SWCTRL 0x22200 ++#define BCS_SRC_Y (1 << 0) ++#define BCS_DST_Y (1 << 1) ++ MI_LOAD_REGISTER_IMM, ++ BCS_SWCTRL, ++ (BCS_SRC_Y | BCS_DST_Y) << 16 | (BCS_SRC_Y | BCS_DST_Y), ++ ++ MI_LOAD_REGISTER_IMM, ++ BCS_SWCTRL, ++ (BCS_SRC_Y | BCS_DST_Y) << 16, ++ ++ MI_BATCH_BUFFER_END, ++ 0, ++ }; ++ int ret; ++ ++ if (DBG_NO_BLT_Y) + return false; + +- return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; ++ if (kgem->gen < 060) ++ return false; ++ ++ memset(&object, 0, sizeof(object)); ++ object.handle = gem_create(kgem->fd, 1); ++ ++ ret = gem_write(kgem->fd, object.handle, 0, sizeof(batch), batch); ++ if (ret == 0) { ++ struct drm_i915_gem_execbuffer2 execbuf; ++ ++ memset(&execbuf, 0, sizeof(execbuf)); ++ execbuf.buffers_ptr = (uintptr_t)&object; ++ execbuf.buffer_count = 1; ++ execbuf.flags = KGEM_BLT; ++ ++ ret = do_ioctl(kgem->fd, ++ DRM_IOCTL_I915_GEM_EXECBUFFER2, ++ &execbuf); ++ } ++ gem_close(kgem->fd, object.handle); ++ ++ return ret == 0; + } + +-static bool test_has_pinned_batches(struct kgem *kgem) ++static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) + { +- if (DBG_NO_PINNED_BATCHES) ++ struct drm_i915_gem_set_tiling set_tiling; ++ ++ if (DBG_NO_TILING) + return false; + +- return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; ++ VG_CLEAR(set_tiling); ++ set_tiling.handle = handle; ++ set_tiling.tiling_mode = tiling; ++ set_tiling.stride = stride; ++ ++ if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) ++ return set_tiling.tiling_mode == tiling; ++ ++ return false; + } + +-static int kgem_get_screen_index(struct kgem *kgem) ++static bool test_can_scanout_y(struct kgem *kgem) + { +- struct sna *sna = container_of(kgem, struct sna, kgem); +- return sna->scrn->scrnIndex; ++ struct 
drm_mode_fb_cmd arg; ++ bool ret = false; ++ ++ if (DBG_NO_SCANOUT_Y) ++ return false; ++ ++ VG_CLEAR(arg); ++ arg.width = 32; ++ arg.height = 32; ++ arg.pitch = 4*32; ++ arg.bpp = 32; ++ arg.depth = 24; ++ arg.handle = gem_create(kgem->fd, 1); ++ ++ if (gem_set_tiling(kgem->fd, arg.handle, I915_TILING_Y, arg.pitch)) ++ ret = do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0; ++ if (!ret) { ++ struct local_mode_fb_cmd2 { ++ uint32_t fb_id; ++ uint32_t width, height; ++ uint32_t pixel_format; ++ uint32_t flags; ++ ++ uint32_t handles[4]; ++ uint32_t pitches[4]; ++ uint32_t offsets[4]; ++ uint64_t modifiers[4]; ++ } f; ++#define LOCAL_IOCTL_MODE_ADDFB2 DRM_IOWR(0xb8, struct local_mode_fb_cmd2) ++ memset(&f, 0, sizeof(f)); ++ f.width = arg.width; ++ f.height = arg.height; ++ f.handles[0] = arg.handle; ++ f.pitches[0] = arg.pitch; ++ f.modifiers[0] = (uint64_t)1 << 56 | 2; /* MOD_Y_TILED */ ++ f.pixel_format = 'X' | 'R' << 8 | '2' << 16 | '4' << 24; /* XRGB8888 */ ++ f.flags = 1 << 1; /* + modifier */ ++ if (drmIoctl(kgem->fd, LOCAL_IOCTL_MODE_ADDFB2, &f) == 0) { ++ ret = true; ++ arg.fb_id = f.fb_id; ++ } ++ } ++ do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &arg.fb_id); ++ gem_close(kgem->fd, arg.handle); ++ ++ return ret; + } + +-static int __find_debugfs(struct kgem *kgem) ++static bool test_has_dirtyfb(struct kgem *kgem) + { +- int i; ++ struct drm_mode_fb_cmd create; ++ bool ret = false; + +- for (i = 0; i < DRM_MAX_MINOR; i++) { +- char path[80]; ++ if (DBG_NO_DIRTYFB) ++ return false; + +- sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i); +- if (access(path, R_OK) == 0) +- return i; ++ VG_CLEAR(create); ++ create.width = 32; ++ create.height = 32; ++ create.pitch = 4*32; ++ create.bpp = 32; ++ create.depth = 32; ++ create.handle = gem_create(kgem->fd, 1); ++ if (create.handle == 0) ++ return false; + +- sprintf(path, "/debug/dri/%d/i915_wedged", i); +- if (access(path, R_OK) == 0) +- return i; ++ if (drmIoctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &create) == 0) { 
++ struct drm_mode_fb_dirty_cmd dirty; ++ ++ memset(&dirty, 0, sizeof(dirty)); ++ dirty.fb_id = create.fb_id; ++ ret = drmIoctl(kgem->fd, ++ DRM_IOCTL_MODE_DIRTYFB, ++ &dirty) == 0; ++ ++ /* XXX There may be multiple levels of DIRTYFB, depending on ++ * whether the kernel thinks tracking dirty regions is ++ * beneficial vs flagging the whole fb as dirty. ++ */ ++ ++ drmIoctl(kgem->fd, ++ DRM_IOCTL_MODE_RMFB, ++ &create.fb_id); + } ++ gem_close(kgem->fd, create.handle); + +- return -1; ++ return ret; + } + +-static int kgem_get_minor(struct kgem *kgem) ++static bool test_has_secure_batches(struct kgem *kgem) + { +- struct stat st; ++ if (DBG_NO_SECURE_BATCHES) ++ return false; + +- if (fstat(kgem->fd, &st)) +- return __find_debugfs(kgem); ++ return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; ++} + +- if (!S_ISCHR(st.st_mode)) +- return __find_debugfs(kgem); ++static bool test_has_pinned_batches(struct kgem *kgem) ++{ ++ if (DBG_NO_PINNED_BATCHES) ++ return false; + +- return st.st_rdev & 0x63; ++ return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; + } + + static bool kgem_init_pinned_batches(struct kgem *kgem) + { + int count[2] = { 16, 4 }; + int size[2] = { 1, 4 }; ++ int ret = 0; + int n, i; + +- if (kgem->wedged) ++ if (unlikely(kgem->wedged)) + return true; + + for (n = 0; n < ARRAY_SIZE(count); n++) { +@@ -1311,7 +1604,8 @@ static bool kgem_init_pinned_batches(struct kgem *kgem) + } + + pin.alignment = 0; +- if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) { ++ ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin); ++ if (ret) { + gem_close(kgem->fd, pin.handle); + free(bo); + goto err; +@@ -1333,6 +1627,16 @@ err: + } + } + ++ /* If we fail to pin some memory for 830gm/845g, we need to disable ++ * acceleration as otherwise the machine will eventually fail. However, ++ * the kernel started arbitrarily rejecting PIN, so hope for the best ++ * if the ioctl no longer works. 
++ */ ++ if (ret != -ENODEV && kgem->gen == 020) ++ return false; ++ ++ kgem->has_pinned_batches = false; ++ + /* For simplicity populate the lists with a single unpinned bo */ + for (n = 0; n < ARRAY_SIZE(count); n++) { + struct kgem_bo *bo; +@@ -1340,18 +1644,18 @@ err: + + handle = gem_create(kgem->fd, size[n]); + if (handle == 0) +- break; ++ return false; + + bo = __kgem_bo_alloc(handle, size[n]); + if (bo == NULL) { + gem_close(kgem->fd, handle); +- break; ++ return false; + } + + debug_alloc__bo(kgem, bo); + list_add(&bo->list, &kgem->pinned_batches[n]); + } +- return false; ++ return true; + } + + static void kgem_init_swizzling(struct kgem *kgem) +@@ -1364,7 +1668,7 @@ static void kgem_init_swizzling(struct kgem *kgem) + } tiling; + #define LOCAL_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct local_i915_gem_get_tiling_v2) + +- VG_CLEAR(tiling); ++ memset(&tiling, 0, sizeof(tiling)); + tiling.handle = gem_create(kgem->fd, 1); + if (!tiling.handle) + return; +@@ -1375,12 +1679,23 @@ static void kgem_init_swizzling(struct kgem *kgem) + if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_TILING, &tiling)) + goto out; + +- if (kgem->gen < 50 && tiling.phys_swizzle_mode != tiling.swizzle_mode) ++ DBG(("%s: swizzle_mode=%d, phys_swizzle_mode=%d\n", ++ __FUNCTION__, tiling.swizzle_mode, tiling.phys_swizzle_mode)); ++ ++ kgem->can_fence = ++ !DBG_NO_TILING && ++ tiling.swizzle_mode != I915_BIT_6_SWIZZLE_UNKNOWN; ++ ++ if (kgem->gen < 050 && tiling.phys_swizzle_mode != tiling.swizzle_mode) + goto out; + +- choose_memcpy_tiled_x(kgem, tiling.swizzle_mode); ++ if (!DBG_NO_DETILING) ++ choose_memcpy_tiled_x(kgem, ++ tiling.swizzle_mode, ++ __to_sna(kgem)->cpu_features); + out: + gem_close(kgem->fd, tiling.handle); ++ DBG(("%s: can fence?=%d\n", __FUNCTION__, kgem->can_fence)); + } + + static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) +@@ -1399,6 +1714,7 @@ static void kgem_fixup_relocs(struct kgem 
*kgem, struct kgem_bo *bo, int shrink) + bo->handle, (long long)bo->presumed_offset)); + for (n = 0; n < kgem->nreloc__self; n++) { + int i = kgem->reloc__self[n]; ++ uint64_t addr; + + assert(kgem->reloc[i].target_handle == ~0U); + kgem->reloc[i].target_handle = bo->target_handle; +@@ -1412,13 +1728,17 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) + + kgem->reloc[i].delta -= shrink; + } +- kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] = +- kgem->reloc[i].delta + bo->presumed_offset; ++ addr = (int)kgem->reloc[i].delta + bo->presumed_offset; ++ kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] = addr; ++ if (kgem->gen >= 0100) ++ kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t) + 1] = addr >> 32; + } + + if (n == 256) { + for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) { + if (kgem->reloc[n].target_handle == ~0U) { ++ uint64_t addr; ++ + kgem->reloc[n].target_handle = bo->target_handle; + kgem->reloc[n].presumed_offset = bo->presumed_offset; + +@@ -1429,8 +1749,11 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) + kgem->reloc[n].delta - shrink)); + kgem->reloc[n].delta -= shrink; + } +- kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] = +- kgem->reloc[n].delta + bo->presumed_offset; ++ ++ addr = (int)kgem->reloc[n].delta + bo->presumed_offset; ++ kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] = addr; ++ if (kgem->gen >= 0100) ++ kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t) + 1] = addr >> 32; + } + } + } +@@ -1444,6 +1767,44 @@ static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink) + } + } + ++static int kgem_bo_wait(struct kgem *kgem, struct kgem_bo *bo) ++{ ++ struct local_i915_gem_wait { ++ uint32_t handle; ++ uint32_t flags; ++ int64_t timeout; ++ } wait; ++#define LOCAL_I915_GEM_WAIT 0x2c ++#define LOCAL_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + LOCAL_I915_GEM_WAIT, struct local_i915_gem_wait) ++ int ret; ++ ++ DBG(("%s: 
waiting for handle=%d\n", __FUNCTION__, bo->handle)); ++ if (bo->rq == NULL) ++ return 0; ++ ++ VG_CLEAR(wait); ++ wait.handle = bo->handle; ++ wait.flags = 0; ++ wait.timeout = -1; ++ ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_WAIT, &wait); ++ if (ret) { ++ struct drm_i915_gem_set_domain set_domain; ++ ++ VG_CLEAR(set_domain); ++ set_domain.handle = bo->handle; ++ set_domain.read_domains = I915_GEM_DOMAIN_GTT; ++ set_domain.write_domain = I915_GEM_DOMAIN_GTT; ++ ret = do_ioctl(kgem->fd, ++ DRM_IOCTL_I915_GEM_SET_DOMAIN, ++ &set_domain); ++ } ++ ++ if (ret == 0) ++ __kgem_retire_requests_upto(kgem, bo); ++ ++ return ret; ++} ++ + static struct kgem_bo *kgem_new_batch(struct kgem *kgem) + { + struct kgem_bo *last; +@@ -1464,20 +1825,41 @@ static struct kgem_bo *kgem_new_batch(struct kgem *kgem) + if (!kgem->has_llc) + flags |= CREATE_UNCACHED; + ++restart: + kgem->batch_bo = kgem_create_linear(kgem, + sizeof(uint32_t)*kgem->batch_size, + flags); + if (kgem->batch_bo) + kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo); + if (kgem->batch == NULL) { +- DBG(("%s: unable to map batch bo, mallocing(size=%d)\n", +- __FUNCTION__, +- sizeof(uint32_t)*kgem->batch_size)); ++ int ring = kgem->ring == KGEM_BLT; ++ assert(ring < ARRAY_SIZE(kgem->requests)); ++ + if (kgem->batch_bo) { + kgem_bo_destroy(kgem, kgem->batch_bo); + kgem->batch_bo = NULL; + } + ++ if (!list_is_empty(&kgem->requests[ring])) { ++ struct kgem_request *rq; ++ ++ rq = list_first_entry(&kgem->requests[ring], ++ struct kgem_request, list); ++ assert(rq->ring == ring); ++ assert(rq->bo); ++ assert(RQ(rq->bo->rq) == rq); ++ if (kgem_bo_wait(kgem, rq->bo) == 0) ++ goto restart; ++ } ++ ++ if (flags & CREATE_NO_THROTTLE) { ++ flags &= ~CREATE_NO_THROTTLE; ++ if (kgem_cleanup_cache(kgem)) ++ goto restart; ++ } ++ ++ DBG(("%s: unable to map batch bo, mallocing(size=%d)\n", ++ __FUNCTION__, sizeof(uint32_t)*kgem->batch_size)); + if (posix_memalign((void **)&kgem->batch, PAGE_SIZE, + ALIGN(sizeof(uint32_t) * 
kgem->batch_size, PAGE_SIZE))) { + ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__)); +@@ -1495,18 +1877,79 @@ static struct kgem_bo *kgem_new_batch(struct kgem *kgem) + return last; + } + +-void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) ++static void ++no_retire(struct kgem *kgem) ++{ ++ (void)kgem; ++} ++ ++static void ++no_expire(struct kgem *kgem) ++{ ++ (void)kgem; ++} ++ ++static void ++no_context_switch(struct kgem *kgem, int new_mode) ++{ ++ (void)kgem; ++ (void)new_mode; ++} ++ ++static uint64_t get_gtt_size(int fd) + { + struct drm_i915_gem_get_aperture aperture; ++ struct local_i915_gem_context_param { ++ uint32_t context; ++ uint32_t size; ++ uint64_t param; ++#define LOCAL_CONTEXT_PARAM_BAN_PERIOD 0x1 ++#define LOCAL_CONTEXT_PARAM_NO_ZEROMAP 0x2 ++#define LOCAL_CONTEXT_PARAM_GTT_SIZE 0x3 ++ uint64_t value; ++ } p; ++#define LOCAL_I915_GEM_CONTEXT_GETPARAM 0x34 ++#define LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CONTEXT_GETPARAM, struct local_i915_gem_context_param) ++ ++ memset(&aperture, 0, sizeof(aperture)); ++ ++ memset(&p, 0, sizeof(p)); ++ p.param = LOCAL_CONTEXT_PARAM_GTT_SIZE; ++ if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p) == 0) ++ aperture.aper_size = p.value; ++ if (aperture.aper_size == 0) ++ (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); ++ if (aperture.aper_size == 0) ++ aperture.aper_size = 64*1024*1024; ++ ++ DBG(("%s: aperture size %lld, available now %lld\n", ++ __FUNCTION__, ++ (long long)aperture.aper_size, ++ (long long)aperture.aper_available_size)); ++ ++ /* clamp aperture to uint32_t for simplicity */ ++ if (aperture.aper_size > 0xc0000000) ++ aperture.aper_size = 0xc0000000; ++ ++ return aperture.aper_size; ++} ++ ++void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) ++{ + size_t totalram; + unsigned half_gpu_max; + unsigned int i, j; ++ uint64_t gtt_size; + + DBG(("%s: 
fd=%d, gen=%d\n", __FUNCTION__, fd, gen)); + + kgem->fd = fd; + kgem->gen = gen; + ++ kgem->retire = no_retire; ++ kgem->expire = no_expire; ++ kgem->context_switch = no_context_switch; ++ + list_init(&kgem->requests[0]); + list_init(&kgem->requests[1]); + list_init(&kgem->batch_buffers); +@@ -1586,10 +2029,21 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) + DBG(("%s: can blt to cpu? %d\n", __FUNCTION__, + kgem->can_blt_cpu)); + ++ kgem->can_blt_y = test_can_blt_y(kgem); ++ DBG(("%s: can blit to Y-tiled surfaces? %d\n", __FUNCTION__, ++ kgem->can_blt_y)); ++ + kgem->can_render_y = gen != 021 && (gen >> 3) != 4; + DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__, + kgem->can_render_y)); + ++ kgem->can_scanout_y = test_can_scanout_y(kgem); ++ DBG(("%s: can scanout Y-tiled surfaces? %d\n", __FUNCTION__, ++ kgem->can_scanout_y)); ++ ++ kgem->has_dirtyfb = test_has_dirtyfb(kgem); ++ DBG(("%s: has dirty fb? %d\n", __FUNCTION__, kgem->has_dirtyfb)); ++ + kgem->has_secure_batches = test_has_secure_batches(kgem); + DBG(("%s: can use privileged batchbuffers? 
%d\n", __FUNCTION__, + kgem->has_secure_batches)); +@@ -1620,7 +2074,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) + if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) + kgem->batch_size = 4*1024; + +- if (!kgem_init_pinned_batches(kgem) && gen == 020) { ++ if (!kgem_init_pinned_batches(kgem)) { + xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, + "Unable to reserve memory for GPU, disabling acceleration.\n"); + __kgem_set_wedged(kgem); +@@ -1640,35 +2094,24 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) + !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching), + kgem->has_llc, kgem->has_caching, kgem->has_userptr)); + +- VG_CLEAR(aperture); +- aperture.aper_size = 0; +- (void)do_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); +- if (aperture.aper_size == 0) +- aperture.aper_size = 64*1024*1024; +- +- DBG(("%s: aperture size %lld, available now %lld\n", +- __FUNCTION__, +- (long long)aperture.aper_size, +- (long long)aperture.aper_available_size)); +- +- kgem->aperture_total = aperture.aper_size; +- kgem->aperture_high = aperture.aper_size * 3/4; +- kgem->aperture_low = aperture.aper_size * 1/3; ++ gtt_size = get_gtt_size(fd); ++ kgem->aperture_total = gtt_size; ++ kgem->aperture_high = gtt_size * 3/4; ++ kgem->aperture_low = gtt_size * 1/3; + if (gen < 033) { + /* Severe alignment penalties */ + kgem->aperture_high /= 2; + kgem->aperture_low /= 2; + } +- DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__, ++ DBG(("%s: aperture low=%u [%u], high=%u [%u]\n", __FUNCTION__, + kgem->aperture_low, kgem->aperture_low / (1024*1024), + kgem->aperture_high, kgem->aperture_high / (1024*1024))); + + kgem->aperture_mappable = 256 * 1024 * 1024; + if (dev != NULL) + kgem->aperture_mappable = agp_aperture_size(dev, gen); +- if (kgem->aperture_mappable == 0 || +- kgem->aperture_mappable > aperture.aper_size) +- kgem->aperture_mappable = aperture.aper_size; ++ if 
(kgem->aperture_mappable == 0 || kgem->aperture_mappable > gtt_size) ++ kgem->aperture_mappable = gtt_size; + DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__, + kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024))); + +@@ -1697,7 +2140,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) + __FUNCTION__)); + totalram = kgem->aperture_total; + } +- DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram)); ++ DBG(("%s: total ram=%lld\n", __FUNCTION__, (long long)totalram)); + if (kgem->max_object_size > totalram / 2) + kgem->max_object_size = totalram / 2; + if (kgem->max_gpu_size > totalram / 4) +@@ -1749,11 +2192,11 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) + if (DBG_NO_CPU) + kgem->max_cpu_size = 0; + +- DBG(("%s: maximum object size=%d\n", ++ DBG(("%s: maximum object size=%u\n", + __FUNCTION__, kgem->max_object_size)); +- DBG(("%s: large object thresold=%d\n", ++ DBG(("%s: large object thresold=%u\n", + __FUNCTION__, kgem->large_object_size)); +- DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n", ++ DBG(("%s: max object sizes (gpu=%u, cpu=%u, tile upload=%u, copy=%u)\n", + __FUNCTION__, + kgem->max_gpu_size, kgem->max_cpu_size, + kgem->max_upload_tile_size, kgem->max_copy_tile_size)); +@@ -2043,8 +2486,34 @@ static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) + kgem->flush |= bo->flush; + } + ++static void kgem_clear_swctrl(struct kgem *kgem) ++{ ++ uint32_t *b; ++ ++ if (kgem->bcs_state == 0) ++ return; ++ ++ DBG(("%s: clearin SWCTRL LRI from %x\n", ++ __FUNCTION__, kgem->bcs_state)); ++ ++ b = kgem->batch + kgem->nbatch; ++ kgem->nbatch += 7; ++ ++ *b++ = MI_FLUSH_DW; ++ *b++ = 0; ++ *b++ = 0; ++ *b++ = 0; ++ ++ *b++ = MI_LOAD_REGISTER_IMM; ++ *b++ = BCS_SWCTRL; ++ *b++ = (BCS_SRC_Y | BCS_DST_Y) << 16; ++ ++ kgem->bcs_state = 0; ++} ++ + static uint32_t kgem_end_batch(struct kgem *kgem) + { ++ kgem_clear_swctrl(kgem); + 
kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END; + if (kgem->nbatch & 1) + kgem->batch[kgem->nbatch++] = MI_NOOP; +@@ -2064,17 +2533,6 @@ static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo) + } + } + +-static void kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo) +-{ +- if (bo->scanout && bo->delta) { +- DBG(("%s: releasing fb=%d for handle=%d\n", +- __FUNCTION__, bo->delta, bo->handle)); +- /* XXX will leak if we are not DRM_MASTER. *shrug* */ +- do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta); +- bo->delta = 0; +- } +-} +- + static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) + { + DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo))); +@@ -2150,13 +2608,16 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, + assert(!bo->snoop); + assert(!bo->flush); + assert(!bo->needs_flush); ++ assert(!bo->delta); + assert(list_is_empty(&bo->vma)); + assert_tiling(kgem, bo); +- assert_cacheing(kgem, bo); ++ assert_caching(kgem, bo); + ASSERT_IDLE(kgem, bo->handle); + + if (bucket(bo) >= NUM_CACHE_BUCKETS) { + if (bo->map__gtt) { ++ DBG(("%s: relinquishing large GTT mapping for handle=%d\n", ++ __FUNCTION__, bo->handle)); + munmap(bo->map__gtt, bytes(bo)); + bo->map__gtt = NULL; + } +@@ -2167,6 +2628,8 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, + assert(list_is_empty(&bo->vma)); + list_move(&bo->list, &kgem->inactive[bucket(bo)]); + if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) { ++ DBG(("%s: relinquishing old GTT mapping for handle=%d\n", ++ __FUNCTION__, bo->handle)); + munmap(bo->map__gtt, bytes(bo)); + bo->map__gtt = NULL; + } +@@ -2191,6 +2654,10 @@ static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo) + return bo; + + assert(!bo->snoop); ++ assert(!bo->purged); ++ assert(!bo->scanout); ++ assert(!bo->delta); ++ + if (__kgem_freed_bo) { + base = __kgem_freed_bo; + __kgem_freed_bo = *(struct kgem_bo **)base; +@@ -2221,6 +2688,7 @@ inline static void 
kgem_bo_remove_from_inactive(struct kgem *kgem, + list_del(&bo->list); + assert(bo->rq == NULL); + assert(bo->exec == NULL); ++ assert(!bo->purged); + if (!list_is_empty(&bo->vma)) { + assert(bo->map__gtt || bo->map__wc || bo->map__cpu); + list_del(&bo->vma); +@@ -2305,7 +2773,6 @@ static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo) + list_move(&bo->list, &kgem->scanout); + + kgem->need_expire = true; +- + } + + static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) +@@ -2316,6 +2783,8 @@ static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) + assert(!bo->needs_flush); + assert(bo->refcnt == 0); + assert(bo->exec == NULL); ++ assert(!bo->purged); ++ assert(!bo->delta); + + if (DBG_NO_SNOOP_CACHE) { + kgem_bo_free(kgem, bo); +@@ -2351,8 +2820,7 @@ static bool kgem_bo_move_to_cache(struct kgem *kgem, struct kgem_bo *bo) + kgem_bo_move_to_snoop(kgem, bo); + } else if (bo->scanout) { + kgem_bo_move_to_scanout(kgem, bo); +- } else if ((bo = kgem_bo_replace_io(bo))->reusable && +- kgem_bo_set_purgeable(kgem, bo)) { ++ } else if ((bo = kgem_bo_replace_io(bo))->reusable) { + kgem_bo_move_to_inactive(kgem, bo); + retired = true; + } else +@@ -2429,7 +2897,7 @@ void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo) + DBG(("%s: only handle in batch, discarding last operations for handle=%d\n", + __FUNCTION__, bo->handle)); + +- assert(bo->exec == &kgem->exec[0]); ++ assert(bo->exec == &_kgem_dummy_exec || bo->exec == &kgem->exec[0]); + assert(kgem->exec[0].handle == bo->handle); + assert(RQ(bo->rq) == kgem->next_request); + +@@ -2457,16 +2925,23 @@ void kgem_bo_pair_undo(struct kgem *kgem, struct kgem_bo *a, struct kgem_bo *b) + + if (a == NULL || b == NULL) + return; ++ assert(a != b); + if (a->exec == NULL || b->exec == NULL) + return; + +- DBG(("%s: only handles in batch, discarding last operations for handle=%d and handle=%d\n", +- __FUNCTION__, a->handle, b->handle)); ++ DBG(("%s: only handles in batch, 
discarding last operations for handle=%d (index=%d) and handle=%d (index=%d)\n", ++ __FUNCTION__, ++ a->handle, a->proxy ? -1 : a->exec - kgem->exec, ++ b->handle, b->proxy ? -1 : b->exec - kgem->exec)); + +- assert(a->exec == &kgem->exec[0] || a->exec == &kgem->exec[1]); ++ assert(a->exec == &_kgem_dummy_exec || ++ a->exec == &kgem->exec[0] || ++ a->exec == &kgem->exec[1]); + assert(a->handle == kgem->exec[0].handle || a->handle == kgem->exec[1].handle); + assert(RQ(a->rq) == kgem->next_request); +- assert(b->exec == &kgem->exec[0] || b->exec == &kgem->exec[1]); ++ assert(b->exec == &_kgem_dummy_exec || ++ b->exec == &kgem->exec[0] || ++ b->exec == &kgem->exec[1]); + assert(b->handle == kgem->exec[0].handle || b->handle == kgem->exec[1].handle); + assert(RQ(b->rq) == kgem->next_request); + +@@ -2487,6 +2962,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) + DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo))); + + assert(list_is_empty(&bo->list)); ++ assert(list_is_empty(&bo->vma)); + assert(bo->refcnt == 0); + assert(bo->proxy == NULL); + assert(bo->active_scanout == 0); +@@ -2532,7 +3008,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) + assert(bo->snoop == false); + assert(bo->io == false); + assert(bo->scanout == false); +- assert_cacheing(kgem, bo); ++ assert_caching(kgem, bo); + + kgem_bo_undo(kgem, bo); + assert(bo->refcnt == 0); +@@ -2556,9 +3032,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) + assert(list_is_empty(&bo->request)); + + if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) { +- if (!kgem_bo_set_purgeable(kgem, bo)) +- goto destroy; +- + if (!kgem->has_llc && bo->domain == DOMAIN_CPU) + goto destroy; + +@@ -2647,7 +3120,7 @@ static bool kgem_retire__flushing(struct kgem *kgem) + int count = 0; + list_for_each_entry(bo, &kgem->flushing, request) + count++; +- DBG(("%s: %d bo on flushing list\n", __FUNCTION__, count)); ++ DBG(("%s: %d bo on 
flushing list, retired? %d\n", __FUNCTION__, count, retired)); + } + #endif + +@@ -2656,6 +3129,34 @@ static bool kgem_retire__flushing(struct kgem *kgem) + return retired; + } + ++static bool __kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo) ++{ ++ struct drm_i915_gem_busy busy; ++ ++ if (!bo->needs_flush) ++ return false; ++ ++ bo->needs_flush = false; ++ ++ VG_CLEAR(busy); ++ busy.handle = bo->handle; ++ busy.busy = !kgem->wedged; ++ (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); ++ DBG(("%s: handle=%d, busy=%d, wedged=%d\n", ++ __FUNCTION__, bo->handle, busy.busy, kgem->wedged)); ++ ++ if (busy.busy == 0) ++ return false; ++ ++ DBG(("%s: moving %d to flushing\n", ++ __FUNCTION__, bo->handle)); ++ list_add(&bo->request, &kgem->flushing); ++ bo->rq = MAKE_REQUEST(kgem, !!(busy.busy & ~0x1ffff)); ++ bo->needs_flush = busy.busy & 0xffff; ++ kgem->need_retire = true; ++ return true; ++} ++ + static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) + { + bool retired = false; +@@ -2663,6 +3164,8 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) + DBG(("%s: request %d complete\n", + __FUNCTION__, rq->bo->handle)); + assert(RQ(rq->bo->rq) == rq); ++ assert(rq != (struct kgem_request *)kgem); ++ assert(rq != &kgem->static_request); + + if (rq == kgem->fence[rq->ring]) + kgem->fence[rq->ring] = NULL; +@@ -2680,19 +3183,14 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) + + list_del(&bo->request); + +- if (bo->needs_flush) +- bo->needs_flush = __kgem_busy(kgem, bo->handle); +- if (bo->needs_flush) { +- DBG(("%s: moving %d to flushing\n", ++ if (unlikely(__kgem_bo_flush(kgem, bo))) { ++ assert(bo != rq->bo); ++ DBG(("%s: movied %d to flushing\n", + __FUNCTION__, bo->handle)); +- list_add(&bo->request, &kgem->flushing); +- bo->rq = MAKE_REQUEST(kgem, RQ_RING(bo->rq)); +- kgem->need_retire = true; + continue; + } + + bo->domain = DOMAIN_NONE; +- bo->gtt_dirty = false; + bo->rq = NULL; + if 
(bo->refcnt) + continue; +@@ -2706,14 +3204,8 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) + assert(rq->bo->refcnt > 0); + + if (--rq->bo->refcnt == 0) { +- if (kgem_bo_set_purgeable(kgem, rq->bo)) { +- kgem_bo_move_to_inactive(kgem, rq->bo); +- retired = true; +- } else { +- DBG(("%s: closing %d\n", +- __FUNCTION__, rq->bo->handle)); +- kgem_bo_free(kgem, rq->bo); +- } ++ kgem_bo_move_to_inactive(kgem, rq->bo); ++ retired = true; + } + + __kgem_request_free(rq); +@@ -2724,13 +3216,18 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring) + { + bool retired = false; + ++ assert(ring < ARRAY_SIZE(kgem->requests)); + while (!list_is_empty(&kgem->requests[ring])) { + struct kgem_request *rq; + ++ DBG(("%s: retiring ring %d\n", __FUNCTION__, ring)); ++ + rq = list_first_entry(&kgem->requests[ring], + struct kgem_request, + list); + assert(rq->ring == ring); ++ assert(rq->bo); ++ assert(RQ(rq->bo->rq) == rq); + if (__kgem_busy(kgem, rq->bo->handle)) + break; + +@@ -2751,8 +3248,8 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring) + struct kgem_request, + list)->bo; + +- DBG(("%s: ring=%d, %d outstanding requests, oldest=%d\n", +- __FUNCTION__, ring, count, bo ? bo->handle : 0)); ++ DBG(("%s: ring=%d, %d outstanding requests, oldest=%d, retired? %d\n", ++ __FUNCTION__, ring, count, bo ? 
bo->handle : 0, retired)); + } + #endif + +@@ -2824,6 +3321,8 @@ bool __kgem_ring_is_idle(struct kgem *kgem, int ring) + rq = list_last_entry(&kgem->requests[ring], + struct kgem_request, list); + assert(rq->ring == ring); ++ assert(rq->bo); ++ assert(RQ(rq->bo->rq) == rq); + if (__kgem_busy(kgem, rq->bo->handle)) { + DBG(("%s: last requests handle=%d still busy\n", + __FUNCTION__, rq->bo->handle)); +@@ -2845,23 +3344,30 @@ bool __kgem_ring_is_idle(struct kgem *kgem, int ring) + return true; + } + +-void __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo) ++bool __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo) + { +- struct kgem_request *rq = bo->rq, *tmp; +- struct list *requests = &kgem->requests[RQ_RING(rq) == I915_EXEC_BLT]; ++ struct kgem_request * const rq = RQ(bo->rq), *tmp; ++ struct list *requests = &kgem->requests[rq->ring]; ++ ++ DBG(("%s(handle=%d, ring=%d)\n", __FUNCTION__, bo->handle, rq->ring)); + +- rq = RQ(rq); + assert(rq != &kgem->static_request); + if (rq == (struct kgem_request *)kgem) { + __kgem_bo_clear_busy(bo); +- return; ++ return false; + } + ++ assert(rq->ring < ARRAY_SIZE(kgem->requests)); + do { + tmp = list_first_entry(requests, struct kgem_request, list); + assert(tmp->ring == rq->ring); + __kgem_retire_rq(kgem, tmp); + } while (tmp != rq); ++ ++ assert(bo->needs_flush || bo->rq == NULL); ++ assert(bo->needs_flush || list_is_empty(&bo->request)); ++ assert(bo->needs_flush || bo->domain == DOMAIN_NONE); ++ return bo->rq; + } + + #if 0 +@@ -2932,6 +3438,7 @@ static void kgem_commit(struct kgem *kgem) + bo->binding.offset = 0; + bo->domain = DOMAIN_GPU; + bo->gpu_dirty = false; ++ bo->gtt_dirty = false; + + if (bo->proxy) { + /* proxies are not used for domain tracking */ +@@ -2955,6 +3462,23 @@ static void kgem_commit(struct kgem *kgem) + kgem_throttle(kgem); + } + ++ while (!list_is_empty(&rq->buffers)) { ++ bo = list_first_entry(&rq->buffers, ++ struct kgem_bo, ++ request); ++ ++ assert(RQ(bo->rq) == 
rq); ++ assert(bo->exec == NULL); ++ assert(bo->domain == DOMAIN_GPU); ++ ++ list_del(&bo->request); ++ bo->domain = DOMAIN_NONE; ++ bo->rq = NULL; ++ ++ if (bo->refcnt == 0) ++ _kgem_bo_destroy(kgem, bo); ++ } ++ + kgem_retire(kgem); + assert(list_is_empty(&rq->buffers)); + +@@ -2964,7 +3488,9 @@ static void kgem_commit(struct kgem *kgem) + gem_close(kgem->fd, rq->bo->handle); + kgem_cleanup_cache(kgem); + } else { ++ assert(rq != (struct kgem_request *)kgem); + assert(rq->ring < ARRAY_SIZE(kgem->requests)); ++ assert(rq->bo); + list_add_tail(&rq->list, &kgem->requests[rq->ring]); + kgem->need_throttle = kgem->need_retire = 1; + +@@ -2988,8 +3514,10 @@ static void kgem_close_inactive(struct kgem *kgem) + { + unsigned int i; + +- for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) ++ for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { + kgem_close_list(kgem, &kgem->inactive[i]); ++ assert(list_is_empty(&kgem->inactive[i])); ++ } + } + + static void kgem_finish_buffers(struct kgem *kgem) +@@ -3079,10 +3607,13 @@ static void kgem_finish_buffers(struct kgem *kgem) + kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; + for (n = 0; n < kgem->nreloc; n++) { + if (kgem->reloc[n].target_handle == bo->base.target_handle) { ++ uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset; ++ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr; ++ if (kgem->gen >= 0100) ++ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32; ++ + kgem->reloc[n].target_handle = shrink->target_handle; + kgem->reloc[n].presumed_offset = shrink->presumed_offset; +- kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = +- kgem->reloc[n].delta + shrink->presumed_offset; + } + } + +@@ -3124,10 +3655,13 @@ static void kgem_finish_buffers(struct kgem *kgem) + kgem->has_handle_lut ? 
bo->base.target_handle : shrink->handle; + for (n = 0; n < kgem->nreloc; n++) { + if (kgem->reloc[n].target_handle == bo->base.target_handle) { ++ uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset; ++ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr; ++ if (kgem->gen >= 0100) ++ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32; ++ + kgem->reloc[n].target_handle = shrink->target_handle; + kgem->reloc[n].presumed_offset = shrink->presumed_offset; +- kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = +- kgem->reloc[n].delta + shrink->presumed_offset; + } + } + +@@ -3195,6 +3729,9 @@ static void kgem_cleanup(struct kgem *kgem) + kgem_bo_free(kgem, bo); + } + ++ if (--rq->bo->refcnt == 0) ++ kgem_bo_free(kgem, rq->bo); ++ + __kgem_request_free(rq); + } + } +@@ -3210,7 +3747,9 @@ kgem_batch_write(struct kgem *kgem, + char *ptr; + int ret; + +- ASSERT_IDLE(kgem, bo->handle); ++ assert(bo->exec == NULL); ++ assert(bo->rq == NULL); ++ assert(!__kgem_busy(kgem, bo->handle)); + + #if DBG_NO_EXEC + { +@@ -3371,55 +3910,54 @@ static int compact_batch_surface(struct kgem *kgem, int *shrink) + return size * sizeof(uint32_t); + } + ++static struct kgem_bo *first_available(struct kgem *kgem, struct list *list) ++{ ++ struct kgem_bo *bo; ++ ++ list_for_each_entry(bo, list, list) { ++ assert(bo->refcnt > 0); ++ ++ if (bo->rq) { ++ assert(RQ(bo->rq)->bo == bo); ++ if (__kgem_busy(kgem, bo->handle)) ++ break; ++ ++ __kgem_retire_rq(kgem, RQ(bo->rq)); ++ assert(bo->rq == NULL); ++ } ++ ++ if (bo->refcnt > 1) ++ continue; ++ ++ list_move_tail(&bo->list, list); ++ return kgem_bo_reference(bo); ++ } ++ ++ return NULL; ++} ++ + static struct kgem_bo * + kgem_create_batch(struct kgem *kgem) + { +-#if !DBG_NO_SHRINK_BATCHES +- struct drm_i915_gem_set_domain set_domain; + struct kgem_bo *bo; +- int shrink = 0; +- int size; ++ int size, shrink = 0; + ++#if !DBG_NO_SHRINK_BATCHES + if (kgem->surface != kgem->batch_size) + size 
= compact_batch_surface(kgem, &shrink); + else + size = kgem->nbatch * sizeof(uint32_t); + + if (size <= 4096) { +- bo = list_first_entry(&kgem->pinned_batches[0], +- struct kgem_bo, +- list); +- if (!bo->rq) { +-out_4096: +- assert(bo->refcnt > 0); +- list_move_tail(&bo->list, &kgem->pinned_batches[0]); +- bo = kgem_bo_reference(bo); ++ bo = first_available(kgem, &kgem->pinned_batches[0]); ++ if (bo) + goto write; +- } +- +- if (!__kgem_busy(kgem, bo->handle)) { +- assert(RQ(bo->rq)->bo == bo); +- __kgem_retire_rq(kgem, RQ(bo->rq)); +- goto out_4096; +- } + } + +- if (size <= 16384) { +- bo = list_first_entry(&kgem->pinned_batches[1], +- struct kgem_bo, +- list); +- if (!bo->rq) { +-out_16384: +- assert(bo->refcnt > 0); +- list_move_tail(&bo->list, &kgem->pinned_batches[1]); +- bo = kgem_bo_reference(bo); +- goto write; +- } +- +- if (!__kgem_busy(kgem, bo->handle)) { +- __kgem_retire_rq(kgem, RQ(bo->rq)); +- goto out_16384; +- } ++ if (size <= 16384) { ++ bo = first_available(kgem, &kgem->pinned_batches[1]); ++ if (bo) ++ goto write; + } + + if (kgem->gen == 020) { +@@ -3443,16 +3981,8 @@ out_16384: + list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]); + + DBG(("%s: syncing due to busy batches\n", __FUNCTION__)); +- +- VG_CLEAR(set_domain); +- set_domain.handle = bo->handle; +- set_domain.read_domains = I915_GEM_DOMAIN_GTT; +- set_domain.write_domain = I915_GEM_DOMAIN_GTT; +- if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { +- DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); +- kgem_throttle(kgem); ++ if (kgem_bo_wait(kgem, bo)) + return NULL; +- } + + kgem_retire(kgem); + assert(bo->rq == NULL); +@@ -3460,9 +3990,14 @@ out_16384: + goto write; + } + } ++#else ++ if (kgem->surface != kgem->batch_size) ++ size = kgem->batch_size * sizeof(uint32_t); ++ else ++ size = kgem->nbatch * sizeof(uint32_t); ++#endif + +- bo = NULL; +- if (!kgem->has_llc) { ++ if (!kgem->batch_bo || !kgem->has_llc) { + bo = kgem_create_linear(kgem, size, 
CREATE_NO_THROTTLE); + if (bo) { + write: +@@ -3471,14 +4006,11 @@ write: + kgem_bo_destroy(kgem, bo); + return NULL; + } ++ return bo; + } + } +- if (bo == NULL) +- bo = kgem_new_batch(kgem); +- return bo; +-#else ++ + return kgem_new_batch(kgem); +-#endif + } + + #if !NDEBUG +@@ -3530,7 +4062,7 @@ static void dump_fence_regs(struct kgem *kgem) + + static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf) + { +- int ret, err; ++ int ret; + + retry: + ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +@@ -3547,26 +4079,25 @@ retry: + + /* last gasp */ + ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +- if (ret == 0) +- return 0; ++ if (ret != -ENOSPC) ++ return ret; ++ ++ /* One final trick up our sleeve for when we run out of space. ++ * We turn everything off to free up our pinned framebuffers, ++ * sprites and cursors, and try just one more time. ++ */ + + xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, + "Failed to submit rendering commands, trying again with outputs disabled.\n"); + +- /* One last trick up our sleeve for when we run out of space. +- * We turn everything off to free up our pinned framebuffers, +- * sprites and cursors, and try one last time. 
+- */ +- err = errno; +- if (sna_mode_disable(container_of(kgem, struct sna, kgem))) { ++ if (sna_mode_disable(__to_sna(kgem))) { + kgem_cleanup_cache(kgem); + ret = do_ioctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + execbuf); + DBG(("%s: last_gasp ret=%d\n", __FUNCTION__, ret)); +- sna_mode_enable(container_of(kgem, struct sna, kgem)); ++ sna_mode_enable(__to_sna(kgem)); + } +- errno = err; + + return ret; + } +@@ -3575,6 +4106,7 @@ void _kgem_submit(struct kgem *kgem) + { + struct kgem_request *rq; + uint32_t batch_end; ++ int i, ret; + + assert(!DBG_NO_HW); + assert(!kgem->wedged); +@@ -3609,7 +4141,6 @@ void _kgem_submit(struct kgem *kgem) + rq->bo = kgem_create_batch(kgem); + if (rq->bo) { + struct drm_i915_gem_execbuffer2 execbuf; +- int i, ret; + + assert(!rq->bo->needs_flush); + +@@ -3619,7 +4150,8 @@ void _kgem_submit(struct kgem *kgem) + kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc; + kgem->exec[i].alignment = 0; + kgem->exec[i].offset = rq->bo->presumed_offset; +- kgem->exec[i].flags = 0; ++ /* Make sure the kernel releases any fence, ignored if gen4+ */ ++ kgem->exec[i].flags = EXEC_OBJECT_NEEDS_FENCE; + kgem->exec[i].rsvd1 = 0; + kgem->exec[i].rsvd2 = 0; + +@@ -3631,7 +4163,8 @@ void _kgem_submit(struct kgem *kgem) + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = (uintptr_t)kgem->exec; + execbuf.buffer_count = kgem->nexec; +- execbuf.batch_len = batch_end*sizeof(uint32_t); ++ if (kgem->gen < 030) ++ execbuf.batch_len = batch_end*sizeof(uint32_t); + execbuf.flags = kgem->ring | kgem->batch_flags; + + if (DBG_DUMP) { +@@ -3645,91 +4178,98 @@ void _kgem_submit(struct kgem *kgem) + } + + ret = do_execbuf(kgem, &execbuf); +- if (DEBUG_SYNC && ret == 0) { +- struct drm_i915_gem_set_domain set_domain; +- +- VG_CLEAR(set_domain); +- set_domain.handle = rq->bo->handle; +- set_domain.read_domains = I915_GEM_DOMAIN_GTT; +- set_domain.write_domain = I915_GEM_DOMAIN_GTT; ++ } else ++ ret = -ENOMEM; + +- ret = do_ioctl(kgem->fd, 
DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); ++ if (ret < 0) { ++ kgem_throttle(kgem); ++ if (!kgem->wedged) { ++ xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, ++ "Failed to submit rendering commands (%s), disabling acceleration.\n", ++ strerror(-ret)); ++ __kgem_set_wedged(kgem); + } +- if (ret < 0) { +- kgem_throttle(kgem); +- if (!kgem->wedged) { +- xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, +- "Failed to submit rendering commands, disabling acceleration.\n"); +- __kgem_set_wedged(kgem); +- } + + #if !NDEBUG +- ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n", +- kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, +- kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret); ++ ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n", ++ kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, ++ kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret); + +- for (i = 0; i < kgem->nexec; i++) { +- struct kgem_bo *bo, *found = NULL; ++ for (i = 0; i < kgem->nexec; i++) { ++ struct kgem_bo *bo, *found = NULL; + +- list_for_each_entry(bo, &kgem->next_request->buffers, request) { +- if (bo->handle == kgem->exec[i].handle) { +- found = bo; +- break; +- } ++ list_for_each_entry(bo, &kgem->next_request->buffers, request) { ++ if (bo->handle == kgem->exec[i].handle) { ++ found = bo; ++ break; + } +- ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n", +- i, +- kgem->exec[i].handle, +- (int)kgem->exec[i].offset, +- found ? kgem_bo_size(found) : -1, +- found ? found->tiling : -1, +- (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), +- found ? found->snoop : -1, +- found ? 
found->purged : -1); + } +- for (i = 0; i < kgem->nreloc; i++) { +- ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n", +- i, +- (int)kgem->reloc[i].offset, +- kgem->reloc[i].target_handle, +- kgem->reloc[i].delta, +- kgem->reloc[i].read_domains, +- kgem->reloc[i].write_domain, +- (int)kgem->reloc[i].presumed_offset); ++ ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n", ++ i, ++ kgem->exec[i].handle, ++ (int)kgem->exec[i].offset, ++ found ? kgem_bo_size(found) : -1, ++ found ? found->tiling : -1, ++ (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), ++ found ? found->snoop : -1, ++ found ? found->purged : -1); ++ } ++ for (i = 0; i < kgem->nreloc; i++) { ++ ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n", ++ i, ++ (int)kgem->reloc[i].offset, ++ kgem->reloc[i].target_handle, ++ kgem->reloc[i].delta, ++ kgem->reloc[i].read_domains, ++ kgem->reloc[i].write_domain, ++ (int)kgem->reloc[i].presumed_offset); ++ } ++ ++ { ++ struct drm_i915_gem_get_aperture aperture; ++ if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0) ++ ErrorF("Aperture size %lld, available %lld\n", ++ (long long)aperture.aper_size, ++ (long long)aperture.aper_available_size); ++ } ++ ++ if (ret == -ENOSPC) ++ dump_gtt_info(kgem); ++ if (ret == -EDEADLK) ++ dump_fence_regs(kgem); ++ ++ if (DEBUG_SYNC) { ++ int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); ++ if (fd != -1) { ++ int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); ++ assert(ignored == batch_end*sizeof(uint32_t)); ++ close(fd); + } + +- { +- struct drm_i915_gem_get_aperture aperture; +- if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0) +- ErrorF("Aperture size %lld, available %lld\n", +- (long long)aperture.aper_size, +- (long long)aperture.aper_available_size); +- } ++ FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret); 
++ } ++#endif ++ } else { ++ if (DEBUG_SYNC) { ++ struct drm_i915_gem_set_domain set_domain; + +- if (ret == -ENOSPC) +- dump_gtt_info(kgem); +- if (ret == -EDEADLK) +- dump_fence_regs(kgem); +- +- if (DEBUG_SYNC) { +- int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); +- if (fd != -1) { +- int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); +- assert(ignored == batch_end*sizeof(uint32_t)); +- close(fd); +- } ++ VG_CLEAR(set_domain); ++ set_domain.handle = rq->bo->handle; ++ set_domain.read_domains = I915_GEM_DOMAIN_GTT; ++ set_domain.write_domain = I915_GEM_DOMAIN_GTT; + +- FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret); +- } +-#endif ++ ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); + } +- } ++ + #if SHOW_BATCH_AFTER +- if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0) +- __kgem_batch_debug(kgem, batch_end); ++ if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0) ++ __kgem_batch_debug(kgem, batch_end); + #endif +- kgem_commit(kgem); +- if (kgem->wedged) ++ ++ kgem_commit(kgem); ++ } ++ ++ if (unlikely(kgem->wedged)) + kgem_cleanup(kgem); + + kgem_reset(kgem); +@@ -3737,49 +4277,14 @@ void _kgem_submit(struct kgem *kgem) + assert(kgem->next_request != NULL); + } + +-static bool find_hang_state(struct kgem *kgem, char *path, int maxlen) +-{ +- int minor = kgem_get_minor(kgem); +- +- /* Search for our hang state in a few canonical locations. +- * In the unlikely event of having multiple devices, we +- * will need to check which minor actually corresponds to ours. 
+- */ +- +- snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor); +- if (access(path, R_OK) == 0) +- return true; +- +- snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor); +- if (access(path, R_OK) == 0) +- return true; +- +- snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor); +- if (access(path, R_OK) == 0) +- return true; +- +- path[0] = '\0'; +- return false; +-} +- + void kgem_throttle(struct kgem *kgem) + { +- if (kgem->wedged) ++ if (unlikely(kgem->wedged)) + return; + + if (__kgem_throttle(kgem, true)) { +- static int once; +- char path[128]; +- + xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, + "Detected a hung GPU, disabling acceleration.\n"); +- if (!once && find_hang_state(kgem, path, sizeof(path))) { +- xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR, +- "When reporting this, please include %s and the full dmesg.\n", +- path); +- once = 1; +- } +- + __kgem_set_wedged(kgem); + kgem->need_throttle = false; + } +@@ -3860,7 +4365,8 @@ bool kgem_expire_cache(struct kgem *kgem) + bool idle; + unsigned int i; + +- time(&now); ++ if (!time(&now)) ++ return false; + + while (__kgem_freed_bo) { + bo = __kgem_freed_bo; +@@ -3875,7 +4381,7 @@ bool kgem_expire_cache(struct kgem *kgem) + } + + kgem_clean_large_cache(kgem); +- if (container_of(kgem, struct sna, kgem)->scrn->vtSema) ++ if (__to_sna(kgem)->scrn->vtSema) + kgem_clean_scanout_cache(kgem); + + expire = 0; +@@ -3885,6 +4391,7 @@ bool kgem_expire_cache(struct kgem *kgem) + break; + } + ++ assert(now); + bo->delta = now; + } + if (expire) { +@@ -3909,7 +4416,7 @@ bool kgem_expire_cache(struct kgem *kgem) + #endif + + kgem_retire(kgem); +- if (kgem->wedged) ++ if (unlikely(kgem->wedged)) + kgem_cleanup(kgem); + + kgem->expire(kgem); +@@ -3930,6 +4437,8 @@ bool kgem_expire_cache(struct kgem *kgem) + break; + } + ++ assert(now); ++ kgem_bo_set_purgeable(kgem, bo); + bo->delta = now; + } + } +@@ -3960,16 +4469,11 @@ bool kgem_expire_cache(struct kgem *kgem) + 
count++; + size += bytes(bo); + kgem_bo_free(kgem, bo); +- DBG(("%s: expiring %d\n", ++ DBG(("%s: expiring handle=%d\n", + __FUNCTION__, bo->handle)); + } + } +- if (!list_is_empty(&preserve)) { +- preserve.prev->next = kgem->inactive[i].next; +- kgem->inactive[i].next->prev = preserve.prev; +- kgem->inactive[i].next = preserve.next; +- preserve.next->prev = &kgem->inactive[i]; +- } ++ list_splice_tail(&preserve, &kgem->inactive[i]); + } + + #ifdef DEBUG_MEMORY +@@ -3998,31 +4502,30 @@ bool kgem_cleanup_cache(struct kgem *kgem) + unsigned int i; + int n; + ++ DBG(("%s\n", __FUNCTION__)); ++ + /* sync to the most recent request */ + for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { + if (!list_is_empty(&kgem->requests[n])) { + struct kgem_request *rq; +- struct drm_i915_gem_set_domain set_domain; + +- rq = list_first_entry(&kgem->requests[n], +- struct kgem_request, +- list); ++ rq = list_last_entry(&kgem->requests[n], ++ struct kgem_request, ++ list); + + DBG(("%s: sync on cleanup\n", __FUNCTION__)); +- +- VG_CLEAR(set_domain); +- set_domain.handle = rq->bo->handle; +- set_domain.read_domains = I915_GEM_DOMAIN_GTT; +- set_domain.write_domain = I915_GEM_DOMAIN_GTT; +- (void)do_ioctl(kgem->fd, +- DRM_IOCTL_I915_GEM_SET_DOMAIN, +- &set_domain); ++ assert(rq->ring == n); ++ assert(rq->bo); ++ assert(RQ(rq->bo->rq) == rq); ++ kgem_bo_wait(kgem, rq->bo); + } ++ assert(list_is_empty(&kgem->requests[n])); + } + + kgem_retire(kgem); + kgem_cleanup(kgem); + ++ DBG(("%s: need_expire?=%d\n", __FUNCTION__, kgem->need_expire)); + if (!kgem->need_expire) + return false; + +@@ -4049,6 +4552,8 @@ bool kgem_cleanup_cache(struct kgem *kgem) + + kgem->need_purge = false; + kgem->need_expire = false; ++ ++ DBG(("%s: complete\n", __FUNCTION__)); + return true; + } + +@@ -4079,16 +4584,15 @@ retry_large: + goto discard; + + if (bo->tiling != I915_TILING_NONE) { +- if (use_active) ++ if (use_active && kgem->gen < 040) + goto discard; + +- if (!gem_set_tiling(kgem->fd, bo->handle, ++ if 
(!kgem_set_tiling(kgem, bo, + I915_TILING_NONE, 0)) + goto discard; +- +- bo->tiling = I915_TILING_NONE; +- bo->pitch = 0; + } ++ assert(bo->tiling == I915_TILING_NONE); ++ bo->pitch = 0; + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) + goto discard; +@@ -4169,17 +4673,17 @@ discard: + break; + } + +- if (I915_TILING_NONE != bo->tiling && +- !gem_set_tiling(kgem->fd, bo->handle, +- I915_TILING_NONE, 0)) +- continue; ++ if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0)) { ++ kgem_bo_free(kgem, bo); ++ break; ++ } + + kgem_bo_remove_from_inactive(kgem, bo); + assert(list_is_empty(&bo->vma)); + assert(list_is_empty(&bo->list)); + +- bo->tiling = I915_TILING_NONE; +- bo->pitch = 0; ++ assert(bo->tiling == I915_TILING_NONE); ++ assert(bo->pitch == 0); + bo->delta = 0; + DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n", + __FUNCTION__, bo->handle, num_pages(bo))); +@@ -4225,13 +4729,13 @@ discard: + if (first) + continue; + +- if (!gem_set_tiling(kgem->fd, bo->handle, +- I915_TILING_NONE, 0)) +- continue; +- +- bo->tiling = I915_TILING_NONE; +- bo->pitch = 0; ++ if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0)) { ++ kgem_bo_free(kgem, bo); ++ break; ++ } + } ++ assert(bo->tiling == I915_TILING_NONE); ++ bo->pitch = 0; + + if (bo->map__gtt || bo->map__wc || bo->map__cpu) { + if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { +@@ -4269,7 +4773,7 @@ discard: + kgem_bo_remove_from_inactive(kgem, bo); + + assert(bo->tiling == I915_TILING_NONE); +- bo->pitch = 0; ++ assert(bo->pitch == 0); + bo->delta = 0; + DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n", + __FUNCTION__, bo->handle, num_pages(bo), +@@ -4340,9 +4844,9 @@ struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name) + + bo->unique_id = kgem_get_unique_id(kgem); + bo->tiling = tiling.tiling_mode; +- bo->reusable = false; + bo->prime = true; +- bo->purged = true; /* no coherency guarantees */ ++ bo->reusable = false; ++ kgem_bo_unclean(kgem, bo); + + 
debug_alloc__bo(kgem, bo); + return bo; +@@ -4448,6 +4952,8 @@ int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo) + #if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC) + struct drm_prime_handle args; + ++ assert(kgem_bo_is_fenced(kgem, bo)); ++ + VG_CLEAR(args); + args.handle = bo->handle; + args.flags = O_CLOEXEC; +@@ -4479,6 +4985,8 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) + if ((flags & CREATE_UNCACHED) == 0) { + bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags); + if (bo) { ++ assert(!bo->purged); ++ assert(!bo->delta); + assert(bo->domain != DOMAIN_GPU); + ASSERT_IDLE(kgem, bo->handle); + bo->refcnt = 1; +@@ -4760,8 +5268,7 @@ static void __kgem_bo_make_scanout(struct kgem *kgem, + struct kgem_bo *bo, + int width, int height) + { +- ScrnInfoPtr scrn = +- container_of(kgem, struct sna, kgem)->scrn; ++ ScrnInfoPtr scrn = __to_sna(kgem)->scrn; + struct drm_mode_fb_cmd arg; + + assert(bo->proxy == NULL); +@@ -4809,6 +5316,48 @@ static void __kgem_bo_make_scanout(struct kgem *kgem, + } + } + ++static bool tiling_changed(struct kgem_bo *bo, int tiling, int pitch) ++{ ++ if (tiling != bo->tiling) ++ return true; ++ ++ return tiling != I915_TILING_NONE && pitch != bo->pitch; ++} ++ ++static void set_gpu_tiling(struct kgem *kgem, ++ struct kgem_bo *bo, ++ int tiling, int pitch) ++{ ++ DBG(("%s: handle=%d, tiling=%d, pitch=%d\n", ++ __FUNCTION__, bo->handle, tiling, pitch)); ++ ++ if (tiling_changed(bo, tiling, pitch) && bo->map__gtt) { ++ if (!list_is_empty(&bo->vma)) { ++ list_del(&bo->vma); ++ kgem->vma[0].count--; ++ } ++ munmap(bo->map__gtt, bytes(bo)); ++ bo->map__gtt = NULL; ++ } ++ ++ bo->tiling = tiling; ++ bo->pitch = pitch; ++} ++ ++bool kgem_bo_is_fenced(struct kgem *kgem, struct kgem_bo *bo) ++{ ++ struct drm_i915_gem_get_tiling tiling; ++ ++ assert(kgem); ++ assert(bo); ++ ++ VG_CLEAR(tiling); ++ tiling.handle = bo->handle; ++ tiling.tiling_mode = bo->tiling; ++ 
(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling); ++ return tiling.tiling_mode == bo->tiling; /* assume pitch is fine! */ ++} ++ + struct kgem_bo *kgem_create_2d(struct kgem *kgem, + int width, + int height, +@@ -4892,8 +5441,8 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, + return last; + } + +- if (container_of(kgem, struct sna, kgem)->scrn->vtSema) { +- ScrnInfoPtr scrn = container_of(kgem, struct sna, kgem)->scrn; ++ if (__to_sna(kgem)->scrn->vtSema) { ++ ScrnInfoPtr scrn = __to_sna(kgem)->scrn; + + list_for_each_entry_reverse(bo, &kgem->scanout, list) { + struct drm_mode_fb_cmd arg; +@@ -4915,11 +5464,8 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, + bo->delta = 0; + } + +- if (gem_set_tiling(kgem->fd, bo->handle, +- tiling, pitch)) { +- bo->tiling = tiling; +- bo->pitch = pitch; +- } else { ++ if (!kgem_set_tiling(kgem, bo, ++ tiling, pitch)) { + kgem_bo_free(kgem, bo); + break; + } +@@ -4950,6 +5496,9 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, + } + } + ++ if (flags & CREATE_CACHED) ++ return NULL; ++ + bo = __kgem_bo_create_as_display(kgem, size, tiling, pitch); + if (bo) + return bo; +@@ -4987,14 +5536,9 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, + if (num_pages(bo) < size) + continue; + +- if (bo->pitch != pitch || bo->tiling != tiling) { +- if (!gem_set_tiling(kgem->fd, bo->handle, +- tiling, pitch)) +- continue; +- +- bo->pitch = pitch; +- bo->tiling = tiling; +- } ++ if (!kgem_set_tiling(kgem, bo, tiling, pitch) && ++ !exact) ++ set_gpu_tiling(kgem, bo, tiling, pitch); + } + + kgem_bo_remove_from_active(kgem, bo); +@@ -5020,14 +5564,11 @@ large_inactive: + if (size > num_pages(bo)) + continue; + +- if (bo->tiling != tiling || +- (tiling != I915_TILING_NONE && bo->pitch != pitch)) { +- if (!gem_set_tiling(kgem->fd, bo->handle, +- tiling, pitch)) ++ if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { ++ if (kgem->gen >= 040 && !exact) ++ set_gpu_tiling(kgem, bo, tiling, pitch); ++ else + continue; +- +- 
bo->tiling = tiling; +- bo->pitch = pitch; + } + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { +@@ -5039,7 +5580,6 @@ large_inactive: + + assert(bo->domain != DOMAIN_GPU); + bo->unique_id = kgem_get_unique_id(kgem); +- bo->pitch = pitch; + bo->delta = 0; + DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); +@@ -5088,14 +5628,13 @@ large_inactive: + if (bo->tiling != tiling || + (tiling != I915_TILING_NONE && bo->pitch != pitch)) { + if (bo->map__gtt || +- !gem_set_tiling(kgem->fd, bo->handle, +- tiling, pitch)) { ++ !kgem_set_tiling(kgem, bo, ++ tiling, pitch)) { + DBG(("inactive GTT vma with wrong tiling: %d < %d\n", + bo->tiling, tiling)); +- continue; ++ kgem_bo_free(kgem, bo); ++ break; + } +- bo->tiling = tiling; +- bo->pitch = pitch; + } + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { +@@ -5103,8 +5642,11 @@ large_inactive: + break; + } + ++ if (tiling == I915_TILING_NONE) ++ bo->pitch = pitch; ++ + assert(bo->tiling == tiling); +- bo->pitch = pitch; ++ assert(bo->pitch >= pitch); + bo->delta = 0; + bo->unique_id = kgem_get_unique_id(kgem); + +@@ -5170,15 +5712,12 @@ search_active: + if (num_pages(bo) < size) + continue; + +- if (bo->pitch != pitch) { +- if (!gem_set_tiling(kgem->fd, +- bo->handle, +- tiling, pitch)) +- continue; +- +- bo->pitch = pitch; +- } ++ if (!kgem_set_tiling(kgem, bo, tiling, pitch) && ++ !exact) ++ set_gpu_tiling(kgem, bo, tiling, pitch); + } ++ assert(bo->tiling == tiling); ++ assert(bo->pitch >= pitch); + + kgem_bo_remove_from_active(kgem, bo); + +@@ -5233,19 +5772,21 @@ search_active: + if (num_pages(bo) < size) + continue; + +- if (bo->tiling != tiling || +- (tiling != I915_TILING_NONE && bo->pitch != pitch)) { +- if (!gem_set_tiling(kgem->fd, +- bo->handle, +- tiling, pitch)) +- continue; ++ if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { ++ if (kgem->gen >= 040 && !exact) { ++ set_gpu_tiling(kgem, bo, ++ tiling, pitch); ++ } else 
{ ++ kgem_bo_free(kgem, bo); ++ break; ++ } + } ++ assert(bo->tiling == tiling); ++ assert(bo->pitch >= pitch); + + kgem_bo_remove_from_active(kgem, bo); + + bo->unique_id = kgem_get_unique_id(kgem); +- bo->pitch = pitch; +- bo->tiling = tiling; + bo->delta = 0; + DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); +@@ -5323,11 +5864,13 @@ search_inactive: + continue; + } + +- if (bo->tiling != tiling || +- (tiling != I915_TILING_NONE && bo->pitch != pitch)) { +- if (!gem_set_tiling(kgem->fd, bo->handle, +- tiling, pitch)) +- continue; ++ if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { ++ if (kgem->gen >= 040 && !exact) { ++ set_gpu_tiling(kgem, bo, tiling, pitch); ++ } else { ++ kgem_bo_free(kgem, bo); ++ break; ++ } + } + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { +@@ -5338,9 +5881,8 @@ search_inactive: + kgem_bo_remove_from_inactive(kgem, bo); + assert(list_is_empty(&bo->list)); + assert(list_is_empty(&bo->vma)); +- +- bo->pitch = pitch; +- bo->tiling = tiling; ++ assert(bo->tiling == tiling); ++ assert(bo->pitch >= pitch); + + bo->delta = 0; + bo->unique_id = kgem_get_unique_id(kgem); +@@ -5388,14 +5930,17 @@ search_inactive: + kgem_bo_remove_from_active(kgem, bo); + __kgem_bo_clear_busy(bo); + +- if (tiling != I915_TILING_NONE && bo->pitch != pitch) { +- if (!gem_set_tiling(kgem->fd, bo->handle, tiling, pitch)) { ++ if (!kgem_set_tiling(kgem, bo, tiling, pitch)) { ++ if (kgem->gen >= 040 && !exact) { ++ set_gpu_tiling(kgem, bo, tiling, pitch); ++ } else { + kgem_bo_free(kgem, bo); + goto no_retire; + } + } ++ assert(bo->tiling == tiling); ++ assert(bo->pitch >= pitch); + +- bo->pitch = pitch; + bo->unique_id = kgem_get_unique_id(kgem); + bo->delta = 0; + DBG((" 2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", +@@ -5440,18 +5985,21 @@ create: + } + + bo->unique_id = kgem_get_unique_id(kgem); +- if (tiling == I915_TILING_NONE || +- gem_set_tiling(kgem->fd, handle, tiling, 
pitch)) { +- bo->tiling = tiling; +- bo->pitch = pitch; ++ if (kgem_set_tiling(kgem, bo, tiling, pitch)) { + if (flags & CREATE_SCANOUT) + __kgem_bo_make_scanout(kgem, bo, width, height); + } else { +- if (flags & CREATE_EXACT) { +- DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__)); +- gem_close(kgem->fd, handle); +- free(bo); +- return NULL; ++ if (kgem->gen >= 040) { ++ assert(!kgem->can_fence); ++ bo->tiling = tiling; ++ bo->pitch = pitch; ++ } else { ++ if (flags & CREATE_EXACT) { ++ DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__)); ++ gem_close(kgem->fd, handle); ++ free(bo); ++ return NULL; ++ } + } + } + +@@ -5608,7 +6156,7 @@ static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) + + void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo) + { +- if (!bo->needs_flush) ++ if (!bo->needs_flush && !bo->gtt_dirty) + return; + + kgem_bo_submit(kgem, bo); +@@ -5621,18 +6169,24 @@ void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo) + if (bo->rq) + __kgem_flush(kgem, bo); + ++ if (bo->scanout && kgem->needs_dirtyfb) { ++ struct drm_mode_fb_dirty_cmd cmd; ++ memset(&cmd, 0, sizeof(cmd)); ++ cmd.fb_id = bo->delta; ++ (void)drmIoctl(kgem->fd, DRM_IOCTL_MODE_DIRTYFB, &cmd); ++ } ++ + /* Whatever actually happens, we can regard the GTT write domain + * as being flushed. 
+ */ +- bo->gtt_dirty = false; +- bo->needs_flush = false; +- bo->domain = DOMAIN_NONE; ++ __kgem_bo_clear_dirty(bo); + } + + inline static bool nearly_idle(struct kgem *kgem) + { + int ring = kgem->ring == KGEM_BLT; + ++ assert(ring < ARRAY_SIZE(kgem->requests)); + if (list_is_singular(&kgem->requests[ring])) + return true; + +@@ -5720,7 +6274,7 @@ static inline bool kgem_flush(struct kgem *kgem, bool flush) + if (kgem->nreloc == 0) + return true; + +- if (container_of(kgem, struct sna, kgem)->flags & SNA_POWERSAVE) ++ if (__to_sna(kgem)->flags & SNA_POWERSAVE) + return true; + + if (kgem->flush == flush && kgem->aperture < kgem->aperture_low) +@@ -5982,6 +6536,55 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) + return kgem_flush(kgem, flush); + } + ++void __kgem_bcs_set_tiling(struct kgem *kgem, ++ struct kgem_bo *src, ++ struct kgem_bo *dst) ++{ ++ uint32_t state, *b; ++ ++ DBG(("%s: src handle=%d:tiling=%d, dst handle=%d:tiling=%d\n", ++ __FUNCTION__, ++ src ? src->handle : 0, src ? src->tiling : 0, ++ dst ? dst->handle : 0, dst ? 
dst->tiling : 0)); ++ assert(kgem->mode == KGEM_BLT); ++ assert(dst == NULL || kgem_bo_can_blt(kgem, dst)); ++ assert(src == NULL || kgem_bo_can_blt(kgem, src)); ++ ++ state = 0; ++ if (dst && dst->tiling == I915_TILING_Y) ++ state |= BCS_DST_Y; ++ if (src && src->tiling == I915_TILING_Y) ++ state |= BCS_SRC_Y; ++ ++ if (kgem->bcs_state == state) ++ return; ++ ++ DBG(("%s: updating SWCTRL %x -> %x\n", __FUNCTION__, ++ kgem->bcs_state, state)); ++ ++ /* Over-estimate space in case we need to re-emit the cmd packet */ ++ if (!kgem_check_batch(kgem, 24)) { ++ _kgem_submit(kgem); ++ _kgem_set_mode(kgem, KGEM_BLT); ++ if (state == 0) ++ return; ++ } ++ ++ b = kgem->batch + kgem->nbatch; ++ if (kgem->nbatch) { ++ *b++ = MI_FLUSH_DW; ++ *b++ = 0; ++ *b++ = 0; ++ *b++ = 0; ++ } ++ *b++ = MI_LOAD_REGISTER_IMM; ++ *b++ = BCS_SWCTRL; ++ *b++ = (BCS_SRC_Y | BCS_DST_Y) << 16 | state; ++ kgem->nbatch = b - kgem->batch; ++ ++ kgem->bcs_state = state; ++} ++ + uint32_t kgem_add_reloc(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, +@@ -6195,12 +6798,6 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) + + list_del(&bo->vma); + kgem->vma[type].count--; +- +- if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) { +- DBG(("%s: freeing unpurgeable old mapping\n", +- __FUNCTION__)); +- kgem_bo_free(kgem, bo); +- } + } + } + +@@ -6216,8 +6813,8 @@ static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo) + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); + + if (bo->tiling || !kgem->has_wc_mmap) { +- assert(num_pages(bo) <= kgem->aperture_mappable / 2); + assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y); ++ warn_unless(num_pages(bo) <= kgem->aperture_mappable / 2); + + ptr = bo->map__gtt; + if (ptr == NULL) +@@ -6291,6 +6888,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) + DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); + kgem_throttle(kgem); + } ++ bo->needs_flush = false; + kgem_bo_retire(kgem, bo); + 
bo->domain = DOMAIN_GTT; + bo->gtt_dirty = true; +@@ -6319,14 +6917,16 @@ void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo) + bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); + + assert(bo->proxy == NULL); +- assert(bo->exec == NULL); + assert(list_is_empty(&bo->list)); + assert_tiling(kgem, bo); + assert(!bo->purged || bo->reusable); + + if (bo->map__wc) + return bo->map__wc; ++ if (!kgem->has_wc_mmap) ++ return NULL; + ++ kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); + return __kgem_bo_map__wc(kgem, bo); + } + +@@ -6373,6 +6973,8 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) + { + struct drm_gem_flink flink; + ++ assert(kgem_bo_is_fenced(kgem, bo)); ++ + VG_CLEAR(flink); + flink.handle = bo->handle; + if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink)) +@@ -6387,7 +6989,6 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) + * party, we track the lifetime accurately. + */ + bo->reusable = false; +- + kgem_bo_unclean(kgem, bo); + + return flink.name; +@@ -6411,16 +7012,34 @@ struct kgem_bo *kgem_create_map(struct kgem *kgem, + first_page = (uintptr_t)ptr; + last_page = first_page + size + PAGE_SIZE - 1; + +- first_page &= ~(PAGE_SIZE-1); +- last_page &= ~(PAGE_SIZE-1); ++ first_page &= ~(uintptr_t)(PAGE_SIZE-1); ++ last_page &= ~(uintptr_t)(PAGE_SIZE-1); + assert(last_page > first_page); + + handle = gem_userptr(kgem->fd, + (void *)first_page, last_page-first_page, + read_only); + if (handle == 0) { +- DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno)); +- return NULL; ++ if (read_only && kgem->has_wc_mmap) { ++ struct drm_i915_gem_set_domain set_domain; ++ ++ handle = gem_userptr(kgem->fd, ++ (void *)first_page, last_page-first_page, ++ false); ++ ++ VG_CLEAR(set_domain); ++ set_domain.handle = handle; ++ set_domain.read_domains = I915_GEM_DOMAIN_GTT; ++ set_domain.write_domain = 0; ++ if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { ++ 
gem_close(kgem->fd, handle); ++ handle = 0; ++ } ++ } ++ if (handle == 0) { ++ DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno)); ++ return NULL; ++ } + } + + bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE); +@@ -6483,8 +7102,10 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) + DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); + kgem_throttle(kgem); + } ++ bo->needs_flush = false; + kgem_bo_retire(kgem, bo); + bo->domain = DOMAIN_CPU; ++ bo->gtt_dirty = true; + } + } + +@@ -6505,6 +7126,9 @@ void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write) + assert(bo->refcnt); + assert(!bo->purged); + ++ if (bo->rq == NULL && (kgem->has_llc || bo->snoop) && !write) ++ return; ++ + if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) { + struct drm_i915_gem_set_domain set_domain; + +@@ -6522,9 +7146,11 @@ void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write) + DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); + kgem_throttle(kgem); + } ++ bo->needs_flush = false; + if (write) { + kgem_bo_retire(kgem, bo); + bo->domain = DOMAIN_CPU; ++ bo->gtt_dirty = true; + } else { + if (bo->exec == NULL) + kgem_bo_maybe_retire(kgem, bo); +@@ -6539,6 +7165,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) + assert(bo->refcnt); + assert(bo->proxy == NULL); + assert_tiling(kgem, bo); ++ assert(!bo->snoop); + + kgem_bo_submit(kgem, bo); + +@@ -6559,6 +7186,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) + DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); + kgem_throttle(kgem); + } ++ bo->needs_flush = false; + kgem_bo_retire(kgem, bo); + bo->domain = DOMAIN_GTT; + bo->gtt_dirty = true; +@@ -7485,6 +8113,7 @@ kgem_replace_bo(struct kgem *kgem, + } + _kgem_set_mode(kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(kgem, src, dst); + + br00 = XY_SRC_COPY_BLT_CMD; + br13 = pitch; +@@ -7553,6 +8182,9 @@ bool kgem_bo_convert_to_gpu(struct kgem 
*kgem, + __FUNCTION__, bo->handle, flags, __kgem_bo_is_busy(kgem, bo))); + assert(bo->tiling == I915_TILING_NONE); + ++ if (flags & (__MOVE_PRIME | __MOVE_SCANOUT)) ++ return false; ++ + if (kgem->has_llc) + return true; + +diff --git a/src/sna/kgem.h b/src/sna/kgem.h +index 2267bacf..08b4eb20 100644 +--- a/src/sna/kgem.h ++++ b/src/sna/kgem.h +@@ -42,6 +42,7 @@ struct kgem_bo { + #define RQ(rq) ((struct kgem_request *)((uintptr_t)(rq) & ~3)) + #define RQ_RING(rq) ((uintptr_t)(rq) & 3) + #define RQ_IS_BLT(rq) (RQ_RING(rq) == KGEM_BLT) ++#define RQ_IS_RENDER(rq) (RQ_RING(rq) == KGEM_RENDER) + #define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring))) + + struct drm_i915_gem_exec_object2 *exec; +@@ -103,7 +104,7 @@ struct kgem_request { + struct list list; + struct kgem_bo *bo; + struct list buffers; +- int ring; ++ unsigned ring; + }; + + enum { +@@ -112,6 +113,12 @@ enum { + NUM_MAP_TYPES, + }; + ++typedef void (*memcpy_box_func)(const void *src, void *dst, int bpp, ++ int32_t src_stride, int32_t dst_stride, ++ int16_t src_x, int16_t src_y, ++ int16_t dst_x, int16_t dst_y, ++ uint16_t width, uint16_t height); ++ + struct kgem { + unsigned wedged; + int fd; +@@ -157,6 +164,8 @@ struct kgem { + int16_t count; + } vma[NUM_MAP_TYPES]; + ++ uint32_t bcs_state; ++ + uint32_t batch_flags; + uint32_t batch_flags_base; + #define I915_EXEC_SECURE (1<<9) +@@ -186,9 +195,15 @@ struct kgem { + uint32_t has_no_reloc :1; + uint32_t has_handle_lut :1; + uint32_t has_wc_mmap :1; ++ uint32_t has_dirtyfb :1; + ++ uint32_t can_fence :1; + uint32_t can_blt_cpu :1; ++ uint32_t can_blt_y :1; + uint32_t can_render_y :1; ++ uint32_t can_scanout_y :1; ++ ++ uint32_t needs_dirtyfb :1; + + uint16_t fence_max; + uint16_t half_cpu_cache_pages; +@@ -203,16 +218,9 @@ struct kgem { + void (*retire)(struct kgem *kgem); + void (*expire)(struct kgem *kgem); + +- void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp, +- int32_t src_stride, int32_t dst_stride, +- int16_t 
src_x, int16_t src_y, +- int16_t dst_x, int16_t dst_y, +- uint16_t width, uint16_t height); +- void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp, +- int32_t src_stride, int32_t dst_stride, +- int16_t src_x, int16_t src_y, +- int16_t dst_x, int16_t dst_y, +- uint16_t width, uint16_t height); ++ memcpy_box_func memcpy_to_tiled_x; ++ memcpy_box_func memcpy_from_tiled_x; ++ memcpy_box_func memcpy_between_tiled_x; + + struct kgem_bo *batch_bo; + +@@ -230,7 +238,7 @@ struct kgem { + + #define KGEM_MAX_DEFERRED_VBO 16 + +-#define KGEM_BATCH_RESERVED 1 ++#define KGEM_BATCH_RESERVED 8 /* LRI(SWCTRL) + END */ + #define KGEM_RELOC_RESERVED (KGEM_MAX_DEFERRED_VBO) + #define KGEM_EXEC_RESERVED (1+KGEM_MAX_DEFERRED_VBO) + +@@ -317,6 +325,7 @@ bool kgem_bo_convert_to_gpu(struct kgem *kgem, + struct kgem_bo *bo, + unsigned flags); + ++bool kgem_bo_is_fenced(struct kgem *kgem, struct kgem_bo *bo); + uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); + void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); + +@@ -342,6 +351,11 @@ static inline bool kgem_ring_is_idle(struct kgem *kgem, int ring) + { + ring = ring == KGEM_BLT; + ++ if (kgem->needs_semaphore && ++ !list_is_empty(&kgem->requests[!ring]) && ++ !__kgem_ring_is_idle(kgem, !ring)) ++ return false; ++ + if (list_is_empty(&kgem->requests[ring])) + return true; + +@@ -390,6 +404,7 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo); + static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) + { + assert(bo->refcnt); ++ assert(bo->refcnt > bo->active_scanout); + if (--bo->refcnt == 0) + _kgem_bo_destroy(kgem, bo); + } +@@ -400,13 +415,13 @@ static inline void kgem_set_mode(struct kgem *kgem, + enum kgem_mode mode, + struct kgem_bo *bo) + { +- assert(!kgem->wedged); ++ warn_unless(!kgem->wedged); + + #if DEBUG_FLUSH_BATCH + kgem_submit(kgem); + #endif + +- if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring)) { ++ if 
(kgem->nreloc && bo->rq == NULL && kgem_ring_is_idle(kgem, kgem->ring)) { + DBG(("%s: flushing before new bo\n", __FUNCTION__)); + _kgem_submit(kgem); + } +@@ -422,7 +437,7 @@ static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) + { + assert(kgem->mode == KGEM_NONE); + assert(kgem->nbatch == 0); +- assert(!kgem->wedged); ++ warn_unless(!kgem->wedged); + kgem->context_switch(kgem, mode); + kgem->mode = mode; + } +@@ -566,7 +581,7 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem, + { + assert(bo->refcnt); + +- if (bo->tiling == I915_TILING_Y) { ++ if (bo->tiling == I915_TILING_Y && !kgem->can_blt_y) { + DBG(("%s: can not blt to handle=%d, tiling=Y\n", + __FUNCTION__, bo->handle)); + return false; +@@ -581,6 +596,22 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem, + return kgem_bo_blt_pitch_is_ok(kgem, bo); + } + ++void __kgem_bcs_set_tiling(struct kgem *kgem, ++ struct kgem_bo *src, ++ struct kgem_bo *dst); ++ ++inline static void kgem_bcs_set_tiling(struct kgem *kgem, ++ struct kgem_bo *src, ++ struct kgem_bo *dst) ++{ ++ assert(kgem->mode == KGEM_BLT); ++ ++ if (!kgem->can_blt_y) ++ return; ++ ++ __kgem_bcs_set_tiling(kgem, src, dst); ++} ++ + static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) + { + assert(bo->refcnt); +@@ -607,17 +638,24 @@ static inline void kgem_bo_mark_busy(struct kgem *kgem, struct kgem_bo *bo, int + } + } + +-inline static void __kgem_bo_clear_busy(struct kgem_bo *bo) ++static inline void __kgem_bo_clear_dirty(struct kgem_bo *bo) + { + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); +- bo->rq = NULL; +- list_del(&bo->request); + + bo->domain = DOMAIN_NONE; + bo->needs_flush = false; + bo->gtt_dirty = false; + } + ++inline static void __kgem_bo_clear_busy(struct kgem_bo *bo) ++{ ++ DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); ++ bo->rq = NULL; ++ list_del(&bo->request); ++ ++ __kgem_bo_clear_dirty(bo); ++} ++ + static inline bool kgem_bo_is_busy(struct kgem_bo *bo) + { + DBG(("%s: handle=%d, 
domain: %d exec? %d, rq? %d\n", __FUNCTION__, +@@ -626,7 +664,7 @@ static inline bool kgem_bo_is_busy(struct kgem_bo *bo) + return bo->rq; + } + +-void __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo); ++bool __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo); + static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) + { + DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, +@@ -636,14 +674,13 @@ static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) + if (bo->exec) + return true; + +- if (bo->rq && !__kgem_busy(kgem, bo->handle)) { +- __kgem_retire_requests_upto(kgem, bo); +- assert(list_is_empty(&bo->request)); +- assert(bo->rq == NULL); +- assert(bo->domain == DOMAIN_NONE); +- } ++ if (bo->rq == NULL) ++ return false; ++ ++ if (__kgem_busy(kgem, bo->handle)) ++ return true; + +- return kgem_bo_is_busy(bo); ++ return __kgem_retire_requests_upto(kgem, bo); + } + + static inline bool kgem_bo_is_render(struct kgem_bo *bo) +@@ -651,7 +688,15 @@ static inline bool kgem_bo_is_render(struct kgem_bo *bo) + DBG(("%s: handle=%d, rq? %d [%d]\n", __FUNCTION__, + bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq))); + assert(bo->refcnt); +- return bo->rq && RQ_RING(bo->rq) == I915_EXEC_RENDER; ++ return bo->rq && RQ_RING(bo->rq) != KGEM_BLT; ++} ++ ++static inline bool kgem_bo_is_blt(struct kgem_bo *bo) ++{ ++ DBG(("%s: handle=%d, rq? 
%d\n", __FUNCTION__, ++ bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq))); ++ assert(bo->refcnt); ++ return RQ_RING(bo->rq) == KGEM_BLT; + } + + static inline void kgem_bo_mark_unreusable(struct kgem_bo *bo) +@@ -852,6 +897,6 @@ memcpy_from_tiled_x(struct kgem *kgem, + width, height); + } + +-void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling); ++void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling, unsigned cpu); + + #endif /* KGEM_H */ +diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c +index 9b80dc88..8e6e47b6 100644 +--- a/src/sna/kgem_debug_gen4.c ++++ b/src/sna/kgem_debug_gen4.c +@@ -598,7 +598,7 @@ int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset) + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); +- kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", ++ kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, +diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c +index 8b55dd91..f1b1275f 100644 +--- a/src/sna/kgem_debug_gen5.c ++++ b/src/sna/kgem_debug_gen5.c +@@ -573,7 +573,7 @@ int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset) + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); +- kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", ++ kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, +diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c +index 7ef55d38..579c5d54 100644 +--- a/src/sna/kgem_debug_gen6.c ++++ b/src/sna/kgem_debug_gen6.c +@@ -985,7 +985,7 @@ int kgem_gen6_decode_3d(struct kgem 
*kgem, uint32_t offset) + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); +- kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", ++ kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, +diff --git a/src/sna/sna.h b/src/sna/sna.h +index 18425e30..7861110a 100644 +--- a/src/sna/sna.h ++++ b/src/sna/sna.h +@@ -154,6 +154,8 @@ struct sna_pixmap { + #define MAPPED_GTT 1 + #define MAPPED_CPU 2 + uint8_t flush :2; ++#define FLUSH_READ 1 ++#define FLUSH_WRITE 2 + uint8_t shm :1; + uint8_t clear :1; + uint8_t header :1; +@@ -179,18 +181,31 @@ static inline WindowPtr get_root_window(ScreenPtr screen) + #endif + } + ++#if !NDEBUG ++static PixmapPtr check_pixmap(PixmapPtr pixmap) ++{ ++ if (pixmap != NULL) { ++ assert(pixmap->refcnt >= 1); ++ assert(pixmap->devKind != 0xdeadbeef); ++ } ++ return pixmap; ++} ++#else ++#define check_pixmap(p) p ++#endif ++ + static inline PixmapPtr get_window_pixmap(WindowPtr window) + { + assert(window); + assert(window->drawable.type != DRAWABLE_PIXMAP); +- return fbGetWindowPixmap(window); ++ return check_pixmap(fbGetWindowPixmap(window)); + } + + static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable) + { + assert(drawable); + if (drawable->type == DRAWABLE_PIXMAP) +- return (PixmapPtr)drawable; ++ return check_pixmap((PixmapPtr)drawable); + else + return get_window_pixmap((WindowPtr)drawable); + } +@@ -244,11 +259,12 @@ struct sna { + #define SNA_NO_VSYNC 0x40 + #define SNA_TRIPLE_BUFFER 0x80 + #define SNA_TEAR_FREE 0x100 +-#define SNA_FORCE_SHADOW 0x200 +-#define SNA_FLUSH_GTT 0x400 ++#define SNA_WANT_TEAR_FREE 0x200 ++#define SNA_FORCE_SHADOW 0x400 ++#define SNA_FLUSH_GTT 0x800 + #define SNA_PERFORMANCE 0x1000 + #define SNA_POWERSAVE 0x2000 +-#define SNA_REMOVE_OUTPUTS 0x4000 ++#define 
SNA_NO_DPMS 0x4000 + #define SNA_HAS_FLIP 0x10000 + #define SNA_HAS_ASYNC_FLIP 0x20000 + #define SNA_LINEAR_FB 0x40000 +@@ -265,7 +281,13 @@ struct sna { + #define AVX 0x80 + #define AVX2 0x100 + +- unsigned watch_flush; ++ bool ignore_copy_area : 1; ++ ++ unsigned watch_shm_flush; ++ unsigned watch_dri_flush; ++ unsigned damage_event; ++ bool needs_shm_flush; ++ bool needs_dri_flush; + + struct timeval timer_tv; + uint32_t timer_expire[NUM_TIMERS]; +@@ -284,9 +306,17 @@ struct sna { + struct kgem_bo *shadow; + unsigned front_active; + unsigned shadow_active; ++ unsigned rr_active; + unsigned flip_active; ++ unsigned hidden; ++ bool shadow_enabled; ++ bool shadow_wait; + bool dirty; + ++ struct drm_event_vblank *shadow_events; ++ int shadow_nevent; ++ int shadow_size; ++ + int max_crtc_width, max_crtc_height; + RegionRec shadow_region; + RegionRec shadow_cancel; +@@ -318,7 +348,8 @@ struct sna { + uint32_t fg, bg; + int size; + +- int active; ++ bool disable; ++ bool active; + int last_x; + int last_y; + +@@ -331,8 +362,9 @@ struct sna { + } cursor; + + struct sna_dri2 { +- bool available; +- bool open; ++ bool available : 1; ++ bool enable : 1; ++ bool open : 1; + + #if HAVE_DRI2 + void *flip_pending; +@@ -341,8 +373,11 @@ struct sna { + } dri2; + + struct sna_dri3 { +- bool available; +- bool open; ++ bool available :1; ++ bool override : 1; ++ bool enable : 1; ++ bool open :1; ++ + #if HAVE_DRI3 + SyncScreenCreateFenceFunc create_fence; + struct list pixmaps; +@@ -353,6 +388,9 @@ struct sna { + bool available; + bool open; + #if HAVE_PRESENT ++ struct list vblank_queue; ++ uint64_t unflip; ++ void *freed_info; + #endif + } present; + +@@ -364,8 +402,10 @@ struct sna { + EntityInfoPtr pEnt; + const struct intel_device_info *info; + ++#if !HAVE_NOTIFY_FD + ScreenBlockHandlerProcPtr BlockHandler; + ScreenWakeupHandlerProcPtr WakeupHandler; ++#endif + CloseScreenProcPtr CloseScreen; + + PicturePtr clear; +@@ -383,6 +423,7 @@ struct sna { + struct gen6_render_state 
gen6; + struct gen7_render_state gen7; + struct gen8_render_state gen8; ++ struct gen9_render_state gen9; + } render_state; + + /* Broken-out options. */ +@@ -420,7 +461,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna); + bool sna_mode_fake_init(struct sna *sna, int num_fake); + bool sna_mode_wants_tear_free(struct sna *sna); + void sna_mode_adjust_frame(struct sna *sna, int x, int y); +-extern void sna_mode_discover(struct sna *sna); ++extern void sna_mode_discover(struct sna *sna, bool tell); + extern void sna_mode_check(struct sna *sna); + extern bool sna_mode_disable(struct sna *sna); + extern void sna_mode_enable(struct sna *sna); +@@ -434,6 +475,7 @@ extern void sna_shadow_unset_crtc(struct sna *sna, xf86CrtcPtr crtc); + extern bool sna_pixmap_discard_shadow_damage(struct sna_pixmap *priv, + const RegionRec *region); + extern void sna_mode_set_primary(struct sna *sna); ++extern bool sna_mode_find_hotplug_connector(struct sna *sna, unsigned id); + extern void sna_mode_close(struct sna *sna); + extern void sna_mode_fini(struct sna *sna); + +@@ -444,6 +486,7 @@ extern bool sna_cursors_init(ScreenPtr screen, struct sna *sna); + typedef void (*sna_flip_handler_t)(struct drm_event_vblank *e, + void *data); + ++extern bool sna_needs_page_flip(struct sna *sna, struct kgem_bo *bo); + extern int sna_page_flip(struct sna *sna, + struct kgem_bo *bo, + sna_flip_handler_t handler, +@@ -461,6 +504,11 @@ to_sna_from_screen(ScreenPtr screen) + return to_sna(xf86ScreenToScrn(screen)); + } + ++pure static inline ScreenPtr to_screen_from_sna(struct sna *sna) ++{ ++ return xf86ScrnToScreen(sna->scrn); ++} ++ + pure static inline struct sna * + to_sna_from_pixmap(PixmapPtr pixmap) + { +@@ -498,12 +546,11 @@ to_sna_from_kgem(struct kgem *kgem) + extern xf86CrtcPtr sna_covering_crtc(struct sna *sna, + const BoxRec *box, + xf86CrtcPtr desired); ++extern xf86CrtcPtr sna_primary_crtc(struct sna *sna); + + extern bool sna_wait_for_scanline(struct sna *sna, PixmapPtr 
pixmap, + xf86CrtcPtr crtc, const BoxRec *clip); + +-xf86CrtcPtr sna_mode_first_crtc(struct sna *sna); +- + const struct ust_msc { + uint64_t msc; + int tv_sec; +@@ -536,6 +583,11 @@ static inline uint64_t ust64(int tv_sec, int tv_usec) + return (uint64_t)tv_sec * 1000000 + tv_usec; + } + ++static inline uint64_t swap_ust(const struct ust_msc *swap) ++{ ++ return ust64(swap->tv_sec, swap->tv_usec); ++} ++ + #if HAVE_DRI2 + bool sna_dri2_open(struct sna *sna, ScreenPtr pScreen); + void sna_dri2_page_flip_handler(struct sna *sna, struct drm_event_vblank *event); +@@ -567,20 +619,59 @@ bool sna_present_open(struct sna *sna, ScreenPtr pScreen); + void sna_present_update(struct sna *sna); + void sna_present_close(struct sna *sna, ScreenPtr pScreen); + void sna_present_vblank_handler(struct drm_event_vblank *event); ++void sna_present_cancel_flip(struct sna *sna); + #else + static inline bool sna_present_open(struct sna *sna, ScreenPtr pScreen) { return false; } + static inline void sna_present_update(struct sna *sna) { } + static inline void sna_present_close(struct sna *sna, ScreenPtr pScreen) { } + static inline void sna_present_vblank_handler(struct drm_event_vblank *event) { } ++static inline void sna_present_cancel_flip(struct sna *sna) { } + #endif + +-extern bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, uint32_t rotation); +-extern int sna_crtc_to_pipe(xf86CrtcPtr crtc); +-extern uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc); +-extern uint32_t sna_crtc_id(xf86CrtcPtr crtc); +-extern bool sna_crtc_is_on(xf86CrtcPtr crtc); ++extern unsigned sna_crtc_count_sprites(xf86CrtcPtr crtc); ++extern bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, unsigned idx, uint32_t rotation); ++extern uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc, unsigned idx); + extern bool sna_crtc_is_transformed(xf86CrtcPtr crtc); + ++#define CRTC_VBLANK 0x3 ++#define CRTC_ON 0x80000000 ++ ++uint32_t sna_crtc_id(xf86CrtcPtr crtc); ++ ++static inline unsigned long *sna_crtc_flags(xf86CrtcPtr 
crtc) ++{ ++ unsigned long *flags = crtc->driver_private; ++ assert(flags); ++ return flags; ++} ++ ++static inline unsigned sna_crtc_pipe(xf86CrtcPtr crtc) ++{ ++ return *sna_crtc_flags(crtc) >> 8 & 0xff; ++} ++ ++static inline bool sna_crtc_is_on(xf86CrtcPtr crtc) ++{ ++ return *sna_crtc_flags(crtc) & CRTC_ON; ++} ++ ++static inline void sna_crtc_set_vblank(xf86CrtcPtr crtc) ++{ ++ assert((*sna_crtc_flags(crtc) & CRTC_VBLANK) < 3); ++ ++*sna_crtc_flags(crtc); ++} ++ ++static inline void sna_crtc_clear_vblank(xf86CrtcPtr crtc) ++{ ++ assert(*sna_crtc_flags(crtc) & CRTC_VBLANK); ++ --*sna_crtc_flags(crtc); ++} ++ ++static inline bool sna_crtc_has_vblank(xf86CrtcPtr crtc) ++{ ++ return *sna_crtc_flags(crtc) & CRTC_VBLANK; ++} ++ + CARD32 sna_format_for_depth(int depth); + CARD32 sna_render_format_for_depth(int depth); + +@@ -998,15 +1089,14 @@ static inline uint32_t pixmap_size(PixmapPtr pixmap) + + bool sna_accel_init(ScreenPtr sreen, struct sna *sna); + void sna_accel_create(struct sna *sna); +-void sna_accel_block_handler(struct sna *sna, struct timeval **tv); +-void sna_accel_wakeup_handler(struct sna *sna); +-void sna_accel_watch_flush(struct sna *sna, int enable); ++void sna_accel_block(struct sna *sna, struct timeval **tv); + void sna_accel_flush(struct sna *sna); + void sna_accel_enter(struct sna *sna); + void sna_accel_leave(struct sna *sna); + void sna_accel_close(struct sna *sna); + void sna_accel_free(struct sna *sna); + ++void sna_watch_flush(struct sna *sna, int enable); + void sna_copy_fbcon(struct sna *sna); + + bool sna_composite_create(struct sna *sna); +@@ -1127,6 +1217,16 @@ memcpy_blt(const void *src, void *dst, int bpp, + uint16_t width, uint16_t height); + + void ++affine_blt(const void *src, void *dst, int bpp, ++ int16_t src_x, int16_t src_y, ++ int16_t src_width, int16_t src_height, ++ int32_t src_stride, ++ int16_t dst_x, int16_t dst_y, ++ uint16_t dst_width, uint16_t dst_height, ++ int32_t dst_stride, ++ const struct pixman_f_transform 
*t); ++ ++void + memmove_box(const void *src, void *dst, + int bpp, int32_t stride, + const BoxRec *box, +@@ -1182,6 +1282,31 @@ box_intersect(BoxPtr a, const BoxRec *b) + return true; + } + ++const BoxRec * ++__find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y); ++inline static const BoxRec * ++find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y) ++{ ++ /* Special case for incremental trapezoid clipping */ ++ if (begin == end) ++ return end; ++ ++ /* Quick test if scanline is within range of clip boxes */ ++ if (begin->y2 > y) { ++ assert(end == begin + 1 || ++ __find_clip_box_for_y(begin, end, y) == begin); ++ return begin; ++ } ++ if (y >= end[-1].y2) { ++ assert(end == begin + 1 || ++ __find_clip_box_for_y(begin, end, y) == end); ++ return end; ++ } ++ ++ /* Otherwise bisect to find the first box crossing y */ ++ return __find_clip_box_for_y(begin, end, y); ++} ++ + unsigned sna_cpu_detect(void); + char *sna_cpu_features_to_string(unsigned features, char *line); + +@@ -1237,4 +1362,17 @@ static inline void sigtrap_put(void) + extern int getline(char **line, size_t *len, FILE *file); + #endif + ++static inline void add_shm_flush(struct sna *sna, struct sna_pixmap *priv) ++{ ++ if (!priv->shm) ++ return; ++ ++ DBG(("%s: marking handle=%d for SHM flush\n", ++ __FUNCTION__, priv->cpu_bo->handle)); ++ ++ assert(!priv->flush); ++ sna_add_flush_pixmap(sna, priv, priv->cpu_bo); ++ sna->needs_shm_flush = true; ++} ++ + #endif /* _SNA_H */ +diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c +index baf5f609..25a075cf 100644 +--- a/src/sna/sna_accel.c ++++ b/src/sna/sna_accel.c +@@ -50,8 +50,11 @@ + #endif + #include <shmint.h> + ++#include <X11/extensions/damageproto.h> ++ + #include <sys/time.h> + #include <sys/mman.h> ++#include <sys/ioctl.h> + #include <unistd.h> + + #ifdef HAVE_VALGRIND +@@ -66,7 +69,8 @@ + #define FORCE_FLUSH 0 + #define FORCE_FULL_SYNC 0 /* https://bugs.freedesktop.org/show_bug.cgi?id=61628 */ + +-#define 
DEFAULT_TILING I915_TILING_X ++#define DEFAULT_PIXMAP_TILING I915_TILING_X ++#define DEFAULT_SCANOUT_TILING I915_TILING_X + + #define USE_INPLACE 1 + #define USE_SPANS 0 /* -1 force CPU, 1 force GPU */ +@@ -115,6 +119,11 @@ + #define RECTILINEAR 0x4 + #define OVERWRITES 0x8 + ++#if XFONT2_CLIENT_FUNCS_VERSION >= 1 ++#define AllocateFontPrivateIndex() xfont2_allocate_font_private_index() ++#define FontSetPrivate(font, idx, data) xfont2_font_set_private(font, idx, data) ++#endif ++ + #if 0 + static void __sna_fallback_flush(DrawablePtr d) + { +@@ -213,6 +222,7 @@ static GCOps sna_gc_ops__tmp; + static const GCFuncs sna_gc_funcs; + static const GCFuncs sna_gc_funcs__cpu; + ++static void sna_shm_watch_flush(struct sna *sna, int enable); + static void + sna_poly_fill_rect__gpu(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect); + +@@ -527,10 +537,10 @@ sna_pixmap_alloc_cpu(struct sna *sna, + DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__, + pixmap->drawable.width, pixmap->drawable.height)); + +- hint = 0; +- if ((flags & MOVE_ASYNC_HINT) == 0 && +- ((flags & MOVE_READ) == 0 || (priv->gpu_damage && !priv->clear && !sna->kgem.has_llc))) +- hint = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE; ++ hint = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE; ++ if ((flags & MOVE_ASYNC_HINT) || ++ (priv->gpu_damage && !priv->clear && kgem_bo_is_busy(priv->gpu_bo) && sna->kgem.can_blt_cpu)) ++ hint = 0; + + priv->cpu_bo = kgem_create_cpu_2d(&sna->kgem, + pixmap->drawable.width, +@@ -580,7 +590,7 @@ static void __sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv) + if (priv->cpu_bo->flush) { + assert(!priv->cpu_bo->reusable); + kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo); +- sna_accel_watch_flush(sna, -1); ++ sna_shm_watch_flush(sna, -1); + } + kgem_bo_destroy(&sna->kgem, priv->cpu_bo); + } else if (!IS_STATIC_PTR(priv->ptr)) +@@ -612,9 +622,9 @@ static bool sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv, bool a + + static inline 
uint32_t default_tiling(struct sna *sna, PixmapPtr pixmap) + { +-#if DEFAULT_TILING == I915_TILING_NONE ++#if DEFAULT_PIXMAP_TILING == I915_TILING_NONE + return I915_TILING_NONE; +-#elif DEFAULT_TILING == I915_TILING_X ++#elif DEFAULT_PIXMAP_TILING == I915_TILING_X + return I915_TILING_X; + #else + /* Try to avoid hitting the Y-tiling GTT mapping bug on 855GM */ +@@ -630,15 +640,6 @@ static inline uint32_t default_tiling(struct sna *sna, PixmapPtr pixmap) + pixmap->drawable.height > sna->render.max_3d_size)) + return I915_TILING_X; + +- if (sna_damage_is_all(&sna_pixmap(pixmap)->cpu_damage, +- pixmap->drawable.width, +- pixmap->drawable.height)) { +- DBG(("%s: entire source is damaged, using Y-tiling\n", +- __FUNCTION__)); +- sna_damage_destroy(&sna_pixmap(priv)->gpu_damage); +- return I915_TILING_Y; +- } +- + return I915_TILING_Y; + #endif + } +@@ -666,6 +667,7 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling) + __FUNCTION__, priv->gpu_bo->tiling, tiling, + pixmap->drawable.width, pixmap->drawable.height)); + assert(priv->gpu_damage == NULL || priv->gpu_bo); ++ assert(priv->gpu_bo->tiling != tiling); + + if (priv->pinned) { + DBG(("%s: can't convert pinned bo\n", __FUNCTION__)); +@@ -690,6 +692,12 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling) + return NULL; + } + ++ if (bo->tiling == priv->gpu_bo->tiling) { ++ DBG(("%s: tiling request failed\n", __FUNCTION__)); ++ kgem_bo_destroy(&sna->kgem, bo); ++ return NULL; ++ } ++ + box.x1 = box.y1 = 0; + box.x2 = pixmap->drawable.width; + box.y2 = pixmap->drawable.height; +@@ -824,8 +832,8 @@ create_pixmap(struct sna *sna, ScreenPtr screen, + datasize += adjust; + } + +- DBG(("%s: allocating pixmap %dx%d, depth=%d, size=%ld\n", +- __FUNCTION__, width, height, depth, (long)datasize)); ++ DBG(("%s: allocating pixmap %dx%d, depth=%d/%d, size=%ld\n", ++ __FUNCTION__, width, height, depth, bpp, (long)datasize)); + pixmap = AllocatePixmap(screen, datasize); + if 
(!pixmap) + return NullPixmap; +@@ -878,7 +886,11 @@ __pop_freed_pixmap(struct sna *sna) + pixmap = sna->freed_pixmap; + sna->freed_pixmap = pixmap->devPrivate.ptr; + ++ DBG(("%s: reusing freed pixmap=%ld header\n", ++ __FUNCTION__, pixmap->drawable.serialNumber)); ++ + assert(pixmap->refcnt == 0); ++ assert(pixmap->devKind = 0xdeadbeef); + assert(sna_pixmap(pixmap)); + assert(sna_pixmap(pixmap)->header); + +@@ -990,7 +1002,7 @@ fallback: + } + priv->cpu_bo->pitch = pitch; + kgem_bo_mark_unreusable(priv->cpu_bo); +- sna_accel_watch_flush(sna, 1); ++ sna_shm_watch_flush(sna, 1); + #ifdef DEBUG_MEMORY + sna->debug_memory.cpu_bo_allocs++; + sna->debug_memory.cpu_bo_bytes += kgem_bo_size(priv->cpu_bo); +@@ -1081,6 +1093,18 @@ sna_pixmap_create_scratch(ScreenPtr screen, + return pixmap; + } + ++static unsigned small_copy(const RegionRec *region) ++{ ++ if ((region->extents.x2 - region->extents.x1)*(region->extents.y2 - region->extents.y1) < 1024) { ++ DBG(("%s: region:%dx%d\n", __FUNCTION__, ++ (region->extents.x2 - region->extents.x1), ++ (region->extents.y2 - region->extents.y1))); ++ return COPY_SMALL; ++ } ++ ++ return 0; ++} ++ + #ifdef CREATE_PIXMAP_USAGE_SHARED + static Bool + sna_share_pixmap_backing(PixmapPtr pixmap, ScreenPtr slave, void **fd_handle) +@@ -1124,7 +1148,7 @@ sna_share_pixmap_backing(PixmapPtr pixmap, ScreenPtr slave, void **fd_handle) + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel, + I915_TILING_NONE, +- CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT); ++ CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT); + if (bo == NULL) { + DBG(("%s: allocation failed\n", __FUNCTION__)); + return FALSE; +@@ -1243,7 +1267,7 @@ sna_create_pixmap_shared(struct sna *sna, ScreenPtr screen, + width, height, + pixmap->drawable.bitsPerPixel, + I915_TILING_NONE, +- CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT); ++ CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT); + if (priv->gpu_bo == NULL) { + free(priv); + FreePixmap(pixmap); +@@ 
-1311,7 +1335,7 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, + + if (unlikely((sna->render.prefer_gpu & PREFER_GPU_RENDER) == 0)) + flags &= ~KGEM_CAN_CREATE_GPU; +- if (wedged(sna)) ++ if (wedged(sna) && usage != SNA_CREATE_FB) + flags &= ~KGEM_CAN_CREATE_GTT; + + DBG(("%s: usage=%d, flags=%x\n", __FUNCTION__, usage, flags)); +@@ -1417,10 +1441,13 @@ static void __sna_free_pixmap(struct sna *sna, + __sna_pixmap_free_cpu(sna, priv); + + if (priv->flush) +- sna_accel_watch_flush(sna, -1); ++ sna_watch_flush(sna, -1); + ++#if !NDEBUG ++ pixmap->devKind = 0xdeadbeef; ++#endif + if (priv->header) { +- assert(pixmap->drawable.pScreen == sna->scrn->pScreen); ++ assert(pixmap->drawable.pScreen == to_screen_from_sna(sna)); + assert(!priv->shm); + pixmap->devPrivate.ptr = sna->freed_pixmap; + sna->freed_pixmap = pixmap; +@@ -1485,7 +1512,7 @@ static Bool sna_destroy_pixmap(PixmapPtr pixmap) + if (priv->shm && kgem_bo_is_busy(priv->cpu_bo)) { + DBG(("%s: deferring release of active SHM pixmap=%ld\n", + __FUNCTION__, pixmap->drawable.serialNumber)); +- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); ++ add_shm_flush(sna, priv); + kgem_bo_submit(&sna->kgem, priv->cpu_bo); /* XXX ShmDetach */ + } else + __sna_free_pixmap(sna, pixmap, priv); +@@ -1529,7 +1556,7 @@ static inline bool has_coherent_ptr(struct sna *sna, struct sna_pixmap *priv, un + if (!priv->cpu_bo) + return true; + +- assert(!priv->cpu_bo->needs_flush); ++ assert(!priv->cpu_bo->needs_flush || (flags & MOVE_WRITE) == 0); + assert(priv->pixmap->devKind == priv->cpu_bo->pitch); + return priv->pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu); + } +@@ -1557,6 +1584,11 @@ static inline bool has_coherent_ptr(struct sna *sna, struct sna_pixmap *priv, un + return true; + } + ++ if (priv->pixmap->devPrivate.ptr == MAP(priv->gpu_bo->map__wc)) { ++ assert(priv->mapped == MAPPED_GTT); ++ return true; ++ } ++ + return false; + } + +@@ -1577,6 +1609,16 @@ static inline bool pixmap_inplace(struct sna *sna, + 
return false; + + if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) { ++ if (priv->clear) { ++ DBG(("%s: no, clear GPU bo is busy\n", __FUNCTION__)); ++ return false; ++ } ++ ++ if (flags & MOVE_ASYNC_HINT) { ++ DBG(("%s: no, async hint and GPU bo is busy\n", __FUNCTION__)); ++ return false; ++ } ++ + if ((flags & (MOVE_WRITE | MOVE_READ)) == (MOVE_WRITE | MOVE_READ)) { + DBG(("%s: no, GPU bo is busy\n", __FUNCTION__)); + return false; +@@ -1624,7 +1666,7 @@ static bool sna_pixmap_alloc_gpu(struct sna *sna, + if (pixmap->usage_hint == SNA_CREATE_FB && (sna->flags & SNA_LINEAR_FB) == 0) { + flags |= CREATE_SCANOUT; + tiling = kgem_choose_tiling(&sna->kgem, +- -I915_TILING_X, ++ -DEFAULT_SCANOUT_TILING, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel); +@@ -1861,7 +1903,9 @@ sna_pixmap_undo_cow(struct sna *sna, struct sna_pixmap *priv, unsigned flags) + assert(priv->gpu_bo == cow->bo); + assert(cow->refcnt); + +- if (flags && (flags & MOVE_WRITE) == 0 && IS_COW_OWNER(priv->cow)) ++ if (flags && /* flags == 0 => force decouple */ ++ (flags & MOVE_WRITE) == 0 && ++ (((flags & __MOVE_FORCE) == 0) || IS_COW_OWNER(priv->cow))) + return true; + + if (!IS_COW_OWNER(priv->cow)) +@@ -1933,7 +1977,7 @@ sna_pixmap_undo_cow(struct sna *sna, struct sna_pixmap *priv, unsigned flags) + box.y2 = pixmap->drawable.height; + + if (flags & __MOVE_PRIME) { +- create = CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT; ++ create = CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT; + tiling = I915_TILING_NONE; + } else { + create = 0; +@@ -2021,6 +2065,10 @@ sna_pixmap_make_cow(struct sna *sna, + cow->bo->handle)); + + src_priv->cow = MAKE_COW_OWNER(cow); ++ if (src_priv->flush & FLUSH_WRITE) { ++ assert(!src_priv->shm); ++ sna_add_flush_pixmap(sna, src_priv, src_priv->gpu_bo); ++ } + } + + if (cow == COW(dst_priv->cow)) { +@@ -2267,6 +2315,7 @@ skip_inplace_map: + (flags & MOVE_WRITE ? 
(void *)priv->gpu_bo : (void *)priv->gpu_damage) && priv->cpu_damage == NULL && + priv->gpu_bo->tiling == I915_TILING_NONE && + (flags & MOVE_READ || kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, flags & MOVE_WRITE)) && ++ (!priv->clear || !kgem_bo_is_busy(priv->gpu_bo)) && + ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == 0 || + (!priv->cow && !priv->move_to_gpu && !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)))) { + void *ptr; +@@ -2330,7 +2379,9 @@ skip_inplace_map: + pixmap->devKind, pixmap->devKind * pixmap->drawable.height)); + + if (priv->cpu_bo) { ++ kgem_bo_undo(&sna->kgem, priv->cpu_bo); + if ((flags & MOVE_ASYNC_HINT || priv->cpu_bo->exec) && ++ sna->kgem.can_blt_cpu && + sna->render.fill_one(sna, + pixmap, priv->cpu_bo, priv->clear_color, + 0, 0, +@@ -2344,21 +2395,26 @@ skip_inplace_map: + assert(pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu)); + } + +- assert(pixmap->devKind); +- if (priv->clear_color == 0 || +- pixmap->drawable.bitsPerPixel == 8 || +- priv->clear_color == (1 << pixmap->drawable.depth) - 1) { +- memset(pixmap->devPrivate.ptr, priv->clear_color, +- (size_t)pixmap->devKind * pixmap->drawable.height); +- } else { +- pixman_fill(pixmap->devPrivate.ptr, +- pixmap->devKind/sizeof(uint32_t), +- pixmap->drawable.bitsPerPixel, +- 0, 0, +- pixmap->drawable.width, +- pixmap->drawable.height, +- priv->clear_color); +- } ++ if (sigtrap_get() == 0) { ++ assert(pixmap->devKind); ++ sigtrap_assert_active(); ++ if (priv->clear_color == 0 || ++ pixmap->drawable.bitsPerPixel == 8 || ++ priv->clear_color == (1 << pixmap->drawable.depth) - 1) { ++ memset(pixmap->devPrivate.ptr, priv->clear_color, ++ (size_t)pixmap->devKind * pixmap->drawable.height); ++ } else { ++ pixman_fill(pixmap->devPrivate.ptr, ++ pixmap->devKind/sizeof(uint32_t), ++ pixmap->drawable.bitsPerPixel, ++ 0, 0, ++ pixmap->drawable.width, ++ pixmap->drawable.height, ++ priv->clear_color); ++ } ++ sigtrap_put(); ++ } else ++ return false; + + clear_done: + 
sna_damage_all(&priv->cpu_damage, pixmap); +@@ -2414,6 +2470,10 @@ done: + DBG(("%s: discarding idle GPU bo\n", __FUNCTION__)); + sna_pixmap_free_gpu(sna, priv); + } ++ if (priv->flush) { ++ assert(!priv->shm); ++ sna_add_flush_pixmap(sna, priv, priv->gpu_bo); ++ } + priv->source_count = SOURCE_BIAS; + } + +@@ -2531,6 +2591,9 @@ static bool cpu_clear_boxes(struct sna *sna, + { + struct sna_fill_op fill; + ++ if (!sna->kgem.can_blt_cpu) ++ return false; ++ + if (!sna_fill_init_blt(&fill, sna, + pixmap, priv->cpu_bo, + GXcopy, priv->clear_color, +@@ -2659,6 +2722,10 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable, + } + } + sna_damage_add_to_pixmap(&priv->cpu_damage, region, pixmap); ++ if (priv->flush) { ++ assert(!priv->shm); ++ sna_add_flush_pixmap(sna, priv, priv->gpu_bo); ++ } + + if (dx | dy) + RegionTranslate(region, -dx, -dy); +@@ -2904,17 +2971,22 @@ move_to_cpu: + assert(pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu)); + } + +- assert(pixmap->devKind); +- do { +- pixman_fill(pixmap->devPrivate.ptr, +- pixmap->devKind/sizeof(uint32_t), +- pixmap->drawable.bitsPerPixel, +- box->x1, box->y1, +- box->x2 - box->x1, +- box->y2 - box->y1, +- priv->clear_color); +- box++; +- } while (--n); ++ if (sigtrap_get() == 0) { ++ assert(pixmap->devKind); ++ sigtrap_assert_active(); ++ do { ++ pixman_fill(pixmap->devPrivate.ptr, ++ pixmap->devKind/sizeof(uint32_t), ++ pixmap->drawable.bitsPerPixel, ++ box->x1, box->y1, ++ box->x2 - box->x1, ++ box->y2 - box->y1, ++ priv->clear_color); ++ box++; ++ } while (--n); ++ sigtrap_put(); ++ } else ++ return false; + + clear_done: + if (flags & MOVE_WRITE || +@@ -3209,13 +3281,14 @@ __sna_pixmap_for_gpu(struct sna *sna, PixmapPtr pixmap, unsigned flags) + { + struct sna_pixmap *priv; + ++ assert(flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE)); + if ((flags & __MOVE_FORCE) == 0 && wedged(sna)) + return NULL; + + priv = sna_pixmap(pixmap); + if (priv == NULL) { + DBG(("%s: not attached\n", __FUNCTION__)); +- if ((flags 
& __MOVE_DRI) == 0) ++ if ((flags & (__MOVE_DRI | __MOVE_SCANOUT)) == 0) + return NULL; + + if (pixmap->usage_hint == -1) { +@@ -3238,6 +3311,44 @@ __sna_pixmap_for_gpu(struct sna *sna, PixmapPtr pixmap, unsigned flags) + return priv; + } + ++inline static void sna_pixmap_unclean(struct sna *sna, ++ struct sna_pixmap *priv, ++ unsigned flags) ++{ ++ struct drm_i915_gem_busy busy; ++ ++ assert(DAMAGE_IS_ALL(priv->gpu_damage)); ++ assert(priv->gpu_bo); ++ assert(priv->gpu_bo->proxy == NULL); ++ assert_pixmap_map(priv->pixmap, priv); ++ ++ sna_damage_destroy(&priv->cpu_damage); ++ list_del(&priv->flush_list); ++ ++ if (flags & (__MOVE_DRI | __MOVE_SCANOUT)) ++ return; ++ ++ if (!priv->flush || priv->gpu_bo->exec) ++ return; ++ ++ busy.handle = priv->gpu_bo->handle; ++ busy.busy = 0; ++ ioctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy); ++ ++ DBG(("%s(pixmap=%ld): cleaning foreign bo handle=%u, busy=%x [ring=%d]\n", ++ __FUNCTION__, ++ priv->pixmap->drawable.serialNumber, ++ busy.handle, busy.busy, !!(busy.busy & (0xfffe << 16)))); ++ ++ if (busy.busy) { ++ unsigned mode = KGEM_RENDER; ++ if (busy.busy & (0xfffe << 16)) ++ mode = KGEM_BLT; ++ kgem_bo_mark_busy(&sna->kgem, priv->gpu_bo, mode); ++ } else ++ __kgem_bo_clear_busy(priv->gpu_bo); ++} ++ + struct sna_pixmap * + sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int flags) + { +@@ -3287,12 +3398,14 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl + if (priv->cow) { + unsigned cow = flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE); + ++ assert(cow); ++ + if ((flags & MOVE_READ) == 0) { + if (priv->gpu_damage) { + r.extents = *box; + r.data = NULL; + if (!region_subsumes_damage(&r, priv->gpu_damage)) +- cow |= MOVE_READ; ++ cow |= MOVE_READ | __MOVE_FORCE; + } + } else { + if (priv->cpu_damage) { +@@ -3303,22 +3416,18 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl + } + } + +- if (cow) { +- if 
(!sna_pixmap_undo_cow(sna, priv, cow)) +- return NULL; ++ if (!sna_pixmap_undo_cow(sna, priv, cow)) ++ return NULL; + +- if (priv->gpu_bo == NULL) +- sna_damage_destroy(&priv->gpu_damage); +- } ++ if (priv->gpu_bo == NULL) ++ sna_damage_destroy(&priv->gpu_damage); + } + + if (sna_damage_is_all(&priv->gpu_damage, + pixmap->drawable.width, + pixmap->drawable.height)) { +- assert(priv->gpu_bo); +- assert(priv->gpu_bo->proxy == NULL); +- sna_damage_destroy(&priv->cpu_damage); +- list_del(&priv->flush_list); ++ DBG(("%s: already all-damaged\n", __FUNCTION__)); ++ sna_pixmap_unclean(sna, priv, flags); + goto done; + } + +@@ -3360,10 +3469,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl + return priv; + } + +- if (priv->shm) { +- assert(!priv->flush); +- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); +- } ++ add_shm_flush(sna, priv); + + assert(priv->cpu_damage); + region_set(&r, box); +@@ -3527,7 +3633,8 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box, + } + + if (priv->cow) { +- unsigned cow = MOVE_WRITE | MOVE_READ; ++ unsigned cow = MOVE_WRITE | MOVE_READ | __MOVE_FORCE; ++ assert(cow); + + if (flags & IGNORE_DAMAGE) { + if (priv->gpu_damage) { +@@ -3717,8 +3824,11 @@ create_gpu_bo: + else + move = MOVE_WRITE | MOVE_READ | MOVE_ASYNC_HINT; + +- if (sna_pixmap_move_to_gpu(pixmap, move)) ++ if (sna_pixmap_move_to_gpu(pixmap, move)) { ++ sna_damage_all(&priv->gpu_damage, ++ pixmap); + goto use_gpu_bo; ++ } + } + + if (DAMAGE_IS_ALL(priv->gpu_damage) || +@@ -3934,26 +4044,28 @@ prefer_gpu_bo: + goto move_to_gpu; + } + +- if ((priv->cpu_damage == NULL || flags & IGNORE_DAMAGE)) { +- if (priv->gpu_bo && priv->gpu_bo->tiling) { +- DBG(("%s: prefer to use GPU bo for rendering large pixmaps\n", __FUNCTION__)); +- goto prefer_gpu_bo; ++ if (!priv->shm) { ++ if ((priv->cpu_damage == NULL || flags & IGNORE_DAMAGE)) { ++ if (priv->gpu_bo && priv->gpu_bo->tiling) { ++ DBG(("%s: prefer to use GPU bo for rendering 
large pixmaps\n", __FUNCTION__)); ++ goto prefer_gpu_bo; ++ } ++ ++ if (priv->cpu_bo->pitch >= 4096) { ++ DBG(("%s: prefer to use GPU bo for rendering wide pixmaps\n", __FUNCTION__)); ++ goto prefer_gpu_bo; ++ } + } + +- if (priv->cpu_bo->pitch >= 4096) { +- DBG(("%s: prefer to use GPU bo for rendering wide pixmaps\n", __FUNCTION__)); ++ if ((flags & IGNORE_DAMAGE) == 0 && priv->cpu_bo->snoop) { ++ DBG(("%s: prefer to use GPU bo for reading from snooped target bo\n", __FUNCTION__)); + goto prefer_gpu_bo; + } +- } +- +- if ((flags & IGNORE_DAMAGE) == 0 && priv->cpu_bo->snoop) { +- DBG(("%s: prefer to use GPU bo for reading from snooped target bo\n", __FUNCTION__)); +- goto prefer_gpu_bo; +- } + +- if (!sna->kgem.can_blt_cpu) { +- DBG(("%s: can't render to CPU bo, try to use GPU bo\n", __FUNCTION__)); +- goto prefer_gpu_bo; ++ if (!sna->kgem.can_blt_cpu) { ++ DBG(("%s: can't render to CPU bo, try to use GPU bo\n", __FUNCTION__)); ++ goto prefer_gpu_bo; ++ } + } + } + +@@ -3967,9 +4079,7 @@ prefer_gpu_bo: + } + + if (priv->shm) { +- assert(!priv->flush); +- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); +- ++ add_shm_flush(sna, priv); + /* As we may have flushed and retired,, recheck for busy bo */ + if ((flags & FORCE_GPU) == 0 && !kgem_bo_is_busy(priv->cpu_bo)) + return NULL; +@@ -4019,7 +4129,7 @@ sna_pixmap_create_upload(ScreenPtr screen, + assert(width); + assert(height); + +- if (depth == 1) ++ if (depth < 8) + return create_pixmap(sna, screen, width, height, depth, + CREATE_PIXMAP_USAGE_SCRATCH); + +@@ -4121,27 +4231,21 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) + + if (priv->cow) { + unsigned cow = flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE); ++ assert(cow); + if (flags & MOVE_READ && priv->cpu_damage) + cow |= MOVE_WRITE; +- if (cow) { +- if (!sna_pixmap_undo_cow(sna, priv, cow)) +- return NULL; ++ if (!sna_pixmap_undo_cow(sna, priv, cow)) ++ return NULL; + +- if (priv->gpu_bo == NULL) +- sna_damage_destroy(&priv->gpu_damage); +- } ++ if 
(priv->gpu_bo == NULL) ++ sna_damage_destroy(&priv->gpu_damage); + } + + if (sna_damage_is_all(&priv->gpu_damage, + pixmap->drawable.width, + pixmap->drawable.height)) { + DBG(("%s: already all-damaged\n", __FUNCTION__)); +- assert(DAMAGE_IS_ALL(priv->gpu_damage)); +- assert(priv->gpu_bo); +- assert(priv->gpu_bo->proxy == NULL); +- assert_pixmap_map(pixmap, priv); +- sna_damage_destroy(&priv->cpu_damage); +- list_del(&priv->flush_list); ++ sna_pixmap_unclean(sna, priv, flags); + goto active; + } + +@@ -4206,7 +4310,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) + if (flags & MOVE_INPLACE_HINT || (priv->cpu_damage && priv->cpu_bo == NULL)) + create = CREATE_GTT_MAP | CREATE_INACTIVE; + if (flags & __MOVE_PRIME) +- create |= CREATE_GTT_MAP | CREATE_PRIME | CREATE_EXACT; ++ create |= CREATE_GTT_MAP | CREATE_SCANOUT | CREATE_PRIME | CREATE_EXACT; + + sna_pixmap_alloc_gpu(sna, pixmap, priv, create); + } +@@ -4282,10 +4386,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) + goto done; + } + +- if (priv->shm) { +- assert(!priv->flush); +- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); +- } ++ add_shm_flush(sna, priv); + + n = sna_damage_get_boxes(priv->cpu_damage, &box); + assert(n); +@@ -4534,7 +4635,7 @@ static inline bool box32_trim_and_translate(Box32Rec *box, DrawablePtr d, GCPtr + return box32_clip(box, gc); + } + +-static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y) ++static inline void box_add_xy(BoxPtr box, int16_t x, int16_t y) + { + if (box->x1 > x) + box->x1 = x; +@@ -4547,6 +4648,11 @@ static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y) + box->y2 = y; + } + ++static inline void box_add_pt(BoxPtr box, const DDXPointRec *pt) ++{ ++ box_add_xy(box, pt->x, pt->y); ++} ++ + static inline bool box32_to_box16(const Box32Rec *b32, BoxRec *b16) + { + b16->x1 = b32->x1; +@@ -4864,6 +4970,7 @@ try_upload__inplace(PixmapPtr pixmap, RegionRec *region, + pixmap->devPrivate.ptr = dst; + pixmap->devKind = 
priv->gpu_bo->pitch; + priv->mapped = dst == MAP(priv->gpu_bo->map__cpu) ? MAPPED_CPU : MAPPED_GTT; ++ priv->cpu &= priv->mapped == MAPPED_CPU; + assert(has_coherent_ptr(sna, priv, MOVE_WRITE)); + + box = region_rects(region); +@@ -4923,8 +5030,7 @@ done: + sna_damage_all(&priv->gpu_damage, pixmap); + } + +- if (priv->shm) +- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); ++ add_shm_flush(sna, priv); + } + + assert(!priv->clear); +@@ -5172,6 +5278,16 @@ static inline uint8_t blt_depth(int depth) + } + } + ++inline static void blt_done(struct sna *sna) ++{ ++ sna->blt_state.fill_bo = 0; ++ if (sna->kgem.nbatch && __kgem_ring_empty(&sna->kgem)) { ++ DBG(("%s: flushing BLT operation on empty ring\n", ++ __FUNCTION__)); ++ _kgem_submit(&sna->kgem); ++ } ++} ++ + static bool + sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, + int x, int y, int w, int h, char *bits) +@@ -5217,6 +5333,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, + + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); + assert(kgem_bo_can_blt(&sna->kgem, bo)); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + /* Region is pre-clipped and translated into pixmap space */ + box = region_rects(region); +@@ -5238,6 +5355,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + upload = kgem_create_buffer(&sna->kgem, + bstride*bh, +@@ -5331,7 +5449,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, + box++; + } while (--n); + +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -5381,6 +5499,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, + + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); + assert(kgem_bo_can_blt(&sna->kgem, bo)); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + skip = h * BitmapBytePad(w + left); + for (i = 1 << (gc->depth-1); i; i >>= 1, bits += skip) { +@@ 
-5408,6 +5527,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + upload = kgem_create_buffer(&sna->kgem, + bstride*bh, +@@ -5509,7 +5629,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, + } while (--n); + } + +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -5837,7 +5957,7 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, + if (!sna->render.copy_boxes(sna, alu, + &pixmap->drawable, priv->gpu_bo, sx, sy, + &pixmap->drawable, priv->gpu_bo, tx, ty, +- box, n, 0)) { ++ box, n, small_copy(region))) { + DBG(("%s: fallback - accelerated copy boxes failed\n", + __FUNCTION__)); + goto fallback; +@@ -6098,6 +6218,9 @@ sna_copy_boxes__inplace(struct sna *sna, RegionPtr region, int alu, + + kgem_bo_sync__cpu_full(&sna->kgem, src_priv->gpu_bo, FORCE_FULL_SYNC); + ++ if (sigtrap_get()) ++ return false; ++ + box = region_rects(region); + n = region_num_rects(region); + if (src_priv->gpu_bo->tiling) { +@@ -6137,6 +6260,8 @@ sna_copy_boxes__inplace(struct sna *sna, RegionPtr region, int alu, + } + } + ++ sigtrap_put(); ++ + return true; + + upload_inplace: +@@ -6234,6 +6359,9 @@ upload_inplace: + + assert(has_coherent_ptr(sna, src_priv, MOVE_READ)); + ++ if (sigtrap_get()) ++ return false; ++ + box = region_rects(region); + n = region_num_rects(region); + if (dst_priv->gpu_bo->tiling) { +@@ -6265,15 +6393,19 @@ upload_inplace: + } while (--n); + + if (!dst_priv->shm) { +- assert(ptr == MAP(dst_priv->gpu_bo->map__cpu)); + dst_pixmap->devPrivate.ptr = ptr; + dst_pixmap->devKind = dst_priv->gpu_bo->pitch; +- dst_priv->mapped = MAPPED_CPU; ++ if (ptr == MAP(dst_priv->gpu_bo->map__cpu)) { ++ dst_priv->mapped = MAPPED_CPU; ++ dst_priv->cpu = true; ++ } else ++ dst_priv->mapped = MAPPED_GTT; + assert_pixmap_map(dst_pixmap, dst_priv); +- dst_priv->cpu = true; + } + } + ++ sigtrap_put(); ++ 
+ return true; + } + +@@ -6326,6 +6458,16 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, + + assert(region_num_rects(region)); + ++ if (src_priv && ++ src_priv->gpu_bo == NULL && ++ src_priv->cpu_bo == NULL && ++ src_priv->ptr == NULL) { ++ /* Rare but still happens, nothing to copy */ ++ DBG(("%s: src pixmap=%ld is empty\n", ++ __FUNCTION__, src_pixmap->drawable.serialNumber)); ++ return; ++ } ++ + if (src_pixmap == dst_pixmap) + return sna_self_copy_boxes(src, dst, gc, + region, dx, dy, +@@ -6491,15 +6633,14 @@ discard_cow: + sna_damage_all(&dst_priv->gpu_damage, dst_pixmap); + sna_damage_destroy(&dst_priv->cpu_damage); + list_del(&dst_priv->flush_list); +- if (dst_priv->shm) +- sna_add_flush_pixmap(sna, dst_priv, dst_priv->cpu_bo); ++ add_shm_flush(sna, dst_priv); + return; + } + } + if (!sna->render.copy_boxes(sna, alu, + &src_pixmap->drawable, src_priv->gpu_bo, src_dx, src_dy, + &dst_pixmap->drawable, bo, 0, 0, +- box, n, 0)) { ++ box, n, small_copy(region))) { + DBG(("%s: fallback - accelerated copy boxes failed\n", + __FUNCTION__)); + goto fallback; +@@ -6536,7 +6677,7 @@ discard_cow: + if (!sna->render.copy_boxes(sna, alu, + &src_pixmap->drawable, src_priv->gpu_bo, src_dx, src_dy, + &dst_pixmap->drawable, bo, 0, 0, +- box, n, 0)) { ++ box, n, small_copy(region))) { + DBG(("%s: fallback - accelerated copy boxes failed\n", + __FUNCTION__)); + goto fallback; +@@ -6571,15 +6712,12 @@ discard_cow: + if (replaces && UNDO) + kgem_bo_pair_undo(&sna->kgem, dst_priv->gpu_bo, dst_priv->cpu_bo); + +- if (src_priv->shm) { +- assert(!src_priv->flush); +- sna_add_flush_pixmap(sna, src_priv, src_priv->cpu_bo); +- } ++ add_shm_flush(sna, src_priv); + + if (!sna->render.copy_boxes(sna, alu, + &src_pixmap->drawable, src_priv->cpu_bo, src_dx, src_dy, + &dst_pixmap->drawable, bo, 0, 0, +- box, n, src_priv->shm ? COPY_LAST : 0)) { ++ box, n, small_copy(region) | (src_priv->shm ? 
COPY_LAST : 0))) { + DBG(("%s: fallback - accelerated copy boxes failed\n", + __FUNCTION__)); + goto fallback; +@@ -6631,8 +6769,7 @@ discard_cow: + ok = sna->render.copy_boxes(sna, alu, + &src_pixmap->drawable, src_bo, src_dx, src_dy, + &dst_pixmap->drawable, bo, 0, 0, +- box, n, COPY_LAST); +- ++ box, n, small_copy(region) | COPY_LAST); + kgem_bo_sync__cpu(&sna->kgem, src_bo); + assert(src_bo->rq == NULL); + kgem_bo_destroy(&sna->kgem, src_bo); +@@ -6780,18 +6917,22 @@ fallback: + return; + } + +- assert(dst_pixmap->devPrivate.ptr); +- assert(dst_pixmap->devKind); +- do { +- pixman_fill(dst_pixmap->devPrivate.ptr, +- dst_pixmap->devKind/sizeof(uint32_t), +- dst_pixmap->drawable.bitsPerPixel, +- box->x1, box->y1, +- box->x2 - box->x1, +- box->y2 - box->y1, +- src_priv->clear_color); +- box++; +- } while (--n); ++ if (sigtrap_get() == 0) { ++ assert(dst_pixmap->devPrivate.ptr); ++ assert(dst_pixmap->devKind); ++ sigtrap_assert_active(); ++ do { ++ pixman_fill(dst_pixmap->devPrivate.ptr, ++ dst_pixmap->devKind/sizeof(uint32_t), ++ dst_pixmap->drawable.bitsPerPixel, ++ box->x1, box->y1, ++ box->x2 - box->x1, ++ box->y2 - box->y1, ++ src_priv->clear_color); ++ box++; ++ } while (--n); ++ sigtrap_put(); ++ } + } else if (!sna_copy_boxes__inplace(sna, region, alu, + src_pixmap, src_priv, + src_dx, src_dy, +@@ -6848,36 +6989,39 @@ fallback: + ((char *)src_pixmap->devPrivate.ptr + + src_dy * src_stride + src_dx * bpp / 8); + +- do { +- DBG(("%s: memcpy_blt(box=(%d, %d), (%d, %d), src=(%d, %d), pitches=(%d, %d))\n", +- __FUNCTION__, +- box->x1, box->y1, +- box->x2 - box->x1, +- box->y2 - box->y1, +- src_dx, src_dy, +- src_stride, dst_stride)); +- +- assert(box->x1 >= 0); +- assert(box->y1 >= 0); +- assert(box->x2 <= dst_pixmap->drawable.width); +- assert(box->y2 <= dst_pixmap->drawable.height); +- +- assert(box->x1 + src_dx >= 0); +- assert(box->y1 + src_dy >= 0); +- assert(box->x2 + src_dx <= src_pixmap->drawable.width); +- assert(box->y2 + src_dy <= 
src_pixmap->drawable.height); +- assert(has_coherent_ptr(sna, src_priv, MOVE_READ)); +- assert(has_coherent_ptr(sna, dst_priv, MOVE_WRITE)); +- assert(src_stride); +- assert(dst_stride); +- memcpy_blt(src_bits, dst_bits, bpp, +- src_stride, dst_stride, +- box->x1, box->y1, +- box->x1, box->y1, +- box->x2 - box->x1, +- box->y2 - box->y1); +- box++; +- } while (--n); ++ if (sigtrap_get() == 0) { ++ do { ++ DBG(("%s: memcpy_blt(box=(%d, %d), (%d, %d), src=(%d, %d), pitches=(%d, %d))\n", ++ __FUNCTION__, ++ box->x1, box->y1, ++ box->x2 - box->x1, ++ box->y2 - box->y1, ++ src_dx, src_dy, ++ src_stride, dst_stride)); ++ ++ assert(box->x1 >= 0); ++ assert(box->y1 >= 0); ++ assert(box->x2 <= dst_pixmap->drawable.width); ++ assert(box->y2 <= dst_pixmap->drawable.height); ++ ++ assert(box->x1 + src_dx >= 0); ++ assert(box->y1 + src_dy >= 0); ++ assert(box->x2 + src_dx <= src_pixmap->drawable.width); ++ assert(box->y2 + src_dy <= src_pixmap->drawable.height); ++ assert(has_coherent_ptr(sna, src_priv, MOVE_READ)); ++ assert(has_coherent_ptr(sna, dst_priv, MOVE_WRITE)); ++ assert(src_stride); ++ assert(dst_stride); ++ memcpy_blt(src_bits, dst_bits, bpp, ++ src_stride, dst_stride, ++ box->x1, box->y1, ++ box->x1, box->y1, ++ box->x2 - box->x1, ++ box->y2 - box->y1); ++ box++; ++ } while (--n); ++ sigtrap_put(); ++ } + } else { + DBG(("%s: fallback -- miCopyRegion\n", __FUNCTION__)); + +@@ -6931,7 +7075,8 @@ sna_do_copy(DrawablePtr src, DrawablePtr dst, GCPtr gc, + + /* Short cut for unmapped windows */ + if (dst->type == DRAWABLE_WINDOW && !((WindowPtr)dst)->realized) { +- DBG(("%s: unmapped\n", __FUNCTION__)); ++ DBG(("%s: unmapped/unrealized dst (pixmap=%ld)\n", ++ __FUNCTION__, get_window_pixmap((WindowPtr)dst))); + return NULL; + } + +@@ -7115,19 +7260,28 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc, + if (gc->planemask == 0) + return NULL; + +- DBG(("%s: src=(%d, %d)x(%d, %d)+(%d, %d) -> dst=(%d, %d)+(%d, %d); alu=%d, pm=%lx, depth=%d\n", ++ if 
(sna->ignore_copy_area) ++ return NULL; ++ ++ DBG(("%s: src=pixmap=%ld:(%d, %d)x(%d, %d)+(%d, %d) -> dst=pixmap=%ld:(%d, %d)+(%d, %d); alu=%d, pm=%lx, depth=%d\n", + __FUNCTION__, ++ get_drawable_pixmap(src)->drawable.serialNumber, + src_x, src_y, width, height, src->x, src->y, ++ get_drawable_pixmap(dst)->drawable.serialNumber, + dst_x, dst_y, dst->x, dst->y, + gc->alu, gc->planemask, gc->depth)); + + if (FORCE_FALLBACK || !ACCEL_COPY_AREA || wedged(sna) || +- !PM_IS_SOLID(dst, gc->planemask) || gc->depth < 8) ++ !PM_IS_SOLID(dst, gc->planemask) || gc->depth < 8) { ++ DBG(("%s: fallback copy\n", __FUNCTION__)); + copy = sna_fallback_copy_boxes; +- else if (src == dst) ++ } else if (src == dst) { ++ DBG(("%s: self copy\n", __FUNCTION__)); + copy = sna_self_copy_boxes; +- else ++ } else { ++ DBG(("%s: normal copy\n", __FUNCTION__)); + copy = sna_copy_boxes; ++ } + + return sna_do_copy(src, dst, gc, + src_x, src_y, +@@ -7136,30 +7290,21 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc, + copy, 0, NULL); + } + +-static const BoxRec * +-find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y) ++const BoxRec * ++__find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y) + { +- const BoxRec *mid; +- +- if (end == begin) +- return end; +- +- if (end - begin == 1) { ++ assert(end - begin > 1); ++ do { ++ const BoxRec *mid = begin + (end - begin) / 2; ++ if (mid->y2 > y) ++ end = mid; ++ else ++ begin = mid; ++ } while (end > begin + 1); + if (begin->y2 > y) +- return begin; ++ return begin; + else +- return end; +- } +- +- mid = begin + (end - begin) / 2; +- if (mid->y2 > y) +- /* If no box is found in [begin, mid], the function +- * will return @mid, which is then known to be the +- * correct answer. 
+- */ +- return find_clip_box_for_y(begin, mid, y); +- else +- return find_clip_box_for_y(mid, end, y); ++ return end; + } + + struct sna_fill_spans { +@@ -8223,6 +8368,8 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, + } + br13 |= blt_depth(drawable->depth) << 24; + br13 |= copy_ROP[gc->alu] << 16; ++ DBG(("%s: target-depth=%d, alu=%d, bg=%08x, fg=%08x\n", ++ __FUNCTION__, drawable->depth, gc->alu, gc->bgPixel, gc->fgPixel)); + + kgem_set_mode(&sna->kgem, KGEM_BLT, arg->bo); + assert(kgem_bo_can_blt(&sna->kgem, arg->bo)); +@@ -8255,6 +8402,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, + return; /* XXX fallback? */ + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, arg->bo); + + assert(sna->kgem.mode == KGEM_BLT); + if (sna->kgem.gen >= 0100) { +@@ -8270,8 +8418,8 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); +- b[5] = gc->bgPixel; +- b[6] = gc->fgPixel; ++ b[6] = gc->bgPixel; ++ b[7] = gc->fgPixel; + + dst = (uint8_t *)&b[8]; + sna->kgem.nbatch += 8 + src_stride; +@@ -8322,6 +8470,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, + return; /* XXX fallback? */ + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, arg->bo); + + upload = kgem_create_buffer(&sna->kgem, + bstride*bh, +@@ -8408,7 +8557,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, + sna_damage_add_to_pixmap(arg->damage, region, pixmap); + } + assert_pixmap_damage(pixmap); +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + } + + static void +@@ -8472,6 +8621,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, + return; /* XXX fallback? 
*/ + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, arg->bo); + + upload = kgem_create_buffer(&sna->kgem, + bstride*bh, +@@ -8588,6 +8738,8 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, + } + } + ++ kgem_bcs_set_tiling(&sna->kgem, upload, arg->bo); ++ + assert(sna->kgem.mode == KGEM_BLT); + b = sna->kgem.batch + sna->kgem.nbatch; + if (sna->kgem.gen >= 0100) { +@@ -8641,7 +8793,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, + sna_damage_add_to_pixmap(arg->damage, region, dst_pixmap); + } + assert_pixmap_damage(dst_pixmap); +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + } + + static RegionPtr +@@ -8895,36 +9047,11 @@ sna_poly_point_extents(DrawablePtr drawable, GCPtr gc, + last.x += pt->x; + last.y += pt->y; + pt++; +- box_add_pt(&box, last.x, last.y); ++ box_add_xy(&box, last.x, last.y); + } + } else { +- --n; ++pt; +- while (n >= 8) { +- box_add_pt(&box, pt[0].x, pt[0].y); +- box_add_pt(&box, pt[1].x, pt[1].y); +- box_add_pt(&box, pt[2].x, pt[2].y); +- box_add_pt(&box, pt[3].x, pt[3].y); +- box_add_pt(&box, pt[4].x, pt[4].y); +- box_add_pt(&box, pt[5].x, pt[5].y); +- box_add_pt(&box, pt[6].x, pt[6].y); +- box_add_pt(&box, pt[7].x, pt[7].y); +- pt += 8; +- n -= 8; +- } +- if (n & 4) { +- box_add_pt(&box, pt[0].x, pt[0].y); +- box_add_pt(&box, pt[1].x, pt[1].y); +- box_add_pt(&box, pt[2].x, pt[2].y); +- box_add_pt(&box, pt[3].x, pt[3].y); +- pt += 4; +- } +- if (n & 2) { +- box_add_pt(&box, pt[0].x, pt[0].y); +- box_add_pt(&box, pt[1].x, pt[1].y); +- pt += 2; +- } +- if (n & 1) +- box_add_pt(&box, pt[0].x, pt[0].y); ++ while (--n) ++ box_add_pt(&box, ++pt); + } + box.x2++; + box.y2++; +@@ -9636,7 +9763,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc, + y += pt->y; + if (blt) + blt &= pt->x == 0 || pt->y == 0; +- box_add_pt(&box, x, y); ++ box_add_xy(&box, x, y); + } + } else { + int x = box.x1; +@@ -9648,7 +9775,7 @@ sna_poly_line_extents(DrawablePtr drawable, 
GCPtr gc, + x = pt->x; + y = pt->y; + } +- box_add_pt(&box, pt->x, pt->y); ++ box_add_pt(&box, pt); + } + } + box.x2++; +@@ -10037,7 +10164,7 @@ out: + RegionUninit(&data.region); + } + +-static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) ++static inline bool box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) + { + if (seg->x1 == seg->x2) { + if (seg->y1 > seg->y2) { +@@ -10051,6 +10178,9 @@ static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) + if (gc->capStyle != CapNotLast) + b->y2++; + } ++ if (b->y1 >= b->y2) ++ return false; ++ + b->x1 = seg->x1; + b->x2 = seg->x1 + 1; + } else { +@@ -10065,6 +10195,9 @@ static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) + if (gc->capStyle != CapNotLast) + b->x2++; + } ++ if (b->x1 >= b->x2) ++ return false; ++ + b->y1 = seg->y1; + b->y2 = seg->y1 + 1; + } +@@ -10073,6 +10206,7 @@ static inline void box_from_seg(BoxPtr b, const xSegment *seg, GCPtr gc) + __FUNCTION__, + seg->x1, seg->y1, seg->x2, seg->y2, + b->x1, b->y1, b->x2, b->y2)); ++ return true; + } + + static bool +@@ -10107,12 +10241,13 @@ sna_poly_segment_blt(DrawablePtr drawable, + nbox = ARRAY_SIZE(boxes); + n -= nbox; + do { +- box_from_seg(b, seg++, gc); +- if (b->y2 > b->y1 && b->x2 > b->x1) { ++ if (box_from_seg(b, seg++, gc)) { ++ assert(!box_empty(b)); + b->x1 += dx; + b->x2 += dx; + b->y1 += dy; + b->y2 += dy; ++ assert(!box_empty(b)); + b++; + } + } while (--nbox); +@@ -10131,7 +10266,10 @@ sna_poly_segment_blt(DrawablePtr drawable, + nbox = ARRAY_SIZE(boxes); + n -= nbox; + do { +- box_from_seg(b++, seg++, gc); ++ if (box_from_seg(b, seg++, gc)) { ++ assert(!box_empty(b)); ++ b++; ++ } + } while (--nbox); + + if (b != boxes) { +@@ -10156,7 +10294,10 @@ sna_poly_segment_blt(DrawablePtr drawable, + do { + BoxRec box; + +- box_from_seg(&box, seg++, gc); ++ if (!box_from_seg(&box, seg++, gc)) ++ continue; ++ ++ assert(!box_empty(&box)); + box.x1 += drawable->x; + box.x2 += drawable->x; + 
box.y1 += drawable->y; +@@ -10174,6 +10315,7 @@ sna_poly_segment_blt(DrawablePtr drawable, + b->x2 += dx; + b->y1 += dy; + b->y2 += dy; ++ assert(!box_empty(b)); + if (++b == last_box) { + fill.boxes(sna, &fill, boxes, last_box-boxes); + if (damage) +@@ -10185,7 +10327,10 @@ sna_poly_segment_blt(DrawablePtr drawable, + } while (--n); + } else { + do { +- box_from_seg(b, seg++, gc); ++ if (!box_from_seg(b, seg++, gc)) ++ continue; ++ ++ assert(!box_empty(b)); + b->x1 += drawable->x; + b->x2 += drawable->x; + b->y1 += drawable->y; +@@ -10195,6 +10340,7 @@ sna_poly_segment_blt(DrawablePtr drawable, + b->x2 += dx; + b->y1 += dy; + b->y2 += dy; ++ assert(!box_empty(b)); + if (++b == last_box) { + fill.boxes(sna, &fill, boxes, last_box-boxes); + if (damage) +@@ -10319,8 +10465,11 @@ sna_poly_zero_segment_blt(DrawablePtr drawable, + } + b->x2++; + b->y2++; +- if (oc1 | oc2) +- box_intersect(b, extents); ++ ++ if ((oc1 | oc2) && !box_intersect(b, extents)) ++ continue; ++ ++ assert(!box_empty(b)); + if (++b == last_box) { + ret = &&rectangle_continue; + goto *jump; +@@ -10383,6 +10532,7 @@ rectangle_continue: + __FUNCTION__, x1, y1, + b->x1, b->y1, b->x2, b->y2)); + ++ assert(!box_empty(b)); + if (++b == last_box) { + ret = &&X_continue; + goto *jump; +@@ -10407,6 +10557,7 @@ X_continue: + b->x2 = x1 + 1; + b->y2 = b->y1 + 1; + ++ assert(!box_empty(b)); + if (++b == last_box) { + ret = &&X2_continue; + goto *jump; +@@ -10468,6 +10619,7 @@ X2_continue: + b->y2 = y1 + 1; + b->x2 = x1 + 1; + ++ assert(!box_empty(b)); + if (++b == last_box) { + ret = &&Y_continue; + goto *jump; +@@ -10491,6 +10643,7 @@ Y_continue: + b->y2 = y1 + 1; + b->x2 = x1 + 1; + ++ assert(!box_empty(b)); + if (++b == last_box) { + ret = &&Y2_continue; + goto *jump; +@@ -11785,14 +11938,29 @@ sna_poly_fill_rect_blt(DrawablePtr drawable, + if (nbox > ARRAY_SIZE(boxes)) + nbox = ARRAY_SIZE(boxes); + n -= nbox; +- do { ++ while (nbox >= 2) { ++ b[0].x1 = rect[0].x + dx; ++ b[0].y1 = rect[0].y + dy; ++ 
b[0].x2 = b[0].x1 + rect[0].width; ++ b[0].y2 = b[0].y1 + rect[0].height; ++ ++ b[1].x1 = rect[1].x + dx; ++ b[1].y1 = rect[1].y + dy; ++ b[1].x2 = b[1].x1 + rect[1].width; ++ b[1].y2 = b[1].y1 + rect[1].height; ++ ++ b += 2; ++ rect += 2; ++ nbox -= 2; ++ } ++ if (nbox) { + b->x1 = rect->x + dx; + b->y1 = rect->y + dy; + b->x2 = b->x1 + rect->width; + b->y2 = b->y1 + rect->height; + b++; + rect++; +- } while (--nbox); ++ } + fill.boxes(sna, &fill, boxes, b-boxes); + b = boxes; + } while (n); +@@ -11802,14 +11970,29 @@ sna_poly_fill_rect_blt(DrawablePtr drawable, + if (nbox > ARRAY_SIZE(boxes)) + nbox = ARRAY_SIZE(boxes); + n -= nbox; +- do { ++ while (nbox >= 2) { ++ b[0].x1 = rect[0].x; ++ b[0].y1 = rect[0].y; ++ b[0].x2 = b[0].x1 + rect[0].width; ++ b[0].y2 = b[0].y1 + rect[0].height; ++ ++ b[1].x1 = rect[1].x; ++ b[1].y1 = rect[1].y; ++ b[1].x2 = b[1].x1 + rect[1].width; ++ b[1].y2 = b[1].y1 + rect[1].height; ++ ++ b += 2; ++ rect += 2; ++ nbox -= 2; ++ } ++ if (nbox) { + b->x1 = rect->x; + b->y1 = rect->y; + b->x2 = b->x1 + rect->width; + b->y2 = b->y1 + rect->height; + b++; + rect++; +- } while (--nbox); ++ } + fill.boxes(sna, &fill, boxes, b-boxes); + b = boxes; + } while (n); +@@ -12192,6 +12375,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + assert(extents->x1 + dx >= 0); +@@ -12335,6 +12519,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, + + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); + } while (1); + } else { + RegionRec clip; +@@ -12403,6 +12588,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, + if (!kgem_check_batch(&sna->kgem, 3)) { + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); + + unwind_batch = sna->kgem.nbatch; + 
unwind_reloc = sna->kgem.nreloc; +@@ -12499,6 +12685,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, + DBG(("%s: emitting split batch\n", __FUNCTION__)); + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, tile_bo, bo); + + unwind_batch = sna->kgem.nbatch; + unwind_reloc = sna->kgem.nreloc; +@@ -12572,7 +12759,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, + } + done: + assert_pixmap_damage(pixmap); +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -13128,6 +13315,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + if (!clipped) { + dx += drawable->x; +@@ -13240,6 +13428,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, + + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + } while (1); + } else { + RegionRec clip; +@@ -13297,6 +13486,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, + if (!kgem_check_batch(&sna->kgem, 3)) { + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + assert(sna->kgem.mode == KGEM_BLT); + b = sna->kgem.batch + sna->kgem.nbatch; +@@ -13369,6 +13559,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, + if (!kgem_check_batch(&sna->kgem, 3)) { + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + assert(sna->kgem.mode == KGEM_BLT); + b = sna->kgem.batch + sna->kgem.nbatch; +@@ -13419,7 +13610,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, + } + + assert_pixmap_damage(pixmap); +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -13499,6 +13690,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, + get_drawable_deltas(drawable, pixmap, &dx, &dy); + 
kgem_set_mode(&sna->kgem, KGEM_BLT, bo); + assert(kgem_bo_can_blt(&sna->kgem, bo)); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + br00 = 3 << 20; + br13 = bo->pitch; +@@ -13543,6 +13735,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + assert(sna->kgem.mode == KGEM_BLT); + b = sna->kgem.batch + sna->kgem.nbatch; +@@ -13606,6 +13799,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + upload = kgem_create_buffer(&sna->kgem, + bstride*bh, +@@ -13736,6 +13930,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + assert(sna->kgem.mode == KGEM_BLT); + b = sna->kgem.batch + sna->kgem.nbatch; +@@ -13797,6 +13992,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + upload = kgem_create_buffer(&sna->kgem, + bstride*bh, +@@ -13927,6 +14123,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + assert(sna->kgem.mode == KGEM_BLT); + b = sna->kgem.batch + sna->kgem.nbatch; +@@ -13987,6 +14184,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + upload = kgem_create_buffer(&sna->kgem, + bstride*bh, +@@ -14064,7 +14262,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, + } + } + +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -14126,6 +14324,7 @@ sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna, + return; /* XXX fallback? 
*/ + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + assert(sna->kgem.mode == KGEM_BLT); + b = sna->kgem.batch + sna->kgem.nbatch; +@@ -14251,6 +14450,7 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna, + return; /* XXX fallback? */ + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + assert(sna->kgem.mode == KGEM_BLT); + b = sna->kgem.batch + sna->kgem.nbatch; +@@ -14414,6 +14614,7 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable, + get_drawable_deltas(drawable, pixmap, &dx, &dy); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); + assert(kgem_bo_can_blt(&sna->kgem, bo)); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + br00 = XY_MONO_SRC_COPY_IMM | 3 << 20; + br13 = bo->pitch; +@@ -14526,7 +14727,7 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable, + } + + assert_pixmap_damage(pixmap); +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -14559,6 +14760,7 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable, + get_drawable_deltas(drawable, pixmap, &dx, &dy); + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); + assert(kgem_bo_can_blt(&sna->kgem, bo)); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + br00 = XY_MONO_SRC_COPY | 3 << 20; + br13 = bo->pitch; +@@ -14673,7 +14875,7 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable, + assert_pixmap_damage(pixmap); + if (tile) + kgem_bo_destroy(&sna->kgem, tile); +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -15281,6 +15483,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, + } + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + DBG(("%s: glyph clip box (%d, %d), (%d, %d)\n", + __FUNCTION__, +@@ -15368,6 +15571,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, + if (!kgem_check_batch(&sna->kgem, 3+len)) { + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + 
+ DBG(("%s: new batch, glyph clip box (%d, %d), (%d, %d)\n", + __FUNCTION__, +@@ -15479,7 +15683,7 @@ skip: + } + + assert_pixmap_damage(pixmap); +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -16002,6 +16206,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, + } + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + unwind_batch = sna->kgem.nbatch; + unwind_reloc = sna->kgem.nreloc; +@@ -16111,6 +16316,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, + if (!kgem_check_batch(&sna->kgem, 3+len)) { + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + unwind_batch = sna->kgem.nbatch; + unwind_reloc = sna->kgem.nreloc; +@@ -16229,7 +16435,7 @@ skip: + } + + assert_pixmap_damage(pixmap); +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -16450,6 +16656,7 @@ sna_push_pixels_solid_blt(GCPtr gc, + + kgem_set_mode(&sna->kgem, KGEM_BLT, bo); + assert(kgem_bo_can_blt(&sna->kgem, bo)); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + /* Region is pre-clipped and translated into pixmap space */ + box = region_rects(region); +@@ -16471,6 +16678,7 @@ sna_push_pixels_solid_blt(GCPtr gc, + return false; + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + upload = kgem_create_buffer(&sna->kgem, + bstride*bh, +@@ -16564,7 +16772,7 @@ sna_push_pixels_solid_blt(GCPtr gc, + box++; + } while (--n); + +- sna->blt_state.fill_bo = 0; ++ blt_done(sna); + return true; + } + +@@ -16754,7 +16962,9 @@ static int sna_create_gc(GCPtr gc) + + gc->freeCompClip = 0; + gc->pCompositeClip = 0; ++#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,19,99,1,0) + gc->pRotatedPixmap = 0; ++#endif + + fb_gc(gc)->bpp = bits_per_pixel(gc->depth); + +@@ -16789,7 +16999,8 @@ sna_get_image__inplace(PixmapPtr pixmap, + break; + } + +- if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, 
FORCE_FULL_SYNC)) ++ if ((flags & MOVE_INPLACE_HINT) == 0 && ++ !kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC)) + return false; + + if (idle && __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) +@@ -16801,11 +17012,19 @@ sna_get_image__inplace(PixmapPtr pixmap, + assert(sna_damage_contains_box(&priv->gpu_damage, ®ion->extents) == PIXMAN_REGION_IN); + assert(sna_damage_contains_box(&priv->cpu_damage, ®ion->extents) == PIXMAN_REGION_OUT); + +- src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); +- if (src == NULL) +- return false; ++ if (kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC)) { ++ src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); ++ if (src == NULL) ++ return false; + +- kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); ++ kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); ++ } else { ++ src = kgem_bo_map__wc(&sna->kgem, priv->gpu_bo); ++ if (src == NULL) ++ return false; ++ ++ kgem_bo_sync__gtt(&sna->kgem, priv->gpu_bo); ++ } + + if (sigtrap_get()) + return false; +@@ -16833,12 +17052,11 @@ sna_get_image__inplace(PixmapPtr pixmap, + region->extents.x2 - region->extents.x1, + region->extents.y2 - region->extents.y1); + if (!priv->shm) { +- assert(src == MAP(priv->gpu_bo->map__cpu)); + pixmap->devPrivate.ptr = src; + pixmap->devKind = priv->gpu_bo->pitch; +- priv->mapped = MAPPED_CPU; ++ priv->mapped = src == MAP(priv->gpu_bo->map__cpu) ? 
MAPPED_CPU : MAPPED_GTT; + assert_pixmap_map(pixmap, priv); +- priv->cpu = true; ++ priv->cpu &= priv->mapped == MAPPED_CPU; + } + } + +@@ -16930,7 +17148,7 @@ sna_get_image__fast(PixmapPtr pixmap, + if (priv == NULL || priv->gpu_damage == NULL) + return false; + +- if (priv->clear) { ++ if (priv->clear && sigtrap_get() == 0) { + int w = region->extents.x2 - region->extents.x1; + int h = region->extents.y2 - region->extents.y1; + int pitch = PixmapBytePad(w, pixmap->drawable.depth); +@@ -16939,6 +17157,7 @@ sna_get_image__fast(PixmapPtr pixmap, + __FUNCTION__, priv->clear_color)); + assert(DAMAGE_IS_ALL(priv->gpu_damage)); + assert(priv->cpu_damage == NULL); ++ sigtrap_assert_active(); + + if (priv->clear_color == 0 || + pixmap->drawable.bitsPerPixel == 8 || +@@ -16955,6 +17174,7 @@ sna_get_image__fast(PixmapPtr pixmap, + priv->clear_color); + } + ++ sigtrap_put(); + return true; + } + +@@ -17001,8 +17221,7 @@ sna_get_image(DrawablePtr drawable, + if (ACCEL_GET_IMAGE && + !FORCE_FALLBACK && + format == ZPixmap && +- drawable->bitsPerPixel >= 8 && +- PM_IS_SOLID(drawable, mask)) { ++ drawable->bitsPerPixel >= 8) { + PixmapPtr pixmap = get_drawable_pixmap(drawable); + int16_t dx, dy; + +@@ -17014,7 +17233,7 @@ sna_get_image(DrawablePtr drawable, + region.data = NULL; + + if (sna_get_image__fast(pixmap, ®ion, dst, flags)) +- return; ++ goto apply_planemask; + + if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, + ®ion, flags)) +@@ -17032,6 +17251,16 @@ sna_get_image(DrawablePtr drawable, + region.extents.x1, region.extents.y1, 0, 0, w, h); + sigtrap_put(); + } ++ ++apply_planemask: ++ if (!PM_IS_SOLID(drawable, mask)) { ++ FbStip pm = fbReplicatePixel(mask, drawable->bitsPerPixel); ++ FbStip *d = (FbStip *)dst; ++ int i, n = PixmapBytePad(w, drawable->depth) / sizeof(FbStip) * h; ++ ++ for (i = 0; i < n; i++) ++ d[i] &= pm; ++ } + } else { + region.extents.x1 = x + drawable->x; + region.extents.y1 = y + drawable->y; +@@ -17162,17 +17391,19 @@ void 
sna_accel_flush(struct sna *sna) + __sna_free_pixmap(sna, priv->pixmap, priv); + } + } else { ++ unsigned hints; + DBG(("%s: flushing DRI pixmap=%ld\n", __FUNCTION__, + priv->pixmap->drawable.serialNumber)); + assert(priv->flush); +- if (sna_pixmap_move_to_gpu(priv->pixmap, +- MOVE_READ | __MOVE_FORCE)) { +- if (priv->flush & IS_CLIPPED) { ++ hints = MOVE_READ | __MOVE_FORCE; ++ if (priv->flush & FLUSH_WRITE) ++ hints |= MOVE_WRITE; ++ if (sna_pixmap_move_to_gpu(priv->pixmap, hints)) { ++ if (priv->flush & FLUSH_WRITE) { + kgem_bo_unclean(&sna->kgem, priv->gpu_bo); + sna_damage_all(&priv->gpu_damage, priv->pixmap); + assert(priv->cpu_damage == NULL); +- priv->clear = false; +- priv->cpu = false; ++ assert(priv->clear == false); + } + } + } +@@ -17184,10 +17415,46 @@ void sna_accel_flush(struct sna *sna) + } + + static void +-sna_accel_flush_callback(CallbackListPtr *list, +- pointer user_data, pointer call_data) ++sna_shm_flush_callback(CallbackListPtr *list, ++ pointer user_data, pointer call_data) + { +- sna_accel_flush(user_data); ++ struct sna *sna = user_data; ++ ++ if (!sna->needs_shm_flush) ++ return; ++ ++ sna_accel_flush(sna); ++ sna->needs_shm_flush = false; ++} ++ ++static void ++sna_flush_callback(CallbackListPtr *list, pointer user_data, pointer call_data) ++{ ++ struct sna *sna = user_data; ++ ++ if (!sna->needs_dri_flush) ++ return; ++ ++ sna_accel_flush(sna); ++ sna->needs_dri_flush = false; ++} ++ ++static void ++sna_event_callback(CallbackListPtr *list, pointer user_data, pointer call_data) ++{ ++ EventInfoRec *eventinfo = call_data; ++ struct sna *sna = user_data; ++ int i; ++ ++ if (sna->needs_dri_flush) ++ return; ++ ++ for (i = 0; i < eventinfo->count; i++) { ++ if (eventinfo->events[i].u.u.type == sna->damage_event) { ++ sna->needs_dri_flush = true; ++ return; ++ } ++ } + } + + static struct sna_pixmap *sna_accel_scanout(struct sna *sna) +@@ -17199,6 +17466,7 @@ static struct sna_pixmap *sna_accel_scanout(struct sna *sna) + + 
assert(sna->vblank_interval); + assert(sna->front); ++ assert(!sna->mode.hidden); + + priv = sna_pixmap(sna->front); + if (priv->gpu_bo == NULL) +@@ -17217,7 +17485,7 @@ static void sna_accel_disarm_timer(struct sna *sna, int id) + static bool has_offload_slaves(struct sna *sna) + { + #if HAS_PIXMAP_SHARING +- ScreenPtr screen = sna->scrn->pScreen; ++ ScreenPtr screen = to_screen_from_sna(sna); + PixmapDirtyUpdatePtr dirty; + + xorg_list_for_each_entry(dirty, &screen->pixmap_dirty_list, ent) { +@@ -17231,11 +17499,14 @@ static bool has_offload_slaves(struct sna *sna) + + static bool has_shadow(struct sna *sna) + { +- DamagePtr damage = sna->mode.shadow_damage; ++ DamagePtr damage; + +- if (damage == NULL) ++ if (!sna->mode.shadow_enabled) + return false; + ++ damage = sna->mode.shadow_damage; ++ assert(damage); ++ + DBG(("%s: has pending damage? %d, outstanding flips: %d\n", + __FUNCTION__, + RegionNotEmpty(DamageRegion(damage)), +@@ -17365,9 +17636,8 @@ static bool sna_accel_do_expire(struct sna *sna) + static void sna_accel_post_damage(struct sna *sna) + { + #if HAS_PIXMAP_SHARING +- ScreenPtr screen = sna->scrn->pScreen; ++ ScreenPtr screen = to_screen_from_sna(sna); + PixmapDirtyUpdatePtr dirty; +- bool flush = false; + + xorg_list_for_each_entry(dirty, &screen->pixmap_dirty_list, ent) { + RegionRec region, *damage; +@@ -17376,8 +17646,6 @@ static void sna_accel_post_damage(struct sna *sna) + int16_t dx, dy; + int n; + +- assert(dirty->src == sna->front); +- + damage = DamageRegion(dirty->damage); + if (RegionNil(damage)) + continue; +@@ -17477,7 +17745,14 @@ fallback: + box, n, COPY_LAST)) + goto fallback; + +- flush = true; ++ /* Before signalling the slave via ProcessPending, ++ * ensure not only the batch is submitted as the ++ * slave may be using the Damage callback to perform ++ * its copy, but also that the memory must be coherent ++ * - we need to treat it as uncached for the PCI slave ++ * will bypass LLC. 
++ */ ++ kgem_bo_sync__gtt(&sna->kgem, __sna_pixmap_get_bo(dst)); + } + + DamageRegionProcessPending(&dirty->slave_dst->drawable); +@@ -17485,8 +17760,6 @@ skip: + RegionUninit(®ion); + DamageEmpty(dirty->damage); + } +- if (flush) +- kgem_submit(&sna->kgem); + #endif + } + +@@ -17689,6 +17962,7 @@ sna_set_screen_pixmap(PixmapPtr pixmap) + static Bool + sna_create_window(WindowPtr win) + { ++ DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); + sna_set_window_pixmap(win, win->drawable.pScreen->devPrivate); + return TRUE; + } +@@ -17714,6 +17988,7 @@ sna_unmap_window(WindowPtr win) + static Bool + sna_destroy_window(WindowPtr win) + { ++ DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); + sna_video_destroy_window(win); + sna_dri2_destroy_window(win); + return TRUE; +@@ -17790,20 +18065,34 @@ static bool sna_option_accel_none(struct sna *sna) + if (wedged(sna)) + return true; + +- if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE)) ++ if (!xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_ENABLE, TRUE)) + return true; + ++ if (sna->kgem.gen >= 0120) ++ return true; ++ ++ if (!intel_option_cast_to_bool(sna->Options, ++ OPTION_ACCEL_METHOD, ++ !IS_DEFAULT_ACCEL_METHOD(NOACCEL))) ++ return false; ++ ++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) + s = xf86GetOptValString(sna->Options, OPTION_ACCEL_METHOD); + if (s == NULL) + return IS_DEFAULT_ACCEL_METHOD(NOACCEL); + + return strcasecmp(s, "none") == 0; ++#else ++ return IS_DEFAULT_ACCEL_METHOD(NOACCEL); ++#endif + } + + static bool sna_option_accel_blt(struct sna *sna) + { + const char *s; + ++ assert(sna->kgem.gen < 0120); ++ + s = xf86GetOptValString(sna->Options, OPTION_ACCEL_METHOD); + if (s == NULL) + return false; +@@ -17811,6 +18100,13 @@ static bool sna_option_accel_blt(struct sna *sna) + return strcasecmp(s, "blt") == 0; + } + ++#if HAVE_NOTIFY_FD ++static void sna_accel_notify(int fd, int ready, void *data) ++{ ++ sna_mode_wakeup(data); ++} ++#endif ++ + bool 
sna_accel_init(ScreenPtr screen, struct sna *sna) + { + const char *backend; +@@ -17822,7 +18118,7 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) + list_init(&sna->flush_pixmaps); + list_init(&sna->active_pixmaps); + +- AddGeneralSocket(sna->kgem.fd); ++ SetNotifyFd(sna->kgem.fd, sna_accel_notify, X_NOTIFY_READ, sna); + + #ifdef DEBUG_MEMORY + sna->timer_expire[DEBUG_MEMORY_TIMER] = GetTimeInMillis()+ 10 * 1000; +@@ -17892,21 +18188,23 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) + backend = "disabled"; + sna->kgem.wedged = true; + sna_render_mark_wedged(sna); +- } else if (sna_option_accel_blt(sna) || sna->info->gen >= 0110) ++ } else if (sna_option_accel_blt(sna)) + (void)backend; +- else if (sna->info->gen >= 0100) ++ else if (sna->kgem.gen >= 0110) ++ backend = gen9_render_init(sna, backend); ++ else if (sna->kgem.gen >= 0100) + backend = gen8_render_init(sna, backend); +- else if (sna->info->gen >= 070) ++ else if (sna->kgem.gen >= 070) + backend = gen7_render_init(sna, backend); +- else if (sna->info->gen >= 060) ++ else if (sna->kgem.gen >= 060) + backend = gen6_render_init(sna, backend); +- else if (sna->info->gen >= 050) ++ else if (sna->kgem.gen >= 050) + backend = gen5_render_init(sna, backend); +- else if (sna->info->gen >= 040) ++ else if (sna->kgem.gen >= 040) + backend = gen4_render_init(sna, backend); +- else if (sna->info->gen >= 030) ++ else if (sna->kgem.gen >= 030) + backend = gen3_render_init(sna, backend); +- else if (sna->info->gen >= 020) ++ else if (sna->kgem.gen >= 020) + backend = gen2_render_init(sna, backend); + + DBG(("%s(backend=%s, prefer_gpu=%x)\n", +@@ -17924,8 +18222,14 @@ bool sna_accel_init(ScreenPtr screen, struct sna *sna) + + void sna_accel_create(struct sna *sna) + { ++ ExtensionEntry *damage; ++ + DBG(("%s\n", __FUNCTION__)); + ++ damage = CheckExtension("DAMAGE"); ++ if (damage) ++ sna->damage_event = damage->eventBase + XDamageNotify; ++ + if (!sna_glyphs_create(sna)) + goto fail; + +@@ -17943,27 
+18247,59 @@ fail: + no_render_init(sna); + } + +-void sna_accel_watch_flush(struct sna *sna, int enable) ++static void sna_shm_watch_flush(struct sna *sna, int enable) + { + DBG(("%s: enable=%d\n", __FUNCTION__, enable)); + assert(enable); + +- if (sna->watch_flush == 0) { ++ if (sna->watch_shm_flush == 0) { ++ DBG(("%s: installing shm watchers\n", __FUNCTION__)); ++ assert(enable > 0); ++ ++ if (!AddCallback(&FlushCallback, sna_shm_flush_callback, sna)) ++ return; ++ ++ sna->watch_shm_flush++; ++ } ++ ++ sna->watch_shm_flush += enable; ++} ++ ++void sna_watch_flush(struct sna *sna, int enable) ++{ ++ DBG(("%s: enable=%d\n", __FUNCTION__, enable)); ++ assert(enable); ++ ++ if (sna->watch_dri_flush == 0) { ++ int err = 0; ++ + DBG(("%s: installing watchers\n", __FUNCTION__)); + assert(enable > 0); +- if (!AddCallback(&FlushCallback, sna_accel_flush_callback, sna)) { ++ ++ if (!sna->damage_event) ++ return; ++ ++ if (!AddCallback(&EventCallback, sna_event_callback, sna)) ++ err = 1; ++ ++ if (!AddCallback(&FlushCallback, sna_flush_callback, sna)) ++ err = 1; ++ ++ if (err) { + xf86DrvMsg(sna->scrn->scrnIndex, X_Error, + "Failed to attach ourselves to the flush callbacks, expect missing synchronisation with DRI clients (e.g a compositor)\n"); + } +- sna->watch_flush++; ++ ++ sna->watch_dri_flush++; + } + +- sna->watch_flush += enable; ++ sna->watch_dri_flush += enable; + } + + void sna_accel_leave(struct sna *sna) + { + DBG(("%s\n", __FUNCTION__)); ++ sna_scanout_flush(sna); + + /* as root we always have permission to render */ + if (geteuid() == 0) +@@ -17997,13 +18333,15 @@ void sna_accel_close(struct sna *sna) + + sna_pixmap_expire(sna); + +- DeleteCallback(&FlushCallback, sna_accel_flush_callback, sna); +- RemoveGeneralSocket(sna->kgem.fd); ++ DeleteCallback(&FlushCallback, sna_shm_flush_callback, sna); ++ DeleteCallback(&FlushCallback, sna_flush_callback, sna); ++ DeleteCallback(&EventCallback, sna_event_callback, sna); ++ RemoveNotifyFd(sna->kgem.fd); + + 
kgem_cleanup_cache(&sna->kgem); + } + +-void sna_accel_block_handler(struct sna *sna, struct timeval **tv) ++void sna_accel_block(struct sna *sna, struct timeval **tv) + { + sigtrap_assert_inactive(); + +@@ -18044,10 +18382,17 @@ restart: + if (sna_accel_do_debug_memory(sna)) + sna_accel_debug_memory(sna); + +- if (sna->watch_flush == 1) { +- DBG(("%s: removing watchers\n", __FUNCTION__)); +- DeleteCallback(&FlushCallback, sna_accel_flush_callback, sna); +- sna->watch_flush = 0; ++ if (sna->watch_shm_flush == 1) { ++ DBG(("%s: removing shm watchers\n", __FUNCTION__)); ++ DeleteCallback(&FlushCallback, sna_shm_flush_callback, sna); ++ sna->watch_shm_flush = 0; ++ } ++ ++ if (sna->watch_dri_flush == 1) { ++ DBG(("%s: removing dri watchers\n", __FUNCTION__)); ++ DeleteCallback(&FlushCallback, sna_flush_callback, sna); ++ DeleteCallback(&EventCallback, sna_event_callback, sna); ++ sna->watch_dri_flush = 0; + } + + if (sna->timer_active & 1) { +@@ -18083,22 +18428,6 @@ set_tv: + } + } + +-void sna_accel_wakeup_handler(struct sna *sna) +-{ +- DBG(("%s: nbatch=%d, need_retire=%d, need_purge=%d\n", __FUNCTION__, +- sna->kgem.nbatch, sna->kgem.need_retire, sna->kgem.need_purge)); +- +- if (!sna->kgem.nbatch) +- return; +- +- if (kgem_is_idle(&sna->kgem)) { +- DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); +- _kgem_submit(&sna->kgem); +- } +- +- sigtrap_assert_inactive(); +-} +- + void sna_accel_free(struct sna *sna) + { + DBG(("%s\n", __FUNCTION__)); +diff --git a/src/sna/sna_acpi.c b/src/sna/sna_acpi.c +index dcc0287b..643d04af 100644 +--- a/src/sna/sna_acpi.c ++++ b/src/sna/sna_acpi.c +@@ -92,7 +92,7 @@ void _sna_acpi_wakeup(struct sna *sna) + DBG(("%s: error [%d], detaching from acpid\n", __FUNCTION__, n)); + + /* XXX reattach later? 
*/ +- RemoveGeneralSocket(sna->acpi.fd); ++ RemoveNotifyFd(sna->acpi.fd); + sna_acpi_fini(sna); + return; + } +@@ -136,6 +136,13 @@ void _sna_acpi_wakeup(struct sna *sna) + } while (n); + } + ++#if HAVE_NOTIFY_FD ++static void sna_acpi_notify(int fd, int read, void *data) ++{ ++ _sna_acpi_wakeup(data); ++} ++#endif ++ + static int read_power_state(const char *path) + { + DIR *dir; +@@ -200,7 +207,7 @@ void sna_acpi_init(struct sna *sna) + + DBG(("%s: attaching to acpid\n", __FUNCTION__)); + +- AddGeneralSocket(sna->acpi.fd); ++ SetNotifyFd(sna->acpi.fd, sna_acpi_notify, X_NOTIFY_READ, sna); + sna->acpi.remain = sizeof(sna->acpi.event) - 1; + sna->acpi.offset = 0; + +diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c +index de8f6ec3..ddd2586d 100644 +--- a/src/sna/sna_blt.c ++++ b/src/sna/sna_blt.c +@@ -86,6 +86,11 @@ static const uint8_t fill_ROP[] = { + ROP_1 + }; + ++static void sig_done(struct sna *sna, const struct sna_composite_op *op) ++{ ++ sigtrap_put(); ++} ++ + static void nop_done(struct sna *sna, const struct sna_composite_op *op) + { + assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); +@@ -129,7 +134,6 @@ static bool sna_blt_fill_init(struct sna *sna, + struct kgem *kgem = &sna->kgem; + + assert(kgem_bo_can_blt (kgem, bo)); +- assert(bo->tiling != I915_TILING_Y); + blt->bo[0] = bo; + + blt->br13 = bo->pitch; +@@ -183,6 +187,7 @@ static bool sna_blt_fill_init(struct sna *sna, + return false; + _kgem_set_mode(kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(kgem, NULL, bo); + + assert(sna->kgem.mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch; +@@ -237,17 +242,13 @@ static bool sna_blt_fill_init(struct sna *sna, + return true; + } + +-noinline static void sna_blt_fill_begin(struct sna *sna, +- const struct sna_blt_state *blt) ++noinline static void __sna_blt_fill_begin(struct sna *sna, ++ const struct sna_blt_state *blt) + { + struct kgem *kgem = &sna->kgem; + uint32_t *b; + +- if (kgem->nreloc) { +- _kgem_submit(kgem); +- _kgem_set_mode(kgem, 
KGEM_BLT); +- assert(kgem->nbatch == 0); +- } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, blt->bo[0]); + + assert(kgem->mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch; +@@ -293,6 +294,21 @@ noinline static void sna_blt_fill_begin(struct sna *sna, + } + } + ++inline static void sna_blt_fill_begin(struct sna *sna, ++ const struct sna_blt_state *blt) ++{ ++ struct kgem *kgem = &sna->kgem; ++ ++ if (kgem->nreloc) { ++ _kgem_submit(kgem); ++ _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(kgem, NULL, blt->bo[0]); ++ assert(kgem->nbatch == 0); ++ } ++ ++ __sna_blt_fill_begin(sna, blt); ++} ++ + inline static void sna_blt_fill_one(struct sna *sna, + const struct sna_blt_state *blt, + int16_t x, int16_t y, +@@ -330,8 +346,8 @@ static bool sna_blt_copy_init(struct sna *sna, + { + struct kgem *kgem = &sna->kgem; + +- assert(kgem_bo_can_blt (kgem, src)); +- assert(kgem_bo_can_blt (kgem, dst)); ++ assert(kgem_bo_can_blt(kgem, src)); ++ assert(kgem_bo_can_blt(kgem, dst)); + + blt->bo[0] = src; + blt->bo[1] = dst; +@@ -370,6 +386,7 @@ static bool sna_blt_copy_init(struct sna *sna, + return false; + _kgem_set_mode(kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, src, dst); + + sna->blt_state.fill_bo = 0; + return true; +@@ -424,6 +441,7 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna, + return false; + _kgem_set_mode(kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, src, dst); + + sna->blt_state.fill_bo = 0; + return true; +@@ -454,6 +472,7 @@ static void sna_blt_alpha_fixup_one(struct sna *sna, + !kgem_check_reloc(kgem, 2)) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]); + } + + assert(sna->kgem.mode == KGEM_BLT); +@@ -582,6 +601,7 @@ static void sna_blt_copy_one(struct sna *sna, + !kgem_check_reloc(kgem, 2)) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]); + } + + assert(sna->kgem.mode == KGEM_BLT); +@@ -912,8 
+932,27 @@ sna_composite_mask_is_opaque(PicturePtr mask) + return is_solid(mask) && is_white(mask); + else if (!PICT_FORMAT_A(mask->format)) + return true; +- else +- return is_solid(mask) && is_opaque_solid(mask); ++ else if (mask->pSourcePict) { ++ PictSolidFill *fill = (PictSolidFill *) mask->pSourcePict; ++ return (fill->color >> 24) == 0xff; ++ } else { ++ struct sna_pixmap *priv; ++ assert(mask->pDrawable); ++ ++ if (mask->pDrawable->width == 1 && ++ mask->pDrawable->height == 1 && ++ mask->repeat) ++ return pixel_is_opaque(get_pixel(mask), mask->format); ++ ++ if (mask->transform) ++ return false; ++ ++ priv = sna_pixmap_from_drawable(mask->pDrawable); ++ if (priv == NULL || !priv->clear) ++ return false; ++ ++ return pixel_is_opaque(priv->clear_color, mask->format); ++ } + } + + fastcall +@@ -971,6 +1010,7 @@ static void blt_composite_fill__cpu(struct sna *sna, + + assert(op->dst.pixmap->devPrivate.ptr); + assert(op->dst.pixmap->devKind); ++ sigtrap_assert_active(); + pixman_fill(op->dst.pixmap->devPrivate.ptr, + op->dst.pixmap->devKind / sizeof(uint32_t), + op->dst.pixmap->drawable.bitsPerPixel, +@@ -990,6 +1030,7 @@ blt_composite_fill_box_no_offset__cpu(struct sna *sna, + + assert(op->dst.pixmap->devPrivate.ptr); + assert(op->dst.pixmap->devKind); ++ sigtrap_assert_active(); + pixman_fill(op->dst.pixmap->devPrivate.ptr, + op->dst.pixmap->devKind / sizeof(uint32_t), + op->dst.pixmap->drawable.bitsPerPixel, +@@ -1010,6 +1051,7 @@ blt_composite_fill_boxes_no_offset__cpu(struct sna *sna, + + assert(op->dst.pixmap->devPrivate.ptr); + assert(op->dst.pixmap->devKind); ++ sigtrap_assert_active(); + pixman_fill(op->dst.pixmap->devPrivate.ptr, + op->dst.pixmap->devKind / sizeof(uint32_t), + op->dst.pixmap->drawable.bitsPerPixel, +@@ -1031,6 +1073,7 @@ blt_composite_fill_box__cpu(struct sna *sna, + + assert(op->dst.pixmap->devPrivate.ptr); + assert(op->dst.pixmap->devKind); ++ sigtrap_assert_active(); + pixman_fill(op->dst.pixmap->devPrivate.ptr, + 
op->dst.pixmap->devKind / sizeof(uint32_t), + op->dst.pixmap->drawable.bitsPerPixel, +@@ -1052,6 +1095,7 @@ blt_composite_fill_boxes__cpu(struct sna *sna, + + assert(op->dst.pixmap->devPrivate.ptr); + assert(op->dst.pixmap->devKind); ++ sigtrap_assert_active(); + pixman_fill(op->dst.pixmap->devPrivate.ptr, + op->dst.pixmap->devKind / sizeof(uint32_t), + op->dst.pixmap->drawable.bitsPerPixel, +@@ -1159,12 +1203,15 @@ static inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const + box->y2 - box->y1 >= op->dst.height) { + struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); + if (op->dst.bo == priv->gpu_bo) { ++ sna_damage_all(&priv->gpu_damage, op->dst.pixmap); ++ sna_damage_destroy(&priv->cpu_damage); + priv->clear = true; + priv->clear_color = op->u.blt.pixel; + DBG(("%s: pixmap=%ld marking clear [%08x]\n", + __FUNCTION__, + op->dst.pixmap->drawable.serialNumber, + op->u.blt.pixel)); ++ ((struct sna_composite_op *)op)->damage = NULL; + } + } + } +@@ -1404,6 +1451,7 @@ begin_blt(struct sna *sna, + return false; + + _kgem_set_mode(&sna->kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, op->dst.bo); + } + + return true; +@@ -1429,6 +1477,7 @@ prepare_blt_clear(struct sna *sna, + DBG(("%s\n", __FUNCTION__)); + + if (op->dst.bo == NULL) { ++ op->u.blt.pixel = 0; + op->blt = blt_composite_fill__cpu; + if (op->dst.x|op->dst.y) { + op->box = blt_composite_fill_box__cpu; +@@ -1439,9 +1488,8 @@ prepare_blt_clear(struct sna *sna, + op->boxes = blt_composite_fill_boxes_no_offset__cpu; + op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu; + } +- op->done = nop_done; +- op->u.blt.pixel = 0; +- return true; ++ op->done = sig_done; ++ return sigtrap_get() == 0; + } + + op->blt = blt_composite_fill; +@@ -1484,8 +1532,8 @@ prepare_blt_fill(struct sna *sna, + op->boxes = blt_composite_fill_boxes_no_offset__cpu; + op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu; + } +- op->done = nop_done; +- return true; ++ op->done = sig_done; ++ return 
sigtrap_get() == 0; + } + + op->blt = blt_composite_fill; +@@ -1668,6 +1716,7 @@ static void blt_composite_copy_boxes__thread(struct sna *sna, + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + } while (1); + } else { + do { +@@ -1724,6 +1773,7 @@ static void blt_composite_copy_boxes__thread(struct sna *sna, + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + } while (1); + } + sna_vertex_unlock(&sna->render); +@@ -1806,6 +1856,7 @@ static void blt_composite_copy_boxes__thread64(struct sna *sna, + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + } while (1); + } else { + do { +@@ -1864,6 +1915,7 @@ static void blt_composite_copy_boxes__thread64(struct sna *sna, + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + } while (1); + } + sna_vertex_unlock(&sna->render); +@@ -1973,6 +2025,7 @@ prepare_blt_copy(struct sna *sna, + } + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, bo, op->dst.bo); + + DBG(("%s\n", __FUNCTION__)); + +@@ -2396,6 +2449,9 @@ prepare_blt_put(struct sna *sna, + op->box = blt_put_composite_box; + op->boxes = blt_put_composite_boxes; + } ++ ++ op->done = nop_done; ++ return true; + } else { + if (alpha_fixup) { + op->u.blt.pixel = alpha_fixup; +@@ -2407,10 +2463,10 @@ prepare_blt_put(struct sna *sna, + op->box = blt_put_composite_box__cpu; + op->boxes = blt_put_composite_boxes__cpu; + } +- } +- op->done = nop_done; + +- return true; ++ op->done = sig_done; ++ return sigtrap_get() == 0; ++ } + } + + static bool +@@ -2544,6 +2600,7 @@ sna_blt_composite(struct sna *sna, + clear: + if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) { + sna_pixmap(tmp->dst.pixmap)->clear = true; ++nop: + return prepare_blt_nop(sna, tmp); + } + +@@ -2559,6 +2616,7 @@ clear: + } + tmp->dst.bo 
= sna_drawable_use_bo(dst->pDrawable, hint, + &dst_box, &tmp->damage); ++ assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); + if (tmp->dst.bo) { + if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { + DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n", +@@ -2567,6 +2625,8 @@ clear: + } + if (hint & REPLACES) + kgem_bo_undo(&sna->kgem, tmp->dst.bo); ++ if (flags & COMPOSITE_UPLOAD) ++ return false; + } else { + RegionRec region; + +@@ -2590,32 +2650,40 @@ clear: + } + if (op == PictOpOver && is_opaque_solid(src)) + op = PictOpSrc; +- if (op == PictOpAdd && is_white(src)) ++ if (op == PictOpAdd && ++ PICT_FORMAT_RGB(src->format) == PICT_FORMAT_RGB(dst->format) && ++ is_white(src)) + op = PictOpSrc; + if (was_clear && (op == PictOpAdd || op == PictOpOver)) { + if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0) + op = PictOpSrc; + if (op == PictOpOver) { ++ unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color); + color = over(get_solid_color(src, PICT_a8r8g8b8), +- color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, +- dst->format, PICT_a8r8g8b8)); ++ dst_color); + op = PictOpSrc; + DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n", + __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), +- color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, +- dst->format, PICT_a8r8g8b8), ++ solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color), + color)); ++ if (color == dst_color) ++ goto nop; ++ else ++ goto fill; + } + if (op == PictOpAdd) { ++ unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color); + color = add(get_solid_color(src, PICT_a8r8g8b8), +- color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, +- dst->format, PICT_a8r8g8b8)); ++ dst_color); + op = PictOpSrc; + DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n", + __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8), +- color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color, +- dst->format, PICT_a8r8g8b8), ++ 
solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color), + color)); ++ if (color == dst_color) ++ goto nop; ++ else ++ goto fill; + } + } + if (op == PictOpOutReverse && is_opaque_solid(src)) +@@ -2649,6 +2717,7 @@ fill: + } + tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, + &dst_box, &tmp->damage); ++ assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); + if (tmp->dst.bo) { + if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { + DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n", +@@ -2657,6 +2726,8 @@ fill: + } + if (hint & REPLACES) + kgem_bo_undo(&sna->kgem, tmp->dst.bo); ++ if (flags & COMPOSITE_UPLOAD) ++ return false; + } else { + RegionRec region; + +@@ -2720,8 +2791,8 @@ fill: + if (is_clear(src_pixmap)) { + if (src->repeat || + (x >= 0 && y >= 0 && +- x + width < src_pixmap->drawable.width && +- y + height < src_pixmap->drawable.height)) { ++ x + width <= src_pixmap->drawable.width && ++ y + height <= src_pixmap->drawable.height)) { + color = color_convert(sna_pixmap(src_pixmap)->clear_color, + src->format, tmp->dst.format); + goto fill; +@@ -2795,7 +2866,7 @@ fill: + if (src_pixmap->drawable.width <= sna->render.max_3d_size && + src_pixmap->drawable.height <= sna->render.max_3d_size && + bo->pitch <= sna->render.max_3d_pitch && +- (flags & COMPOSITE_FALLBACK) == 0) ++ (flags & (COMPOSITE_UPLOAD | COMPOSITE_FALLBACK)) == 0) + { + return false; + } +@@ -2817,6 +2888,7 @@ fill: + } + tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, + &dst_box, &tmp->damage); ++ assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage)); + + if (tmp->dst.bo && hint & REPLACES) { + struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap); +@@ -2846,7 +2918,7 @@ fallback: + DBG(("%s: fallback -- unaccelerated upload\n", + __FUNCTION__)); + goto fallback; +- } else { ++ } else if ((flags & COMPOSITE_UPLOAD) == 0) { + ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup); + if (!ret) + goto fallback; +@@ -3023,6 +3095,7 @@ sna_blt_composite__convert(struct sna 
*sna, + } + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, tmp->src.bo, tmp->dst.bo); + + if (alpha_fixup) { + tmp->blt = blt_composite_copy_with_alpha; +@@ -3062,7 +3135,7 @@ static void sna_blt_fill_op_blt(struct sna *sna, + if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { + const struct sna_blt_state *blt = &op->base.u.blt; + +- sna_blt_fill_begin(sna, blt); ++ __sna_blt_fill_begin(sna, blt); + + sna->blt_state.fill_bo = blt->bo[0]->unique_id; + sna->blt_state.fill_pixel = blt->pixel; +@@ -3079,7 +3152,7 @@ fastcall static void sna_blt_fill_op_box(struct sna *sna, + if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { + const struct sna_blt_state *blt = &op->base.u.blt; + +- sna_blt_fill_begin(sna, blt); ++ __sna_blt_fill_begin(sna, blt); + + sna->blt_state.fill_bo = blt->bo[0]->unique_id; + sna->blt_state.fill_pixel = blt->pixel; +@@ -3097,7 +3170,7 @@ fastcall static void sna_blt_fill_op_boxes(struct sna *sna, + if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { + const struct sna_blt_state *blt = &op->base.u.blt; + +- sna_blt_fill_begin(sna, blt); ++ __sna_blt_fill_begin(sna, blt); + + sna->blt_state.fill_bo = blt->bo[0]->unique_id; + sna->blt_state.fill_pixel = blt->pixel; +@@ -3132,7 +3205,7 @@ fastcall static void sna_blt_fill_op_points(struct sna *sna, + DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n)); + + if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) { +- sna_blt_fill_begin(sna, blt); ++ __sna_blt_fill_begin(sna, blt); + + sna->blt_state.fill_bo = blt->bo[0]->unique_id; + sna->blt_state.fill_pixel = blt->pixel; +@@ -3162,65 +3235,15 @@ fastcall static void sna_blt_fill_op_points(struct sna *sna, + assert(kgem->nbatch < kgem->surface); + + if ((dx|dy) == 0) { +- while (n_this_time >= 8) { +- *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); +- *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); +- *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0); +- *((uint64_t *)b + 3) = 
pt_add(cmd, p+3, 0, 0); +- *((uint64_t *)b + 4) = pt_add(cmd, p+4, 0, 0); +- *((uint64_t *)b + 5) = pt_add(cmd, p+5, 0, 0); +- *((uint64_t *)b + 6) = pt_add(cmd, p+6, 0, 0); +- *((uint64_t *)b + 7) = pt_add(cmd, p+7, 0, 0); +- b += 16; +- n_this_time -= 8; +- p += 8; +- } +- if (n_this_time & 4) { +- *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); +- *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); +- *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0); +- *((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0); +- b += 8; +- p += 4; +- } +- if (n_this_time & 2) { +- *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0); +- *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0); +- b += 4; +- p += 2; +- } +- if (n_this_time & 1) +- *((uint64_t *)b + 0) = pt_add(cmd, p++, 0, 0); ++ do { ++ *(uint64_t *)b = pt_add(cmd, p++, 0, 0); ++ b += 2; ++ } while (--n_this_time); + } else { +- while (n_this_time >= 8) { +- *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); +- *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); +- *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy); +- *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy); +- *((uint64_t *)b + 4) = pt_add(cmd, p+4, dx, dy); +- *((uint64_t *)b + 5) = pt_add(cmd, p+5, dx, dy); +- *((uint64_t *)b + 6) = pt_add(cmd, p+6, dx, dy); +- *((uint64_t *)b + 7) = pt_add(cmd, p+7, dx, dy); +- b += 16; +- n_this_time -= 8; +- p += 8; +- } +- if (n_this_time & 4) { +- *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); +- *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); +- *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy); +- *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy); +- b += 8; +- p += 8; +- } +- if (n_this_time & 2) { +- *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy); +- *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy); +- b += 4; +- p += 2; +- } +- if (n_this_time & 1) +- *((uint64_t *)b + 0) = pt_add(cmd, p++, dx, dy); ++ do { ++ *(uint64_t *)b = pt_add(cmd, p++, dx, dy); ++ b += 2; ++ } while (--n_this_time); + } + + if (!n) +@@ -3414,6 +3437,7 @@ static bool 
sna_blt_fill_box(struct sna *sna, uint8_t alu, + + _kgem_set_mode(kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + assert(kgem_check_batch(kgem, 6)); + assert(kgem_check_reloc(kgem, 1)); +@@ -3520,6 +3544,8 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, + _kgem_set_mode(kgem, KGEM_BLT); + } + ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); ++ + assert(sna->kgem.mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch; + if (kgem->gen >= 0100) { +@@ -3608,6 +3634,7 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, bo); + + assert(sna->kgem.mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch; +@@ -3754,6 +3781,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + } + _kgem_set_mode(kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + + if ((dst_dx | dst_dy) == 0) { + if (kgem->gen >= 0100) { +@@ -3814,6 +3842,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + } while (1); + } else { + uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6; +@@ -3871,6 +3900,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + } while (1); + } + } else { +@@ -3932,6 +3962,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + } while (1); + } else { + cmd |= 6; +@@ -3989,6 +4020,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + } while (1); + } + } +@@ -4095,6 +4127,7 @@ bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu, + !kgem_check_reloc(kgem, 2)) { + 
_kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo); + } + + assert(sna->kgem.mode == KGEM_BLT); +@@ -4190,6 +4223,7 @@ bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, + DBG(("%s: dst == src\n", __FUNCTION__)); + + if (src_bo->tiling == I915_TILING_Y && ++ !sna->kgem.can_blt_y && + kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { + struct kgem_bo *bo; + +@@ -4237,6 +4271,7 @@ bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, + } + } else { + if (src_bo->tiling == I915_TILING_Y && ++ !sna->kgem.can_blt_y && + kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { + DBG(("%s: src is y-tiled\n", __FUNCTION__)); + if (src->type != DRAWABLE_PIXMAP) +@@ -4251,6 +4286,7 @@ bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, + } + + if (dst_bo->tiling == I915_TILING_Y && ++ !sna->kgem.can_blt_y && + kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) { + DBG(("%s: dst is y-tiled\n", __FUNCTION__)); + if (dst->type != DRAWABLE_PIXMAP) +diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c +index f01f020e..1da8c291 100644 +--- a/src/sna/sna_composite.c ++++ b/src/sna/sna_composite.c +@@ -452,6 +452,8 @@ static void apply_damage(struct sna_composite_op *op, RegionPtr region) + op->damage = NULL; + } else + sna_damage_add(op->damage, region); ++ ++ assert(!op->damage || !DAMAGE_IS_ALL(*op->damage)); + } + + static inline bool use_cpu(PixmapPtr pixmap, struct sna_pixmap *priv, +@@ -653,8 +655,9 @@ sna_composite(CARD8 op, + RegionRec region; + int dx, dy; + +- DBG(("%s(%d src=%ld+(%d, %d), mask=%ld+(%d, %d), dst=%ld+(%d, %d)+(%d, %d), size=(%d, %d)\n", +- __FUNCTION__, op, ++ DBG(("%s(pixmap=%ld, op=%d, src=%ld+(%d, %d), mask=%ld+(%d, %d), dst=%ld+(%d, %d)+(%d, %d), size=(%d, %d)\n", ++ __FUNCTION__, ++ pixmap->drawable.serialNumber, op, + get_picture_id(src), src_x, src_y, + get_picture_id(mask), mask_x, mask_y, + get_picture_id(dst), dst_x, dst_y, +@@ -673,13 +676,6 @@ sna_composite(CARD8 op, + 
src = sna->clear; + } + +- if (mask && sna_composite_mask_is_opaque(mask)) { +- DBG(("%s: removing opaque %smask\n", +- __FUNCTION__, +- mask->componentAlpha && PICT_FORMAT_RGB(mask->format) ? "CA " : "")); +- mask = NULL; +- } +- + if (!sna_compute_composite_region(®ion, + src, mask, dst, + src_x, src_y, +@@ -688,6 +684,13 @@ sna_composite(CARD8 op, + width, height)) + return; + ++ if (mask && sna_composite_mask_is_opaque(mask)) { ++ DBG(("%s: removing opaque %smask\n", ++ __FUNCTION__, ++ mask->componentAlpha && PICT_FORMAT_RGB(mask->format) ? "CA " : "")); ++ mask = NULL; ++ } ++ + if (NO_COMPOSITE) + goto fallback; + +@@ -756,6 +759,7 @@ sna_composite(CARD8 op, + DBG(("%s: fallback due unhandled composite op\n", __FUNCTION__)); + goto fallback; + } ++ assert(!tmp.damage || !DAMAGE_IS_ALL(*tmp.damage)); + + if (region.data == NULL) + tmp.box(sna, &tmp, ®ion.extents); +@@ -797,8 +801,10 @@ sna_composite_rectangles(CARD8 op, + int i, num_boxes; + unsigned hint; + +- DBG(("%s(op=%d, %08x x %d [(%d, %d)x(%d, %d) ...])\n", +- __FUNCTION__, op, ++ DBG(("%s(pixmap=%ld, op=%d, %08x x %d [(%d, %d)x(%d, %d) ...])\n", ++ __FUNCTION__, ++ get_drawable_pixmap(dst->pDrawable)->drawable.serialNumber, ++ op, + (color->alpha >> 8 << 24) | + (color->red >> 8 << 16) | + (color->green >> 8 << 8) | +@@ -814,38 +820,40 @@ sna_composite_rectangles(CARD8 op, + return; + } + +- if ((color->red|color->green|color->blue|color->alpha) <= 0x00ff) { +- switch (op) { +- case PictOpOver: +- case PictOpOutReverse: +- case PictOpAdd: +- return; +- case PictOpInReverse: +- case PictOpSrc: +- op = PictOpClear; +- break; +- case PictOpAtopReverse: +- op = PictOpOut; +- break; +- case PictOpXor: +- op = PictOpOverReverse; +- break; +- } +- } + if (color->alpha <= 0x00ff) { +- switch (op) { +- case PictOpOver: +- case PictOpOutReverse: +- return; +- case PictOpInReverse: +- op = PictOpClear; +- break; +- case PictOpAtopReverse: +- op = PictOpOut; +- break; +- case PictOpXor: +- op = 
PictOpOverReverse; +- break; ++ if (PICT_FORMAT_TYPE(dst->format) == PICT_TYPE_A || ++ (color->red|color->green|color->blue) <= 0x00ff) { ++ switch (op) { ++ case PictOpOver: ++ case PictOpOutReverse: ++ case PictOpAdd: ++ return; ++ case PictOpInReverse: ++ case PictOpSrc: ++ op = PictOpClear; ++ break; ++ case PictOpAtopReverse: ++ op = PictOpOut; ++ break; ++ case PictOpXor: ++ op = PictOpOverReverse; ++ break; ++ } ++ } else { ++ switch (op) { ++ case PictOpOver: ++ case PictOpOutReverse: ++ return; ++ case PictOpInReverse: ++ op = PictOpClear; ++ break; ++ case PictOpAtopReverse: ++ op = PictOpOut; ++ break; ++ case PictOpXor: ++ op = PictOpOverReverse; ++ break; ++ } + } + } else if (color->alpha >= 0xff00) { + switch (op) { +@@ -863,11 +871,16 @@ sna_composite_rectangles(CARD8 op, + case PictOpXor: + op = PictOpOut; + break; ++ case PictOpAdd: ++ if (PICT_FORMAT_TYPE(dst->format) == PICT_TYPE_A || ++ (color->red&color->green&color->blue) >= 0xff00) ++ op = PictOpSrc; ++ break; + } + } + + /* Avoid reducing overlapping translucent rectangles */ +- if (op == PictOpOver && ++ if ((op == PictOpOver || op == PictOpAdd) && + num_rects == 1 && + sna_drawable_is_clear(dst->pDrawable)) + op = PictOpSrc; +@@ -979,6 +992,9 @@ sna_composite_rectangles(CARD8 op, + bool ok; + + if (op == PictOpClear) { ++ if (priv->clear_color == 0) ++ goto done; ++ + ok = sna_get_pixel_from_rgba(&pixel, + 0, 0, 0, 0, + dst->format); +@@ -990,8 +1006,11 @@ sna_composite_rectangles(CARD8 op, + color->alpha, + dst->format); + } +- if (ok && priv->clear_color == pixel) ++ if (ok && priv->clear_color == pixel) { ++ DBG(("%s: matches current clear, skipping\n", ++ __FUNCTION__)); + goto done; ++ } + } + + if (region.data == NULL) { +diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h +index 272e83bc..d5c727ee 100644 +--- a/src/sna/sna_damage.h ++++ b/src/sna/sna_damage.h +@@ -267,7 +267,7 @@ int _sna_damage_get_boxes(struct sna_damage *damage, const BoxRec **boxes); + static inline int + 
sna_damage_get_boxes(struct sna_damage *damage, const BoxRec **boxes) + { +- assert(damage); ++ assert(DAMAGE_PTR(damage)); + + if (DAMAGE_IS_ALL(damage)) { + *boxes = &DAMAGE_PTR(damage)->extents; +@@ -322,7 +322,8 @@ static inline void sna_damage_destroy(struct sna_damage **damage) + if (*damage == NULL) + return; + +- __sna_damage_destroy(DAMAGE_PTR(*damage)); ++ if (DAMAGE_PTR(*damage)) ++ __sna_damage_destroy(DAMAGE_PTR(*damage)); + *damage = NULL; + } + +diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c +index 4b218b70..9b77550e 100644 +--- a/src/sna/sna_display.c ++++ b/src/sna/sna_display.c +@@ -39,6 +39,25 @@ + #include <errno.h> + #include <poll.h> + #include <ctype.h> ++#include <dirent.h> ++ ++#if HAVE_ALLOCA_H ++#include <alloca.h> ++#elif defined __GNUC__ ++#define alloca __builtin_alloca ++#elif defined _AIX ++#define alloca __alloca ++#elif defined _MSC_VER ++#include <malloc.h> ++#define alloca _alloca ++#else ++void *alloca(size_t); ++#endif ++ ++#define _PARSE_EDID_ ++/* Jump through a few hoops in order to fixup EDIDs */ ++#undef VERSION ++#undef REVISION + + #include "sna.h" + #include "sna_reg.h" +@@ -72,6 +91,10 @@ + #include <memcheck.h> + #endif + ++#define FAIL_CURSOR_IOCTL 0 ++ ++#define COLDPLUG_DELAY_MS 2000 ++ + /* Minor discrepancy between 32-bit/64-bit ABI in old kernels */ + union compat_mode_get_connector{ + struct drm_mode_get_connector conn; +@@ -88,6 +111,8 @@ union compat_mode_get_connector{ + #define DEFAULT_DPI 96 + #endif + ++#define OUTPUT_STATUS_CACHE_MS 15000 ++ + #define DRM_MODE_PAGE_FLIP_ASYNC 0x02 + + #define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 +@@ -106,33 +131,87 @@ struct local_mode_obj_get_properties { + }; + #define LOCAL_MODE_OBJECT_PLANE 0xeeeeeeee + +-#if 0 ++struct local_mode_set_plane { ++ uint32_t plane_id; ++ uint32_t crtc_id; ++ uint32_t fb_id; /* fb object contains surface format type */ ++ uint32_t flags; ++ ++ /* Signed dest location allows it to be partially off screen */ ++ int32_t crtc_x, 
crtc_y; ++ uint32_t crtc_w, crtc_h; ++ ++ /* Source values are 16.16 fixed point */ ++ uint32_t src_x, src_y; ++ uint32_t src_h, src_w; ++}; ++#define LOCAL_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct local_mode_set_plane) ++ ++struct local_mode_get_plane { ++ uint32_t plane_id; ++ ++ uint32_t crtc_id; ++ uint32_t fb_id; ++ ++ uint32_t possible_crtcs; ++ uint32_t gamma_size; ++ ++ uint32_t count_format_types; ++ uint64_t format_type_ptr; ++}; ++#define LOCAL_IOCTL_MODE_GETPLANE DRM_IOWR(0xb6, struct local_mode_get_plane) ++ ++struct local_mode_get_plane_res { ++ uint64_t plane_id_ptr; ++ uint64_t count_planes; ++}; ++#define LOCAL_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xb5, struct local_mode_get_plane_res) ++ ++#if 1 + #define __DBG DBG + #else + #define __DBG(x) + #endif + ++#define DBG_NATIVE_ROTATION ~0 /* minimum RR_Rotate_0 */ ++ + extern XF86ConfigPtr xf86configptr; + ++struct sna_cursor { ++ struct sna_cursor *next; ++ uint32_t *image; ++ bool transformed; ++ Rotation rotation; ++ int ref; ++ int size; ++ int last_width; ++ int last_height; ++ unsigned handle; ++ unsigned serial; ++ unsigned alloc; ++}; ++ + struct sna_crtc { ++ unsigned long flags; ++ uint32_t id; + xf86CrtcPtr base; + struct drm_mode_modeinfo kmode; +- int dpms_mode; + PixmapPtr slave_pixmap; + DamagePtr slave_damage; +- struct kgem_bo *bo, *shadow_bo, *client_bo; ++ struct kgem_bo *bo, *shadow_bo, *client_bo, *cache_bo; + struct sna_cursor *cursor; + unsigned int last_cursor_size; + uint32_t offset; + bool shadow; + bool fallback_shadow; + bool transform; ++ bool cursor_transform; ++ bool hwcursor; + bool flip_pending; +- uint8_t id; +- uint8_t pipe; + +- RegionRec client_damage; /* XXX overlap with shadow damage? 
*/ ++ struct pict_f_transform cursor_to_fb, fb_to_cursor; + ++ RegionRec crtc_damage; + uint16_t shadow_bo_width, shadow_bo_height; + + uint32_t rotation; +@@ -143,7 +222,9 @@ struct sna_crtc { + uint32_t supported; + uint32_t current; + } rotation; +- } primary, sprite; ++ struct list link; ++ } primary; ++ struct list sprites; + + uint32_t mode_serial, flip_serial; + +@@ -173,21 +254,33 @@ struct sna_output { + + unsigned int is_panel : 1; + unsigned int add_default_modes : 1; ++ int connector_type; ++ int connector_type_id; ++ ++ uint32_t link_status_idx; + + uint32_t edid_idx; + uint32_t edid_blob_id; + uint32_t edid_len; + void *edid_raw; ++ xf86MonPtr fake_edid_mon; ++ void *fake_edid_raw; + + bool has_panel_limits; + int panel_hdisplay; + int panel_vdisplay; + + uint32_t dpms_id; +- int dpms_mode; ++ uint8_t dpms_mode; + struct backlight backlight; + int backlight_active_level; + ++ uint32_t last_detect; ++ uint32_t status; ++ unsigned int hotplug_count; ++ bool update_properties; ++ bool reprobe; ++ + int num_modes; + struct drm_mode_modeinfo *modes; + +@@ -218,13 +311,91 @@ enum { /* XXX copied from hw/xfree86/modes/xf86Crtc.c */ + OPTION_DEFAULT_MODES, + }; + ++static void __sna_output_dpms(xf86OutputPtr output, int dpms, int fixup); + static void sna_crtc_disable_cursor(struct sna *sna, struct sna_crtc *crtc); ++static bool sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, ++ struct kgem_bo *bo, int x, int y); + + static bool is_zaphod(ScrnInfoPtr scrn) + { + return xf86IsEntityShared(scrn->entityList[0]); + } + ++static bool ++sna_zaphod_match(struct sna *sna, const char *output) ++{ ++ const char *s, *colon; ++ char t[20]; ++ unsigned int i = 0; ++ ++ s = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); ++ if (s == NULL) ++ return false; ++ ++ colon = strchr(s, ':'); ++ if (colon) /* Skip over the ZaphodPipes */ ++ s = colon + 1; ++ ++ do { ++ /* match any outputs in a comma list, stopping at whitespace */ ++ switch (*s) { ++ case '\0': ++ t[i] = 
'\0'; ++ return strcmp(t, output) == 0; ++ ++ case ',': ++ t[i] ='\0'; ++ if (strcmp(t, output) == 0) ++ return TRUE; ++ i = 0; ++ break; ++ ++ case ' ': ++ case '\t': ++ case '\n': ++ case '\r': ++ break; ++ ++ default: ++ t[i++] = *s; ++ break; ++ } ++ ++ s++; ++ } while (i < sizeof(t)); ++ ++ return false; ++} ++ ++static unsigned ++get_zaphod_crtcs(struct sna *sna) ++{ ++ const char *str, *colon; ++ unsigned crtcs = 0; ++ ++ str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); ++ if (str == NULL || (colon = strchr(str, ':')) == NULL) { ++ DBG(("%s: no zaphod pipes, using screen number: %x\n", ++ __FUNCTION__, ++ sna->scrn->confScreen->device->screen)); ++ return 1 << sna->scrn->confScreen->device->screen; ++ } ++ ++ DBG(("%s: ZaphodHeads='%s'\n", __FUNCTION__, str)); ++ while (str < colon) { ++ char *end; ++ unsigned crtc = strtoul(str, &end, 0); ++ if (end == str) ++ break; ++ DBG(("%s: adding CRTC %d to zaphod pipes\n", ++ __FUNCTION__, crtc)); ++ crtcs |= 1 << crtc; ++ str = end + 1; ++ } ++ DBG(("%s: ZaphodPipes=%x\n", __FUNCTION__, crtcs)); ++ return crtcs; ++} ++ + inline static unsigned count_to_mask(int x) + { + return (1 << x) - 1; +@@ -247,6 +418,21 @@ static inline struct sna_crtc *to_sna_crtc(xf86CrtcPtr crtc) + return crtc->driver_private; + } + ++static inline unsigned __sna_crtc_pipe(struct sna_crtc *crtc) ++{ ++ return crtc->flags >> 8 & 0xff; ++} ++ ++static inline unsigned __sna_crtc_id(struct sna_crtc *crtc) ++{ ++ return crtc->id; ++} ++ ++uint32_t sna_crtc_id(xf86CrtcPtr crtc) ++{ ++ return __sna_crtc_id(to_sna_crtc(crtc)); ++} ++ + static inline bool event_pending(int fd) + { + struct pollfd pfd; +@@ -268,29 +454,37 @@ static inline uint32_t fb_id(struct kgem_bo *bo) + return bo->delta; + } + +-uint32_t sna_crtc_id(xf86CrtcPtr crtc) ++unsigned sna_crtc_count_sprites(xf86CrtcPtr crtc) + { +- if (to_sna_crtc(crtc) == NULL) +- return 0; +- return to_sna_crtc(crtc)->id; +-} ++ struct plane *sprite; ++ unsigned count; + +-int 
sna_crtc_to_pipe(xf86CrtcPtr crtc) +-{ +- assert(to_sna_crtc(crtc)); +- return to_sna_crtc(crtc)->pipe; ++ count = 0; ++ list_for_each_entry(sprite, &to_sna_crtc(crtc)->sprites, link) ++ count++; ++ ++ return count; + } + +-uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc) ++static struct plane *lookup_sprite(struct sna_crtc *crtc, unsigned idx) + { +- assert(to_sna_crtc(crtc)); +- return to_sna_crtc(crtc)->sprite.id; ++ struct plane *sprite; ++ ++ list_for_each_entry(sprite, &crtc->sprites, link) ++ if (idx-- == 0) ++ return sprite; ++ ++ return NULL; + } + +-bool sna_crtc_is_on(xf86CrtcPtr crtc) ++uint32_t sna_crtc_to_sprite(xf86CrtcPtr crtc, unsigned idx) + { ++ struct plane *sprite; ++ + assert(to_sna_crtc(crtc)); +- return to_sna_crtc(crtc)->bo != NULL; ++ ++ sprite = lookup_sprite(to_sna_crtc(crtc), idx); ++ return sprite ? sprite->id : 0; + } + + bool sna_crtc_is_transformed(xf86CrtcPtr crtc) +@@ -299,34 +493,48 @@ bool sna_crtc_is_transformed(xf86CrtcPtr crtc) + return to_sna_crtc(crtc)->transform; + } + +-static inline uint64_t msc64(struct sna_crtc *sna_crtc, uint32_t seq) ++static inline bool msc64(struct sna_crtc *sna_crtc, uint32_t seq, uint64_t *msc) + { ++ bool record = true; + if (seq < sna_crtc->last_seq) { + if (sna_crtc->last_seq - seq > 0x40000000) { + sna_crtc->wrap_seq++; + DBG(("%s: pipe=%d wrapped; was %u, now %u, wraps=%u\n", +- __FUNCTION__, sna_crtc->pipe, ++ __FUNCTION__, __sna_crtc_pipe(sna_crtc), + sna_crtc->last_seq, seq, sna_crtc->wrap_seq)); +- } else { +- ERR(("%s: pipe=%d msc went backwards; was %u, now %u\n", +- __FUNCTION__, sna_crtc->pipe, sna_crtc->last_seq, seq)); +- seq = sna_crtc->last_seq; ++ } else { ++ DBG(("%s: pipe=%d msc went backwards; was %u, now %u; ignoring for last_swap\n", ++ __FUNCTION__, __sna_crtc_pipe(sna_crtc), sna_crtc->last_seq, seq)); ++ ++ record = false; + } + } +- sna_crtc->last_seq = seq; +- return (uint64_t)sna_crtc->wrap_seq << 32 | seq; ++ *msc = (uint64_t)sna_crtc->wrap_seq << 32 | seq; ++ return 
record; + } + + uint64_t sna_crtc_record_swap(xf86CrtcPtr crtc, + int tv_sec, int tv_usec, unsigned seq) + { + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); ++ uint64_t msc; ++ + assert(sna_crtc); +- DBG(("%s: recording last swap on pipe=%d, frame %d, time %d.%06d\n", +- __FUNCTION__, sna_crtc->pipe, seq, tv_sec, tv_usec)); +- sna_crtc->swap.tv_sec = tv_sec; +- sna_crtc->swap.tv_usec = tv_usec; +- return sna_crtc->swap.msc = msc64(sna_crtc, seq); ++ ++ if (msc64(sna_crtc, seq, &msc)) { ++ DBG(("%s: recording last swap on pipe=%d, frame %d [msc=%08lld], time %d.%06d\n", ++ __FUNCTION__, __sna_crtc_pipe(sna_crtc), seq, (long long)msc, ++ tv_sec, tv_usec)); ++ sna_crtc->swap.tv_sec = tv_sec; ++ sna_crtc->swap.tv_usec = tv_usec; ++ sna_crtc->swap.msc = msc; ++ } else { ++ DBG(("%s: swap event on pipe=%d, frame %d [msc=%08lld], time %d.%06d\n", ++ __FUNCTION__, __sna_crtc_pipe(sna_crtc), seq, (long long)msc, ++ tv_sec, tv_usec)); ++ } ++ ++ return msc; + } + + const struct ust_msc *sna_crtc_last_swap(xf86CrtcPtr crtc) +@@ -342,15 +550,6 @@ const struct ust_msc *sna_crtc_last_swap(xf86CrtcPtr crtc) + } + } + +-xf86CrtcPtr sna_mode_first_crtc(struct sna *sna) +-{ +- xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); +- if (sna->mode.num_real_crtc) +- return config->crtc[0]; +- else +- return NULL; +-} +- + #ifndef NDEBUG + static void gem_close(int fd, uint32_t handle); + static void assert_scanout(struct kgem *kgem, struct kgem_bo *bo, +@@ -372,12 +571,24 @@ static void assert_scanout(struct kgem *kgem, struct kgem_bo *bo, + #define assert_scanout(k, b, w, h) + #endif + ++static void assert_crtc_fb(struct sna *sna, struct sna_crtc *crtc) ++{ ++#ifndef NDEBUG ++ struct drm_mode_crtc mode = { .crtc_id = __sna_crtc_id(crtc) }; ++ drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode); ++ assert(mode.fb_id == fb_id(crtc->bo)); ++#endif ++} ++ + static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, + int width, int height) + { + ScrnInfoPtr scrn = sna->scrn; + 
struct drm_mode_fb_cmd arg; + ++ if (!kgem_bo_is_fenced(&sna->kgem, bo)) ++ return 0; ++ + assert(bo->refcnt); + assert(bo->proxy == NULL); + assert(!bo->snoop); +@@ -393,8 +604,9 @@ static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, + DBG(("%s: create fb %dx%d@%d/%d\n", + __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel)); + +- assert(bo->tiling != I915_TILING_Y); ++ assert(bo->tiling != I915_TILING_Y || sna->kgem.can_scanout_y); + assert((bo->pitch & 63) == 0); ++ assert(scrn->vtSema); /* must be master */ + + VG_CLEAR(arg); + arg.width = width; +@@ -404,21 +616,83 @@ static unsigned get_fb(struct sna *sna, struct kgem_bo *bo, + arg.depth = scrn->depth; + arg.handle = bo->handle; + +- assert(sna->scrn->vtSema); /* must be master */ + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_ADDFB, &arg)) { +- xf86DrvMsg(scrn->scrnIndex, X_ERROR, +- "%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d: %d\n", +- __FUNCTION__, width, height, +- scrn->depth, scrn->bitsPerPixel, bo->pitch, errno); +- return 0; ++ /* Try again with the fancy version */ ++ struct local_mode_fb_cmd2 { ++ uint32_t fb_id; ++ uint32_t width, height; ++ uint32_t pixel_format; ++ uint32_t flags; ++ ++ uint32_t handles[4]; ++ uint32_t pitches[4]; /* pitch for each plane */ ++ uint32_t offsets[4]; /* offset of each plane */ ++ uint64_t modifiers[4]; ++ } f; ++#define LOCAL_IOCTL_MODE_ADDFB2 DRM_IOWR(0xb8, struct local_mode_fb_cmd2) ++ memset(&f, 0, sizeof(f)); ++ f.width = width; ++ f.height = height; ++ /* XXX interlaced */ ++ f.flags = 1 << 1; /* +modifiers */ ++ f.handles[0] = bo->handle; ++ f.pitches[0] = bo->pitch; ++ ++ switch (bo->tiling) { ++ case I915_TILING_NONE: ++ break; ++ case I915_TILING_X: ++ /* I915_FORMAT_MOD_X_TILED */ ++ f.modifiers[0] = (uint64_t)1 << 56 | 1; ++ break; ++ case I915_TILING_Y: ++ /* I915_FORMAT_MOD_X_TILED */ ++ f.modifiers[0] = (uint64_t)1 << 56 | 2; ++ break; ++ } ++ ++#define fourcc(a,b,c,d) ((a) | (b) << 8 | (c) << 16 | (d) << 24) ++ switch 
(scrn->depth) { ++ default: ++ ERR(("%s: unhandled screen format, depth=%d\n", ++ __FUNCTION__, scrn->depth)); ++ goto fail; ++ case 8: ++ f.pixel_format = fourcc('C', '8', ' ', ' '); ++ break; ++ case 15: ++ f.pixel_format = fourcc('X', 'R', '1', '5'); ++ break; ++ case 16: ++ f.pixel_format = fourcc('R', 'G', '1', '6'); ++ break; ++ case 24: ++ f.pixel_format = fourcc('X', 'R', '2', '4'); ++ break; ++ case 30: ++ f.pixel_format = fourcc('X', 'R', '3', '0'); ++ break; ++ } ++#undef fourcc ++ ++ if (drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_ADDFB2, &f)) { ++fail: ++ xf86DrvMsg(scrn->scrnIndex, X_ERROR, ++ "%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d: %d\n", ++ __FUNCTION__, width, height, ++ scrn->depth, scrn->bitsPerPixel, bo->pitch, errno); ++ return 0; ++ } ++ ++ arg.fb_id = f.fb_id; + } + assert(arg.fb_id != 0); +- ++ bo->delta = arg.fb_id; + DBG(("%s: attached fb=%d to handle=%d\n", +- __FUNCTION__, arg.fb_id, arg.handle)); ++ __FUNCTION__, bo->delta, arg.handle)); + + bo->scanout = true; +- return bo->delta = arg.fb_id; ++ return bo->delta; + } + + static uint32_t gem_create(int fd, int size) +@@ -438,6 +712,7 @@ static uint32_t gem_create(int fd, int size) + static void *gem_mmap(int fd, int handle, int size) + { + struct drm_i915_gem_mmap_gtt mmap_arg; ++ struct drm_i915_gem_set_domain set_domain; + void *ptr; + + VG_CLEAR(mmap_arg); +@@ -449,6 +724,15 @@ static void *gem_mmap(int fd, int handle, int size) + if (ptr == MAP_FAILED) + return NULL; + ++ VG_CLEAR(set_domain); ++ set_domain.handle = handle; ++ set_domain.read_domains = I915_GEM_DOMAIN_GTT; ++ set_domain.write_domain = I915_GEM_DOMAIN_GTT; ++ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) { ++ munmap(ptr, size); ++ return NULL; ++ } ++ + return ptr; + } + +@@ -497,8 +781,6 @@ sna_backlight_uevent(int fd, void *closure) + if (sna_output->dpms_mode != DPMSModeOn) + continue; + +- assert(output->randr_output); +- + val = backlight_get(&sna_output->backlight); + if (val < 0) + 
continue; +@@ -523,6 +805,7 @@ sna_backlight_uevent(int fd, void *closure) + TRUE, FALSE); + } + } ++ DBG(("%s: complete\n", __FUNCTION__)); + } + + static void sna_backlight_pre_init(struct sna *sna) +@@ -570,6 +853,7 @@ static void sna_backlight_drain_uevents(struct sna *sna) + if (sna->mode.backlight_monitor == NULL) + return; + ++ DBG(("%s()\n", __FUNCTION__)); + sna_backlight_uevent(udev_monitor_get_fd(sna->mode.backlight_monitor), + sna); + } +@@ -632,9 +916,22 @@ sna_output_backlight_set(struct sna_output *sna_output, int level) + return ret; + } + ++static bool ++has_native_backlight(struct sna_output *sna_output) ++{ ++ return sna_output->backlight.type == BL_RAW; ++} ++ + static void + sna_output_backlight_off(struct sna_output *sna_output) + { ++ /* Trust the kernel to turn the native backlight off. However, we ++ * do explicitly turn the backlight back on (when we wake the output) ++ * just in case a third party turns it off! ++ */ ++ if (has_native_backlight(sna_output)) ++ return; ++ + DBG(("%s(%s)\n", __FUNCTION__, sna_output->base->name)); + backlight_off(&sna_output->backlight); + sna_output_backlight_set(sna_output, 0); +@@ -674,7 +971,7 @@ has_user_backlight_override(xf86OutputPtr output) + if (*str == '\0') + return (char *)str; + +- if (backlight_exists(str) == BL_NONE) { ++ if (!backlight_exists(str)) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "Unrecognised backlight control interface '%s'\n", + str); +@@ -684,6 +981,93 @@ has_user_backlight_override(xf86OutputPtr output) + return strdup(str); + } + ++static int get_device_minor(int fd) ++{ ++ struct stat st; ++ ++ if (fstat(fd, &st) || !S_ISCHR(st.st_mode)) ++ return -1; ++ ++ return st.st_rdev & 0x63; ++} ++ ++static const char * const sysfs_connector_types[] = { ++ /* DRM_MODE_CONNECTOR_Unknown */ "Unknown", ++ /* DRM_MODE_CONNECTOR_VGA */ "VGA", ++ /* DRM_MODE_CONNECTOR_DVII */ "DVI-I", ++ /* DRM_MODE_CONNECTOR_DVID */ "DVI-D", ++ /* DRM_MODE_CONNECTOR_DVIA */ "DVI-A", ++ /* 
DRM_MODE_CONNECTOR_Composite */ "Composite", ++ /* DRM_MODE_CONNECTOR_SVIDEO */ "SVIDEO", ++ /* DRM_MODE_CONNECTOR_LVDS */ "LVDS", ++ /* DRM_MODE_CONNECTOR_Component */ "Component", ++ /* DRM_MODE_CONNECTOR_9PinDIN */ "DIN", ++ /* DRM_MODE_CONNECTOR_DisplayPort */ "DP", ++ /* DRM_MODE_CONNECTOR_HDMIA */ "HDMI-A", ++ /* DRM_MODE_CONNECTOR_HDMIB */ "HDMI-B", ++ /* DRM_MODE_CONNECTOR_TV */ "TV", ++ /* DRM_MODE_CONNECTOR_eDP */ "eDP", ++ /* DRM_MODE_CONNECTOR_VIRTUAL */ "Virtual", ++ /* DRM_MODE_CONNECTOR_DSI */ "DSI", ++ /* DRM_MODE_CONNECTOR_DPI */ "DPI" ++}; ++ ++static char *has_connector_backlight(xf86OutputPtr output) ++{ ++ struct sna_output *sna_output = output->driver_private; ++ struct sna *sna = to_sna(output->scrn); ++ char path[1024]; ++ DIR *dir; ++ struct dirent *de; ++ int minor, len; ++ char *str = NULL; ++ ++ if (sna_output->connector_type >= ARRAY_SIZE(sysfs_connector_types)) ++ return NULL; ++ ++ minor = get_device_minor(sna->kgem.fd); ++ if (minor < 0) ++ return NULL; ++ ++ len = snprintf(path, sizeof(path), ++ "/sys/class/drm/card%d-%s-%d", ++ minor, ++ sysfs_connector_types[sna_output->connector_type], ++ sna_output->connector_type_id); ++ DBG(("%s: lookup %s\n", __FUNCTION__, path)); ++ ++ dir = opendir(path); ++ if (dir == NULL) ++ return NULL; ++ ++ while ((de = readdir(dir))) { ++ struct stat st; ++ ++ if (*de->d_name == '.') ++ continue; ++ ++ snprintf(path + len, sizeof(path) - len, ++ "/%s", de->d_name); ++ ++ if (stat(path, &st)) ++ continue; ++ ++ if (!S_ISDIR(st.st_mode)) ++ continue; ++ ++ DBG(("%s: testing %s as backlight\n", ++ __FUNCTION__, de->d_name)); ++ ++ if (backlight_exists(de->d_name)) { ++ str = strdup(de->d_name); /* leak! 
*/ ++ break; ++ } ++ } ++ ++ closedir(dir); ++ return str; ++} ++ + static void + sna_output_backlight_init(xf86OutputPtr output) + { +@@ -696,11 +1080,20 @@ sna_output_backlight_init(xf86OutputPtr output) + return; + #endif + +- from = X_CONFIG; +- best_iface = has_user_backlight_override(output); ++ if (sna_output->is_panel) { ++ from = X_CONFIG; ++ best_iface = has_user_backlight_override(output); ++ if (best_iface) ++ goto done; ++ } ++ ++ best_iface = has_connector_backlight(output); + if (best_iface) + goto done; + ++ if (!sna_output->is_panel) ++ return; ++ + /* XXX detect right backlight for multi-GPU/panels */ + from = X_PROBED; + pci = xf86GetPciInfoForEntity(to_sna(output->scrn)->pEnt->index); +@@ -728,6 +1121,38 @@ done: + sna_output->backlight.iface, best_iface, output->name); + } + ++#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(22, 0) ++static inline int sigio_block(void) ++{ ++ return 0; ++} ++static inline void sigio_unblock(int was_blocked) ++{ ++ (void)was_blocked; ++} ++#elif XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0) ++static inline int sigio_block(void) ++{ ++ OsBlockSIGIO(); ++ return 0; ++} ++static inline void sigio_unblock(int was_blocked) ++{ ++ OsReleaseSIGIO(); ++ (void)was_blocked; ++} ++#else ++#include <xf86_OSproc.h> ++static inline int sigio_block(void) ++{ ++ return xf86BlockSIGIO(); ++} ++static inline void sigio_unblock(int was_blocked) ++{ ++ xf86UnblockSIGIO(was_blocked); ++} ++#endif ++ + static char *canonical_kmode_name(const struct drm_mode_modeinfo *kmode) + { + char tmp[32], *buf; +@@ -781,6 +1206,7 @@ mode_from_kmode(ScrnInfoPtr scrn, + mode->VTotal = kmode->vtotal; + mode->VScan = kmode->vscan; + ++ mode->VRefresh = kmode->vrefresh; + mode->Flags = kmode->flags; + mode->name = get_kmode_name(kmode); + +@@ -814,6 +1240,7 @@ mode_to_kmode(struct drm_mode_modeinfo *kmode, DisplayModePtr mode) + kmode->vtotal = mode->VTotal; + kmode->vscan = mode->VScan; + ++ kmode->vrefresh = mode->VRefresh; + kmode->flags 
= mode->Flags; + if (mode->name) + strncpy(kmode->name, mode->name, DRM_DISPLAY_MODE_LEN); +@@ -824,11 +1251,12 @@ static void + sna_crtc_force_outputs_on(xf86CrtcPtr crtc) + { + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); ++ /* All attached outputs are valid, so update our timestamps */ ++ unsigned now = GetTimeInMillis(); + int i; + + assert(to_sna_crtc(crtc)); +- DBG(("%s(pipe=%d), currently? %d\n", __FUNCTION__, +- to_sna_crtc(crtc)->pipe, to_sna_crtc(crtc)->dpms_mode)); ++ DBG(("%s(pipe=%d)\n", __FUNCTION__, sna_crtc_pipe(crtc))); + + /* DPMS handling by the kernel is inconsistent, so after setting a + * mode on an output presume that we intend for it to be on, or that +@@ -843,10 +1271,11 @@ sna_crtc_force_outputs_on(xf86CrtcPtr crtc) + if (output->crtc != crtc) + continue; + +- output->funcs->dpms(output, DPMSModeOn); ++ __sna_output_dpms(output, DPMSModeOn, false); ++ if (to_sna_output(output)->last_detect) ++ to_sna_output(output)->last_detect = now; + } + +- to_sna_crtc(crtc)->dpms_mode = DPMSModeOn; + #if XF86_CRTC_VERSION >= 3 + crtc->active = TRUE; + #endif +@@ -859,8 +1288,7 @@ sna_crtc_force_outputs_off(xf86CrtcPtr crtc) + int i; + + assert(to_sna_crtc(crtc)); +- DBG(("%s(pipe=%d), currently? 
%d\n", __FUNCTION__, +- to_sna_crtc(crtc)->pipe, to_sna_crtc(crtc)->dpms_mode)); ++ DBG(("%s(pipe=%d)\n", __FUNCTION__, sna_crtc_pipe(crtc))); + + /* DPMS handling by the kernel is inconsistent, so after setting a + * mode on an output presume that we intend for it to be on, or that +@@ -875,35 +1303,47 @@ sna_crtc_force_outputs_off(xf86CrtcPtr crtc) + if (output->crtc != crtc) + continue; + +- output->funcs->dpms(output, DPMSModeOff); ++ __sna_output_dpms(output, DPMSModeOff, false); + } +- +- to_sna_crtc(crtc)->dpms_mode = DPMSModeOff; + } + + static unsigned +-rotation_reduce(struct plane *p, unsigned rotation) ++rotation_reflect(unsigned rotation) + { +- unsigned unsupported_rotations = rotation & ~p->rotation.supported; ++ unsigned other_bits; + +- if (unsupported_rotations == 0) +- return rotation; ++ /* paranoia for future extensions */ ++ other_bits = rotation & ~RR_Rotate_All; + +-#define RR_Reflect_XY (RR_Reflect_X | RR_Reflect_Y) ++ /* flip the reflection to compensate for reflecting the rotation */ ++ other_bits ^= RR_Reflect_X | RR_Reflect_Y; + +- if ((unsupported_rotations & RR_Reflect_XY) == RR_Reflect_XY && +- p->rotation.supported& RR_Rotate_180) { +- rotation &= ~RR_Reflect_XY; +- rotation ^= RR_Rotate_180; +- } ++ /* Reflect the screen by rotating the rotation bit, ++ * which has to have at least RR_Rotate_0 set. This allows ++ * us to reflect any of the rotation bits, not just 0. 
++ */ ++ rotation &= RR_Rotate_All; ++ assert(rotation); ++ rotation <<= 2; /* RR_Rotate_0 -> RR_Rotate_180 etc */ ++ rotation |= rotation >> 4; /* RR_Rotate_270' to RR_Rotate_90 */ ++ rotation &= RR_Rotate_All; ++ assert(rotation); + +- if ((unsupported_rotations & RR_Rotate_180) && +- (p->rotation.supported& RR_Reflect_XY) == RR_Reflect_XY) { +- rotation ^= RR_Reflect_XY; +- rotation &= ~RR_Rotate_180; ++ return rotation | other_bits; ++} ++ ++static unsigned ++rotation_reduce(struct plane *p, unsigned rotation) ++{ ++ /* If unsupported try exchanging rotation for a reflection */ ++ if (rotation & ~p->rotation.supported) { ++ unsigned new_rotation = rotation_reflect(rotation); ++ if ((new_rotation & p->rotation.supported) == new_rotation) ++ rotation = new_rotation; + } + +-#undef RR_Reflect_XY ++ /* Only one rotation bit should be set */ ++ assert(is_power_of_two(rotation & RR_Rotate_All)); + + return rotation; + } +@@ -923,7 +1363,7 @@ rotation_set(struct sna *sna, struct plane *p, uint32_t desired) + if (desired == p->rotation.current) + return true; + +- if ((desired & p->rotation.supported) == 0) { ++ if ((desired & p->rotation.supported) != desired) { + errno = EINVAL; + return false; + } +@@ -956,20 +1396,105 @@ rotation_reset(struct plane *p) + p->rotation.current = 0; + } + +-bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, uint32_t rotation) ++bool sna_crtc_set_sprite_rotation(xf86CrtcPtr crtc, ++ unsigned idx, ++ uint32_t rotation) + { ++ struct plane *sprite; + assert(to_sna_crtc(crtc)); ++ ++ sprite = lookup_sprite(to_sna_crtc(crtc), idx); ++ if (!sprite) ++ return false; ++ + DBG(("%s: CRTC:%d [pipe=%d], sprite=%u set-rotation=%x\n", + __FUNCTION__, +- to_sna_crtc(crtc)->id, to_sna_crtc(crtc)->pipe, to_sna_crtc(crtc)->sprite.id, +- rotation)); ++ sna_crtc_id(crtc), sna_crtc_pipe(crtc), ++ sprite->id, rotation)); + +- return rotation_set(to_sna(crtc->scrn), +- &to_sna_crtc(crtc)->sprite, +- rotation_reduce(&to_sna_crtc(crtc)->sprite, rotation)); 
++ return rotation_set(to_sna(crtc->scrn), sprite, ++ rotation_reduce(sprite, rotation)); + } + +-static bool ++#if HAS_DEBUG_FULL ++#if !HAS_DEBUG_FULL ++#define LogF ErrorF ++#endif ++struct kmsg { ++ int fd; ++ int saved_loglevel; ++}; ++ ++static int kmsg_get_debug(void) ++{ ++ FILE *file; ++ int v = -1; ++ ++ file = fopen("/sys/module/drm/parameters/debug", "r"); ++ if (file) { ++ fscanf(file, "%d", &v); ++ fclose(file); ++ } ++ ++ return v; ++} ++ ++static void kmsg_set_debug(int v) ++{ ++ FILE *file; ++ ++ file = fopen("/sys/module/drm/parameters/debug", "w"); ++ if (file) { ++ fprintf(file, "%d\n", v); ++ fclose(file); ++ } ++} ++ ++static void kmsg_open(struct kmsg *k) ++{ ++ k->saved_loglevel = kmsg_get_debug(); ++ if (k->saved_loglevel != -1) ++ kmsg_set_debug(0xff); ++ ++ k->fd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK); ++ if (k->fd != -1) ++ lseek(k->fd, 0, SEEK_END); ++} ++ ++static void kmsg_close(struct kmsg *k, int dump) ++{ ++ FILE *file; ++ ++ file = NULL; ++ if (k->fd != -1 && dump) ++ file = fdopen(k->fd, "r"); ++ if (file) { ++ size_t len = 0; ++ char *line = NULL; ++ ++ while (getline(&line, &len, file) != -1) { ++ char *start = strchr(line, ';'); ++ if (start) ++ LogF("KMSG: %s", start + 1); ++ } ++ ++ free(line); ++ fclose(file); ++ } ++ ++ if (k->fd != -1) ++ close(k->fd); ++ ++ if (k->saved_loglevel != -1) ++ kmsg_set_debug(k->saved_loglevel); ++} ++#else ++struct kmsg { int unused; }; ++static void kmsg_open(struct kmsg *k) {} ++static void kmsg_close(struct kmsg *k, int dump) {} ++#endif ++ ++static int + sna_crtc_apply(xf86CrtcPtr crtc) + { + struct sna *sna = to_sna(crtc->scrn); +@@ -978,26 +1503,39 @@ sna_crtc_apply(xf86CrtcPtr crtc) + struct drm_mode_crtc arg; + uint32_t output_ids[32]; + int output_count = 0; +- int i; ++ int sigio, i; ++ struct kmsg kmsg; ++ int ret = EINVAL; + +- DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, sna_crtc->id, sna_crtc->pipe, sna_crtc->bo->handle)); ++ DBG(("%s CRTC:%d [pipe=%d], 
handle=%d\n", __FUNCTION__, ++ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), ++ sna_crtc->bo->handle)); + if (!sna_crtc->kmode.clock) { + ERR(("%s(CRTC:%d [pipe=%d]): attempted to set an invalid mode\n", +- __FUNCTION__, sna_crtc->id, sna_crtc->pipe)); +- return false; ++ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc))); ++ return EINVAL; + } + ++ kmsg_open(&kmsg); ++ sigio = sigio_block(); ++ + assert(sna->mode.num_real_output < ARRAY_SIZE(output_ids)); + sna_crtc_disable_cursor(sna, sna_crtc); + + if (!rotation_set(sna, &sna_crtc->primary, sna_crtc->rotation)) { ++ memset(&arg, 0, sizeof(arg)); ++ arg.crtc_id = __sna_crtc_id(sna_crtc); ++ (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg); ++ } ++ ++ if (!rotation_set(sna, &sna_crtc->primary, sna_crtc->rotation)) { + ERR(("%s: set-primary-rotation failed (rotation-id=%d, rotation=%d) on CRTC:%d [pipe=%d], errno=%d\n", +- __FUNCTION__, sna_crtc->primary.rotation.prop, sna_crtc->rotation, sna_crtc->id, sna_crtc->pipe, errno)); ++ __FUNCTION__, sna_crtc->primary.rotation.prop, sna_crtc->rotation, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), errno)); + sna_crtc->primary.rotation.supported &= ~sna_crtc->rotation; +- return false; ++ goto unblock; + } + DBG(("%s: CRTC:%d [pipe=%d] primary rotation set to %x\n", +- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, sna_crtc->rotation)); ++ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), sna_crtc->rotation)); + + for (i = 0; i < sna->mode.num_real_output; i++) { + xf86OutputPtr output = config->output[i]; +@@ -1008,7 +1546,7 @@ sna_crtc_apply(xf86CrtcPtr crtc) + * and we lose track of the user settings. 
+ */ + if (output->crtc == NULL) +- output->funcs->dpms(output, DPMSModeOff); ++ __sna_output_dpms(output, DPMSModeOff, false); + + if (output->crtc != crtc) + continue; +@@ -1022,29 +1560,27 @@ sna_crtc_apply(xf86CrtcPtr crtc) + + DBG(("%s: attaching output '%s' %d [%d] to crtc:%d (pipe %d) (possible crtc:%x, possible clones:%x)\n", + __FUNCTION__, output->name, i, to_connector_id(output), +- sna_crtc->id, sna_crtc->pipe, ++ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), + (uint32_t)output->possible_crtcs, + (uint32_t)output->possible_clones)); + +- assert(output->possible_crtcs & (1 << sna_crtc->pipe) || ++ assert(output->possible_crtcs & (1 << __sna_crtc_pipe(sna_crtc)) || + is_zaphod(crtc->scrn)); + + output_ids[output_count] = to_connector_id(output); + if (++output_count == ARRAY_SIZE(output_ids)) { + DBG(("%s: too many outputs (%d) for me!\n", + __FUNCTION__, output_count)); +- errno = EINVAL; +- return false; ++ goto unblock; + } + } + if (output_count == 0) { + DBG(("%s: no outputs\n", __FUNCTION__)); +- errno = EINVAL; +- return false; ++ goto unblock; + } + + VG_CLEAR(arg); +- arg.crtc_id = sna_crtc->id; ++ arg.crtc_id = __sna_crtc_id(sna_crtc); + arg.fb_id = fb_id(sna_crtc->bo); + if (sna_crtc->transform || sna_crtc->slave_pixmap) { + arg.x = 0; +@@ -1061,7 +1597,7 @@ sna_crtc_apply(xf86CrtcPtr crtc) + arg.mode_valid = 1; + + DBG(("%s: applying crtc [%d, pipe=%d] mode=%dx%d+%d+%d@%d, fb=%d%s%s update to %d outputs [%d...]\n", +- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, ++ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), + arg.mode.hdisplay, + arg.mode.vdisplay, + arg.x, arg.y, +@@ -1071,12 +1607,19 @@ sna_crtc_apply(xf86CrtcPtr crtc) + sna_crtc->transform ? " [transformed]" : "", + output_count, output_count ? 
output_ids[0] : 0)); + +- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg)) +- return false; ++ ret = 0; ++ if (unlikely(drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg))) { ++ ret = errno; ++ goto unblock; ++ } + + sna_crtc->mode_serial++; + sna_crtc_force_outputs_on(crtc); +- return true; ++ ++unblock: ++ sigio_unblock(sigio); ++ kmsg_close(&kmsg, ret); ++ return ret; + } + + static bool overlap(const BoxRec *a, const BoxRec *b) +@@ -1094,26 +1637,73 @@ static bool overlap(const BoxRec *a, const BoxRec *b) + return true; + } + ++static void defer_event(struct sna *sna, struct drm_event *base) ++{ ++ if (sna->mode.shadow_nevent == sna->mode.shadow_size) { ++ int size = sna->mode.shadow_size * 2; ++ void *ptr; ++ ++ ptr = realloc(sna->mode.shadow_events, ++ sizeof(struct drm_event_vblank)*size); ++ if (!ptr) ++ return; ++ ++ sna->mode.shadow_events = ptr; ++ sna->mode.shadow_size = size; ++ } ++ ++ memcpy(&sna->mode.shadow_events[sna->mode.shadow_nevent++], ++ base, sizeof(struct drm_event_vblank)); ++ DBG(("%s: deferring event count=%d\n", ++ __func__, sna->mode.shadow_nevent)); ++} ++ ++static void flush_events(struct sna *sna) ++{ ++ int n; ++ ++ if (!sna->mode.shadow_nevent) ++ return; ++ ++ DBG(("%s: flushing %d events=%d\n", __func__, sna->mode.shadow_nevent)); ++ ++ for (n = 0; n < sna->mode.shadow_nevent; n++) { ++ struct drm_event_vblank *vb = &sna->mode.shadow_events[n]; ++ ++ if ((uintptr_t)(vb->user_data) & 2) ++ sna_present_vblank_handler(vb); ++ else ++ sna_dri2_vblank_handler(vb); ++ } ++ ++ sna->mode.shadow_nevent = 0; ++} ++ ++ + static bool wait_for_shadow(struct sna *sna, + struct sna_pixmap *priv, + unsigned flags) + { + PixmapPtr pixmap = priv->pixmap; +- DamagePtr damage; + struct kgem_bo *bo, *tmp; + int flip_active; + bool ret = true; + +- DBG(("%s: flags=%x, flips=%d, handle=%d, shadow=%d\n", +- __FUNCTION__, flags, sna->mode.flip_active, ++ DBG(("%s: enabled? %d waiting? 
%d, flags=%x, flips=%d, pixmap=%ld [front?=%d], handle=%d, shadow=%d\n", ++ __FUNCTION__, sna->mode.shadow_enabled, sna->mode.shadow_wait, ++ flags, sna->mode.flip_active, ++ pixmap->drawable.serialNumber, pixmap == sna->front, + priv->gpu_bo->handle, sna->mode.shadow->handle)); + + assert(priv->move_to_gpu_data == sna); + assert(sna->mode.shadow != priv->gpu_bo); + +- if (flags == 0 || pixmap != sna->front || !sna->mode.shadow_damage) ++ if (flags == 0 || pixmap != sna->front || !sna->mode.shadow_enabled) + goto done; + ++ assert(sna->mode.shadow_damage); ++ assert(!sna->mode.shadow_wait); ++ + if ((flags & MOVE_WRITE) == 0) { + if ((flags & __MOVE_SCANOUT) == 0) { + struct sna_crtc *crtc; +@@ -1154,9 +1744,7 @@ static bool wait_for_shadow(struct sna *sna, + } + + assert(sna->mode.shadow_active); +- +- damage = sna->mode.shadow_damage; +- sna->mode.shadow_damage = NULL; ++ sna->mode.shadow_wait = true; + + flip_active = sna->mode.flip_active; + if (flip_active) { +@@ -1208,6 +1796,8 @@ static bool wait_for_shadow(struct sna *sna, + bo = sna->mode.shadow; + } + } ++ assert(sna->mode.shadow_wait); ++ sna->mode.shadow_wait = false; + + if (bo->refcnt > 1) { + bo = kgem_create_2d(&sna->kgem, +@@ -1230,8 +1820,6 @@ static bool wait_for_shadow(struct sna *sna, + bo = sna->mode.shadow; + } + +- sna->mode.shadow_damage = damage; +- + RegionSubtract(&sna->mode.shadow_region, + &sna->mode.shadow_region, + &sna->mode.shadow_cancel); +@@ -1269,6 +1857,7 @@ static bool wait_for_shadow(struct sna *sna, + RegionSubtract(&sna->mode.shadow_region, &sna->mode.shadow_region, ®ion); + } + ++ crtc->client_bo->active_scanout--; + kgem_bo_destroy(&sna->kgem, crtc->client_bo); + crtc->client_bo = NULL; + list_del(&crtc->shadow_link); +@@ -1281,12 +1870,13 @@ static bool wait_for_shadow(struct sna *sna, + sna->mode.shadow_region.extents.y1, + sna->mode.shadow_region.extents.x2, + sna->mode.shadow_region.extents.y2)); +- ret = sna->render.copy_boxes(sna, GXcopy, +- &pixmap->drawable, 
priv->gpu_bo, 0, 0, +- &pixmap->drawable, bo, 0, 0, +- region_rects(&sna->mode.shadow_region), +- region_num_rects(&sna->mode.shadow_region), +- 0); ++ if (!sna->render.copy_boxes(sna, GXcopy, ++ &pixmap->drawable, priv->gpu_bo, 0, 0, ++ &pixmap->drawable, bo, 0, 0, ++ region_rects(&sna->mode.shadow_region), ++ region_num_rects(&sna->mode.shadow_region), ++ 0)) ++ ERR(("%s: copy failed\n", __FUNCTION__)); + } + + if (priv->cow) +@@ -1295,11 +1885,13 @@ static bool wait_for_shadow(struct sna *sna, + sna_pixmap_unmap(pixmap, priv); + + DBG(("%s: setting front pixmap to handle=%d\n", __FUNCTION__, bo->handle)); ++ sna->mode.shadow->active_scanout--; + tmp = priv->gpu_bo; + priv->gpu_bo = bo; + if (bo != sna->mode.shadow) + kgem_bo_destroy(&sna->kgem, sna->mode.shadow); + sna->mode.shadow = tmp; ++ sna->mode.shadow->active_scanout++; + + sna_dri2_pixmap_update_bo(sna, pixmap, bo); + +@@ -1311,6 +1903,9 @@ done: + priv->move_to_gpu_data = NULL; + priv->move_to_gpu = NULL; + ++ assert(!sna->mode.shadow_wait); ++ flush_events(sna); ++ + return ret; + } + +@@ -1358,22 +1953,43 @@ bool sna_pixmap_discard_shadow_damage(struct sna_pixmap *priv, + return RegionNil(&sna->mode.shadow_region); + } + ++static void sna_mode_damage(DamagePtr damage, RegionPtr region, void *closure) ++{ ++ struct sna *sna = closure; ++ ++ if (sna->mode.rr_active) ++ return; ++ ++ /* Throw away the rectangles if the region grows too big */ ++ region = DamageRegion(damage); ++ if (region->data) { ++ RegionRec dup; ++ ++ dup = *region; ++ RegionUninit(&dup); ++ ++ region->data = NULL; ++ } ++} ++ + static bool sna_mode_enable_shadow(struct sna *sna) + { +- ScreenPtr screen = sna->scrn->pScreen; ++ ScreenPtr screen = to_screen_from_sna(sna); + + DBG(("%s\n", __FUNCTION__)); + assert(sna->mode.shadow == NULL); + assert(sna->mode.shadow_damage == NULL); + assert(sna->mode.shadow_active == 0); ++ assert(!sna->mode.shadow_enabled); + +- sna->mode.shadow_damage = DamageCreate(NULL, NULL, +- DamageReportNone, 
TRUE, +- screen, screen); ++ sna->mode.shadow_damage = DamageCreate(sna_mode_damage, NULL, ++ DamageReportRawRegion, ++ TRUE, screen, sna); + if (!sna->mode.shadow_damage) + return false; + + DamageRegister(&sna->front->drawable, sna->mode.shadow_damage); ++ sna->mode.shadow_enabled = true; + return true; + } + +@@ -1381,8 +1997,10 @@ static void sna_mode_disable_shadow(struct sna *sna) + { + struct sna_pixmap *priv; + +- if (!sna->mode.shadow_damage) ++ if (!sna->mode.shadow_damage) { ++ assert(!sna->mode.shadow_enabled); + return; ++ } + + DBG(("%s\n", __FUNCTION__)); + +@@ -1393,8 +2011,10 @@ static void sna_mode_disable_shadow(struct sna *sna) + DamageUnregister(&sna->front->drawable, sna->mode.shadow_damage); + DamageDestroy(sna->mode.shadow_damage); + sna->mode.shadow_damage = NULL; ++ sna->mode.shadow_enabled = false; + + if (sna->mode.shadow) { ++ sna->mode.shadow->active_scanout--; + kgem_bo_destroy(&sna->kgem, sna->mode.shadow); + sna->mode.shadow = NULL; + } +@@ -1413,7 +2033,7 @@ static void sna_crtc_slave_damage(DamagePtr damage, RegionPtr region, void *clos + __FUNCTION__, + region->extents.x1, region->extents.y1, region->extents.x2, region->extents.y2, + region_num_rects(region), +- crtc->pipe, crtc->base->x, crtc->base->y)); ++ __sna_crtc_pipe(crtc), crtc->base->x, crtc->base->y)); + + assert(crtc->slave_damage == damage); + assert(sna->mode.shadow_damage); +@@ -1431,7 +2051,7 @@ static bool sna_crtc_enable_shadow(struct sna *sna, struct sna_crtc *crtc) + return true; + } + +- DBG(("%s: enabling for crtc %d\n", __FUNCTION__, crtc->id)); ++ DBG(("%s: enabling for crtc %d\n", __FUNCTION__, __sna_crtc_id(crtc))); + + if (!sna->mode.shadow_active) { + if (!sna_mode_enable_shadow(sna)) +@@ -1443,9 +2063,12 @@ static bool sna_crtc_enable_shadow(struct sna *sna, struct sna_crtc *crtc) + if (crtc->slave_pixmap) { + assert(crtc->slave_damage == NULL); + ++ DBG(("%s: enabling PRIME slave tracking on CRTC %d [pipe=%d], pixmap=%ld\n", ++ __FUNCTION__, 
__sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->slave_pixmap->drawable.serialNumber)); + crtc->slave_damage = DamageCreate(sna_crtc_slave_damage, NULL, + DamageReportRawRegion, TRUE, +- sna->scrn->pScreen, crtc); ++ to_screen_from_sna(sna), ++ crtc); + if (crtc->slave_damage == NULL) { + if (!--sna->mode.shadow_active) + sna_mode_disable_shadow(sna); +@@ -1465,6 +2088,9 @@ static void sna_crtc_disable_override(struct sna *sna, struct sna_crtc *crtc) + if (crtc->client_bo == NULL) + return; + ++ assert(crtc->client_bo->refcnt >= crtc->client_bo->active_scanout); ++ crtc->client_bo->active_scanout--; ++ + if (!crtc->transform) { + DrawableRec tmp; + +@@ -1489,7 +2115,7 @@ static void sna_crtc_disable_shadow(struct sna *sna, struct sna_crtc *crtc) + if (!crtc->shadow) + return; + +- DBG(("%s: disabling for crtc %d\n", __FUNCTION__, crtc->id)); ++ DBG(("%s: disabling for crtc %d\n", __FUNCTION__, __sna_crtc_id(crtc))); + assert(sna->mode.shadow_active > 0); + + if (crtc->slave_damage) { +@@ -1517,14 +2143,24 @@ __sna_crtc_disable(struct sna *sna, struct sna_crtc *sna_crtc) + sna_crtc_disable_shadow(sna, sna_crtc); + + if (sna_crtc->bo) { ++ DBG(("%s: releasing handle=%d from scanout, active=%d\n", ++ __FUNCTION__,sna_crtc->bo->handle, sna_crtc->bo->active_scanout-1)); ++ assert(sna_crtc->flags & CRTC_ON); + assert(sna_crtc->bo->active_scanout); + assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout); + sna_crtc->bo->active_scanout--; + kgem_bo_destroy(&sna->kgem, sna_crtc->bo); + sna_crtc->bo = NULL; ++ sna_crtc->flags &= ~CRTC_ON; + +- assert(sna->mode.front_active); +- sna->mode.front_active--; ++ if (sna->mode.hidden) { ++ sna->mode.hidden--; ++ assert(sna->mode.hidden); ++ assert(sna->mode.front_active == 0); ++ } else { ++ assert(sna->mode.front_active); ++ sna->mode.front_active--; ++ } + sna->mode.dirty = true; + } + +@@ -1532,13 +2168,19 @@ __sna_crtc_disable(struct sna *sna, struct sna_crtc *sna_crtc) + kgem_bo_destroy(&sna->kgem, 
sna_crtc->shadow_bo); + sna_crtc->shadow_bo = NULL; + } +- sna_crtc->transform = false; ++ if (sna_crtc->transform) { ++ assert(sna->mode.rr_active); ++ sna->mode.rr_active--; ++ sna_crtc->transform = false; ++ } + ++ sna_crtc->cursor_transform = false; ++ sna_crtc->hwcursor = true; + assert(!sna_crtc->shadow); + } + + static void +-sna_crtc_disable(xf86CrtcPtr crtc) ++sna_crtc_disable(xf86CrtcPtr crtc, bool force) + { + struct sna *sna = to_sna(crtc->scrn); + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); +@@ -1547,14 +2189,16 @@ sna_crtc_disable(xf86CrtcPtr crtc) + if (sna_crtc == NULL) + return; + +- DBG(("%s: disabling crtc [%d, pipe=%d]\n", __FUNCTION__, +- sna_crtc->id, sna_crtc->pipe)); ++ if (!force && sna_crtc->bo == NULL) ++ return; ++ ++ DBG(("%s: disabling crtc [%d, pipe=%d], force?=%d\n", __FUNCTION__, ++ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), force)); + + sna_crtc_force_outputs_off(crtc); +- assert(sna_crtc->dpms_mode == DPMSModeOff); + + memset(&arg, 0, sizeof(arg)); +- arg.crtc_id = sna_crtc->id; ++ arg.crtc_id = __sna_crtc_id(sna_crtc); + (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg); + + __sna_crtc_disable(sna, sna_crtc); +@@ -1574,19 +2218,19 @@ static void update_flush_interval(struct sna *sna) + + if (!crtc->enabled) { + DBG(("%s: CRTC:%d (pipe %d) disabled\n", +- __FUNCTION__,i, to_sna_crtc(crtc)->pipe)); ++ __FUNCTION__,i, sna_crtc_pipe(crtc))); + assert(to_sna_crtc(crtc)->bo == NULL); + continue; + } + +- if (to_sna_crtc(crtc)->dpms_mode != DPMSModeOn) { ++ if (to_sna_crtc(crtc)->bo == NULL) { + DBG(("%s: CRTC:%d (pipe %d) turned off\n", +- __FUNCTION__,i, to_sna_crtc(crtc)->pipe)); ++ __FUNCTION__,i, sna_crtc_pipe(crtc))); + continue; + } + + DBG(("%s: CRTC:%d (pipe %d) vrefresh=%f\n", +- __FUNCTION__, i, to_sna_crtc(crtc)->pipe, ++ __FUNCTION__, i, sna_crtc_pipe(crtc), + xf86ModeVRefresh(&crtc->mode))); + max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(&crtc->mode)); + } +@@ -1642,7 +2286,7 @@ void 
sna_copy_fbcon(struct sna *sna) + int dx, dy; + int i; + +- if (wedged(sna)) ++ if (wedged(sna) || isGPU(sna->scrn)) + return; + + DBG(("%s\n", __FUNCTION__)); +@@ -1662,7 +2306,7 @@ void sna_copy_fbcon(struct sna *sna) + assert(crtc != NULL); + + VG_CLEAR(mode); +- mode.crtc_id = crtc->id; ++ mode.crtc_id = __sna_crtc_id(crtc); + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode)) + continue; + if (!mode.fb_id) +@@ -1726,7 +2370,7 @@ void sna_copy_fbcon(struct sna *sna) + kgem_bo_destroy(&sna->kgem, bo); + + #if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(10, 0) +- sna->scrn->pScreen->canDoBGNoneRoot = ok; ++ to_screen_from_sna(sna)->canDoBGNoneRoot = ok; + #endif + } + +@@ -1736,7 +2380,6 @@ static bool use_shadow(struct sna *sna, xf86CrtcPtr crtc) + PictTransform crtc_to_fb; + struct pict_f_transform f_crtc_to_fb, f_fb_to_crtc; + unsigned pitch_limit; +- struct sna_pixmap *priv; + BoxRec b; + + assert(sna->scrn->virtualX && sna->scrn->virtualY); +@@ -1765,27 +2408,31 @@ static bool use_shadow(struct sna *sna, xf86CrtcPtr crtc) + return true; + } + +- priv = sna_pixmap_force_to_gpu(sna->front, MOVE_READ | __MOVE_SCANOUT); +- if (priv == NULL) +- return true; /* maybe we can create a bo for the scanout? */ +- +- if (sna->kgem.gen == 071) +- pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; +- else if ((sna->kgem.gen >> 3) > 4) +- pitch_limit = 32 * 1024; +- else if ((sna->kgem.gen >> 3) == 4) +- pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; +- else if ((sna->kgem.gen >> 3) == 3) +- pitch_limit = priv->gpu_bo->tiling ? 
8 * 1024 : 16 * 1024; +- else +- pitch_limit = 8 * 1024; +- DBG(("%s: gpu bo handle=%d tiling=%d pitch=%d, limit=%d\n", __FUNCTION__, priv->gpu_bo->handle, priv->gpu_bo->tiling, priv->gpu_bo->pitch, pitch_limit)); +- if (priv->gpu_bo->pitch > pitch_limit) +- return true; ++ if (!isGPU(sna->scrn)) { ++ struct sna_pixmap *priv; + +- if (priv->gpu_bo->tiling && sna->flags & SNA_LINEAR_FB) { +- DBG(("%s: gpu bo is tiled, need linear, forcing shadow\n", __FUNCTION__)); +- return true; ++ priv = sna_pixmap_force_to_gpu(sna->front, MOVE_READ | __MOVE_SCANOUT); ++ if (priv == NULL) ++ return true; /* maybe we can create a bo for the scanout? */ ++ ++ if (sna->kgem.gen == 071) ++ pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; ++ else if ((sna->kgem.gen >> 3) > 4) ++ pitch_limit = 32 * 1024; ++ else if ((sna->kgem.gen >> 3) == 4) ++ pitch_limit = priv->gpu_bo->tiling ? 16 * 1024 : 32 * 1024; ++ else if ((sna->kgem.gen >> 3) == 3) ++ pitch_limit = priv->gpu_bo->tiling ? 8 * 1024 : 16 * 1024; ++ else ++ pitch_limit = 8 * 1024; ++ DBG(("%s: gpu bo handle=%d tiling=%d pitch=%d, limit=%d\n", __FUNCTION__, priv->gpu_bo->handle, priv->gpu_bo->tiling, priv->gpu_bo->pitch, pitch_limit)); ++ if (priv->gpu_bo->pitch > pitch_limit) ++ return true; ++ ++ if (priv->gpu_bo->tiling && sna->flags & SNA_LINEAR_FB) { ++ DBG(("%s: gpu bo is tiled, need linear, forcing shadow\n", __FUNCTION__)); ++ return true; ++ } + } + + transform = NULL; +@@ -1800,9 +2447,9 @@ static bool use_shadow(struct sna *sna, xf86CrtcPtr crtc) + bool needs_transform = true; + unsigned rotation = rotation_reduce(&to_sna_crtc(crtc)->primary, crtc->rotation); + DBG(("%s: natively supported rotation? 
rotation=%x & supported=%x == %d\n", +- __FUNCTION__, crtc->rotation, to_sna_crtc(crtc)->primary.rotation.supported, +- !!(crtc->rotation & to_sna_crtc(crtc)->primary.rotation.supported))); +- if (to_sna_crtc(crtc)->primary.rotation.supported & rotation) ++ __FUNCTION__, rotation, to_sna_crtc(crtc)->primary.rotation.supported, ++ rotation == (rotation & to_sna_crtc(crtc)->primary.rotation.supported))); ++ if ((to_sna_crtc(crtc)->primary.rotation.supported & rotation) == rotation) + needs_transform = RRTransformCompute(crtc->x, crtc->y, + crtc->mode.HDisplay, crtc->mode.VDisplay, + RR_Rotate_0, transform, +@@ -1839,6 +2486,7 @@ static void set_shadow(struct sna *sna, RegionPtr region) + + assert(priv->gpu_bo); + assert(sna->mode.shadow); ++ assert(sna->mode.shadow->active_scanout); + + DBG(("%s: waiting for region %dx[(%d, %d), (%d, %d)], front handle=%d, shadow handle=%d\n", + __FUNCTION__, +@@ -1912,6 +2560,28 @@ get_scanout_bo(struct sna *sna, PixmapPtr pixmap) + return priv->gpu_bo; + } + ++static void shadow_clear(struct sna *sna, ++ PixmapPtr front, struct kgem_bo *bo, ++ xf86CrtcPtr crtc) ++{ ++ bool ok = false; ++ if (!wedged(sna)) ++ ok = sna->render.fill_one(sna, front, bo, 0, ++ 0, 0, crtc->mode.HDisplay, crtc->mode.VDisplay, ++ GXclear); ++ if (!ok) { ++ void *ptr = kgem_bo_map__gtt(&sna->kgem, bo); ++ if (ptr) ++ memset(ptr, 0, bo->pitch * crtc->mode.HDisplay); ++ } ++ sna->mode.shadow_dirty = true; ++} ++ ++static bool rr_active(xf86CrtcPtr crtc) ++{ ++ return crtc->transformPresent || crtc->rotation != RR_Rotate_0; ++} ++ + static struct kgem_bo *sna_crtc_attach(xf86CrtcPtr crtc) + { + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); +@@ -1919,10 +2589,15 @@ static struct kgem_bo *sna_crtc_attach(xf86CrtcPtr crtc) + struct sna *sna = to_sna(scrn); + struct kgem_bo *bo; + +- sna_crtc->transform = false; ++ if (sna_crtc->transform) { ++ assert(sna->mode.rr_active); ++ sna_crtc->transform = false; ++ sna->mode.rr_active--; ++ } + sna_crtc->rotation = 
RR_Rotate_0; + + if (use_shadow(sna, crtc)) { ++ PixmapPtr front; + unsigned long tiled_limit; + int tiling; + +@@ -1949,6 +2624,10 @@ force_shadow: + } + + tiling = I915_TILING_X; ++ if (crtc->rotation & (RR_Rotate_90 | RR_Rotate_270) && ++ sna->kgem.can_scanout_y) ++ tiling = I915_TILING_Y; ++ + if (sna->kgem.gen == 071) + tiled_limit = 16 * 1024 * 8; + else if ((sna->kgem.gen >> 3) > 4) +@@ -1977,8 +2656,8 @@ force_shadow: + return NULL; + } + +- if (__sna_pixmap_get_bo(sna->front) && !crtc->transformPresent) { +- DrawableRec tmp; ++ front = sna_crtc->slave_pixmap ?: sna->front; ++ if (__sna_pixmap_get_bo(front) && !rr_active(crtc)) { + BoxRec b; + + b.x1 = crtc->x; +@@ -1986,28 +2665,48 @@ force_shadow: + b.x2 = crtc->x + crtc->mode.HDisplay; + b.y2 = crtc->y + crtc->mode.VDisplay; + +- DBG(("%s: copying onto shadow CRTC: (%d, %d)x(%d, %d), handle=%d\n", +- __FUNCTION__, +- b.x1, b.y1, +- b.x2, b.y2, +- bo->handle)); +- +- tmp.width = crtc->mode.HDisplay; +- tmp.height = crtc->mode.VDisplay; +- tmp.depth = sna->front->drawable.depth; +- tmp.bitsPerPixel = sna->front->drawable.bitsPerPixel; +- +- (void)sna->render.copy_boxes(sna, GXcopy, +- &sna->front->drawable, __sna_pixmap_get_bo(sna->front), 0, 0, +- &tmp, bo, -b.x1, -b.y1, +- &b, 1, 0); +- } ++ if (b.x1 < 0) ++ b.x1 = 0; ++ if (b.y1 < 0) ++ b.y1 = 0; ++ if (b.x2 > scrn->virtualX) ++ b.x2 = scrn->virtualX; ++ if (b.y2 > scrn->virtualY) ++ b.y2 = scrn->virtualY; ++ if (b.x2 - b.x1 < crtc->mode.HDisplay || ++ b.y2 - b.y1 < crtc->mode.VDisplay) ++ shadow_clear(sna, front, bo, crtc); ++ ++ if (b.y2 > b.y1 && b.x2 > b.x1) { ++ DrawableRec tmp; ++ ++ DBG(("%s: copying onto shadow CRTC: (%d, %d)x(%d, %d) [fb=%dx%d], handle=%d\n", ++ __FUNCTION__, ++ b.x1, b.y1, ++ b.x2-b.x1, b.y2-b.y1, ++ scrn->virtualX, scrn->virtualY, ++ bo->handle)); ++ ++ tmp.width = crtc->mode.HDisplay; ++ tmp.height = crtc->mode.VDisplay; ++ tmp.depth = front->drawable.depth; ++ tmp.bitsPerPixel = front->drawable.bitsPerPixel; ++ ++ if 
(!sna->render.copy_boxes(sna, GXcopy, ++ &front->drawable, __sna_pixmap_get_bo(front), 0, 0, ++ &tmp, bo, -crtc->x, -crtc->y, ++ &b, 1, COPY_LAST)) ++ shadow_clear(sna, front, bo, crtc); ++ } ++ } else ++ shadow_clear(sna, front, bo, crtc); + + sna_crtc->shadow_bo_width = crtc->mode.HDisplay; + sna_crtc->shadow_bo_height = crtc->mode.VDisplay; + sna_crtc->shadow_bo = bo; + out_shadow: + sna_crtc->transform = true; ++ sna->mode.rr_active++; + return kgem_bo_reference(bo); + } else { + if (sna_crtc->shadow_bo) { +@@ -2048,26 +2747,26 @@ out_shadow: + } + + if (sna->flags & SNA_TEAR_FREE) { ++ RegionRec region; ++ + assert(sna_crtc->slave_pixmap == NULL); + + DBG(("%s: enabling TearFree shadow\n", __FUNCTION__)); ++ region.extents.x1 = 0; ++ region.extents.y1 = 0; ++ region.extents.x2 = sna->scrn->virtualX; ++ region.extents.y2 = sna->scrn->virtualY; ++ region.data = NULL; ++ + if (!sna_crtc_enable_shadow(sna, sna_crtc)) { + DBG(("%s: failed to enable crtc shadow\n", __FUNCTION__)); + return NULL; + } + +- if (sna->mode.shadow == NULL && !wedged(sna)) { +- RegionRec region; ++ if (sna->mode.shadow == NULL) { + struct kgem_bo *shadow; + + DBG(("%s: creating TearFree shadow bo\n", __FUNCTION__)); +- +- region.extents.x1 = 0; +- region.extents.y1 = 0; +- region.extents.x2 = sna->scrn->virtualX; +- region.extents.y2 = sna->scrn->virtualY; +- region.data = NULL; +- + shadow = kgem_create_2d(&sna->kgem, + region.extents.x2, + region.extents.y2, +@@ -2093,9 +2792,12 @@ out_shadow: + goto force_shadow; + } + ++ assert(__sna_pixmap_get_bo(sna->front) == NULL || ++ __sna_pixmap_get_bo(sna->front)->pitch == shadow->pitch); + sna->mode.shadow = shadow; +- set_shadow(sna, ®ion); ++ sna->mode.shadow->active_scanout++; + } ++ set_shadow(sna, ®ion); + + sna_crtc_disable_override(sna, sna_crtc); + } else +@@ -2107,6 +2809,37 @@ out_shadow: + } + } + ++#define SCALING_EPSILON (1./256) ++ ++static bool ++is_affine(const struct pixman_f_transform *t) ++{ ++ return (fabs(t->m[2][0]) < 
SCALING_EPSILON && ++ fabs(t->m[2][1]) < SCALING_EPSILON); ++} ++ ++static double determinant(const struct pixman_f_transform *t) ++{ ++ return t->m[0][0]*t->m[1][1] - t->m[1][0]*t->m[0][1]; ++} ++ ++static bool ++affine_is_pixel_exact(const struct pixman_f_transform *t) ++{ ++ double det = t->m[2][2] * determinant(t); ++ if (fabs (det * det - 1.0) < SCALING_EPSILON) { ++ if (fabs(t->m[0][1]) < SCALING_EPSILON && ++ fabs(t->m[1][0]) < SCALING_EPSILON) ++ return true; ++ ++ if (fabs(t->m[0][0]) < SCALING_EPSILON && ++ fabs(t->m[1][1]) < SCALING_EPSILON) ++ return true; ++ } ++ ++ return false; ++} ++ + static void sna_crtc_randr(xf86CrtcPtr crtc) + { + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); +@@ -2152,6 +2885,25 @@ static void sna_crtc_randr(xf86CrtcPtr crtc) + } else + crtc->transform_in_use = sna_crtc->rotation != RR_Rotate_0; + ++ /* Recompute the cursor after a potential change in transform */ ++ if (sna_crtc->cursor) { ++ assert(sna_crtc->cursor->ref > 0); ++ sna_crtc->cursor->ref--; ++ sna_crtc->cursor = NULL; ++ } ++ ++ if (needs_transform) { ++ sna_crtc->hwcursor = is_affine(&f_fb_to_crtc); ++ sna_crtc->cursor_transform = ++ sna_crtc->hwcursor && ++ !affine_is_pixel_exact(&f_fb_to_crtc); ++ } else { ++ sna_crtc->hwcursor = true; ++ sna_crtc->cursor_transform = false; ++ } ++ DBG(("%s: hwcursor?=%d, cursor_transform?=%d\n", ++ __FUNCTION__, sna_crtc->hwcursor, sna_crtc->cursor_transform)); ++ + crtc->crtc_to_framebuffer = crtc_to_fb; + crtc->f_crtc_to_framebuffer = f_crtc_to_fb; + crtc->f_framebuffer_to_crtc = f_fb_to_crtc; +@@ -2184,7 +2936,7 @@ static void sna_crtc_randr(xf86CrtcPtr crtc) + static void + sna_crtc_damage(xf86CrtcPtr crtc) + { +- ScreenPtr screen = crtc->scrn->pScreen; ++ ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); + struct sna *sna = to_sna(crtc->scrn); + RegionRec region, *damage; + +@@ -2200,15 +2952,21 @@ sna_crtc_damage(xf86CrtcPtr crtc) + if (region.extents.y2 > screen->height) + region.extents.y2 = screen->height; + ++ if 
(region.extents.x2 <= region.extents.x1 || ++ region.extents.y2 <= region.extents.y1) { ++ DBG(("%s: crtc not damaged, all-clipped\n", __FUNCTION__)); ++ return; ++ } ++ + DBG(("%s: marking crtc %d as completely damaged (%d, %d), (%d, %d)\n", +- __FUNCTION__, to_sna_crtc(crtc)->id, ++ __FUNCTION__, sna_crtc_id(crtc), + region.extents.x1, region.extents.y1, + region.extents.x2, region.extents.y2)); +- to_sna_crtc(crtc)->client_damage = region; + + assert(sna->mode.shadow_damage && sna->mode.shadow_active); + damage = DamageRegion(sna->mode.shadow_damage); + RegionUnion(damage, damage, ®ion); ++ to_sna_crtc(crtc)->crtc_damage = region; + + DBG(("%s: damage now %dx[(%d, %d), (%d, %d)]\n", + __FUNCTION__, +@@ -2260,6 +3018,21 @@ static const char *reflection_to_str(Rotation rotation) + } + } + ++static void reprobe_connectors(xf86CrtcPtr crtc) ++{ ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); ++ struct sna *sna = to_sna(crtc->scrn); ++ int i; ++ ++ for (i = 0; i < sna->mode.num_real_output; i++) { ++ xf86OutputPtr output = config->output[i]; ++ if (output->crtc == crtc) ++ to_sna_output(output)->reprobe = true; ++ } ++ ++ sna_mode_discover(sna, true); ++} ++ + static Bool + __sna_crtc_set_mode(xf86CrtcPtr crtc) + { +@@ -2268,11 +3041,19 @@ __sna_crtc_set_mode(xf86CrtcPtr crtc) + struct kgem_bo *saved_bo, *bo; + uint32_t saved_offset; + bool saved_transform; ++ bool saved_hwcursor; ++ bool saved_cursor_transform; ++ int ret; + +- DBG(("%s\n", __FUNCTION__)); ++ DBG(("%s: CRTC=%d, pipe=%d, hidden?=%d\n", __FUNCTION__, ++ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), sna->mode.hidden)); ++ if (sna->mode.hidden) ++ return TRUE; + + saved_bo = sna_crtc->bo; + saved_transform = sna_crtc->transform; ++ saved_cursor_transform = sna_crtc->cursor_transform; ++ saved_hwcursor = sna_crtc->hwcursor; + saved_offset = sna_crtc->offset; + + sna_crtc->fallback_shadow = false; +@@ -2285,26 +3066,31 @@ retry: /* Attach per-crtc pixmap or direct */ + } + + /* 
Prevent recursion when enabling outputs during execbuffer */ +- if (bo->exec && RQ(bo->rq)->bo == NULL) ++ if (bo->exec && RQ(bo->rq)->bo == NULL) { + _kgem_submit(&sna->kgem); ++ __kgem_bo_clear_dirty(bo); ++ } + + sna_crtc->bo = bo; +- if (!sna_crtc_apply(crtc)) { +- int err = errno; +- ++ ret = sna_crtc_apply(crtc); ++ if (ret) { + kgem_bo_destroy(&sna->kgem, bo); + +- if (!sna_crtc->shadow) { ++ if (!sna_crtc->fallback_shadow) { + sna_crtc->fallback_shadow = true; + goto retry; + } + + xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, +- "failed to set mode: %s [%d]\n", strerror(err), err); ++ "failed to set mode: %s [%d]\n", strerror(ret), ret); + goto error; + } + ++ sna_crtc->flags |= CRTC_ON; + bo->active_scanout++; ++ DBG(("%s: marking handle=%d as active=%d (removing %d from scanout, active=%d)\n", ++ __FUNCTION__, bo->handle, bo->active_scanout, ++ saved_bo ? saved_bo->handle : 0, saved_bo ? saved_bo->active_scanout - 1: -1)); + if (saved_bo) { + assert(saved_bo->active_scanout); + assert(saved_bo->refcnt >= saved_bo->active_scanout); +@@ -2315,17 +3101,34 @@ retry: /* Attach per-crtc pixmap or direct */ + sna_crtc_randr(crtc); + if (sna_crtc->transform) + sna_crtc_damage(crtc); ++ if (sna_crtc->cursor && /* Reload cursor if RandR maybe changed */ ++ (!sna_crtc->hwcursor || ++ saved_cursor_transform || sna_crtc->cursor_transform || ++ sna_crtc->cursor->rotation != crtc->rotation)) ++ sna_crtc_disable_cursor(sna, sna_crtc); ++ ++ assert(!sna->mode.hidden); + sna->mode.front_active += saved_bo == NULL; + sna->mode.dirty = true; +- DBG(("%s: front_active=%d\n", __FUNCTION__, sna->mode.front_active)); ++ DBG(("%s: handle=%d, scanout_active=%d, front_active=%d\n", ++ __FUNCTION__, bo->handle, bo->active_scanout, sna->mode.front_active)); + + return TRUE; + + error: + sna_crtc->offset = saved_offset; ++ if (sna_crtc->transform) { ++ assert(sna->mode.rr_active); ++ sna->mode.rr_active--; ++ } ++ if (saved_transform) ++ sna->mode.rr_active++; + sna_crtc->transform = 
saved_transform; ++ sna_crtc->cursor_transform = saved_cursor_transform; ++ sna_crtc->hwcursor = saved_hwcursor; + sna_crtc->bo = saved_bo; +- sna_mode_discover(sna); ++ ++ reprobe_connectors(crtc); + return FALSE; + } + +@@ -2346,14 +3149,14 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, + xf86DrvMsg(crtc->scrn->scrnIndex, X_INFO, + "switch to mode %dx%d@%.1f on %s using pipe %d, position (%d, %d), rotation %s, reflection %s\n", + mode->HDisplay, mode->VDisplay, xf86ModeVRefresh(mode), +- outputs_for_crtc(crtc, outputs, sizeof(outputs)), sna_crtc->pipe, ++ outputs_for_crtc(crtc, outputs, sizeof(outputs)), __sna_crtc_pipe(sna_crtc), + x, y, rotation_to_str(rotation), reflection_to_str(rotation)); + + assert(mode->HDisplay <= sna->mode.max_crtc_width && + mode->VDisplay <= sna->mode.max_crtc_height); + + #if HAS_GAMMA +- drmModeCrtcSetGamma(sna->kgem.fd, sna_crtc->id, ++ drmModeCrtcSetGamma(sna->kgem.fd, __sna_crtc_id(sna_crtc), + crtc->gamma_size, + crtc->gamma_red, + crtc->gamma_green, +@@ -2372,17 +3175,10 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, + static void + sna_crtc_dpms(xf86CrtcPtr crtc, int mode) + { +- struct sna_crtc *priv = to_sna_crtc(crtc); +- + DBG(("%s(pipe %d, dpms mode -> %d):= active=%d\n", +- __FUNCTION__, priv->pipe, mode, mode == DPMSModeOn)); +- if (priv->dpms_mode == mode) +- return; +- +- assert(priv); +- priv->dpms_mode = mode; ++ __FUNCTION__, sna_crtc_pipe(crtc), mode, mode == DPMSModeOn)); + +- if (mode == DPMSModeOn && crtc->enabled && priv->bo == NULL) { ++ if (mode == DPMSModeOn && crtc->enabled) { + if (__sna_crtc_set_mode(crtc)) + update_flush_interval(to_sna(crtc->scrn)); + else +@@ -2390,7 +3186,7 @@ sna_crtc_dpms(xf86CrtcPtr crtc, int mode) + } + + if (mode != DPMSModeOn) +- sna_crtc_disable(crtc); ++ sna_crtc_disable(crtc, false); + } + + void sna_mode_adjust_frame(struct sna *sna, int x, int y) +@@ -2426,7 +3222,7 @@ sna_crtc_gamma_set(xf86CrtcPtr crtc, + { + 
assert(to_sna_crtc(crtc)); + drmModeCrtcSetGamma(to_sna(crtc->scrn)->kgem.fd, +- to_sna_crtc(crtc)->id, ++ sna_crtc_id(crtc), + size, red, green, blue); + } + +@@ -2434,10 +3230,14 @@ static void + sna_crtc_destroy(xf86CrtcPtr crtc) + { + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); ++ struct plane *sprite, *sn; + + if (sna_crtc == NULL) + return; + ++ list_for_each_entry_safe(sprite, sn, &sna_crtc->sprites, link) ++ free(sprite); ++ + free(sna_crtc); + crtc->driver_private = NULL; + } +@@ -2455,7 +3255,7 @@ sna_crtc_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr pixmap) + return TRUE; + + DBG(("%s: CRTC:%d, pipe=%d setting scanout pixmap=%ld\n", +- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, ++ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), + pixmap ? pixmap->drawable.serialNumber : 0)); + + /* Disable first so that we can unregister the damage tracking */ +@@ -2576,6 +3376,10 @@ static int plane_details(struct sna *sna, struct plane *p) + } + } + ++ p->rotation.supported &= DBG_NATIVE_ROTATION; ++ if (!xf86ReturnOptValBool(sna->Options, OPTION_ROTATION, TRUE)) ++ p->rotation.supported = RR_Rotate_0; ++ + if (props != (uint32_t *)stack_props) + free(props); + +@@ -2583,20 +3387,26 @@ static int plane_details(struct sna *sna, struct plane *p) + return type; + } + ++static void add_sprite_plane(struct sna_crtc *crtc, ++ struct plane *details) ++{ ++ struct plane *sprite = malloc(sizeof(*sprite)); ++ if (!sprite) ++ return; ++ ++ memcpy(sprite, details, sizeof(*sprite)); ++ list_add(&sprite->link, &crtc->sprites); ++} ++ + static void + sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) + { + #define LOCAL_IOCTL_SET_CAP DRM_IOWR(0x0d, struct local_set_cap) +-#define LOCAL_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xb5, struct local_mode_get_plane_res) +-#define LOCAL_IOCTL_MODE_GETPLANE DRM_IOWR(0xb6, struct local_mode_get_plane) + struct local_set_cap { + uint64_t name; + uint64_t value; + } cap; +- struct local_mode_get_plane_res { +- 
uint64_t plane_id_ptr; +- uint64_t count_planes; +- } r; ++ struct local_mode_get_plane_res r; + uint32_t stack_planes[32]; + uint32_t *planes = stack_planes; + int i; +@@ -2629,18 +3439,7 @@ sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) + VG(VALGRIND_MAKE_MEM_DEFINED(planes, sizeof(uint32_t)*r.count_planes)); + + for (i = 0; i < r.count_planes; i++) { +- struct local_mode_get_plane { +- uint32_t plane_id; +- +- uint32_t crtc_id; +- uint32_t fb_id; +- +- uint32_t possible_crtcs; +- uint32_t gamma_size; +- +- uint32_t count_format_types; +- uint64_t format_type_ptr; +- } p; ++ struct local_mode_get_plane p; + struct plane details; + + VG_CLEAR(p); +@@ -2649,11 +3448,11 @@ sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) + if (drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_GETPLANE, &p)) + continue; + +- if ((p.possible_crtcs & (1 << crtc->pipe)) == 0) ++ if ((p.possible_crtcs & (1 << __sna_crtc_pipe(crtc))) == 0) + continue; + + DBG(("%s: plane %d is attached to our pipe=%d\n", +- __FUNCTION__, planes[i], crtc->pipe)); ++ __FUNCTION__, planes[i], __sna_crtc_pipe(crtc))); + + details.id = p.plane_id; + details.rotation.prop = 0; +@@ -2672,8 +3471,7 @@ sna_crtc_find_planes(struct sna *sna, struct sna_crtc *crtc) + break; + + case DRM_PLANE_TYPE_OVERLAY: +- if (crtc->sprite.id == 0) +- crtc->sprite = details; ++ add_sprite_plane(crtc, &details); + break; + } + } +@@ -2688,7 +3486,6 @@ sna_crtc_init__rotation(struct sna *sna, struct sna_crtc *crtc) + crtc->rotation = RR_Rotate_0; + crtc->primary.rotation.supported = RR_Rotate_0; + crtc->primary.rotation.current = RR_Rotate_0; +- crtc->sprite.rotation = crtc->primary.rotation; + } + + static void +@@ -2698,55 +3495,55 @@ sna_crtc_init__cursor(struct sna *sna, struct sna_crtc *crtc) + + VG_CLEAR(arg); + arg.flags = DRM_MODE_CURSOR_BO; +- arg.crtc_id = crtc->id; ++ arg.crtc_id = __sna_crtc_id(crtc); + arg.width = arg.height = 0; + arg.handle = 0; + + (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, 
&arg); ++ crtc->hwcursor = true; + } + + static bool +-sna_crtc_add(ScrnInfoPtr scrn, int id) ++sna_crtc_add(ScrnInfoPtr scrn, unsigned id) + { + struct sna *sna = to_sna(scrn); + xf86CrtcPtr crtc; + struct sna_crtc *sna_crtc; + struct drm_i915_get_pipe_from_crtc_id get_pipe; + +- DBG(("%s(%d)\n", __FUNCTION__, id)); ++ DBG(("%s(%d): is-zaphod? %d\n", __FUNCTION__, id, is_zaphod(scrn))); + + sna_crtc = calloc(sizeof(struct sna_crtc), 1); + if (sna_crtc == NULL) + return false; + + sna_crtc->id = id; +- sna_crtc->dpms_mode = -1; + + VG_CLEAR(get_pipe); + get_pipe.pipe = 0; +- get_pipe.crtc_id = sna_crtc->id; ++ get_pipe.crtc_id = id; + if (drmIoctl(sna->kgem.fd, + DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, + &get_pipe)) { + free(sna_crtc); + return false; + } +- sna_crtc->pipe = get_pipe.pipe; ++ assert((unsigned)get_pipe.pipe < 256); ++ sna_crtc->flags |= get_pipe.pipe << 8; + + if (is_zaphod(scrn) && +- scrn->confScreen->device->screen != sna_crtc->pipe) { ++ (get_zaphod_crtcs(sna) & (1 << get_pipe.pipe)) == 0) { + free(sna_crtc); + return true; + } + ++ list_init(&sna_crtc->sprites); + sna_crtc_init__rotation(sna, sna_crtc); +- + sna_crtc_find_planes(sna, sna_crtc); + +- DBG(("%s: CRTC:%d [pipe=%d], primary id=%x: supported-rotations=%x, current-rotation=%x, sprite id=%x: supported-rotations=%x, current-rotation=%x\n", +- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, +- sna_crtc->primary.id, sna_crtc->primary.rotation.supported, sna_crtc->primary.rotation.current, +- sna_crtc->sprite.id, sna_crtc->sprite.rotation.supported, sna_crtc->sprite.rotation.current)); ++ DBG(("%s: CRTC:%d [pipe=%d], primary id=%x: supported-rotations=%x, current-rotation=%x\n", ++ __FUNCTION__, id, get_pipe.pipe, ++ sna_crtc->primary.id, sna_crtc->primary.rotation.supported, sna_crtc->primary.rotation.current)); + + list_init(&sna_crtc->shadow_link); + +@@ -2761,7 +3558,7 @@ sna_crtc_add(ScrnInfoPtr scrn, int id) + crtc->driver_private = sna_crtc; + sna_crtc->base = crtc; + DBG(("%s: attached 
crtc[%d] pipe=%d\n", +- __FUNCTION__, id, sna_crtc->pipe)); ++ __FUNCTION__, id, __sna_crtc_pipe(sna_crtc))); + + return true; + } +@@ -2798,20 +3595,56 @@ find_property(struct sna *sna, struct sna_output *output, const char *name) + return -1; + } + ++static void update_properties(struct sna *sna, struct sna_output *output) ++{ ++ union compat_mode_get_connector compat_conn; ++ struct drm_mode_modeinfo dummy; ++ ++ VG_CLEAR(compat_conn); ++ ++ compat_conn.conn.connector_id = output->id; ++ compat_conn.conn.count_props = output->num_props; ++ compat_conn.conn.props_ptr = (uintptr_t)output->prop_ids; ++ compat_conn.conn.prop_values_ptr = (uintptr_t)output->prop_values; ++ compat_conn.conn.count_modes = 1; /* skip detect */ ++ compat_conn.conn.modes_ptr = (uintptr_t)&dummy; ++ compat_conn.conn.count_encoders = 0; ++ ++ (void)drmIoctl(sna->kgem.fd, ++ DRM_IOCTL_MODE_GETCONNECTOR, ++ &compat_conn.conn); ++ ++ assert(compat_conn.conn.count_props == output->num_props); ++ output->update_properties = false; ++} ++ + static xf86OutputStatus + sna_output_detect(xf86OutputPtr output) + { + struct sna *sna = to_sna(output->scrn); + struct sna_output *sna_output = output->driver_private; + union compat_mode_get_connector compat_conn; ++ uint32_t now; + + DBG(("%s(%s:%d)\n", __FUNCTION__, output->name, sna_output->id)); ++ sna_output->update_properties = false; + + if (!sna_output->id) { + DBG(("%s(%s) hiding due to lost connection\n", __FUNCTION__, output->name)); + return XF86OutputStatusDisconnected; + } + ++ /* Cache detections for 15s or hotplug event */ ++ now = GetTimeInMillis(); ++ if (sna_output->last_detect != 0 && ++ (int32_t)(now - sna_output->last_detect) <= OUTPUT_STATUS_CACHE_MS) { ++ DBG(("%s(%s) reporting cached status (since %dms): %d\n", ++ __FUNCTION__, output->name, now - sna_output->last_detect, ++ sna_output->status)); ++ sna_output->update_properties = true; ++ return sna_output->status; ++ } ++ + VG_CLEAR(compat_conn); + compat_conn.conn.connector_id = 
sna_output->id; + sna_output->num_modes = compat_conn.conn.count_modes = 0; /* reprobe */ +@@ -2854,15 +3687,23 @@ sna_output_detect(xf86OutputPtr output) + DBG(("%s(%s): found %d modes, connection status=%d\n", + __FUNCTION__, output->name, sna_output->num_modes, compat_conn.conn.connection)); + ++ sna_output->reprobe = false; ++ sna_output->last_detect = now; + switch (compat_conn.conn.connection) { + case DRM_MODE_CONNECTED: +- return XF86OutputStatusConnected; ++ sna_output->status = XF86OutputStatusConnected; ++ output->mm_width = compat_conn.conn.mm_width; ++ output->mm_height = compat_conn.conn.mm_height; ++ break; + case DRM_MODE_DISCONNECTED: +- return XF86OutputStatusDisconnected; ++ sna_output->status = XF86OutputStatusDisconnected; ++ break; + default: + case DRM_MODE_UNKNOWNCONNECTION: +- return XF86OutputStatusUnknown; ++ sna_output->status = XF86OutputStatusUnknown; ++ break; + } ++ return sna_output->status; + } + + static Bool +@@ -2895,6 +3736,27 @@ sna_output_mode_valid(xf86OutputPtr output, DisplayModePtr mode) + return MODE_OK; + } + ++static void sna_output_set_parsed_edid(xf86OutputPtr output, xf86MonPtr mon) ++{ ++ unsigned conn_mm_width, conn_mm_height; ++ ++ /* We set the output size based on values from the kernel */ ++ conn_mm_width = output->mm_width; ++ conn_mm_height = output->mm_height; ++ ++ xf86OutputSetEDID(output, mon); ++ ++ if (output->mm_width != conn_mm_width || output->mm_height != conn_mm_height) { ++ DBG(("%s)%s): kernel and Xorg disagree over physical size: kernel=%dx%dmm, Xorg=%dx%dmm\n", ++ __FUNCTION__, output->name, ++ conn_mm_width, conn_mm_height, ++ output->mm_width, output->mm_height)); ++ } ++ ++ output->mm_width = conn_mm_width; ++ output->mm_height = conn_mm_height; ++} ++ + static void + sna_output_attach_edid(xf86OutputPtr output) + { +@@ -2907,6 +3769,13 @@ sna_output_attach_edid(xf86OutputPtr output) + if (sna_output->edid_idx == -1) + return; + ++ /* Always refresh the blob as the kernel may randomly 
update the ++ * id even if the contents of the blob doesn't change, and a ++ * request for the stale id will return nothing. ++ */ ++ if (sna_output->update_properties) ++ update_properties(sna, sna_output); ++ + raw = sna_output->edid_raw; + blob.length = sna_output->edid_len; + +@@ -2917,8 +3786,12 @@ sna_output_attach_edid(xf86OutputPtr output) + old = NULL; + + blob.blob_id = sna_output->prop_values[sna_output->edid_idx]; +- DBG(("%s: attaching EDID id=%d, current=%d\n", +- __FUNCTION__, blob.blob_id, sna_output->edid_blob_id)); ++ if (!blob.blob_id) ++ goto done; ++ ++ DBG(("%s(%s): attaching EDID id=%d, current=%d\n", ++ __FUNCTION__, output->name, ++ blob.blob_id, sna_output->edid_blob_id)); + if (blob.blob_id == sna_output->edid_blob_id && 0) { /* sigh */ + if (output->MonInfo) { + /* XXX the property keeps on disappearing... */ +@@ -2936,26 +3809,45 @@ sna_output_attach_edid(xf86OutputPtr output) + } + + blob.data = (uintptr_t)raw; +- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) +- goto done; ++ do { ++ while (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) { ++ update_properties(sna, sna_output); ++ if (blob.blob_id == sna_output->prop_values[sna_output->edid_idx]) { ++ DBG(("%s(%s): failed to read blob, reusing previous\n", ++ __FUNCTION__, output->name)); ++ goto done; ++ } ++ blob.blob_id = sna_output->prop_values[sna_output->edid_idx]; ++ } + +- DBG(("%s: retrieving blob id=%d, length=%d\n", +- __FUNCTION__, blob.blob_id, blob.length)); ++ DBG(("%s(%s): retrieving blob id=%d, length=%d\n", ++ __FUNCTION__, output->name, blob.blob_id, blob.length)); + +- if (blob.length > sna_output->edid_len) { +- raw = realloc(raw, blob.length); +- if (raw == NULL) ++ if (blob.length < 128) + goto done; + +- VG(memset(raw, 0, blob.length)); +- blob.data = (uintptr_t)raw; +- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) +- goto done; ++ if (blob.length > sna_output->edid_len) { ++ raw = realloc(raw, blob.length); ++ if 
(raw == NULL) ++ goto done; ++ ++ VG(memset(raw, 0, blob.length)); ++ blob.data = (uintptr_t)raw; ++ } ++ } while (blob.length != sna_output->edid_len && ++ drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)); ++ ++ if (blob.length & 127) { ++ /* Truncated EDID! Make sure no one reads too far */ ++ *SECTION(NO_EDID, (uint8_t*)raw) = blob.length/128 - 1; ++ blob.length &= -128; + } + + if (old && + blob.length == sna_output->edid_len && + memcmp(old, raw, blob.length) == 0) { ++ DBG(("%s(%s): EDID + MonInfo is unchanged\n", ++ __FUNCTION__, output->name)); + assert(sna_output->edid_raw == raw); + sna_output->edid_blob_id = blob.blob_id; + RRChangeOutputProperty(output->randr_output, +@@ -2974,31 +3866,186 @@ skip_read: + mon->flags |= MONITOR_EDID_COMPLETE_RAWDATA; + } + +-done: +- xf86OutputSetEDID(output, mon); +- if (raw) { +- sna_output->edid_raw = raw; +- sna_output->edid_len = blob.length; +- sna_output->edid_blob_id = blob.blob_id; ++done: ++ sna_output_set_parsed_edid(output, mon); ++ if (raw) { ++ sna_output->edid_raw = raw; ++ sna_output->edid_len = blob.length; ++ sna_output->edid_blob_id = blob.blob_id; ++ } ++} ++ ++static void ++sna_output_attach_tile(xf86OutputPtr output) ++{ ++#if XF86_OUTPUT_VERSION >= 3 ++ struct sna *sna = to_sna(output->scrn); ++ struct sna_output *sna_output = output->driver_private; ++ struct drm_mode_get_blob blob; ++ struct xf86CrtcTileInfo tile_info, *set = NULL; ++ char *tile; ++ int id; ++ ++ id = find_property(sna, sna_output, "TILE"); ++ DBG(("%s: found? 
TILE=%d\n", __FUNCTION__, id)); ++ if (id == -1) ++ goto out; ++ ++ if (sna_output->update_properties) ++ update_properties(sna, sna_output); ++ ++ VG_CLEAR(blob); ++ blob.blob_id = sna_output->prop_values[id]; ++ blob.length = 0; ++ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) ++ goto out; ++ ++ do { ++ id = blob.length; ++ tile = alloca(id + 1); ++ blob.data = (uintptr_t)tile; ++ VG(memset(tile, 0, id)); ++ DBG(("%s: reading %d bytes for TILE blob\n", __FUNCTION__, id)); ++ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob)) ++ goto out; ++ } while (id != blob.length); ++ ++ tile[blob.length] = '\0'; /* paranoia */ ++ DBG(("%s: TILE='%s'\n", __FUNCTION__, tile)); ++ if (xf86OutputParseKMSTile(tile, blob.length, &tile_info)) ++ set = &tile_info; ++out: ++ xf86OutputSetTile(output, set); ++#endif ++} ++ ++static bool duplicate_mode(DisplayModePtr modes, DisplayModePtr m) ++{ ++ if (m == NULL) ++ return false; ++ ++ while (modes) { ++ if (xf86ModesEqual(modes, m)) ++ return true; ++ ++ modes = modes->next; ++ } ++ ++ return false; ++} ++ ++static struct pixel_count { ++ int16_t width, height; ++} common_16_9[] = { ++ { 640, 360 }, ++ { 720, 405 }, ++ { 864, 486 }, ++ { 960, 540 }, ++ { 1024, 576 }, ++ { 1280, 720 }, ++ { 1366, 768 }, ++ { 1600, 900 }, ++ { 1920, 1080 }, ++ { 2048, 1152 }, ++ { 2560, 1440 }, ++ { 2880, 1620 }, ++ { 3200, 1800 }, ++ { 3840, 2160 }, ++ { 4096, 2304 }, ++ { 5120, 2880 }, ++ { 7680, 4320 }, ++ { 15360, 8640 }, ++}, common_16_10[] = { ++ { 1280, 800 }, ++ { 1400, 900 }, ++ { 1680, 1050 }, ++ { 1920, 1200 }, ++ { 2560, 1600 }, ++}; ++ ++static DisplayModePtr ++default_modes(DisplayModePtr preferred) ++{ ++ DisplayModePtr modes; ++ int n; ++ ++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) ++ modes = xf86GetDefaultModes(); ++#else ++ modes = xf86GetDefaultModes(0, 0); ++#endif ++ ++ /* XXX O(n^2) mode list generation :( */ ++ ++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,4,99,901,0) ++ if 
(preferred) { ++ DisplayModePtr m; ++ ++ /* Add a half-resolution mode useful for large panels */ ++ m = xf86GTFMode(preferred->HDisplay/2, ++ preferred->VDisplay/2, ++ xf86ModeVRefresh(preferred), ++ FALSE, FALSE); ++ if (!duplicate_mode(modes, m)) ++ modes = xf86ModesAdd(modes, m); ++ else ++ free(m); ++ ++ if (preferred->VDisplay * 16 > preferred->HDisplay*9 - preferred->HDisplay/32 && ++ preferred->VDisplay * 16 < preferred->HDisplay*9 + preferred->HDisplay/32) { ++ DBG(("Adding 16:9 modes -- %d < %d > %d\n", ++ preferred->HDisplay*9 - preferred->HDisplay/32, ++ preferred->VDisplay * 16, ++ preferred->HDisplay*9 + preferred->HDisplay/32)); ++ for (n = 0; n < ARRAY_SIZE(common_16_9); n++) { ++ if (preferred->HDisplay <= common_16_9[n].width || ++ preferred->VDisplay <= common_16_9[n].height) ++ break; ++ ++ m = xf86GTFMode(common_16_9[n].width, ++ common_16_9[n].height, ++ xf86ModeVRefresh(preferred), ++ FALSE, FALSE); ++ if (!duplicate_mode(modes, m)) ++ modes = xf86ModesAdd(modes, m); ++ else ++ free(m); ++ } ++ } ++ ++ if (preferred->VDisplay * 16 > preferred->HDisplay*10 - preferred->HDisplay/32 && ++ preferred->VDisplay * 16 < preferred->HDisplay*10 + preferred->HDisplay/32) { ++ DBG(("Adding 16:10 modes -- %d < %d > %d\n", ++ preferred->HDisplay*10 - preferred->HDisplay/32, ++ preferred->VDisplay * 16, ++ preferred->HDisplay*10 + preferred->HDisplay/32)); ++ for (n = 0; n < ARRAY_SIZE(common_16_10); n++) { ++ if (preferred->HDisplay <= common_16_10[n].width || ++ preferred->VDisplay <= common_16_10[n].height) ++ break; ++ ++ m = xf86GTFMode(common_16_10[n].width, ++ common_16_10[n].height, ++ xf86ModeVRefresh(preferred), ++ FALSE, FALSE); ++ if (!duplicate_mode(modes, m)) ++ modes = xf86ModesAdd(modes, m); ++ else ++ free(m); ++ } ++ } + } +-} +- +-static DisplayModePtr +-default_modes(void) +-{ +-#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) +- return xf86GetDefaultModes(); +-#else +- return xf86GetDefaultModes(0, 0); + #endif ++ ++ 
return modes; + } + + static DisplayModePtr +-sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) ++sna_output_add_default_modes(xf86OutputPtr output, DisplayModePtr modes) + { + xf86MonPtr mon = output->MonInfo; + DisplayModePtr i, m, preferred = NULL; +- int max_x = 0, max_y = 0; ++ int max_x = 0, max_y = 0, max_clock = 0; + float max_vrefresh = 0.0; + + if (mon && GTF_SUPPORTED(mon->features.msc)) +@@ -3009,16 +4056,17 @@ sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) + preferred = m; + max_x = max(max_x, m->HDisplay); + max_y = max(max_y, m->VDisplay); ++ max_clock = max(max_clock, m->Clock); + max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m)); + } +- +- max_vrefresh = max(max_vrefresh, 60.0); + max_vrefresh *= (1 + SYNC_TOLERANCE); + +- m = default_modes(); ++ m = default_modes(preferred); + xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0); + + for (i = m; i; i = i->next) { ++ if (i->Clock > max_clock) ++ i->status = MODE_CLOCK_HIGH; + if (xf86ModeVRefresh(i) > max_vrefresh) + i->status = MODE_VSYNC; + if (preferred && +@@ -3034,28 +4082,47 @@ sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) + } + + static DisplayModePtr ++sna_output_override_edid(xf86OutputPtr output) ++{ ++ struct sna_output *sna_output = output->driver_private; ++ ++ if (sna_output->fake_edid_mon == NULL) ++ return NULL; ++ ++ xf86OutputSetEDID(output, sna_output->fake_edid_mon); ++ return xf86DDCGetModes(output->scrn->scrnIndex, ++ sna_output->fake_edid_mon); ++} ++ ++static DisplayModePtr + sna_output_get_modes(xf86OutputPtr output) + { + struct sna_output *sna_output = output->driver_private; +- DisplayModePtr Modes = NULL, current = NULL; ++ DisplayModePtr Modes, current; + int i; + + DBG(("%s(%s:%d)\n", __FUNCTION__, output->name, sna_output->id)); + assert(sna_output->id); + ++ Modes = sna_output_override_edid(output); ++ if (Modes) ++ return Modes; ++ + sna_output_attach_edid(output); ++ sna_output_attach_tile(output); + +- 
if (output->crtc) { ++ current = NULL; ++ if (output->crtc && !sna_output->hotplug_count) { + struct drm_mode_crtc mode; + + VG_CLEAR(mode); + assert(to_sna_crtc(output->crtc)); +- mode.crtc_id = to_sna_crtc(output->crtc)->id; ++ mode.crtc_id = sna_crtc_id(output->crtc); + + if (drmIoctl(to_sna(output->scrn)->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode) == 0) { + DBG(("%s: CRTC:%d, pipe=%d: has mode?=%d\n", __FUNCTION__, +- to_sna_crtc(output->crtc)->id, +- to_sna_crtc(output->crtc)->pipe, ++ sna_crtc_id(output->crtc), ++ sna_crtc_pipe(output->crtc), + mode.mode_valid && mode.mode.clock)); + + if (mode.mode_valid && mode.mode.clock) { +@@ -3117,7 +4184,7 @@ sna_output_get_modes(xf86OutputPtr output) + } + + if (sna_output->add_default_modes) +- Modes = sna_output_panel_edid(output, Modes); ++ Modes = sna_output_add_default_modes(output, Modes); + + return Modes; + } +@@ -3132,6 +4199,8 @@ sna_output_destroy(xf86OutputPtr output) + return; + + free(sna_output->edid_raw); ++ free(sna_output->fake_edid_raw); ++ + for (i = 0; i < sna_output->num_props; i++) { + if (sna_output->props[i].kprop == NULL) + continue; +@@ -3155,7 +4224,7 @@ sna_output_destroy(xf86OutputPtr output) + } + + static void +-sna_output_dpms(xf86OutputPtr output, int dpms) ++__sna_output_dpms(xf86OutputPtr output, int dpms, int fixup) + { + struct sna *sna = to_sna(output->scrn); + struct sna_output *sna_output = output->driver_private; +@@ -3182,8 +4251,9 @@ sna_output_dpms(xf86OutputPtr output, int dpms) + if (sna_output->backlight.iface && dpms != DPMSModeOn) { + if (old_dpms == DPMSModeOn) { + sna_output->backlight_active_level = sna_output_backlight_get(output); +- DBG(("%s: saving current backlight %d\n", +- __FUNCTION__, sna_output->backlight_active_level)); ++ DBG(("%s(%s:%d): saving current backlight %d\n", ++ __FUNCTION__, output->name, sna_output->id, ++ sna_output->backlight_active_level)); + } + sna_output->dpms_mode = dpms; + sna_output_backlight_off(sna_output); +@@ -3193,18 +4263,31 @@ 
sna_output_dpms(xf86OutputPtr output, int dpms) + drmModeConnectorSetProperty(sna->kgem.fd, + sna_output->id, + sna_output->dpms_id, +- dpms)) +- dpms = old_dpms; ++ dpms)) { ++ DBG(("%s(%s:%d): failed to set DPMS to %d (fixup? %d)\n", ++ __FUNCTION__, output->name, sna_output->id, dpms, fixup)); ++ if (fixup && dpms != DPMSModeOn) { ++ sna_crtc_disable(output->crtc, false); ++ return; ++ } ++ } + + if (sna_output->backlight.iface && dpms == DPMSModeOn) { +- DBG(("%s: restoring previous backlight %d\n", +- __FUNCTION__, sna_output->backlight_active_level)); ++ DBG(("%s(%d:%d: restoring previous backlight %d\n", ++ __FUNCTION__, output->name, sna_output->id, ++ sna_output->backlight_active_level)); + sna_output_backlight_on(sna_output); + } + + sna_output->dpms_mode = dpms; + } + ++static void ++sna_output_dpms(xf86OutputPtr output, int dpms) ++{ ++ __sna_output_dpms(output, dpms, true); ++} ++ + static bool + sna_property_ignore(drmModePropertyPtr prop) + { +@@ -3239,14 +4322,14 @@ sna_output_create_ranged_atom(xf86OutputPtr output, Atom *atom, + err = RRConfigureOutputProperty(output->randr_output, *atom, FALSE, + TRUE, immutable, 2, atom_range); + if (err != 0) +- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, ++ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, + "RRConfigureOutputProperty error, %d\n", err); + + err = RRChangeOutputProperty(output->randr_output, *atom, XA_INTEGER, + 32, PropModeReplace, 1, &value, + FALSE, FALSE); + if (err != 0) +- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, ++ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, + "RRChangeOutputProperty error, %d\n", err); + } + +@@ -3303,7 +4386,7 @@ sna_output_create_resources(xf86OutputPtr output) + p->kprop->flags & DRM_MODE_PROP_IMMUTABLE ? 
TRUE : FALSE, + p->num_atoms - 1, (INT32 *)&p->atoms[1]); + if (err != 0) { +- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, ++ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, + "RRConfigureOutputProperty error, %d\n", err); + } + +@@ -3315,7 +4398,7 @@ sna_output_create_resources(xf86OutputPtr output) + XA_ATOM, 32, PropModeReplace, 1, &p->atoms[j+1], + FALSE, FALSE); + if (err != 0) { +- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, ++ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, + "RRChangeOutputProperty error, %d\n", err); + } + } +@@ -3385,18 +4468,19 @@ sna_output_set_property(xf86OutputPtr output, Atom property, + if (value->type != XA_INTEGER || value->format != 32 || + value->size != 1) + return FALSE; +- val = *(uint32_t *)value->data; + ++ val = *(uint32_t *)value->data; + drmModeConnectorSetProperty(sna->kgem.fd, sna_output->id, + p->kprop->prop_id, (uint64_t)val); + return TRUE; + } else if (p->kprop->flags & DRM_MODE_PROP_ENUM) { +- Atom atom; +- const char *name; +- int j; ++ Atom atom; ++ const char *name; ++ int j; + + if (value->type != XA_ATOM || value->format != 32 || value->size != 1) + return FALSE; ++ + memcpy(&atom, value->data, 4); + name = NameForAtom(atom); + if (name == NULL) +@@ -3425,7 +4509,7 @@ static Bool + sna_output_get_property(xf86OutputPtr output, Atom property) + { + struct sna_output *sna_output = output->driver_private; +- int err; ++ int err, i, j; + + if (property == backlight_atom || property == backlight_deprecated_atom) { + INT32 val; +@@ -3449,7 +4533,7 @@ sna_output_get_property(xf86OutputPtr output, Atom property) + XA_INTEGER, 32, PropModeReplace, 1, &val, + FALSE, FALSE); + if (err != 0) { +- xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, ++ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, + "RRChangeOutputProperty error, %d\n", err); + return FALSE; + } +@@ -3457,6 +4541,40 @@ sna_output_get_property(xf86OutputPtr output, Atom property) + return TRUE; + } + ++ for (i = 0; i < sna_output->num_props; i++) { ++ struct 
sna_property *p = &sna_output->props[i]; ++ ++ if (p->atoms == NULL || p->atoms[0] != property) ++ continue; ++ ++ if (sna_output->update_properties && output->scrn->vtSema) ++ update_properties(to_sna(output->scrn), sna_output); ++ ++ err = 0; ++ if (p->kprop->flags & DRM_MODE_PROP_RANGE) { ++ err = RRChangeOutputProperty(output->randr_output, ++ property, XA_INTEGER, 32, ++ PropModeReplace, 1, ++ &sna_output->prop_values[i], ++ FALSE, FALSE); ++ } else if (p->kprop->flags & DRM_MODE_PROP_ENUM) { ++ for (j = 0; j < p->kprop->count_enums; j++) { ++ if (p->kprop->enums[j].value == sna_output->prop_values[i]) ++ break; ++ } ++ err = RRChangeOutputProperty(output->randr_output, ++ property, XA_ATOM, 32, ++ PropModeReplace, 1, ++ &p->atoms[j+1], ++ FALSE, FALSE); ++ } ++ ++ if (err != 0) ++ xf86DrvMsg(output->scrn->scrnIndex, X_WARNING, ++ "RRChangeOutputProperty error, %d\n", err); ++ return TRUE; ++ } ++ + return FALSE; + } + +@@ -3500,47 +4618,11 @@ static const char * const output_names[] = { + /* DRM_MODE_CONNECTOR_TV */ "TV", + /* DRM_MODE_CONNECTOR_eDP */ "eDP", + /* DRM_MODE_CONNECTOR_VIRTUAL */ "Virtual", +- /* DRM_MODE_CONNECTOR_DSI */ "DSI" ++ /* DRM_MODE_CONNECTOR_DSI */ "DSI", ++ /* DRM_MODE_CONNECTOR_DPI */ "DPI" + }; + + static bool +-sna_zaphod_match(const char *s, const char *output) +-{ +- char t[20]; +- unsigned int i = 0; +- +- do { +- /* match any outputs in a comma list, stopping at whitespace */ +- switch (*s) { +- case '\0': +- t[i] = '\0'; +- return strcmp(t, output) == 0; +- +- case ',': +- t[i] ='\0'; +- if (strcmp(t, output) == 0) +- return TRUE; +- i = 0; +- break; +- +- case ' ': +- case '\t': +- case '\n': +- case '\r': +- break; +- +- default: +- t[i++] = *s; +- break; +- } +- +- s++; +- } while (i < sizeof(t)); +- +- return false; +-} +- +-static bool + output_ignored(ScrnInfoPtr scrn, const char *name) + { + char monitor_name[64]; +@@ -3572,14 +4654,21 @@ gather_encoders(struct sna *sna, uint32_t id, int count, + struct 
drm_mode_get_encoder enc; + uint32_t *ids = NULL; + ++ DBG(("%s(%d): expected count=%d\n", __FUNCTION__, id, count)); ++ + VG_CLEAR(compat_conn); ++ VG_CLEAR(enc); + memset(out, 0, sizeof(*out)); + + do { +- free(ids); +- ids = malloc(sizeof(*ids) * count); +- if (ids == 0) ++ uint32_t *nids; ++ ++ nids = realloc(ids, sizeof(*ids) * count); ++ if (nids == NULL) { ++ free(ids); + return false; ++ } ++ ids = nids; + + compat_conn.conn.connector_id = id; + compat_conn.conn.count_props = 0; +@@ -3593,12 +4682,14 @@ gather_encoders(struct sna *sna, uint32_t id, int count, + compat_conn.conn.count_encoders = count = 0; + } + ++ VG(VALGRIND_MAKE_MEM_DEFINED(ids, sizeof(uint32_t)*compat_conn.conn.count_encoders)); + if (count == compat_conn.conn.count_encoders) + break; + + count = compat_conn.conn.count_encoders; + } while (1); + ++ DBG(("%s(%d): gathering %d encoders\n", __FUNCTION__, id, count)); + for (count = 0; count < compat_conn.conn.count_encoders; count++) { + enc.encoder_id = ids[count]; + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETENCODER, &enc)) { +@@ -3606,6 +4697,8 @@ gather_encoders(struct sna *sna, uint32_t id, int count, + count = 0; + break; + } ++ DBG(("%s(%d): encoder=%d, possible_crtcs=%x, possible_clones=%x\n", ++ __FUNCTION__, id, enc.encoder_id, enc.possible_crtcs, enc.possible_clones)); + out->possible_crtcs |= enc.possible_crtcs; + out->possible_clones |= enc.possible_clones; + +@@ -3731,6 +4824,116 @@ static int name_from_path(struct sna *sna, + return 0; + } + ++static char *fake_edid_name(xf86OutputPtr output) ++{ ++ struct sna *sna = to_sna(output->scrn); ++ const char *str, *colon; ++ ++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) ++ str = xf86GetOptValString(sna->Options, OPTION_EDID); ++#else ++ str = NULL; ++#endif ++ if (str == NULL) ++ return NULL; ++ ++ do { ++ colon = strchr(str, ':'); ++ if (colon == NULL) ++ return NULL; ++ ++ if (strncmp(str, output->name, colon-str) == 0 && ++ output->name[colon-str] == '\0') { 
++ char *path; ++ int len; ++ ++ str = colon + 1; ++ colon = strchr(str, ','); ++ if (colon) ++ len = colon - str; ++ else ++ len = strlen(str); ++ ++ path = malloc(len + 1); ++ if (path == NULL) ++ return NULL; ++ ++ memcpy(path, str, len); ++ path[len] = '\0'; ++ return path; ++ } ++ ++ str = strchr(colon + 1, ','); ++ if (str == NULL) ++ return NULL; ++ ++ str++; ++ } while (1); ++} ++ ++static void ++sna_output_load_fake_edid(xf86OutputPtr output) ++{ ++ struct sna_output *sna_output = output->driver_private; ++ const char *filename; ++ FILE *file; ++ void *raw; ++ int size; ++ xf86MonPtr mon; ++ ++ filename = fake_edid_name(output); ++ if (filename == NULL) ++ return; ++ ++ file = fopen(filename, "rb"); ++ if (file == NULL) ++ goto err; ++ ++ fseek(file, 0, SEEK_END); ++ size = ftell(file); ++ if (size % 128) { ++ fclose(file); ++ goto err; ++ } ++ ++ raw = malloc(size); ++ if (raw == NULL) { ++ fclose(file); ++ free(raw); ++ goto err; ++ } ++ ++ fseek(file, 0, SEEK_SET); ++ if (fread(raw, size, 1, file) != 1) { ++ fclose(file); ++ free(raw); ++ goto err; ++ } ++ fclose(file); ++ ++ mon = xf86InterpretEDID(output->scrn->scrnIndex, raw); ++ if (mon == NULL) { ++ free(raw); ++ goto err; ++ } ++ ++ if (mon && size > 128) ++ mon->flags |= MONITOR_EDID_COMPLETE_RAWDATA; ++ ++ sna_output->fake_edid_mon = mon; ++ sna_output->fake_edid_raw = raw; ++ ++ xf86DrvMsg(output->scrn->scrnIndex, X_CONFIG, ++ "Loading EDID from \"%s\" for output %s\n", ++ filename, output->name); ++ return; ++ ++err: ++ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, ++ "Could not read EDID file \"%s\" for output %s\n", ++ filename, output->name); ++} ++ + static int + sna_output_add(struct sna *sna, unsigned id, unsigned serial) + { +@@ -3765,6 +4968,7 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) + return -1; + } + assert(compat_conn.conn.connector_id == id); ++ DBG(("%s(%d): has %d associated encoders\n", __FUNCTION__, id, compat_conn.conn.count_encoders)); + + if 
(compat_conn.conn.connector_type < ARRAY_SIZE(output_names)) + output_name = output_names[compat_conn.conn.connector_type]; +@@ -3813,34 +5017,43 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) + } + + if (is_zaphod(scrn)) { +- const char *str; ++ unsigned zaphod_crtcs; + +- str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); +- if (str && !sna_zaphod_match(str, name)) { +- DBG(("%s: zaphod mismatch, want %s, have %s\n", __FUNCTION__, str, name)); ++ if (!sna_zaphod_match(sna, name)) { ++ DBG(("%s: zaphod mismatch, want %s, have %s\n", ++ __FUNCTION__, ++ xf86GetOptValString(sna->Options, OPTION_ZAPHOD) ?: "???", ++ name)); + return 0; + } + +- if ((possible_crtcs & (1 << scrn->confScreen->device->screen)) == 0) { +- if (str) { +- xf86DrvMsg(scrn->scrnIndex, X_ERROR, +- "%s is an invalid output for screen (pipe) %d\n", +- name, scrn->confScreen->device->screen); +- return -1; +- } else +- return 0; ++ zaphod_crtcs = get_zaphod_crtcs(sna); ++ possible_crtcs &= zaphod_crtcs; ++ if (possible_crtcs == 0) { ++ xf86DrvMsg(scrn->scrnIndex, X_ERROR, ++ "%s is an invalid output for screen %d\n", ++ name, scrn->confScreen->device->screen); ++ return -1; + } + +- possible_crtcs = 1; ++ possible_crtcs >>= ffs(zaphod_crtcs) - 1; + } + + sna_output = calloc(sizeof(struct sna_output), 1); + if (!sna_output) + return -1; + ++ sna_output->connector_type = compat_conn.conn.connector_type; ++ sna_output->connector_type_id = compat_conn.conn.connector_type_id; + sna_output->num_props = compat_conn.conn.count_props; + sna_output->prop_ids = malloc(sizeof(uint32_t)*compat_conn.conn.count_props); + sna_output->prop_values = malloc(sizeof(uint64_t)*compat_conn.conn.count_props); ++ if (sna_output->prop_ids == NULL || sna_output->prop_values == NULL) { ++ free(sna_output->prop_ids); ++ free(sna_output->prop_values); ++ free(sna_output); ++ return -1; ++ } + + compat_conn.conn.count_encoders = 0; + +@@ -3865,16 +5078,16 @@ sna_output_add(struct sna *sna, unsigned id, 
unsigned serial) + /* Construct name from topology, and recheck if output is acceptable */ + path = name_from_path(sna, sna_output, name); + if (path) { +- const char *str; +- + if (output_ignored(scrn, name)) { + len = 0; + goto skip; + } + +- str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD); +- if (str && !sna_zaphod_match(str, name)) { +- DBG(("%s: zaphod mismatch, want %s, have %s\n", __FUNCTION__, str, name)); ++ if (is_zaphod(scrn) && !sna_zaphod_match(sna, name)) { ++ DBG(("%s: zaphod mismatch, want %s, have %s\n", ++ __FUNCTION__, ++ xf86GetOptValString(sna->Options, OPTION_ZAPHOD) ?: "???", ++ name)); + len = 0; + goto skip; + } +@@ -3889,7 +5102,6 @@ sna_output_add(struct sna *sna, unsigned id, unsigned serial) + if (strcmp(output->name, name) == 0) { + assert(output->scrn == scrn); + assert(output->funcs == &sna_output_funcs); +- assert(to_sna_output(output)->id == 0); + sna_output_destroy(output); + goto reset; + } +@@ -3935,6 +5147,8 @@ reset: + sna_output->id = compat_conn.conn.connector_id; + sna_output->is_panel = is_panel(compat_conn.conn.connector_type); + sna_output->edid_idx = find_property(sna, sna_output, "EDID"); ++ sna_output->link_status_idx = ++ find_property(sna, sna_output, "link-status"); + if (find_property(sna, sna_output, "scaling mode") != -1) + sna_output->add_default_modes = + xf86ReturnOptValBool(output->options, OPTION_DEFAULT_MODES, TRUE); +@@ -3945,10 +5159,8 @@ reset: + sna_output->dpms_mode = sna_output->prop_values[i]; + DBG(("%s: found 'DPMS' (idx=%d, id=%d), initial value=%d\n", + __FUNCTION__, i, sna_output->dpms_id, sna_output->dpms_mode)); +- } else { +- sna_output->dpms_id = -1; ++ } else + sna_output->dpms_mode = DPMSModeOff; +- } + + sna_output->possible_encoders = possible_encoders; + sna_output->attached_encoders = attached_encoders; +@@ -3963,12 +5175,13 @@ reset: + sna_output->base = output; + + backlight_init(&sna_output->backlight); +- if (sna_output->is_panel) +- sna_output_backlight_init(output); ++ 
sna_output_backlight_init(output); + + output->possible_crtcs = possible_crtcs & count_to_mask(sna->mode.num_real_crtc); + output->interlaceAllowed = TRUE; + ++ sna_output_load_fake_edid(output); ++ + if (serial) { + if (output->randr_output == NULL) { + output->randr_output = RROutputCreate(xf86ScrnToScreen(scrn), name, len, output); +@@ -3976,6 +5189,7 @@ reset: + goto cleanup; + } + ++ RROutputChanged(output->randr_output, TRUE); + sna_output_create_resources(output); + RRPostPendingProperties(output->randr_output); + +@@ -4009,38 +5223,6 @@ skip: + return len; + } + +-static void sna_output_del(xf86OutputPtr output) +-{ +- ScrnInfoPtr scrn = output->scrn; +- xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); +- int i; +- +- DBG(("%s(%s)\n", __FUNCTION__, output->name)); +- assert(to_sna_output(output)); +- +- RROutputDestroy(output->randr_output); +- sna_output_destroy(output); +- +- while (output->probed_modes) +- xf86DeleteMode(&output->probed_modes, output->probed_modes); +- +- free(output); +- +- for (i = 0; i < config->num_output; i++) +- if (config->output[i] == output) +- break; +- assert(i < to_sna(scrn)->mode.num_real_output); +- DBG(("%s: removing output #%d of %d\n", +- __FUNCTION__, i, to_sna(scrn)->mode.num_real_output)); +- +- for (; i < config->num_output; i++) { +- config->output[i] = config->output[i+1]; +- config->output[i]->possible_clones >>= 1; +- } +- config->num_output--; +- to_sna(scrn)->mode.num_real_output--; +-} +- + static int output_rank(const void *A, const void *B) + { + const xf86OutputPtr *a = A; +@@ -4058,6 +5240,7 @@ static void sort_config_outputs(struct sna *sna) + { + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + qsort(config->output, sna->mode.num_real_output, sizeof(*config->output), output_rank); ++ config->compat_output = 0; /* make sure it is a sane value */ + sna_mode_compute_possible_outputs(sna); + } + +@@ -4080,11 +5263,15 @@ static bool disable_unused_crtc(struct sna *sna) + bool update = 
false; + int o, c; + ++ DBG(("%s\n", __FUNCTION__)); ++ + for (c = 0; c < sna->mode.num_real_crtc; c++) { + xf86CrtcPtr crtc = config->crtc[c]; + +- if (!crtc->enabled) ++ if (!crtc->enabled) { ++ sna_crtc_disable(crtc, false); + continue; ++ } + + for (o = 0; o < sna->mode.num_real_output; o++) { + xf86OutputPtr output = config->output[o]; +@@ -4094,7 +5281,7 @@ static bool disable_unused_crtc(struct sna *sna) + + if (o == sna->mode.num_real_output) { + DBG(("%s: CRTC:%d was enabled with no outputs\n", +- __FUNCTION__, to_sna_crtc(crtc)->id)); ++ __FUNCTION__, sna_crtc_id(crtc))); + crtc->enabled = false; + update = true; + } +@@ -4108,17 +5295,145 @@ static bool disable_unused_crtc(struct sna *sna) + return update; + } + +-void sna_mode_discover(struct sna *sna) ++bool sna_mode_find_hotplug_connector(struct sna *sna, unsigned id) ++{ ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ int i; ++ ++ for (i = 0; i < sna->mode.num_real_output; i++) { ++ struct sna_output *output = to_sna_output(config->output[i]); ++ if (output->id == id) { ++ output->reprobe = true; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static bool ++output_retrain_link(struct sna *sna, struct sna_output *output) ++{ ++ struct sna_crtc *crtc = to_sna_crtc(output->base->crtc); ++ int crtc_x = crtc->offset & 0xffff; ++ int crtc_y = crtc->offset >> 16; ++ ++ return sna_crtc_flip(sna, crtc, crtc->bo, crtc_x, crtc_y); ++} ++ ++static bool ++output_check_link(struct sna *sna, struct sna_output *output) ++{ ++ uint64_t link_status; ++ ++ if (!output->base->crtc) ++ return true; ++ ++ if (output->link_status_idx == -1) ++ return true; ++ ++#define LINK_STATUS_GOOD 0 ++ link_status = output->prop_values[output->link_status_idx]; ++ DBG(("%s: link_status=%d\n", __FUNCTION__, link_status)); ++ if (link_status == LINK_STATUS_GOOD) ++ return true; ++ ++ /* Perform a modeset as required for "link-status" = BAD */ ++ if (!output_retrain_link(sna, output)) ++ return false; ++ ++ /* 
Query the "link-status" again to confirm the modeset */ ++ update_properties(sna, output); ++ ++ link_status = output->prop_values[output->link_status_idx]; ++ DBG(("%s: link_status=%d after modeset\n", __FUNCTION__, link_status)); ++ return link_status == LINK_STATUS_GOOD; ++} ++ ++static bool ++output_check_status(struct sna *sna, struct sna_output *output) ++{ ++ union compat_mode_get_connector compat_conn; ++ struct drm_mode_modeinfo dummy; ++ struct drm_mode_get_blob blob; ++ xf86OutputStatus status; ++ char *edid; ++ ++ VG_CLEAR(compat_conn); ++ ++ compat_conn.conn.connection = -1; ++ compat_conn.conn.connector_id = output->id; ++ compat_conn.conn.count_modes = 1; /* skip detect */ ++ compat_conn.conn.modes_ptr = (uintptr_t)&dummy; ++ compat_conn.conn.count_encoders = 0; ++ compat_conn.conn.props_ptr = (uintptr_t)output->prop_ids; ++ compat_conn.conn.prop_values_ptr = (uintptr_t)output->prop_values; ++ compat_conn.conn.count_props = output->num_props; ++ ++ if (drmIoctl(sna->kgem.fd, ++ DRM_IOCTL_MODE_GETCONNECTOR, ++ &compat_conn.conn) == 0) ++ output->update_properties = false; ++ ++ if (!output_check_link(sna, output)) ++ return false; ++ ++ if (output->reprobe) ++ return false; ++ ++ switch (compat_conn.conn.connection) { ++ case DRM_MODE_CONNECTED: ++ status = XF86OutputStatusConnected; ++ break; ++ case DRM_MODE_DISCONNECTED: ++ status = XF86OutputStatusDisconnected; ++ break; ++ default: ++ case DRM_MODE_UNKNOWNCONNECTION: ++ status = XF86OutputStatusUnknown; ++ break; ++ } ++ if (output->status != status) ++ return false; ++ ++ if (status != XF86OutputStatusConnected) ++ return true; ++ ++ if (output->num_modes != compat_conn.conn.count_modes) ++ return false; ++ ++ if (output->edid_len == 0) ++ return false; ++ ++ edid = alloca(output->edid_len); ++ ++ VG_CLEAR(blob); ++ blob.blob_id = output->prop_values[output->edid_idx]; ++ blob.length = output->edid_len; ++ blob.data = (uintptr_t)edid; ++ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, 
&blob)) ++ return false; ++ ++ if (blob.length != output->edid_len) ++ return false; ++ ++ return memcmp(edid, output->edid_raw, output->edid_len) == 0; ++} ++ ++void sna_mode_discover(struct sna *sna, bool tell) + { + ScreenPtr screen = xf86ScrnToScreen(sna->scrn); + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ bool force = sna->flags & SNA_REPROBE; + struct drm_mode_card_res res; +- uint32_t connectors[32]; ++ uint32_t connectors[32], now; + unsigned changed = 0; + unsigned serial; + int i, j; + + DBG(("%s()\n", __FUNCTION__)); ++ sna->flags &= ~SNA_REPROBE; ++ + VG_CLEAR(connectors); + + memset(&res, 0, sizeof(res)); +@@ -4128,10 +5443,11 @@ void sna_mode_discover(struct sna *sna) + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETRESOURCES, &res)) + return; + +- DBG(("%s: now %d (was %d) connectors\n", __FUNCTION__, +- res.count_connectors, sna->mode.num_real_output)); ++ DBG(("%s: now %d (was %d) connectors, %d encoders, %d crtc\n", __FUNCTION__, ++ res.count_connectors, sna->mode.num_real_output, ++ res.count_encoders, res.count_crtcs)); + if (res.count_connectors > 32) +- return; ++ res.count_connectors = 32; + + assert(sna->mode.num_real_crtc == res.count_crtcs || is_zaphod(sna->scrn)); + assert(sna->mode.max_crtc_width == res.max_width); +@@ -4142,6 +5458,11 @@ void sna_mode_discover(struct sna *sna) + if (serial == 0) + serial = ++sna->mode.serial; + ++ if (force) { ++ changed = 4; ++ now = 0; ++ } else ++ now = GetTimeInMillis(); + for (i = 0; i < res.count_connectors; i++) { + DBG(("%s: connector[%d] = %d\n", __FUNCTION__, i, connectors[i])); + for (j = 0; j < sna->mode.num_real_output; j++) { +@@ -4161,32 +5482,42 @@ void sna_mode_discover(struct sna *sna) + + for (i = 0; i < sna->mode.num_real_output; i++) { + xf86OutputPtr output = config->output[i]; ++ struct sna_output *sna_output = to_sna_output(output); + +- if (to_sna_output(output)->id == 0) ++ if (sna_output->id == 0) + continue; + +- if (to_sna_output(output)->serial == serial) 
++ if (sna_output->serial == serial) { ++ if (output_check_status(sna, sna_output)) { ++ DBG(("%s: output %s (id=%d), retained state\n", ++ __FUNCTION__, output->name, sna_output->id)); ++ sna_output->last_detect = now; ++ } else { ++ DBG(("%s: output %s (id=%d), changed state, reprobing\n", ++ __FUNCTION__, output->name, sna_output->id)); ++ sna_output->hotplug_count++; ++ sna_output->last_detect = 0; ++ changed |= 4; ++ } + continue; ++ } + + DBG(("%s: removing output %s (id=%d), serial=%u [now %u]\n", +- __FUNCTION__, output->name, to_sna_output(output)->id, +- to_sna_output(output)->serial, serial)); ++ __FUNCTION__, output->name, sna_output->id, ++ sna_output->serial, serial)); + + xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, +- "%s output %s\n", +- sna->flags & SNA_REMOVE_OUTPUTS ? "Removed" : "Disabled", ++ "Disabled output %s\n", + output->name); +- if (sna->flags & SNA_REMOVE_OUTPUTS) { +- sna_output_del(output); +- i--; +- } else { +- to_sna_output(output)->id = 0; +- output->crtc = NULL; +- } ++ sna_output->id = 0; ++ sna_output->last_detect = 0; ++ output->crtc = NULL; ++ RROutputChanged(output->randr_output, TRUE); + changed |= 2; + } + +- if (changed) { ++ /* Have the list of available outputs been updated? */ ++ if (changed & 3) { + DBG(("%s: outputs changed, broadcasting\n", __FUNCTION__)); + + sna_mode_set_primary(sna); +@@ -4200,6 +5531,51 @@ void sna_mode_discover(struct sna *sna) + + xf86RandR12TellChanged(screen); + } ++ ++ /* If anything has changed, refresh the RandR information. ++ * Note this could recurse once from udevless RRGetInfo() probes, ++ * but only once. ++ */ ++ if (changed && tell) ++ RRGetInfo(screen, TRUE); ++} ++ ++/* Since we only probe the current mode on startup, we may not have the full ++ * list of modes available until the user explicitly requests them. Fake a ++ * hotplug event after a second after starting to fill in any missing modes. 
++ */ ++static CARD32 sna_mode_coldplug(OsTimerPtr timer, CARD32 now, void *data) ++{ ++ struct sna *sna = data; ++ ScreenPtr screen = xf86ScrnToScreen(sna->scrn); ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ bool reprobe = false; ++ int i; ++ ++ DBG(("%s()\n", __FUNCTION__)); ++ ++ for (i = 0; i < sna->mode.num_real_output; i++) { ++ xf86OutputPtr output = config->output[i]; ++ struct sna_output *sna_output = to_sna_output(output); ++ ++ if (sna_output->id == 0) ++ continue; ++ if (sna_output->last_detect) ++ continue; ++ if (output->status == XF86OutputStatusDisconnected) ++ continue; ++ ++ DBG(("%s: output %s connected, needs reprobe\n", ++ __FUNCTION__, output->name)); ++ reprobe = true; ++ } ++ ++ if (reprobe) { ++ RRGetInfo(screen, TRUE); ++ RRTellChanged(screen); ++ } ++ free(timer); ++ return 0; + } + + static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) +@@ -4208,7 +5584,7 @@ static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) + + DBG(("%s\n", __FUNCTION__)); + +- if (wedged(sna)) ++ if (wedged(sna) || isGPU(sna->scrn)) + return; + + old_priv = sna_pixmap_force_to_gpu(old, MOVE_READ); +@@ -4220,12 +5596,19 @@ static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) + return; + + if (old_priv->clear) { +- (void)sna->render.fill_one(sna, new, new_priv->gpu_bo, +- old_priv->clear_color, +- 0, 0, +- new->drawable.width, +- new->drawable.height, +- GXcopy); ++ bool ok = false; ++ if (!wedged(sna)) ++ ok = sna->render.fill_one(sna, new, new_priv->gpu_bo, ++ old_priv->clear_color, ++ 0, 0, ++ new->drawable.width, ++ new->drawable.height, ++ GXcopy); ++ if (!ok) { ++ void *ptr = kgem_bo_map__gtt(&sna->kgem, new_priv->gpu_bo); ++ if (ptr) ++ memset(ptr, 0, new_priv->gpu_bo->pitch*new->drawable.height); ++ } + new_priv->clear = true; + new_priv->clear_color = old_priv->clear_color; + } else { +@@ -4281,11 +5664,18 @@ static void copy_front(struct sna *sna, PixmapPtr old, PixmapPtr new) + 
__FUNCTION__, box.x2, box.y2, sx, sy, dx, dy)); + + if (box.x2 != new->drawable.width || box.y2 != new->drawable.height) { +- (void)sna->render.fill_one(sna, new, new_priv->gpu_bo, 0, +- 0, 0, +- new->drawable.width, +- new->drawable.height, +- GXclear); ++ bool ok = false; ++ if (!wedged(sna)) ++ ok = sna->render.fill_one(sna, new, new_priv->gpu_bo, 0, ++ 0, 0, ++ new->drawable.width, ++ new->drawable.height, ++ GXclear); ++ if (!ok) { ++ void *ptr = kgem_bo_map__gtt(&sna->kgem, new_priv->gpu_bo); ++ if (ptr) ++ memset(ptr, 0, new_priv->gpu_bo->pitch*new->drawable.height); ++ } + } + (void)sna->render.copy_boxes(sna, GXcopy, + &old->drawable, old_priv->gpu_bo, sx, sy, +@@ -4302,7 +5692,7 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) + { + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); + struct sna *sna = to_sna(scrn); +- ScreenPtr screen = scrn->pScreen; ++ ScreenPtr screen = xf86ScrnToScreen(scrn); + PixmapPtr new_front; + int i; + +@@ -4337,9 +5727,20 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) + for (i = 0; i < sna->mode.num_real_crtc; i++) + sna_crtc_disable_shadow(sna, to_sna_crtc(config->crtc[i])); + assert(sna->mode.shadow_active == 0); ++ assert(!sna->mode.shadow_enabled); + assert(sna->mode.shadow_damage == NULL); + assert(sna->mode.shadow == NULL); + ++ /* Flush pending shadow updates */ ++ if (sna->mode.flip_active) { ++ DBG(("%s: waiting for %d outstanding TearFree flips\n", ++ __FUNCTION__, sna->mode.flip_active)); ++ while (sna->mode.flip_active && sna_mode_wait_for_event(sna)) ++ sna_mode_wakeup(sna); ++ } ++ ++ /* Cancel a pending [un]flip (as the pixmaps no longer match) */ ++ sna_present_cancel_flip(sna); + copy_front(sna, sna->front, new_front); + + screen->SetScreenPixmap(new_front); +@@ -4351,14 +5752,6 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) + scrn->virtualY = height; + scrn->displayWidth = width; + +- /* Flush pending shadow updates */ +- if (sna->mode.flip_active) { +- DBG(("%s: 
waiting for %d outstanding TearFree flips\n", +- __FUNCTION__, sna->mode.flip_active)); +- while (sna->mode.flip_active && sna_mode_wait_for_event(sna)) +- sna_mode_wakeup(sna); +- } +- + /* Only update the CRTCs if we are in control */ + if (!scrn->vtSema) + return TRUE; +@@ -4371,7 +5764,7 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) + continue; + + if (!__sna_crtc_set_mode(crtc)) +- sna_crtc_disable(crtc); ++ sna_crtc_disable(crtc, false); + } + + sna_mode_wakeup(sna); +@@ -4381,19 +5774,6 @@ sna_mode_resize(ScrnInfoPtr scrn, int width, int height) + } + + /* cursor handling */ +-struct sna_cursor { +- struct sna_cursor *next; +- uint32_t *image; +- Rotation rotation; +- int ref; +- int size; +- int last_width; +- int last_height; +- unsigned handle; +- unsigned serial; +- unsigned alloc; +-}; +- + static void + rotate_coord(Rotation rotation, int size, + int x_dst, int y_dst, +@@ -4429,36 +5809,6 @@ rotate_coord(Rotation rotation, int size, + *y_src = y_dst; + } + +-static void +-rotate_coord_back(Rotation rotation, int size, int *x, int *y) +-{ +- int t; +- +- if (rotation & RR_Reflect_X) +- *x = size - *x - 1; +- if (rotation & RR_Reflect_Y) +- *y = size - *y - 1; +- +- switch (rotation & 0xf) { +- case RR_Rotate_0: +- break; +- case RR_Rotate_90: +- t = *x; +- *x = *y; +- *y = size - t - 1; +- break; +- case RR_Rotate_180: +- *x = size - *x - 1; +- *y = size - *y - 1; +- break; +- case RR_Rotate_270: +- t = *x; +- *x = size - *y - 1; +- *y = t; +- break; +- } +-} +- + static struct sna_cursor *__sna_create_cursor(struct sna *sna, int size) + { + struct sna_cursor *c; +@@ -4519,6 +5869,17 @@ static uint32_t *get_cursor_argb(CursorPtr c) + #endif + } + ++static int __cursor_size(int width, int height) ++{ ++ int i, size; ++ ++ i = MAX(width, height); ++ for (size = 64; size < i; size <<= 1) ++ ; ++ ++ return size; ++} ++ + static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) + { + struct sna_cursor *cursor; +@@ -4526,6 
+5887,7 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) + const uint32_t *argb; + uint32_t *image; + int width, height, pitch, size, x, y; ++ bool transformed; + Rotation rotation; + + assert(sna->cursor.ref); +@@ -4537,8 +5899,8 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) + cursor ? cursor->serial : 0, + sna->cursor.serial)); + if (cursor && cursor->serial == sna->cursor.serial) { +- assert(cursor->size == sna->cursor.size); +- assert(cursor->rotation == crtc->transform_in_use ? crtc->rotation : RR_Rotate_0); ++ assert(cursor->size == sna->cursor.size || cursor->transformed); ++ assert(cursor->rotation == (!to_sna_crtc(crtc)->cursor_transform && crtc->transform_in_use) ? crtc->rotation : RR_Rotate_0); + assert(cursor->ref); + return cursor; + } +@@ -4550,22 +5912,81 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) + sna->cursor.serial, + get_cursor_argb(sna->cursor.ref) != NULL)); + +- rotation = crtc->transform_in_use ? crtc->rotation : RR_Rotate_0; ++ transformed = to_sna_crtc(crtc)->cursor_transform; ++ rotation = (!transformed && crtc->transform_in_use) ? 
crtc->rotation : RR_Rotate_0; ++ ++ if (transformed) { ++ struct pixman_box16 box; ++ ++ box.x1 = box.y1 = 0; ++ box.x2 = sna->cursor.ref->bits->width; ++ box.y2 = sna->cursor.ref->bits->height; + +- if (sna->cursor.use_gtt) { /* Don't allow phys cursor sharing */ ++ pixman_f_transform_bounds(&crtc->f_crtc_to_framebuffer, &box); ++ size = __cursor_size(box.x2 - box.x1, box.y2 - box.y1); ++ __DBG(("%s: transformed cursor %dx%d -> %dx%d\n", ++ __FUNCTION__ , ++ sna->cursor.ref->bits->width, ++ sna->cursor.ref->bits->height, ++ box.x2 - box.x1, box.y2 - box.y1)); ++ } else ++ size = sna->cursor.size; ++ ++ if (crtc->transform_in_use) { ++ RRTransformPtr T = NULL; ++ struct pixman_vector v; ++ ++ if (crtc->transformPresent) { ++ T = &crtc->transform; ++ ++ /* Cancel any translation from this affine ++ * transformation. We just want to rotate and scale ++ * the cursor image. ++ */ ++ v.vector[0] = 0; ++ v.vector[1] = 0; ++ v.vector[2] = pixman_fixed_1; ++ pixman_transform_point(&crtc->transform.transform, &v); ++ } ++ ++ RRTransformCompute(0, 0, size, size, crtc->rotation, T, NULL, ++ &to_sna_crtc(crtc)->cursor_to_fb, ++ &to_sna_crtc(crtc)->fb_to_cursor); ++ if (T) ++ pixman_f_transform_translate( ++ &to_sna_crtc(crtc)->cursor_to_fb, ++ &to_sna_crtc(crtc)->fb_to_cursor, ++ -pixman_fixed_to_double(v.vector[0]), ++ -pixman_fixed_to_double(v.vector[1])); ++ ++ __DBG(("%s: cursor_to_fb [%f %f %f, %f %f %f, %f %f %f]\n", ++ __FUNCTION__, ++ to_sna_crtc(crtc)->cursor_to_fb.m[0][0], ++ to_sna_crtc(crtc)->cursor_to_fb.m[0][1], ++ to_sna_crtc(crtc)->cursor_to_fb.m[0][2], ++ to_sna_crtc(crtc)->cursor_to_fb.m[1][0], ++ to_sna_crtc(crtc)->cursor_to_fb.m[1][1], ++ to_sna_crtc(crtc)->cursor_to_fb.m[1][2], ++ to_sna_crtc(crtc)->cursor_to_fb.m[2][0], ++ to_sna_crtc(crtc)->cursor_to_fb.m[2][1], ++ to_sna_crtc(crtc)->cursor_to_fb.m[2][2])); ++ } ++ ++ /* Don't allow phys cursor sharing */ ++ if (sna->cursor.use_gtt && !transformed) { + for (cursor = sna->cursor.cursors; cursor; cursor = 
cursor->next) { +- if (cursor->serial == sna->cursor.serial && cursor->rotation == rotation) { ++ if (cursor->serial == sna->cursor.serial && ++ cursor->rotation == rotation && ++ !cursor->transformed) { + __DBG(("%s: reusing handle=%d, serial=%d, rotation=%d, size=%d\n", + __FUNCTION__, cursor->handle, cursor->serial, cursor->rotation, cursor->size)); + assert(cursor->size == sna->cursor.size); + return cursor; + } + } +- +- cursor = to_sna_crtc(crtc)->cursor; + } + +- size = sna->cursor.size; ++ cursor = to_sna_crtc(crtc)->cursor; + if (cursor && cursor->alloc < 4*size*size) + cursor = NULL; + +@@ -4577,7 +5998,7 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) + } + } + +- width = sna->cursor.ref->bits->width; ++ width = sna->cursor.ref->bits->width; + height = sna->cursor.ref->bits->height; + source = sna->cursor.ref->bits->source; + mask = sna->cursor.ref->bits->mask; +@@ -4585,7 +6006,7 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) + pitch = BitmapBytePad(width); + + image = cursor->image; +- if (image == NULL) { ++ if (image == NULL || transformed) { + image = sna->cursor.scratch; + cursor->last_width = cursor->last_height = size; + } +@@ -4616,6 +6037,21 @@ static struct sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) + mask += pitch; + source += pitch; + } ++ if (transformed) { ++ __DBG(("%s: Applying affine BLT to bitmap\n", __FUNCTION__)); ++ affine_blt(image, cursor->image, 32, ++ 0, 0, width, height, size * 4, ++ 0, 0, size, size, size * 4, ++ &to_sna_crtc(crtc)->cursor_to_fb); ++ image = cursor->image; ++ } ++ } else if (transformed) { ++ __DBG(("%s: Applying affine BLT to ARGB\n", __FUNCTION__)); ++ affine_blt(argb, cursor->image, 32, ++ 0, 0, width, height, width * 4, ++ 0, 0, size, size, size * 4, ++ &to_sna_crtc(crtc)->cursor_to_fb); ++ image = cursor->image; + } else + memcpy_blt(argb, image, 32, + width * 4, size * 4, +@@ -4662,9 +6098,16 @@ static struct 
sna_cursor *__sna_get_cursor(struct sna *sna, xf86CrtcPtr crtc) + + cursor->size = size; + cursor->rotation = rotation; ++ cursor->transformed = transformed; + cursor->serial = sna->cursor.serial; +- cursor->last_width = width; +- cursor->last_height = height; ++ if (transformed) { ++ /* mark the transformed rectangle as dirty, not input */ ++ cursor->last_width = size; ++ cursor->last_height = size; ++ } else { ++ cursor->last_width = width; ++ cursor->last_height = height; ++ } + return cursor; + } + +@@ -4674,40 +6117,55 @@ sna_realize_cursor(xf86CursorInfoPtr info, CursorPtr cursor) + return NULL; + } + +-#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0) +-static inline int sigio_block(void) +-{ +- OsBlockSIGIO(); +- return 0; +-} +-static inline void sigio_unblock(int was_blocked) ++static void enable_fb_access(ScrnInfoPtr scrn, int state) + { +- OsReleaseSIGIO(); +- (void)was_blocked; +-} ++ scrn->EnableDisableFBAccess( ++#ifdef XF86_HAS_SCRN_CONV ++ scrn, + #else +-#include <xf86_OSproc.h> +-static inline int sigio_block(void) ++ scrn->scrnIndex, ++#endif ++ state); ++} ++ ++ ++static void __restore_swcursor(ScrnInfoPtr scrn) + { +- return xf86BlockSIGIO(); ++ DBG(("%s: attempting to restore SW cursor\n", __FUNCTION__)); ++ enable_fb_access(scrn, FALSE); ++ enable_fb_access(scrn, TRUE); ++ ++ RemoveBlockAndWakeupHandlers((void *)__restore_swcursor, ++ (void *)NoopDDA, ++ scrn); + } +-static inline void sigio_unblock(int was_blocked) ++ ++static void restore_swcursor(struct sna *sna) + { +- xf86UnblockSIGIO(was_blocked); ++ sna->cursor.info->HideCursor(sna->scrn); ++ ++ /* XXX Force the cursor to be restored (avoiding recursion) */ ++ FreeCursor(sna->cursor.ref, None); ++ sna->cursor.ref = NULL; ++ ++ RegisterBlockAndWakeupHandlers((void *)__restore_swcursor, ++ (void *)NoopDDA, ++ sna->scrn); + } +-#endif + + static void + sna_show_cursors(ScrnInfoPtr scrn) + { + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + struct sna *sna = 
to_sna(scrn); ++ struct kmsg kmsg; + int sigio, c; + + DBG(("%s: cursor?=%d\n", __FUNCTION__, sna->cursor.ref != NULL)); + if (sna->cursor.ref == NULL) + return; + ++ kmsg_open(&kmsg); + sigio = sigio_block(); + for (c = 0; c < sna->mode.num_real_crtc; c++) { + xf86CrtcPtr crtc = xf86_config->crtc[c]; +@@ -4721,7 +6179,7 @@ sna_show_cursors(ScrnInfoPtr scrn) + + if (!crtc->cursor_in_range) { + DBG(("%s: skipping cursor outside CRTC (pipe=%d)\n", +- __FUNCTION__, sna_crtc->pipe)); ++ __FUNCTION__, sna_crtc_pipe(crtc))); + continue; + } + +@@ -4729,20 +6187,21 @@ sna_show_cursors(ScrnInfoPtr scrn) + if (cursor == NULL || + (sna_crtc->cursor == cursor && sna_crtc->last_cursor_size == cursor->size)) { + DBG(("%s: skipping cursor already show on CRTC (pipe=%d)\n", +- __FUNCTION__, sna_crtc->pipe)); ++ __FUNCTION__, sna_crtc_pipe(crtc))); + continue; + } + + DBG(("%s: CRTC pipe=%d, handle->%d\n", __FUNCTION__, +- sna_crtc->pipe, cursor->handle)); ++ sna_crtc_pipe(crtc), cursor->handle)); + + VG_CLEAR(arg); + arg.flags = DRM_MODE_CURSOR_BO; +- arg.crtc_id = sna_crtc->id; ++ arg.crtc_id = __sna_crtc_id(sna_crtc); + arg.width = arg.height = cursor->size; + arg.handle = cursor->handle; + +- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg) == 0) { ++ if (!FAIL_CURSOR_IOCTL && ++ drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg) == 0) { + if (sna_crtc->cursor) { + assert(sna_crtc->cursor->ref > 0); + sna_crtc->cursor->ref--; +@@ -4750,10 +6209,18 @@ sna_show_cursors(ScrnInfoPtr scrn) + cursor->ref++; + sna_crtc->cursor = cursor; + sna_crtc->last_cursor_size = cursor->size; ++ } else { ++ ERR(("%s: failed to show cursor on CRTC:%d [pipe=%d], disabling hwcursor: errno=%d\n", ++ __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc), errno)); ++ sna->cursor.disable = true; + } + } + sigio_unblock(sigio); + sna->cursor.active = true; ++ kmsg_close(&kmsg, sna->cursor.disable); ++ ++ if (unlikely(sna->cursor.disable)) ++ restore_swcursor(sna); + } + + static void +@@ -4789,24 
+6256,45 @@ static void + sna_crtc_disable_cursor(struct sna *sna, struct sna_crtc *crtc) + { + struct drm_mode_cursor arg; ++ int sigio; + + if (!crtc->cursor) + return; + +- DBG(("%s: CRTC:%d, handle=%d\n", __FUNCTION__, crtc->id, crtc->cursor->handle)); +- assert(crtc->cursor->ref); ++ sigio = sigio_block(); ++ if (crtc->cursor) { ++ DBG(("%s: CRTC:%d, handle=%d\n", __FUNCTION__, __sna_crtc_id(crtc), crtc->cursor->handle)); ++ assert(crtc->cursor->ref > 0); ++ crtc->cursor->ref--; ++ crtc->cursor = NULL; ++ crtc->last_cursor_size = 0; + +- VG_CLEAR(arg); +- arg.flags = DRM_MODE_CURSOR_BO; +- arg.crtc_id = crtc->id; +- arg.width = arg.height = 0; +- arg.handle = 0; ++ VG_CLEAR(arg); ++ arg.flags = DRM_MODE_CURSOR_BO; ++ arg.crtc_id = __sna_crtc_id(crtc); ++ arg.width = arg.height = 0; ++ arg.handle = 0; + +- (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg); +- assert(crtc->cursor->ref > 0); +- crtc->cursor->ref--; +- crtc->cursor = NULL; +- crtc->last_cursor_size = 0; ++ (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg); ++ } ++ sigio_unblock(sigio); ++} ++ ++static void ++sna_disable_cursors(ScrnInfoPtr scrn) ++{ ++ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); ++ struct sna *sna = to_sna(scrn); ++ int sigio, c; ++ ++ DBG(("%s\n", __FUNCTION__)); ++ ++ sigio = sigio_block(); ++ for (c = 0; c < sna->mode.num_real_crtc; c++) { ++ assert(to_sna_crtc(xf86_config->crtc[c])); ++ sna_crtc_disable_cursor(sna, to_sna_crtc(xf86_config->crtc[c])); ++ } ++ sigio_unblock(sigio); + } + + static void +@@ -4852,6 +6340,7 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) + { + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + struct sna *sna = to_sna(scrn); ++ struct kmsg kmsg; + int sigio, c; + + __DBG(("%s(%d, %d), cursor? 
%d\n", __FUNCTION__, +@@ -4859,6 +6348,7 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) + if (sna->cursor.ref == NULL) + return; + ++ kmsg_open(&kmsg); + sigio = sigio_block(); + sna->cursor.last_x = x; + sna->cursor.last_y = y; +@@ -4876,27 +6366,37 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) + + VG_CLEAR(arg); + arg.flags = 0; +- arg.crtc_id = sna_crtc->id; ++ arg.crtc_id = __sna_crtc_id(sna_crtc); + arg.handle = 0; + + if (sna_crtc->bo == NULL) + goto disable; + ++ cursor = __sna_get_cursor(sna, crtc); ++ if (cursor == NULL) ++ cursor = sna_crtc->cursor; ++ if (cursor == NULL) { ++ __DBG(("%s: failed to grab cursor, disabling\n", __FUNCTION__)); ++ goto disable; ++ } ++ + if (crtc->transform_in_use) { + int xhot = sna->cursor.ref->bits->xhot; + int yhot = sna->cursor.ref->bits->yhot; +- struct pict_f_vector v; ++ struct pict_f_vector v, hot; + +- v.v[0] = (x + xhot) + 0.5; +- v.v[1] = (y + yhot) + 0.5; +- v.v[2] = 1; ++ v.v[0] = x + xhot + .5; ++ v.v[1] = y + yhot + .5; ++ v.v[2] = 1.; + pixman_f_transform_point(&crtc->f_framebuffer_to_crtc, &v); + +- rotate_coord_back(crtc->rotation, sna->cursor.size, &xhot, &yhot); ++ hot.v[0] = xhot; ++ hot.v[1] = yhot; ++ hot.v[2] = 1.; ++ pixman_f_transform_point(&sna_crtc->fb_to_cursor, &hot); + +- /* cursor will have 0.5 added to it already so floor is sufficent */ +- arg.x = floor(v.v[0]) - xhot; +- arg.y = floor(v.v[1]) - yhot; ++ arg.x = floor(v.v[0] - hot.v[0]); ++ arg.y = floor(v.v[1] - hot.v[1]); + } else { + arg.x = x - crtc->x; + arg.y = y - crtc->y; +@@ -4904,15 +6404,6 @@ sna_set_cursor_position(ScrnInfoPtr scrn, int x, int y) + + if (arg.x < crtc->mode.HDisplay && arg.x > -sna->cursor.size && + arg.y < crtc->mode.VDisplay && arg.y > -sna->cursor.size) { +- cursor = __sna_get_cursor(sna, crtc); +- if (cursor == NULL) +- cursor = sna_crtc->cursor; +- if (cursor == NULL) { +- __DBG(("%s: failed to grab cursor, disabling\n", +- __FUNCTION__)); +- goto disable; +- } +- + if 
(sna_crtc->cursor != cursor || sna_crtc->last_cursor_size != cursor->size) { + arg.flags |= DRM_MODE_CURSOR_BO; + arg.handle = cursor->handle; +@@ -4932,10 +6423,13 @@ disable: + } + + __DBG(("%s: CRTC:%d (%d, %d), handle=%d, flags=%x (old cursor handle=%d), move? %d, update handle? %d\n", +- __FUNCTION__, sna_crtc->id, arg.x, arg.y, arg.handle, arg.flags, sna_crtc->cursor ? sna_crtc->cursor->handle : 0, ++ __FUNCTION__, __sna_crtc_id(sna_crtc), arg.x, arg.y, arg.handle, arg.flags, sna_crtc->cursor ? sna_crtc->cursor->handle : 0, + arg.flags & DRM_MODE_CURSOR_MOVE, arg.flags & DRM_MODE_CURSOR_BO)); + +- if (arg.flags && ++ if (arg.flags == 0) ++ continue; ++ ++ if (!FAIL_CURSOR_IOCTL && + drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_CURSOR, &arg) == 0) { + if (arg.flags & DRM_MODE_CURSOR_BO) { + if (sna_crtc->cursor) { +@@ -4949,9 +6443,21 @@ disable: + } else + sna_crtc->last_cursor_size = 0; + } ++ } else { ++ ERR(("%s: failed to update cursor on CRTC:%d [pipe=%d], disabling hwcursor: errno=%d\n", ++ __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc), errno)); ++ /* XXX How to force switch back to SW cursor? ++ * Right now we just want until the next cursor image ++ * change, which is fairly frequent. 
++ */ ++ sna->cursor.disable = true; + } + } + sigio_unblock(sigio); ++ kmsg_close(&kmsg, sna->cursor.disable); ++ ++ if (unlikely(sna->cursor.disable)) ++ restore_swcursor(sna); + } + + #if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,15,99,902,2) +@@ -4978,17 +6484,6 @@ sna_load_cursor_image(ScrnInfoPtr scrn, unsigned char *src) + { + } + +-static int __cursor_size(CursorPtr cursor) +-{ +- int i, size; +- +- i = MAX(cursor->bits->width, cursor->bits->height); +- for (size = 64; size < i; size <<= 1) +- ; +- +- return size; +-} +- + static bool + sna_cursor_preallocate(struct sna *sna) + { +@@ -5006,6 +6501,50 @@ sna_cursor_preallocate(struct sna *sna) + return true; + } + ++static bool ++transformable_cursor(struct sna *sna, CursorPtr cursor) ++{ ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ int i; ++ ++ for (i = 0; i < sna->mode.num_real_crtc; i++) { ++ xf86CrtcPtr crtc = config->crtc[i]; ++ struct pixman_box16 box; ++ int size; ++ ++ if (!to_sna_crtc(crtc)->hwcursor) { ++ DBG(("%s: hwcursor disabled on CRTC:%d [pipe=%d]\n", ++ __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc))); ++ return false; ++ } ++ ++ if (!sna->cursor.use_gtt || !sna->cursor.scratch) { ++ DBG(("%s: unable to use GTT curosor access [%d] or no scratch [%d]\n", ++ __FUNCTION__, sna->cursor.use_gtt, sna->cursor.scratch)); ++ return false; ++ } ++ ++ box.x1 = box.y1 = 0; ++ box.x2 = cursor->bits->width; ++ box.y2 = cursor->bits->height; ++ ++ if (!pixman_f_transform_bounds(&crtc->f_crtc_to_framebuffer, ++ &box)) { ++ DBG(("%s: unable to transform bounds\n", __FUNCTION__)); ++ return false; ++ } ++ ++ size = __cursor_size(box.x2 - box.x1, box.y2 - box.y1); ++ if (size > sna->cursor.max_size) { ++ DBG(("%s: transformed cursor size=%d too large, max=%d\n", ++ __FUNCTION__, size, sna->cursor.max_size)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static Bool + sna_use_hw_cursor(ScreenPtr screen, CursorPtr cursor) + { +@@ -5014,6 +6553,9 @@ 
sna_use_hw_cursor(ScreenPtr screen, CursorPtr cursor) + DBG(("%s (%dx%d)?\n", __FUNCTION__, + cursor->bits->width, cursor->bits->height)); + ++ if (sna->cursor.disable) ++ return FALSE; ++ + /* cursors are invariant */ + if (cursor == sna->cursor.ref) + return TRUE; +@@ -5023,12 +6565,24 @@ sna_use_hw_cursor(ScreenPtr screen, CursorPtr cursor) + sna->cursor.ref = NULL; + } + +- sna->cursor.size = __cursor_size(cursor); +- if (sna->cursor.size > sna->cursor.max_size) ++ sna->cursor.size = ++ __cursor_size(cursor->bits->width, cursor->bits->height); ++ if (sna->cursor.size > sna->cursor.max_size) { ++ DBG(("%s: cursor size=%d too large, max %d: using sw cursor\n", ++ __FUNCTION__, sna->cursor.size, sna->cursor.max_size)); + return FALSE; ++ } ++ ++ if (sna->mode.rr_active && !transformable_cursor(sna, cursor)) { ++ DBG(("%s: RandR active [%d] and non-transformable cursor: using sw cursor\n", ++ __FUNCTION__, sna->mode.rr_active)); ++ return FALSE; ++ } + +- if (!sna_cursor_preallocate(sna)) ++ if (!sna_cursor_preallocate(sna)) { ++ DBG(("%s: cursor preallocation failed: using sw cursor\n", __FUNCTION__)); + return FALSE; ++ } + + sna->cursor.ref = cursor; + cursor->refcnt++; +@@ -5056,8 +6610,12 @@ sna_cursor_pre_init(struct sna *sna) + return; + + #define LOCAL_IOCTL_GET_CAP DRM_IOWR(0x0c, struct local_get_cap) +-#define DRM_CAP_CURSOR_WIDTH 8 +-#define DRM_CAP_CURSOR_HEIGHT 9 ++#ifndef DRM_CAP_CURSOR_WIDTH ++#define DRM_CAP_CURSOR_WIDTH 0x8 ++#endif ++#ifndef DRM_CAP_CURSOR_HEIGHT ++#define DRM_CAP_CURSOR_HEIGHT 0x9 ++#endif + + #define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 + +@@ -5087,11 +6645,9 @@ sna_cursor_pre_init(struct sna *sna) + DBG(("%s: cursor updates use_gtt?=%d\n", + __FUNCTION__, sna->cursor.use_gtt)); + +- if (!sna->cursor.use_gtt) { +- sna->cursor.scratch = malloc(sna->cursor.max_size * sna->cursor.max_size * 4); +- if (!sna->cursor.scratch) +- sna->cursor.max_size = 0; +- } ++ sna->cursor.scratch = malloc(sna->cursor.max_size * sna->cursor.max_size * 
4); ++ if (!sna->cursor.scratch && !sna->cursor.use_gtt) ++ sna->cursor.max_size = 0; + + sna->cursor.num_stash = -sna->mode.num_real_crtc; + +@@ -5193,7 +6749,7 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, + int output_count = 0; + int i; + +- DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, crtc->id, crtc->pipe, bo->handle)); ++ DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), bo->handle)); + + assert(sna->mode.num_real_output < ARRAY_SIZE(output_ids)); + assert(crtc->bo); +@@ -5207,11 +6763,11 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, + + DBG(("%s: attaching output '%s' %d [%d] to crtc:%d (pipe %d) (possible crtc:%x, possible clones:%x)\n", + __FUNCTION__, output->name, i, to_connector_id(output), +- crtc->id, crtc->pipe, ++ __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), + (uint32_t)output->possible_crtcs, + (uint32_t)output->possible_clones)); + +- assert(output->possible_crtcs & (1 << crtc->pipe) || ++ assert(output->possible_crtcs & (1 << __sna_crtc_pipe(crtc)) || + is_zaphod(sna->scrn)); + + output_ids[output_count] = to_connector_id(output); +@@ -5221,7 +6777,7 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, + assert(output_count); + + VG_CLEAR(arg); +- arg.crtc_id = crtc->id; ++ arg.crtc_id = __sna_crtc_id(crtc); + arg.fb_id = fb_id(bo); + assert(arg.fb_id); + arg.x = x; +@@ -5231,20 +6787,74 @@ sna_crtc_flip(struct sna *sna, struct sna_crtc *crtc, struct kgem_bo *bo, int x, + arg.mode = crtc->kmode; + arg.mode_valid = 1; + +- DBG(("%s: applying crtc [%d, pipe=%d] mode=%dx%d+%d+%d@%d, fb=%d across %d outputs [%d...]\n", +- __FUNCTION__, crtc->id, crtc->pipe, +- arg.mode.hdisplay, +- arg.mode.vdisplay, +- arg.x, arg.y, +- arg.mode.clock, +- arg.fb_id, +- output_count, output_count ? 
output_ids[0] : 0)); ++ DBG(("%s: applying crtc [%d, pipe=%d] mode=%dx%d+%d+%d@%d, fb=%d across %d outputs [%d...]\n", ++ __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), ++ arg.mode.hdisplay, ++ arg.mode.vdisplay, ++ arg.x, arg.y, ++ arg.mode.clock, ++ arg.fb_id, ++ output_count, output_count ? output_ids[0] : 0)); ++ ++ if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg)) ++ return false; ++ ++ crtc->offset = y << 16 | x; ++ __kgem_bo_clear_dirty(bo); ++ return true; ++} ++ ++static void sna_mode_restore(struct sna *sna) ++{ ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ int error = 0; ++ int i; ++ ++ assert(!sna->mode.hidden); ++ ++ for (i = 0; i < sna->mode.num_real_crtc; i++) { ++ xf86CrtcPtr crtc = config->crtc[i]; ++ ++ assert(to_sna_crtc(crtc) != NULL); ++ if (to_sna_crtc(crtc)->bo == NULL) ++ continue; ++ ++ assert(crtc->enabled); ++ if (!__sna_crtc_set_mode(crtc)) { ++ sna_crtc_disable(crtc, false); ++ error++; ++ } ++ } ++ sna_mode_wakeup(sna); ++ while (sna->mode.flip_active && sna_mode_wakeup(sna)) ++ ; ++ update_flush_interval(sna); ++ sna_cursors_reload(sna); ++ sna->mode.dirty = false; ++ ++ if (error) ++ xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, ++ "Failed to restore display configuration\n"); ++} ++ ++bool sna_needs_page_flip(struct sna *sna, struct kgem_bo *bo) ++{ ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ int i; ++ ++ for (i = 0; i < sna->mode.num_real_crtc; i++) { ++ struct sna_crtc *crtc = config->crtc[i]->driver_private; ++ ++ if (crtc->bo == NULL) ++ continue; + +- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg)) +- return false; ++ if (crtc->bo == bo) ++ continue; + +- crtc->offset = y << 16 | x; +- return true; ++ return true; ++ } ++ ++ return false; + } + + int +@@ -5256,6 +6866,7 @@ sna_page_flip(struct sna *sna, + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + const int width = sna->scrn->virtualX; + const int height = sna->scrn->virtualY; ++ int sigio; + 
int count = 0; + int i; + +@@ -5263,23 +6874,26 @@ sna_page_flip(struct sna *sna, + assert(bo->refcnt); + + assert((sna->flags & SNA_IS_HOSTED) == 0); +- assert((sna->flags & SNA_TEAR_FREE) == 0); + assert(sna->mode.flip_active == 0); + assert(sna->mode.front_active); ++ assert(!sna->mode.hidden); + assert(sna->scrn->vtSema); + + if ((sna->flags & (data ? SNA_HAS_FLIP : SNA_HAS_ASYNC_FLIP)) == 0) + return 0; + + kgem_bo_submit(&sna->kgem, bo); ++ __kgem_bo_clear_dirty(bo); + ++ sigio = sigio_block(); + for (i = 0; i < sna->mode.num_real_crtc; i++) { + struct sna_crtc *crtc = config->crtc[i]->driver_private; + struct drm_mode_crtc_page_flip arg; + uint32_t crtc_offset; ++ int fixup; + + DBG(("%s: crtc %d id=%d, pipe=%d active? %d\n", +- __FUNCTION__, i, crtc->id, crtc->pipe, crtc->bo != NULL)); ++ __FUNCTION__, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->bo != NULL)); + if (crtc->bo == NULL) + continue; + assert(!crtc->transform); +@@ -5288,13 +6902,18 @@ sna_page_flip(struct sna *sna, + assert(crtc->bo->refcnt >= crtc->bo->active_scanout); + assert(crtc->flip_bo == NULL); + +- arg.crtc_id = crtc->id; ++ assert_crtc_fb(sna, crtc); ++ if (data == NULL && crtc->bo == bo) ++ goto next_crtc; ++ ++ arg.crtc_id = __sna_crtc_id(crtc); + arg.fb_id = get_fb(sna, bo, width, height); + if (arg.fb_id == 0) { + assert(count == 0); +- return 0; ++ break; + } + ++ fixup = 0; + crtc_offset = crtc->base->y << 16 | crtc->base->x; + + if (bo->pitch != crtc->bo->pitch || crtc_offset != crtc->offset) { +@@ -5303,7 +6922,12 @@ sna_page_flip(struct sna *sna, + bo->pitch, crtc->bo->pitch, + crtc_offset, crtc->offset)); + fixup_flip: ++ fixup = 1; + if (crtc->bo != bo && sna_crtc_flip(sna, crtc, bo, crtc->base->x, crtc->base->y)) { ++update_scanout: ++ DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", ++ __FUNCTION__, crtc->bo->handle, crtc->bo->active_scanout, ++ bo->handle, bo->active_scanout)); + 
assert(crtc->bo->active_scanout); + assert(crtc->bo->refcnt >= crtc->bo->active_scanout); + crtc->bo->active_scanout--; +@@ -5321,15 +6945,8 @@ fixup_flip: + goto next_crtc; + + /* queue a flip in order to send the event */ +- } else { +- if (count && !xf86SetDesiredModes(sna->scrn)) { +- xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, +- "failed to restore display configuration\n"); +- for (; i < sna->mode.num_real_crtc; i++) +- sna_crtc_disable(config->crtc[i]); +- } +- return 0; +- } ++ } else ++ goto error; + } + + /* Only the reference crtc will finally deliver its page flip +@@ -5346,7 +6963,7 @@ fixup_flip: + + retry_flip: + DBG(("%s: crtc %d id=%d, pipe=%d --> fb %d\n", +- __FUNCTION__, i, crtc->id, crtc->pipe, arg.fb_id)); ++ __FUNCTION__, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), arg.fb_id)); + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { + ERR(("%s: pageflip failed with err=%d\n", __FUNCTION__, errno)); + +@@ -5354,7 +6971,7 @@ retry_flip: + struct drm_mode_crtc mode; + + memset(&mode, 0, sizeof(mode)); +- mode.crtc_id = crtc->id; ++ mode.crtc_id = __sna_crtc_id(crtc); + drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode); + + DBG(("%s: crtc=%d, valid?=%d, fb attached?=%d, expected=%d\n", +@@ -5366,7 +6983,7 @@ retry_flip: + goto fixup_flip; + + if (count == 0) +- return 0; ++ break; + + DBG(("%s: throttling on busy flip / waiting for kernel to catch up\n", __FUNCTION__)); + drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_THROTTLE, 0); +@@ -5375,15 +6992,25 @@ retry_flip: + goto retry_flip; + } + ++ if (!fixup) ++ goto fixup_flip; ++ ++error: + xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, +- "page flipping failed, on CRTC:%d (pipe=%d), disabling %s page flips\n", +- crtc->id, crtc->pipe, data ? "synchronous": "asynchronous"); ++ "page flipping failed, on CRTC:%d (pipe=%d), disabling %s page flips\n", ++ __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), data ? 
"synchronous": "asynchronous"); ++ ++ if (count || crtc->bo == bo) ++ sna_mode_restore(sna); ++ + sna->flags &= ~(data ? SNA_HAS_FLIP : SNA_HAS_ASYNC_FLIP); +- goto fixup_flip; ++ count = 0; ++ break; + } + + if (data) { + assert(crtc->flip_bo == NULL); ++ assert(handler); + crtc->flip_handler = handler; + crtc->flip_data = data; + crtc->flip_bo = kgem_bo_reference(bo); +@@ -5391,11 +7018,15 @@ retry_flip: + crtc->flip_serial = crtc->mode_serial; + crtc->flip_pending = true; + sna->mode.flip_active++; +- } + ++ DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", ++ __FUNCTION__, __sna_crtc_id(crtc), crtc->flip_bo->handle, crtc->flip_bo->active_scanout, crtc->flip_serial)); ++ } else ++ goto update_scanout; + next_crtc: + count++; + } ++ sigio_unblock(sigio); + + DBG(("%s: page flipped %d crtcs\n", __FUNCTION__, count)); + return count; +@@ -5471,7 +7102,7 @@ static void crtc_init_gamma(xf86CrtcPtr crtc) + + assert(sna_crtc); + +- lut.crtc_id = sna_crtc->id; ++ lut.crtc_id = __sna_crtc_id(sna_crtc); + lut.gamma_size = 256; + lut.red = (uintptr_t)(gamma); + lut.green = (uintptr_t)(gamma + 256); +@@ -5485,7 +7116,7 @@ static void crtc_init_gamma(xf86CrtcPtr crtc) + } + + DBG(("%s: CRTC:%d, pipe=%d: gamma set?=%d\n", +- __FUNCTION__, sna_crtc->id, sna_crtc->pipe, ++ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), + gamma_set)); + if (!gamma_set) { + int i; +@@ -5502,6 +7133,7 @@ static void crtc_init_gamma(xf86CrtcPtr crtc) + crtc->gamma_red = gamma; + crtc->gamma_green = gamma + 256; + crtc->gamma_blue = gamma + 2*256; ++ crtc->gamma_size = 256; + } + } + } +@@ -5528,6 +7160,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) + { + ScrnInfoPtr scrn = sna->scrn; + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); ++ int crtc_active, crtc_enabled; + int width, height; + int i, j; + +@@ -5565,6 +7198,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) + } + + /* Copy the existing modes on each 
CRTCs */ ++ crtc_active = crtc_enabled = 0; + for (i = 0; i < sna->mode.num_real_crtc; i++) { + xf86CrtcPtr crtc = config->crtc[i]; + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); +@@ -5577,12 +7211,12 @@ static bool sna_probe_initial_configuration(struct sna *sna) + + /* Retrieve the current mode */ + VG_CLEAR(mode); +- mode.crtc_id = sna_crtc->id; ++ mode.crtc_id = __sna_crtc_id(sna_crtc); + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode)) + continue; + + DBG(("%s: CRTC:%d, pipe=%d: has mode?=%d\n", __FUNCTION__, +- sna_crtc->id, sna_crtc->pipe, ++ __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), + mode.mode_valid && mode.mode.clock)); + + if (!mode.mode_valid || mode.mode.clock == 0) +@@ -5593,6 +7227,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) + crtc->desiredX = mode.x; + crtc->desiredY = mode.y; + crtc->desiredTransformPresent = FALSE; ++ crtc_active++; + } + + /* Reconstruct outputs pointing to active CRTC */ +@@ -5604,6 +7239,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) + + crtc_id = (uintptr_t)output->crtc; + output->crtc = NULL; ++ output->status = XF86OutputStatusUnknown; + if (sna->flags & SNA_IS_SLAVED) + continue; + +@@ -5623,7 +7259,7 @@ static bool sna_probe_initial_configuration(struct sna *sna) + xf86CrtcPtr crtc = config->crtc[j]; + + assert(to_sna_crtc(crtc)); +- if (to_sna_crtc(crtc)->id != crtc_id) ++ if (sna_crtc_id(crtc) != crtc_id) + continue; + + if (crtc->desiredMode.status == MODE_OK) { +@@ -5641,18 +7277,30 @@ static bool sna_probe_initial_configuration(struct sna *sna) + "Output %s using initial mode %s on pipe %d\n", + output->name, + crtc->desiredMode.name, +- to_sna_crtc(crtc)->pipe); ++ sna_crtc_pipe(crtc)); + + output->crtc = crtc; ++ output->status = XF86OutputStatusConnected; + crtc->enabled = TRUE; ++ crtc_enabled++; ++ ++ output_set_gamma(output, crtc); ++ ++ if (output->conf_monitor) { ++ output->mm_width = output->conf_monitor->mon_width; ++ output->mm_height = 
output->conf_monitor->mon_height; ++ } ++ ++#if 0 ++ sna_output_attach_edid(output); ++ sna_output_attach_tile(output); ++#endif + + if (output->mm_width == 0 || output->mm_height == 0) { + output->mm_height = (crtc->desiredMode.VDisplay * 254) / (10*DEFAULT_DPI); + output->mm_width = (crtc->desiredMode.HDisplay * 254) / (10*DEFAULT_DPI); + } + +- output_set_gamma(output, crtc); +- + M = calloc(1, sizeof(DisplayModeRec)); + if (M) { + *M = crtc->desiredMode; +@@ -5673,6 +7321,12 @@ static bool sna_probe_initial_configuration(struct sna *sna) + } + } + ++ if (crtc_active != crtc_enabled) { ++ DBG(("%s: only enabled %d out of %d active CRTC, forcing a reconfigure\n", ++ __FUNCTION__, crtc_enabled, crtc_active)); ++ return false; ++ } ++ + width = height = 0; + for (i = 0; i < sna->mode.num_real_crtc; i++) { + xf86CrtcPtr crtc = config->crtc[i]; +@@ -5707,8 +7361,8 @@ static bool sna_probe_initial_configuration(struct sna *sna) + if (sna_output->num_modes == 0) + continue; + +- width = sna_output->modes[0].hdisplay; +- height= sna_output->modes[0].vdisplay; ++ width = sna_output->modes[0].hdisplay; ++ height = sna_output->modes[0].vdisplay; + + DBG(("%s: panel '%s' is %dx%d\n", + __FUNCTION__, output->name, width, height)); +@@ -5788,7 +7442,7 @@ probe_capabilities(struct sna *sna) + sna->flags &= ~(SNA_HAS_FLIP | SNA_HAS_ASYNC_FLIP); + if (has_flip(sna)) + sna->flags |= SNA_HAS_FLIP; +- if (has_flip__async(sna)) ++ if (has_flip__async(sna) && (sna->flags & SNA_TEAR_FREE) == 0) + sna->flags |= SNA_HAS_ASYNC_FLIP; + DBG(("%s: page flips? %s, async? %s\n", __FUNCTION__, + sna->flags & SNA_HAS_FLIP ? 
"enabled" : "disabled", +@@ -5813,12 +7467,25 @@ sna_crtc_config_notify(ScreenPtr screen) + return; + } + ++ /* Flush any events completed by the modeset */ ++ sna_mode_wakeup(sna); ++ + update_flush_interval(sna); ++ sna->cursor.disable = false; /* Reset HW cursor until the next fail */ + sna_cursors_reload(sna); + + probe_capabilities(sna); + sna_present_update(sna); + ++ /* Allow TearFree to come back on when everything is off */ ++ if (!sna->mode.front_active && sna->flags & SNA_WANT_TEAR_FREE) { ++ if ((sna->flags & SNA_TEAR_FREE) == 0) ++ DBG(("%s: enable TearFree next modeset\n", ++ __FUNCTION__)); ++ ++ sna->flags |= SNA_TEAR_FREE; ++ } ++ + sna->mode.dirty = false; + } + +@@ -5840,6 +7507,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) + } + + probe_capabilities(sna); ++ sna->mode.hidden = 1; + + if (!xf86GetOptValInteger(sna->Options, OPTION_VIRTUAL, &num_fake)) + num_fake = 1; +@@ -5855,6 +7523,9 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) + if (res) { + xf86CrtcConfigPtr xf86_config; + ++ DBG(("%s: found %d CRTC, %d encoders, %d connectors\n", ++ __FUNCTION__, res->count_crtcs, res->count_encoders, res->count_connectors)); ++ + assert(res->count_crtcs); + assert(res->count_connectors); + +@@ -5862,6 +7533,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) + + xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + xf86_config->xf86_crtc_notify = sna_crtc_config_notify; ++ xf86_config->compat_output = 0; + + for (i = 0; i < res->count_crtcs; i++) + if (!sna_crtc_add(scrn, res->crtcs[i])) +@@ -5900,6 +7572,11 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) + if (!sna_mode_fake_init(sna, num_fake)) + return false; + ++ sna->mode.shadow_size = 256; ++ sna->mode.shadow_events = malloc(sna->mode.shadow_size * sizeof(struct drm_event_vblank)); ++ if (!sna->mode.shadow_events) ++ return false; ++ + if (!sna_probe_initial_configuration(sna)) { + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); + +@@ -5912,6 
+7589,7 @@ bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) + } + } + sort_config_outputs(sna); ++ TimerSet(NULL, 0, COLDPLUG_DELAY_MS, sna_mode_coldplug, sna); + + sna_setup_provider(scrn); + return scrn->modes != NULL; +@@ -5921,18 +7599,58 @@ bool + sna_mode_wants_tear_free(struct sna *sna) + { + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ bool found = false; ++ FILE *file; + int i; + ++ file = fopen("/sys/module/i915/parameters/enable_fbc", "r"); ++ if (file) { ++ int fbc_enabled = 0; ++ int value; ++ ++ if (fscanf(file, "%d", &value) == 1) ++ fbc_enabled = value > 0; ++ fclose(file); ++ ++ DBG(("%s: module parameter 'enable_fbc' enabled? %d\n", ++ __FUNCTION__, fbc_enabled)); ++ ++ if (fbc_enabled) ++ return true; ++ } ++ + for (i = 0; i < sna->mode.num_real_output; i++) { + struct sna_output *output = to_sna_output(config->output[i]); + int id = find_property(sna, output, "Panel Self-Refresh"); +- if (id !=-1 && output->prop_values[id] != -1) { ++ if (id == -1) ++ continue; ++ ++ found = true; ++ if (output->prop_values[id] != -1) { + DBG(("%s: Panel Self-Refresh detected on %s\n", + __FUNCTION__, config->output[i]->name)); + return true; + } + } + ++ if (!found) { ++ file = fopen("/sys/module/i915/parameters/enable_psr", "r"); ++ if (file) { ++ int psr_enabled = 0; ++ int value; ++ ++ if (fscanf(file, "%d", &value) == 1) ++ psr_enabled = value > 0; ++ fclose(file); ++ ++ DBG(("%s: module parameter 'enable_psr' enabled? 
%d\n", ++ __FUNCTION__, psr_enabled)); ++ ++ if (psr_enabled) ++ return true; ++ } ++ } ++ + return false; + } + +@@ -5955,7 +7673,7 @@ sna_mode_set_primary(struct sna *sna) + + DBG(("%s: setting PrimaryOutput %s\n", __FUNCTION__, output->name)); + rr->primaryOutput = output->randr_output; +- RROutputChanged(rr->primaryOutput, 0); ++ RROutputChanged(rr->primaryOutput, FALSE); + rr->layoutChanged = TRUE; + break; + } +@@ -5974,12 +7692,9 @@ sna_mode_disable(struct sna *sna) + if (!sna->scrn->vtSema) + return false; + +- /* XXX we will cause previously hidden cursors to be reshown, but +- * this should be a rare fixup case for severe fragmentation. +- */ +- sna_hide_cursors(sna->scrn); ++ sna_disable_cursors(sna->scrn); + for (i = 0; i < sna->mode.num_real_crtc; i++) +- sna_crtc_disable(config->crtc[i]); ++ sna_crtc_disable(config->crtc[i], false); + assert(sna->mode.front_active == 0); + + sna_mode_wakeup(sna); +@@ -6001,6 +7716,11 @@ sna_mode_enable(struct sna *sna) + if (!sna->scrn->vtSema) + return; + ++ if (sna->mode.hidden) { ++ DBG(("%s: hidden outputs\n", __FUNCTION__)); ++ return; ++ } ++ + for (i = 0; i < sna->mode.num_real_crtc; i++) { + xf86CrtcPtr crtc = config->crtc[i]; + +@@ -6016,13 +7736,30 @@ sna_mode_enable(struct sna *sna) + } + + update_flush_interval(sna); +- sna_show_cursors(sna->scrn); ++ sna_cursors_reload(sna); + sna->mode.dirty = false; + } + ++static void sna_randr_close(struct sna *sna) ++{ ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ int n; ++ ++ /* The RR structs are freed early during CloseScreen as they ++ * are tracked as Resources. However, we may be tempted to ++ * access them during shutdown so decouple them now. 
++ */ ++ for (n = 0; n < config->num_output; n++) ++ config->output[n]->randr_output = NULL; ++ ++ for (n = 0; n < config->num_crtc; n++) ++ config->crtc[n]->randr_crtc = NULL; ++} ++ + void + sna_mode_close(struct sna *sna) + { ++ sna_randr_close(sna); + sna_mode_wakeup(sna); + + if (sna->flags & SNA_IS_HOSTED) +@@ -6077,15 +7814,22 @@ xf86CrtcPtr + sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired) + { + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); +- xf86CrtcPtr best_crtc; +- int best_coverage, c; ++ xf86CrtcPtr best_crtc = NULL; ++ int best_coverage = -1, c; + + if (sna->flags & SNA_IS_HOSTED) + return NULL; + + /* If we do not own the VT, we do not own the CRTC either */ +- if (!sna->scrn->vtSema) ++ if (!sna->scrn->vtSema) { ++ DBG(("%s: none, VT switched\n", __FUNCTION__)); ++ return NULL; ++ } ++ ++ if (sna->mode.hidden) { ++ DBG(("%s: none, hidden outputs\n", __FUNCTION__)); + return NULL; ++ } + + DBG(("%s for box=(%d, %d), (%d, %d)\n", + __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); +@@ -6107,10 +7851,10 @@ sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired) + cover_box.x2, cover_box.y2)); + return desired; + } ++ best_crtc = desired; ++ best_coverage = 0; + } + +- best_crtc = NULL; +- best_coverage = 0; + for (c = 0; c < sna->mode.num_real_crtc; c++) { + xf86CrtcPtr crtc = config->crtc[c]; + BoxRec cover_box; +@@ -6156,6 +7900,38 @@ sna_covering_crtc(struct sna *sna, const BoxRec *box, xf86CrtcPtr desired) + return best_crtc; + } + ++static xf86CrtcPtr first_active_crtc(struct sna *sna) ++{ ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ int n; ++ ++ for (n = 0; n < sna->mode.num_real_crtc; n++) { ++ xf86CrtcPtr crtc = config->crtc[n]; ++ if (to_sna_crtc(crtc)->bo) ++ return crtc; ++ } ++ ++ /* No active, use the first as a placeholder */ ++ if (sna->mode.num_real_crtc) ++ return config->crtc[0]; ++ ++ return NULL; ++} ++ ++xf86CrtcPtr sna_primary_crtc(struct sna *sna) 
++{ ++ rrScrPrivPtr rr = rrGetScrPriv(xf86ScrnToScreen(sna->scrn)); ++ if (rr && rr->primaryOutput) { ++ xf86OutputPtr output = rr->primaryOutput->devPrivate; ++ if (output->crtc && ++ output->scrn == sna->scrn && ++ to_sna_crtc(output->crtc)) ++ return output->crtc; ++ } ++ ++ return first_active_crtc(sna); ++} ++ + #define MI_LOAD_REGISTER_IMM (0x22<<23) + + static bool sna_emit_wait_for_scanline_hsw(struct sna *sna, +@@ -6433,7 +8209,7 @@ sna_wait_for_scanline(struct sna *sna, + y2 /= 2; + } + +- pipe = sna_crtc_to_pipe(crtc); ++ pipe = sna_crtc_pipe(crtc); + DBG(("%s: pipe=%d, y1=%d, y2=%d, full_height?=%d\n", + __FUNCTION__, pipe, y1, y2, full_height)); + +@@ -6457,19 +8233,101 @@ sna_wait_for_scanline(struct sna *sna, + return ret; + } + ++static bool sna_mode_shutdown_crtc(xf86CrtcPtr crtc) ++{ ++ struct sna *sna = to_sna(crtc->scrn); ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn); ++ bool disabled = false; ++ int o; ++ ++ xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, ++ "%s: invalid state found on pipe %d, disabling CRTC:%d\n", ++ __FUNCTION__, ++ __sna_crtc_pipe(to_sna_crtc(crtc)), ++ __sna_crtc_id(to_sna_crtc(crtc))); ++ sna_crtc_disable(crtc, true); ++#if XF86_CRTC_VERSION >= 3 ++ crtc->active = FALSE; ++#endif ++ if (crtc->enabled) { ++ crtc->enabled = FALSE; ++ disabled = true; ++ } ++ ++ for (o = 0; o < sna->mode.num_real_output; o++) { ++ xf86OutputPtr output = config->output[o]; ++ ++ if (output->crtc != crtc) ++ continue; ++ ++ output->funcs->dpms(output, DPMSModeOff); ++ output->crtc = NULL; ++ } ++ ++ return disabled; ++} ++ ++static bool ++sna_mode_disable_secondary_planes(struct sna *sna) ++{ ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ bool disabled = false; ++ int c; ++ ++ /* Disable all secondary planes on our CRTCs, just in case ++ * other userspace left garbage in them. 
++ */ ++ for (c = 0; c < sna->mode.num_real_crtc; c++) { ++ xf86CrtcPtr crtc = config->crtc[c]; ++ struct sna_crtc *sna_crtc = to_sna_crtc(crtc); ++ struct plane *plane; ++ ++ list_for_each_entry(plane, &sna_crtc->sprites, link) { ++ struct local_mode_get_plane p; ++ struct local_mode_set_plane s; ++ ++ VG_CLEAR(p); ++ p.plane_id = plane->id; ++ p.count_format_types = 0; ++ if (drmIoctl(sna->kgem.fd, ++ LOCAL_IOCTL_MODE_GETPLANE, ++ &p)) ++ continue; ++ ++ if (p.fb_id == 0 || p.crtc_id == 0) ++ continue; ++ ++ memset(&s, 0, sizeof(s)); ++ s.plane_id = p.plane_id; ++ s.crtc_id = p.crtc_id; ++ if (drmIoctl(sna->kgem.fd, ++ LOCAL_IOCTL_MODE_SETPLANE, ++ &s)) ++ disabled |= sna_mode_shutdown_crtc(crtc); ++ } ++ } ++ ++ return disabled; ++} ++ + void sna_mode_check(struct sna *sna) + { + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); +- int i; ++ bool disabled; ++ int c, o; + + if (sna->flags & SNA_IS_HOSTED) + return; + +- DBG(("%s\n", __FUNCTION__)); ++ DBG(("%s: hidden?=%d\n", __FUNCTION__, sna->mode.hidden)); ++ if (sna->mode.hidden) ++ return; ++ ++ disabled = sna_mode_disable_secondary_planes(sna); + + /* Validate CRTC attachments and force consistency upon the kernel */ +- for (i = 0; i < sna->mode.num_real_crtc; i++) { +- xf86CrtcPtr crtc = config->crtc[i]; ++ for (c = 0; c < sna->mode.num_real_crtc; c++) { ++ xf86CrtcPtr crtc = config->crtc[c]; + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); + struct drm_mode_crtc mode; + uint32_t expected[2]; +@@ -6483,7 +8341,7 @@ void sna_mode_check(struct sna *sna) + expected[1] = sna_crtc->flip_bo ? 
fb_id(sna_crtc->flip_bo) : -1; + + VG_CLEAR(mode); +- mode.crtc_id = sna_crtc->id; ++ mode.crtc_id = __sna_crtc_id(sna_crtc); + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETCRTC, &mode)) + continue; + +@@ -6492,16 +8350,12 @@ void sna_mode_check(struct sna *sna) + mode.crtc_id, mode.mode_valid, + mode.fb_id, expected[0], expected[1])); + +- if (mode.fb_id != expected[0] && mode.fb_id != expected[1]) { +- xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, +- "%s: invalid state found on pipe %d, disabling CRTC:%d\n", +- __FUNCTION__, sna_crtc->pipe, sna_crtc->id); +- sna_crtc_disable(crtc); +- } ++ if (mode.fb_id != expected[0] && mode.fb_id != expected[1]) ++ disabled |= sna_mode_shutdown_crtc(crtc); + } + +- for (i = 0; i < config->num_output; i++) { +- xf86OutputPtr output = config->output[i]; ++ for (o = 0; o < config->num_output; o++) { ++ xf86OutputPtr output = config->output[o]; + struct sna_output *sna_output; + + if (output->crtc) +@@ -6515,26 +8369,16 @@ void sna_mode_check(struct sna *sna) + } + + update_flush_interval(sna); ++ ++ if (disabled) ++ xf86RandR12TellChanged(xf86ScrnToScreen(sna->scrn)); + } + + static bool + sna_crtc_hide_planes(struct sna *sna, struct sna_crtc *crtc) + { +-#define LOCAL_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct local_mode_set_plane) +- struct local_mode_set_plane { +- uint32_t plane_id; +- uint32_t crtc_id; +- uint32_t fb_id; /* fb object contains surface format type */ +- uint32_t flags; +- +- /* Signed dest location allows it to be partially off screen */ +- int32_t crtc_x, crtc_y; +- uint32_t crtc_w, crtc_h; +- +- /* Source values are 16.16 fixed point */ +- uint32_t src_x, src_y; +- uint32_t src_h, src_w; +- } s; ++ struct local_mode_set_plane s; ++ struct plane *plane; + + if (crtc->primary.id == 0) + return false; +@@ -6544,8 +8388,10 @@ sna_crtc_hide_planes(struct sna *sna, struct sna_crtc *crtc) + if (drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s)) + return false; + +- s.plane_id = crtc->sprite.id; +- 
(void)drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s); ++ list_for_each_entry(plane, &crtc->sprites, link) { ++ s.plane_id = plane->id; ++ (void)drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s); ++ } + + __sna_crtc_disable(sna, crtc); + return true; +@@ -6561,21 +8407,22 @@ void sna_mode_reset(struct sna *sna) + + DBG(("%s\n", __FUNCTION__)); + +- sna_hide_cursors(sna->scrn); ++ sna_disable_cursors(sna->scrn); + for (i = 0; i < sna->mode.num_real_crtc; i++) + if (!sna_crtc_hide_planes(sna, to_sna_crtc(config->crtc[i]))) +- sna_crtc_disable(config->crtc[i]); ++ sna_crtc_disable(config->crtc[i], true); + assert(sna->mode.front_active == 0); + + for (i = 0; i < sna->mode.num_real_crtc; i++) { + struct sna_crtc *sna_crtc = to_sna_crtc(config->crtc[i]); ++ struct plane *plane; + + assert(sna_crtc != NULL); +- sna_crtc->dpms_mode = -1; + + /* Force the rotation property to be reset on next use */ + rotation_reset(&sna_crtc->primary); +- rotation_reset(&sna_crtc->sprite); ++ list_for_each_entry(plane, &sna_crtc->sprites, link) ++ rotation_reset(plane); + } + + /* VT switching, likely to be fbcon so make the backlight usable */ +@@ -6641,9 +8488,10 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo + { + int16_t sx, sy; + struct sna *sna = to_sna(crtc->scrn); +- ScreenPtr screen = sna->scrn->pScreen; ++ ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); + DrawablePtr draw = crtc_source(crtc, &sx, &sy); + PictFormatPtr format; ++ PictTransform T; + PicturePtr src, dst; + PixmapPtr pixmap; + int depth, error; +@@ -6664,6 +8512,14 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo + __FUNCTION__, format->format, depth, draw->bitsPerPixel, + bo->pitch, crtc->mode.HDisplay, crtc->mode.VDisplay)); + ++ if (sx | sy) ++ RegionTranslate(region, sx, sy); ++ error = !sna_drawable_move_region_to_cpu(draw, region, MOVE_READ); ++ if (sx | sy) ++ RegionTranslate(region, -sx, -sy); ++ if (error) ++ return; ++ + ptr = 
kgem_bo_map__gtt(&sna->kgem, bo); + if (ptr == NULL) + return; +@@ -6683,9 +8539,37 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo + if (!src) + goto free_pixmap; + +- error = SetPictureTransform(src, &crtc->crtc_to_framebuffer); +- if (error) +- goto free_src; ++ pixman_transform_init_translate(&T, sx << 16, sy << 16); ++ pixman_transform_multiply(&T, &T, &crtc->crtc_to_framebuffer); ++ if (!sna_transform_is_integer_translation(&T, &sx, &sy)) { ++#define f2d(x) (((double)(x))/65536.) ++ DBG(("%s: transform=[[%f %f %f], [%f %f %f], [%f %f %f]] (raw [[%x %x %x], [%x %x %x], [%x %x %x]])\n", ++ __FUNCTION__, ++ f2d(T.matrix[0][0]), ++ f2d(T.matrix[0][1]), ++ f2d(T.matrix[0][2]), ++ f2d(T.matrix[1][0]), ++ f2d(T.matrix[1][1]), ++ f2d(T.matrix[1][2]), ++ f2d(T.matrix[2][0]), ++ f2d(T.matrix[2][1]), ++ f2d(T.matrix[2][2]), ++ T.matrix[0][0], ++ T.matrix[0][1], ++ T.matrix[0][2], ++ T.matrix[1][0], ++ T.matrix[1][1], ++ T.matrix[1][2], ++ T.matrix[2][0], ++ T.matrix[2][1], ++ T.matrix[2][2])); ++#undef f2d ++ ++ error = SetPictureTransform(src, &T); ++ if (error) ++ goto free_src; ++ sx = sy = 0; ++ } + + if (crtc->filter && crtc->transform_in_use) + SetPicturePictFilter(src, crtc->filter, +@@ -6733,10 +8617,11 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo + { + int16_t sx, sy; + struct sna *sna = to_sna(crtc->scrn); +- ScreenPtr screen = crtc->scrn->pScreen; ++ ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); + DrawablePtr draw = crtc_source(crtc, &sx, &sy); + struct sna_composite_op tmp; + PictFormatPtr format; ++ PictTransform T; + PicturePtr src, dst; + PixmapPtr pixmap; + const BoxRec *b; +@@ -6777,9 +8662,14 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo + if (!src) + goto free_pixmap; + +- error = SetPictureTransform(src, &crtc->crtc_to_framebuffer); +- if (error) +- goto free_src; ++ pixman_transform_init_translate(&T, sx << 16, sy << 16); ++ 
pixman_transform_multiply(&T, &T, &crtc->crtc_to_framebuffer); ++ if (!sna_transform_is_integer_translation(&T, &sx, &sy)) { ++ error = SetPictureTransform(src, &T); ++ if (error) ++ goto free_src; ++ sx = sy = 0; ++ } + + if (crtc->filter && crtc->transform_in_use) + SetPicturePictFilter(src, crtc->filter, +@@ -6793,36 +8683,38 @@ sna_crtc_redisplay__composite(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo + ValidatePicture(src); + ValidatePicture(dst); + +- if (!sna->render.composite(sna, +- PictOpSrc, src, NULL, dst, +- sx, sy, +- 0, 0, +- 0, 0, +- crtc->mode.HDisplay, crtc->mode.VDisplay, +- COMPOSITE_PARTIAL, memset(&tmp, 0, sizeof(tmp)))) { +- DBG(("%s: unsupported operation!\n", __FUNCTION__)); +- sna_crtc_redisplay__fallback(crtc, region, bo); +- goto free_dst; +- } +- ++ /* Composite each box individually as if we are dealing with a rotation ++ * on a large display, we may have to perform intermediate copies. We ++ * can then minimise the overdraw by looking at individual boxes rather ++ * than the bbox. 
++ */ + n = region_num_rects(region); + b = region_rects(region); + do { +- BoxRec box; +- +- box = *b++; ++ BoxRec box = *b; + transformed_box(&box, crtc); + + DBG(("%s: (%d, %d)x(%d, %d) -> (%d, %d), (%d, %d)\n", + __FUNCTION__, +- b[-1].x1, b[-1].y1, b[-1].x2-b[-1].x1, b[-1].y2-b[-1].y1, ++ b->x1, b->y1, b->x2-b->x1, b->y2-b->y1, + box.x1, box.y1, box.x2, box.y2)); + +- tmp.box(sna, &tmp, &box); +- } while (--n); +- tmp.done(sna, &tmp); ++ if (!sna->render.composite(sna, ++ PictOpSrc, src, NULL, dst, ++ sx + box.x1, sy + box.y1, ++ 0, 0, ++ box.x1, box.y1, ++ box.x2 - box.x1, box.y2 - box.y1, ++ 0, memset(&tmp, 0, sizeof(tmp)))) { ++ DBG(("%s: unsupported operation!\n", __FUNCTION__)); ++ sna_crtc_redisplay__fallback(crtc, region, bo); ++ break; ++ } else { ++ tmp.box(sna, &tmp, &box); ++ tmp.done(sna, &tmp); ++ } ++ } while (b++, --n); + +-free_dst: + FreePicture(dst, None); + free_src: + FreePicture(src, None); +@@ -6839,7 +8731,7 @@ sna_crtc_redisplay(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo *bo) + struct sna_pixmap *priv = sna_pixmap((PixmapPtr)draw); + + DBG(("%s: crtc %d [pipe=%d], damage (%d, %d), (%d, %d) x %d\n", +- __FUNCTION__, to_sna_crtc(crtc)->id, to_sna_crtc(crtc)->pipe, ++ __FUNCTION__, sna_crtc_id(crtc), sna_crtc_pipe(crtc), + region->extents.x1, region->extents.y1, + region->extents.x2, region->extents.y2, + region_num_rects(region))); +@@ -6898,7 +8790,10 @@ sna_crtc_redisplay(xf86CrtcPtr crtc, RegionPtr region, struct kgem_bo *bo) + static void shadow_flip_handler(struct drm_event_vblank *e, + void *data) + { +- sna_mode_redisplay(data); ++ struct sna *sna = data; ++ ++ if (!sna->mode.shadow_wait) ++ sna_mode_redisplay(sna); + } + + void sna_shadow_set_crtc(struct sna *sna, +@@ -6908,18 +8803,23 @@ void sna_shadow_set_crtc(struct sna *sna, + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); + struct sna_pixmap *priv; + ++ assert(sna_crtc); + DBG(("%s: setting shadow override for CRTC:%d to handle=%d\n", +- __FUNCTION__, sna_crtc->id, 
bo->handle)); ++ __FUNCTION__, __sna_crtc_id(sna_crtc), bo->handle)); + + assert(sna->flags & SNA_TEAR_FREE); +- assert(sna_crtc); + assert(!sna_crtc->transform); + + if (sna_crtc->client_bo != bo) { +- if (sna_crtc->client_bo) ++ if (sna_crtc->client_bo) { ++ assert(sna_crtc->client_bo->refcnt >= sna_crtc->client_bo->active_scanout); ++ sna_crtc->client_bo->active_scanout--; + kgem_bo_destroy(&sna->kgem, sna_crtc->client_bo); ++ } + + sna_crtc->client_bo = kgem_bo_reference(bo); ++ sna_crtc->client_bo->active_scanout++; ++ assert(sna_crtc->client_bo->refcnt >= sna_crtc->client_bo->active_scanout); + sna_crtc_damage(crtc); + } + +@@ -6969,11 +8869,13 @@ void sna_shadow_unset_crtc(struct sna *sna, + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); + + DBG(("%s: clearin shadow override for CRTC:%d\n", +- __FUNCTION__, sna_crtc->id)); ++ __FUNCTION__, __sna_crtc_id(sna_crtc))); + + if (sna_crtc->client_bo == NULL) + return; + ++ assert(sna_crtc->client_bo->refcnt >= sna_crtc->client_bo->active_scanout); ++ sna_crtc->client_bo->active_scanout--; + kgem_bo_destroy(&sna->kgem, sna_crtc->client_bo); + sna_crtc->client_bo = NULL; + list_del(&sna_crtc->shadow_link); +@@ -6982,15 +8884,57 @@ void sna_shadow_unset_crtc(struct sna *sna, + sna_crtc_damage(crtc); + } + ++static bool move_crtc_to_gpu(struct sna *sna) ++{ ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ int i; ++ ++ for (i = 0; i < sna->mode.num_real_crtc; i++) { ++ struct sna_crtc *crtc = to_sna_crtc(config->crtc[i]); ++ unsigned hint; ++ ++ assert(crtc); ++ ++ if (crtc->bo == NULL) ++ continue; ++ ++ if (crtc->slave_pixmap) ++ continue; ++ ++ if (crtc->client_bo) ++ continue; ++ ++ if (crtc->shadow_bo) ++ continue; ++ ++ hint = MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT; ++ if (sna->flags & SNA_TEAR_FREE) ++ hint |= __MOVE_FORCE; ++ ++ DBG(("%s: CRTC %d [pipe=%d] requires frontbuffer\n", ++ __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc))); ++ return sna_pixmap_move_to_gpu(sna->front, 
hint); ++ } ++ ++ return true; ++} ++ + void sna_mode_redisplay(struct sna *sna) + { + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + RegionPtr region; + int i; + +- if (!sna->mode.shadow_damage) ++ if (sna->mode.hidden) { ++ DBG(("%s: hidden outputs, skipping\n", __FUNCTION__)); ++ return; ++ } ++ ++ if (!sna->mode.shadow_enabled) + return; + ++ assert(sna->mode.shadow_damage); ++ + DBG(("%s: posting shadow damage? %d (flips pending? %d, mode reconfiguration pending? %d)\n", + __FUNCTION__, + !RegionNil(DamageRegion(sna->mode.shadow_damage)), +@@ -7012,21 +8956,23 @@ void sna_mode_redisplay(struct sna *sna) + region->extents.x2, region->extents.y2)); + + if (sna->mode.flip_active) { +- DamagePtr damage; +- +- damage = sna->mode.shadow_damage; +- sna->mode.shadow_damage = NULL; ++ DBG(("%s: checking for %d outstanding flip completions\n", ++ __FUNCTION__, sna->mode.flip_active)); + ++ sna->mode.dirty = true; + while (sna->mode.flip_active && sna_mode_wakeup(sna)) + ; ++ sna->mode.dirty = false; + +- sna->mode.shadow_damage = damage; ++ DBG(("%s: now %d outstanding flip completions (enabled? 
%d)\n", ++ __FUNCTION__, ++ sna->mode.flip_active, ++ sna->mode.shadow_enabled)); ++ if (sna->mode.flip_active || !sna->mode.shadow_enabled) ++ return; + } + +- if (sna->mode.flip_active) +- return; +- +- if (wedged(sna) || !sna_pixmap_move_to_gpu(sna->front, MOVE_READ | MOVE_ASYNC_HINT | __MOVE_SCANOUT)) { ++ if (!move_crtc_to_gpu(sna)) { + DBG(("%s: forcing scanout update using the CPU\n", __FUNCTION__)); + if (!sna_pixmap_move_to_cpu(sna->front, MOVE_READ)) + return; +@@ -7047,90 +8993,14 @@ void sna_mode_redisplay(struct sna *sna) + damage.data = NULL; + RegionIntersect(&damage, &damage, region); + if (!box_empty(&damage.extents)) { +- struct kgem_bo *bo = NULL; +- + DBG(("%s: fallback intersects pipe=%d [(%d, %d), (%d, %d)]\n", +- __FUNCTION__, sna_crtc->pipe, ++ __FUNCTION__, __sna_crtc_pipe(sna_crtc), + damage.extents.x1, damage.extents.y1, + damage.extents.x2, damage.extents.y2)); + +- if (sna->flags & SNA_TEAR_FREE) { +- RegionRec new_damage; +- +- RegionNull(&new_damage); +- RegionCopy(&new_damage, &damage); +- +- bo = sna_crtc->client_bo; +- if (bo == NULL) { +- damage.extents = crtc->bounds; +- damage.data = NULL; +- bo = kgem_create_2d(&sna->kgem, +- crtc->mode.HDisplay, +- crtc->mode.VDisplay, +- crtc->scrn->bitsPerPixel, +- sna_crtc->bo->tiling, +- CREATE_SCANOUT); +- } else +- RegionUnion(&damage, &damage, &sna_crtc->client_damage); +- +- DBG(("%s: TearFree fallback, shadow handle=%d, crtc handle=%d\n", __FUNCTION__, bo->handle, sna_crtc->bo->handle)); +- +- sna_crtc->client_damage = new_damage; +- } +- +- if (bo == NULL) +- bo = sna_crtc->bo; +- sna_crtc_redisplay__fallback(crtc, &damage, bo); +- +- if (bo != sna_crtc->bo) { +- struct drm_mode_crtc_page_flip arg; +- +- arg.crtc_id = sna_crtc->id; +- arg.fb_id = get_fb(sna, bo, +- crtc->mode.HDisplay, +- crtc->mode.VDisplay); +- +- arg.user_data = (uintptr_t)sna_crtc; +- arg.flags = DRM_MODE_PAGE_FLIP_EVENT; +- arg.reserved = 0; +- +- if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { +- 
if (sna_crtc_flip(sna, sna_crtc, bo, 0, 0)) { +- assert(sna_crtc->bo->active_scanout); +- assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout); +- sna_crtc->bo->active_scanout--; +- kgem_bo_destroy(&sna->kgem, sna_crtc->bo); +- +- sna_crtc->bo = bo; +- sna_crtc->bo->active_scanout++; +- sna_crtc->client_bo = NULL; +- } else { +- DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", +- __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno)); +- xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, +- "Page flipping failed, disabling TearFree\n"); +- sna->flags &= ~SNA_TEAR_FREE; +- +- damage.extents = crtc->bounds; +- damage.data = NULL; +- sna_crtc_redisplay__fallback(crtc, &damage, sna_crtc->bo); +- +- kgem_bo_destroy(&sna->kgem, bo); +- sna_crtc->client_bo = NULL; +- } +- } else { +- sna->mode.flip_active++; +- +- assert(sna_crtc->flip_bo == NULL); +- sna_crtc->flip_handler = shadow_flip_handler; +- sna_crtc->flip_data = sna; +- sna_crtc->flip_bo = bo; +- sna_crtc->flip_bo->active_scanout++; +- sna_crtc->flip_serial = sna_crtc->mode_serial; +- +- sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo); +- } +- } ++ sna_crtc_redisplay__fallback(crtc, ++ &damage, ++ sna_crtc->bo); + } + RegionUninit(&damage); + +@@ -7171,6 +9041,7 @@ void sna_mode_redisplay(struct sna *sna) + xf86CrtcPtr crtc = config->crtc[i]; + struct sna_crtc *sna_crtc = to_sna_crtc(crtc); + RegionRec damage; ++ int sigio; + + assert(sna_crtc != NULL); + DBG(("%s: crtc[%d] transformed? 
%d\n", +@@ -7192,30 +9063,38 @@ void sna_mode_redisplay(struct sna *sna) + region_num_rects(&damage), + damage.extents.x1, damage.extents.y1, + damage.extents.x2, damage.extents.y2)); ++ sigio = sigio_block(); + if (!box_empty(&damage.extents)) { + if (sna->flags & SNA_TEAR_FREE) { ++ RegionRec new_damage; + struct drm_mode_crtc_page_flip arg; + struct kgem_bo *bo; + +- RegionUninit(&damage); +- damage.extents = crtc->bounds; +- damage.data = NULL; ++ RegionNull(&new_damage); ++ RegionCopy(&new_damage, &damage); + +- bo = sna_crtc->client_bo; +- if (bo == NULL) ++ bo = sna_crtc->cache_bo; ++ if (bo == NULL) { ++ damage.extents = crtc->bounds; ++ damage.data = NULL; + bo = kgem_create_2d(&sna->kgem, + crtc->mode.HDisplay, + crtc->mode.VDisplay, + crtc->scrn->bitsPerPixel, + sna_crtc->bo->tiling, + CREATE_SCANOUT); +- if (bo == NULL) +- goto disable1; ++ if (bo == NULL) ++ continue; ++ } else ++ RegionUnion(&damage, &damage, &sna_crtc->crtc_damage); ++ sna_crtc->crtc_damage = new_damage; + + sna_crtc_redisplay(crtc, &damage, bo); + kgem_bo_submit(&sna->kgem, bo); ++ __kgem_bo_clear_dirty(bo); + +- arg.crtc_id = sna_crtc->id; ++ assert_crtc_fb(sna, sna_crtc); ++ arg.crtc_id = __sna_crtc_id(sna_crtc); + arg.fb_id = get_fb(sna, bo, + crtc->mode.HDisplay, + crtc->mode.VDisplay); +@@ -7228,6 +9107,9 @@ void sna_mode_redisplay(struct sna *sna) + + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { + if (sna_crtc_flip(sna, sna_crtc, bo, 0, 0)) { ++ DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", ++ __FUNCTION__, sna_crtc->bo->handle, sna_crtc->bo->active_scanout - 1, ++ bo->handle, bo->active_scanout)); + assert(sna_crtc->bo->active_scanout); + assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout); + sna_crtc->bo->active_scanout--; +@@ -7235,13 +9117,12 @@ void sna_mode_redisplay(struct sna *sna) + + sna_crtc->bo = kgem_bo_reference(bo); + sna_crtc->bo->active_scanout++; +- sna_crtc->client_bo = 
kgem_bo_reference(bo); + } else { + BoxRec box; + DrawableRec tmp; + + DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", +- __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno)); ++ __FUNCTION__, arg.fb_id, i, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc), errno)); + xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, + "Page flipping failed, disabling TearFree\n"); + sna->flags &= ~SNA_TEAR_FREE; +@@ -7260,13 +9141,13 @@ disable1: + &box, 1, COPY_LAST)) { + xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, + "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n", +- __FUNCTION__, sna_crtc->id, sna_crtc->pipe); +- sna_crtc_disable(crtc); ++ __FUNCTION__, __sna_crtc_id(sna_crtc), __sna_crtc_pipe(sna_crtc)); ++ sna_crtc_disable(crtc, false); + } +- +- kgem_bo_destroy(&sna->kgem, bo); +- sna_crtc->client_bo = NULL; + } ++ ++ kgem_bo_destroy(&sna->kgem, bo); ++ sna_crtc->cache_bo = NULL; + continue; + } + sna->mode.flip_active++; +@@ -7279,13 +9160,20 @@ disable1: + sna_crtc->flip_serial = sna_crtc->mode_serial; + sna_crtc->flip_pending = true; + +- sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo); ++ if (sna_crtc->bo != sna->mode.shadow) { ++ assert_scanout(&sna->kgem, sna_crtc->bo, ++ crtc->mode.HDisplay, crtc->mode.VDisplay); ++ sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo); ++ } ++ DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", ++ __FUNCTION__, __sna_crtc_id(sna_crtc), sna_crtc->flip_bo->handle, sna_crtc->flip_bo->active_scanout, sna_crtc->flip_serial)); + } else { + sna_crtc_redisplay(crtc, &damage, sna_crtc->bo); + kgem_scanout_flush(&sna->kgem, sna_crtc->bo); + } + } + RegionUninit(&damage); ++ sigio_unblock(sigio); + + if (sna_crtc->slave_damage) + DamageEmpty(sna_crtc->slave_damage); +@@ -7296,6 +9184,7 @@ disable1: + struct kgem_bo *old = sna->mode.shadow; + struct drm_mode_crtc_page_flip arg; + uint32_t fb = 0; ++ int sigio; + + DBG(("%s: flipping TearFree outputs, current scanout handle=%d 
[active?=%d], new handle=%d [active=%d]\n", + __FUNCTION__, old->handle, old->active_scanout, new->handle, new->active_scanout)); +@@ -7307,7 +9196,9 @@ disable1: + arg.reserved = 0; + + kgem_bo_submit(&sna->kgem, new); ++ __kgem_bo_clear_dirty(new); + ++ sigio = sigio_block(); + for (i = 0; i < sna->mode.num_real_crtc; i++) { + struct sna_crtc *crtc = config->crtc[i]->driver_private; + struct kgem_bo *flip_bo; +@@ -7315,20 +9206,20 @@ disable1: + + assert(crtc != NULL); + DBG(("%s: crtc %d [%d, pipe=%d] active? %d, transformed? %d\n", +- __FUNCTION__, i, crtc->id, crtc->pipe, crtc->bo ? crtc->bo->handle : 0, crtc->transform)); ++ __FUNCTION__, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->bo ? crtc->bo->handle : 0, crtc->transform)); + if (crtc->bo == NULL || crtc->transform) + continue; + + assert(config->crtc[i]->enabled); +- assert(crtc->dpms_mode <= DPMSModeOn); + assert(crtc->flip_bo == NULL); ++ assert_crtc_fb(sna, crtc); + +- arg.crtc_id = crtc->id; ++ arg.crtc_id = __sna_crtc_id(crtc); + arg.user_data = (uintptr_t)crtc; + + if (crtc->client_bo) { + DBG(("%s: apply shadow override bo for CRTC:%d on pipe=%d, handle=%d\n", +- __FUNCTION__, crtc->id, crtc->pipe, crtc->client_bo->handle)); ++ __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), crtc->client_bo->handle)); + arg.fb_id = get_fb(sna, crtc->client_bo, + crtc->base->mode.HDisplay, + crtc->base->mode.VDisplay); +@@ -7356,6 +9247,7 @@ fixup_shadow: + } + } + ++ sigio_unblock(sigio); + return; + } + +@@ -7365,8 +9257,12 @@ fixup_shadow: + y = crtc->base->y; + } + +- if (crtc->bo == flip_bo) ++ if (crtc->bo == flip_bo) { ++ assert(crtc->bo->refcnt >= crtc->bo->active_scanout); ++ DBG(("%s: flip handle=%d is already on the CRTC\n", ++ __FUNCTION__, flip_bo->handle)); + continue; ++ } + + if (flip_bo->pitch != crtc->bo->pitch || (y << 16 | x) != crtc->offset) { + DBG(("%s: changing pitch (new %d =?= old %d) or offset (new %x =?= old %x)\n", +@@ -7375,6 +9271,9 @@ fixup_shadow: + y << 16 | x, 
crtc->offset)); + fixup_flip: + if (sna_crtc_flip(sna, crtc, flip_bo, x, y)) { ++ DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", ++ __FUNCTION__, crtc->bo->handle, crtc->bo->active_scanout-1, ++ flip_bo->handle, flip_bo->active_scanout)); + assert(flip_bo != crtc->bo); + assert(crtc->bo->active_scanout); + assert(crtc->bo->refcnt >= crtc->bo->active_scanout); +@@ -7389,9 +9288,11 @@ fixup_flip: + crtc->bo = kgem_bo_reference(flip_bo); + crtc->bo->active_scanout++; + } else { +- xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, +- "Failed to prepare CRTC for page flipping, disabling TearFree\n"); +- sna->flags &= ~SNA_TEAR_FREE; ++ if (sna->flags & SNA_TEAR_FREE) { ++ xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, ++ "Failed to prepare CRTC for page flipping, disabling TearFree\n"); ++ sna->flags &= ~SNA_TEAR_FREE; ++ } + + if (sna->mode.flip_active == 0) { + DBG(("%s: abandoning flip attempt\n", __FUNCTION__)); +@@ -7400,15 +9301,15 @@ fixup_flip: + + xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR, + "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n", +- __FUNCTION__, crtc->id, crtc->pipe); +- sna_crtc_disable(crtc->base); ++ __FUNCTION__, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc)); ++ sna_crtc_disable(crtc->base, false); + } + continue; + } + + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) { + ERR(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n", +- __FUNCTION__, arg.fb_id, i, crtc->id, crtc->pipe, errno)); ++ __FUNCTION__, arg.fb_id, i, __sna_crtc_id(crtc), __sna_crtc_pipe(crtc), errno)); + goto fixup_flip; + } + sna->mode.flip_active++; +@@ -7421,6 +9322,9 @@ fixup_flip: + crtc->flip_serial = crtc->mode_serial; + crtc->flip_pending = true; + ++ DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n", ++ __FUNCTION__, __sna_crtc_id(crtc), crtc->flip_bo->handle, crtc->flip_bo->active_scanout, crtc->flip_serial)); ++ + { + struct drm_i915_gem_busy busy = { 
flip_bo->handle }; + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0) { +@@ -7435,6 +9339,7 @@ fixup_flip: + } + } + } ++ sigio_unblock(sigio); + + DBG(("%s: flipped %d outputs, shadow active? %d\n", + __FUNCTION__, +@@ -7486,7 +9391,9 @@ again: + struct drm_event *e = (struct drm_event *)&buffer[i]; + switch (e->type) { + case DRM_EVENT_VBLANK: +- if (((uintptr_t)((struct drm_event_vblank *)e)->user_data) & 2) ++ if (sna->mode.shadow_wait) ++ defer_event(sna, e); ++ else if (((uintptr_t)((struct drm_event_vblank *)e)->user_data) & 2) + sna_present_vblank_handler((struct drm_event_vblank *)e); + else + sna_dri2_vblank_handler((struct drm_event_vblank *)e); +@@ -7495,13 +9402,19 @@ again: + { + struct drm_event_vblank *vbl = (struct drm_event_vblank *)e; + struct sna_crtc *crtc = (void *)(uintptr_t)vbl->user_data; ++ uint64_t msc; + + /* Beware Zaphod! */ + sna = to_sna(crtc->base->scrn); + +- crtc->swap.tv_sec = vbl->tv_sec; +- crtc->swap.tv_usec = vbl->tv_usec; +- crtc->swap.msc = msc64(crtc, vbl->sequence); ++ if (msc64(crtc, vbl->sequence, &msc)) { ++ DBG(("%s: recording last swap on pipe=%d, frame %d [%08llx], time %d.%06d\n", ++ __FUNCTION__, __sna_crtc_pipe(crtc), vbl->sequence, (long long)msc, vbl->tv_sec, vbl->tv_usec)); ++ crtc->swap.tv_sec = vbl->tv_sec; ++ crtc->swap.tv_usec = vbl->tv_usec; ++ crtc->swap.msc = msc; ++ } ++ assert(crtc->flip_pending); + crtc->flip_pending = false; + + assert(crtc->flip_bo); +@@ -7509,10 +9422,12 @@ again: + assert(crtc->flip_bo->refcnt >= crtc->flip_bo->active_scanout); + + if (crtc->flip_serial == crtc->mode_serial) { +- DBG(("%s: removing handle=%d from scanout, installing handle=%d\n", +- __FUNCTION__, crtc->bo->handle, crtc->flip_bo->handle)); ++ DBG(("%s: removing handle=%d [active_scanout=%d] from scanout, installing handle=%d [active_scanout=%d]\n", ++ __FUNCTION__, crtc->bo->handle, crtc->bo->active_scanout - 1, ++ crtc->flip_bo->handle, crtc->flip_bo->active_scanout)); + 
assert(crtc->bo->active_scanout); + assert(crtc->bo->refcnt >= crtc->bo->active_scanout); ++ + crtc->bo->active_scanout--; + kgem_bo_destroy(&sna->kgem, crtc->bo); + +@@ -7523,6 +9438,8 @@ again: + + crtc->bo = crtc->flip_bo; + crtc->flip_bo = NULL; ++ ++ assert_crtc_fb(sna, crtc); + } else { + crtc->flip_bo->active_scanout--; + kgem_bo_destroy(&sna->kgem, crtc->flip_bo); +@@ -7531,8 +9448,10 @@ again: + + DBG(("%s: flip complete, pending? %d\n", __FUNCTION__, sna->mode.flip_active)); + assert(sna->mode.flip_active); +- if (--sna->mode.flip_active == 0) ++ if (--sna->mode.flip_active == 0) { ++ assert(crtc->flip_handler); + crtc->flip_handler(vbl, crtc->flip_data); ++ } + } + break; + default: +diff --git a/src/sna/sna_display_fake.c b/src/sna/sna_display_fake.c +index 4d74c38d..fa26bda1 100644 +--- a/src/sna/sna_display_fake.c ++++ b/src/sna/sna_display_fake.c +@@ -96,12 +96,6 @@ sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, + } + + static void +-sna_crtc_gamma_set(xf86CrtcPtr crtc, +- CARD16 *red, CARD16 *green, CARD16 *blue, int size) +-{ +-} +- +-static void + sna_crtc_destroy(xf86CrtcPtr crtc) + { + } +@@ -109,7 +103,6 @@ sna_crtc_destroy(xf86CrtcPtr crtc) + static const xf86CrtcFuncsRec sna_crtc_funcs = { + .dpms = sna_crtc_dpms, + .set_mode_major = sna_crtc_set_mode_major, +- .gamma_set = sna_crtc_gamma_set, + .destroy = sna_crtc_destroy, + }; + +@@ -192,7 +185,7 @@ static const xf86OutputFuncsRec sna_output_funcs = { + static Bool + sna_mode_resize(ScrnInfoPtr scrn, int width, int height) + { +- ScreenPtr screen = scrn->pScreen; ++ ScreenPtr screen = xf86ScrnToScreen(scrn); + PixmapPtr new_front; + + DBG(("%s (%d, %d) -> (%d, %d)\n", __FUNCTION__, +@@ -262,6 +255,7 @@ static bool add_fake_output(struct sna *sna, bool late) + output->mm_height = 0; + output->interlaceAllowed = FALSE; + output->subpixel_order = SubPixelNone; ++ output->status = XF86OutputStatusDisconnected; + + output->possible_crtcs = ~((1 << sna->mode.num_real_crtc) - 1); + 
output->possible_clones = ~((1 << sna->mode.num_real_output) - 1); +@@ -297,6 +291,8 @@ static bool add_fake_output(struct sna *sna, bool late) + + RRCrtcSetRotations(crtc->randr_crtc, + RR_Rotate_All | RR_Reflect_All); ++ if (!RRCrtcGammaSetSize(crtc->randr_crtc, 256)) ++ goto err; + } + + sna->mode.num_fake++; +@@ -312,13 +308,16 @@ err: + continue; + + xf86OutputDestroy(output); ++ i--; + } + + for (i = 0; i < xf86_config->num_crtc; i++) { + crtc = xf86_config->crtc[i]; + if (crtc->driver_private) + continue; ++ + xf86CrtcDestroy(crtc); ++ i--; + } + sna->mode.num_fake = -1; + return false; +diff --git a/src/sna/sna_dri2.c b/src/sna/sna_dri2.c +index e5c4d53e..d89525cc 100644 +--- a/src/sna/sna_dri2.c ++++ b/src/sna/sna_dri2.c +@@ -82,12 +82,23 @@ get_private(void *buffer) + return (struct sna_dri2_private *)((DRI2Buffer2Ptr)buffer+1); + } + ++pure static inline DRI2BufferPtr sna_pixmap_get_buffer(PixmapPtr pixmap) ++{ ++ assert(pixmap->refcnt); ++ return ((void **)__get_private(pixmap, sna_pixmap_key))[2]; ++} ++ ++static inline void sna_pixmap_set_buffer(PixmapPtr pixmap, void *ptr) ++{ ++ assert(pixmap->refcnt); ++ ((void **)__get_private(pixmap, sna_pixmap_key))[2] = ptr; ++} ++ + #if DRI2INFOREC_VERSION >= 4 + enum event_type { + WAITMSC = 0, + SWAP, +- SWAP_WAIT, +- SWAP_THROTTLE, ++ SWAP_COMPLETE, + FLIP, + FLIP_THROTTLE, + FLIP_COMPLETE, +@@ -98,6 +109,7 @@ struct dri_bo { + struct list link; + struct kgem_bo *bo; + uint32_t name; ++ unsigned flags; + }; + + struct sna_dri2_event { +@@ -108,6 +120,8 @@ struct sna_dri2_event { + xf86CrtcPtr crtc; + int pipe; + bool queued; ++ bool sync; ++ bool chained; + + /* for swaps & flips only */ + DRI2SwapEventPtr event_complete; +@@ -116,35 +130,146 @@ struct sna_dri2_event { + DRI2BufferPtr back; + struct kgem_bo *bo; + ++ struct copy { ++ struct kgem_bo *bo; ++ unsigned flags; ++ uint32_t name; ++ uint32_t size; ++ } pending; ++ + struct sna_dri2_event *chain; + +- struct list cache; + struct list link; + +- int 
mode; ++ int flip_continue; ++ int keepalive; ++ int signal; + }; + ++#if DRI2INFOREC_VERSION < 10 ++#undef USE_ASYNC_SWAP ++#endif ++ ++#if USE_ASYNC_SWAP ++#define KEEPALIVE 8 /* wait ~100ms before discarding swap caches */ ++#define APPLY_DAMAGE 0 ++#else ++#define USE_ASYNC_SWAP 0 ++#define KEEPALIVE 1 ++#define APPLY_DAMAGE 1 ++#endif ++ + static void sna_dri2_flip_event(struct sna_dri2_event *flip); ++inline static DRI2BufferPtr dri2_window_get_front(WindowPtr win); ++ ++static struct kgem_bo * ++__sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, ++ DRI2BufferPtr src, DRI2BufferPtr dst, ++ unsigned flags); ++ ++inline static void ++__sna_dri2_copy_event(struct sna_dri2_event *info, unsigned flags) ++{ ++ DBG(("%s: flags = %x\n", __FUNCTION__, flags)); ++ assert(info->front != info->back); ++ info->bo = __sna_dri2_copy_region(info->sna, info->draw, NULL, ++ info->back, info->front, ++ flags); ++ info->front->flags = info->back->flags; ++} ++ ++static int front_pitch(DrawablePtr draw) ++{ ++ DRI2BufferPtr buffer; ++ ++ buffer = NULL; ++ if (draw->type != DRAWABLE_PIXMAP) ++ buffer = dri2_window_get_front((WindowPtr)draw); ++ if (buffer == NULL) ++ buffer = sna_pixmap_get_buffer(get_drawable_pixmap(draw)); ++ ++ return buffer ? 
buffer->pitch : 0; ++} ++ ++struct dri2_window { ++ DRI2BufferPtr front; ++ struct sna_dri2_event *chain; ++ xf86CrtcPtr crtc; ++ int64_t msc_delta; ++ struct list cache; ++ uint32_t cache_size; ++ int scanout; ++}; ++ ++static struct dri2_window *dri2_window(WindowPtr win) ++{ ++ assert(win->drawable.type != DRAWABLE_PIXMAP); ++ return ((void **)__get_private(win, sna_window_key))[1]; ++} ++ ++static bool use_scanout(struct sna *sna, ++ DrawablePtr draw, ++ struct dri2_window *priv) ++{ ++ if (priv->front) ++ return true; ++ ++ if (priv->scanout < 0) ++ priv->scanout = ++ (sna->flags & (SNA_LINEAR_FB | SNA_NO_WAIT | SNA_NO_FLIP)) == 0 && ++ draw->width == sna->front->drawable.width && ++ draw->height == sna->front->drawable.height && ++ draw->bitsPerPixel == sna->front->drawable.bitsPerPixel; ++ ++ return priv->scanout; ++} + + static void + sna_dri2_get_back(struct sna *sna, + DrawablePtr draw, +- DRI2BufferPtr back, +- struct sna_dri2_event *info) ++ DRI2BufferPtr back) + { ++ struct dri2_window *priv = dri2_window((WindowPtr)draw); ++ uint32_t size; + struct kgem_bo *bo; ++ struct dri_bo *c; + uint32_t name; ++ int flags; + bool reuse; + +- DBG(("%s: draw size=%dx%d, buffer size=%dx%d\n", ++ DBG(("%s: draw size=%dx%d, back buffer handle=%d size=%dx%d, is-scanout? 
%d, active?=%d, pitch=%d, front pitch=%d\n", + __FUNCTION__, draw->width, draw->height, +- get_private(back)->size & 0xffff, get_private(back)->size >> 16)); +- reuse = (draw->height << 16 | draw->width) == get_private(back)->size; ++ get_private(back)->bo->handle, ++ get_private(back)->size & 0xffff, get_private(back)->size >> 16, ++ get_private(back)->bo->scanout, ++ get_private(back)->bo->active_scanout, ++ back->pitch, front_pitch(draw))); ++ assert(priv); ++ ++ size = draw->height << 16 | draw->width; ++ if (size != priv->cache_size) { ++ while (!list_is_empty(&priv->cache)) { ++ c = list_first_entry(&priv->cache, struct dri_bo, link); ++ list_del(&c->link); ++ ++ DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); ++ assert(c->bo); ++ kgem_bo_destroy(&sna->kgem, c->bo); ++ ++ free(c); ++ } ++ priv->cache_size = size; ++ } ++ ++ reuse = size == get_private(back)->size; ++ if (reuse) ++ reuse = get_private(back)->bo->scanout == use_scanout(sna, draw, priv); ++ DBG(("%s: reuse backbuffer? 
%d\n", __FUNCTION__, reuse)); + if (reuse) { + bo = get_private(back)->bo; + assert(bo->refcnt); +- DBG(("%s: back buffer handle=%d, scanout?=%d, refcnt=%d\n", +- __FUNCTION__, bo->handle, bo->active_scanout, get_private(back)->refcnt)); ++ DBG(("%s: back buffer handle=%d, active?=%d, refcnt=%d\n", ++ __FUNCTION__, bo->handle, bo->active_scanout, get_private(back)->refcnt)); + if (bo->active_scanout == 0) { + DBG(("%s: reuse unattached back\n", __FUNCTION__)); + get_private(back)->stale = false; +@@ -153,24 +278,37 @@ sna_dri2_get_back(struct sna *sna, + } + + bo = NULL; +- if (info) { +- struct dri_bo *c; +- list_for_each_entry(c, &info->cache, link) { +- if (c->bo && c->bo->scanout == 0) { +- bo = c->bo; +- name = c->name; +- DBG(("%s: reuse cache handle=%d\n", __FUNCTION__, bo->handle)); +- list_move_tail(&c->link, &info->cache); +- c->bo = NULL; ++ list_for_each_entry(c, &priv->cache, link) { ++ DBG(("%s: cache: handle=%d, active=%d\n", ++ __FUNCTION__, c->bo ? c->bo->handle : 0, c->bo ? c->bo->active_scanout : -1)); ++ assert(c->bo); ++ if (c->bo->active_scanout == 0) { ++ _list_del(&c->link); ++ if (c->bo == NULL) { ++ free(c); ++ goto out; + } ++ bo = c->bo; ++ name = c->name; ++ flags = c->flags; ++ DBG(("%s: reuse cache handle=%d, name=%d, flags=%d\n", __FUNCTION__, bo->handle, name, flags)); ++ c->bo = NULL; ++ break; + } + } + if (bo == NULL) { + DBG(("%s: allocating new backbuffer\n", __FUNCTION__)); ++ flags = CREATE_EXACT; ++ ++ if (use_scanout(sna, draw, priv)) { ++ DBG(("%s: requesting scanout compatible back\n", __FUNCTION__)); ++ flags |= CREATE_SCANOUT; ++ } ++ + bo = kgem_create_2d(&sna->kgem, + draw->width, draw->height, draw->bitsPerPixel, + get_private(back)->bo->tiling, +- get_private(back)->bo->scanout ? 
CREATE_SCANOUT : 0); ++ flags); + if (bo == NULL) + return; + +@@ -179,30 +317,42 @@ sna_dri2_get_back(struct sna *sna, + kgem_bo_destroy(&sna->kgem, bo); + return; + } ++ ++ flags = 0; ++ if (USE_ASYNC_SWAP && back->flags) { ++ BoxRec box; ++ ++ box.x1 = 0; ++ box.y1 = 0; ++ box.x2 = draw->width; ++ box.y2 = draw->height; ++ ++ DBG(("%s: filling new buffer with old back\n", __FUNCTION__)); ++ if (sna->render.copy_boxes(sna, GXcopy, ++ draw, get_private(back)->bo, 0, 0, ++ draw, bo, 0, 0, ++ &box, 1, COPY_LAST | COPY_DRI)) ++ flags = back->flags; ++ } + } + assert(bo->active_scanout == 0); + +- if (info && reuse) { +- bool found = false; +- struct dri_bo *c; +- +- list_for_each_entry_reverse(c, &info->cache, link) { +- if (c->bo == NULL) { +- found = true; +- _list_del(&c->link); +- break; +- } +- } +- if (!found) ++ if (reuse && get_private(back)->bo->refcnt == 1 + get_private(back)->bo->active_scanout) { ++ if (&c->link == &priv->cache) + c = malloc(sizeof(*c)); + if (c != NULL) { + c->bo = ref(get_private(back)->bo); + c->name = back->name; +- list_add(&c->link, &info->cache); +- DBG(("%s: cacheing handle=%d (name=%d)\n", __FUNCTION__, c->bo->handle, c->name)); ++ c->flags = back->flags; ++ list_add(&c->link, &priv->cache); ++ DBG(("%s: caching handle=%d (name=%d, flags=%d, active_scanout=%d)\n", __FUNCTION__, c->bo->handle, c->name, c->flags, c->bo->active_scanout)); + } ++ } else { ++ if (&c->link != &priv->cache) ++ free(c); + } + ++ assert(bo->active_scanout == 0); + assert(bo != get_private(back)->bo); + kgem_bo_destroy(&sna->kgem, get_private(back)->bo); + +@@ -210,21 +360,13 @@ sna_dri2_get_back(struct sna *sna, + get_private(back)->size = draw->height << 16 | draw->width; + back->pitch = bo->pitch; + back->name = name; ++ back->flags = flags; + +- get_private(back)->stale = false; +-} +- +-struct dri2_window { +- DRI2BufferPtr front; +- struct sna_dri2_event *chain; +- xf86CrtcPtr crtc; +- int64_t msc_delta; +-}; ++ assert(back->pitch); ++ 
assert(back->name); + +-static struct dri2_window *dri2_window(WindowPtr win) +-{ +- assert(win->drawable.type != DRAWABLE_PIXMAP); +- return ((void **)__get_private(win, sna_window_key))[1]; ++out: ++ get_private(back)->stale = false; + } + + static struct sna_dri2_event * +@@ -232,21 +374,25 @@ dri2_chain(DrawablePtr d) + { + struct dri2_window *priv = dri2_window((WindowPtr)d); + assert(priv != NULL); ++ assert(priv->chain == NULL || priv->chain->chained); + return priv->chain; + } + inline static DRI2BufferPtr dri2_window_get_front(WindowPtr win) + { + struct dri2_window *priv = dri2_window(win); ++ assert(priv->front == NULL || get_private(priv->front)->bo->active_scanout); + return priv ? priv->front : NULL; + } + #else + inline static void *dri2_window_get_front(WindowPtr win) { return NULL; } ++#define APPLY_DAMAGE 1 + #endif + + #if DRI2INFOREC_VERSION < 6 + + #define xorg_can_triple_buffer() 0 + #define swap_limit(d, l) false ++#define mark_stale(b) + + #else + +@@ -273,6 +419,8 @@ mark_stale(DRI2BufferPtr back) + * stale frame. (This is mostly useful for tracking down + * driver bugs!) 
+ */ ++ DBG(("%s(handle=%d) => %d\n", __FUNCTION__, ++ get_private(back)->bo->handle, xorg_can_triple_buffer())); + get_private(back)->stale = xorg_can_triple_buffer(); + } + +@@ -286,21 +434,29 @@ sna_dri2_swap_limit_validate(DrawablePtr draw, int swap_limit) + static void + sna_dri2_reuse_buffer(DrawablePtr draw, DRI2BufferPtr buffer) + { ++ struct sna *sna = to_sna_from_drawable(draw); ++ + DBG(("%s: reusing buffer pixmap=%ld, attachment=%d, handle=%d, name=%d\n", + __FUNCTION__, get_drawable_pixmap(draw)->drawable.serialNumber, + buffer->attachment, get_private(buffer)->bo->handle, buffer->name)); + assert(get_private(buffer)->refcnt); +- assert(get_private(buffer)->bo->refcnt > get_private(buffer)->bo->active_scanout); ++ assert(get_private(buffer)->bo->refcnt >= get_private(buffer)->bo->active_scanout); ++ assert(kgem_bo_flink(&sna->kgem, get_private(buffer)->bo) == buffer->name); + + if (buffer->attachment == DRI2BufferBackLeft && + draw->type != DRAWABLE_PIXMAP) { +- DBG(("%s: replacing back buffer\n", __FUNCTION__)); +- sna_dri2_get_back(to_sna_from_drawable(draw), draw, buffer, dri2_chain(draw)); ++ DBG(("%s: replacing back buffer on window %ld\n", __FUNCTION__, draw->id)); ++ sna_dri2_get_back(sna, draw, buffer); + +- assert(kgem_bo_flink(&to_sna_from_drawable(draw)->kgem, get_private(buffer)->bo) == buffer->name); + assert(get_private(buffer)->bo->refcnt); + assert(get_private(buffer)->bo->active_scanout == 0); ++ assert(kgem_bo_flink(&sna->kgem, get_private(buffer)->bo) == buffer->name); ++ DBG(("%s: reusing back buffer handle=%d, name=%d, pitch=%d, age=%d\n", ++ __FUNCTION__, get_private(buffer)->bo->handle, ++ buffer->name, buffer->pitch, buffer->flags)); + } ++ ++ kgem_bo_submit(&sna->kgem, get_private(buffer)->bo); + } + + static bool swap_limit(DrawablePtr draw, int limit) +@@ -314,11 +470,6 @@ static bool swap_limit(DrawablePtr draw, int limit) + } + #endif + +-#if DRI2INFOREC_VERSION < 10 +-#undef USE_ASYNC_SWAP +-#define USE_ASYNC_SWAP 0 
+-#endif +- + #define COLOR_PREFER_TILING_Y 0 + + /* Prefer to enable TILING_Y if this buffer will never be a +@@ -328,6 +479,9 @@ static uint32_t color_tiling(struct sna *sna, DrawablePtr draw) + { + uint32_t tiling; + ++ if (!sna->kgem.can_fence) ++ return I915_TILING_NONE; ++ + if (COLOR_PREFER_TILING_Y && + (draw->width != sna->front->drawable.width || + draw->height != sna->front->drawable.height)) +@@ -355,7 +509,6 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna, + PixmapPtr pixmap) + { + struct sna_pixmap *priv; +- int tiling; + + DBG(("%s: attaching DRI client to pixmap=%ld\n", + __FUNCTION__, pixmap->drawable.serialNumber)); +@@ -373,31 +526,29 @@ static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna, + return NULL; + } + +- assert(priv->flush == false); ++ assert(priv->flush == false || priv->pinned & PIN_DRI3); ++ assert(priv->gpu_bo->flush == false || priv->pinned & PIN_DRI3); + assert(priv->cpu_damage == NULL); + assert(priv->gpu_bo); + assert(priv->gpu_bo->proxy == NULL); +- assert(priv->gpu_bo->flush == false); +- +- tiling = color_tiling(sna, &pixmap->drawable); +- if (tiling < 0) +- tiling = -tiling; +- if (priv->gpu_bo->tiling != tiling) +- sna_pixmap_change_tiling(pixmap, tiling); + +- return priv->gpu_bo; +-} ++ if (!kgem_bo_is_fenced(&sna->kgem, priv->gpu_bo)) { ++ if (priv->gpu_bo->tiling && ++ !sna_pixmap_change_tiling(pixmap, I915_TILING_NONE)) { ++ DBG(("%s: failed to discard tiling (%d) for DRI2 protocol\n", __FUNCTION__, priv->gpu_bo->tiling)); ++ return NULL; ++ } ++ } else { ++ int tiling = color_tiling(sna, &pixmap->drawable); ++ if (tiling < 0) ++ tiling = -tiling; ++ if (priv->gpu_bo->tiling < tiling && !priv->gpu_bo->scanout) ++ sna_pixmap_change_tiling(pixmap, tiling); ++ } + +-pure static inline void *sna_pixmap_get_buffer(PixmapPtr pixmap) +-{ +- assert(pixmap->refcnt); +- return ((void **)__get_private(pixmap, sna_pixmap_key))[2]; +-} ++ priv->gpu_bo->active_scanout++; + +-static inline void 
sna_pixmap_set_buffer(PixmapPtr pixmap, void *ptr) +-{ +- assert(pixmap->refcnt); +- ((void **)__get_private(pixmap, sna_pixmap_key))[2] = ptr; ++ return priv->gpu_bo; + } + + void +@@ -422,13 +573,18 @@ sna_dri2_pixmap_update_bo(struct sna *sna, PixmapPtr pixmap, struct kgem_bo *bo) + if (private->bo == bo) + return; + ++ assert(private->bo->active_scanout > 0); ++ private->bo->active_scanout--; ++ + DBG(("%s: dropping flush hint from handle=%d\n", __FUNCTION__, private->bo->handle)); + private->bo->flush = false; + kgem_bo_destroy(&sna->kgem, private->bo); + ++ + buffer->name = kgem_bo_flink(&sna->kgem, bo); + buffer->pitch = bo->pitch; + private->bo = ref(bo); ++ bo->active_scanout++; + + DBG(("%s: adding flush hint to handle=%d\n", __FUNCTION__, bo->handle)); + bo->flush = true; +@@ -449,9 +605,9 @@ sna_dri2_create_buffer(DrawablePtr draw, + struct sna_dri2_private *private; + PixmapPtr pixmap; + struct kgem_bo *bo; +- unsigned flags = 0; ++ unsigned bpp = format ?: draw->bitsPerPixel; ++ unsigned flags = CREATE_EXACT; + uint32_t size; +- int bpp; + + DBG(("%s pixmap=%ld, (attachment=%d, format=%d, drawable=%dx%d), window?=%d\n", + __FUNCTION__, +@@ -468,11 +624,11 @@ sna_dri2_create_buffer(DrawablePtr draw, + if (draw->type != DRAWABLE_PIXMAP) + buffer = dri2_window_get_front((WindowPtr)draw); + if (buffer == NULL) +- buffer = sna_pixmap_get_buffer(pixmap); ++ buffer = (DRI2Buffer2Ptr)sna_pixmap_get_buffer(pixmap); + if (buffer) { + private = get_private(buffer); + +- DBG(("%s: reusing front buffer attachment, win=%lu %dx%d, pixmap=%ld [%ld] %dx%d, handle=%d, name=%d\n", ++ DBG(("%s: reusing front buffer attachment, win=%lu %dx%d, pixmap=%ld [%ld] %dx%d, handle=%d, name=%d, active_scanout=%d\n", + __FUNCTION__, + draw->type != DRAWABLE_PIXMAP ? 
(long)draw->id : (long)0, + draw->width, draw->height, +@@ -480,12 +636,22 @@ sna_dri2_create_buffer(DrawablePtr draw, + private->pixmap->drawable.serialNumber, + pixmap->drawable.width, + pixmap->drawable.height, +- private->bo->handle, buffer->name)); ++ private->bo->handle, buffer->name, ++ private->bo->active_scanout)); + ++ assert(buffer->attachment == DRI2BufferFrontLeft); + assert(private->pixmap == pixmap); + assert(sna_pixmap(pixmap)->flush); + assert(sna_pixmap(pixmap)->pinned & PIN_DRI2); + assert(kgem_bo_flink(&sna->kgem, private->bo) == buffer->name); ++ assert(private->bo->pitch == buffer->pitch); ++ assert(private->bo->active_scanout); ++ ++ sna_pixmap_move_to_gpu(pixmap, ++ MOVE_READ | ++ __MOVE_FORCE | ++ __MOVE_DRI); ++ kgem_bo_submit(&sna->kgem, private->bo); + + private->refcnt++; + return buffer; +@@ -498,7 +664,6 @@ sna_dri2_create_buffer(DrawablePtr draw, + assert(sna_pixmap(pixmap) != NULL); + + bo = ref(bo); +- bpp = pixmap->drawable.bitsPerPixel; + if (pixmap == sna->front && !(sna->flags & SNA_LINEAR_FB)) + flags |= CREATE_SCANOUT; + DBG(("%s: attaching to front buffer %dx%d [%p:%d], scanout? 
%d\n", +@@ -506,6 +671,7 @@ sna_dri2_create_buffer(DrawablePtr draw, + pixmap->drawable.width, pixmap->drawable.height, + pixmap, pixmap->refcnt, flags & CREATE_SCANOUT)); + size = (uint32_t)pixmap->drawable.height << 16 | pixmap->drawable.width; ++ bpp = pixmap->drawable.bitsPerPixel; + break; + + case DRI2BufferBackLeft: +@@ -514,6 +680,7 @@ sna_dri2_create_buffer(DrawablePtr draw, + flags |= CREATE_SCANOUT; + if (draw->width == sna->front->drawable.width && + draw->height == sna->front->drawable.height && ++ draw->bitsPerPixel == bpp && + (sna->flags & (SNA_LINEAR_FB | SNA_NO_WAIT | SNA_NO_FLIP)) == 0) + flags |= CREATE_SCANOUT; + } +@@ -521,7 +688,6 @@ sna_dri2_create_buffer(DrawablePtr draw, + case DRI2BufferFrontRight: + case DRI2BufferFakeFrontLeft: + case DRI2BufferFakeFrontRight: +- bpp = draw->bitsPerPixel; + DBG(("%s: creating back buffer %dx%d, suitable for scanout? %d\n", + __FUNCTION__, + draw->width, draw->height, +@@ -530,7 +696,7 @@ sna_dri2_create_buffer(DrawablePtr draw, + bo = kgem_create_2d(&sna->kgem, + draw->width, + draw->height, +- draw->bitsPerPixel, ++ bpp, + color_tiling(sna, draw), + flags); + break; +@@ -558,7 +724,6 @@ sna_dri2_create_buffer(DrawablePtr draw, + * not understand W tiling and the GTT is incapable of + * W fencing. + */ +- bpp = format ? format : draw->bitsPerPixel; + bpp *= 2; + bo = kgem_create_2d(&sna->kgem, + ALIGN(draw->width, 64), +@@ -570,7 +735,6 @@ sna_dri2_create_buffer(DrawablePtr draw, + case DRI2BufferDepthStencil: + case DRI2BufferHiz: + case DRI2BufferAccum: +- bpp = format ? 
format : draw->bitsPerPixel, + bo = kgem_create_2d(&sna->kgem, + draw->width, draw->height, bpp, + other_tiling(sna, draw), +@@ -614,7 +778,7 @@ sna_dri2_create_buffer(DrawablePtr draw, + pixmap->refcnt++; + + priv = sna_pixmap(pixmap); +- assert(priv->flush == false); ++ assert(priv->flush == false || priv->pinned & PIN_DRI3); + assert((priv->pinned & PIN_DRI2) == 0); + + /* Don't allow this named buffer to be replaced */ +@@ -630,17 +794,17 @@ sna_dri2_create_buffer(DrawablePtr draw, + if (priv->gpu_bo->exec) + sna->kgem.flush = 1; + +- priv->flush |= 1; ++ priv->flush |= FLUSH_READ; + if (draw->type == DRAWABLE_PIXMAP) { + /* DRI2 renders directly into GLXPixmaps, treat as hostile */ + kgem_bo_unclean(&sna->kgem, priv->gpu_bo); + sna_damage_all(&priv->gpu_damage, pixmap); + priv->clear = false; + priv->cpu = false; +- priv->flush |= 2; ++ priv->flush |= FLUSH_WRITE; + } + +- sna_accel_watch_flush(sna, 1); ++ sna_watch_flush(sna, 1); + } + + return buffer; +@@ -651,16 +815,80 @@ err: + return NULL; + } + +-static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) ++static void ++sna_dri2_cache_bo(struct sna *sna, ++ DrawablePtr draw, ++ struct kgem_bo *bo, ++ uint32_t name, ++ uint32_t size, ++ uint32_t flags) ++{ ++ struct dri_bo *c; ++ ++ DBG(("%s(handle=%d, name=%d)\n", __FUNCTION__, bo->handle, name)); ++ ++ if (draw == NULL) { ++ DBG(("%s: no draw, releasing handle=%d\n", ++ __FUNCTION__, bo->handle)); ++ goto err; ++ } ++ ++ if (draw->type == DRAWABLE_PIXMAP) { ++ DBG(("%s: not a window, releasing handle=%d\n", ++ __FUNCTION__, bo->handle)); ++ goto err; ++ } ++ ++ if (bo->refcnt > 1 + bo->active_scanout) { ++ DBG(("%s: multiple references [%d], releasing handle\n", ++ __FUNCTION__, bo->refcnt, bo->handle)); ++ goto err; ++ } ++ ++ if ((draw->height << 16 | draw->width) != size) { ++ DBG(("%s: wrong size [%dx%d], releasing handle\n", ++ __FUNCTION__, ++ size & 0xffff, size >> 16, ++ bo->handle)); ++ goto err; ++ } ++ ++ if (bo->scanout && 
front_pitch(draw) != bo->pitch) { ++ DBG(("%s: scanout with pitch change [%d != %d], releasing handle\n", ++ __FUNCTION__, bo->pitch, front_pitch(draw), bo->handle)); ++ goto err; ++ } ++ ++ c = malloc(sizeof(*c)); ++ if (!c) ++ goto err; ++ ++ DBG(("%s: caching handle=%d (name=%d, flags=%d, active_scanout=%d)\n", __FUNCTION__, bo->handle, name, flags, bo->active_scanout)); ++ ++ c->bo = bo; ++ c->name = name; ++ c->flags = flags; ++ list_add(&c->link, &dri2_window((WindowPtr)draw)->cache); ++ return; ++ ++err: ++ kgem_bo_destroy(&sna->kgem, bo); ++} ++ ++static void _sna_dri2_destroy_buffer(struct sna *sna, ++ DrawablePtr draw, ++ DRI2Buffer2Ptr buffer) + { + struct sna_dri2_private *private = get_private(buffer); + + if (buffer == NULL) + return; + +- DBG(("%s: %p [handle=%d] -- refcnt=%d, pixmap=%ld\n", ++ DBG(("%s: %p [handle=%d] -- refcnt=%d, draw=%ld, pixmap=%ld, proxy?=%d\n", + __FUNCTION__, buffer, private->bo->handle, private->refcnt, +- private->pixmap ? private->pixmap->drawable.serialNumber : 0)); ++ draw ? draw->id : 0, ++ private->pixmap ? 
private->pixmap->drawable.serialNumber : 0, ++ private->proxy != NULL)); + assert(private->refcnt > 0); + if (--private->refcnt) + return; +@@ -669,7 +897,10 @@ static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) + + if (private->proxy) { + DBG(("%s: destroying proxy\n", __FUNCTION__)); +- _sna_dri2_destroy_buffer(sna, private->proxy); ++ assert(private->bo->active_scanout > 0); ++ private->bo->active_scanout--; ++ ++ _sna_dri2_destroy_buffer(sna, draw, private->proxy); + private->pixmap = NULL; + } + +@@ -683,6 +914,11 @@ static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) + assert(priv->pinned & PIN_DRI2); + assert(priv->flush); + ++ DBG(("%s: removing active_scanout=%d from pixmap handle=%d\n", ++ __FUNCTION__, priv->gpu_bo->active_scanout, priv->gpu_bo->handle)); ++ assert(priv->gpu_bo->active_scanout > 0); ++ priv->gpu_bo->active_scanout--; ++ + /* Undo the DRI markings on this pixmap */ + DBG(("%s: releasing last DRI pixmap=%ld, scanout?=%d\n", + __FUNCTION__, +@@ -692,28 +928,34 @@ static void _sna_dri2_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer) + list_del(&priv->flush_list); + + DBG(("%s: dropping flush hint from handle=%d\n", __FUNCTION__, private->bo->handle)); +- priv->gpu_bo->flush = false; + priv->pinned &= ~PIN_DRI2; + +- priv->flush = false; +- sna_accel_watch_flush(sna, -1); ++ if ((priv->pinned & PIN_DRI3) == 0) { ++ priv->gpu_bo->flush = false; ++ priv->flush = false; ++ } ++ sna_watch_flush(sna, -1); + + sna_pixmap_set_buffer(pixmap, NULL); + pixmap->drawable.pScreen->DestroyPixmap(pixmap); + } +- assert(private->bo->flush == false); + +- kgem_bo_destroy(&sna->kgem, private->bo); ++ sna_dri2_cache_bo(sna, draw, ++ private->bo, ++ buffer->name, ++ private->size, ++ buffer->flags); + free(buffer); + } + + static void sna_dri2_destroy_buffer(DrawablePtr draw, DRI2Buffer2Ptr buffer) + { +- _sna_dri2_destroy_buffer(to_sna_from_drawable(draw), buffer); ++ 
_sna_dri2_destroy_buffer(to_sna_from_drawable(draw), draw, buffer); + } + + static DRI2BufferPtr sna_dri2_reference_buffer(DRI2BufferPtr buffer) + { ++ assert(get_private(buffer)->refcnt > 0); + get_private(buffer)->refcnt++; + return buffer; + } +@@ -746,10 +988,9 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) + { + struct sna *sna = to_sna_from_pixmap(pixmap); + struct sna_pixmap *priv = sna_pixmap(pixmap); +- RegionRec region; + +- DBG(("%s: pixmap=%ld, handle=%d\n", +- __FUNCTION__, pixmap->drawable.serialNumber, bo->handle)); ++ DBG(("%s: pixmap=%ld, handle=%d (old handle=%d)\n", ++ __FUNCTION__, pixmap->drawable.serialNumber, bo->handle, priv->gpu_bo->handle)); + + assert(pixmap->drawable.width * pixmap->drawable.bitsPerPixel <= 8*bo->pitch); + assert(pixmap->drawable.height * bo->pitch <= kgem_bo_size(bo)); +@@ -758,21 +999,34 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) + assert((priv->pinned & (PIN_PRIME | PIN_DRI3)) == 0); + assert(priv->flush); + +- /* Post damage on the new front buffer so that listeners, such +- * as DisplayLink know take a copy and shove it over the USB, +- * also for software cursors and the like. +- */ +- region.extents.x1 = region.extents.y1 = 0; +- region.extents.x2 = pixmap->drawable.width; +- region.extents.y2 = pixmap->drawable.height; +- region.data = NULL; +- DamageRegionAppend(&pixmap->drawable, ®ion); ++ if (APPLY_DAMAGE) { ++ RegionRec region; ++ ++ /* Post damage on the new front buffer so that listeners, such ++ * as DisplayLink know take a copy and shove it over the USB, ++ * also for software cursors and the like. ++ */ ++ region.extents.x1 = region.extents.y1 = 0; ++ region.extents.x2 = pixmap->drawable.width; ++ region.extents.y2 = pixmap->drawable.height; ++ region.data = NULL; ++ ++ /* ++ * Eeek, beware the sw cursor copying to the old bo ++ * causing recursion and mayhem. 
++ */ ++ DBG(("%s: marking whole pixmap as damaged\n", __FUNCTION__)); ++ sna->ignore_copy_area = sna->flags & SNA_TEAR_FREE; ++ DamageRegionAppend(&pixmap->drawable, ®ion); ++ } + + damage(pixmap, priv, NULL); + + assert(bo->refcnt); +- if (priv->move_to_gpu) ++ if (priv->move_to_gpu) { ++ DBG(("%s: applying final/discard move-to-gpu\n", __FUNCTION__)); + priv->move_to_gpu(sna, priv, 0); ++ } + if (priv->gpu_bo != bo) { + DBG(("%s: dropping flush hint from handle=%d\n", __FUNCTION__, priv->gpu_bo->handle)); + priv->gpu_bo->flush = false; +@@ -792,8 +1046,27 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo) + bo->domain = DOMAIN_NONE; + assert(bo->flush); + +- DamageRegionProcessPending(&pixmap->drawable); ++ if (APPLY_DAMAGE) { ++ sna->ignore_copy_area = false; ++ DamageRegionProcessPending(&pixmap->drawable); ++ } ++} ++ ++#if defined(__GNUC__) ++#define popcount(x) __builtin_popcount(x) ++#else ++static int popcount(unsigned int x) ++{ ++ int count = 0; ++ ++ while (x) { ++ count += x&1; ++ x >>= 1; ++ } ++ ++ return count; + } ++#endif + + static void sna_dri2_select_mode(struct sna *sna, struct kgem_bo *dst, struct kgem_bo *src, bool sync) + { +@@ -823,6 +1096,12 @@ static void sna_dri2_select_mode(struct sna *sna, struct kgem_bo *dst, struct kg + return; + } + ++ if (sna->render_state.gt < 2 && sna->kgem.has_semaphores) { ++ DBG(("%s: small GT [%d], not forcing selection\n", ++ __FUNCTION__, sna->render_state.gt)); ++ return; ++ } ++ + VG_CLEAR(busy); + busy.handle = src->handle; + if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy)) +@@ -860,9 +1139,11 @@ static void sna_dri2_select_mode(struct sna *sna, struct kgem_bo *dst, struct kg + * the cost of the query. + */ + mode = KGEM_RENDER; +- if (busy.busy & (0xfffe << 16)) ++ if ((busy.busy & 0xffff) == I915_EXEC_BLT) + mode = KGEM_BLT; +- kgem_bo_mark_busy(&sna->kgem, busy.handle == src->handle ? src : dst, mode); ++ kgem_bo_mark_busy(&sna->kgem, ++ busy.handle == src->handle ? 
src : dst, ++ mode); + _kgem_set_mode(&sna->kgem, mode); + } + +@@ -871,10 +1152,13 @@ static bool is_front(int attachment) + return attachment == DRI2BufferFrontLeft; + } + ++#define DRI2_SYNC 0x1 ++#define DRI2_DAMAGE 0x2 ++#define DRI2_BO 0x4 + static struct kgem_bo * + __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, + DRI2BufferPtr src, DRI2BufferPtr dst, +- bool sync) ++ unsigned flags) + { + PixmapPtr pixmap = get_drawable_pixmap(draw); + DrawableRec scratch, *src_draw = &pixmap->drawable, *dst_draw = &pixmap->drawable; +@@ -886,7 +1170,7 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, + struct kgem_bo *dst_bo; + const BoxRec *boxes; + int16_t dx, dy, sx, sy; +- unsigned flags; ++ unsigned hint; + int n; + + /* To hide a stale DRI2Buffer, one may choose to substitute +@@ -962,8 +1246,9 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, + } + } + } else +- sync = false; ++ flags &= ~DRI2_SYNC; + ++ scratch.pScreen = draw->pScreen; + scratch.x = scratch.y = 0; + scratch.width = scratch.height = 0; + scratch.depth = draw->depth; +@@ -971,6 +1256,7 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, + + src_bo = src_priv->bo; + assert(src_bo->refcnt); ++ kgem_bo_unclean(&sna->kgem, src_bo); + if (is_front(src->attachment)) { + struct sna_pixmap *priv; + +@@ -987,11 +1273,12 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, + scratch.height = src_priv->size >> 16; + src_draw = &scratch; + +- DBG(("%s: source size %dx%d, region size %dx%d\n", ++ DBG(("%s: source size %dx%d, region size %dx%d, src offset %dx%d\n", + __FUNCTION__, + scratch.width, scratch.height, + clip.extents.x2 - clip.extents.x1, +- clip.extents.y2 - clip.extents.y1)); ++ clip.extents.y2 - clip.extents.y1, ++ -sx, -sy)); + + source.extents.x1 = -sx; + source.extents.y1 = -sy; +@@ -1002,6 +1289,10 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, 
RegionPtr region, + assert(region == NULL || region == &clip); + pixman_region_intersect(&clip, &clip, &source); + ++ if (!pixman_region_not_empty(&clip)) { ++ DBG(("%s: region doesn't overlap pixmap\n", __FUNCTION__)); ++ return NULL; ++ } + } + + dst_bo = dst_priv->bo; +@@ -1013,12 +1304,12 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, + /* Preserve the CRTC shadow overrides */ + sna_shadow_steal_crtcs(sna, &shadow); + +- flags = MOVE_WRITE | __MOVE_FORCE; ++ hint = MOVE_WRITE | __MOVE_FORCE; + if (clip.data) +- flags |= MOVE_READ; ++ hint |= MOVE_READ; + + assert(region == NULL || region == &clip); +- priv = sna_pixmap_move_area_to_gpu(pixmap, &clip.extents, flags); ++ priv = sna_pixmap_move_area_to_gpu(pixmap, &clip.extents, hint); + if (priv) { + damage(pixmap, priv, region); + dst_bo = priv->gpu_bo; +@@ -1050,20 +1341,20 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, + assert(region == NULL || region == &clip); + pixman_region_intersect(&clip, &clip, &target); + +- sync = false; ++ flags &= ~DRI2_SYNC; + } + + if (!wedged(sna)) { + xf86CrtcPtr crtc; + + crtc = NULL; +- if (sync && sna_pixmap_is_scanout(sna, pixmap)) ++ if (flags & DRI2_SYNC && sna_pixmap_is_scanout(sna, pixmap)) + crtc = sna_covering_crtc(sna, &clip.extents, NULL); + sna_dri2_select_mode(sna, dst_bo, src_bo, crtc != NULL); + +- sync = (crtc != NULL&& +- sna_wait_for_scanline(sna, pixmap, crtc, +- &clip.extents)); ++ if (crtc == NULL || ++ !sna_wait_for_scanline(sna, pixmap, crtc, &clip.extents)) ++ flags &= ~DRI2_SYNC; + } + + if (region) { +@@ -1075,8 +1366,11 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, + boxes = &clip.extents; + n = 1; + } +- DamageRegionAppend(&pixmap->drawable, region); +- ++ if (APPLY_DAMAGE || flags & DRI2_DAMAGE) { ++ DBG(("%s: marking region as damaged\n", __FUNCTION__)); ++ sna->ignore_copy_area = sna->flags & SNA_TEAR_FREE; ++ DamageRegionAppend(&pixmap->drawable, 
region); ++ } + + DBG(("%s: copying [(%d, %d), (%d, %d)]x%d src=(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, +@@ -1084,29 +1378,36 @@ __sna_dri2_copy_region(struct sna *sna, DrawablePtr draw, RegionPtr region, + boxes[0].x2, boxes[0].y2, + n, sx, sy, dx, dy)); + +- flags = COPY_LAST; +- if (sync) +- flags |= COPY_SYNC; ++ hint = COPY_LAST | COPY_DRI; ++ if (flags & DRI2_SYNC) ++ hint |= COPY_SYNC; + if (!sna->render.copy_boxes(sna, GXcopy, + src_draw, src_bo, sx, sy, + dst_draw, dst_bo, dx, dy, +- boxes, n, flags)) ++ boxes, n, hint)) + memcpy_copy_boxes(sna, GXcopy, + src_draw, src_bo, sx, sy, + dst_draw, dst_bo, dx, dy, +- boxes, n, flags); +- +- DBG(("%s: flushing? %d\n", __FUNCTION__, sync)); +- if (sync) { /* STAT! */ +- struct kgem_request *rq = sna->kgem.next_request; +- kgem_submit(&sna->kgem); +- if (rq->bo) { +- bo = ref(rq->bo); +- DBG(("%s: recording sync fence handle=%d\n", __FUNCTION__, bo->handle)); ++ boxes, n, hint); ++ ++ sna->needs_dri_flush = true; ++ if (flags & (DRI2_SYNC | DRI2_BO)) { /* STAT! 
*/ ++ struct kgem_request *rq = RQ(dst_bo->rq); ++ if (rq && rq != (void *)&sna->kgem) { ++ if (rq->bo == NULL) ++ kgem_submit(&sna->kgem); ++ if (rq->bo) { /* Becareful in case the gpu is wedged */ ++ bo = ref(rq->bo); ++ DBG(("%s: recording sync fence handle=%d\n", ++ __FUNCTION__, bo->handle)); ++ } + } + } + +- DamageRegionProcessPending(&pixmap->drawable); ++ if (APPLY_DAMAGE || flags & DRI2_DAMAGE) { ++ sna->ignore_copy_area = false; ++ DamageRegionProcessPending(&pixmap->drawable); ++ } + + if (clip.data) + pixman_region_fini(&clip); +@@ -1142,6 +1443,8 @@ sna_dri2_copy_region(DrawablePtr draw, + assert(get_private(src)->refcnt); + assert(get_private(dst)->refcnt); + ++ assert(get_private(src)->bo != get_private(dst)->bo); ++ + assert(get_private(src)->bo->refcnt); + assert(get_private(dst)->bo->refcnt); + +@@ -1151,7 +1454,7 @@ sna_dri2_copy_region(DrawablePtr draw, + region->extents.x2, region->extents.y2, + region_num_rects(region))); + +- __sna_dri2_copy_region(sna, draw, region, src, dst, false); ++ __sna_dri2_copy_region(sna, draw, region, src, dst, DRI2_DAMAGE); + } + + inline static uint32_t pipe_select(int pipe) +@@ -1161,6 +1464,7 @@ inline static uint32_t pipe_select(int pipe) + * we can safely ignore the capability check - if we have more + * than two pipes, we can assume that they are fully supported. + */ ++ assert(pipe < _DRM_VBLANK_HIGH_CRTC_MASK); + if (pipe > 1) + return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT; + else if (pipe > 0) +@@ -1169,15 +1473,53 @@ inline static uint32_t pipe_select(int pipe) + return 0; + } + +-static inline int sna_wait_vblank(struct sna *sna, union drm_wait_vblank *vbl, int pipe) ++static inline bool sna_next_vblank(struct sna_dri2_event *info) + { +- DBG(("%s(pipe=%d, waiting until seq=%u%s)\n", +- __FUNCTION__, pipe, vbl->request.sequence, +- vbl->request.type & DRM_VBLANK_RELATIVE ? 
" [relative]" : "")); +- assert(pipe != -1); ++ union drm_wait_vblank vbl; + +- vbl->request.type |= pipe_select(pipe); +- return drmIoctl(sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, vbl); ++ DBG(("%s(pipe=%d, waiting until next vblank)\n", ++ __FUNCTION__, info->pipe)); ++ assert(info->pipe != -1); ++ ++ VG_CLEAR(vbl); ++ vbl.request.type = ++ DRM_VBLANK_RELATIVE | ++ DRM_VBLANK_EVENT | ++ pipe_select(info->pipe); ++ vbl.request.sequence = 1; ++ vbl.request.signal = (uintptr_t)info; ++ ++ assert(!info->queued); ++ if (drmIoctl(info->sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, &vbl)) ++ return false; ++ ++ info->queued = true; ++ return true; ++} ++ ++static inline bool sna_wait_vblank(struct sna_dri2_event *info, ++ unsigned seq) ++{ ++ union drm_wait_vblank vbl; ++ ++ DBG(("%s(pipe=%d, waiting until vblank %u)\n", ++ __FUNCTION__, info->pipe, seq)); ++ assert(info->pipe != -1); ++ ++ VG_CLEAR(vbl); ++ vbl.request.type = ++ DRM_VBLANK_ABSOLUTE | ++ DRM_VBLANK_EVENT | ++ pipe_select(info->pipe); ++ vbl.request.sequence = seq; ++ vbl.request.signal = (uintptr_t)info; ++ ++ assert(!info->queued); ++ if (drmIoctl(info->sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, &vbl)) ++ return false; ++ ++ info->queued = true; ++ return true; + } + + #if DRI2INFOREC_VERSION >= 4 +@@ -1195,6 +1537,7 @@ draw_current_msc(DrawablePtr draw, xf86CrtcPtr crtc, uint64_t msc) + { + struct dri2_window *priv; + ++ assert(draw); + if (draw->type != DRAWABLE_WINDOW) + return msc; + +@@ -1206,6 +1549,9 @@ draw_current_msc(DrawablePtr draw, xf86CrtcPtr crtc, uint64_t msc) + priv->crtc = crtc; + priv->msc_delta = 0; + priv->chain = NULL; ++ priv->scanout = -1; ++ priv->cache_size = 0; ++ list_init(&priv->cache); + dri2_window_attach((WindowPtr)draw, priv); + } + } else { +@@ -1214,8 +1560,8 @@ draw_current_msc(DrawablePtr draw, xf86CrtcPtr crtc, uint64_t msc) + const struct ust_msc *this = sna_crtc_last_swap(crtc); + DBG(("%s: Window transferring from pipe=%d [msc=%llu] to pipe=%d [msc=%llu], delta now %lld\n", + 
__FUNCTION__, +- sna_crtc_to_pipe(priv->crtc), (long long)last->msc, +- sna_crtc_to_pipe(crtc), (long long)this->msc, ++ sna_crtc_pipe(priv->crtc), (long long)last->msc, ++ sna_crtc_pipe(crtc), (long long)this->msc, + (long long)(priv->msc_delta + this->msc - last->msc))); + priv->msc_delta += this->msc - last->msc; + priv->crtc = crtc; +@@ -1248,57 +1594,119 @@ sna_dri2_get_crtc(DrawablePtr draw) + NULL); + } + +-static void +-sna_dri2_remove_event(WindowPtr win, struct sna_dri2_event *info) ++static void frame_swap_complete(struct sna_dri2_event *frame, int type) + { +- struct dri2_window *priv; +- struct sna_dri2_event *chain; +- +- assert(win->drawable.type == DRAWABLE_WINDOW); +- DBG(("%s: remove[%p] from window %ld, active? %d\n", +- __FUNCTION__, info, (long)win->drawable.id, info->draw != NULL)); ++ const struct ust_msc *swap; + +- priv = dri2_window(win); +- assert(priv); +- assert(priv->chain != NULL); ++ assert(frame->signal); ++ frame->signal = false; + +- if (priv->chain == info) { +- priv->chain = info->chain; ++ if (frame->client == NULL) { ++ DBG(("%s: client already gone\n", __FUNCTION__)); + return; + } + +- chain = priv->chain; +- while (chain->chain != info) +- chain = chain->chain; +- assert(chain != info); +- assert(info->chain != chain); +- chain->chain = info->chain; ++ assert(frame->draw); ++ ++ swap = sna_crtc_last_swap(frame->crtc); ++ DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc=%lld], tv=%d.%06d\n", ++ __FUNCTION__, type, (long)frame->draw->id, frame->pipe, ++ (long long)swap->msc, ++ (long long)draw_current_msc(frame->draw, frame->crtc, swap->msc), ++ swap->tv_sec, swap->tv_usec)); ++ ++ DRI2SwapComplete(frame->client, frame->draw, ++ draw_current_msc(frame->draw, frame->crtc, swap->msc), ++ swap->tv_sec, swap->tv_usec, ++ type, frame->event_complete, frame->event_data); + } + +-static void +-sna_dri2_event_free(struct sna_dri2_event *info) ++static void fake_swap_complete(struct sna *sna, ClientPtr client, ++ DrawablePtr 
draw, xf86CrtcPtr crtc, ++ int type, DRI2SwapEventPtr func, void *data) + { +- DrawablePtr draw = info->draw; ++ const struct ust_msc *swap; + +- DBG(("%s(draw?=%d)\n", __FUNCTION__, draw != NULL)); +- if (draw && draw->type == DRAWABLE_WINDOW) +- sna_dri2_remove_event((WindowPtr)draw, info); ++ assert(draw); + +- _sna_dri2_destroy_buffer(info->sna, info->front); +- _sna_dri2_destroy_buffer(info->sna, info->back); ++ if (crtc == NULL) ++ crtc = sna_primary_crtc(sna); + +- while (!list_is_empty(&info->cache)) { +- struct dri_bo *c; ++ swap = sna_crtc_last_swap(crtc); ++ DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc %lld], tv=%d.%06d\n", ++ __FUNCTION__, type, (long)draw->id, crtc ? sna_crtc_pipe(crtc) : -1, ++ (long long)swap->msc, ++ (long long)draw_current_msc(draw, crtc, swap->msc), ++ swap->tv_sec, swap->tv_usec)); + +- c = list_first_entry(&info->cache, struct dri_bo, link); +- list_del(&c->link); ++ DRI2SwapComplete(client, draw, ++ draw_current_msc(draw, crtc, swap->msc), ++ swap->tv_sec, swap->tv_usec, ++ type, func, data); ++} + +- DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); +- if (c->bo) +- kgem_bo_destroy(&info->sna->kgem, c->bo); ++static void ++sna_dri2_remove_event(struct sna_dri2_event *info) ++{ ++ WindowPtr win = (WindowPtr)info->draw; ++ struct dri2_window *priv; + +- free(c); ++ assert(win->drawable.type == DRAWABLE_WINDOW); ++ DBG(("%s: remove[%p] from window %ld, active? 
%d\n", ++ __FUNCTION__, info, (long)win->drawable.id, info->draw != NULL)); ++ assert(!info->signal); ++ ++ priv = dri2_window(win); ++ assert(priv); ++ assert(priv->chain != NULL); ++ assert(info->chained); ++ info->chained = false; ++ ++ if (priv->chain != info) { ++ struct sna_dri2_event *chain = priv->chain; ++ while (chain->chain != info) { ++ assert(chain->chained); ++ chain = chain->chain; ++ } ++ assert(chain != info); ++ assert(info->chain != chain); ++ chain->chain = info->chain; ++ return; ++ } ++ ++ priv->chain = info->chain; ++ if (priv->chain == NULL) { ++ struct dri_bo *c, *tmp; ++ ++ c = list_entry(priv->cache.next->next, struct dri_bo, link); ++ list_for_each_entry_safe_from(c, tmp, &priv->cache, link) { ++ list_del(&c->link); ++ ++ DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); ++ assert(c->bo); ++ kgem_bo_destroy(&info->sna->kgem, c->bo); ++ free(c); ++ } + } ++} ++ ++static void ++sna_dri2_event_free(struct sna_dri2_event *info) ++{ ++ DBG(("%s(draw?=%d)\n", __FUNCTION__, info->draw != NULL)); ++ assert(!info->queued); ++ assert(!info->signal); ++ assert(info->pending.bo == NULL); ++ ++ if (info->sna->dri2.flip_pending == info) ++ info->sna->dri2.flip_pending = NULL; ++ assert(info->sna->dri2.flip_pending != info); ++ if (info->chained) ++ sna_dri2_remove_event(info); ++ ++ assert((info->front == NULL && info->back == NULL) || info->front != info->back); ++ _sna_dri2_destroy_buffer(info->sna, info->draw, info->front); ++ _sna_dri2_destroy_buffer(info->sna, info->draw, info->back); + + if (info->bo) { + DBG(("%s: releasing batch handle=%d\n", __FUNCTION__, info->bo->handle)); +@@ -1331,15 +1739,26 @@ sna_dri2_client_gone(CallbackListPtr *list, void *closure, void *data) + + event = list_first_entry(&priv->events, struct sna_dri2_event, link); + assert(event->client == client); ++ list_del(&event->link); ++ event->signal = false; + +- if (event->queued) { +- if (event->draw) +- 
sna_dri2_remove_event((WindowPtr)event->draw, +- event); +- event->client = NULL; +- event->draw = NULL; +- list_del(&event->link); +- } else ++ if (event->pending.bo) { ++ assert(event->pending.bo->active_scanout > 0); ++ event->pending.bo->active_scanout--; ++ ++ kgem_bo_destroy(&sna->kgem, event->pending.bo); ++ event->pending.bo = NULL; ++ } ++ ++ if (event->chained) ++ sna_dri2_remove_event(event); ++ ++ event->client = NULL; ++ event->draw = NULL; ++ event->keepalive = 1; ++ assert(!event->signal); ++ ++ if (!event->queued) + sna_dri2_event_free(event); + } + +@@ -1365,11 +1784,15 @@ static bool add_event_to_client(struct sna_dri2_event *info, struct sna *sna, Cl + } + + static struct sna_dri2_event * +-sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) ++sna_dri2_add_event(struct sna *sna, ++ DrawablePtr draw, ++ ClientPtr client, ++ xf86CrtcPtr crtc) + { + struct dri2_window *priv; + struct sna_dri2_event *info, *chain; + ++ assert(draw != NULL); + assert(draw->type == DRAWABLE_WINDOW); + DBG(("%s: adding event to window %ld)\n", + __FUNCTION__, (long)draw->id)); +@@ -1382,11 +1805,11 @@ sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) + if (info == NULL) + return NULL; + +- list_init(&info->cache); + info->sna = sna; + info->draw = draw; +- info->crtc = priv->crtc; +- info->pipe = sna_crtc_to_pipe(priv->crtc); ++ info->crtc = crtc; ++ info->pipe = sna_crtc_pipe(crtc); ++ info->keepalive = 1; + + if (!add_event_to_client(info, sna, client)) { + free(info); +@@ -1394,6 +1817,7 @@ sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) + } + + assert(priv->chain != info); ++ info->chained = true; + + if (priv->chain == NULL) { + priv->chain = info; +@@ -1409,6 +1833,66 @@ sna_dri2_add_event(struct sna *sna, DrawablePtr draw, ClientPtr client) + return info; + } + ++static void decouple_window(WindowPtr win, ++ struct dri2_window *priv, ++ struct sna *sna, ++ bool signal) ++{ ++ if (priv->front) { ++ 
DBG(("%s: decouple private front\n", __FUNCTION__)); ++ assert(priv->crtc); ++ sna_shadow_unset_crtc(sna, priv->crtc); ++ ++ _sna_dri2_destroy_buffer(sna, NULL, priv->front); ++ priv->front = NULL; ++ } ++ ++ if (priv->chain) { ++ struct sna_dri2_event *info, *chain; ++ ++ DBG(("%s: freeing chain\n", __FUNCTION__)); ++ ++ chain = priv->chain; ++ while ((info = chain)) { ++ DBG(("%s: freeing event, pending signal? %d, pending swap? handle=%d\n", ++ __FUNCTION__, info->signal, ++ info->pending.bo ? info->pending.bo->handle : 0)); ++ assert(info->draw == &win->drawable); ++ ++ if (info->pending.bo) { ++ if (signal) { ++ bool was_signalling = info->signal; ++ info->signal = true; ++ frame_swap_complete(info, DRI2_EXCHANGE_COMPLETE); ++ info->signal = was_signalling; ++ } ++ assert(info->pending.bo->active_scanout > 0); ++ info->pending.bo->active_scanout--; ++ ++ kgem_bo_destroy(&sna->kgem, info->pending.bo); ++ info->pending.bo = NULL; ++ } ++ ++ if (info->signal && signal) ++ frame_swap_complete(info, DRI2_EXCHANGE_COMPLETE); ++ info->signal = false; ++ info->draw = NULL; ++ info->keepalive = 1; ++ assert(!info->signal); ++ list_del(&info->link); ++ ++ chain = info->chain; ++ info->chain = NULL; ++ info->chained = false; ++ ++ if (!info->queued) ++ sna_dri2_event_free(info); ++ } ++ ++ priv->chain = NULL; ++ } ++} ++ + void sna_dri2_decouple_window(WindowPtr win) + { + struct dri2_window *priv; +@@ -1418,50 +1902,34 @@ void sna_dri2_decouple_window(WindowPtr win) + return; + + DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); ++ decouple_window(win, priv, to_sna_from_drawable(&win->drawable), true); + +- if (priv->front) { +- struct sna *sna = to_sna_from_drawable(&win->drawable); +- assert(priv->crtc); +- sna_shadow_unset_crtc(sna, priv->crtc); +- _sna_dri2_destroy_buffer(sna, priv->front); +- priv->front = NULL; +- } ++ priv->scanout = -1; + } + + void sna_dri2_destroy_window(WindowPtr win) + { + struct dri2_window *priv; ++ struct sna *sna; + + priv = 
dri2_window(win); + if (priv == NULL) + return; + + DBG(("%s: window=%ld\n", __FUNCTION__, win->drawable.id)); ++ sna = to_sna_from_drawable(&win->drawable); ++ decouple_window(win, priv, sna, false); + +- if (priv->front) { +- struct sna *sna = to_sna_from_drawable(&win->drawable); +- assert(priv->crtc); +- sna_shadow_unset_crtc(sna, priv->crtc); +- _sna_dri2_destroy_buffer(sna, priv->front); +- } +- +- if (priv->chain) { +- struct sna_dri2_event *info, *chain; +- +- DBG(("%s: freeing chain\n", __FUNCTION__)); +- +- chain = priv->chain; +- while ((info = chain)) { +- info->draw = NULL; +- info->client = NULL; +- list_del(&info->link); ++ while (!list_is_empty(&priv->cache)) { ++ struct dri_bo *c; + +- chain = info->chain; +- info->chain = NULL; ++ c = list_first_entry(&priv->cache, struct dri_bo, link); ++ list_del(&c->link); + +- if (!info->queued) +- sna_dri2_event_free(info); +- } ++ DBG(("%s: releasing cached handle=%d\n", __FUNCTION__, c->bo ? c->bo->handle : 0)); ++ assert(c->bo); ++ kgem_bo_destroy(&sna->kgem, c->bo); ++ free(c); + } + + free(priv); +@@ -1479,19 +1947,30 @@ sna_dri2_flip(struct sna_dri2_event *info) + { + struct kgem_bo *bo = get_private(info->back)->bo; + struct kgem_bo *tmp_bo; +- uint32_t tmp_name; ++ uint32_t tmp_name, tmp_flags; + int tmp_pitch; + + DBG(("%s(type=%d)\n", __FUNCTION__, info->type)); + + assert(sna_pixmap_get_buffer(info->sna->front) == info->front); + assert(get_drawable_pixmap(info->draw)->drawable.height * bo->pitch <= kgem_bo_size(bo)); ++ assert(get_private(info->front)->size == get_private(info->back)->size); + assert(bo->refcnt); + ++ if (info->sna->mode.flip_active) { ++ DBG(("%s: %d flips still active, aborting\n", ++ __FUNCTION__, info->sna->mode.flip_active)); ++ return false; ++ } ++ ++ assert(!info->queued); + if (!sna_page_flip(info->sna, bo, sna_dri2_flip_handler, + info->type == FLIP_ASYNC ? NULL : info)) + return false; + ++ DBG(("%s: queued flip=%p\n", __FUNCTION__, info->type == FLIP_ASYNC ? 
NULL : info)); ++ assert(info->signal || info->type != FLIP_THROTTLE); ++ + assert(info->sna->dri2.flip_pending == NULL || + info->sna->dri2.flip_pending == info); + if (info->type != FLIP_ASYNC) +@@ -1505,13 +1984,21 @@ sna_dri2_flip(struct sna_dri2_event *info) + tmp_bo = get_private(info->front)->bo; + tmp_name = info->front->name; + tmp_pitch = info->front->pitch; ++ tmp_flags = info->front->flags; ++ ++ assert(tmp_bo->active_scanout > 0); ++ tmp_bo->active_scanout--; + + set_bo(info->sna->front, bo); + ++ info->front->flags = info->back->flags; + info->front->name = info->back->name; + info->front->pitch = info->back->pitch; + get_private(info->front)->bo = bo; ++ bo->active_scanout++; ++ assert(bo->active_scanout <= bo->refcnt); + ++ info->back->flags = tmp_flags; + info->back->name = tmp_name; + info->back->pitch = tmp_pitch; + get_private(info->back)->bo = tmp_bo; +@@ -1521,6 +2008,7 @@ sna_dri2_flip(struct sna_dri2_event *info) + assert(get_private(info->back)->bo->refcnt); + assert(get_private(info->front)->bo != get_private(info->back)->bo); + ++ info->keepalive = KEEPALIVE; + info->queued = true; + return true; + } +@@ -1549,15 +2037,16 @@ can_flip(struct sna * sna, + } + + assert(sna->scrn->vtSema); ++ assert(!sna->mode.hidden); + + if ((sna->flags & (SNA_HAS_FLIP | SNA_HAS_ASYNC_FLIP)) == 0) { + DBG(("%s: no, pageflips disabled\n", __FUNCTION__)); + return false; + } + +- if (front->format != back->format) { ++ if (front->cpp != back->cpp) { + DBG(("%s: no, format mismatch, front = %d, back = %d\n", +- __FUNCTION__, front->format, back->format)); ++ __FUNCTION__, front->cpp, back->cpp)); + return false; + } + +@@ -1567,7 +2056,7 @@ can_flip(struct sna * sna, + } + + if (!sna_crtc_is_on(crtc)) { +- DBG(("%s: ref-pipe=%d is disabled\n", __FUNCTION__, sna_crtc_to_pipe(crtc))); ++ DBG(("%s: ref-pipe=%d is disabled\n", __FUNCTION__, sna_crtc_pipe(crtc))); + return false; + } + +@@ -1581,7 +2070,7 @@ can_flip(struct sna * sna, + if 
(sna_pixmap_get_buffer(pixmap) != front) { + DBG(("%s: no, DRI2 drawable is no longer attached (old name=%d, new name=%d) to pixmap=%ld\n", + __FUNCTION__, front->name, +- sna_pixmap_get_buffer(pixmap) ? ((DRI2BufferPtr)sna_pixmap_get_buffer(pixmap))->name : 0, ++ sna_pixmap_get_buffer(pixmap) ? sna_pixmap_get_buffer(pixmap)->name : 0, + pixmap->drawable.serialNumber)); + return false; + } +@@ -1661,7 +2150,6 @@ can_flip(struct sna * sna, + } + + DBG(("%s: yes, pixmap=%ld\n", __FUNCTION__, pixmap->drawable.serialNumber)); +- assert(dri2_window(win)->front == NULL); + return true; + } + +@@ -1680,9 +2168,9 @@ can_xchg(struct sna *sna, + if (draw->type == DRAWABLE_PIXMAP) + return false; + +- if (front->format != back->format) { ++ if (front->cpp != back->cpp) { + DBG(("%s: no, format mismatch, front = %d, back = %d\n", +- __FUNCTION__, front->format, back->format)); ++ __FUNCTION__, front->cpp, back->cpp)); + return false; + } + +@@ -1714,6 +2202,8 @@ can_xchg(struct sna *sna, + return false; + } + ++ DBG(("%s: back size=%x, front size=%x\n", ++ __FUNCTION__, get_private(back)->size, get_private(front)->size)); + if (get_private(back)->size != get_private(front)->size) { + DBG(("%s: no, back buffer %dx%d does not match front buffer %dx%d\n", + __FUNCTION__, +@@ -1766,9 +2256,9 @@ overlaps_other_crtc(struct sna *sna, xf86CrtcPtr desired) + static bool + can_xchg_crtc(struct sna *sna, + DrawablePtr draw, ++ xf86CrtcPtr crtc, + DRI2BufferPtr front, +- DRI2BufferPtr back, +- xf86CrtcPtr crtc) ++ DRI2BufferPtr back) + { + WindowPtr win = (WindowPtr)draw; + PixmapPtr pixmap; +@@ -1785,9 +2275,9 @@ can_xchg_crtc(struct sna *sna, + if (draw->type == DRAWABLE_PIXMAP) + return false; + +- if (front->format != back->format) { ++ if (front->cpp != back->cpp) { + DBG(("%s: no, format mismatch, front = %d, back = %d\n", +- __FUNCTION__, front->format, back->format)); ++ __FUNCTION__, front->cpp, back->cpp)); + return false; + } + +@@ -1866,20 +2356,21 @@ sna_dri2_xchg(DrawablePtr 
draw, DRI2BufferPtr front, DRI2BufferPtr back) + + back_bo = get_private(back)->bo; + front_bo = get_private(front)->bo; +- assert(front_bo != back_bo); + +- DBG(("%s: win=%ld, exchange front=%d/%d and back=%d/%d, pixmap=%ld %dx%d\n", ++ DBG(("%s: win=%ld, exchange front=%d/%d,ref=%d and back=%d/%d,ref=%d, pixmap=%ld %dx%d\n", + __FUNCTION__, win->drawable.id, +- front_bo->handle, front->name, +- back_bo->handle, back->name, ++ front_bo->handle, front->name, get_private(front)->refcnt, ++ back_bo->handle, back->name, get_private(back)->refcnt, + pixmap->drawable.serialNumber, + pixmap->drawable.width, + pixmap->drawable.height)); + +- DBG(("%s: back_bo pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", +- __FUNCTION__, back_bo->pitch, kgem_bo_size(back_bo), back_bo->refcnt, back_bo->active_scanout)); +- DBG(("%s: front_bo pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", +- __FUNCTION__, front_bo->pitch, kgem_bo_size(front_bo), front_bo->refcnt, front_bo->active_scanout)); ++ DBG(("%s: back_bo handle=%d, pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", ++ __FUNCTION__, back_bo->handle, back_bo->pitch, kgem_bo_size(back_bo), back_bo->refcnt, back_bo->active_scanout)); ++ DBG(("%s: front_bo handle=%d, pitch=%d, size=%d, ref=%d, active_scanout?=%d\n", ++ __FUNCTION__, front_bo->handle, front_bo->pitch, kgem_bo_size(front_bo), front_bo->refcnt, front_bo->active_scanout)); ++ ++ assert(front_bo != back_bo); + assert(front_bo->refcnt); + assert(back_bo->refcnt); + +@@ -1894,6 +2385,11 @@ sna_dri2_xchg(DrawablePtr draw, DRI2BufferPtr front, DRI2BufferPtr back) + get_private(back)->bo = front_bo; + mark_stale(back); + ++ assert(front_bo->active_scanout > 0); ++ front_bo->active_scanout--; ++ back_bo->active_scanout++; ++ assert(back_bo->active_scanout <= back_bo->refcnt); ++ + tmp = front->name; + front->name = back->name; + back->name = tmp; +@@ -1902,17 +2398,23 @@ sna_dri2_xchg(DrawablePtr draw, DRI2BufferPtr front, DRI2BufferPtr back) + front->pitch = back->pitch; + 
back->pitch = tmp; + ++ tmp = front->flags; ++ front->flags = back->flags; ++ back->flags = tmp; ++ + assert(front_bo->refcnt); + assert(back_bo->refcnt); + ++ assert(front_bo->pitch == get_private(front)->bo->pitch); ++ assert(back_bo->pitch == get_private(back)->bo->pitch); ++ + assert(get_private(front)->bo == sna_pixmap(pixmap)->gpu_bo); + } + + static void sna_dri2_xchg_crtc(struct sna *sna, DrawablePtr draw, xf86CrtcPtr crtc, DRI2BufferPtr front, DRI2BufferPtr back) + { + WindowPtr win = (WindowPtr)draw; +- DRI2Buffer2Ptr tmp; +- struct kgem_bo *bo; ++ struct dri2_window *priv = dri2_window(win); + + DBG(("%s: exchange front=%d/%d and back=%d/%d, win id=%lu, pixmap=%ld %dx%d\n", + __FUNCTION__, +@@ -1922,162 +2424,130 @@ static void sna_dri2_xchg_crtc(struct sna *sna, DrawablePtr draw, xf86CrtcPtr cr + get_window_pixmap(win)->drawable.serialNumber, + get_window_pixmap(win)->drawable.width, + get_window_pixmap(win)->drawable.height)); ++ assert(can_xchg_crtc(sna, draw, crtc, front, back)); + +- DamageRegionAppend(&win->drawable, &win->clipList); ++ if (APPLY_DAMAGE) { ++ DBG(("%s: marking drawable as damaged\n", __FUNCTION__)); ++ sna->ignore_copy_area = sna->flags & SNA_TEAR_FREE; ++ DamageRegionAppend(&win->drawable, &win->clipList); ++ } + sna_shadow_set_crtc(sna, crtc, get_private(back)->bo); +- DamageRegionProcessPending(&win->drawable); ++ if (APPLY_DAMAGE) { ++ sna->ignore_copy_area = false; ++ DamageRegionProcessPending(&win->drawable); ++ } + +- assert(dri2_window(win)->front == NULL); ++ if (priv->front == NULL) { ++ DRI2Buffer2Ptr tmp; + +- tmp = calloc(1, sizeof(*tmp) + sizeof(struct sna_dri2_private)); +- if (tmp == NULL) { +- back->attachment = -1; +- if (get_private(back)->proxy == NULL) { +- get_private(back)->pixmap = get_window_pixmap(win); +- get_private(back)->proxy = sna_dri2_reference_buffer(sna_pixmap_get_buffer(get_private(back)->pixmap)); ++ tmp = calloc(1, sizeof(*tmp) + sizeof(struct sna_dri2_private)); ++ if (tmp == NULL) { ++ 
sna_shadow_unset_crtc(sna, crtc); ++ return; + } +- dri2_window(win)->front = sna_dri2_reference_buffer(back); +- return; +- } + +- *tmp = *back; +- tmp->attachment = DRI2BufferFrontLeft; +- tmp->driverPrivate = tmp + 1; +- get_private(tmp)->refcnt = 1; +- get_private(tmp)->bo = get_private(back)->bo; +- get_private(tmp)->size = get_private(back)->size; +- get_private(tmp)->pixmap = get_window_pixmap(win); +- get_private(tmp)->proxy = sna_dri2_reference_buffer(sna_pixmap_get_buffer(get_private(tmp)->pixmap)); +- dri2_window(win)->front = tmp; +- +- DBG(("%s: allocating new backbuffer\n", __FUNCTION__)); +- back->name = 0; +- bo = kgem_create_2d(&sna->kgem, +- draw->width, draw->height, draw->bitsPerPixel, +- get_private(back)->bo->tiling, +- CREATE_SCANOUT); +- if (bo != NULL) { +- get_private(back)->bo = bo; +- back->pitch = bo->pitch; +- back->name = kgem_bo_flink(&sna->kgem, bo); +- } +- if (back->name == 0) { +- if (bo != NULL) +- kgem_bo_destroy(&sna->kgem, bo); +- get_private(back)->bo = NULL; +- back->attachment = -1; ++ tmp->attachment = DRI2BufferFrontLeft; ++ tmp->driverPrivate = tmp + 1; ++ tmp->cpp = back->cpp; ++ tmp->format = back->format; ++ ++ get_private(tmp)->refcnt = 1; ++ get_private(tmp)->bo = kgem_create_2d(&sna->kgem, ++ draw->width, draw->height, draw->bitsPerPixel, ++ get_private(back)->bo->tiling, ++ CREATE_SCANOUT | CREATE_EXACT); ++ if (get_private(tmp)->bo != NULL) { ++ tmp->pitch = get_private(tmp)->bo->pitch; ++ tmp->name = kgem_bo_flink(&sna->kgem, get_private(tmp)->bo); ++ } ++ if (tmp->name == 0) { ++ if (get_private(tmp)->bo != NULL) ++ kgem_bo_destroy(&sna->kgem, get_private(tmp)->bo); ++ sna_shadow_unset_crtc(sna, crtc); ++ return; ++ } ++ get_private(tmp)->size = get_private(back)->size; ++ get_private(tmp)->pixmap = get_private(front)->pixmap; ++ get_private(tmp)->proxy = sna_dri2_reference_buffer(front); ++ get_private(tmp)->bo->active_scanout++; ++ ++ priv->front = front = tmp; + } +-} ++ assert(front == priv->front); + 
+-static void frame_swap_complete(struct sna_dri2_event *frame, int type) +-{ +- const struct ust_msc *swap; ++ { ++ struct kgem_bo *front_bo = get_private(front)->bo; ++ struct kgem_bo *back_bo = get_private(back)->bo; ++ unsigned tmp; + +- if (frame->draw == NULL) +- return; ++ assert(front_bo->refcnt); ++ assert(back_bo->refcnt); + +- assert(frame->client); ++ get_private(back)->bo = front_bo; ++ get_private(front)->bo = back_bo; ++ mark_stale(back); + +- swap = sna_crtc_last_swap(frame->crtc); +- DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc=%lld], tv=%d.%06d\n", +- __FUNCTION__, type, (long)frame->draw, frame->pipe, +- (long long)swap->msc, +- (long long)draw_current_msc(frame->draw, frame->crtc, swap->msc), +- swap->tv_sec, swap->tv_usec)); ++ assert(front_bo->active_scanout > 0); ++ front_bo->active_scanout--; ++ back_bo->active_scanout++; ++ assert(back_bo->active_scanout <= back_bo->refcnt); + +- DRI2SwapComplete(frame->client, frame->draw, +- draw_current_msc(frame->draw, frame->crtc, swap->msc), +- swap->tv_sec, swap->tv_usec, +- type, frame->event_complete, frame->event_data); +-} ++ tmp = front->name; ++ front->name = back->name; ++ back->name = tmp; + +-static void fake_swap_complete(struct sna *sna, ClientPtr client, +- DrawablePtr draw, xf86CrtcPtr crtc, +- int type, DRI2SwapEventPtr func, void *data) +-{ +- const struct ust_msc *swap; +- +- swap = sna_crtc_last_swap(crtc); +- DBG(("%s(type=%d): draw=%ld, pipe=%d, frame=%lld [msc %lld], tv=%d.%06d\n", +- __FUNCTION__, type, (long)draw->id, crtc ? 
sna_crtc_to_pipe(crtc) : -1, +- (long long)swap->msc, +- (long long)draw_current_msc(draw, crtc, swap->msc), +- swap->tv_sec, swap->tv_usec)); ++ tmp = front->pitch; ++ front->pitch = back->pitch; ++ back->pitch = tmp; + +- DRI2SwapComplete(client, draw, +- draw_current_msc(draw, crtc, swap->msc), +- swap->tv_sec, swap->tv_usec, +- type, func, data); ++ tmp = front->flags; ++ front->flags = back->flags; ++ back->flags = tmp; ++ } + } + + static void chain_swap(struct sna_dri2_event *chain) + { +- union drm_wait_vblank vbl; ++ DBG(("%s: draw=%ld, queued?=%d, type=%d\n", ++ __FUNCTION__, (long)chain->draw->id, chain->queued, chain->type)); ++ ++ if (chain->queued) /* too early! */ ++ return; + + if (chain->draw == NULL) { + sna_dri2_event_free(chain); + return; + } + +- if (chain->queued) /* too early! */ +- return; +- + assert(chain == dri2_chain(chain->draw)); +- DBG(("%s: chaining draw=%ld, type=%d\n", +- __FUNCTION__, (long)chain->draw->id, chain->type)); +- chain->queued = true; ++ assert(chain->signal); + + switch (chain->type) { +- case SWAP_THROTTLE: ++ case SWAP_COMPLETE: + DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__)); +- if (chain->sna->mode.shadow && +- !chain->sna->mode.shadow_damage) { +- /* recursed from wait_for_shadow(), simply requeue */ +- DBG(("%s -- recursed from wait_for_shadow(), requeuing\n", __FUNCTION__)); +- VG_CLEAR(vbl); +- vbl.request.type = +- DRM_VBLANK_RELATIVE | +- DRM_VBLANK_EVENT; +- vbl.request.sequence = 1; +- vbl.request.signal = (uintptr_t)chain; +- +- if (!sna_wait_vblank(chain->sna, &vbl, chain->pipe)) +- return; +- +- DBG(("%s -- requeue failed, errno=%d\n", __FUNCTION__, errno)); +- } +- + if (can_xchg(chain->sna, chain->draw, chain->front, chain->back)) { + sna_dri2_xchg(chain->draw, chain->front, chain->back); +- } else if (can_xchg_crtc(chain->sna, chain->draw, chain->front, chain->back, chain->crtc)) { +- sna_dri2_xchg_crtc(chain->sna, chain->draw, chain->crtc, chain->front, chain->back); ++ } else if 
(can_xchg_crtc(chain->sna, chain->draw, chain->crtc, ++ chain->front, chain->back)) { ++ sna_dri2_xchg_crtc(chain->sna, chain->draw, chain->crtc, ++ chain->front, chain->back); + } else { +- assert(chain->queued); +- chain->bo = __sna_dri2_copy_region(chain->sna, chain->draw, NULL, +- chain->back, chain->front, +- true); ++ __sna_dri2_copy_event(chain, chain->sync | DRI2_BO); + } ++ assert(get_private(chain->back)->bo != get_private(chain->front)->bo); + case SWAP: + break; + default: + return; + } + +- VG_CLEAR(vbl); +- vbl.request.type = +- DRM_VBLANK_RELATIVE | +- DRM_VBLANK_EVENT; +- vbl.request.sequence = 1; +- vbl.request.signal = (uintptr_t)chain; +- if (sna_wait_vblank(chain->sna, &vbl, chain->pipe)) { ++ if ((chain->type == SWAP_COMPLETE && ++ !swap_limit(chain->draw, 2 + !chain->sync) && ++ !chain->sync) || ++ !sna_next_vblank(chain)) { + DBG(("%s: vblank wait failed, unblocking client\n", __FUNCTION__)); + frame_swap_complete(chain, DRI2_BLIT_COMPLETE); + sna_dri2_event_free(chain); +- } else { +- if (chain->type == SWAP_THROTTLE && !swap_limit(chain->draw, 2)) { +- DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); +- frame_swap_complete(chain, DRI2_BLIT_COMPLETE); +- } + } + } + +@@ -2086,40 +2556,27 @@ static inline bool rq_is_busy(struct kgem *kgem, struct kgem_bo *bo) + if (bo == NULL) + return false; + +- DBG(("%s: handle=%d, domain: %d exec? %d, rq? 
%d\n", __FUNCTION__, +- bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL)); +- assert(bo->refcnt); +- +- if (bo->exec) +- return true; +- +- if (bo->rq == NULL) +- return false; +- +- return __kgem_busy(kgem, bo->handle); ++ return __kgem_bo_is_busy(kgem, bo); + } + +-static bool sna_dri2_blit_complete(struct sna *sna, +- struct sna_dri2_event *info) ++static bool sna_dri2_blit_complete(struct sna_dri2_event *info) + { +- if (rq_is_busy(&sna->kgem, info->bo)) { +- union drm_wait_vblank vbl; ++ if (!info->bo) ++ return true; + ++ if (__kgem_bo_is_busy(&info->sna->kgem, info->bo)) { + DBG(("%s: vsync'ed blit is still busy, postponing\n", + __FUNCTION__)); +- +- VG_CLEAR(vbl); +- vbl.request.type = +- DRM_VBLANK_RELATIVE | +- DRM_VBLANK_EVENT; +- vbl.request.sequence = 1; +- vbl.request.signal = (uintptr_t)info; +- assert(info->queued); +- if (!sna_wait_vblank(sna, &vbl, info->pipe)) ++ if (sna_next_vblank(info)) + return false; ++ ++ kgem_bo_sync__gtt(&info->sna->kgem, info->bo); + } + + DBG(("%s: blit finished\n", __FUNCTION__)); ++ kgem_bo_destroy(&info->sna->kgem, info->bo); ++ info->bo = NULL; ++ + return true; + } + +@@ -2128,11 +2585,12 @@ void sna_dri2_vblank_handler(struct drm_event_vblank *event) + struct sna_dri2_event *info = (void *)(uintptr_t)event->user_data; + struct sna *sna = info->sna; + DrawablePtr draw; +- union drm_wait_vblank vbl; + uint64_t msc; + +- DBG(("%s(type=%d, sequence=%d)\n", __FUNCTION__, info->type, event->sequence)); ++ DBG(("%s(type=%d, sequence=%d, draw=%ld)\n", __FUNCTION__, info->type, event->sequence, info->draw ? info->draw->serialNumber : 0)); + assert(info->queued); ++ info->queued = false; ++ + msc = sna_crtc_record_event(info->crtc, event); + + draw = info->draw; +@@ -2141,68 +2599,120 @@ void sna_dri2_vblank_handler(struct drm_event_vblank *event) + goto done; + } + ++ assert((info->front == NULL && info->back == NULL) || info->front != info->back); + switch (info->type) { + case FLIP: + /* If we can still flip... 
*/ ++ assert(info->signal); + if (can_flip(sna, draw, info->front, info->back, info->crtc) && + sna_dri2_flip(info)) + return; + + /* else fall through to blit */ + case SWAP: +- assert(info->queued); +- if (sna->mode.shadow && !sna->mode.shadow_damage) { +- /* recursed from wait_for_shadow(), simply requeue */ +- DBG(("%s -- recursed from wait_for_shadow(), requeuing\n", __FUNCTION__)); +- +- } else if (can_xchg(info->sna, draw, info->front, info->back)) { ++ assert(info->signal); ++ if (can_xchg(info->sna, draw, info->front, info->back)) { + sna_dri2_xchg(draw, info->front, info->back); +- info->type = SWAP_WAIT; +- } else if (can_xchg_crtc(sna, draw, info->front, info->back, info->crtc)) { +- sna_dri2_xchg_crtc(sna, draw, info->crtc, info->front, info->back); +- info->type = SWAP_WAIT; ++ info->type = SWAP_COMPLETE; ++ } else if (can_xchg_crtc(sna, draw, info->crtc, ++ info->front, info->back)) { ++ sna_dri2_xchg_crtc(sna, draw, info->crtc, ++ info->front, info->back); ++ info->type = SWAP_COMPLETE; + } else { +- assert(info->queued); +- info->bo = __sna_dri2_copy_region(sna, draw, NULL, +- info->back, info->front, true); +- info->type = SWAP_WAIT; ++ __sna_dri2_copy_event(info, DRI2_BO | DRI2_SYNC); ++ info->type = SWAP_COMPLETE; + } + +- VG_CLEAR(vbl); +- vbl.request.type = +- DRM_VBLANK_RELATIVE | +- DRM_VBLANK_EVENT; +- vbl.request.sequence = 1; +- vbl.request.signal = (uintptr_t)info; +- +- assert(info->queued); +- if (!sna_wait_vblank(sna, &vbl, info->pipe)) ++ if (sna_next_vblank(info)) + return; + + DBG(("%s -- requeue failed, errno=%d\n", __FUNCTION__, errno)); ++ assert(info->pending.bo == NULL); ++ assert(info->keepalive == 1); + /* fall through to SwapComplete */ +- case SWAP_WAIT: +- if (!sna_dri2_blit_complete(sna, info)) +- return; +- +- DBG(("%s: swap complete, unblocking client (frame=%d, tv=%d.%06d)\n", __FUNCTION__, +- event->sequence, event->tv_sec, event->tv_usec)); +- frame_swap_complete(info, DRI2_BLIT_COMPLETE); +- break; +- +- case 
SWAP_THROTTLE: ++ case SWAP_COMPLETE: + DBG(("%s: %d complete, frame=%d tv=%d.%06d\n", + __FUNCTION__, info->type, + event->sequence, event->tv_sec, event->tv_usec)); + +- if (xorg_can_triple_buffer()) { +- if (!sna_dri2_blit_complete(sna, info)) ++ if (info->signal) { ++ if (!sna_dri2_blit_complete(info)) + return; + + DBG(("%s: triple buffer swap complete, unblocking client (frame=%d, tv=%d.%06d)\n", __FUNCTION__, + event->sequence, event->tv_sec, event->tv_usec)); + frame_swap_complete(info, DRI2_BLIT_COMPLETE); + } ++ ++ if (info->pending.bo) { ++ struct copy current_back; ++ ++ DBG(("%s: swapping back handle=%d [name=%d, active=%d] for pending handle=%d [name=%d, active=%d], front handle=%d [name=%d, active=%d]\n", ++ __FUNCTION__, ++ get_private(info->back)->bo->handle, info->back->name, get_private(info->back)->bo->active_scanout, ++ info->pending.bo->handle, info->pending.name, info->pending.bo->active_scanout, ++ get_private(info->front)->bo->handle, info->front->name, get_private(info->front)->bo->active_scanout)); ++ ++ assert(info->pending.bo->active_scanout > 0); ++ info->pending.bo->active_scanout--; ++ ++ current_back.bo = get_private(info->back)->bo; ++ current_back.size = get_private(info->back)->size; ++ current_back.name = info->back->name; ++ current_back.flags = info->back->flags; ++ ++ get_private(info->back)->bo = info->pending.bo; ++ get_private(info->back)->size = info->pending.size; ++ info->back->name = info->pending.name; ++ info->back->pitch = info->pending.bo->pitch; ++ info->back->flags = info->pending.flags; ++ info->pending.bo = NULL; ++ ++ assert(get_private(info->back)->bo != get_private(info->front)->bo); ++ ++ if (can_xchg(info->sna, info->draw, info->front, info->back)) ++ sna_dri2_xchg(info->draw, info->front, info->back); ++ else if (can_xchg_crtc(info->sna, info->draw, info->crtc, ++ info->front, info->back)) ++ sna_dri2_xchg_crtc(info->sna, info->draw, info->crtc, ++ info->front, info->back); ++ else ++ 
__sna_dri2_copy_event(info, info->sync | DRI2_BO); ++ ++ sna_dri2_cache_bo(info->sna, info->draw, ++ get_private(info->back)->bo, ++ info->back->name, ++ get_private(info->back)->size, ++ info->back->flags); ++ ++ get_private(info->back)->bo = current_back.bo; ++ get_private(info->back)->size = current_back.size; ++ info->back->name = current_back.name; ++ info->back->pitch = current_back.bo->pitch; ++ info->back->flags = current_back.flags; ++ ++ DBG(("%s: restored current back handle=%d [name=%d, active=%d], active=%d], front handle=%d [name=%d, active=%d]\n", ++ __FUNCTION__, ++ get_private(info->back)->bo->handle, info->back->name, get_private(info->back)->bo->active_scanout, ++ get_private(info->front)->bo->handle, info->front->name, get_private(info->front)->bo->active_scanout)); ++ ++ assert(info->draw); ++ assert(!info->signal); ++ info->keepalive++; ++ info->signal = true; ++ } ++ ++ if (--info->keepalive) { ++ if (sna_next_vblank(info)) ++ return; ++ ++ if (info->signal) { ++ DBG(("%s: triple buffer swap complete, unblocking client (frame=%d, tv=%d.%06d)\n", __FUNCTION__, ++ event->sequence, event->tv_sec, event->tv_usec)); ++ frame_swap_complete(info, DRI2_BLIT_COMPLETE); ++ } ++ } + break; + + case WAITMSC: +@@ -2218,11 +2728,11 @@ void sna_dri2_vblank_handler(struct drm_event_vblank *event) + } + + if (info->chain) { ++ DBG(("%s: continuing chain\n", __FUNCTION__)); + assert(info->chain != info); + assert(info->draw == draw); +- sna_dri2_remove_event((WindowPtr)draw, info); ++ sna_dri2_remove_event(info); + chain_swap(info->chain); +- info->draw = NULL; + } + + done: +@@ -2230,101 +2740,148 @@ done: + DBG(("%s complete\n", __FUNCTION__)); + } + +-static bool ++static void + sna_dri2_immediate_blit(struct sna *sna, + struct sna_dri2_event *info, +- bool sync, bool event) ++ bool sync) + { +- DrawablePtr draw = info->draw; +- bool ret = false; ++ struct sna_dri2_event *chain = dri2_chain(info->draw); + + if (sna->flags & SNA_NO_WAIT) + sync = false; + +- 
DBG(("%s: emitting immediate blit, throttling client, synced? %d, chained? %d, send-event? %d\n", +- __FUNCTION__, sync, dri2_chain(draw) != info, +- event)); ++ DBG(("%s: emitting immediate blit, throttling client, synced? %d, chained? %d, pipe %d\n", ++ __FUNCTION__, sync, chain != info, info->pipe)); ++ assert(chain); + +- info->type = SWAP_THROTTLE; +- if (!sync || dri2_chain(draw) == info) { +- DBG(("%s: no pending blit, starting chain\n", +- __FUNCTION__)); ++ info->type = SWAP_COMPLETE; ++ info->sync = sync; ++ info->keepalive = KEEPALIVE; + +- info->queued = true; +- info->bo = __sna_dri2_copy_region(sna, draw, NULL, +- info->back, +- info->front, +- sync); +- if (event) { +- if (sync) { +- union drm_wait_vblank vbl; +- +- VG_CLEAR(vbl); +- vbl.request.type = +- DRM_VBLANK_RELATIVE | +- DRM_VBLANK_EVENT; +- vbl.request.sequence = 1; +- vbl.request.signal = (uintptr_t)info; +- ret = !sna_wait_vblank(sna, &vbl, info->pipe); +- if (ret) +- event = !swap_limit(draw, 2); +- } +- if (event) { +- DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); +- frame_swap_complete(info, DRI2_BLIT_COMPLETE); +- } ++ if (chain == info) { ++ DBG(("%s: no pending blit, starting chain\n", __FUNCTION__)); ++ ++ assert(info->front != info->back); ++ if (can_xchg(info->sna, info->draw, info->front, info->back)) { ++ sna_dri2_xchg(info->draw, info->front, info->back); ++ } else if (can_xchg_crtc(info->sna, info->draw, info->crtc, ++ info->front, info->back)) { ++ sna_dri2_xchg_crtc(info->sna, info->draw, info->crtc, ++ info->front, info->back); ++ } else ++ __sna_dri2_copy_event(info, sync | DRI2_BO); ++ ++ assert(info->signal); ++ ++ if ((!swap_limit(info->draw, 2 + !sync) && !sync) || ++ !sna_next_vblank(info)) { ++ DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); ++ frame_swap_complete(info, DRI2_BLIT_COMPLETE); ++ sna_dri2_event_free(info); ++ } ++ return; ++ } ++ ++ DBG(("%s: current event front=%d [name=%d, active?=%d], back=%d [name=%d, 
active?=%d]\n", __FUNCTION__, ++ get_private(chain->front)->bo->handle, chain->front->name, get_private(chain->front)->bo->active_scanout, ++ get_private(chain->back)->bo->handle, chain->back->name, get_private(chain->back)->bo->active_scanout)); ++ ++ if (chain->type == SWAP_COMPLETE && chain->front == info->front) { ++ assert(chain->draw == info->draw); ++ assert(chain->client == info->client); ++ assert(chain->event_complete == info->event_complete); ++ assert(chain->event_data == info->event_data); ++ assert(chain->queued); ++ ++ if ((!sync || !chain->sync) && chain->pending.bo) { ++ bool signal = chain->signal; ++ ++ DBG(("%s: swap elision, unblocking client\n", __FUNCTION__)); ++ assert(chain->draw); ++ chain->signal = true; ++ frame_swap_complete(chain, DRI2_EXCHANGE_COMPLETE); ++ chain->signal = signal; ++ ++ assert(chain->pending.bo->active_scanout > 0); ++ chain->pending.bo->active_scanout--; ++ ++ sna_dri2_cache_bo(chain->sna, chain->draw, ++ chain->pending.bo, ++ chain->pending.name, ++ chain->pending.size, ++ chain->pending.flags); ++ chain->pending.bo = NULL; ++ } ++ ++ if (chain->pending.bo == NULL && swap_limit(info->draw, 2 + !sync)) { ++ DBG(("%s: setting handle=%d as pending blit (current event front=%d, back=%d)\n", __FUNCTION__, ++ get_private(info->back)->bo->handle, ++ get_private(chain->front)->bo->handle, ++ get_private(chain->back)->bo->handle)); ++ chain->pending.bo = ref(get_private(info->back)->bo); ++ chain->pending.size = get_private(info->back)->size; ++ chain->pending.name = info->back->name; ++ chain->pending.flags = info->back->flags; ++ chain->sync = sync; ++ info->signal = false; /* transfer signal to pending */ ++ ++ /* Prevent us from handing it back on next GetBuffers */ ++ chain->pending.bo->active_scanout++; ++ ++ sna_dri2_event_free(info); ++ return; + } +- } else { +- DBG(("%s: pending blit, chained\n", __FUNCTION__)); +- ret = true; + } + +- DBG(("%s: continue? 
%d\n", __FUNCTION__, ret)); +- return ret; ++ DBG(("%s: pending blit, chained\n", __FUNCTION__)); + } + + static bool + sna_dri2_flip_continue(struct sna_dri2_event *info) + { +- DBG(("%s(mode=%d)\n", __FUNCTION__, info->mode)); ++ struct kgem_bo *bo = get_private(info->front)->bo; + +- if (info->mode > 0){ +- struct kgem_bo *bo = get_private(info->front)->bo; ++ DBG(("%s(mode=%d)\n", __FUNCTION__, info->flip_continue)); ++ assert(info->flip_continue > 0); ++ info->type = info->flip_continue; ++ info->flip_continue = 0; + +- info->type = info->mode; ++ assert(!info->signal); ++ info->signal = info->type == FLIP_THROTTLE && info->draw; + +- if (bo != sna_pixmap(info->sna->front)->gpu_bo) +- return false; ++ if (info->sna->mode.front_active == 0) ++ return false; + +- if (!sna_page_flip(info->sna, bo, sna_dri2_flip_handler, info)) +- return false; ++ if (bo != sna_pixmap(info->sna->front)->gpu_bo) ++ return false; + +- assert(info->sna->dri2.flip_pending == NULL || +- info->sna->dri2.flip_pending == info); +- info->sna->dri2.flip_pending = info; +- assert(info->queued); +- } else { +- info->type = -info->mode; ++ assert(!info->queued); ++ if (!sna_page_flip(info->sna, bo, sna_dri2_flip_handler, info)) ++ return false; + +- if (!info->draw) +- return false; ++ DBG(("%s: queued flip=%p\n", __FUNCTION__, info)); ++ assert(info->sna->dri2.flip_pending == NULL || ++ info->sna->dri2.flip_pending == info); ++ info->sna->dri2.flip_pending = info; ++ info->queued = true; + +- if (!can_flip(info->sna, info->draw, info->front, info->back, info->crtc)) +- return false; ++ return true; ++} + +- assert(sna_pixmap_get_buffer(get_drawable_pixmap(info->draw)) == info->front); +- if (!sna_dri2_flip(info)) +- return false; ++static bool ++sna_dri2_flip_keepalive(struct sna_dri2_event *info) ++{ ++ DBG(("%s(keepalive?=%d)\n", __FUNCTION__, info->keepalive-1)); ++ assert(info->keepalive > 0); ++ if (!--info->keepalive) ++ return false; + +- if (!xorg_can_triple_buffer()) { +- 
sna_dri2_get_back(info->sna, info->draw, info->back, info); +- DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); +- frame_swap_complete(info, DRI2_FLIP_COMPLETE); +- } +- } ++ if (info->draw == NULL) ++ return false; + +- info->mode = 0; +- return true; ++ DBG(("%s: marking next flip as complete\n", __FUNCTION__)); ++ info->flip_continue = FLIP_COMPLETE; ++ return sna_dri2_flip_continue(info); + } + + static void chain_flip(struct sna *sna) +@@ -2332,8 +2889,8 @@ static void chain_flip(struct sna *sna) + struct sna_dri2_event *chain = sna->dri2.flip_pending; + + assert(chain->type == FLIP); +- DBG(("%s: chaining type=%d, cancelled?=%d\n", +- __FUNCTION__, chain->type, chain->draw == NULL)); ++ DBG(("%s: chaining type=%d, cancelled?=%d window=%ld\n", ++ __FUNCTION__, chain->type, chain->draw == NULL, chain->draw ? chain->draw->id : 0)); + + sna->dri2.flip_pending = NULL; + if (chain->draw == NULL) { +@@ -2343,31 +2900,18 @@ static void chain_flip(struct sna *sna) + + assert(chain == dri2_chain(chain->draw)); + assert(!chain->queued); +- chain->queued = true; + + if (can_flip(sna, chain->draw, chain->front, chain->back, chain->crtc) && + sna_dri2_flip(chain)) { + DBG(("%s: performing chained flip\n", __FUNCTION__)); + } else { + DBG(("%s: emitting chained vsync'ed blit\n", __FUNCTION__)); +- chain->bo = __sna_dri2_copy_region(sna, chain->draw, NULL, +- chain->back, chain->front, +- true); ++ __sna_dri2_copy_event(chain, DRI2_SYNC); + + if (xorg_can_triple_buffer()) { +- union drm_wait_vblank vbl; +- +- VG_CLEAR(vbl); +- +- chain->type = SWAP_WAIT; +- vbl.request.type = +- DRM_VBLANK_RELATIVE | +- DRM_VBLANK_EVENT; +- vbl.request.sequence = 1; +- vbl.request.signal = (uintptr_t)chain; +- +- assert(chain->queued); +- if (!sna_wait_vblank(sna, &vbl, chain->pipe)) ++ chain->type = SWAP_COMPLETE; ++ assert(chain->signal); ++ if (sna_next_vblank(chain)) + return; + } + +@@ -2381,8 +2925,10 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) 
+ { + struct sna *sna = flip->sna; + +- DBG(("%s(pipe=%d, event=%d)\n", __FUNCTION__, flip->pipe, flip->type)); +- assert(flip->queued); ++ DBG(("%s flip=%p (pipe=%d, event=%d, queued?=%d)\n", __FUNCTION__, flip, flip->pipe, flip->type, flip->queued)); ++ if (!flip->queued) /* pageflip died whilst being queued */ ++ return; ++ flip->queued = false; + + if (sna->dri2.flip_pending == flip) + sna->dri2.flip_pending = NULL; +@@ -2390,8 +2936,10 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) + /* We assume our flips arrive in order, so we don't check the frame */ + switch (flip->type) { + case FLIP: +- DBG(("%s: swap complete, unblocking client\n", __FUNCTION__)); +- frame_swap_complete(flip, DRI2_FLIP_COMPLETE); ++ if (flip->signal) { ++ DBG(("%s: swap complete, unblocking client\n", __FUNCTION__)); ++ frame_swap_complete(flip, DRI2_FLIP_COMPLETE); ++ } + sna_dri2_event_free(flip); + + if (sna->dri2.flip_pending) +@@ -2399,27 +2947,35 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) + break; + + case FLIP_THROTTLE: +- DBG(("%s: triple buffer swap complete, unblocking client\n", __FUNCTION__)); +- frame_swap_complete(flip, DRI2_FLIP_COMPLETE); ++ if (flip->signal) { ++ DBG(("%s: triple buffer swap complete, unblocking client\n", __FUNCTION__)); ++ frame_swap_complete(flip, DRI2_FLIP_COMPLETE); ++ } + case FLIP_COMPLETE: ++ assert(!flip->signal); + if (sna->dri2.flip_pending) { ++ DBG(("%s: pending flip\n", __FUNCTION__)); + sna_dri2_event_free(flip); + chain_flip(sna); +- } else if (!flip->mode) { ++ } else if (!flip->flip_continue) { + DBG(("%s: flip chain complete\n", __FUNCTION__)); ++ if (!sna_dri2_flip_keepalive(flip)) { ++ if (flip->chain) { ++ sna_dri2_remove_event(flip); ++ chain_swap(flip->chain); ++ } + +- if (flip->chain) { +- sna_dri2_remove_event((WindowPtr)flip->draw, +- flip); +- chain_swap(flip->chain); +- flip->draw = NULL; ++ sna_dri2_event_free(flip); + } +- +- sna_dri2_event_free(flip); + } else if 
(!sna_dri2_flip_continue(flip)) { + DBG(("%s: no longer able to flip\n", __FUNCTION__)); +- if (flip->draw == NULL || !sna_dri2_immediate_blit(sna, flip, false, flip->mode < 0)) +- sna_dri2_event_free(flip); ++ if (flip->draw != NULL) ++ __sna_dri2_copy_event(flip, 0); ++ if (flip->signal) { ++ DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); ++ frame_swap_complete(flip, DRI2_BLIT_COMPLETE); ++ } ++ sna_dri2_event_free(flip); + } + break; + +@@ -2433,17 +2989,27 @@ static void sna_dri2_flip_event(struct sna_dri2_event *flip) + } + } + ++static int ++sna_query_vblank(struct sna *sna, xf86CrtcPtr crtc, union drm_wait_vblank *vbl) ++{ ++ VG_CLEAR(*vbl); ++ vbl->request.type = ++ _DRM_VBLANK_RELATIVE | pipe_select(sna_crtc_pipe(crtc)); ++ vbl->request.sequence = 0; ++ ++ return drmIoctl(sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, vbl); ++} ++ + static uint64_t + get_current_msc(struct sna *sna, DrawablePtr draw, xf86CrtcPtr crtc) + { + union drm_wait_vblank vbl; +- uint64_t ret = -1; ++ uint64_t ret; + +- VG_CLEAR(vbl); +- vbl.request.type = _DRM_VBLANK_RELATIVE; +- vbl.request.sequence = 0; +- if (sna_wait_vblank(sna, &vbl, sna_crtc_to_pipe(crtc)) == 0) ++ if (sna_query_vblank(sna, crtc, &vbl) == 0) + ret = sna_crtc_record_vblank(crtc, &vbl); ++ else ++ ret = sna_crtc_last_swap(crtc)->msc; + + return draw_current_msc(draw, crtc, ret); + } +@@ -2494,12 +3060,18 @@ static int use_triple_buffer(struct sna *sna, ClientPtr client, bool async) + } + + static bool immediate_swap(struct sna *sna, +- uint64_t target_msc, +- uint64_t divisor, + DrawablePtr draw, + xf86CrtcPtr crtc, ++ uint64_t *target_msc, ++ uint64_t divisor, ++ uint64_t remainder, + uint64_t *current_msc) + { ++ /* ++ * If divisor is zero, or current_msc is smaller than target_msc ++ * we just need to make sure target_msc passes before initiating ++ * the swap. 
++ */ + if (divisor == 0) { + *current_msc = -1; + +@@ -2508,72 +3080,97 @@ static bool immediate_swap(struct sna *sna, + return true; + } + +- if (target_msc) ++ if (*target_msc) + *current_msc = get_current_msc(sna, draw, crtc); + + DBG(("%s: current_msc=%ld, target_msc=%ld -- %s\n", +- __FUNCTION__, (long)*current_msc, (long)target_msc, +- (*current_msc >= target_msc - 1) ? "yes" : "no")); +- return *current_msc >= target_msc - 1; ++ __FUNCTION__, (long)*current_msc, (long)*target_msc, ++ (*current_msc >= *target_msc - 1) ? "yes" : "no")); ++ return *current_msc >= *target_msc - 1; + } + + DBG(("%s: explicit waits requests, divisor=%ld\n", + __FUNCTION__, (long)divisor)); + *current_msc = get_current_msc(sna, draw, crtc); +- return false; ++ if (*current_msc >= *target_msc) { ++ DBG(("%s: missed target, queueing event for next: current=%lld, target=%lld, divisor=%lld, remainder=%lld\n", ++ __FUNCTION__, ++ (long long)*current_msc, ++ (long long)*target_msc, ++ (long long)divisor, ++ (long long)remainder)); ++ ++ *target_msc = *current_msc + remainder - *current_msc % divisor; ++ if (*target_msc <= *current_msc) ++ *target_msc += divisor; ++ } ++ ++ DBG(("%s: target_msc=%lld, current_msc=%lld, immediate?=%d\n", ++ __FUNCTION__, (long long)*target_msc, (long long)*current_msc, ++ *current_msc >= *target_msc - 1)); ++ return *current_msc >= *target_msc - 1; + } + + static bool + sna_dri2_schedule_flip(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, + DRI2BufferPtr front, DRI2BufferPtr back, +- CARD64 *target_msc, CARD64 divisor, CARD64 remainder, ++ bool immediate, CARD64 *target_msc, CARD64 current_msc, + DRI2SwapEventPtr func, void *data) + { + struct sna *sna = to_sna_from_drawable(draw); + struct sna_dri2_event *info; +- uint64_t current_msc; +- +- if (immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) { +- int type; + ++ if (immediate) { ++ bool signal = false; + info = sna->dri2.flip_pending; + DBG(("%s: performing immediate swap on pipe %d, 
pending? %d, mode: %d, continuation? %d\n", +- __FUNCTION__, sna_crtc_to_pipe(crtc), +- info != NULL, info ? info->mode : 0, ++ __FUNCTION__, sna_crtc_pipe(crtc), ++ info != NULL, info ? info->flip_continue : 0, + info && info->draw == draw)); + + if (info && info->draw == draw) { + assert(info->type != FLIP); +- assert(info->front == front); ++ assert(info->queued); ++ assert(info->front != info->back); ++ if (info->front != front) { ++ assert(info->front != NULL); ++ _sna_dri2_destroy_buffer(sna, draw, info->front); ++ info->front = sna_dri2_reference_buffer(front); ++ } + if (info->back != back) { +- _sna_dri2_destroy_buffer(sna, info->back); ++ assert(info->back != NULL); ++ _sna_dri2_destroy_buffer(sna, draw, info->back); + info->back = sna_dri2_reference_buffer(back); + } +- if (info->mode || current_msc >= *target_msc) { +- DBG(("%s: executing xchg of pending flip\n", +- __FUNCTION__)); +- sna_dri2_xchg(draw, front, back); +- info->mode = type = FLIP_COMPLETE; +- goto new_back; +- } else { ++ assert(info->front != info->back); ++ DBG(("%s: executing xchg of pending flip: flip_continue=%d, keepalive=%d, chain?=%d\n", __FUNCTION__, info->flip_continue, info->keepalive, current_msc < *target_msc)); ++ sna_dri2_xchg(draw, front, back); ++ info->keepalive = KEEPALIVE; ++ if (xorg_can_triple_buffer() && ++ current_msc < *target_msc) { + DBG(("%s: chaining flip\n", __FUNCTION__)); +- type = FLIP_THROTTLE; +- if (xorg_can_triple_buffer()) +- info->mode = -type; +- else +- info->mode = -FLIP_COMPLETE; ++ info->flip_continue = FLIP_THROTTLE; + goto out; ++ } else { ++ info->flip_continue = FLIP_COMPLETE; ++ signal = info->signal; ++ assert(info->draw); ++ info->signal = true; ++ goto new_back; + } + } + +- info = sna_dri2_add_event(sna, draw, client); ++ info = sna_dri2_add_event(sna, draw, client, crtc); + if (info == NULL) + return false; + + assert(info->crtc == crtc); + info->event_complete = func; + info->event_data = data; ++ assert(info->draw); ++ info->signal 
= true; + ++ assert(front != back); + info->front = sna_dri2_reference_buffer(front); + info->back = sna_dri2_reference_buffer(back); + +@@ -2584,26 +3181,33 @@ sna_dri2_schedule_flip(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, + */ + DBG(("%s: queueing flip after pending completion\n", + __FUNCTION__)); +- info->type = type = FLIP; ++ info->type = FLIP; + sna->dri2.flip_pending = info; +- assert(info->queued); + current_msc++; ++ } else if (sna->mode.flip_active) { ++ DBG(("%s: %d outstanding flips from old client, queueing\n", ++ __FUNCTION__, sna->mode.flip_active)); ++ goto queue; + } else { +- info->type = type = use_triple_buffer(sna, client, *target_msc == 0); ++ info->type = use_triple_buffer(sna, client, *target_msc == 0); + if (!sna_dri2_flip(info)) { + DBG(("%s: flip failed, falling back\n", __FUNCTION__)); ++ info->signal = false; + sna_dri2_event_free(info); + return false; + } ++ assert(get_private(info->front)->bo->active_scanout); + } + +- swap_limit(draw, 1 + (type == FLIP_THROTTLE)); +- if (type >= FLIP_COMPLETE) { ++ swap_limit(draw, 1 + (info->type == FLIP_THROTTLE)); ++ if (info->type >= FLIP_COMPLETE) { + new_back: + if (!xorg_can_triple_buffer()) +- sna_dri2_get_back(sna, draw, back, info); ++ sna_dri2_get_back(sna, draw, back); + DBG(("%s: fake triple buffering, unblocking client\n", __FUNCTION__)); + frame_swap_complete(info, DRI2_EXCHANGE_COMPLETE); ++ assert(info->draw); ++ info->signal = signal; + if (info->type == FLIP_ASYNC) + sna_dri2_event_free(info); + } +@@ -2613,57 +3217,34 @@ out: + return true; + } + +- info = sna_dri2_add_event(sna, draw, client); ++queue: ++ if (KEEPALIVE > 1 && sna->dri2.flip_pending) { ++ info = sna->dri2.flip_pending; ++ info->keepalive = 1; ++ } ++ ++ info = sna_dri2_add_event(sna, draw, client, crtc); + if (info == NULL) + return false; + + assert(info->crtc == crtc); + info->event_complete = func; + info->event_data = data; ++ assert(info->draw); ++ info->signal = true; + info->type = FLIP; + 
++ assert(front != back); + info->front = sna_dri2_reference_buffer(front); + info->back = sna_dri2_reference_buffer(back); + +- /* +- * If divisor is zero, or current_msc is smaller than target_msc +- * we just need to make sure target_msc passes before initiating +- * the swap. +- */ +- if (divisor && current_msc >= *target_msc) { +- DBG(("%s: missed target, queueing event for next: current=%lld, target=%lld, divisor=%lld, remainder=%lld\n", +- __FUNCTION__, +- (long long)current_msc, +- (long long)*target_msc, +- (long long)divisor, +- (long long)remainder)); +- +- *target_msc = current_msc + remainder - current_msc % divisor; +- if (*target_msc <= current_msc) +- *target_msc += divisor; +- } +- +- if (*target_msc <= current_msc + 1) { +- if (!sna_dri2_flip(info)) { +- sna_dri2_event_free(info); +- return false; +- } ++ if (*target_msc <= current_msc + 1 && sna_dri2_flip(info)) { + *target_msc = current_msc + 1; + } else { +- union drm_wait_vblank vbl; +- +- VG_CLEAR(vbl); +- +- vbl.request.type = +- DRM_VBLANK_ABSOLUTE | +- DRM_VBLANK_EVENT; +- + /* Account for 1 frame extra pageflip delay */ +- vbl.reply.sequence = draw_target_seq(draw, *target_msc - 1); +- vbl.request.signal = (uintptr_t)info; +- +- info->queued = true; +- if (sna_wait_vblank(sna, &vbl, info->pipe)) { ++ if (!sna_wait_vblank(info, ++ draw_target_seq(draw, *target_msc - 1))) { ++ info->signal = false; + sna_dri2_event_free(info); + return false; + } +@@ -2674,128 +3255,6 @@ out: + return true; + } + +-static bool +-sna_dri2_schedule_xchg(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, +- DRI2BufferPtr front, DRI2BufferPtr back, +- CARD64 *target_msc, CARD64 divisor, CARD64 remainder, +- DRI2SwapEventPtr func, void *data) +-{ +- struct sna *sna = to_sna_from_drawable(draw); +- uint64_t current_msc; +- bool sync, event; +- +- if (!immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) +- return false; +- +- sync = current_msc < *target_msc; +- event = dri2_chain(draw) == NULL; +- 
if (!sync || event) { +- DBG(("%s: performing immediate xchg on pipe %d\n", +- __FUNCTION__, sna_crtc_to_pipe(crtc))); +- sna_dri2_xchg(draw, front, back); +- } +- if (sync) { +- struct sna_dri2_event *info; +- +- info = sna_dri2_add_event(sna, draw, client); +- if (!info) +- goto complete; +- +- info->event_complete = func; +- info->event_data = data; +- +- info->front = sna_dri2_reference_buffer(front); +- info->back = sna_dri2_reference_buffer(back); +- info->type = SWAP_THROTTLE; +- +- if (event) { +- union drm_wait_vblank vbl; +- +- VG_CLEAR(vbl); +- vbl.request.type = +- DRM_VBLANK_RELATIVE | +- DRM_VBLANK_EVENT; +- vbl.request.sequence = 1; +- vbl.request.signal = (uintptr_t)info; +- +- info->queued = true; +- if (sna_wait_vblank(sna, &vbl, info->pipe)) { +- sna_dri2_event_free(info); +- goto complete; +- } +- +- swap_limit(draw, 2); +- } +- } else { +-complete: +- fake_swap_complete(sna, client, draw, crtc, DRI2_EXCHANGE_COMPLETE, func, data); +- } +- +- *target_msc = current_msc + 1; +- return true; +-} +- +-static bool +-sna_dri2_schedule_xchg_crtc(ClientPtr client, DrawablePtr draw, xf86CrtcPtr crtc, +- DRI2BufferPtr front, DRI2BufferPtr back, +- CARD64 *target_msc, CARD64 divisor, CARD64 remainder, +- DRI2SwapEventPtr func, void *data) +-{ +- struct sna *sna = to_sna_from_drawable(draw); +- uint64_t current_msc; +- bool sync, event; +- +- if (!immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) +- return false; +- +- sync = current_msc < *target_msc; +- event = dri2_chain(draw) == NULL; +- if (!sync || event) { +- DBG(("%s: performing immediate xchg only on pipe %d\n", +- __FUNCTION__, sna_crtc_to_pipe(crtc))); +- sna_dri2_xchg_crtc(sna, draw, crtc, front, back); +- } +- if (sync) { +- struct sna_dri2_event *info; +- +- info = sna_dri2_add_event(sna, draw, client); +- if (!info) +- goto complete; +- +- info->event_complete = func; +- info->event_data = data; +- +- info->front = sna_dri2_reference_buffer(front); +- info->back = 
sna_dri2_reference_buffer(back); +- info->type = SWAP_THROTTLE; +- +- if (event) { +- union drm_wait_vblank vbl; +- +- VG_CLEAR(vbl); +- vbl.request.type = +- DRM_VBLANK_RELATIVE | +- DRM_VBLANK_EVENT; +- vbl.request.sequence = 1; +- vbl.request.signal = (uintptr_t)info; +- +- info->queued = true; +- if (sna_wait_vblank(sna, &vbl, info->pipe)) { +- sna_dri2_event_free(info); +- goto complete; +- } +- +- swap_limit(draw, 2); +- } +- } else { +-complete: +- fake_swap_complete(sna, client, draw, crtc, DRI2_EXCHANGE_COMPLETE, func, data); +- } +- +- *target_msc = current_msc + 1; +- return true; +-} +- + static bool has_pending_events(struct sna *sna) + { + struct pollfd pfd; +@@ -2830,11 +3289,11 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + CARD64 remainder, DRI2SwapEventPtr func, void *data) + { + struct sna *sna = to_sna_from_drawable(draw); +- union drm_wait_vblank vbl; + xf86CrtcPtr crtc = NULL; + struct sna_dri2_event *info = NULL; + int type = DRI2_EXCHANGE_COMPLETE; + CARD64 current_msc; ++ bool immediate; + + DBG(("%s: draw=%lu %dx%d, pixmap=%ld %dx%d, back=%u (refs=%d/%d, flush=%d) , front=%u (refs=%d/%d, flush=%d)\n", + __FUNCTION__, +@@ -2860,6 +3319,7 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + assert(get_private(front)->refcnt); + assert(get_private(back)->refcnt); + ++ assert(get_private(back)->bo != get_private(front)->bo); + assert(get_private(front)->bo->refcnt); + assert(get_private(back)->bo->refcnt); + +@@ -2876,17 +3336,17 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + goto skip; + } + +- assert(sna_pixmap_from_drawable(draw)->flush); +- + if (draw->type != DRAWABLE_PIXMAP) { + WindowPtr win = (WindowPtr)draw; + struct dri2_window *priv = dri2_window(win); ++ + if (priv->front) { +- assert(front == priv->front); +- assert(get_private(priv->front)->refcnt > 1); +- get_private(priv->front)->refcnt--; +- priv->front = NULL; ++ front 
= priv->front; ++ assert(front->attachment == DRI2BufferFrontLeft); ++ assert(get_private(front)->refcnt); ++ assert(get_private(front)->pixmap == get_drawable_pixmap(draw)); + } ++ + if (win->clipList.extents.x2 <= win->clipList.extents.x1 || + win->clipList.extents.y2 <= win->clipList.extents.y1) { + DBG(("%s: window clipped (%d, %d), (%d, %d)\n", +@@ -2899,6 +3359,10 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + } + } + ++ DBG(("%s: using front handle=%d, active_scanout?=%d, flush?=%d\n", __FUNCTION__, get_private(front)->bo->handle, get_private(front)->bo->active_scanout, sna_pixmap_from_drawable(draw)->flush)); ++ assert(get_private(front)->bo->active_scanout); ++ assert(sna_pixmap_from_drawable(draw)->flush); ++ + /* Drawable not displayed... just complete the swap */ + if ((sna->flags & SNA_NO_WAIT) == 0) + crtc = sna_dri2_get_crtc(draw); +@@ -2914,109 +3378,112 @@ sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + sna_mode_wakeup(sna); + } + +- if (can_xchg(sna, draw, front, back) && +- sna_dri2_schedule_xchg(client, draw, crtc, front, back, ++ immediate = immediate_swap(sna, draw, crtc, + target_msc, divisor, remainder, +- func, data)) +- return TRUE; +- +- if (can_xchg_crtc(sna, draw, front, back, crtc) && +- sna_dri2_schedule_xchg_crtc(client, draw, crtc, front, back, +- target_msc, divisor, remainder, +- func, data)) +- return TRUE; ++ ¤t_msc); + + if (can_flip(sna, draw, front, back, crtc) && + sna_dri2_schedule_flip(client, draw, crtc, front, back, +- target_msc, divisor, remainder, ++ immediate, target_msc, current_msc, + func, data)) + return TRUE; + +- VG_CLEAR(vbl); +- +- info = sna_dri2_add_event(sna, draw, client); ++ info = sna_dri2_add_event(sna, draw, client, crtc); + if (!info) + goto blit; + + assert(info->crtc == crtc); + info->event_complete = func; + info->event_data = data; ++ assert(info->draw); ++ info->signal = true; + ++ assert(front != back); + info->front = 
sna_dri2_reference_buffer(front); + info->back = sna_dri2_reference_buffer(back); + +- if (immediate_swap(sna, *target_msc, divisor, draw, crtc, ¤t_msc)) { ++ if (immediate) { + bool sync = current_msc < *target_msc; +- if (!sna_dri2_immediate_blit(sna, info, sync, true)) +- sna_dri2_event_free(info); ++ sna_dri2_immediate_blit(sna, info, sync); + *target_msc = current_msc + sync; ++ DBG(("%s: reported target_msc=%llu\n", ++ __FUNCTION__, *target_msc)); + return TRUE; + } + +- vbl.request.type = +- DRM_VBLANK_ABSOLUTE | +- DRM_VBLANK_EVENT; +- vbl.request.signal = (uintptr_t)info; +- +- /* +- * If divisor is zero, or current_msc is smaller than target_msc +- * we just need to make sure target_msc passes before initiating +- * the swap. +- */ + info->type = SWAP; +- info->queued = true; +- if (divisor && current_msc >= *target_msc) { +- DBG(("%s: missed target, queueing event for next: current=%lld, target=%lld, divisor=%lld, remainder=%lld\n", +- __FUNCTION__, +- (long long)current_msc, +- (long long)*target_msc, +- (long long)divisor, +- (long long)remainder)); +- +- *target_msc = current_msc + remainder - current_msc % divisor; +- if (*target_msc <= current_msc) +- *target_msc += divisor; +- } +- vbl.request.sequence = draw_target_seq(draw, *target_msc - 1); + if (*target_msc <= current_msc + 1) { + DBG(("%s: performing blit before queueing\n", __FUNCTION__)); +- assert(info->queued); +- info->bo = __sna_dri2_copy_region(sna, draw, NULL, +- back, front, +- true); +- info->type = SWAP_WAIT; +- +- vbl.request.type = +- DRM_VBLANK_RELATIVE | +- DRM_VBLANK_EVENT; +- vbl.request.sequence = 1; ++ __sna_dri2_copy_event(info, DRI2_SYNC); ++ info->type = SWAP_COMPLETE; ++ if (!sna_next_vblank(info)) ++ goto fake; ++ ++ DBG(("%s: reported target_msc=%llu\n", ++ __FUNCTION__, *target_msc)); + *target_msc = current_msc + 1; +- } ++ swap_limit(draw, 2); ++ } else { ++ if (!sna_wait_vblank(info, ++ draw_target_seq(draw, *target_msc - 1))) ++ goto blit; + +- 
assert(info->queued); +- if (sna_wait_vblank(sna, &vbl, info->pipe)) +- goto blit; ++ DBG(("%s: reported target_msc=%llu (in)\n", ++ __FUNCTION__, *target_msc)); ++ swap_limit(draw, 1); ++ } + +- DBG(("%s: reported target_msc=%llu\n", __FUNCTION__, *target_msc)); +- swap_limit(draw, 1 + (info->type == SWAP_WAIT)); + return TRUE; + + blit: + DBG(("%s -- blit\n", __FUNCTION__)); +- if (info) +- sna_dri2_event_free(info); + if (can_xchg(sna, draw, front, back)) { + sna_dri2_xchg(draw, front, back); + } else { +- __sna_dri2_copy_region(sna, draw, NULL, back, front, false); ++ __sna_dri2_copy_region(sna, draw, NULL, back, front, 0); ++ front->flags = back->flags; + type = DRI2_BLIT_COMPLETE; + } ++ if (draw->type == DRAWABLE_PIXMAP) ++ goto fake; + skip: + DBG(("%s: unable to show frame, unblocking client\n", __FUNCTION__)); +- if (crtc == NULL) +- crtc = sna_mode_first_crtc(sna); +- fake_swap_complete(sna, client, draw, crtc, type, func, data); +- *target_msc = 0; /* offscreen, so zero out target vblank count */ ++ if (crtc == NULL && (sna->flags & SNA_NO_WAIT) == 0) ++ crtc = sna_primary_crtc(sna); ++ if (crtc && sna_crtc_is_on(crtc)) { ++ if (info == NULL) ++ info = sna_dri2_add_event(sna, draw, client, crtc); ++ if (info != dri2_chain(draw)) ++ goto fake; ++ ++ assert(info->crtc == crtc); ++ ++ info->type = SWAP_COMPLETE; ++ info->event_complete = func; ++ info->event_data = data; ++ assert(info->draw); ++ info->signal = true; ++ ++ if (info->front == NULL) ++ info->front = sna_dri2_reference_buffer(front); ++ if (info->back == NULL) ++ info->back = sna_dri2_reference_buffer(back); ++ ++ if (!sna_next_vblank(info)) ++ goto fake; ++ ++ swap_limit(draw, 1); ++ } else { ++fake: ++ /* XXX Use a Timer to throttle the client? 
*/ ++ fake_swap_complete(sna, client, draw, crtc, type, func, data); ++ if (info) { ++ assert(info->draw); ++ info->signal = false; ++ sna_dri2_event_free(info); ++ } ++ } ++ DBG(("%s: reported target_msc=%llu (in)\n", __FUNCTION__, *target_msc)); + return TRUE; + } + +@@ -3030,27 +3497,25 @@ sna_dri2_get_msc(DrawablePtr draw, CARD64 *ust, CARD64 *msc) + struct sna *sna = to_sna_from_drawable(draw); + xf86CrtcPtr crtc = sna_dri2_get_crtc(draw); + const struct ust_msc *swap; ++ union drm_wait_vblank vbl; + + DBG(("%s(draw=%ld, pipe=%d)\n", __FUNCTION__, draw->id, +- crtc ? sna_crtc_to_pipe(crtc) : -1)); ++ crtc ? sna_crtc_pipe(crtc) : -1)); + +- if (crtc != NULL) { +- union drm_wait_vblank vbl; ++ /* Drawable not displayed, make up a *monotonic* value */ ++ if (crtc == NULL) ++ crtc = sna_primary_crtc(sna); ++ if (crtc == NULL) ++ return FALSE; + +- VG_CLEAR(vbl); +- vbl.request.type = _DRM_VBLANK_RELATIVE; +- vbl.request.sequence = 0; +- if (sna_wait_vblank(sna, &vbl, sna_crtc_to_pipe(crtc)) == 0) +- sna_crtc_record_vblank(crtc, &vbl); +- } else +- /* Drawable not displayed, make up a *monotonic* value */ +- crtc = sna_mode_first_crtc(sna); ++ if (sna_query_vblank(sna, crtc, &vbl) == 0) ++ sna_crtc_record_vblank(crtc, &vbl); + + swap = sna_crtc_last_swap(crtc); + *msc = draw_current_msc(draw, crtc, swap->msc); + *ust = ust64(swap->tv_sec, swap->tv_usec); +- DBG(("%s: msc=%llu, ust=%llu\n", __FUNCTION__, +- (long long)*msc, (long long)*ust)); ++ DBG(("%s: msc=%llu [raw=%llu], ust=%llu\n", __FUNCTION__, ++ (long long)*msc, swap->msc, (long long)*ust)); + return TRUE; + } + +@@ -3068,32 +3533,22 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc + struct sna_dri2_event *info = NULL; + xf86CrtcPtr crtc; + CARD64 current_msc; +- union drm_wait_vblank vbl; + const struct ust_msc *swap; +- int pipe; + + crtc = sna_dri2_get_crtc(draw); + DBG(("%s(pipe=%d, target_msc=%llu, divisor=%llu, rem=%llu)\n", +- __FUNCTION__, crtc ? 
sna_crtc_to_pipe(crtc) : -1, ++ __FUNCTION__, crtc ? sna_crtc_pipe(crtc) : -1, + (long long)target_msc, + (long long)divisor, + (long long)remainder)); + + /* Drawable not visible, return immediately */ + if (crtc == NULL) +- goto out_complete; +- +- pipe = sna_crtc_to_pipe(crtc); +- +- VG_CLEAR(vbl); +- +- /* Get current count */ +- vbl.request.type = _DRM_VBLANK_RELATIVE; +- vbl.request.sequence = 0; +- if (sna_wait_vblank(sna, &vbl, pipe)) +- goto out_complete; ++ crtc = sna_primary_crtc(sna); ++ if (crtc == NULL) ++ return FALSE; + +- current_msc = draw_current_msc(draw, crtc, sna_crtc_record_vblank(crtc, &vbl)); ++ current_msc = get_current_msc(sna, draw, crtc); + + /* If target_msc already reached or passed, set it to + * current_msc to ensure we return a reasonable value back +@@ -3104,15 +3559,13 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc + if (divisor == 0 && current_msc >= target_msc) + goto out_complete; + +- info = sna_dri2_add_event(sna, draw, client); ++ info = sna_dri2_add_event(sna, draw, client, crtc); + if (!info) + goto out_complete; + + assert(info->crtc == crtc); + info->type = WAITMSC; + +- vbl.request.signal = (uintptr_t)info; +- vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + /* + * If divisor is zero, or current_msc is smaller than target_msc, + * we just need to make sure target_msc passes before waking up the +@@ -3129,10 +3582,8 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc + if (target_msc <= current_msc) + target_msc += divisor; + } +- vbl.request.sequence = draw_target_seq(draw, target_msc); + +- info->queued = true; +- if (sna_wait_vblank(sna, &vbl, pipe)) ++ if (!sna_wait_vblank(info, draw_target_seq(draw, target_msc))) + goto out_free_info; + + DRI2BlockClient(client, draw); +@@ -3141,8 +3592,6 @@ sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc + out_free_info: + sna_dri2_event_free(info); + out_complete: +- 
if (crtc == NULL) +- crtc = sna_mode_first_crtc(sna); + swap = sna_crtc_last_swap(crtc); + DRI2WaitMSCComplete(client, draw, + draw_current_msc(draw, crtc, swap->msc), +@@ -3231,9 +3680,18 @@ static bool is_level(const char **str) + return false; + } + ++static const char *options_get_dri(struct sna *sna) ++{ ++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) ++ return xf86GetOptValString(sna->Options, OPTION_DRI); ++#else ++ return NULL; ++#endif ++} ++ + static const char *dri_driver_name(struct sna *sna) + { +- const char *s = xf86GetOptValString(sna->Options, OPTION_DRI); ++ const char *s = options_get_dri(sna); + + if (is_level(&s)) { + if (sna->kgem.gen < 030) +@@ -3259,7 +3717,7 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) + + if (wedged(sna)) { + xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, +- "loading DRI2 whilst the GPU is wedged.\n"); ++ "loading DRI2 whilst acceleration is disabled.\n"); + } + + if (xf86LoaderCheckSymbol("DRI2Version")) +@@ -3274,7 +3732,7 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) + memset(&info, '\0', sizeof(info)); + info.fd = sna->kgem.fd; + info.driverName = dri_driver_name(sna); +- info.deviceName = intel_get_client_name(sna->dev); ++ info.deviceName = intel_get_master_name(sna->dev); + + DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n", + __FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName)); +@@ -3299,11 +3757,12 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) + info.numDrivers = 2; + info.driverNames = driverNames; + driverNames[0] = info.driverName; +- driverNames[1] = info.driverName; ++ driverNames[1] = "va_gl"; + #endif + + #if DRI2INFOREC_VERSION >= 6 + if (xorg_can_triple_buffer()) { ++ DBG(("%s: enabling Xorg triple buffering\n", __FUNCTION__)); + info.version = 6; + info.SwapLimitValidate = sna_dri2_swap_limit_validate; + info.ReuseBufferNotify = sna_dri2_reuse_buffer; +@@ -3311,8 +3770,10 @@ bool sna_dri2_open(struct sna *sna, ScreenPtr screen) + 
#endif + + #if USE_ASYNC_SWAP ++ DBG(("%s: enabled async swap and buffer age\n", __FUNCTION__)); + info.version = 10; + info.scheduleSwap0 = 1; ++ info.bufferAge = 1; + #endif + + return DRI2ScreenInit(screen, &info); +diff --git a/src/sna/sna_dri3.c b/src/sna/sna_dri3.c +index f586e242..ce4970ae 100644 +--- a/src/sna/sna_dri3.c ++++ b/src/sna/sna_dri3.c +@@ -55,11 +55,14 @@ static inline void mark_dri3_pixmap(struct sna *sna, struct sna_pixmap *priv, st + if (bo->exec) + sna->kgem.flush = 1; + if (bo == priv->gpu_bo) +- priv->flush |= 3; ++ priv->flush |= FLUSH_READ | FLUSH_WRITE; + else + priv->shm = true; + +- sna_accel_watch_flush(sna, 1); ++ sna_watch_flush(sna, 1); ++ ++ kgem_bo_submit(&sna->kgem, bo); ++ kgem_bo_unclean(&sna->kgem, bo); + } + + static void sna_sync_flush(struct sna *sna, struct sna_pixmap *priv) +@@ -270,6 +273,8 @@ static PixmapPtr sna_dri3_pixmap_from_fd(ScreenPtr screen, + priv->ptr = MAKE_STATIC_PTR(pixmap->devPrivate.ptr); + } else { + assert(priv->gpu_bo == bo); ++ priv->create = kgem_can_create_2d(&sna->kgem, ++ width, height, depth); + priv->pinned |= PIN_DRI3; + } + list_add(&priv->cow_list, &sna->dri3.pixmaps); +@@ -325,6 +330,15 @@ static int sna_dri3_fd_from_pixmap(ScreenPtr screen, + return -1; + } + ++ if (bo->tiling && !sna->kgem.can_fence) { ++ if (!sna_pixmap_change_tiling(pixmap, I915_TILING_NONE)) { ++ DBG(("%s: unable to discard GPU tiling (%d) for DRI3 protocol\n", ++ __FUNCTION__, bo->tiling)); ++ return -1; ++ } ++ bo = priv->gpu_bo; ++ } ++ + fd = kgem_bo_export_to_prime(&sna->kgem, bo); + if (fd == -1) { + DBG(("%s: exporting handle=%d to fd failed\n", __FUNCTION__, bo->handle)); +diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c +index 8a3599c7..1b4015de 100644 +--- a/src/sna/sna_driver.c ++++ b/src/sna/sna_driver.c +@@ -57,6 +57,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #include <mi.h> + #include <micmap.h> + ++#if defined(HAVE_X11_EXTENSIONS_DPMSCONST_H) ++#include <X11/extensions/dpmsconst.h> ++#else ++#define DPMSModeOn 0 ++#define DPMSModeOff 3 ++#endif ++ + #include <sys/ioctl.h> + #include <sys/fcntl.h> + #include <sys/poll.h> +@@ -69,6 +76,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. + + #if HAVE_DOT_GIT + #include "git_version.h" ++#else ++#define git_version "not compiled from git" + #endif + + #ifdef TEARFREE +@@ -185,12 +194,12 @@ sna_set_fallback_mode(ScrnInfoPtr scrn) + + xf86DisableUnusedFunctions(scrn); + #ifdef RANDR_12_INTERFACE +- if (get_root_window(scrn->pScreen)) +- xf86RandR12TellChanged(scrn->pScreen); ++ if (get_root_window(xf86ScrnToScreen(scrn))) ++ xf86RandR12TellChanged(xf86ScrnToScreen(scrn)); + #endif + } + +-static Bool sna_set_desired_mode(struct sna *sna) ++static void sna_set_desired_mode(struct sna *sna) + { + ScrnInfoPtr scrn = sna->scrn; + +@@ -203,7 +212,6 @@ static Bool sna_set_desired_mode(struct sna *sna) + } + + sna_mode_check(sna); +- return TRUE; + } + + /** +@@ -222,7 +230,7 @@ static Bool sna_create_screen_resources(ScreenPtr screen) + screen->width, screen->height, screen->rootDepth)); + + assert(sna->scrn == xf86ScreenToScrn(screen)); +- assert(sna->scrn->pScreen == screen); ++ assert(to_screen_from_sna(sna) == screen); + + /* free the data used during miInitScreen */ + free(screen->devPrivate); +@@ -273,33 +281,89 @@ static Bool sna_create_screen_resources(ScreenPtr screen) + if (serverGeneration == 1 && (sna->flags & SNA_IS_HOSTED) == 0) + sna_copy_fbcon(sna); + +- (void)sna_set_desired_mode(sna); ++ sna_set_desired_mode(sna); + } + + return TRUE; + } + +-static Bool sna_save_screen(ScreenPtr screen, int mode) ++static void sna_dpms_set(ScrnInfoPtr scrn, int mode, int flags) + { +- ScrnInfoPtr scrn = xf86ScreenToScrn(screen); ++ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); ++ struct sna *sna = to_sna(scrn); ++ bool changed = false; ++ int i; + +- DBG(("%s(mode=%d)\n", 
__FUNCTION__, mode)); ++ DBG(("%s(mode=%d, flags=%d), vtSema=%d => off?=%d\n", ++ __FUNCTION__, mode, flags, scrn->vtSema, mode!=DPMSModeOn)); + if (!scrn->vtSema) +- return FALSE; ++ return; + +- xf86SaveScreen(screen, mode); +- sna_crtc_config_notify(screen); +- return TRUE; ++ /* Opencoded version of xf86DPMSSet(). ++ * ++ * The principle difference is to skip calling crtc->dpms() when ++ * turning off the display. This (on recent enough kernels at ++ * least) should be equivalent in power consumption, but require ++ * less work (hence quicker and less likely to fail) when switching ++ * back on. ++ */ ++ if (mode != DPMSModeOn) { ++ if (sna->mode.hidden == 0 && !(sna->flags & SNA_NO_DPMS)) { ++ DBG(("%s: hiding %d outputs\n", ++ __FUNCTION__, config->num_output)); ++ for (i = 0; i < config->num_output; i++) { ++ xf86OutputPtr output = config->output[i]; ++ if (output->crtc != NULL) ++ output->funcs->dpms(output, mode); ++ } ++ sna->mode.hidden = sna->mode.front_active + 1; ++ sna->mode.front_active = 0; ++ changed = true; ++ } ++ } else { ++ /* Re-enable CRTC that have been forced off via other means */ ++ if (sna->mode.hidden != 0) { ++ DBG(("%s: unhiding %d crtc, %d outputs\n", ++ __FUNCTION__, config->num_crtc, config->num_output)); ++ sna->mode.front_active = sna->mode.hidden - 1; ++ sna->mode.hidden = 0; ++ for (i = 0; i < config->num_crtc; i++) { ++ xf86CrtcPtr crtc = config->crtc[i]; ++ if (crtc->enabled) ++ crtc->funcs->dpms(crtc, mode); ++ } ++ ++ for (i = 0; i < config->num_output; i++) { ++ xf86OutputPtr output = config->output[i]; ++ if (output->crtc != NULL) ++ output->funcs->dpms(output, mode); ++ } ++ changed = true; ++ } ++ } ++ ++ DBG(("%s: hiding outputs? %d, front active? %d, changed? 
%d\n", ++ __FUNCTION__, sna->mode.hidden, sna->mode.front_active, changed)); ++ ++ if (changed) ++ sna_crtc_config_notify(xf86ScrnToScreen(scrn)); + } + +-static void sna_dpms_set(ScrnInfoPtr scrn, int mode, int flags) ++static Bool sna_save_screen(ScreenPtr screen, int mode) + { +- DBG(("%s(mode=%d, flags=%d)\n", __FUNCTION__, mode)); +- if (!scrn->vtSema) +- return; ++ ScrnInfoPtr scrn = xf86ScreenToScrn(screen); ++ ++ DBG(("%s(mode=%d [unblank=%d])\n", ++ __FUNCTION__, mode, xf86IsUnblank(mode))); + +- xf86DPMSSet(scrn, mode, flags); +- sna_crtc_config_notify(xf86ScrnToScreen(scrn)); ++ /* We have to unroll xf86SaveScreen() here as it is called ++ * by DPMSSet() nullifying our special handling crtc->dpms() ++ * in sna_dpms_set(). ++ */ ++ sna_dpms_set(scrn, ++ xf86IsUnblank(mode) ? DPMSModeOn : DPMSModeOff, ++ 0); ++ return TRUE; + } + + static void sna_selftest(void) +@@ -330,107 +394,6 @@ static void sna_setup_capabilities(ScrnInfoPtr scrn, int fd) + #endif + } + +-static int +-namecmp(const char *s1, const char *s2) +-{ +- char c1, c2; +- +- if (!s1 || *s1 == 0) { +- if (!s2 || *s2 == 0) +- return 0; +- else +- return 1; +- } +- +- while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') +- s1++; +- +- while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') +- s2++; +- +- c1 = isupper(*s1) ? tolower(*s1) : *s1; +- c2 = isupper(*s2) ? tolower(*s2) : *s2; +- while (c1 == c2) { +- if (c1 == '\0') +- return 0; +- +- s1++; +- while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') +- s1++; +- +- s2++; +- while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') +- s2++; +- +- c1 = isupper(*s1) ? tolower(*s1) : *s1; +- c2 = isupper(*s2) ? 
tolower(*s2) : *s2; +- } +- +- return c1 - c2; +-} +- +-static Bool sna_option_cast_to_bool(struct sna *sna, int id, Bool val) +-{ +- const char *str = xf86GetOptValString(sna->Options, id); +- +- if (str == NULL) +- return val; +- +- if (*str == '\0') +- return TRUE; +- +- if (namecmp(str, "1") == 0) +- return TRUE; +- if (namecmp(str, "on") == 0) +- return TRUE; +- if (namecmp(str, "true") == 0) +- return TRUE; +- if (namecmp(str, "yes") == 0) +- return TRUE; +- +- if (namecmp(str, "0") == 0) +- return FALSE; +- if (namecmp(str, "off") == 0) +- return FALSE; +- if (namecmp(str, "false") == 0) +- return FALSE; +- if (namecmp(str, "no") == 0) +- return FALSE; +- +- return val; +-} +- +-static unsigned sna_option_cast_to_unsigned(struct sna *sna, int id, unsigned val) +-{ +- const char *str = xf86GetOptValString(sna->Options, id); +- unsigned v; +- +- if (str == NULL || *str == '\0') +- return val; +- +- if (namecmp(str, "on") == 0) +- return val; +- if (namecmp(str, "true") == 0) +- return val; +- if (namecmp(str, "yes") == 0) +- return val; +- +- if (namecmp(str, "0") == 0) +- return 0; +- if (namecmp(str, "off") == 0) +- return 0; +- if (namecmp(str, "false") == 0) +- return 0; +- if (namecmp(str, "no") == 0) +- return 0; +- +- v = atoi(str); +- if (v) +- return v; +- +- return val; +-} +- + static Bool fb_supports_depth(int fd, int depth) + { + struct drm_i915_gem_create create; +@@ -470,16 +433,24 @@ static void setup_dri(struct sna *sna) + unsigned level; + + sna->dri2.available = false; ++ sna->dri2.enable = false; + sna->dri3.available = false; ++ sna->dri3.enable = false; ++ sna->dri3.override = false; + +- level = sna_option_cast_to_unsigned(sna, OPTION_DRI, ~0); ++ level = intel_option_cast_to_unsigned(sna->Options, OPTION_DRI, DEFAULT_DRI_LEVEL); + #if HAVE_DRI3 +- if (level >= 3) +- sna->dri3.available = !!xf86LoadSubModule(sna->scrn, "dri3"); ++ sna->dri3.available = !!xf86LoadSubModule(sna->scrn, "dri3"); ++ sna->dri3.override = ++ 
!sna->dri3.available || ++ xf86IsOptionSet(sna->Options, OPTION_DRI); ++ if (level >= 3 && sna->kgem.gen >= 040) ++ sna->dri3.enable = sna->dri3.available; + #endif + #if HAVE_DRI2 ++ sna->dri2.available = !!xf86LoadSubModule(sna->scrn, "dri2"); + if (level >= 2) +- sna->dri2.available = !!xf86LoadSubModule(sna->scrn, "dri2"); ++ sna->dri2.enable = sna->dri2.available; + #endif + } + +@@ -498,13 +469,13 @@ static bool enable_tear_free(struct sna *sna) + return ENABLE_TEAR_FREE; + } + +-static void setup_tear_free(struct sna *sna) ++static bool setup_tear_free(struct sna *sna) + { + MessageType from; + Bool enable; + + if (sna->flags & SNA_LINEAR_FB) +- return; ++ return false; + + if ((sna->flags & SNA_HAS_FLIP) == 0) { + from = X_PROBED; +@@ -518,11 +489,12 @@ static void setup_tear_free(struct sna *sna) + from = X_CONFIG; + + if (enable) +- sna->flags |= SNA_TEAR_FREE; ++ sna->flags |= SNA_WANT_TEAR_FREE | SNA_TEAR_FREE; + + done: + xf86DrvMsg(sna->scrn->scrnIndex, from, "TearFree %sabled\n", + sna->flags & SNA_TEAR_FREE ? 
"en" : "dis"); ++ return sna->flags & SNA_TEAR_FREE; + } + + /** +@@ -612,8 +584,10 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int probe) + } + + intel_detect_chipset(scrn, sna->dev); +- xf86DrvMsg(scrn->scrnIndex, X_PROBED, "CPU: %s\n", +- sna_cpu_features_to_string(sna->cpu_features, buf)); ++ xf86DrvMsg(scrn->scrnIndex, X_PROBED, ++ "CPU: %s; using a maximum of %d threads\n", ++ sna_cpu_features_to_string(sna->cpu_features, buf), ++ sna_use_threads(64*1024, 64*1024, 1)); + + if (!xf86SetDepthBpp(scrn, 24, 0, 0, + Support32bppFb | +@@ -651,18 +625,11 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int probe) + kgem_init(&sna->kgem, fd, + xf86GetPciInfoForEntity(pEnt->index), + sna->info->gen); +- if (xf86ReturnOptValBool(sna->Options, OPTION_ACCEL_DISABLE, FALSE) || +- !sna_option_cast_to_bool(sna, OPTION_ACCEL_METHOD, TRUE)) { +- xf86DrvMsg(sna->scrn->scrnIndex, X_CONFIG, +- "Disabling hardware acceleration.\n"); +- sna->kgem.wedged = true; +- } + + if (xf86ReturnOptValBool(sna->Options, OPTION_TILING_FB, FALSE)) + sna->flags |= SNA_LINEAR_FB; +- +- if (xf86ReturnOptValBool(sna->Options, OPTION_DELETE_DP12, FALSE)) +- sna->flags |= SNA_REMOVE_OUTPUTS; ++ if (!sna->kgem.can_fence) ++ sna->flags |= SNA_LINEAR_FB; + + if (!xf86ReturnOptValBool(sna->Options, OPTION_SWAPBUFFERS_WAIT, TRUE)) + sna->flags |= SNA_NO_WAIT; +@@ -695,7 +662,8 @@ static Bool sna_pre_init(ScrnInfoPtr scrn, int probe) + } + scrn->currentMode = scrn->modes; + +- setup_tear_free(sna); ++ if (!setup_tear_free(sna) && sna_mode_wants_tear_free(sna)) ++ sna->kgem.needs_dirtyfb = sna->kgem.has_dirtyfb; + + xf86SetGamma(scrn, zeros); + xf86SetDpi(scrn, 0, 0); +@@ -721,11 +689,13 @@ cleanup: + return FALSE; + } + ++#if !HAVE_NOTIFY_FD + static bool has_shadow(struct sna *sna) + { +- if (!sna->mode.shadow_damage) ++ if (!sna->mode.shadow_enabled) + return false; + ++ assert(sna->mode.shadow_damage); + if (RegionNil(DamageRegion(sna->mode.shadow_damage))) + return false; + +@@ -748,7 +718,7 @@ 
sna_block_handler(BLOCKHANDLER_ARGS_DECL) + sna->BlockHandler(BLOCKHANDLER_ARGS); + + if (*tv == NULL || ((*tv)->tv_usec | (*tv)->tv_sec) || has_shadow(sna)) +- sna_accel_block_handler(sna, tv); ++ sna_accel_block(sna, tv); + } + + static void +@@ -770,52 +740,102 @@ sna_wakeup_handler(WAKEUPHANDLER_ARGS_DECL) + + sna->WakeupHandler(WAKEUPHANDLER_ARGS); + +- sna_accel_wakeup_handler(sna); +- + if (FD_ISSET(sna->kgem.fd, (fd_set*)read_mask)) { + sna_mode_wakeup(sna); + /* Clear the flag so that subsequent ZaphodHeads don't block */ + FD_CLR(sna->kgem.fd, (fd_set*)read_mask); + } + } ++#else ++static void ++sna_block_handler(void *data, void *_timeout) ++{ ++ struct sna *sna = data; ++ int *timeout = _timeout; ++ struct timeval tv, *tvp; ++ ++ DBG(("%s (timeout=%d)\n", __FUNCTION__, *timeout)); ++ if (*timeout == 0) ++ return; ++ ++ if (*timeout < 0) { ++ tvp = NULL; ++ } else { ++ tv.tv_sec = *timeout / 1000; ++ tv.tv_usec = (*timeout % 1000) * 1000; ++ tvp = &tv; ++ } ++ ++ sna_accel_block(sna, &tvp); ++ if (tvp) ++ *timeout = tvp->tv_sec * 1000 + tvp->tv_usec / 1000; ++} ++#endif + + #if HAVE_UDEV ++#include <sys/stat.h> ++ + static void + sna_handle_uevents(int fd, void *closure) + { + struct sna *sna = closure; +- struct udev_device *dev; +- const char *str; + struct stat s; +- dev_t udev_devnum; ++ struct pollfd pfd; ++ bool hotplug = false; + + DBG(("%s\n", __FUNCTION__)); + +- dev = udev_monitor_receive_device(sna->uevent_monitor); +- if (!dev) +- return; ++ pfd.fd = udev_monitor_get_fd(sna->uevent_monitor); ++ pfd.events = POLLIN; ++ ++ if (fstat(sna->kgem.fd, &s)) ++ memset(&s, 0, sizeof(s)); ++ ++ while (poll(&pfd, 1, 0) > 0) { ++ struct udev_device *dev; ++ dev_t devnum; ++ ++ dev = udev_monitor_receive_device(sna->uevent_monitor); ++ if (dev == NULL) ++ break; ++ ++ devnum = udev_device_get_devnum(dev); ++ if (memcmp(&s.st_rdev, &devnum, sizeof(dev_t)) == 0) { ++ const char *str; ++ ++ str = udev_device_get_property_value(dev, "HOTPLUG"); ++ if (str && 
atoi(str) == 1) { ++ str = udev_device_get_property_value(dev, "CONNECTOR"); ++ if (str) { ++ hotplug |= sna_mode_find_hotplug_connector(sna, atoi(str)); ++ } else { ++ sna->flags |= SNA_REPROBE; ++ hotplug = true; ++ } ++ } ++ } + +- udev_devnum = udev_device_get_devnum(dev); +- if (fstat(sna->kgem.fd, &s) || memcmp(&s.st_rdev, &udev_devnum, sizeof (dev_t))) { + udev_device_unref(dev); +- return; + } + +- str = udev_device_get_property_value(dev, "HOTPLUG"); +- if (str && atoi(str) == 1) { +- ScrnInfoPtr scrn = sna->scrn; +- +- DBG(("%s: hotplug event (vtSema?=%d)\n", __FUNCTION__, scrn->vtSema)); ++ if (hotplug) { ++ DBG(("%s: hotplug event (vtSema?=%d)\n", ++ __FUNCTION__, sna->scrn->vtSema)); + +- if (scrn->vtSema) { +- sna_mode_discover(sna); +- sna_mode_check(sna); +- RRGetInfo(xf86ScrnToScreen(scrn), TRUE); +- } else ++ if (sna->scrn->vtSema) ++ sna_mode_discover(sna, true); ++ else + sna->flags |= SNA_REPROBE; + } ++} + +- udev_device_unref(dev); ++static bool has_randr(void) ++{ ++#if HAS_DIXREGISTERPRIVATEKEY ++ return dixPrivateKeyRegistered(rrPrivKey); ++#else ++ return *rrPrivKey; ++#endif + } + + static void +@@ -833,7 +853,7 @@ sna_uevent_init(struct sna *sna) + /* RandR will be disabled if Xinerama is active, and so generating + * RR hotplug events is then verboten. + */ +- if (!dixPrivateKeyRegistered(rrPrivKey)) ++ if (!has_randr()) + goto out; + + u = NULL; +@@ -861,7 +881,8 @@ sna_uevent_init(struct sna *sna) + + sna->uevent_monitor = mon; + out: +- xf86DrvMsg(sna->scrn->scrnIndex, from, "display hotplug detection %s\n", ++ xf86DrvMsg(sna->scrn->scrnIndex, from, ++ "Display hotplug detection %s\n", + sna->uevent_monitor ? 
"enabled" : "disabled"); + return; + +@@ -874,17 +895,10 @@ err_dev: + + static bool sna_uevent_poll(struct sna *sna) + { +- struct pollfd pfd; +- + if (sna->uevent_monitor == NULL) + return false; + +- pfd.fd = udev_monitor_get_fd(sna->uevent_monitor); +- pfd.events = POLLIN; +- +- while (poll(&pfd, 1, 0) > 0) +- sna_handle_uevents(pfd.fd, sna); +- ++ sna_handle_uevents(udev_monitor_get_fd(sna->uevent_monitor), sna); + return true; + } + +@@ -918,8 +932,10 @@ sna_randr_getinfo(ScreenPtr screen, Rotation *rotations) + { + struct sna *sna = to_sna_from_screen(screen); + ++ DBG(("%s()\n", __FUNCTION__)); ++ + if (!sna_uevent_poll(sna)) +- sna_mode_discover(sna); ++ sna_mode_discover(sna, false); + + return sna->mode.rrGetInfo(screen, rotations); + } +@@ -931,8 +947,8 @@ static void sna_leave_vt(VT_FUNC_ARGS_DECL) + + DBG(("%s\n", __FUNCTION__)); + +- sna_accel_leave(sna); + sna_mode_reset(sna); ++ sna_accel_leave(sna); + + if (intel_put_master(sna->dev)) + xf86DrvMsg(scrn->scrnIndex, X_WARNING, +@@ -948,6 +964,12 @@ static Bool sna_early_close_screen(CLOSE_SCREEN_ARGS_DECL) + + /* XXX Note that we will leak kernel resources if !vtSema */ + ++#if HAVE_NOTIFY_FD ++ RemoveBlockAndWakeupHandlers(sna_block_handler, ++ (ServerWakeupHandlerProcPtr)NoopDDA, ++ sna); ++#endif ++ + sna_uevent_fini(sna); + sna_mode_close(sna); + +@@ -1047,12 +1069,13 @@ static void sna_dri_init(struct sna *sna, ScreenPtr screen) + { + char str[128] = ""; + +- if (sna->dri2.available) ++ if (sna->dri2.enable) + sna->dri2.open = sna_dri2_open(sna, screen); + if (sna->dri2.open) + strcat(str, "DRI2 "); + +- if (sna->dri3.available) ++ /* Load DRI3 in case DRI2 doesn't work, e.g. 
vgaarb */ ++ if (sna->dri3.enable || (!sna->dri2.open && !sna->dri3.override)) + sna->dri3.open = sna_dri3_open(sna, screen); + if (sna->dri3.open) + strcat(str, "DRI3 "); +@@ -1098,7 +1121,8 @@ sna_screen_init(SCREEN_INIT_ARGS_DECL) + DBG(("%s\n", __FUNCTION__)); + + assert(sna->scrn == scrn); +- assert(scrn->pScreen == NULL); /* set afterwards */ ++ assert(to_screen_from_sna(sna) == NULL || /* set afterwards */ ++ to_screen_from_sna(sna) == screen); + + assert(sna->freed_pixmap == NULL); + +@@ -1166,11 +1190,17 @@ sna_screen_init(SCREEN_INIT_ARGS_DECL) + * later memory should be bound when allocating, e.g rotate_mem */ + scrn->vtSema = TRUE; + ++#if !HAVE_NOTIFY_FD + sna->BlockHandler = screen->BlockHandler; + screen->BlockHandler = sna_block_handler; + + sna->WakeupHandler = screen->WakeupHandler; + screen->WakeupHandler = sna_wakeup_handler; ++#else ++ RegisterBlockAndWakeupHandlers(sna_block_handler, ++ (ServerWakeupHandlerProcPtr)NoopDDA, ++ sna); ++#endif + + screen->SaveScreen = sna_save_screen; + screen->CreateScreenResources = sna_create_screen_resources; +@@ -1190,6 +1220,8 @@ sna_screen_init(SCREEN_INIT_ARGS_DECL) + CMAP_PALETTED_TRUECOLOR)) + return FALSE; + ++ if (!xf86CheckBoolOption(scrn->options, "dpms", TRUE)) ++ sna->flags |= SNA_NO_DPMS; + xf86DPMSInit(screen, sna_dpms_set, 0); + + sna_uevent_init(sna); +@@ -1244,20 +1276,15 @@ static Bool sna_enter_vt(VT_FUNC_ARGS_DECL) + if (intel_get_master(sna->dev)) + return FALSE; + ++ sna_accel_enter(sna); ++ + if (sna->flags & SNA_REPROBE) { +- DBG(("%s: reporting deferred hotplug event\n", +- __FUNCTION__)); +- sna_mode_discover(sna); +- RRGetInfo(xf86ScrnToScreen(scrn), TRUE); +- sna->flags &= ~SNA_REPROBE; ++ DBG(("%s: reporting deferred hotplug event\n", __FUNCTION__)); ++ sna_mode_discover(sna, true); + } + +- if (!sna_set_desired_mode(sna)) { +- intel_put_master(sna->dev); +- return FALSE; +- } ++ sna_set_desired_mode(sna); + +- sna_accel_enter(sna); + return TRUE; + } + +@@ -1379,6 +1406,9 @@ 
static void describe_sna(ScrnInfoPtr scrn) + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "SNA compiled: %s\n", BUILDER_DESCRIPTION); + #endif ++#if HAS_DEBUG_FULL ++ ErrorF("SNA compiled with full debug logging; expect to run slowly\n"); ++#endif + #if !NDEBUG + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "SNA compiled with assertions enabled\n"); +@@ -1400,6 +1430,7 @@ static void describe_sna(ScrnInfoPtr scrn) + "SNA compiled for use with valgrind\n"); + VALGRIND_PRINTF("SNA compiled for use with valgrind\n"); + #endif ++ DBG(("xf86-video-intel version: %s\n", git_version)); + DBG(("pixman version: %s\n", pixman_version_string())); + } + +diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c +index a5dfb06b..6ee40336 100644 +--- a/src/sna/sna_glyphs.c ++++ b/src/sna/sna_glyphs.c +@@ -74,7 +74,7 @@ + #define NO_GLYPHS_VIA_MASK 0 + #define FORCE_SMALL_MASK 0 /* -1 = never, 1 = always */ + #define NO_GLYPHS_SLOW 0 +-#define NO_DISCARD_MASK 0 ++#define DISCARD_MASK 0 /* -1 = never, 1 = always */ + + #define CACHE_PICTURE_SIZE 1024 + #define GLYPH_MIN_SIZE 8 +@@ -185,7 +185,7 @@ void sna_glyphs_close(struct sna *sna) + */ + bool sna_glyphs_create(struct sna *sna) + { +- ScreenPtr screen = sna->scrn->pScreen; ++ ScreenPtr screen = to_screen_from_sna(sna); + pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff }; + unsigned int formats[] = { + PIXMAN_a8, +@@ -1094,6 +1094,9 @@ sna_glyph_get_image(GlyphPtr g, ScreenPtr s) + + static inline bool use_small_mask(struct sna *sna, int16_t width, int16_t height, int depth) + { ++ if (depth < 8) ++ return true; ++ + if (FORCE_SMALL_MASK) + return FORCE_SMALL_MASK > 0; + +@@ -1156,12 +1159,6 @@ glyphs_via_mask(struct sna *sna, + src_x += box.x1 - list->xOff; + src_y += box.y1 - list->yOff; + +- if (format->depth < 8) { +- format = PictureMatchFormat(screen, 8, PICT_a8); +- if (!format) +- return false; +- } +- + component_alpha = NeedsComponent(format->format); + if (use_small_mask(sna, width, height, format->depth)) { + 
pixman_image_t *mask_image; +@@ -1179,7 +1176,7 @@ use_small_mask: + return false; + + mask_image = +- pixman_image_create_bits(format->depth << 24 | format->format, ++ pixman_image_create_bits(pixmap->drawable.bitsPerPixel << 24 | format->format, + width, height, + pixmap->devPrivate.ptr, + pixmap->devKind); +@@ -1386,10 +1383,11 @@ next_image: + DBG(("%s: atlas format=%08x, mask format=%08x\n", + __FUNCTION__, + (int)p->atlas->format, +- (int)(format->depth << 24 | format->format))); ++ (int)mask->format)); + + memset(&tmp, 0, sizeof(tmp)); +- if (p->atlas->format == (format->depth << 24 | format->format)) { ++ if (p->atlas->format == mask->format || ++ alphaless(p->atlas->format) == mask->format) { + ok = sna->render.composite(sna, PictOpAdd, + p->atlas, NULL, mask, + 0, 0, 0, 0, 0, 0, +@@ -1561,6 +1559,9 @@ skip_glyph: + } + } + ++ assert(format); ++ DBG(("%s: format=%08d, depth=%d\n", ++ __FUNCTION__, format->format, format->depth)); + out: + if (list_extents != stack_extents) + free(list_extents); +@@ -1573,24 +1574,34 @@ static bool can_discard_mask(uint8_t op, PicturePtr src, PictFormatPtr mask, + PictFormatPtr g; + uint32_t color; + +- if (NO_DISCARD_MASK) +- return false; ++ if (DISCARD_MASK) ++ return DISCARD_MASK > 0; + + DBG(("%s: nlist=%d, mask=%08x, depth %d, op=%d (bounded? %d)\n", + __FUNCTION__, nlist, + mask ? (unsigned)mask->format : 0, mask ? mask->depth : 0, + op, op_is_bounded(op))); + +- if (nlist == 1 && list->len == 1) +- return true; ++ if (nlist == 1 && list->len == 1) { ++ if (mask == list->format) ++ return true; ++ ++ g = list->format; ++ goto skip; ++ } + +- if (!op_is_bounded(op)) ++ if (!op_is_bounded(op)) { ++ DBG(("%s: unbounded op, not discarding\n", __FUNCTION__)); + return false; ++ } + + /* No glyphs overlap and we are not performing a mask conversion. 
*/ + g = glyphs_format(nlist, list, glyphs); +- if (mask == g) ++ if (mask == g) { ++ DBG(("%s: mask matches glyphs format, no conversion, so discard mask\n", ++ __FUNCTION__)); + return true; ++ } + + DBG(("%s: preferred mask format %08x, depth %d\n", + __FUNCTION__, g ? (unsigned)g->format : 0, g ? g->depth : 0)); +@@ -1605,18 +1616,41 @@ static bool can_discard_mask(uint8_t op, PicturePtr src, PictFormatPtr mask, + + list++; + } ++ ++ if (!sna_picture_is_solid(src, &color)) ++ return false; ++ ++ return color >> 24 == 0xff; + } else { +- if (PICT_FORMAT_A(mask->format) >= PICT_FORMAT_A(g->format)) ++skip: ++ if (mask->format == g->format) + return true; + +- if (g->depth != 1) +- return false; +- } ++ if (mask->format == alphaless(g->format)) ++ return true; ++ ++ if (PICT_FORMAT_TYPE(g->format) == PICT_TYPE_A && ++ PICT_FORMAT_TYPE(mask->format) != PICT_TYPE_A) ++ return true; + +- if (!sna_picture_is_solid(src, &color)) + return false; ++ } ++} + +- return color >> 24 == 0xff; ++static uint32_t pixman_format(PictFormatPtr short_format) ++{ ++ uint32_t bpp; ++ ++ bpp = short_format->depth; ++ if (bpp <= 1) ++ bpp = 1; ++ else if (bpp <= 8) ++ bpp = 8; ++ else if (bpp <= 16) ++ bpp = 16; ++ else ++ bpp = 32; ++ return bpp << 24 | short_format->format; + } + + static void +@@ -1756,7 +1790,7 @@ next: + if (sigtrap_get() == 0) { + if (mask_format) { + pixman_composite_glyphs(op, src_image, dst_image, +- mask_format->format | (mask_format->depth << 24), ++ pixman_format(mask_format), + src_x + src_dx + region.extents.x1 - dst_x, + src_y + src_dy + region.extents.y1 - dst_y, + region.extents.x1, region.extents.y1, +@@ -1815,10 +1849,10 @@ out: + x, y, + mask_format->depth, + (long)mask_format->format, +- (long)(mask_format->depth << 24 | mask_format->format), ++ (long)pixman_format(mask_format), + NeedsComponent(mask_format->format))); + mask_image = +- pixman_image_create_bits(mask_format->depth << 24 | mask_format->format, ++ 
pixman_image_create_bits(pixman_format(mask_format), + region.extents.x2 - region.extents.x1, + region.extents.y2 - region.extents.y1, + NULL, 0); +@@ -2086,12 +2120,6 @@ glyphs_via_image(struct sna *sna, + src_x += box.x1 - list->xOff; + src_y += box.y1 - list->yOff; + +- if (format->depth < 8) { +- format = PictureMatchFormat(screen, 8, PICT_a8); +- if (!format) +- return false; +- } +- + DBG(("%s: small mask [format=%lx, depth=%d, size=%d], rendering glyphs to upload buffer\n", + __FUNCTION__, (unsigned long)format->format, + format->depth, (uint32_t)width*height*format->depth)); +@@ -2104,7 +2132,7 @@ glyphs_via_image(struct sna *sna, + return false; + + mask_image = +- pixman_image_create_bits(format->depth << 24 | format->format, ++ pixman_image_create_bits(pixmap->drawable.bitsPerPixel << 24 | format->format, + width, height, + pixmap->devPrivate.ptr, + pixmap->devKind); +diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c +index d6aa1294..d32bd583 100644 +--- a/src/sna/sna_io.c ++++ b/src/sna/sna_io.c +@@ -105,8 +105,10 @@ read_boxes_inplace__cpu(struct kgem *kgem, + if (!download_inplace__cpu(kgem, dst, bo, box, n)) + return false; + ++ if (bo->tiling == I915_TILING_Y) ++ return false; ++ + assert(kgem_bo_can_map__cpu(kgem, bo, false)); +- assert(bo->tiling != I915_TILING_Y); + + src = kgem_bo_map__cpu(kgem, bo); + if (src == NULL) +@@ -281,6 +283,9 @@ fallback: + if (box[n].y2 > extents.y2) + extents.y2 = box[n].y2; + } ++ if (!can_blt && sna->render.max_3d_size == 0) ++ goto fallback; ++ + if (kgem_bo_can_map(kgem, src_bo)) { + /* Is it worth detiling? 
*/ + if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096) +@@ -477,6 +482,7 @@ fallback: + goto fallback; + _kgem_set_mode(kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL); + + tmp_nbox = nbox; + tmp_box = box; +@@ -539,6 +545,7 @@ fallback: + break; + + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL); + tmp_box += nbox_this_time; + } while (1); + } else { +@@ -597,6 +604,7 @@ fallback: + break; + + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL); + tmp_box += nbox_this_time; + } while (1); + } +@@ -666,8 +674,10 @@ write_boxes_inplace__tiled(struct kgem *kgem, + { + uint8_t *dst; + ++ if (bo->tiling == I915_TILING_Y) ++ return false; ++ + assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true)); +- assert(bo->tiling != I915_TILING_Y); + + if (kgem_bo_can_map__cpu(kgem, bo, true)) { + dst = kgem_bo_map__cpu(kgem, bo); +@@ -778,6 +788,15 @@ static bool __upload_inplace(struct kgem *kgem, + if (FORCE_INPLACE) + return FORCE_INPLACE > 0; + ++ if (bo->exec) ++ return false; ++ ++ if (bo->flush) ++ return true; ++ ++ if (kgem_bo_can_map__cpu(kgem, bo, true)) ++ return true; ++ + /* If we are writing through the GTT, check first if we might be + * able to almagamate a series of small writes into a single + * operation. +@@ -849,6 +868,8 @@ bool sna_write_boxes(struct sna *sna, PixmapPtr dst, + if (box[n].y2 > extents.y2) + extents.y2 = box[n].y2; + } ++ if (!can_blt && sna->render.max_3d_size == 0) ++ goto fallback; + + /* Try to avoid switching rings... 
*/ + if (!can_blt || kgem->ring == KGEM_RENDER || +@@ -1038,6 +1059,7 @@ tile: + goto fallback; + _kgem_set_mode(kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); + + if (kgem->gen >= 0100) { + cmd |= 8; +@@ -1129,6 +1151,7 @@ tile: + if (nbox) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); + } + + kgem_bo_destroy(kgem, src_bo); +@@ -1224,6 +1247,7 @@ tile: + if (nbox) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); + } + + kgem_bo_destroy(kgem, src_bo); +@@ -1541,6 +1565,7 @@ tile: + goto fallback; + _kgem_set_mode(kgem, KGEM_BLT); + } ++ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); + + if (sna->kgem.gen >= 0100) { + cmd |= 8; +@@ -1636,6 +1661,7 @@ tile: + if (nbox) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); + } + + kgem_bo_destroy(kgem, src_bo); +@@ -1732,6 +1758,7 @@ tile: + if (nbox) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); ++ kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo); + } + + kgem_bo_destroy(kgem, src_bo); +diff --git a/src/sna/sna_present.c b/src/sna/sna_present.c +index 6dd6fe88..2796d972 100644 +--- a/src/sna/sna_present.c ++++ b/src/sna/sna_present.c +@@ -27,6 +27,7 @@ + #include <sys/types.h> + #include <fcntl.h> + #include <unistd.h> ++#include <sys/poll.h> + #include <errno.h> + #include <xf86drm.h> + +@@ -38,21 +39,73 @@ + static present_screen_info_rec present_info; + + struct sna_present_event { +- uint64_t event_id; + xf86CrtcPtr crtc; ++ struct sna *sna; ++ struct list link; ++ uint64_t *event_id; ++ uint64_t target_msc; ++ int n_event_id; ++ bool queued; + }; + ++static void sna_present_unflip(ScreenPtr screen, uint64_t event_id); ++static bool sna_present_queue(struct sna_present_event *info, ++ uint64_t last_msc); ++ + static inline struct sna_present_event * + to_present_event(uintptr_t data) + { + return (struct 
sna_present_event *)(data & ~3); + } + ++static struct sna_present_event *info_alloc(struct sna *sna) ++{ ++ struct sna_present_event *info; ++ ++ info = sna->present.freed_info; ++ if (info) { ++ sna->present.freed_info = NULL; ++ return info; ++ } ++ ++ return malloc(sizeof(struct sna_present_event) + sizeof(uint64_t)); ++} ++ ++static void info_free(struct sna_present_event *info) ++{ ++ struct sna *sna = info->sna; ++ ++ if (sna->present.freed_info) ++ free(sna->present.freed_info); ++ ++ sna->present.freed_info = info; ++} ++ ++static inline bool msc_before(uint64_t msc, uint64_t target) ++{ ++ return (int64_t)(msc - target) < 0; ++} ++ + #define MARK_PRESENT(x) ((void *)((uintptr_t)(x) | 2)) + +-static int pipe_from_crtc(RRCrtcPtr crtc) ++static inline xf86CrtcPtr unmask_crtc(xf86CrtcPtr crtc) ++{ ++ return (xf86CrtcPtr)((uintptr_t)crtc & ~1); ++} ++ ++static inline xf86CrtcPtr mark_crtc(xf86CrtcPtr crtc) ++{ ++ return (xf86CrtcPtr)((uintptr_t)crtc | 1); ++} ++ ++static inline bool has_vblank(xf86CrtcPtr crtc) ++{ ++ return (uintptr_t)crtc & 1; ++} ++ ++static inline int pipe_from_crtc(RRCrtcPtr crtc) + { +- return crtc ? sna_crtc_to_pipe(crtc->devPrivate) : -1; ++ return crtc ? 
sna_crtc_pipe(crtc->devPrivate) : -1; + } + + static uint32_t pipe_select(int pipe) +@@ -74,6 +127,215 @@ static inline int sna_wait_vblank(struct sna *sna, union drm_wait_vblank *vbl, i + return drmIoctl(sna->kgem.fd, DRM_IOCTL_WAIT_VBLANK, vbl); + } + ++static uint64_t gettime_ust64(void) ++{ ++ struct timespec tv; ++ ++ if (clock_gettime(CLOCK_MONOTONIC, &tv)) ++ return GetTimeInMicros(); ++ ++ return ust64(tv.tv_sec, tv.tv_nsec / 1000); ++} ++ ++static void vblank_complete(struct sna_present_event *info, ++ uint64_t ust, uint64_t msc) ++{ ++ int n; ++ ++ if (msc_before(msc, info->target_msc)) { ++ DBG(("%s: event=%d too early, now %lld, expected %lld\n", ++ __FUNCTION__, ++ info->event_id[0], ++ (long long)msc, (long long)info->target_msc)); ++ if (sna_present_queue(info, msc)) ++ return; ++ } ++ ++ DBG(("%s: %d events complete\n", __FUNCTION__, info->n_event_id)); ++ for (n = 0; n < info->n_event_id; n++) { ++ DBG(("%s: pipe=%d tv=%d.%06d msc=%lld (target=%lld), event=%lld complete%s\n", __FUNCTION__, ++ sna_crtc_pipe(info->crtc), ++ (int)(ust / 1000000), (int)(ust % 1000000), ++ (long long)msc, (long long)info->target_msc, ++ (long long)info->event_id[n], ++ info->target_msc && msc == (uint32_t)info->target_msc ? 
"" : ": MISS")); ++ present_event_notify(info->event_id[n], ust, msc); ++ } ++ if (info->n_event_id > 1) ++ free(info->event_id); ++ list_del(&info->link); ++ info_free(info); ++} ++ ++static uint32_t msc_to_delay(xf86CrtcPtr crtc, uint64_t target) ++{ ++ const DisplayModeRec *mode = &crtc->desiredMode; ++ const struct ust_msc *swap = sna_crtc_last_swap(crtc); ++ int64_t delay, subframe; ++ ++ assert(mode->Clock); ++ ++ delay = target - swap->msc; ++ assert(delay >= 0); ++ if (delay > 1) { /* try to use the hw vblank for the last frame */ ++ delay--; ++ subframe = 0; ++ } else { ++ subframe = gettime_ust64() - swap_ust(swap); ++ subframe += 500; ++ subframe /= 1000; ++ } ++ delay *= mode->VTotal * mode->HTotal / mode->Clock; ++ if (subframe < delay) ++ delay -= subframe; ++ else ++ delay = 0; ++ ++ DBG(("%s: sleep %d frames, %llu ms\n", __FUNCTION__, ++ (int)(target - swap->msc), (long long)delay)); ++ assert(delay >= 0); ++ return MIN(delay, INT32_MAX); ++} ++ ++static CARD32 sna_fake_vblank_handler(OsTimerPtr timer, CARD32 now, void *data) ++{ ++ struct sna_present_event *info = data; ++ union drm_wait_vblank vbl; ++ uint64_t msc, ust; ++ ++ DBG(("%s(event=%lldx%d, now=%d)\n", __FUNCTION__, (long long)info->event_id[0], info->n_event_id, now)); ++ ++ VG_CLEAR(vbl); ++ vbl.request.type = DRM_VBLANK_RELATIVE; ++ vbl.request.sequence = 0; ++ if (sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc)) == 0) { ++ ust = ust64(vbl.reply.tval_sec, vbl.reply.tval_usec); ++ msc = sna_crtc_record_vblank(info->crtc, &vbl); ++ DBG(("%s: event=%lld, target msc=%lld, now %lld\n", ++ __FUNCTION__, (long long)info->event_id[0], (long long)info->target_msc, (long long)msc)); ++ if (msc_before(msc, info->target_msc)) { ++ int delta = info->target_msc - msc; ++ uint32_t delay; ++ ++ DBG(("%s: too early, requeuing delta=%d\n", __FUNCTION__, delta)); ++ assert(info->target_msc - msc < 1ull<<31); ++ if (delta <= 2) { ++ vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; ++ 
vbl.request.sequence = info->target_msc; ++ vbl.request.signal = (uintptr_t)MARK_PRESENT(info); ++ if (sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc)) == 0) { ++ DBG(("%s: scheduled new vblank event for %lld\n", __FUNCTION__, (long long)info->target_msc)); ++ info->queued = true; ++ if (delta == 1) { ++ sna_crtc_set_vblank(info->crtc); ++ info->crtc = mark_crtc(info->crtc); ++ } ++ free(timer); ++ return 0; ++ } ++ } ++ ++ delay = msc_to_delay(info->crtc, info->target_msc); ++ if (delay) { ++ DBG(("%s: requeueing timer for %dms delay\n", __FUNCTION__, delay)); ++ return delay; ++ } ++ ++ /* As a last resort use a blocking wait. ++ * Less than a millisecond for (hopefully) a rare case. ++ */ ++ DBG(("%s: blocking wait!\n", __FUNCTION__)); ++ vbl.request.type = DRM_VBLANK_ABSOLUTE; ++ vbl.request.sequence = info->target_msc; ++ if (sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc)) == 0) { ++ ust = ust64(vbl.reply.tval_sec, vbl.reply.tval_usec); ++ msc = sna_crtc_record_vblank(info->crtc, &vbl); ++ } else { ++ DBG(("%s: blocking wait failed, fudging\n", ++ __FUNCTION__)); ++ goto fixup; ++ } ++ } ++ } else { ++fixup: ++ ust = gettime_ust64(); ++ msc = info->target_msc; ++ DBG(("%s: event=%lld, CRTC OFF, target msc=%lld, was %lld (off)\n", ++ __FUNCTION__, (long long)info->event_id[0], (long long)info->target_msc, (long long)sna_crtc_last_swap(info->crtc)->msc)); ++ } ++ ++ vblank_complete(info, ust, msc); ++ free(timer); ++ return 0; ++} ++ ++static bool sna_fake_vblank(struct sna_present_event *info) ++{ ++ const struct ust_msc *swap = sna_crtc_last_swap(info->crtc); ++ uint32_t delay; ++ ++ if (msc_before(swap->msc, info->target_msc)) ++ delay = msc_to_delay(info->crtc, info->target_msc); ++ else ++ delay = 0; ++ ++ DBG(("%s(event=%lldx%d, target_msc=%lld, msc=%lld, delay=%ums)\n", ++ __FUNCTION__, (long long)info->event_id[0], info->n_event_id, ++ (long long)info->target_msc, (long long)swap->msc, delay)); ++ if (delay == 0) { ++ uint64_t ust, 
msc; ++ ++ if (msc_before(swap->msc, info->target_msc)) { ++ /* Fixup and pretend it completed immediately */ ++ msc = info->target_msc; ++ ust = gettime_ust64(); ++ } else { ++ msc = swap->msc; ++ ust = swap_ust(swap); ++ } ++ ++ vblank_complete(info, ust, msc); ++ return true; ++ } ++ ++ return TimerSet(NULL, 0, delay, sna_fake_vblank_handler, info); ++} ++ ++static bool sna_present_queue(struct sna_present_event *info, ++ uint64_t last_msc) ++{ ++ union drm_wait_vblank vbl; ++ int delta = info->target_msc - last_msc; ++ ++ DBG(("%s: target msc=%llu, seq=%u (last_msc=%llu), delta=%d\n", ++ __FUNCTION__, ++ (long long)info->target_msc, ++ (unsigned)info->target_msc, ++ (long long)last_msc, ++ delta)); ++ assert(info->target_msc - last_msc < 1ull<<31); ++ assert(delta >= 0); ++ ++ VG_CLEAR(vbl); ++ vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; ++ vbl.request.sequence = info->target_msc; ++ vbl.request.signal = (uintptr_t)MARK_PRESENT(info); ++ if (delta > 2 || ++ sna_wait_vblank(info->sna, &vbl, sna_crtc_pipe(info->crtc))) { ++ DBG(("%s: vblank enqueue failed, faking delta=%d\n", __FUNCTION__, delta)); ++ if (!sna_fake_vblank(info)) ++ return false; ++ } else { ++ info->queued = true; ++ if (delta == 1) { ++ sna_crtc_set_vblank(info->crtc); ++ info->crtc = mark_crtc(info->crtc); ++ } ++ } ++ ++ return true; ++} ++ + static RRCrtcPtr + sna_present_get_crtc(WindowPtr window) + { +@@ -81,7 +343,10 @@ sna_present_get_crtc(WindowPtr window) + BoxRec box; + xf86CrtcPtr crtc; + +- DBG(("%s\n", __FUNCTION__)); ++ DBG(("%s: window=%ld (pixmap=%ld), box=(%d, %d)x(%d, %d)\n", ++ __FUNCTION__, window->drawable.id, get_window_pixmap(window)->drawable.serialNumber, ++ window->drawable.x, window->drawable.y, ++ window->drawable.width, window->drawable.height)); + + box.x1 = window->drawable.x; + box.y1 = window->drawable.y; +@@ -99,26 +364,59 @@ static int + sna_present_get_ust_msc(RRCrtcPtr crtc, CARD64 *ust, CARD64 *msc) + { + struct sna *sna = 
to_sna_from_screen(crtc->pScreen); +- int pipe = pipe_from_crtc(crtc); + union drm_wait_vblank vbl; + +- DBG(("%s(pipe=%d)\n", __FUNCTION__, pipe)); ++ DBG(("%s(pipe=%d)\n", __FUNCTION__, sna_crtc_pipe(crtc->devPrivate))); ++ if (sna_crtc_has_vblank(crtc->devPrivate)) { ++ DBG(("%s: vblank active, reusing last swap msc/ust\n", ++ __FUNCTION__)); ++ goto last; ++ } + + VG_CLEAR(vbl); + vbl.request.type = DRM_VBLANK_RELATIVE; + vbl.request.sequence = 0; +- if (sna_wait_vblank(sna, &vbl, pipe) == 0) { ++ if (sna_wait_vblank(sna, &vbl, sna_crtc_pipe(crtc->devPrivate)) == 0) { ++ struct sna_present_event *info; ++ + *ust = ust64(vbl.reply.tval_sec, vbl.reply.tval_usec); + *msc = sna_crtc_record_vblank(crtc->devPrivate, &vbl); ++ ++ info = info_alloc(sna); ++ if (info) { ++ info->crtc = crtc->devPrivate; ++ info->sna = sna; ++ info->target_msc = *msc + 1; ++ info->event_id = (uint64_t *)(info + 1); ++ info->n_event_id = 0; ++ ++ vbl.request.type = ++ DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; ++ vbl.request.sequence = info->target_msc; ++ vbl.request.signal = (uintptr_t)MARK_PRESENT(info); ++ ++ if (sna_wait_vblank(info->sna, &vbl, ++ sna_crtc_pipe(info->crtc)) == 0) { ++ list_add(&info->link, ++ &sna->present.vblank_queue); ++ info->queued = true; ++ sna_crtc_set_vblank(info->crtc); ++ info->crtc = mark_crtc(info->crtc); ++ } else ++ info_free(info); ++ } + } else { +- const struct ust_msc *swap = sna_crtc_last_swap(crtc->devPrivate); +- *ust = ust64(swap->tv_sec, swap->tv_usec); ++ const struct ust_msc *swap; ++last: ++ swap = sna_crtc_last_swap(crtc->devPrivate); ++ *ust = swap_ust(swap); + *msc = swap->msc; + } + +- DBG(("%s: pipe=%d, tv=%d.%06d msc=%lld\n", __FUNCTION__, pipe, ++ DBG(("%s: pipe=%d, tv=%d.%06d seq=%d msc=%lld\n", __FUNCTION__, ++ sna_crtc_pipe(crtc->devPrivate), + (int)(*ust / 1000000), (int)(*ust % 1000000), +- (long long)*msc)); ++ vbl.reply.sequence, (long long)*msc)); + + return Success; + } +@@ -127,43 +425,106 @@ void + 
sna_present_vblank_handler(struct drm_event_vblank *event) + { + struct sna_present_event *info = to_present_event(event->user_data); ++ uint64_t msc; + +- DBG(("%s: pipe=%d tv=%d.%06d msc=%d, event %lld complete\n", __FUNCTION__, +- sna_crtc_to_pipe(info->crtc), +- event->tv_sec, event->tv_usec, event->sequence, +- (long long)info->event_id)); +- present_event_notify(info->event_id, +- ust64(event->tv_sec, event->tv_usec), +- sna_crtc_record_event(info->crtc, event)); +- free(info); ++ if (!info->queued) { ++ DBG(("%s: arrived unexpectedly early (not queued)\n", __FUNCTION__)); ++ assert(!has_vblank(info->crtc)); ++ return; ++ } ++ ++ if (has_vblank(info->crtc)) { ++ DBG(("%s: clearing immediate flag\n", __FUNCTION__)); ++ info->crtc = unmask_crtc(info->crtc); ++ sna_crtc_clear_vblank(info->crtc); ++ } ++ ++ msc = sna_crtc_record_event(info->crtc, event); ++ ++ if (info->sna->mode.shadow_wait) { ++ DBG(("%s: recursed from TearFree\n", __FUNCTION__)); ++ if (TimerSet(NULL, 0, 1, sna_fake_vblank_handler, info)) ++ return; ++ } ++ ++ vblank_complete(info, ust64(event->tv_sec, event->tv_usec), msc); + } + + static int + sna_present_queue_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc) + { + struct sna *sna = to_sna_from_screen(crtc->pScreen); +- struct sna_present_event *event; +- union drm_wait_vblank vbl; +- +- DBG(("%s(pipe=%d, event=%lld, msc=%lld)\n", +- __FUNCTION__, pipe_from_crtc(crtc), +- (long long)event_id, (long long)msc)); ++ struct sna_present_event *info, *tmp; ++ const struct ust_msc *swap; + +- event = malloc(sizeof(struct sna_present_event)); +- if (event == NULL) ++ if (!sna_crtc_is_on(crtc->devPrivate)) + return BadAlloc; + +- event->event_id = event_id; +- event->crtc = crtc->devPrivate; ++ swap = sna_crtc_last_swap(crtc->devPrivate); ++ DBG(("%s(pipe=%d, event=%lld, msc=%lld, last swap=%lld)\n", ++ __FUNCTION__, sna_crtc_pipe(crtc->devPrivate), ++ (long long)event_id, (long long)msc, (long long)swap->msc)); + +- VG_CLEAR(vbl); +- 
vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; +- vbl.request.sequence = msc; +- vbl.request.signal = (uintptr_t)MARK_PRESENT(event); +- if (sna_wait_vblank(sna, &vbl, sna_crtc_to_pipe(event->crtc))) { +- DBG(("%s: vblank enqueue failed\n", __FUNCTION__)); +- free(event); +- return BadMatch; ++ if (warn_unless((int64_t)(msc - swap->msc) >= 0)) { ++ DBG(("%s: pipe=%d tv=%d.%06d msc=%lld (target=%lld), event=%lld complete\n", __FUNCTION__, ++ sna_crtc_pipe(crtc->devPrivate), ++ swap->tv_sec, swap->tv_usec, ++ (long long)swap->msc, (long long)msc, ++ (long long)event_id)); ++ present_event_notify(event_id, swap_ust(swap), swap->msc); ++ return Success; ++ } ++ if (warn_unless(msc - swap->msc < 1ull<<31)) ++ return BadValue; ++ ++ list_for_each_entry(tmp, &sna->present.vblank_queue, link) { ++ if (tmp->target_msc == msc && ++ unmask_crtc(tmp->crtc) == crtc->devPrivate) { ++ uint64_t *events = tmp->event_id; ++ ++ if (tmp->n_event_id && ++ is_power_of_two(tmp->n_event_id)) { ++ events = malloc(2*sizeof(uint64_t)*tmp->n_event_id); ++ if (events == NULL) ++ return BadAlloc; ++ ++ memcpy(events, ++ tmp->event_id, ++ tmp->n_event_id*sizeof(uint64_t)); ++ if (tmp->n_event_id != 1) ++ free(tmp->event_id); ++ tmp->event_id = events; ++ } ++ ++ DBG(("%s: appending event=%lld to vblank %lld x %d\n", ++ __FUNCTION__, (long long)event_id, (long long)msc, tmp->n_event_id+1)); ++ events[tmp->n_event_id++] = event_id; ++ return Success; ++ } ++ if ((int64_t)(tmp->target_msc - msc) > 0) { ++ DBG(("%s: previous target_msc=%lld invalid for coalescing\n", ++ __FUNCTION__, (long long)tmp->target_msc)); ++ break; ++ } ++ } ++ ++ info = info_alloc(sna); ++ if (info == NULL) ++ return BadAlloc; ++ ++ info->crtc = crtc->devPrivate; ++ info->sna = sna; ++ info->target_msc = msc; ++ info->event_id = (uint64_t *)(info + 1); ++ info->event_id[0] = event_id; ++ info->n_event_id = 1; ++ list_add_tail(&info->link, &tmp->link); ++ info->queued = false; ++ ++ if (!sna_present_queue(info, 
swap->msc)) { ++ list_del(&info->link); ++ info_free(info); ++ return BadAlloc; + } + + return Success; +@@ -180,14 +541,6 @@ sna_present_abort_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc) + static void + sna_present_flush(WindowPtr window) + { +- PixmapPtr pixmap = get_window_pixmap(window); +- struct sna_pixmap *priv; +- +- DBG(("%s(pixmap=%ld)\n", __FUNCTION__, pixmap->drawable.serialNumber)); +- +- priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_ASYNC_HINT | __MOVE_FORCE); +- if (priv && priv->gpu_bo) +- kgem_scanout_flush(&to_sna_from_pixmap(pixmap)->kgem, priv->gpu_bo); + } + + static bool +@@ -201,8 +554,13 @@ check_flip__crtc(struct sna *sna, + + assert(sna->scrn->vtSema); + +- if (sna->mode.shadow_active) { +- DBG(("%s: shadow buffer active\n", __FUNCTION__)); ++ if (!sna->mode.front_active) { ++ DBG(("%s: DPMS off, no flips\n", __FUNCTION__)); ++ return FALSE; ++ } ++ ++ if (sna->mode.rr_active) { ++ DBG(("%s: RandR transformation active\n", __FUNCTION__)); + return false; + } + +@@ -224,6 +582,11 @@ sna_present_check_flip(RRCrtcPtr crtc, + pixmap->drawable.serialNumber, + sync_flip)); + ++ if (!sna->scrn->vtSema) { ++ DBG(("%s: VT switched away, no flips\n", __FUNCTION__)); ++ return FALSE; ++ } ++ + if (sna->flags & SNA_NO_FLIP) { + DBG(("%s: flips not suported\n", __FUNCTION__)); + return FALSE; +@@ -231,7 +594,7 @@ sna_present_check_flip(RRCrtcPtr crtc, + + if (sync_flip) { + if ((sna->flags & SNA_HAS_FLIP) == 0) { +- DBG(("%s: async flips not suported\n", __FUNCTION__)); ++ DBG(("%s: sync flips not suported\n", __FUNCTION__)); + return FALSE; + } + } else { +@@ -257,24 +620,39 @@ sna_present_check_flip(RRCrtcPtr crtc, + return FALSE; + } + +- return TRUE; +-} +- +-static uint64_t gettime_ust64(void) +-{ +- struct timespec tv; ++ if (flip->pinned) { ++ assert(flip->gpu_bo); ++ if (sna->flags & SNA_LINEAR_FB) { ++ if (flip->gpu_bo->tiling != I915_TILING_NONE) { ++ DBG(("%s: pined bo, tilng=%d needs NONE\n", ++ __FUNCTION__, 
flip->gpu_bo->tiling)); ++ return FALSE; ++ } ++ } else { ++ if (!sna->kgem.can_scanout_y && ++ flip->gpu_bo->tiling == I915_TILING_Y) { ++ DBG(("%s: pined bo, tilng=%d and can't scanout Y\n", ++ __FUNCTION__, flip->gpu_bo->tiling)); ++ return FALSE; ++ } ++ } + +- if (clock_gettime(CLOCK_MONOTONIC, &tv)) +- return 0; ++ if (flip->gpu_bo->pitch & 63) { ++ DBG(("%s: pined bo, bad pitch=%d\n", ++ __FUNCTION__, flip->gpu_bo->pitch)); ++ return FALSE; ++ } ++ } + +- return ust64(tv.tv_sec, tv.tv_nsec / 1000); ++ return TRUE; + } + + static Bool +-page_flip__async(RRCrtcPtr crtc, +- uint64_t event_id, +- uint64_t target_msc, +- struct kgem_bo *bo) ++flip__async(struct sna *sna, ++ RRCrtcPtr crtc, ++ uint64_t event_id, ++ uint64_t target_msc, ++ struct kgem_bo *bo) + { + DBG(("%s(pipe=%d, event=%lld, handle=%d)\n", + __FUNCTION__, +@@ -282,17 +660,17 @@ page_flip__async(RRCrtcPtr crtc, + (long long)event_id, + bo->handle)); + +- if (!sna_page_flip(to_sna_from_screen(crtc->pScreen), bo, NULL, NULL)) { ++ if (!sna_page_flip(sna, bo, NULL, NULL)) { + DBG(("%s: async pageflip failed\n", __FUNCTION__)); + present_info.capabilities &= ~PresentCapabilityAsync; + return FALSE; + } + +- DBG(("%s: pipe=%d tv=%d.%06d msc=%d, event %lld complete\n", __FUNCTION__, ++ DBG(("%s: pipe=%d tv=%ld.%06d msc=%lld (target=%lld), event=%lld complete\n", __FUNCTION__, + pipe_from_crtc(crtc), +- gettime_ust64() / 1000000, gettime_ust64() % 1000000, +- sna_crtc_last_swap(crtc->devPrivate)->msc, +- (long long)event_id)); ++ (long)(gettime_ust64() / 1000000), (int)(gettime_ust64() % 1000000), ++ crtc ? 
(long long)sna_crtc_last_swap(crtc->devPrivate)->msc : 0LL, ++ (long long)target_msc, (long long)event_id)); + present_event_notify(event_id, gettime_ust64(), target_msc); + return TRUE; + } +@@ -303,7 +681,12 @@ present_flip_handler(struct drm_event_vblank *event, void *data) + struct sna_present_event *info = data; + struct ust_msc swap; + +- DBG(("%s(sequence=%d)\n", __FUNCTION__, event->sequence)); ++ DBG(("%s(sequence=%d): event=%lld\n", __FUNCTION__, event->sequence, (long long)info->event_id[0])); ++ assert(info->n_event_id == 1); ++ if (!info->queued) { ++ DBG(("%s: arrived unexpectedly early (not queued)\n", __FUNCTION__)); ++ return; ++ } + + if (info->crtc == NULL) { + swap.tv_sec = event->tv_sec; +@@ -312,22 +695,33 @@ present_flip_handler(struct drm_event_vblank *event, void *data) + } else + swap = *sna_crtc_last_swap(info->crtc); + +- DBG(("%s: pipe=%d, tv=%d.%06d msc %lld, event %lld complete\n", __FUNCTION__, +- info->crtc ? sna_crtc_to_pipe(info->crtc) : -1, ++ DBG(("%s: pipe=%d, tv=%d.%06d msc=%lld (target %lld), event=%lld complete%s\n", __FUNCTION__, ++ info->crtc ? sna_crtc_pipe(info->crtc) : -1, + swap.tv_sec, swap.tv_usec, (long long)swap.msc, +- (long long)info->event_id)); +- present_event_notify(info->event_id, ust64(swap.tv_sec, swap.tv_usec), swap.msc); +- free(info); ++ (long long)info->target_msc, ++ (long long)info->event_id[0], ++ info->target_msc && info->target_msc == swap.msc ? 
"" : ": MISS")); ++ present_event_notify(info->event_id[0], swap_ust(&swap), swap.msc); ++ if (info->crtc) ++ sna_crtc_clear_vblank(info->crtc); ++ ++ if (info->sna->present.unflip) { ++ DBG(("%s: executing queued unflip (event=%lld)\n", __FUNCTION__, (long long)info->sna->present.unflip)); ++ sna_present_unflip(xf86ScrnToScreen(info->sna->scrn), ++ info->sna->present.unflip); ++ info->sna->present.unflip = 0; ++ } ++ info_free(info); + } + + static Bool +-page_flip(ScreenPtr screen, +- RRCrtcPtr crtc, +- uint64_t event_id, +- struct kgem_bo *bo) ++flip(struct sna *sna, ++ RRCrtcPtr crtc, ++ uint64_t event_id, ++ uint64_t target_msc, ++ struct kgem_bo *bo) + { +- struct sna *sna = to_sna_from_screen(screen); +- struct sna_present_event *event; ++ struct sna_present_event *info; + + DBG(("%s(pipe=%d, event=%lld, handle=%d)\n", + __FUNCTION__, +@@ -335,18 +729,27 @@ page_flip(ScreenPtr screen, + (long long)event_id, + bo->handle)); + +- event = malloc(sizeof(struct sna_present_event)); +- if (event == NULL) ++ info = info_alloc(sna); ++ if (info == NULL) + return FALSE; + +- event->event_id = event_id; +- event->crtc = crtc ? crtc->devPrivate : NULL; +- if (!sna_page_flip(sna, bo, present_flip_handler, event)) { ++ info->crtc = crtc ? 
crtc->devPrivate : NULL; ++ info->sna = sna; ++ info->event_id = (uint64_t *)(info + 1); ++ info->event_id[0] = event_id; ++ info->n_event_id = 1; ++ info->target_msc = target_msc; ++ info->queued = false; ++ ++ if (!sna_page_flip(sna, bo, present_flip_handler, info)) { + DBG(("%s: pageflip failed\n", __FUNCTION__)); +- free(event); ++ info_free(info); + return FALSE; + } + ++ info->queued = true; ++ if (info->crtc) ++ sna_crtc_set_vblank(info->crtc); + return TRUE; + } + +@@ -358,12 +761,48 @@ get_flip_bo(PixmapPtr pixmap) + + DBG(("%s(pixmap=%ld)\n", __FUNCTION__, pixmap->drawable.serialNumber)); + +- priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | __MOVE_FORCE); ++ priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | __MOVE_SCANOUT | __MOVE_FORCE); + if (priv == NULL) { + DBG(("%s: cannot force pixmap to the GPU\n", __FUNCTION__)); + return NULL; + } + ++ if (priv->gpu_bo->scanout) ++ return priv->gpu_bo; ++ ++ if (sna->kgem.has_llc && !wedged(sna) && !priv->pinned) { ++ struct kgem_bo *bo; ++ uint32_t tiling; ++ ++ tiling = I915_TILING_NONE; ++ if ((sna->flags & SNA_LINEAR_FB) == 0) ++ tiling = I915_TILING_X; ++ ++ bo = kgem_create_2d(&sna->kgem, ++ pixmap->drawable.width, ++ pixmap->drawable.height, ++ pixmap->drawable.bitsPerPixel, ++ tiling, CREATE_SCANOUT | CREATE_CACHED); ++ if (bo) { ++ BoxRec box; ++ ++ box.x1 = box.y1 = 0; ++ box.x2 = pixmap->drawable.width; ++ box.y2 = pixmap->drawable.height; ++ ++ if (sna->render.copy_boxes(sna, GXcopy, ++ &pixmap->drawable, priv->gpu_bo, 0, 0, ++ &pixmap->drawable, bo, 0, 0, ++ &box, 1, 0)) { ++ sna_pixmap_unmap(pixmap, priv); ++ kgem_bo_destroy(&sna->kgem, priv->gpu_bo); ++ ++ priv->gpu_bo = bo; ++ } else ++ kgem_bo_destroy(&sna->kgem, bo); ++ } ++ } ++ + if (sna->flags & SNA_LINEAR_FB && + priv->gpu_bo->tiling && + !sna_pixmap_change_tiling(pixmap, I915_TILING_NONE)) { +@@ -372,12 +811,17 @@ get_flip_bo(PixmapPtr pixmap) + } + + if (priv->gpu_bo->tiling == I915_TILING_Y && ++ !sna->kgem.can_scanout_y && + 
!sna_pixmap_change_tiling(pixmap, I915_TILING_X)) { + DBG(("%s: invalid Y-tiling, cannot convert\n", __FUNCTION__)); + return NULL; + } + +- priv->pinned |= PIN_SCANOUT; ++ if (priv->gpu_bo->pitch & 63) { ++ DBG(("%s: invalid pitch, no conversion\n", __FUNCTION__)); ++ return NULL; ++ } ++ + return priv->gpu_bo; + } + +@@ -388,6 +832,7 @@ sna_present_flip(RRCrtcPtr crtc, + PixmapPtr pixmap, + Bool sync_flip) + { ++ struct sna *sna = to_sna_from_pixmap(pixmap); + struct kgem_bo *bo; + + DBG(("%s(pipe=%d, event=%lld, msc=%lld, pixmap=%ld, sync?=%d)\n", +@@ -397,11 +842,32 @@ sna_present_flip(RRCrtcPtr crtc, + (long long)target_msc, + pixmap->drawable.serialNumber, sync_flip)); + +- if (!check_flip__crtc(to_sna_from_pixmap(pixmap), crtc)) { ++ if (!check_flip__crtc(sna, crtc)) { + DBG(("%s: flip invalid for CRTC\n", __FUNCTION__)); + return FALSE; + } + ++ assert(sna->present.unflip == 0); ++ ++ if (sna->flags & SNA_TEAR_FREE) { ++ DBG(("%s: disabling TearFree (was %s) in favour of Present flips\n", ++ __FUNCTION__, sna->mode.shadow_enabled ? 
"enabled" : "disabled")); ++ sna->mode.shadow_enabled = false; ++ } ++ assert(!sna->mode.shadow_enabled); ++ ++ if (sna->mode.flip_active) { ++ struct pollfd pfd; ++ ++ DBG(("%s: flips still pending, stalling\n", __FUNCTION__)); ++ pfd.fd = sna->kgem.fd; ++ pfd.events = POLLIN; ++ while (poll(&pfd, 1, 0) == 1) ++ sna_mode_wakeup(sna); ++ if (sna->mode.flip_active) ++ return FALSE; ++ } ++ + bo = get_flip_bo(pixmap); + if (bo == NULL) { + DBG(("%s: flip invalid bo\n", __FUNCTION__)); +@@ -409,9 +875,9 @@ sna_present_flip(RRCrtcPtr crtc, + } + + if (sync_flip) +- return page_flip(crtc->pScreen, crtc, event_id, bo); ++ return flip(sna, crtc, event_id, target_msc, bo); + else +- return page_flip__async(crtc, event_id, target_msc, bo); ++ return flip__async(sna, crtc, event_id, target_msc, bo); + } + + static void +@@ -421,29 +887,70 @@ sna_present_unflip(ScreenPtr screen, uint64_t event_id) + struct kgem_bo *bo; + + DBG(("%s(event=%lld)\n", __FUNCTION__, (long long)event_id)); +- if (sna->mode.front_active == 0 || sna->mode.shadow_active) { ++ if (sna->mode.front_active == 0 || sna->mode.rr_active) { + const struct ust_msc *swap; + + DBG(("%s: no CRTC active, perform no-op flip\n", __FUNCTION__)); + + notify: +- swap = sna_crtc_last_swap(sna_mode_first_crtc(sna)); +- DBG(("%s: pipe=%d, tv=%d.%06d msc %lld, event %lld complete\n", __FUNCTION__, ++ swap = sna_crtc_last_swap(sna_primary_crtc(sna)); ++ DBG(("%s: pipe=%d, tv=%d.%06d msc=%lld, event=%lld complete\n", __FUNCTION__, + -1, + swap->tv_sec, swap->tv_usec, (long long)swap->msc, + (long long)event_id)); +- present_event_notify(event_id, +- ust64(swap->tv_sec, swap->tv_usec), +- swap->msc); ++ present_event_notify(event_id, swap_ust(swap), swap->msc); ++ return; ++ } ++ ++ assert(!sna->mode.shadow_enabled); ++ if (sna->mode.flip_active) { ++ DBG(("%s: %d outstanding flips, queueing unflip\n", __FUNCTION__, sna->mode.flip_active)); ++ assert(sna->present.unflip == 0); ++ sna->present.unflip = event_id; + return; + } 
+ ++ if (sna->flags & SNA_TEAR_FREE) { ++ DBG(("%s: %s TearFree after Present flips\n", ++ __FUNCTION__, sna->mode.shadow_damage != NULL ? "enabling" : "disabling")); ++ sna->mode.shadow_enabled = sna->mode.shadow_damage != NULL; ++ } ++ + bo = get_flip_bo(screen->GetScreenPixmap(screen)); +- if (bo == NULL || !page_flip(screen, NULL, event_id, bo)) { ++ if (bo == NULL) { ++reset_mode: + DBG(("%s: failed, trying to restore original mode\n", __FUNCTION__)); + xf86SetDesiredModes(sna->scrn); + goto notify; + } ++ ++ /* Are we unflipping after a failure that left our ScreenP in place? */ ++ if (!sna_needs_page_flip(sna, bo)) ++ goto notify; ++ ++ assert(sna_pixmap(screen->GetScreenPixmap(screen))->pinned & PIN_SCANOUT); ++ ++ if (sna->flags & SNA_HAS_ASYNC_FLIP) { ++ DBG(("%s: trying async flip restore\n", __FUNCTION__)); ++ if (flip__async(sna, NULL, event_id, 0, bo)) ++ return; ++ } ++ ++ if (!flip(sna, NULL, event_id, 0, bo)) ++ goto reset_mode; ++} ++ ++void sna_present_cancel_flip(struct sna *sna) ++{ ++ if (sna->present.unflip) { ++ const struct ust_msc *swap; ++ ++ swap = sna_crtc_last_swap(sna_primary_crtc(sna)); ++ present_event_notify(sna->present.unflip, ++ swap_ust(swap), swap->msc); ++ ++ sna->present.unflip = 0; ++ } + } + + static present_screen_info_rec present_info = { +@@ -463,10 +970,13 @@ static present_screen_info_rec present_info = { + + bool sna_present_open(struct sna *sna, ScreenPtr screen) + { ++ DBG(("%s(num_crtc=%d)\n", __FUNCTION__, sna->mode.num_real_crtc)); ++ + if (sna->mode.num_real_crtc == 0) + return false; + + sna_present_update(sna); ++ list_init(&sna->present.vblank_queue); + + return present_screen_init(screen, &present_info); + } +diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c +index 3fbb9ecb..3e935d57 100644 +--- a/src/sna/sna_render.c ++++ b/src/sna/sna_render.c +@@ -54,7 +54,7 @@ sna_format_for_depth(int depth) + { + switch (depth) { + case 1: return PICT_a1; +- case 4: return PICT_a4; ++ case 4: return PICT_x4a4; + 
case 8: return PICT_a8; + case 15: return PICT_x1r5g5b5; + case 16: return PICT_r5g6b5; +@@ -272,18 +272,6 @@ no_render_context_switch(struct kgem *kgem, + } + + static void +-no_render_retire(struct kgem *kgem) +-{ +- (void)kgem; +-} +- +-static void +-no_render_expire(struct kgem *kgem) +-{ +- (void)kgem; +-} +- +-static void + no_render_fini(struct sna *sna) + { + (void)sna; +@@ -316,8 +304,6 @@ const char *no_render_init(struct sna *sna) + render->fini = no_render_fini; + + sna->kgem.context_switch = no_render_context_switch; +- sna->kgem.retire = no_render_retire; +- sna->kgem.expire = no_render_expire; + if (sna->kgem.has_blt) + sna->kgem.ring = KGEM_BLT; + +@@ -407,10 +393,7 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box, bool blt) + } + } + +- if (priv->shm) { +- assert(!priv->flush); +- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); +- } ++ add_shm_flush(sna, priv); + + DBG(("%s for box=(%d, %d), (%d, %d)\n", + __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); +@@ -567,6 +550,7 @@ static struct kgem_bo *upload(struct sna *sna, + assert(priv->gpu_damage == NULL); + assert(priv->gpu_bo == NULL); + assert(bo->proxy != NULL); ++ sna_damage_all(&priv->cpu_damage, pixmap); + kgem_proxy_bo_attach(bo, &priv->gpu_bo); + } + } +@@ -627,10 +611,7 @@ sna_render_pixmap_bo(struct sna *sna, + !priv->cpu_bo->snoop && priv->cpu_bo->pitch < 4096) { + DBG(("%s: CPU all damaged\n", __FUNCTION__)); + channel->bo = priv->cpu_bo; +- if (priv->shm) { +- assert(!priv->flush); +- sna_add_flush_pixmap(sna, priv, priv->cpu_bo); +- } ++ add_shm_flush(sna, priv); + goto done; + } + } +@@ -1275,6 +1256,7 @@ sna_render_picture_extract(struct sna *sna, + assert(priv->gpu_damage == NULL); + assert(priv->gpu_bo == NULL); + assert(bo->proxy != NULL); ++ sna_damage_all(&priv->cpu_damage, pixmap); + kgem_proxy_bo_attach(bo, &priv->gpu_bo); + } + } +@@ -1338,6 +1320,8 @@ sna_render_picture_convolve(struct sna *sna, + */ + DBG(("%s: origin=(%d,%d) kernel=%dx%d, 
size=%dx%d\n", + __FUNCTION__, x_off, y_off, cw, ch, w, h)); ++ if (cw*ch > 32) /* too much loss of precision from quantization! */ ++ return -1; + + assert(picture->pDrawable); + assert(picture->filter == PictFilterConvolution); +@@ -1388,9 +1372,9 @@ sna_render_picture_convolve(struct sna *sna, + alpha = CreateSolidPicture(0, &color, &error); + if (alpha) { + sna_composite(PictOpAdd, picture, alpha, tmp, +- x, y, ++ x-(x_off+i), y-(y_off+j), ++ 0, 0, + 0, 0, +- x_off+i, y_off+j, + w, h); + FreePicture(alpha, 0); + } +@@ -2183,11 +2167,11 @@ copy_overlap(struct sna *sna, uint8_t alu, + ret = (sna->render.copy_boxes(sna, GXcopy, + draw, bo, src_dx, src_dy, + &tmp->drawable, tmp_bo, -extents->x1, -extents->y1, +- box, n , 0) && ++ box, n, 0) && + sna->render.copy_boxes(sna, alu, + &tmp->drawable, tmp_bo, -extents->x1, -extents->y1, + draw, bo, dst_dx, dst_dy, +- box, n , 0)); ++ box, n, 0)); + + screen->DestroyPixmap(tmp); + return ret; +@@ -2308,16 +2292,22 @@ static bool can_copy_cpu(struct sna *sna, + struct kgem_bo *src, + struct kgem_bo *dst) + { +- if (src->tiling != dst->tiling) +- return false; ++ DBG(("%s: tiling=%d:%d, pitch=%d:%d, can_map=%d:%d[%d]\n", ++ __FUNCTION__, ++ src->tiling, dst->tiling, ++ src->pitch, dst->pitch, ++ kgem_bo_can_map__cpu(&sna->kgem, src, false), ++ kgem_bo_can_map__cpu(&sna->kgem, dst, true), ++ sna->kgem.has_wc_mmap)); + +- if (src->pitch != dst->pitch) ++ if (src->tiling != dst->tiling) + return false; + + if (!kgem_bo_can_map__cpu(&sna->kgem, src, false)) + return false; + +- if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true)) ++ if (!kgem_bo_can_map__cpu(&sna->kgem, dst, true) && ++ !sna->kgem.has_wc_mmap) + return false; + + DBG(("%s -- yes, src handle=%d, dst handle=%d\n", __FUNCTION__, src->handle, dst->handle)); +@@ -2330,31 +2320,62 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op, + const DrawableRec *dst_draw, struct kgem_bo *dst_bo, int16_t dx, int16_t dy, + const BoxRec *box, int n, unsigned flags) + { ++ 
memcpy_box_func detile = NULL; + void *dst, *src; +- bool clipped; + + if (op != GXcopy) + return false; + +- clipped = (n > 1 || +- box->x1 + dx > 0 || +- box->y1 + dy > 0 || +- box->x2 + dx < dst_draw->width || +- box->y2 + dy < dst_draw->height); ++ if (src_draw->depth != dst_draw->depth) ++ return false; + + dst = src = NULL; +- if (!clipped && can_copy_cpu(sna, src_bo, dst_bo)) { +- dst = kgem_bo_map__cpu(&sna->kgem, dst_bo); ++ if (can_copy_cpu(sna, src_bo, dst_bo)) { ++ if (src_bo->pitch != dst_bo->pitch || ++ dx != sx || dy != sy || n > 1 || ++ box->x1 + dx > 0 || ++ box->y1 + dy > 0 || ++ box->x2 + dx < dst_draw->width || ++ box->y2 + dy < dst_draw->height) { ++ if (dx != sx) /* not implemented in memcpy yet */ ++ goto use_gtt; ++ ++ switch (dst_bo->tiling) { ++ default: ++ case I915_TILING_Y: ++ goto use_gtt; ++ ++ case I915_TILING_X: ++ detile = sna->kgem.memcpy_between_tiled_x; ++ if (detile == NULL) ++ goto use_gtt; ++ break; ++ ++ case I915_TILING_NONE: ++ break; ++ } ++ } ++ ++ if (kgem_bo_can_map__cpu(&sna->kgem, dst_bo, true)) ++ dst = kgem_bo_map__cpu(&sna->kgem, dst_bo); ++ else ++ dst = kgem_bo_map__wc(&sna->kgem, dst_bo); + src = kgem_bo_map__cpu(&sna->kgem, src_bo); + } + + if (dst == NULL || src == NULL) { ++use_gtt: + dst = kgem_bo_map__gtt(&sna->kgem, dst_bo); + src = kgem_bo_map__gtt(&sna->kgem, src_bo); + if (dst == NULL || src == NULL) + return false; ++ ++ detile = NULL; + } else { +- kgem_bo_sync__cpu_full(&sna->kgem, dst_bo, true); ++ if (dst == dst_bo->map__wc) ++ kgem_bo_sync__gtt(&sna->kgem, dst_bo); ++ else ++ kgem_bo_sync__cpu_full(&sna->kgem, dst_bo, true); + kgem_bo_sync__cpu_full(&sna->kgem, src_bo, false); + } + +@@ -2362,7 +2383,16 @@ memcpy_copy_boxes(struct sna *sna, uint8_t op, + __FUNCTION__, sx, sy, dx, dy, n)); + + if (sigtrap_get() == 0) { +- do { ++ if (detile) { ++ do { ++ detile(src, dst, dst_draw->bitsPerPixel, ++ src_bo->pitch, dst_bo->pitch, ++ box->x1 + sx, box->y1 + sy, ++ box->x1 + dx, box->y1 + dy, ++ 
box->x2 - box->x1, box->y2 - box->y1); ++ box++; ++ } while (--n); ++ } else do { + memcpy_blt(src, dst, dst_draw->bitsPerPixel, + src_bo->pitch, dst_bo->pitch, + box->x1 + sx, box->y1 + sy, +@@ -2380,4 +2410,5 @@ void + sna_render_mark_wedged(struct sna *sna) + { + sna->render.copy_boxes = memcpy_copy_boxes; ++ sna->render.prefer_gpu = 0; + } +diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h +index 6e1fa480..4ba345a7 100644 +--- a/src/sna/sna_render.h ++++ b/src/sna/sna_render.h +@@ -148,6 +148,10 @@ struct sna_composite_op { + struct { + uint32_t flags; + } gen8; ++ ++ struct { ++ uint32_t flags; ++ } gen9; + } u; + + void *priv; +@@ -238,8 +242,9 @@ struct sna_render { + int16_t w, int16_t h, + unsigned flags, + struct sna_composite_op *tmp); +-#define COMPOSITE_PARTIAL 0x1 +-#define COMPOSITE_FALLBACK 0x80000000 ++#define COMPOSITE_PARTIAL 0x1 ++#define COMPOSITE_UPLOAD 0x40000000 ++#define COMPOSITE_FALLBACK 0x80000000 + + bool (*check_composite_spans)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, +@@ -286,6 +291,8 @@ struct sna_render { + #define COPY_LAST 0x1 + #define COPY_SYNC 0x2 + #define COPY_NO_OVERLAP 0x4 ++#define COPY_SMALL 0x8 ++#define COPY_DRI 0x10 + + bool (*copy)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, +@@ -481,6 +488,7 @@ enum { + + GEN7_WM_KERNEL_VIDEO_PLANAR, + GEN7_WM_KERNEL_VIDEO_PACKED, ++ GEN7_WM_KERNEL_VIDEO_RGB, + GEN7_WM_KERNEL_COUNT + }; + +@@ -533,12 +541,13 @@ enum { + + GEN8_WM_KERNEL_VIDEO_PLANAR, + GEN8_WM_KERNEL_VIDEO_PACKED, ++ GEN8_WM_KERNEL_VIDEO_RGB, + GEN8_WM_KERNEL_COUNT + }; + + struct gen8_render_state { + unsigned gt; +- ++ const struct gt_info *info; + struct kgem_bo *general_bo; + + uint32_t vs_state; +@@ -565,6 +574,58 @@ struct gen8_render_state { + bool emit_flush; + }; + ++enum { ++ GEN9_WM_KERNEL_NOMASK = 0, ++ GEN9_WM_KERNEL_NOMASK_P, ++ ++ GEN9_WM_KERNEL_MASK, ++ GEN9_WM_KERNEL_MASK_P, ++ ++ GEN9_WM_KERNEL_MASKCA, ++ GEN9_WM_KERNEL_MASKCA_P, ++ ++ 
GEN9_WM_KERNEL_MASKSA, ++ GEN9_WM_KERNEL_MASKSA_P, ++ ++ GEN9_WM_KERNEL_OPACITY, ++ GEN9_WM_KERNEL_OPACITY_P, ++ ++ GEN9_WM_KERNEL_VIDEO_PLANAR, ++ GEN9_WM_KERNEL_VIDEO_PACKED, ++ GEN9_WM_KERNEL_VIDEO_RGB, ++ GEN9_WM_KERNEL_COUNT ++}; ++ ++struct gen9_render_state { ++ unsigned gt; ++ const struct gt_info *info; ++ struct kgem_bo *general_bo; ++ ++ uint32_t vs_state; ++ uint32_t sf_state; ++ uint32_t sf_mask_state; ++ uint32_t wm_state; ++ uint32_t wm_kernel[GEN9_WM_KERNEL_COUNT][3]; ++ ++ uint32_t cc_blend; ++ ++ uint32_t drawrect_offset; ++ uint32_t drawrect_limit; ++ uint32_t blend; ++ uint32_t samplers; ++ uint32_t kernel; ++ ++ uint16_t num_sf_outputs; ++ uint16_t ve_id; ++ uint16_t last_primitive; ++ int16_t floats_per_vertex; ++ uint16_t surface_table; ++ ++ bool needs_invariant; ++ bool emit_flush; ++ bool ve_dirty; ++}; ++ + struct sna_static_stream { + uint32_t size, used; + uint8_t *data; +@@ -620,6 +681,7 @@ const char *gen5_render_init(struct sna *sna, const char *backend); + const char *gen6_render_init(struct sna *sna, const char *backend); + const char *gen7_render_init(struct sna *sna, const char *backend); + const char *gen8_render_init(struct sna *sna, const char *backend); ++const char *gen9_render_init(struct sna *sna, const char *backend); + + void sna_render_mark_wedged(struct sna *sna); + +diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h +index 10fbbfe2..e162e37f 100644 +--- a/src/sna/sna_render_inline.h ++++ b/src/sna/sna_render_inline.h +@@ -304,6 +304,12 @@ color_convert(uint32_t pixel, + return pixel; + } + ++inline static uint32_t ++solid_color(uint32_t format, uint32_t pixel) ++{ ++ return color_convert(pixel, format, PICT_a8r8g8b8); ++} ++ + inline static bool dst_use_gpu(PixmapPtr pixmap) + { + struct sna_pixmap *priv = sna_pixmap(pixmap); +diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c +index 308efc0a..8e2627f7 100644 +--- a/src/sna/sna_tiling.c ++++ b/src/sna/sna_tiling.c +@@ -369,8 +369,7 @@ 
sna_tiling_composite_spans_boxes(struct sna *sna, + const BoxRec *box, int nbox, float opacity) + { + while (nbox--) +- sna_tiling_composite_spans_box(sna, op->base.priv, box++, opacity); +- (void)sna; ++ sna_tiling_composite_spans_box(sna, op, box++, opacity); + } + + fastcall static void +@@ -581,6 +580,7 @@ sna_tiling_composite_spans(uint32_t op, + tile->rects = tile->rects_embedded; + tile->rect_count = 0; + tile->rect_size = ARRAY_SIZE(tile->rects_embedded); ++ COMPILE_TIME_ASSERT(sizeof(tile->rects_embedded[0]) >= sizeof(struct sna_tile_span)); + + tmp->box = sna_tiling_composite_spans_box; + tmp->boxes = sna_tiling_composite_spans_boxes; +diff --git a/src/sna/sna_trapezoids_boxes.c b/src/sna/sna_trapezoids_boxes.c +index 9900e3f0..bbf83759 100644 +--- a/src/sna/sna_trapezoids_boxes.c ++++ b/src/sna/sna_trapezoids_boxes.c +@@ -198,7 +198,7 @@ composite_aligned_boxes(struct sna *sna, + if (op == PictOpClear && sna->clear) + src = sna->clear; + +- DBG(("%s: clipped extents (%d, %d), (%d, %d); now offset by (%d, %d), orgin (%d, %d)\n", ++ DBG(("%s: clipped extents (%d, %d), (%d, %d); now offset by (%d, %d), origin (%d, %d)\n", + __FUNCTION__, + clip.extents.x1, clip.extents.y1, + clip.extents.x2, clip.extents.y2, +@@ -592,6 +592,8 @@ lerp32_opacity(PixmapPtr scratch, + uint32_t *ptr; + int stride, i; + ++ sigtrap_assert_active(); ++ + ptr = (uint32_t*)((uint8_t *)scratch->devPrivate.ptr + scratch->devKind * y); + ptr += x; + stride = scratch->devKind / 4; +diff --git a/src/sna/sna_trapezoids_imprecise.c b/src/sna/sna_trapezoids_imprecise.c +index 37def2f9..8bc7c8a8 100644 +--- a/src/sna/sna_trapezoids_imprecise.c ++++ b/src/sna/sna_trapezoids_imprecise.c +@@ -962,6 +962,16 @@ tor_add_trapezoid(struct tor *tor, + const xTrapezoid *t, + int dx, int dy) + { ++ if (!xTrapezoidValid(t)) { ++ __DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n", ++ __FUNCTION__, ++ t->top, t->bottom, ++ t->left.p1.x, 
t->left.p1.y, ++ t->left.p2.x, t->left.p2.y, ++ t->right.p1.x, t->right.p1.y, ++ t->right.p2.x, t->right.p2.y)); ++ return; ++ } + polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy); + polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy); + } +@@ -1687,31 +1697,27 @@ struct span_thread { + #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box)) + struct span_thread_boxes { + const struct sna_composite_spans_op *op; ++ const BoxRec *clip_start, *clip_end; + int num_boxes; + struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES]; + }; + +-static void span_thread_add_boxes(struct sna *sna, void *data, +- const BoxRec *box, int count, float alpha) ++static void span_thread_add_box(struct sna *sna, void *data, ++ const BoxRec *box, float alpha) + { + struct span_thread_boxes *b = data; + +- __DBG(("%s: adding %d boxes with alpha=%f\n", +- __FUNCTION__, count, alpha)); ++ __DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha)); + +- assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES); +- if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) { +- DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count)); +- assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); ++ if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) { ++ DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes)); + b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes); + b->num_boxes = 0; + } + +- do { +- b->boxes[b->num_boxes].box = *box++; +- b->boxes[b->num_boxes].alpha = alpha; +- b->num_boxes++; +- } while (--count); ++ b->boxes[b->num_boxes].box = *box++; ++ b->boxes[b->num_boxes].alpha = alpha; ++ b->num_boxes++; + assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); + } + +@@ -1722,8 +1728,22 @@ span_thread_box(struct sna *sna, + const BoxRec *box, + int coverage) + { ++ struct span_thread_boxes *b = (struct span_thread_boxes *)op; ++ + __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); +- span_thread_add_boxes(sna, op, box, 1, 
AREA_TO_ALPHA(coverage)); ++ if (b->num_boxes) { ++ struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1]; ++ if (bb->box.x1 == box->x1 && ++ bb->box.x2 == box->x2 && ++ bb->box.y2 == box->y1 && ++ bb->alpha == AREA_TO_ALPHA(coverage)) { ++ bb->box.y2 = box->y2; ++ __DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2)); ++ return; ++ } ++ } ++ ++ span_thread_add_box(sna, op, box, AREA_TO_ALPHA(coverage)); + } + + static void +@@ -1733,20 +1753,28 @@ span_thread_clipped_box(struct sna *sna, + const BoxRec *box, + int coverage) + { +- pixman_region16_t region; ++ struct span_thread_boxes *b = (struct span_thread_boxes *)op; ++ const BoxRec *c; + + __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, + AREA_TO_ALPHA(coverage))); + +- pixman_region_init_rects(®ion, box, 1); +- RegionIntersect(®ion, ®ion, clip); +- if (region_num_rects(®ion)) { +- span_thread_add_boxes(sna, op, +- region_rects(®ion), +- region_num_rects(®ion), +- AREA_TO_ALPHA(coverage)); ++ b->clip_start = ++ find_clip_box_for_y(b->clip_start, b->clip_end, box->y1); ++ ++ c = b->clip_start; ++ while (c != b->clip_end) { ++ BoxRec clipped; ++ ++ if (box->y2 <= c->y1) ++ break; ++ ++ clipped = *box; ++ if (!box_intersect(&clipped, c++)) ++ continue; ++ ++ span_thread_add_box(sna, op, &clipped, AREA_TO_ALPHA(coverage)); + } +- pixman_region_fini(®ion); + } + + static span_func_t +@@ -1777,6 +1805,16 @@ thread_choose_span(struct sna_composite_spans_op *tmp, + return span; + } + ++inline static void ++span_thread_boxes_init(struct span_thread_boxes *boxes, ++ const struct sna_composite_spans_op *op, ++ const RegionRec *clip) ++{ ++ boxes->op = op; ++ region_get_boxes(clip, &boxes->clip_start, &boxes->clip_end); ++ boxes->num_boxes = 0; ++} ++ + static void + span_thread(void *arg) + { +@@ -1789,8 +1827,7 @@ span_thread(void *arg) + if (!tor_init(&tor, &thread->extents, 2*thread->ntrap)) + return; + +- boxes.op = thread->op; +- boxes.num_boxes = 0; ++ 
span_thread_boxes_init(&boxes, thread->op, thread->clip); + + y1 = thread->extents.y1 - thread->draw_y; + y2 = thread->extents.y2 - thread->draw_y; +@@ -2190,6 +2227,52 @@ static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v) + } while (--h); + } + ++struct clipped_span { ++ span_func_t span; ++ const BoxRec *clip_start, *clip_end; ++}; ++ ++static void ++tor_blt_clipped(struct sna *sna, ++ struct sna_composite_spans_op *op, ++ pixman_region16_t *clip, ++ const BoxRec *box, ++ int coverage) ++{ ++ struct clipped_span *cs = (struct clipped_span *)clip; ++ const BoxRec *c; ++ ++ cs->clip_start = ++ find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1); ++ ++ c = cs->clip_start; ++ while (c != cs->clip_end) { ++ BoxRec clipped; ++ ++ if (box->y2 <= c->y1) ++ break; ++ ++ clipped = *box; ++ if (!box_intersect(&clipped, c++)) ++ continue; ++ ++ cs->span(sna, op, NULL, &clipped, coverage); ++ } ++} ++ ++inline static span_func_t ++clipped_span(struct clipped_span *cs, ++ span_func_t span, ++ const RegionRec *clip) ++{ ++ if (clip->data) { ++ cs->span = span; ++ region_get_boxes(clip, &cs->clip_start, &cs->clip_end); ++ span = tor_blt_clipped; ++ } ++ return span; ++} ++ + static void + tor_blt_src(struct sna *sna, + struct sna_composite_spans_op *op, +@@ -2203,25 +2286,6 @@ tor_blt_src(struct sna *sna, + } + + static void +-tor_blt_src_clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(®ion, box, 1); +- RegionIntersect(®ion, ®ion, clip); +- n = region_num_rects(®ion); +- box = region_rects(®ion); +- while (n--) +- tor_blt_src(sna, op, NULL, box++, coverage); +- pixman_region_fini(®ion); +-} +- +-static void + tor_blt_in(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, +@@ -2253,25 +2317,6 @@ tor_blt_in(struct sna *sna, + } + + static void 
+-tor_blt_in_clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- tor_blt_in(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} +- +-static void + tor_blt_add(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, +@@ -2310,25 +2355,6 @@ tor_blt_add(struct sna *sna, + } + + static void +-tor_blt_add_clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- tor_blt_add(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} +- +-static void + tor_blt_lerp32(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, +@@ -2343,6 +2369,7 @@ tor_blt_lerp32(struct sna *sna, + if (coverage == 0) + return; + ++ sigtrap_assert_active(); + ptr += box->y1 * stride + box->x1; + + h = box->y2 - box->y1; +@@ -2383,25 +2410,6 @@ tor_blt_lerp32(struct sna *sna, + } + } + +-static void +-tor_blt_lerp32_clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- tor_blt_lerp32(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} +- + struct pixman_inplace { + pixman_image_t *image, *source, *mask; + uint32_t color; +@@ -2431,24 +2439,6 @@ pixmask_span_solid(struct sna *sna, + 
pi->dx + box->x1, pi->dy + box->y1, + box->x2 - box->x1, box->y2 - box->y1); + } +-static void +-pixmask_span_solid__clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- pixmask_span_solid(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} + + static void + pixmask_span(struct sna *sna, +@@ -2471,24 +2461,6 @@ pixmask_span(struct sna *sna, + pi->dx + box->x1, pi->dy + box->y1, + box->x2 - box->x1, box->y2 - box->y1); + } +-static void +-pixmask_span__clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- pixmask_span(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} + + struct inplace_x8r8g8b8_thread { + xTrapezoid *traps; +@@ -2507,6 +2479,7 @@ static void inplace_x8r8g8b8_thread(void *arg) + struct inplace_x8r8g8b8_thread *thread = arg; + struct tor tor; + span_func_t span; ++ struct clipped_span clipped; + RegionPtr clip; + int y1, y2, n; + +@@ -2537,12 +2510,11 @@ static void inplace_x8r8g8b8_thread(void *arg) + inplace.stride = pixmap->devKind; + inplace.color = thread->color; + +- if (clip->data) +- span = tor_blt_lerp32_clipped; +- else +- span = tor_blt_lerp32; ++ span = clipped_span(&clipped, tor_blt_lerp32, clip); + +- tor_render(NULL, &tor, (void*)&inplace, clip, span, false); ++ tor_render(NULL, &tor, ++ (void*)&inplace, (void*)&clipped, ++ span, false); + } else if (thread->is_solid) { + struct pixman_inplace pi; + +@@ -2555,12 +2527,11 @@ static void 
inplace_x8r8g8b8_thread(void *arg) + 1, 1, pi.bits, 0); + pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); + +- if (clip->data) +- span = pixmask_span_solid__clipped; +- else +- span = pixmask_span_solid; ++ span = clipped_span(&clipped, pixmask_span_solid, clip); + +- tor_render(NULL, &tor, (void*)&pi, clip, span, false); ++ tor_render(NULL, &tor, ++ (void*)&pi, (void *)&clipped, ++ span, false); + + pixman_image_unref(pi.source); + pixman_image_unref(pi.image); +@@ -2579,12 +2550,11 @@ static void inplace_x8r8g8b8_thread(void *arg) + pi.bits = pixman_image_get_data(pi.mask); + pi.op = thread->op; + +- if (clip->data) +- span = pixmask_span__clipped; +- else +- span = pixmask_span; ++ span = clipped_span(&clipped, pixmask_span, clip); + +- tor_render(NULL, &tor, (void*)&pi, clip, span, false); ++ tor_render(NULL, &tor, ++ (void*)&pi, (void *)&clipped, ++ span, false); + + pixman_image_unref(pi.mask); + pixman_image_unref(pi.source); +@@ -2698,6 +2668,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + if (num_threads == 1) { + struct tor tor; + span_func_t span; ++ struct clipped_span clipped; + + if (!tor_init(&tor, &region.extents, 2*ntrap)) + return true; +@@ -2723,17 +2694,15 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + inplace.stride = pixmap->devKind; + inplace.color = color; + +- if (dst->pCompositeClip->data) +- span = tor_blt_lerp32_clipped; +- else +- span = tor_blt_lerp32; ++ span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip); + + DBG(("%s: render inplace op=%d, color=%08x\n", + __FUNCTION__, op, color)); + + if (sigtrap_get() == 0) { +- tor_render(NULL, &tor, (void*)&inplace, +- dst->pCompositeClip, span, false); ++ tor_render(NULL, &tor, ++ (void*)&inplace, (void*)&clipped, ++ span, false); + sigtrap_put(); + } + } else if (is_solid) { +@@ -2748,15 +2717,12 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + 1, 1, pi.bits, 0); + pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); + +- if (dst->pCompositeClip->data) +- span = 
pixmask_span_solid__clipped; +- else +- span = pixmask_span_solid; ++ span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip); + + if (sigtrap_get() == 0) { +- tor_render(NULL, &tor, (void*)&pi, +- dst->pCompositeClip, span, +- false); ++ tor_render(NULL, &tor, ++ (void*)&pi, (void*)&clipped, ++ span, false); + sigtrap_put(); + } + +@@ -2777,15 +2743,12 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + pi.bits = pixman_image_get_data(pi.mask); + pi.op = op; + +- if (dst->pCompositeClip->data) +- span = pixmask_span__clipped; +- else +- span = pixmask_span; ++ span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip); + + if (sigtrap_get() == 0) { +- tor_render(NULL, &tor, (void*)&pi, +- dst->pCompositeClip, span, +- false); ++ tor_render(NULL, &tor, ++ (void*)&pi, (void*)&clipped, ++ span, false); + sigtrap_put(); + } + +@@ -2847,9 +2810,9 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + + struct inplace_thread { + xTrapezoid *traps; +- RegionPtr clip; + span_func_t span; + struct inplace inplace; ++ struct clipped_span clipped; + BoxRec extents; + int dx, dy; + int draw_x, draw_y; +@@ -2874,8 +2837,9 @@ static void inplace_thread(void *arg) + tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy); + } + +- tor_render(NULL, &tor, (void*)&thread->inplace, +- thread->clip, thread->span, thread->unbounded); ++ tor_render(NULL, &tor, ++ (void*)&thread->inplace, (void*)&thread->clipped, ++ thread->span, thread->unbounded); + + tor_fini(&tor); + } +@@ -2889,6 +2853,7 @@ imprecise_trapezoid_span_inplace(struct sna *sna, + bool fallback) + { + struct inplace inplace; ++ struct clipped_span clipped; + span_func_t span; + PixmapPtr pixmap; + struct sna_pixmap *priv; +@@ -3005,21 +2970,12 @@ imprecise_trapezoid_span_inplace(struct sna *sna, + region.extents.x2, region.extents.y2)); + + if (op == PictOpSrc) { +- if (dst->pCompositeClip->data) +- span = tor_blt_src_clipped; +- else +- span = tor_blt_src; ++ span = tor_blt_src; + } else if (op == 
PictOpIn) { +- if (dst->pCompositeClip->data) +- span = tor_blt_in_clipped; +- else +- span = tor_blt_in; ++ span = tor_blt_in; + } else { + assert(op == PictOpAdd); +- if (dst->pCompositeClip->data) +- span = tor_blt_add_clipped; +- else +- span = tor_blt_add; ++ span = tor_blt_add; + } + + DBG(("%s: move-to-cpu\n", __FUNCTION__)); +@@ -3037,6 +2993,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna, + inplace.stride = pixmap->devKind; + inplace.opacity = color >> 24; + ++ span = clipped_span(&clipped, span, dst->pCompositeClip); ++ + num_threads = 1; + if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) + num_threads = sna_use_threads(region.extents.x2 - region.extents.x1, +@@ -3057,8 +3015,9 @@ imprecise_trapezoid_span_inplace(struct sna *sna, + } + + if (sigtrap_get() == 0) { +- tor_render(NULL, &tor, (void*)&inplace, +- dst->pCompositeClip, span, unbounded); ++ tor_render(NULL, &tor, ++ (void*)&inplace, (void *)&clipped, ++ span, unbounded); + sigtrap_put(); + } + +@@ -3075,8 +3034,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna, + threads[0].traps = traps; + threads[0].ntrap = ntrap; + threads[0].inplace = inplace; ++ threads[0].clipped = clipped; + threads[0].extents = region.extents; +- threads[0].clip = dst->pCompositeClip; + threads[0].span = span; + threads[0].unbounded = unbounded; + threads[0].dx = dx; +@@ -3707,8 +3666,7 @@ tristrip_thread(void *arg) + if (!tor_init(&tor, &thread->extents, 2*thread->count)) + return; + +- boxes.op = thread->op; +- boxes.num_boxes = 0; ++ span_thread_boxes_init(&boxes, thread->op, thread->clip); + + cw = 0; ccw = 1; + polygon_add_line(tor.polygon, +@@ -3874,7 +3832,7 @@ imprecise_tristrip_span_converter(struct sna *sna, + break; + } while (1); + polygon_add_line(tor.polygon, +- &points[cw], &points[2+ccw], ++ &points[cw], &points[ccw], + dx, dy); + assert(tor.polygon->num_edges <= 2*count); + +diff --git a/src/sna/sna_trapezoids_mono.c b/src/sna/sna_trapezoids_mono.c +index 808703a9..07a7867d 100644 +--- 
a/src/sna/sna_trapezoids_mono.c ++++ b/src/sna/sna_trapezoids_mono.c +@@ -72,13 +72,14 @@ struct mono { + struct sna *sna; + struct sna_composite_op op; + pixman_region16_t clip; ++ const BoxRec *clip_start, *clip_end; + + fastcall void (*span)(struct mono *, int, int, BoxPtr); + + struct mono_polygon polygon; + }; + +-#define I(x) pixman_fixed_to_int ((x) + pixman_fixed_1_minus_e/2) ++#define I(x) pixman_fixed_to_int((x) + pixman_fixed_1_minus_e/2) + + static struct quorem + floored_muldivrem(int32_t x, int32_t a, int32_t b) +@@ -249,22 +250,22 @@ mono_add_line(struct mono *mono, + + e->dxdy = floored_muldivrem(dx, pixman_fixed_1, dy); + +- e->x = floored_muldivrem((ytop - dst_y) * pixman_fixed_1 + pixman_fixed_1_minus_e/2 - p1->y, ++ e->x = floored_muldivrem((ytop - dst_y) * pixman_fixed_1 + pixman_fixed_1/2 - p1->y, + dx, dy); + e->x.quo += p1->x; + e->x.rem -= dy; + + e->dy = dy; +- +- __DBG(("%s: initial x=%d [%d.%d/%d] + dxdy=%d.%d/%d\n", +- __FUNCTION__, +- I(e->x.quo), e->x.quo, e->x.rem, e->dy, +- e->dxdy.quo, e->dxdy.rem, e->dy)); + } + e->x.quo += dst_x*pixman_fixed_1; ++ __DBG(("%s: initial x=%d [%d.%d/%d] + dxdy=%d.%d/%d\n", ++ __FUNCTION__, ++ I(e->x.quo), e->x.quo, e->x.rem, e->dy, ++ e->dxdy.quo, e->dxdy.rem, e->dy)); + + { + struct mono_edge **ptail = &polygon->y_buckets[ytop - mono->clip.extents.y1]; ++ assert(ytop - mono->clip.extents.y1 < mono->clip.extents.y2 - mono->clip.extents.y1); + if (*ptail) + (*ptail)->prev = e; + e->next = *ptail; +@@ -368,6 +369,10 @@ static struct mono_edge *mono_filter(struct mono_edge *edges) + e->x.rem == n->x.rem && + e->dxdy.quo == n->dxdy.quo && + e->dxdy.rem == n->dxdy.rem) { ++ assert(e->dy == n->dy); ++ __DBG(("%s: discarding cancellation pair (%d.%d) + (%d.%d)\n", ++ __FUNCTION__, e->x.quo, e->x.rem, e->dxdy.quo, e->dxdy.rem)); ++ + if (e->prev) + e->prev->next = n->next; + else +@@ -378,8 +383,11 @@ static struct mono_edge *mono_filter(struct mono_edge *edges) + break; + + e = n->next; +- } else ++ } else 
{ ++ __DBG(("%s: adding edge (%d.%d) + (%d.%d)/%d, height=%d\n", ++ __FUNCTION__, n->x.quo, n->x.rem, n->dxdy.quo, n->dxdy.rem, n->dy, n->height_left)); + e = n; ++ } + } + + return edges; +@@ -474,6 +482,34 @@ mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box) + c->op.box(c->sna, &c->op, box); + } + ++fastcall static void ++mono_span__clipped(struct mono *c, int x1, int x2, BoxPtr box) ++{ ++ const BoxRec *b; ++ ++ __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); ++ ++ c->clip_start = ++ find_clip_box_for_y(c->clip_start, c->clip_end, box->y1); ++ ++ b = c->clip_start; ++ while (b != c->clip_end) { ++ BoxRec clipped; ++ ++ if (box->y2 <= b->y1) ++ break; ++ ++ clipped.x1 = x1; ++ clipped.x2 = x2; ++ clipped.y1 = box->y1; ++ clipped.y2 = box->y2; ++ if (!box_intersect(&clipped, b++)) ++ continue; ++ ++ c->op.box(c->sna, &c->op, &clipped); ++ } ++} ++ + struct mono_span_thread_boxes { + const struct sna_composite_op *op; + #define MONO_SPAN_MAX_BOXES (8192/sizeof(BoxRec)) +@@ -482,40 +518,45 @@ struct mono_span_thread_boxes { + }; + + inline static void +-thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count) ++thread_mono_span_add_box(struct mono *c, const BoxRec *box) + { + struct mono_span_thread_boxes *b = c->op.priv; + +- assert(count > 0 && count <= MONO_SPAN_MAX_BOXES); +- if (unlikely(b->num_boxes + count > MONO_SPAN_MAX_BOXES)) { ++ if (unlikely(b->num_boxes == MONO_SPAN_MAX_BOXES)) { + b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes); + b->num_boxes = 0; + } + +- memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec)); +- b->num_boxes += count; ++ b->boxes[b->num_boxes++] = *box; + assert(b->num_boxes <= MONO_SPAN_MAX_BOXES); + } + + fastcall static void + thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box) + { +- pixman_region16_t region; ++ const BoxRec *b; + + __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); + +- box->x1 = x1; +- box->x2 = x2; ++ c->clip_start = ++ find_clip_box_for_y(c->clip_start, 
c->clip_end, box->y1); + +- assert(c->clip.data); ++ b = c->clip_start; ++ while (b != c->clip_end) { ++ BoxRec clipped; ++ ++ if (box->y2 <= b->y1) ++ break; ++ ++ clipped.x1 = x1; ++ clipped.x2 = x2; ++ clipped.y1 = box->y1; ++ clipped.y2 = box->y2; ++ if (!box_intersect(&clipped, b++)) ++ continue; + +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, &c->clip); +- if (region_num_rects(&region)) +- thread_mono_span_add_boxes(c, +- region_rects(&region), +- region_num_rects(&region)); +- pixman_region_fini(&region); ++ thread_mono_span_add_box(c, &clipped); ++ } + } + + fastcall static void +@@ -525,7 +566,7 @@ thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box) + + box->x1 = x1; + box->x2 = x2; +- thread_mono_span_add_boxes(c, box, 1); ++ thread_mono_span_add_box(c, box); + } + + inline static void +@@ -537,6 +578,8 @@ mono_row(struct mono *c, int16_t y, int16_t h) + int winding = 0; + BoxRec box; + ++ __DBG(("%s: y=%d, h=%d\n", __FUNCTION__, y, h)); ++ + DBG_MONO_EDGES(edge); + VALIDATE_MONO_EDGES(&c->head); + +@@ -547,6 +590,8 @@ mono_row(struct mono *c, int16_t y, int16_t h) + struct mono_edge *next = edge->next; + int16_t xend = I(edge->x.quo); + ++ __DBG(("%s: adding edge dir=%d [winding=%d], x=%d [%d]\n", ++ __FUNCTION__, edge->dir, winding + edge->dir, xend, edge->x.quo)); + if (--edge->height_left) { + if (edge->dy) { + edge->x.quo += edge->dxdy.quo; +@@ -555,6 +600,8 @@ mono_row(struct mono *c, int16_t y, int16_t h) + ++edge->x.quo; + edge->x.rem -= edge->dy; + } ++ __DBG(("%s: stepped edge (%d.%d) + (%d.%d)/%d, height=%d, prev_x=%d\n", ++ __FUNCTION__, edge->x.quo, edge->x.rem, edge->dxdy.quo, edge->dxdy.rem, edge->dy, edge->height_left, edge->x.quo)); + } + + if (edge->x.quo < prev_x) { +@@ -578,17 +625,22 @@ mono_row(struct mono *c, int16_t y, int16_t h) + winding += edge->dir; + if (winding == 0) { + assert(I(next->x.quo) >= xend); +- if (I(next->x.quo) > xend + 1) { ++ if (I(next->x.quo) > xend) { ++ __DBG(("%s: end span: %d\n", 
__FUNCTION__, xend)); + if (xstart < c->clip.extents.x1) + xstart = c->clip.extents.x1; + if (xend > c->clip.extents.x2) + xend = c->clip.extents.x2; +- if (xend > xstart) ++ if (xend > xstart) { ++ __DBG(("%s: emit span [%d, %d]\n", __FUNCTION__, xstart, xend)); + c->span(c, xstart, xend, &box); ++ } + xstart = INT16_MIN; + } +- } else if (xstart == INT16_MIN) ++ } else if (xstart == INT16_MIN) { ++ __DBG(("%s: starting new span: %d\n", __FUNCTION__, xend)); + xstart = xend; ++ } + + edge = next; + } +@@ -650,9 +702,14 @@ mono_render(struct mono *mono) + for (i = 0; i < h; i = j) { + j = i + 1; + ++ __DBG(("%s: row=%d, new edges? %d\n", __FUNCTION__, ++ i, polygon->y_buckets[i] != NULL)); ++ + if (polygon->y_buckets[i]) + mono_merge_edges(mono, polygon->y_buckets[i]); + ++ __DBG(("%s: row=%d, vertical? %d\n", __FUNCTION__, ++ i, mono->is_vertical)); + if (mono->is_vertical) { + struct mono_edge *e = mono->head.next; + int min_height = h - i; +@@ -667,6 +724,7 @@ mono_render(struct mono *mono) + j++; + if (j != i + 1) + mono_step_edges(mono, j - (i + 1)); ++ __DBG(("%s: %d vertical rows\n", __FUNCTION__, j-i)); + } + + mono_row(mono, i, j-i); +@@ -717,6 +775,7 @@ mono_span_thread(void *arg) + if (RegionNil(&mono.clip)) + return; + } ++ region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end); + + boxes.op = thread->op; + boxes.num_boxes = 0; +@@ -891,9 +950,12 @@ mono_trapezoids_span_converter(struct sna *sna, + + if (mono.clip.data == NULL && mono.op.damage == NULL) + mono.span = mono_span__fast; ++ else if (mono.clip.data != NULL && mono.op.damage == NULL) ++ mono.span = mono_span__clipped; + else + mono.span = mono_span; + ++ region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end); + mono_render(&mono); + mono.op.done(mono.sna, &mono.op); + mono_fini(&mono); +@@ -939,6 +1001,7 @@ mono_trapezoids_span_converter(struct sna *sna, + mono.clip.extents.x2 - mono.clip.extents.x1, + mono.clip.extents.y2 - mono.clip.extents.y1, + COMPOSITE_PARTIAL, 
memset(&mono.op, 0, sizeof(mono.op)))) { ++ region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end); + mono_render(&mono); + mono.op.done(mono.sna, &mono.op); + } +@@ -974,6 +1037,7 @@ mono_inplace_fill_box(struct sna *sna, + box->x2 - box->x1, + box->y2 - box->y1, + fill->color)); ++ sigtrap_assert_active(); + pixman_fill(fill->data, fill->stride, fill->bpp, + box->x1, box->y1, + box->x2 - box->x1, +@@ -995,6 +1059,7 @@ mono_inplace_fill_boxes(struct sna *sna, + box->x2 - box->x1, + box->y2 - box->y1, + fill->color)); ++ sigtrap_assert_active(); + pixman_fill(fill->data, fill->stride, fill->bpp, + box->x1, box->y1, + box->x2 - box->x1, +@@ -1382,10 +1447,13 @@ mono_triangles_span_converter(struct sna *sna, + mono_render(&mono); + mono.op.done(mono.sna, &mono.op); + } ++ mono_fini(&mono); + + if (!was_clear && !operator_is_bounded(op)) { + xPointFixed p1, p2; + ++ DBG(("%s: performing unbounded clear\n", __FUNCTION__)); ++ + if (!mono_init(&mono, 2+3*count)) + return false; + +@@ -1431,7 +1499,6 @@ mono_triangles_span_converter(struct sna *sna, + mono_fini(&mono); + } + +- mono_fini(&mono); + REGION_UNINIT(NULL, &mono.clip); + return true; + } +diff --git a/src/sna/sna_trapezoids_precise.c b/src/sna/sna_trapezoids_precise.c +index 9187ab48..242b4acb 100644 +--- a/src/sna/sna_trapezoids_precise.c ++++ b/src/sna/sna_trapezoids_precise.c +@@ -1023,6 +1023,16 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges) + static void + tor_add_trapezoid(struct tor *tor, const xTrapezoid *t, int dx, int dy) + { ++ if (!xTrapezoidValid(t)) { ++ __DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n", ++ __FUNCTION__, ++ t->top, t->bottom, ++ t->left.p1.x, t->left.p1.y, ++ t->left.p2.x, t->left.p2.y, ++ t->right.p1.x, t->right.p1.y, ++ t->right.p2.x, t->right.p2.y)); ++ return; ++ } + polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy); + polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy); + } 
+@@ -1635,31 +1645,27 @@ struct span_thread { + #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box)) + struct span_thread_boxes { + const struct sna_composite_spans_op *op; ++ const BoxRec *clip_start, *clip_end; + int num_boxes; + struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES]; + }; + +-static void span_thread_add_boxes(struct sna *sna, void *data, +- const BoxRec *box, int count, float alpha) ++static void span_thread_add_box(struct sna *sna, void *data, ++ const BoxRec *box, float alpha) + { + struct span_thread_boxes *b = data; + +- __DBG(("%s: adding %d boxes with alpha=%f\n", +- __FUNCTION__, count, alpha)); ++ __DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha)); + +- assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES); +- if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) { +- DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count)); +- assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); ++ if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) { ++ DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes)); + b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes); + b->num_boxes = 0; + } + +- do { +- b->boxes[b->num_boxes].box = *box++; +- b->boxes[b->num_boxes].alpha = alpha; +- b->num_boxes++; +- } while (--count); ++ b->boxes[b->num_boxes].box = *box++; ++ b->boxes[b->num_boxes].alpha = alpha; ++ b->num_boxes++; + assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); + } + +@@ -1670,8 +1676,22 @@ span_thread_box(struct sna *sna, + const BoxRec *box, + int coverage) + { ++ struct span_thread_boxes *b = (struct span_thread_boxes *)op; ++ + __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); +- span_thread_add_boxes(sna, op, box, 1, AREA_TO_FLOAT(coverage)); ++ if (b->num_boxes) { ++ struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1]; ++ if (bb->box.x1 == box->x1 && ++ bb->box.x2 == box->x2 && ++ bb->box.y2 == box->y1 && ++ bb->alpha == AREA_TO_FLOAT(coverage)) { ++ bb->box.y2 = 
box->y2; ++ __DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2)); ++ return; ++ } ++ } ++ ++ span_thread_add_box(sna, op, box, AREA_TO_FLOAT(coverage)); + } + + static void +@@ -1681,20 +1701,28 @@ span_thread_clipped_box(struct sna *sna, + const BoxRec *box, + int coverage) + { +- pixman_region16_t region; ++ struct span_thread_boxes *b = (struct span_thread_boxes *)op; ++ const BoxRec *c; + + __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, + AREA_TO_FLOAT(coverage))); + +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- if (region_num_rects(&region)) { +- span_thread_add_boxes(sna, op, +- region_rects(&region), +- region_num_rects(&region), +- AREA_TO_FLOAT(coverage)); ++ b->clip_start = ++ find_clip_box_for_y(b->clip_start, b->clip_end, box->y1); ++ ++ c = b->clip_start; ++ while (c != b->clip_end) { ++ BoxRec clipped; ++ ++ if (box->y2 <= c->y1) ++ break; ++ ++ clipped = *box; ++ if (!box_intersect(&clipped, c++)) ++ continue; ++ ++ span_thread_add_box(sna, op, &clipped, AREA_TO_FLOAT(coverage)); + } +- pixman_region_fini(&region); + } + + static span_func_t +@@ -1712,7 +1740,7 @@ thread_choose_span(struct sna_composite_spans_op *tmp, + + assert(!is_mono(dst, maskFormat)); + assert(tmp->thread_boxes); +- DBG(("%s: clipped? %d\n", __FUNCTION__, clip->data != NULL)); ++ DBG(("%s: clipped? 
%d x %d\n", __FUNCTION__, clip->data != NULL, region_num_rects(clip))); + if (clip->data) + span = span_thread_clipped_box; + else +@@ -1721,6 +1749,17 @@ thread_choose_span(struct sna_composite_spans_op *tmp, + return span; + } + ++inline static void ++span_thread_boxes_init(struct span_thread_boxes *boxes, ++ const struct sna_composite_spans_op *op, ++ const RegionRec *clip) ++{ ++ boxes->op = op; ++ boxes->clip_start = region_rects(clip); ++ boxes->clip_end = boxes->clip_start + region_num_rects(clip); ++ boxes->num_boxes = 0; ++} ++ + static void + span_thread(void *arg) + { +@@ -1733,8 +1772,7 @@ span_thread(void *arg) + if (!tor_init(&tor, &thread->extents, 2*thread->ntrap)) + return; + +- boxes.op = thread->op; +- boxes.num_boxes = 0; ++ span_thread_boxes_init(&boxes, thread->op, thread->clip); + + y1 = thread->extents.y1 - thread->draw_y; + y2 = thread->extents.y2 - thread->draw_y; +@@ -2183,6 +2221,52 @@ static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity) + return opacity == 255 ? 
coverage : mul_8_8(coverage, opacity); + } + ++struct clipped_span { ++ span_func_t span; ++ const BoxRec *clip_start, *clip_end; ++}; ++ ++static void ++tor_blt_clipped(struct sna *sna, ++ struct sna_composite_spans_op *op, ++ pixman_region16_t *clip, ++ const BoxRec *box, ++ int coverage) ++{ ++ struct clipped_span *cs = (struct clipped_span *)clip; ++ const BoxRec *c; ++ ++ cs->clip_start = ++ find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1); ++ ++ c = cs->clip_start; ++ while (c != cs->clip_end) { ++ BoxRec clipped; ++ ++ if (box->y2 <= c->y1) ++ break; ++ ++ clipped = *box; ++ if (!box_intersect(&clipped, c++)) ++ continue; ++ ++ cs->span(sna, op, NULL, &clipped, coverage); ++ } ++} ++ ++inline static span_func_t ++clipped_span(struct clipped_span *cs, ++ span_func_t span, ++ const RegionRec *clip) ++{ ++ if (clip->data) { ++ cs->span = span; ++ region_get_boxes(clip, &cs->clip_start, &cs->clip_end); ++ span = tor_blt_clipped; ++ } ++ return span; ++} ++ + static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v) + { + uint8_t *ptr = in->ptr; +@@ -2218,25 +2302,6 @@ tor_blt_src(struct sna *sna, + } + + static void +-tor_blt_src_clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- tor_blt_src(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} +- +-static void + tor_blt_in(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, +@@ -2268,25 +2333,6 @@ tor_blt_in(struct sna *sna, + } + + static void +-tor_blt_in_clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, 
box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- tor_blt_in(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} +- +-static void + tor_blt_add(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, +@@ -2325,25 +2371,6 @@ tor_blt_add(struct sna *sna, + } + + static void +-tor_blt_add_clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- tor_blt_add(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} +- +-static void + tor_blt_lerp32(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, +@@ -2358,6 +2385,7 @@ tor_blt_lerp32(struct sna *sna, + if (coverage == 0) + return; + ++ sigtrap_assert_active(); + ptr += box->y1 * stride + box->x1; + + h = box->y2 - box->y1; +@@ -2396,25 +2424,6 @@ tor_blt_lerp32(struct sna *sna, + } + } + +-static void +-tor_blt_lerp32_clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- tor_blt_lerp32(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} +- + struct pixman_inplace { + pixman_image_t *image, *source, *mask; + uint32_t color; +@@ -2442,24 +2451,6 @@ pixmask_span_solid(struct sna *sna, + pi->dx + box->x1, pi->dy + box->y1, + box->x2 - box->x1, box->y2 - box->y1); + } +-static void +-pixmask_span_solid__clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec 
*box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- pixmask_span_solid(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} + + static void + pixmask_span(struct sna *sna, +@@ -2480,24 +2471,6 @@ pixmask_span(struct sna *sna, + pi->dx + box->x1, pi->dy + box->y1, + box->x2 - box->x1, box->y2 - box->y1); + } +-static void +-pixmask_span__clipped(struct sna *sna, +- struct sna_composite_spans_op *op, +- pixman_region16_t *clip, +- const BoxRec *box, +- int coverage) +-{ +- pixman_region16_t region; +- int n; +- +- pixman_region_init_rects(&region, box, 1); +- RegionIntersect(&region, &region, clip); +- n = region_num_rects(&region); +- box = region_rects(&region); +- while (n--) +- pixmask_span(sna, op, NULL, box++, coverage); +- pixman_region_fini(&region); +-} + + struct inplace_x8r8g8b8_thread { + xTrapezoid *traps; +@@ -2516,6 +2489,7 @@ static void inplace_x8r8g8b8_thread(void *arg) + struct inplace_x8r8g8b8_thread *thread = arg; + struct tor tor; + span_func_t span; ++ struct clipped_span clipped; + RegionPtr clip; + int y1, y2, n; + +@@ -2546,12 +2520,11 @@ static void inplace_x8r8g8b8_thread(void *arg) + inplace.stride = pixmap->devKind; + inplace.color = thread->color; + +- if (clip->data) +- span = tor_blt_lerp32_clipped; +- else +- span = tor_blt_lerp32; ++ span = clipped_span(&clipped, tor_blt_lerp32, clip); + +- tor_render(NULL, &tor, (void*)&inplace, clip, span, false); ++ tor_render(NULL, &tor, ++ (void*)&inplace, (void *)&clipped, ++ span, false); + } else if (thread->is_solid) { + struct pixman_inplace pi; + +@@ -2564,10 +2537,7 @@ static void inplace_x8r8g8b8_thread(void *arg) + 1, 1, pi.bits, 0); + pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); + +- if (clip->data) +- span = pixmask_span_solid__clipped; +- else +- span = pixmask_span_solid; ++ span = 
clipped_span(&clipped, pixmask_span_solid, clip); + + tor_render(NULL, &tor, (void*)&pi, clip, span, false); + +@@ -2588,12 +2558,11 @@ static void inplace_x8r8g8b8_thread(void *arg) + pi.bits = pixman_image_get_data(pi.mask); + pi.op = thread->op; + +- if (clip->data) +- span = pixmask_span__clipped; +- else +- span = pixmask_span; ++ span = clipped_span(&clipped, pixmask_span, clip); + +- tor_render(NULL, &tor, (void*)&pi, clip, span, false); ++ tor_render(NULL, &tor, ++ (void*)&pi, (void *)&clipped, ++ span, false); + + pixman_image_unref(pi.mask); + pixman_image_unref(pi.source); +@@ -2712,6 +2681,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + if (num_threads == 1) { + struct tor tor; + span_func_t span; ++ struct clipped_span clipped; + + if (!tor_init(&tor, &region.extents, 2*ntrap)) + return true; +@@ -2737,17 +2707,14 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + inplace.stride = pixmap->devKind; + inplace.color = color; + +- if (dst->pCompositeClip->data) +- span = tor_blt_lerp32_clipped; +- else +- span = tor_blt_lerp32; +- ++ span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip); + DBG(("%s: render inplace op=%d, color=%08x\n", + __FUNCTION__, op, color)); + + if (sigtrap_get() == 0) { +- tor_render(NULL, &tor, (void*)&inplace, +- dst->pCompositeClip, span, false); ++ tor_render(NULL, &tor, ++ (void*)&inplace, (void*)&clipped, ++ span, false); + sigtrap_put(); + } + } else if (is_solid) { +@@ -2762,15 +2729,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + 1, 1, pi.bits, 0); + pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL); + +- if (dst->pCompositeClip->data) +- span = pixmask_span_solid__clipped; +- else +- span = pixmask_span_solid; +- ++ span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip); + if (sigtrap_get() == 0) { +- tor_render(NULL, &tor, (void*)&pi, +- dst->pCompositeClip, span, +- false); ++ tor_render(NULL, &tor, ++ (void*)&pi, (void*)&clipped, ++ span, false); + sigtrap_put(); + } + +@@ -2791,15 
+2754,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + pi.bits = pixman_image_get_data(pi.mask); + pi.op = op; + +- if (dst->pCompositeClip->data) +- span = pixmask_span__clipped; +- else +- span = pixmask_span; +- ++ span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip); + if (sigtrap_get() == 0) { +- tor_render(NULL, &tor, (void*)&pi, +- dst->pCompositeClip, span, +- false); ++ tor_render(NULL, &tor, ++ (void*)&pi, (void *)&clipped, ++ span, false); + sigtrap_put(); + } + +@@ -2861,9 +2820,9 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, + + struct inplace_thread { + xTrapezoid *traps; +- RegionPtr clip; + span_func_t span; + struct inplace inplace; ++ struct clipped_span clipped; + BoxRec extents; + int dx, dy; + int draw_x, draw_y; +@@ -2888,8 +2847,9 @@ static void inplace_thread(void *arg) + tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy); + } + +- tor_render(NULL, &tor, (void*)&thread->inplace, +- thread->clip, thread->span, thread->unbounded); ++ tor_render(NULL, &tor, ++ (void*)&thread->inplace, (void*)&thread->clipped, ++ thread->span, thread->unbounded); + + tor_fini(&tor); + } +@@ -2903,6 +2863,7 @@ precise_trapezoid_span_inplace(struct sna *sna, + bool fallback) + { + struct inplace inplace; ++ struct clipped_span clipped; + span_func_t span; + PixmapPtr pixmap; + struct sna_pixmap *priv; +@@ -3020,21 +2981,12 @@ precise_trapezoid_span_inplace(struct sna *sna, + dst->pCompositeClip->data != NULL)); + + if (op == PictOpSrc) { +- if (dst->pCompositeClip->data) +- span = tor_blt_src_clipped; +- else +- span = tor_blt_src; ++ span = tor_blt_src; + } else if (op == PictOpIn) { +- if (dst->pCompositeClip->data) +- span = tor_blt_in_clipped; +- else +- span = tor_blt_in; ++ span = tor_blt_in; + } else { + assert(op == PictOpAdd); +- if (dst->pCompositeClip->data) +- span = tor_blt_add_clipped; +- else +- span = tor_blt_add; ++ span = tor_blt_add; + } + + DBG(("%s: move-to-cpu(dst)\n", __FUNCTION__)); +@@ -3052,6 +3004,8 @@ 
precise_trapezoid_span_inplace(struct sna *sna, + inplace.stride = pixmap->devKind; + inplace.opacity = color >> 24; + ++ span = clipped_span(&clipped, span, dst->pCompositeClip); ++ + num_threads = 1; + if (!NO_GPU_THREADS && + (flags & COMPOSITE_SPANS_RECTILINEAR) == 0) +@@ -3074,8 +3028,9 @@ precise_trapezoid_span_inplace(struct sna *sna, + } + + if (sigtrap_get() == 0) { +- tor_render(NULL, &tor, (void*)&inplace, +- dst->pCompositeClip, span, unbounded); ++ tor_render(NULL, &tor, ++ (void*)&inplace, (void *)&clipped, ++ span, unbounded); + sigtrap_put(); + } + +@@ -3093,7 +3048,7 @@ precise_trapezoid_span_inplace(struct sna *sna, + threads[0].ntrap = ntrap; + threads[0].inplace = inplace; + threads[0].extents = region.extents; +- threads[0].clip = dst->pCompositeClip; ++ threads[0].clipped = clipped; + threads[0].span = span; + threads[0].unbounded = unbounded; + threads[0].dx = dx; +@@ -3316,8 +3271,7 @@ tristrip_thread(void *arg) + if (!tor_init(&tor, &thread->extents, 2*thread->count)) + return; + +- boxes.op = thread->op; +- boxes.num_boxes = 0; ++ span_thread_boxes_init(&boxes, thread->op, thread->clip); + + cw = 0; ccw = 1; + polygon_add_line(tor.polygon, +diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c +index ed0e7b31..e2b11c31 100644 +--- a/src/sna/sna_video.c ++++ b/src/sna/sna_video.c +@@ -591,6 +591,72 @@ use_gtt: /* copy data, must use GTT so that we keep the overlay uncached */ + return true; + } + ++void sna_video_fill_colorkey(struct sna_video *video, ++ const RegionRec *clip) ++{ ++ struct sna *sna = video->sna; ++ PixmapPtr front = sna->front; ++ struct kgem_bo *bo = __sna_pixmap_get_bo(front); ++ uint8_t *dst, *tmp; ++ int w, width; ++ ++ if (video->AlwaysOnTop || RegionEqual(&video->clip, (RegionPtr)clip)) ++ return; ++ ++ assert(bo); ++ if (!wedged(sna) && ++ sna_blt_fill_boxes(sna, GXcopy, bo, ++ front->drawable.bitsPerPixel, ++ video->color_key, ++ region_rects(clip), ++ region_num_rects(clip))) { ++ RegionCopy(&video->clip, 
(RegionPtr)clip); ++ return; ++ } ++ ++ dst = kgem_bo_map__gtt(&sna->kgem, bo); ++ if (dst == NULL) ++ return; ++ ++ w = front->drawable.bitsPerPixel/8; ++ width = (clip->extents.x2 - clip->extents.x1) * w; ++ tmp = malloc(width); ++ if (tmp == NULL) ++ return; ++ ++ memcpy(tmp, &video->color_key, w); ++ while (2 * w < width) { ++ memcpy(tmp + w, tmp, w); ++ w *= 2; ++ } ++ if (w < width) ++ memcpy(tmp + w, tmp, width - w); ++ ++ if (sigtrap_get() == 0) { ++ const BoxRec *box = region_rects(clip); ++ int n = region_num_rects(clip); ++ ++ w = front->drawable.bitsPerPixel/8; ++ do { ++ int y = box->y1; ++ uint8_t *row = dst + y*bo->pitch + w*box->x1; ++ ++ width = (box->x2 - box->x1) * w; ++ while (y < box->y2) { ++ memcpy(row, tmp, width); ++ row += bo->pitch; ++ y++; ++ } ++ box++; ++ } while (--n); ++ sigtrap_put(); ++ ++ RegionCopy(&video->clip, (RegionPtr)clip); ++ } ++ ++ free(tmp); ++} ++ + XvAdaptorPtr sna_xv_adaptor_alloc(struct sna *sna) + { + XvAdaptorPtr new_adaptors; +diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h +index f21605fc..39cb725f 100644 +--- a/src/sna/sna_video.h ++++ b/src/sna/sna_video.h +@@ -72,6 +72,8 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE. + struct sna_video { + struct sna *sna; + ++ int idx; /* XXX expose struct plane instead? 
*/ ++ + int brightness; + int contrast; + int saturation; +@@ -193,6 +195,9 @@ bool + sna_video_copy_data(struct sna_video *video, + struct sna_video_frame *frame, + const uint8_t *buf); ++void ++sna_video_fill_colorkey(struct sna_video *video, ++ const RegionRec *clip); + + void sna_video_buffer_fini(struct sna_video *video); + +@@ -210,4 +215,26 @@ sna_window_set_port(WindowPtr window, XvPortPtr port) + ((void **)__get_private(window, sna_window_key))[2] = port; + } + ++static inline int offset_and_clip(int x, int dx) ++{ ++ x += dx; ++ if (x <= 0) ++ return 0; ++ if (x >= MAXSHORT) ++ return MAXSHORT; ++ return x; ++} ++ ++static inline void init_video_region(RegionRec *region, ++ DrawablePtr draw, ++ int drw_x, int drw_y, ++ int drw_w, int drw_h) ++{ ++ region->extents.x1 = offset_and_clip(draw->x, drw_x); ++ region->extents.y1 = offset_and_clip(draw->y, drw_y); ++ region->extents.x2 = offset_and_clip(draw->x, drw_x + drw_w); ++ region->extents.y2 = offset_and_clip(draw->y, drw_y + drw_h); ++ region->data = NULL; ++} ++ + #endif /* SNA_VIDEO_H */ +diff --git a/src/sna/sna_video_overlay.c b/src/sna/sna_video_overlay.c +index ac81f1a0..9bc5ce40 100644 +--- a/src/sna/sna_video_overlay.c ++++ b/src/sna/sna_video_overlay.c +@@ -130,7 +130,7 @@ static int sna_video_overlay_stop(ddStopVideo_ARGS) + + DBG(("%s()\n", __FUNCTION__)); + +- REGION_EMPTY(scrn->pScreen, &video->clip); ++ REGION_EMPTY(to_screen_from_sna(sna), &video->clip); + + request.flags = 0; + (void)drmIoctl(sna->kgem.fd, +@@ -474,15 +474,13 @@ sna_video_overlay_put_image(ddPutImage_ARGS) + if (src_h >= (drw_h * 8)) + drw_h = src_h / 7; + +- clip.extents.x1 = draw->x + drw_x; +- clip.extents.y1 = draw->y + drw_y; +- clip.extents.x2 = clip.extents.x1 + drw_w; +- clip.extents.y2 = clip.extents.y1 + drw_h; +- clip.data = NULL; ++ init_video_region(&clip, draw, drw_x, drw_y, drw_w, drw_h); + + DBG(("%s: always_on_top=%d\n", __FUNCTION__, video->AlwaysOnTop)); +- if (!video->AlwaysOnTop) ++ if 
(!video->AlwaysOnTop) { ++ ValidateGC(draw, gc); + RegionIntersect(&clip, &clip, gc->pCompositeClip); ++ } + if (box_empty(&clip.extents)) + goto invisible; + +@@ -551,15 +549,7 @@ sna_video_overlay_put_image(ddPutImage_ARGS) + ret = Success; + if (sna_video_overlay_show + (sna, video, &frame, crtc, &dstBox, src_w, src_h, drw_w, drw_h)) { +- //xf86XVFillKeyHelperDrawable(draw, video->color_key, &clip); +- if (!video->AlwaysOnTop && !RegionEqual(&video->clip, &clip) && +- sna_blt_fill_boxes(sna, GXcopy, +- __sna_pixmap_get_bo(sna->front), +- sna->front->drawable.bitsPerPixel, +- video->color_key, +- region_rects(&clip), +- region_num_rects(&clip))) +- RegionCopy(&video->clip, &clip); ++ sna_video_fill_colorkey(video, &clip); + sna_window_set_port((WindowPtr)draw, port); + } else { + DBG(("%s: failed to show video frame\n", __FUNCTION__)); +diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c +index 92230f97..69bfdfd2 100644 +--- a/src/sna/sna_video_sprite.c ++++ b/src/sna/sna_video_sprite.c +@@ -47,6 +47,8 @@ + #define DRM_FORMAT_YUYV fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */ + #define DRM_FORMAT_UYVY fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */ + ++#define has_hw_scaling(sna) ((sna)->kgem.gen < 071) ++ + #define LOCAL_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct local_mode_set_plane) + struct local_mode_set_plane { + uint32_t plane_id; +@@ -81,19 +83,17 @@ static int sna_video_sprite_stop(ddStopVideo_ARGS) + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(video->sna->scrn); + int i; + +- for (i = 0; i < config->num_crtc; i++) { ++ for (i = 0; i < video->sna->mode.num_real_crtc; i++) { + xf86CrtcPtr crtc = config->crtc[i]; + int pipe; + +- if (sna_crtc_id(crtc) == 0) +- break; +- +- pipe = sna_crtc_to_pipe(crtc); ++ pipe = sna_crtc_pipe(crtc); ++ assert(pipe < ARRAY_SIZE(video->bo)); + if (video->bo[pipe] == NULL) + continue; + + memset(&s, 0, sizeof(s)); +- s.plane_id = 
sna_crtc_to_sprite(crtc); ++ s.plane_id = sna_crtc_to_sprite(crtc, video->idx); + if (drmIoctl(video->sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s)) + xf86DrvMsg(video->sna->scrn->scrnIndex, X_ERROR, + "failed to disable plane\n"); +@@ -153,7 +153,7 @@ static int sna_video_sprite_best_size(ddQueryBestSize_ARGS) + struct sna_video *video = port->devPriv.ptr; + struct sna *sna = video->sna; + +- if (sna->kgem.gen >= 075) { ++ if (!has_hw_scaling(sna) && !sna->render.video) { + *p_w = vid_w; + *p_h = vid_h; + } else { +@@ -221,12 +221,12 @@ sna_video_sprite_show(struct sna *sna, + BoxPtr dstBox) + { + struct local_mode_set_plane s; +- int pipe = sna_crtc_to_pipe(crtc); ++ int pipe = sna_crtc_pipe(crtc); + + /* XXX handle video spanning multiple CRTC */ + + VG_CLEAR(s); +- s.plane_id = sna_crtc_to_sprite(crtc); ++ s.plane_id = sna_crtc_to_sprite(crtc, video->idx); + + #define DRM_I915_SET_SPRITE_COLORKEY 0x2b + #define LOCAL_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct local_intel_sprite_colorkey) +@@ -263,9 +263,6 @@ sna_video_sprite_show(struct sna *sna, + video->color_key_changed &= ~(1 << pipe); + } + +- if (video->bo[pipe] == frame->bo) +- return true; +- + update_dst_box_to_crtc_coords(sna, crtc, dstBox); + if (frame->rotation & (RR_Rotate_90 | RR_Rotate_270)) { + int tmp = frame->width; +@@ -283,15 +280,30 @@ sna_video_sprite_show(struct sna *sna, + uint32_t handles[4]; + uint32_t pitches[4]; /* pitch for each plane */ + uint32_t offsets[4]; /* offset of each plane */ ++ uint64_t modifiers[4]; + } f; + bool purged = true; + + memset(&f, 0, sizeof(f)); + f.width = frame->width; + f.height = frame->height; ++ f.flags = 1 << 1; /* +modifiers */ + f.handles[0] = frame->bo->handle; + f.pitches[0] = frame->pitch[0]; + ++ switch (frame->bo->tiling) { ++ case I915_TILING_NONE: ++ break; ++ case I915_TILING_X: ++ /* I915_FORMAT_MOD_X_TILED */ ++ f.modifiers[0] = (uint64_t)1 << 56 | 1; ++ break; ++ case I915_TILING_Y: ++ 
/* I915_FORMAT_MOD_X_TILED */ ++ f.modifiers[0] = (uint64_t)1 << 56 | 2; ++ break; ++ } ++ + switch (frame->id) { + case FOURCC_RGB565: + f.pixel_format = DRM_FORMAT_RGB565; +@@ -360,7 +372,7 @@ sna_video_sprite_show(struct sna *sna, + return false; + } + +- frame->bo->domain = DOMAIN_NONE; ++ __kgem_bo_clear_dirty(frame->bo); + + if (video->bo[pipe]) + kgem_bo_destroy(&sna->kgem, video->bo[pipe]); +@@ -374,17 +386,17 @@ static int sna_video_sprite_put_image(ddPutImage_ARGS) + struct sna *sna = video->sna; + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + RegionRec clip; ++ BoxRec draw_extents; + int ret, i; + +- clip.extents.x1 = draw->x + drw_x; +- clip.extents.y1 = draw->y + drw_y; +- clip.extents.x2 = clip.extents.x1 + drw_w; +- clip.extents.y2 = clip.extents.y1 + drw_h; +- clip.data = NULL; ++ init_video_region(&clip, draw, drw_x, drw_y, drw_w, drw_h); ++ draw_extents = clip.extents; + + DBG(("%s: always_on_top=%d\n", __FUNCTION__, video->AlwaysOnTop)); +- if (!video->AlwaysOnTop) ++ if (!video->AlwaysOnTop) { ++ ValidateGC(draw, gc); + RegionIntersect(&clip, &clip, gc->pCompositeClip); ++ } + + DBG(("%s: src=(%d, %d),(%d, %d), dst=(%d, %d),(%d, %d), id=%d, sizep=%dx%d, sync?=%d\n", + __FUNCTION__, +@@ -402,19 +414,17 @@ static int sna_video_sprite_put_image(ddPutImage_ARGS) + goto err; + } + +- for (i = 0; i < config->num_crtc; i++) { ++ for (i = 0; i < video->sna->mode.num_real_crtc; i++) { + xf86CrtcPtr crtc = config->crtc[i]; + struct sna_video_frame frame; ++ BoxRec dst = draw_extents; + int pipe; + INT32 x1, x2, y1, y2; +- BoxRec dst; + RegionRec reg; + Rotation rotation; ++ bool cache_bo; + +- if (sna_crtc_id(crtc) == 0) +- break; +- +- pipe = sna_crtc_to_pipe(crtc); ++ pipe = sna_crtc_pipe(crtc); + + sna_video_frame_init(video, format->id, width, height, &frame); + +@@ -423,10 +433,11 @@ static int sna_video_sprite_put_image(ddPutImage_ARGS) + RegionIntersect(®, ®, &clip); + if (RegionNil(®)) { + off: ++ assert(pipe < 
ARRAY_SIZE(video->bo)); + if (video->bo[pipe]) { + struct local_mode_set_plane s; + memset(&s, 0, sizeof(s)); +- s.plane_id = sna_crtc_to_sprite(crtc); ++ s.plane_id = sna_crtc_to_sprite(crtc, video->idx); + if (drmIoctl(video->sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s)) + xf86DrvMsg(video->sna->scrn->scrnIndex, X_ERROR, + "failed to disable plane\n"); +@@ -440,8 +451,6 @@ off: + y1 = src_y; + y2 = src_y + src_h; + +- dst = clip.extents; +- + ret = xf86XVClipVideoHelper(&dst, &x1, &x2, &y1, &y2, + ®, frame.width, frame.height); + RegionUninit(®); +@@ -465,8 +474,8 @@ off: + + /* if sprite can't handle rotation natively, store it for the copy func */ + rotation = RR_Rotate_0; +- if (!sna_crtc_set_sprite_rotation(crtc, crtc->rotation)) { +- sna_crtc_set_sprite_rotation(crtc, RR_Rotate_0); ++ if (!sna_crtc_set_sprite_rotation(crtc, video->idx, crtc->rotation)) { ++ sna_crtc_set_sprite_rotation(crtc, video->idx, RR_Rotate_0); + rotation = crtc->rotation; + } + sna_video_frame_set_rotation(video, &frame, rotation); +@@ -496,6 +505,8 @@ off: + frame.image.y1 = 0; + frame.image.x2 = frame.width; + frame.image.y2 = frame.height; ++ ++ cache_bo = false; + } else { + frame.bo = sna_video_buffer(video, &frame); + if (frame.bo == NULL) { +@@ -509,6 +520,60 @@ off: + ret = BadAlloc; + goto err; + } ++ ++ cache_bo = true; ++ } ++ ++ if (!has_hw_scaling(sna) && sna->render.video && ++ !((frame.src.x2 - frame.src.x1) == (dst.x2 - dst.x1) && ++ (frame.src.y2 - frame.src.y1) == (dst.y2 - dst.y1))) { ++ ScreenPtr screen = to_screen_from_sna(sna); ++ PixmapPtr scaled; ++ RegionRec r; ++ ++ r.extents.x1 = r.extents.y1 = 0; ++ r.extents.x2 = dst.x2 - dst.x1; ++ r.extents.y2 = dst.y2 - dst.y1; ++ r.data = NULL; ++ ++ DBG(("%s: scaling from (%d, %d) to (%d, %d)\n", ++ __FUNCTION__, ++ frame.src.x2 - frame.src.x1, ++ frame.src.y2 - frame.src.y1, ++ r.extents.x2, r.extents.y2)); ++ ++ scaled = screen->CreatePixmap(screen, ++ r.extents.x2, ++ r.extents.y2, ++ 24, ++ 
CREATE_PIXMAP_USAGE_SCRATCH); ++ if (scaled == NULL) { ++ ret = BadAlloc; ++ goto err; ++ } ++ ++ if (!sna->render.video(sna, video, &frame, &r, scaled)) { ++ screen->DestroyPixmap(scaled); ++ ret = BadAlloc; ++ goto err; ++ } ++ ++ if (cache_bo) ++ sna_video_buffer_fini(video); ++ else ++ kgem_bo_destroy(&sna->kgem, frame.bo); ++ ++ frame.bo = kgem_bo_reference(__sna_pixmap_get_bo(scaled)); ++ kgem_bo_submit(&sna->kgem, frame.bo); ++ ++ frame.id = FOURCC_RGB888; ++ frame.src = frame.image = r.extents; ++ frame.width = frame.image.x2; ++ frame.height = frame.image.y2; ++ frame.pitch[0] = frame.bo->pitch; ++ ++ screen->DestroyPixmap(scaled); ++ cache_bo = false; + } + + ret = Success; +@@ -517,24 +582,16 @@ off: + ret = BadAlloc; + } + +- frame.bo->domain = DOMAIN_NONE; +- if (xvmc_passthrough(format->id)) +- kgem_bo_destroy(&sna->kgem, frame.bo); +- else ++ if (cache_bo) + sna_video_buffer_fini(video); ++ else ++ kgem_bo_destroy(&sna->kgem, frame.bo); + + if (ret != Success) + goto err; + } + +- if (!video->AlwaysOnTop && !RegionEqual(&video->clip, &clip) && +- sna_blt_fill_boxes(sna, GXcopy, +- __sna_pixmap_get_bo(sna->front), +- sna->front->drawable.bitsPerPixel, +- video->color_key, +- region_rects(&clip), +- region_num_rects(&clip))) +- RegionCopy(&video->clip, &clip); ++ sna_video_fill_colorkey(video, &clip); + sna_window_set_port((WindowPtr)draw, port); + + return Success; +@@ -606,25 +663,28 @@ static int sna_video_sprite_color_key(struct sna *sna) + return color_key & ((1 << scrn->depth) - 1); + } + +-static bool sna_video_has_sprites(struct sna *sna) ++static int sna_video_has_sprites(struct sna *sna) + { + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); ++ unsigned min; + int i; + + DBG(("%s: num_crtc=%d\n", __FUNCTION__, sna->mode.num_real_crtc)); + + if (sna->mode.num_real_crtc == 0) +- return false; ++ return 0; + ++ min = -1; + for (i = 0; i < sna->mode.num_real_crtc; i++) { +- if (!sna_crtc_to_sprite(config->crtc[i])) { +- DBG(("%s: no 
sprite found on pipe %d\n", __FUNCTION__, sna_crtc_to_pipe(config->crtc[i]))); +- return false; +- } ++ unsigned count = sna_crtc_count_sprites(config->crtc[i]); ++ DBG(("%s: %d sprites found on pipe %d\n", __FUNCTION__, ++ count, sna_crtc_pipe(config->crtc[i]))); ++ if (count < min) ++ min = count; + } + +- DBG(("%s: yes\n", __FUNCTION__)); +- return true; ++ DBG(("%s: min=%d\n", __FUNCTION__, min)); ++ return min; + } + + void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) +@@ -632,16 +692,18 @@ void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) + XvAdaptorPtr adaptor; + struct sna_video *video; + XvPortPtr port; ++ int count, i; + +- if (!sna_video_has_sprites(sna)) ++ count = sna_video_has_sprites(sna); ++ if (!count) + return; + + adaptor = sna_xv_adaptor_alloc(sna); + if (!adaptor) + return; + +- video = calloc(1, sizeof(*video)); +- port = calloc(1, sizeof(*port)); ++ video = calloc(count, sizeof(*video)); ++ port = calloc(count, sizeof(*port)); + if (video == NULL || port == NULL) { + free(video); + free(port); +@@ -686,36 +748,43 @@ void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) + adaptor->ddPutImage = sna_video_sprite_put_image; + adaptor->ddQueryImageAttributes = sna_video_sprite_query; + +- adaptor->nPorts = 1; ++ adaptor->nPorts = count; + adaptor->pPorts = port; + +- adaptor->base_id = port->id = FakeClientID(0); +- AddResource(port->id, XvGetRTPort(), port); +- port->pAdaptor = adaptor; +- port->pNotify = NULL; +- port->pDraw = NULL; +- port->client = NULL; +- port->grab.client = NULL; +- port->time = currentTime; +- port->devPriv.ptr = video; +- +- video->sna = sna; +- video->alignment = 64; +- video->color_key = sna_video_sprite_color_key(sna); +- video->color_key_changed = ~0; +- video->has_color_key = true; +- video->brightness = -19; /* (255/219) * -16 */ +- video->contrast = 75; /* 255/219 * 64 */ +- video->saturation = 146; /* 128/112 * 128 */ +- video->desired_crtc = NULL; +- video->gamma5 = 0xc0c0c0; +- 
video->gamma4 = 0x808080; +- video->gamma3 = 0x404040; +- video->gamma2 = 0x202020; +- video->gamma1 = 0x101010; +- video->gamma0 = 0x080808; +- RegionNull(&video->clip); +- video->SyncToVblank = 1; ++ for (i = 0; i < count; i++) { ++ port->id = FakeClientID(0); ++ AddResource(port->id, XvGetRTPort(), port); ++ port->pAdaptor = adaptor; ++ port->pNotify = NULL; ++ port->pDraw = NULL; ++ port->client = NULL; ++ port->grab.client = NULL; ++ port->time = currentTime; ++ port->devPriv.ptr = video; ++ ++ video->sna = sna; ++ video->idx = i; ++ video->alignment = 64; ++ video->color_key = sna_video_sprite_color_key(sna); ++ video->color_key_changed = ~0; ++ video->has_color_key = true; ++ video->brightness = -19; /* (255/219) * -16 */ ++ video->contrast = 75; /* 255/219 * 64 */ ++ video->saturation = 146; /* 128/112 * 128 */ ++ video->desired_crtc = NULL; ++ video->gamma5 = 0xc0c0c0; ++ video->gamma4 = 0x808080; ++ video->gamma3 = 0x404040; ++ video->gamma2 = 0x202020; ++ video->gamma1 = 0x101010; ++ video->gamma0 = 0x080808; ++ RegionNull(&video->clip); ++ video->SyncToVblank = 1; ++ ++ port++; ++ video++; ++ } ++ adaptor->base_id = adaptor->pPorts[0].id; + + xvColorKey = MAKE_ATOM("XV_COLORKEY"); + xvAlwaysOnTop = MAKE_ATOM("XV_ALWAYS_ON_TOP"); +diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c +index 95011939..3cce5cf1 100644 +--- a/src/sna/sna_video_textured.c ++++ b/src/sna/sna_video_textured.c +@@ -48,7 +48,12 @@ static const XvAttributeRec Attributes[] = { + //{XvSettable | XvGettable, 0, 255, (char *)"XV_CONTRAST"}, + }; + +-static const XvImageRec Images[] = { ++static const XvImageRec gen2_Images[] = { ++ XVIMAGE_YUY2, ++ XVIMAGE_UYVY, ++}; ++ ++static const XvImageRec gen3_Images[] = { + XVIMAGE_YUY2, + XVIMAGE_YV12, + XVIMAGE_I420, +@@ -149,15 +154,16 @@ sna_video_textured_put_image(ddPutImage_ARGS) + BoxRec dstBox; + RegionRec clip; + xf86CrtcPtr crtc; ++ int16_t dx, dy; + bool flush = false; + bool ret; + +- clip.extents.x1 = draw->x 
+ drw_x; +- clip.extents.y1 = draw->y + drw_y; +- clip.extents.x2 = clip.extents.x1 + drw_w; +- clip.extents.y2 = clip.extents.y1 + drw_h; +- clip.data = NULL; ++ if (wedged(sna)) ++ return BadAlloc; + ++ init_video_region(&clip, draw, drw_x, drw_y, drw_w, drw_h); ++ ++ ValidateGC(draw, gc); + RegionIntersect(&clip, &clip, gc->pCompositeClip); + if (!RegionNotEmpty(&clip)) + return Success; +@@ -181,6 +187,9 @@ sna_video_textured_put_image(ddPutImage_ARGS) + &clip)) + return Success; + ++ if (get_drawable_deltas(draw, pixmap, &dx, &dy)) ++ RegionTranslate(&clip, dx, dy); ++ + flags = MOVE_WRITE | __MOVE_FORCE; + if (clip.data) + flags |= MOVE_READ; +@@ -234,7 +243,7 @@ sna_video_textured_put_image(ddPutImage_ARGS) + DBG(("%s: failed to render video\n", __FUNCTION__)); + ret = BadAlloc; + } else +- DamageDamageRegion(draw, &clip); ++ DamageDamageRegion(&pixmap->drawable, &clip); + + kgem_bo_destroy(&sna->kgem, frame.bo); + +@@ -316,7 +325,7 @@ void sna_video_textured_setup(struct sna *sna, ScreenPtr screen) + + if (!sna->render.video) { + xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, +- "Textured video not supported on this hardware\n"); ++ "Textured video not supported on this hardware or backend\n"); + return; + } + +@@ -362,8 +371,13 @@ void sna_video_textured_setup(struct sna *sna, ScreenPtr screen) + ARRAY_SIZE(Formats)); + adaptor->nAttributes = ARRAY_SIZE(Attributes); + adaptor->pAttributes = (XvAttributeRec *)Attributes; +- adaptor->nImages = ARRAY_SIZE(Images); +- adaptor->pImages = (XvImageRec *)Images; ++ if (sna->kgem.gen < 030) { ++ adaptor->nImages = ARRAY_SIZE(gen2_Images); ++ adaptor->pImages = (XvImageRec *)gen2_Images; ++ } else { ++ adaptor->nImages = ARRAY_SIZE(gen3_Images); ++ adaptor->pImages = (XvImageRec *)gen3_Images; ++ } + #if XORG_XV_VERSION < 2 + adaptor->ddAllocatePort = sna_xv_alloc_port; + adaptor->ddFreePort = sna_xv_free_port; +diff --git a/src/sna/xassert.h b/src/sna/xassert.h +index 1bcfd080..e648e4bc 100644 +--- a/src/sna/xassert.h 
++++ b/src/sna/xassert.h +@@ -43,6 +43,28 @@ + xorg_backtrace(); \ + FatalError("%s:%d assertion '%s' failed\n", __func__, __LINE__, #E); \ + } while (0) ++ ++#define warn_unless(E) \ ++({ \ ++ bool fail = !(E); \ ++ if (unlikely(fail)) { \ ++ static int __warn_once__; \ ++ if (!__warn_once__) { \ ++ xorg_backtrace(); \ ++ ErrorF("%s:%d assertion '%s' failed\n", __func__, __LINE__, #E); \ ++ __warn_once__ = 1; \ ++ } \ ++ } \ ++ unlikely(fail); \ ++}) ++ ++#define dbg(EXPR) EXPR ++ ++#else ++ ++#define warn_unless(E) ({ bool fail = !(E); unlikely(fail); }) ++#define dbg(EXPR) ++ + #endif + + #endif /* __XASSERT_H__ */ +diff --git a/src/uxa/i830_reg.h b/src/uxa/i830_reg.h +index d8306bcd..ba39d82c 100644 +--- a/src/uxa/i830_reg.h ++++ b/src/uxa/i830_reg.h +@@ -65,6 +65,12 @@ + #define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0) + #define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20) + ++#define MI_LOAD_REGISTER_IMM (0x22<<23 | (3-2)) ++ ++#define BCS_SWCTRL 0x22200 ++# define BCS_SWCTRL_SRC_Y (1 << 0) ++# define BCS_SWCTRL_DST_Y (1 << 1) ++ + /* BLT commands */ + #define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) + #define COLOR_BLT_WRITE_ALPHA (1<<21) +diff --git a/src/uxa/i965_video.c b/src/uxa/i965_video.c +index 68e6fd38..438ab909 100644 +--- a/src/uxa/i965_video.c ++++ b/src/uxa/i965_video.c +@@ -37,7 +37,6 @@ + #include "fourcc.h" + + #include "intel.h" +-#include "intel_xvmc.h" + #include "intel_uxa.h" + #include "i830_reg.h" + #include "i965_reg.h" +diff --git a/src/uxa/intel.h b/src/uxa/intel.h +index 1b7e5339..a5e77af4 100644 +--- a/src/uxa/intel.h ++++ b/src/uxa/intel.h +@@ -121,7 +121,6 @@ typedef struct intel_screen_private { + + void *modes; + drm_intel_bo *front_buffer, *back_buffer; +- unsigned int back_name; + long front_pitch, front_tiling; + + dri_bufmgr *bufmgr; +@@ -169,6 +168,7 @@ typedef struct intel_screen_private { + const struct intel_device_info *info; + + unsigned int BR[20]; ++ unsigned int BR_tiling[2]; + + CloseScreenProcPtr CloseScreen; + +@@ 
-196,7 +196,9 @@ typedef struct intel_screen_private { + + int colorKey; + XF86VideoAdaptorPtr adaptor; ++#if !HAVE_NOTIFY_FD + ScreenBlockHandlerProcPtr BlockHandler; ++#endif + Bool overlayOn; + + struct { +@@ -285,8 +287,6 @@ typedef struct intel_screen_private { + Bool has_kernel_flush; + Bool needs_flush; + +- struct _DRI2FrameEvent *pending_flip[MAX_PIPES]; +- + /* Broken-out options. */ + OptionInfoPtr Options; + +@@ -368,6 +368,7 @@ typedef void (*intel_drm_abort_proc)(ScrnInfoPtr scrn, + + extern uint32_t intel_drm_queue_alloc(ScrnInfoPtr scrn, xf86CrtcPtr crtc, void *data, intel_drm_handler_proc handler, intel_drm_abort_proc abort); + extern void intel_drm_abort(ScrnInfoPtr scrn, Bool (*match)(void *data, void *match_data), void *match_data); ++extern void intel_drm_abort_seq(ScrnInfoPtr scrn, uint32_t seq); + + extern int intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, xf86CrtcPtr crtc); + extern int intel_crtc_id(xf86CrtcPtr crtc); +@@ -408,7 +409,6 @@ typedef struct _DRI2FrameEvent { + ClientPtr client; + enum DRI2FrameEventType type; + int frame; +- int pipe; + + struct list drawable_resource, client_resource; + +@@ -418,7 +418,12 @@ typedef struct _DRI2FrameEvent { + DRI2BufferPtr front; + DRI2BufferPtr back; + +- struct _DRI2FrameEvent *chain; ++ /* current scanout for triple buffer */ ++ int old_width; ++ int old_height; ++ int old_pitch; ++ int old_tiling; ++ dri_bo *old_buffer; + } DRI2FrameEventRec, *DRI2FrameEventPtr; + + extern Bool intel_do_pageflip(intel_screen_private *intel, +@@ -456,10 +461,6 @@ extern xf86CrtcPtr intel_covering_crtc(ScrnInfoPtr scrn, BoxPtr box, + + Bool I830DRI2ScreenInit(ScreenPtr pScreen); + void I830DRI2CloseScreen(ScreenPtr pScreen); +-void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, +- unsigned int tv_usec, DRI2FrameEventPtr flip_info); +-void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, +- unsigned int tv_usec, DRI2FrameEventPtr flip_info); + + /* intel_dri3.c */ + 
Bool intel_dri3_screen_init(ScreenPtr screen); +diff --git a/src/uxa/intel_batchbuffer.c b/src/uxa/intel_batchbuffer.c +index a29e4434..114c6026 100644 +--- a/src/uxa/intel_batchbuffer.c ++++ b/src/uxa/intel_batchbuffer.c +@@ -245,6 +245,17 @@ void intel_batch_submit(ScrnInfoPtr scrn) + if (intel->batch_used == 0) + return; + ++ if (intel->current_batch == I915_EXEC_BLT && ++ INTEL_INFO(intel)->gen >= 060) { ++ OUT_BATCH(MI_FLUSH_DW); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(0); ++ OUT_BATCH(MI_LOAD_REGISTER_IMM); ++ OUT_BATCH(BCS_SWCTRL); ++ OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16); ++ } ++ + /* Mark the end of the batchbuffer. */ + OUT_BATCH(MI_BATCH_BUFFER_END); + /* Emit a padding dword if we aren't going to be quad-word aligned. */ +diff --git a/src/uxa/intel_batchbuffer.h b/src/uxa/intel_batchbuffer.h +index e5fb8d08..e71ffd19 100644 +--- a/src/uxa/intel_batchbuffer.h ++++ b/src/uxa/intel_batchbuffer.h +@@ -30,7 +30,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #ifndef _INTEL_BATCHBUFFER_H + #define _INTEL_BATCHBUFFER_H + +-#define BATCH_RESERVED 16 ++#define BATCH_RESERVED 64 + + + void intel_batch_init(ScrnInfoPtr scrn); +@@ -202,6 +202,23 @@ do { \ + + #define BEGIN_BATCH(n) __BEGIN_BATCH(n,RENDER_BATCH) + #define BEGIN_BATCH_BLT(n) __BEGIN_BATCH(n,BLT_BATCH) ++#define BEGIN_BATCH_BLT_TILED(n) \ ++do { \ ++ if (INTEL_INFO(intel)->gen < 060) { \ ++ __BEGIN_BATCH(n, BLT_BATCH); \ ++ } else { \ ++ __BEGIN_BATCH(n+7, BLT_BATCH); \ ++ OUT_BATCH(MI_FLUSH_DW); \ ++ OUT_BATCH(0); \ ++ OUT_BATCH(0); \ ++ OUT_BATCH(0); \ ++ OUT_BATCH(MI_LOAD_REGISTER_IMM); \ ++ OUT_BATCH(BCS_SWCTRL); \ ++ OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 | \ ++ ((intel->BR_tiling[0] == I915_TILING_Y) ? BCS_SWCTRL_DST_Y : 0) | \ ++ ((intel->BR_tiling[1] == I915_TILING_Y) ? 
BCS_SWCTRL_SRC_Y : 0)); \ ++ } \ ++} while (0) + + #define ADVANCE_BATCH() do { \ + if (intel->batch_emitting == 0) \ +diff --git a/src/uxa/intel_display.c b/src/uxa/intel_display.c +index 7b4d4e0c..809cda1d 100644 +--- a/src/uxa/intel_display.c ++++ b/src/uxa/intel_display.c +@@ -89,11 +89,11 @@ struct intel_mode { + struct list outputs; + struct list crtcs; + +- void *pageflip_data; +- intel_pageflip_handler_proc pageflip_handler; +- intel_pageflip_abort_proc pageflip_abort; +- +- Bool delete_dp_12_displays; ++ struct { ++ intel_pageflip_handler_proc handler; ++ intel_pageflip_abort_proc abort; ++ void *data; ++ } pageflip; + }; + + struct intel_pageflip { +@@ -114,7 +114,6 @@ struct intel_crtc { + struct list link; + PixmapPtr scanout_pixmap; + uint32_t scanout_fb_id; +- int32_t vblank_offset; + uint32_t msc_prev; + uint64_t msc_high; + }; +@@ -193,7 +192,7 @@ intel_output_backlight_init(xf86OutputPtr output) + + str = xf86GetOptValString(intel->Options, OPTION_BACKLIGHT); + if (str != NULL) { +- if (backlight_exists(str) != BL_NONE) { ++ if (backlight_exists(str)) { + intel_output->backlight_active_level = + backlight_open(&intel_output->backlight, + strdup(str)); +@@ -689,9 +688,11 @@ intel_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix) + } + + bo = intel_get_pixmap_bo(ppix); +- if (intel->front_buffer) { +- ErrorF("have front buffer\n"); +- } ++ if (!bo) ++ return FALSE; ++ ++ if (intel->front_buffer) ++ return FALSE; + + drm_intel_bo_disable_reuse(bo); + +@@ -867,6 +868,48 @@ intel_output_attach_edid(xf86OutputPtr output) + xf86OutputSetEDID(output, mon); + } + ++static void ++intel_output_attach_tile(xf86OutputPtr output) ++{ ++#if XF86_OUTPUT_VERSION >= 3 ++ struct intel_output *intel_output = output->driver_private; ++ drmModeConnectorPtr koutput = intel_output->mode_output; ++ struct intel_mode *mode = intel_output->mode; ++ drmModePropertyBlobPtr blob = NULL; ++ struct xf86CrtcTileInfo tile_info, *set = NULL; ++ int i; ++ ++ for (i = 0; koutput && 
i < koutput->count_props; i++) { ++ drmModePropertyPtr props; ++ ++ props = drmModeGetProperty(mode->fd, koutput->props[i]); ++ if (!props) ++ continue; ++ ++ if (!(props->flags & DRM_MODE_PROP_BLOB)) { ++ drmModeFreeProperty(props); ++ continue; ++ } ++ ++ if (!strcmp(props->name, "TILE")) { ++ blob = drmModeGetPropertyBlob(mode->fd, ++ koutput->prop_values[i]); ++ } ++ drmModeFreeProperty(props); ++ } ++ ++ if (blob) { ++ if (xf86OutputParseKMSTile(blob->data, ++ blob->length, ++ &tile_info)) ++ set = &tile_info; ++ drmModeFreePropertyBlob(blob); ++ } ++ ++ xf86OutputSetTile(output, set); ++#endif ++} ++ + static DisplayModePtr + intel_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) + { +@@ -922,6 +965,7 @@ intel_output_get_modes(xf86OutputPtr output) + int i; + + intel_output_attach_edid(output); ++ intel_output_attach_tile(output); + + if (!koutput) + return Modes; +@@ -1492,6 +1536,7 @@ intel_output_init(ScrnInfoPtr scrn, struct intel_mode *mode, drmModeResPtr mode_ + intel_output = output->driver_private; + intel_output->output_id = mode_res->connectors[num]; + intel_output->mode_output = koutput; ++ RROutputChanged(output->randr_output, TRUE); + return; + } + } +@@ -1650,9 +1695,6 @@ intel_pageflip_abort(ScrnInfoPtr scrn, xf86CrtcPtr crtc, void *data); + static void + intel_pageflip_complete(struct intel_mode *mode); + +-static void +-intel_drm_abort_seq (ScrnInfoPtr scrn, uint32_t seq); +- + Bool + intel_do_pageflip(intel_screen_private *intel, + dri_bo *new_front, +@@ -1671,23 +1713,30 @@ intel_do_pageflip(intel_screen_private *intel, + uint32_t new_fb_id; + uint32_t flags; + uint32_t seq; ++ int err = 0; + int i; + + /* ++ * We only have a single length queue in the kernel, so any ++ * attempts to schedule a second flip before processing the first ++ * is a bug. Punt it back to the caller. 
++ */ ++ if (mode->flip_count) ++ return FALSE; ++ ++ /* + * Create a new handle for the back buffer + */ + if (drmModeAddFB(mode->fd, scrn->virtualX, scrn->virtualY, + scrn->depth, scrn->bitsPerPixel, pitch, +- new_front->handle, &new_fb_id)) ++ new_front->handle, &new_fb_id)) { ++ err = errno; + goto error_out; ++ } + + drm_intel_bo_disable_reuse(new_front); + intel_flush(intel); + +- mode->pageflip_data = pageflip_data; +- mode->pageflip_handler = pageflip_handler; +- mode->pageflip_abort = pageflip_abort; +- + /* + * Queue flips on all enabled CRTCs + * Note that if/when we get per-CRTC buffers, we'll have to update this. +@@ -1699,6 +1748,7 @@ intel_do_pageflip(intel_screen_private *intel, + */ + mode->fe_msc = 0; + mode->fe_usec = 0; ++ memset(&mode->pageflip, 0, sizeof(mode->pageflip)); + + flags = DRM_MODE_PAGE_FLIP_EVENT; + if (async) +@@ -1711,8 +1761,7 @@ intel_do_pageflip(intel_screen_private *intel, + + flip = calloc(1, sizeof(struct intel_pageflip)); + if (flip == NULL) { +- xf86DrvMsg(scrn->scrnIndex, X_WARNING, +- "flip queue: carrier alloc failed.\n"); ++ err = errno; + goto error_undo; + } + +@@ -1724,33 +1773,30 @@ intel_do_pageflip(intel_screen_private *intel, + + seq = intel_drm_queue_alloc(scrn, config->crtc[i], flip, intel_pageflip_handler, intel_pageflip_abort); + if (!seq) { ++ err = errno; + free(flip); + goto error_undo; + } + +-again: ++ mode->flip_count++; ++ + if (drmModePageFlip(mode->fd, + crtc_id(crtc), + new_fb_id, + flags, (void *)(uintptr_t)seq)) { +- if (intel_mode_read_drm_events(intel)) { +- xf86DrvMsg(scrn->scrnIndex, X_WARNING, +- "flip queue retry\n"); +- goto again; +- } +- xf86DrvMsg(scrn->scrnIndex, X_WARNING, +- "flip queue failed: %s\n", strerror(errno)); +- if (seq) +- intel_drm_abort_seq(scrn, seq); +- free(flip); ++ err = errno; ++ intel_drm_abort_seq(scrn, seq); + goto error_undo; + } +- mode->flip_count++; + } + + mode->old_fb_id = mode->fb_id; + mode->fb_id = new_fb_id; + ++ mode->pageflip.data = pageflip_data; 
++ mode->pageflip.handler = pageflip_handler; ++ mode->pageflip.abort = pageflip_abort; ++ + if (!mode->flip_count) + intel_pageflip_complete(mode); + +@@ -1765,7 +1811,7 @@ error_undo: + + error_out: + xf86DrvMsg(scrn->scrnIndex, X_WARNING, "Page flip failed: %s\n", +- strerror(errno)); ++ strerror(err)); + + mode->flip_count = 0; + return FALSE; +@@ -1839,7 +1885,7 @@ intel_drm_abort(ScrnInfoPtr scrn, Bool (*match)(void *data, void *match_data), v + /* + * Abort by drm queue sequence number + */ +-static void ++void + intel_drm_abort_seq(ScrnInfoPtr scrn, uint32_t seq) + { + struct intel_drm_queue *q; +@@ -1911,7 +1957,6 @@ intel_sequence_to_crtc_msc(xf86CrtcPtr crtc, uint32_t sequence) + { + struct intel_crtc *intel_crtc = crtc->driver_private; + +- sequence += intel_crtc->vblank_offset; + if ((int32_t) (sequence - intel_crtc->msc_prev) < -0x40000000) + intel_crtc->msc_high += 0x100000000L; + intel_crtc->msc_prev = sequence; +@@ -1935,37 +1980,10 @@ intel_get_crtc_msc_ust(ScrnInfoPtr scrn, xf86CrtcPtr crtc, uint64_t *msc, uint64 + return 0; + } + +-/* +- * Convert a 64-bit adjusted MSC value into a 32-bit kernel sequence number, +- * removing the high 32 bits and subtracting out the vblank_offset term. +- * +- * This also updates the vblank_offset when it notices that the value should +- * change. 
+- */ +- +-#define MAX_VBLANK_OFFSET 1000 +- + uint32_t + intel_crtc_msc_to_sequence(ScrnInfoPtr scrn, xf86CrtcPtr crtc, uint64_t expect) + { +- struct intel_crtc *intel_crtc = crtc->driver_private; +- uint64_t msc, ust; +- +- if (intel_get_crtc_msc_ust(scrn, crtc, &msc, &ust) == 0) { +- int64_t diff = expect - msc; +- +- /* We're way off here, assume that the kernel has lost its mind +- * and smack the vblank back to something sensible +- */ +- if (diff < -MAX_VBLANK_OFFSET || diff > MAX_VBLANK_OFFSET) { +- intel_crtc->vblank_offset += (int32_t) diff; +- if (intel_crtc->vblank_offset > -MAX_VBLANK_OFFSET && +- intel_crtc->vblank_offset < MAX_VBLANK_OFFSET) +- intel_crtc->vblank_offset = 0; +- } +- } +- +- return (uint32_t) (expect - intel_crtc->vblank_offset); ++ return (uint32_t)expect; + } + + /* +@@ -1998,14 +2016,13 @@ intel_drm_handler(int fd, uint32_t frame, uint32_t sec, uint32_t usec, void *use + static void + intel_pageflip_complete(struct intel_mode *mode) + { +- /* Release framebuffer */ +- drmModeRmFB(mode->fd, mode->old_fb_id); +- +- if (!mode->pageflip_handler) ++ if (!mode->pageflip.handler) + return; + +- mode->pageflip_handler(mode->fe_msc, mode->fe_usec, +- mode->pageflip_data); ++ /* Release framebuffer */ ++ drmModeRmFB(mode->fd, mode->old_fb_id); ++ mode->pageflip.handler(mode->fe_msc, mode->fe_usec, ++ mode->pageflip.data); + } + + /* +@@ -2045,6 +2062,7 @@ intel_pageflip_handler(ScrnInfoPtr scrn, xf86CrtcPtr crtc, + + if (!mode) + return; ++ + intel_pageflip_complete(mode); + } + +@@ -2060,18 +2078,18 @@ intel_pageflip_abort(ScrnInfoPtr scrn, xf86CrtcPtr crtc, void *data) + if (!mode) + return; + +- /* Release framebuffer */ +- drmModeRmFB(mode->fd, mode->old_fb_id); +- +- if (!mode->pageflip_abort) ++ if (!mode->pageflip.abort) + return; + +- mode->pageflip_abort(mode->pageflip_data); ++ /* Release framebuffer */ ++ drmModeRmFB(mode->fd, mode->old_fb_id); ++ mode->pageflip.abort(mode->pageflip.data); + } + + /* + * Check for pending DRM 
events and process them. + */ ++#if !HAVE_NOTIFY_FD + static void + drm_wakeup_handler(pointer data, int err, pointer p) + { +@@ -2086,6 +2104,14 @@ drm_wakeup_handler(pointer data, int err, pointer p) + if (FD_ISSET(mode->fd, read_mask)) + drmHandleEvent(mode->fd, &mode->event_context); + } ++#else ++static void ++drm_notify_fd(int fd, int ready, void *data) ++{ ++ struct intel_mode *mode = data; ++ drmHandleEvent(mode->fd, &mode->event_context); ++} ++#endif + + /* + * If there are any available, read drm_events +@@ -2231,10 +2257,6 @@ Bool intel_mode_pre_init(ScrnInfoPtr scrn, int fd, int cpp) + intel->use_pageflipping = TRUE; + } + +- if (xf86ReturnOptValBool(intel->Options, OPTION_DELETE_DP12, FALSE)) { +- mode->delete_dp_12_displays = TRUE; +- } +- + intel->modes = mode; + drmModeFreeResources(mode_res); + return TRUE; +@@ -2250,9 +2272,11 @@ intel_mode_init(struct intel_screen_private *intel) + * registration within ScreenInit and not PreInit. + */ + mode->flip_count = 0; +- AddGeneralSocket(mode->fd); ++ SetNotifyFd(mode->fd, drm_notify_fd, X_NOTIFY_READ, mode); ++#if !HAVE_NOTIFY_FD + RegisterBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, + drm_wakeup_handler, mode); ++#endif + } + + void +@@ -2276,9 +2300,11 @@ intel_mode_close(intel_screen_private *intel) + + intel_drm_abort_scrn(intel->scrn); + ++#if !HAVE_NOTIFY_FD + RemoveBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, + drm_wakeup_handler, mode); +- RemoveGeneralSocket(mode->fd); ++#endif ++ RemoveNotifyFd(mode->fd); + } + + void +@@ -2498,12 +2524,11 @@ intel_mode_hotplug(struct intel_screen_private *intel) + int i, j; + Bool found; + Bool changed = FALSE; +- struct intel_mode *mode = intel->modes; ++ + mode_res = drmModeGetResources(intel->drmSubFD); + if (!mode_res) + goto out; + +-restart_destroy: + for (i = 0; i < config->num_output; i++) { + xf86OutputPtr output = config->output[i]; + struct intel_output *intel_output; +@@ -2522,13 +2547,9 @@ restart_destroy: + 
drmModeFreeConnector(intel_output->mode_output); + intel_output->mode_output = NULL; + intel_output->output_id = -1; ++ RROutputChanged(output->randr_output, TRUE); + + changed = TRUE; +- if (mode->delete_dp_12_displays) { +- RROutputDestroy(output->randr_output); +- xf86OutputDestroy(output); +- goto restart_destroy; +- } + } + + /* find new output ids we don't have outputs for */ +@@ -2552,10 +2573,8 @@ restart_destroy: + intel_output_init(scrn, intel->modes, mode_res, i, 1); + } + +- if (changed) { +- RRSetChanged(xf86ScrnToScreen(scrn)); ++ if (changed) + RRTellChanged(xf86ScrnToScreen(scrn)); +- } + + drmModeFreeResources(mode_res); + out: +diff --git a/src/uxa/intel_dri.c b/src/uxa/intel_dri.c +index f61c6210..524826d2 100644 +--- a/src/uxa/intel_dri.c ++++ b/src/uxa/intel_dri.c +@@ -81,6 +81,47 @@ static DevPrivateKeyRec i830_client_key; + static int i830_client_key; + #endif + ++static void I830DRI2FlipEventHandler(unsigned int frame, ++ unsigned int tv_sec, ++ unsigned int tv_usec, ++ DRI2FrameEventPtr flip_info); ++ ++static void I830DRI2FrameEventHandler(unsigned int frame, ++ unsigned int tv_sec, ++ unsigned int tv_usec, ++ DRI2FrameEventPtr swap_info); ++ ++static void ++i830_dri2_del_frame_event(DRI2FrameEventPtr info); ++ ++static uint32_t pipe_select(int pipe) ++{ ++ if (pipe > 1) ++ return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT; ++ else if (pipe > 0) ++ return DRM_VBLANK_SECONDARY; ++ else ++ return 0; ++} ++ ++static void ++intel_dri2_vblank_handler(ScrnInfoPtr scrn, ++ xf86CrtcPtr crtc, ++ uint64_t msc, ++ uint64_t usec, ++ void *data) ++{ ++ I830DRI2FrameEventHandler((uint32_t) msc, usec / 1000000, usec % 1000000, data); ++} ++ ++static void ++intel_dri2_vblank_abort(ScrnInfoPtr scrn, ++ xf86CrtcPtr crtc, ++ void *data) ++{ ++ i830_dri2_del_frame_event(data); ++} ++ + static uint32_t pixmap_flink(PixmapPtr pixmap) + { + struct intel_uxa_pixmap *priv = intel_uxa_get_pixmap_private(pixmap); +@@ -135,9 +176,6 @@ I830DRI2CreateBuffers(DrawablePtr 
drawable, unsigned int *attachments, + pixmap = NULL; + if (attachments[i] == DRI2BufferFrontLeft) { + pixmap = get_front_buffer(drawable); +- +- if (pixmap == NULL) +- drawable = &(get_drawable_pixmap(drawable)->drawable); + } else if (attachments[i] == DRI2BufferStencil && pDepthPixmap) { + pixmap = pDepthPixmap; + pixmap->refcnt++; +@@ -246,11 +284,8 @@ I830DRI2CreateBuffer(DrawablePtr drawable, unsigned int attachment, + } + + pixmap = NULL; +- if (attachment == DRI2BufferFrontLeft) { ++ if (attachment == DRI2BufferFrontLeft) + pixmap = get_front_buffer(drawable); +- if (pixmap == NULL) +- drawable = &(get_drawable_pixmap(drawable)->drawable); +- } + + if (pixmap == NULL) { + unsigned int hint = INTEL_CREATE_PIXMAP_DRI2; +@@ -673,6 +708,20 @@ i830_dri2_del_frame_event(DRI2FrameEventPtr info) + if (info->back) + I830DRI2DestroyBuffer(NULL, info->back); + ++ if (info->old_buffer) { ++ /* Check that the old buffer still matches the front buffer ++ * in case a mode change occurred before we woke up. 
++ */ ++ if (info->intel->back_buffer == NULL && ++ info->old_width == info->intel->scrn->virtualX && ++ info->old_height == info->intel->scrn->virtualY && ++ info->old_pitch == info->intel->front_pitch && ++ info->old_tiling == info->intel->front_tiling) ++ info->intel->back_buffer = info->old_buffer; ++ else ++ dri_bo_unreference(info->old_buffer); ++ } ++ + free(info); + } + +@@ -708,16 +757,14 @@ static void + I830DRI2ExchangeBuffers(struct intel_screen_private *intel, DRI2BufferPtr front, DRI2BufferPtr back) + { + I830DRI2BufferPrivatePtr front_priv, back_priv; +- int tmp; + struct intel_uxa_pixmap *new_front; + + front_priv = front->driverPrivate; + back_priv = back->driverPrivate; + + /* Swap BO names so DRI works */ +- tmp = front->name; + front->name = back->name; +- back->name = tmp; ++ back->name = pixmap_flink(front_priv->pixmap); + + /* Swap pixmap bos */ + new_front = intel_exchange_pixmap_buffers(intel, +@@ -753,87 +800,30 @@ I830DRI2FlipAbort(void *pageflip_data) + i830_dri2_del_frame_event(info); + } + +-/* +- * Our internal swap routine takes care of actually exchanging, blitting, or +- * flipping buffers as necessary. 
+- */ + static Bool +-I830DRI2ScheduleFlip(struct intel_screen_private *intel, +- DrawablePtr draw, +- DRI2FrameEventPtr info) ++allocate_back_buffer(struct intel_screen_private *intel) + { +- I830DRI2BufferPrivatePtr priv = info->back->driverPrivate; +- drm_intel_bo *new_back, *old_back; +- int tmp_name; +- +- if (!intel->use_triple_buffer) { +- info->type = DRI2_SWAP; +- if (!intel_do_pageflip(intel, +- get_pixmap_bo(priv), +- info->pipe, FALSE, info, +- I830DRI2FlipComplete, +- I830DRI2FlipAbort)) +- return FALSE; +- +- I830DRI2ExchangeBuffers(intel, info->front, info->back); +- return TRUE; +- } ++ drm_intel_bo *bo; ++ int pitch; ++ uint32_t tiling; + +- if (intel->pending_flip[info->pipe]) { +- assert(intel->pending_flip[info->pipe]->chain == NULL); +- intel->pending_flip[info->pipe]->chain = info; ++ if (intel->back_buffer) + return TRUE; +- } + +- if (intel->back_buffer == NULL) { +- new_back = drm_intel_bo_alloc(intel->bufmgr, "front buffer", +- intel->front_buffer->size, 0); +- if (new_back == NULL) +- return FALSE; +- +- if (intel->front_tiling != I915_TILING_NONE) { +- uint32_t tiling = intel->front_tiling; +- drm_intel_bo_set_tiling(new_back, &tiling, intel->front_pitch); +- if (tiling != intel->front_tiling) { +- drm_intel_bo_unreference(new_back); +- return FALSE; +- } +- } +- +- drm_intel_bo_disable_reuse(new_back); +- dri_bo_flink(new_back, &intel->back_name); +- } else { +- new_back = intel->back_buffer; +- intel->back_buffer = NULL; +- } ++ bo = intel_allocate_framebuffer(intel->scrn, ++ intel->scrn->virtualX, ++ intel->scrn->virtualY, ++ intel->cpp, ++ &pitch, &tiling); ++ if (bo == NULL) ++ return FALSE; + +- old_back = get_pixmap_bo(priv); +- if (!intel_do_pageflip(intel, old_back, info->pipe, FALSE, info, I830DRI2FlipComplete, I830DRI2FlipAbort)) { +- intel->back_buffer = new_back; ++ if (pitch != intel->front_pitch || tiling != intel->front_tiling) { ++ drm_intel_bo_unreference(bo); + return FALSE; + } +- info->type = DRI2_SWAP_CHAIN; +- 
intel->pending_flip[info->pipe] = info; +- +- priv = info->front->driverPrivate; +- +- /* Exchange the current front-buffer with the fresh bo */ +- +- intel->back_buffer = intel->front_buffer; +- drm_intel_bo_reference(intel->back_buffer); +- intel_set_pixmap_bo(priv->pixmap, new_back); +- drm_intel_bo_unreference(new_back); +- +- tmp_name = info->front->name; +- info->front->name = intel->back_name; +- intel->back_name = tmp_name; + +- /* Then flip DRI2 pointers and update the screen pixmap */ +- I830DRI2ExchangeBuffers(intel, info->front, info->back); +- DRI2SwapComplete(info->client, draw, 0, 0, 0, +- DRI2_EXCHANGE_COMPLETE, +- info->event_complete, +- info->event_data); ++ intel->back_buffer = bo; + return TRUE; + } + +@@ -889,8 +879,88 @@ can_exchange(DrawablePtr drawable, DRI2BufferPtr front, DRI2BufferPtr back) + return TRUE; + } + +-void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, +- unsigned int tv_usec, DRI2FrameEventPtr swap_info) ++static Bool ++queue_flip(struct intel_screen_private *intel, ++ DrawablePtr draw, ++ DRI2FrameEventPtr info) ++{ ++ xf86CrtcPtr crtc = I830DRI2DrawableCrtc(draw); ++ I830DRI2BufferPrivatePtr priv = info->back->driverPrivate; ++ drm_intel_bo *old_back = get_pixmap_bo(priv); ++ ++ if (crtc == NULL) ++ return FALSE; ++ ++ if (!can_exchange(draw, info->front, info->back)) ++ return FALSE; ++ ++ if (!intel_do_pageflip(intel, old_back, ++ intel_crtc_to_pipe(crtc), ++ FALSE, info, ++ I830DRI2FlipComplete, I830DRI2FlipAbort)) ++ return FALSE; ++ ++#if DRI2INFOREC_VERSION >= 6 ++ if (intel->use_triple_buffer && allocate_back_buffer(intel)) { ++ info->old_width = intel->scrn->virtualX; ++ info->old_height = intel->scrn->virtualY; ++ info->old_pitch = intel->front_pitch; ++ info->old_tiling = intel->front_tiling; ++ info->old_buffer = intel->front_buffer; ++ dri_bo_reference(info->old_buffer); ++ ++ priv = info->front->driverPrivate; ++ intel_set_pixmap_bo(priv->pixmap, intel->back_buffer); ++ ++ 
dri_bo_unreference(intel->back_buffer); ++ intel->back_buffer = NULL; ++ ++ DRI2SwapLimit(draw, 2); ++ } else ++ DRI2SwapLimit(draw, 1); ++#endif ++ ++ /* Then flip DRI2 pointers and update the screen pixmap */ ++ I830DRI2ExchangeBuffers(intel, info->front, info->back); ++ return TRUE; ++} ++ ++static Bool ++queue_swap(struct intel_screen_private *intel, ++ DrawablePtr draw, ++ DRI2FrameEventPtr info) ++{ ++ xf86CrtcPtr crtc = I830DRI2DrawableCrtc(draw); ++ drmVBlank vbl; ++ ++ if (crtc == NULL) ++ return FALSE; ++ ++ vbl.request.type = ++ DRM_VBLANK_RELATIVE | ++ DRM_VBLANK_EVENT | ++ pipe_select(intel_crtc_to_pipe(crtc)); ++ vbl.request.sequence = 1; ++ vbl.request.signal = ++ intel_drm_queue_alloc(intel->scrn, crtc, info, ++ intel_dri2_vblank_handler, ++ intel_dri2_vblank_abort); ++ if (vbl.request.signal == 0) ++ return FALSE; ++ ++ info->type = DRI2_SWAP; ++ if (drmWaitVBlank(intel->drmSubFD, &vbl)) { ++ intel_drm_abort_seq(intel->scrn, vbl.request.signal); ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++static void I830DRI2FrameEventHandler(unsigned int frame, ++ unsigned int tv_sec, ++ unsigned int tv_usec, ++ DRI2FrameEventPtr swap_info) + { + intel_screen_private *intel = swap_info->intel; + DrawablePtr drawable; +@@ -906,24 +976,22 @@ void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, + return; + } + +- + switch (swap_info->type) { + case DRI2_FLIP: + /* If we can still flip... */ +- if (can_exchange(drawable, swap_info->front, swap_info->back) && +- I830DRI2ScheduleFlip(intel, drawable, swap_info)) +- return; +- +- /* else fall through to exchange/blit */ +- case DRI2_SWAP: { +- I830DRI2FallbackBlitSwap(drawable, +- swap_info->front, swap_info->back); +- DRI2SwapComplete(swap_info->client, drawable, frame, tv_sec, tv_usec, +- DRI2_BLIT_COMPLETE, +- swap_info->client ? 
swap_info->event_complete : NULL, +- swap_info->event_data); +- break; +- } ++ if (!queue_flip(intel, drawable, swap_info) && ++ !queue_swap(intel, drawable, swap_info)) { ++ case DRI2_SWAP: ++ I830DRI2FallbackBlitSwap(drawable, ++ swap_info->front, swap_info->back); ++ DRI2SwapComplete(swap_info->client, drawable, frame, tv_sec, tv_usec, ++ DRI2_BLIT_COMPLETE, ++ swap_info->client ? swap_info->event_complete : NULL, ++ swap_info->event_data); ++ break; ++ } ++ return; ++ + case DRI2_WAITMSC: + if (swap_info->client) + DRI2WaitMSCComplete(swap_info->client, drawable, +@@ -939,12 +1007,13 @@ void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, + i830_dri2_del_frame_event(swap_info); + } + +-void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, +- unsigned int tv_usec, DRI2FrameEventPtr flip_info) ++static void I830DRI2FlipEventHandler(unsigned int frame, ++ unsigned int tv_sec, ++ unsigned int tv_usec, ++ DRI2FrameEventPtr flip_info) + { + struct intel_screen_private *intel = flip_info->intel; + DrawablePtr drawable; +- DRI2FrameEventPtr chain; + + drawable = NULL; + if (flip_info->drawable_id) +@@ -954,6 +1023,7 @@ void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, + + /* We assume our flips arrive in order, so we don't check the frame */ + switch (flip_info->type) { ++ case DRI2_FLIP: + case DRI2_SWAP: + if (!drawable) + break; +@@ -984,35 +1054,6 @@ void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, + flip_info->event_data); + break; + +- case DRI2_SWAP_CHAIN: +- assert(intel->pending_flip[flip_info->pipe] == flip_info); +- intel->pending_flip[flip_info->pipe] = NULL; +- +- chain = flip_info->chain; +- if (chain) { +- DrawablePtr chain_drawable = NULL; +- if (chain->drawable_id) +- dixLookupDrawable(&chain_drawable, +- chain->drawable_id, +- serverClient, +- M_ANY, DixWriteAccess); +- if (chain_drawable == NULL) { +- i830_dri2_del_frame_event(chain); +- } else if 
(!can_exchange(chain_drawable, chain->front, chain->back) || +- !I830DRI2ScheduleFlip(intel, chain_drawable, chain)) { +- I830DRI2FallbackBlitSwap(chain_drawable, +- chain->front, +- chain->back); +- +- DRI2SwapComplete(chain->client, chain_drawable, frame, tv_sec, tv_usec, +- DRI2_BLIT_COMPLETE, +- chain->client ? chain->event_complete : NULL, +- chain->event_data); +- i830_dri2_del_frame_event(chain); +- } +- } +- break; +- + default: + xf86DrvMsg(intel->scrn->scrnIndex, X_WARNING, + "%s: unknown vblank event received\n", __func__); +@@ -1023,38 +1064,6 @@ void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, + i830_dri2_del_frame_event(flip_info); + } + +-static uint32_t pipe_select(int pipe) +-{ +- if (pipe > 1) +- return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT; +- else if (pipe > 0) +- return DRM_VBLANK_SECONDARY; +- else +- return 0; +-} +- +-static void +-intel_dri2_vblank_handler(ScrnInfoPtr scrn, +- xf86CrtcPtr crtc, +- uint64_t msc, +- uint64_t usec, +- void *data) +-{ +- DRI2FrameEventPtr swap_info = data; +- +- I830DRI2FrameEventHandler((uint32_t) msc, usec / 1000000, usec % 1000000, swap_info); +-} +- +-static void +-intel_dri2_vblank_abort(ScrnInfoPtr scrn, +- xf86CrtcPtr crtc, +- void *data) +-{ +- DRI2FrameEventPtr swap_info = data; +- +- i830_dri2_del_frame_event(swap_info); +-} +- + /* + * ScheduleSwap is responsible for requesting a DRM vblank event for the + * appropriate frame. +@@ -1089,7 +1098,6 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + int pipe = crtc ? 
intel_crtc_to_pipe(crtc) : -1; + int flip = 0; + DRI2FrameEventPtr swap_info = NULL; +- enum DRI2FrameEventType swap_type = DRI2_SWAP; + uint64_t current_msc, current_ust; + uint64_t request_msc; + uint32_t seq; +@@ -1109,7 +1117,7 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + swap_info->event_data = data; + swap_info->front = front; + swap_info->back = back; +- swap_info->pipe = pipe; ++ swap_info->type = DRI2_SWAP; + + if (!i830_dri2_add_frame_event(swap_info)) { + free(swap_info); +@@ -1124,20 +1132,27 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + if (ret) + goto blit_fallback; + +- /* Flips need to be submitted one frame before */ ++ /* ++ * If we can, schedule the flip directly from here rather ++ * than waiting for an event from the kernel for the current ++ * (or a past) MSC. ++ */ ++ if (divisor == 0 && ++ current_msc >= *target_msc && ++ queue_flip(intel, draw, swap_info)) ++ return TRUE; ++ + if (can_exchange(draw, front, back)) { +- swap_type = DRI2_FLIP; +- flip = 1; ++ swap_info->type = DRI2_FLIP; ++ /* Flips need to be submitted one frame before */ ++ if (*target_msc > 0) ++ --*target_msc; ++ flip = 1; + } + +- swap_info->type = swap_type; +- +- /* Correct target_msc by 'flip' if swap_type == DRI2_FLIP. +- * Do it early, so handling of different timing constraints +- * for divisor, remainder and msc vs. target_msc works. +- */ +- if (*target_msc > 0) +- *target_msc -= flip; ++#if DRI2INFOREC_VERSION >= 6 ++ DRI2SwapLimit(draw, 1); ++#endif + + /* + * If divisor is zero, or current_msc is smaller than target_msc +@@ -1145,15 +1160,6 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + * the swap. + */ + if (divisor == 0 || current_msc < *target_msc) { +- /* +- * If we can, schedule the flip directly from here rather +- * than waiting for an event from the kernel for the current +- * (or a past) MSC. 
+- */ +- if (flip && divisor == 0 && current_msc >= *target_msc && +- I830DRI2ScheduleFlip(intel, draw, swap_info)) +- return TRUE; +- + vbl.request.type = + DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe); + +@@ -1168,7 +1174,7 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + * current_msc to ensure we return a reasonable value back + * to the caller. This makes swap_interval logic more robust. + */ +- if (current_msc >= *target_msc) ++ if (current_msc > *target_msc) + *target_msc = current_msc; + + seq = intel_drm_queue_alloc(scrn, crtc, swap_info, intel_dri2_vblank_handler, intel_dri2_vblank_abort); +@@ -1183,6 +1189,8 @@ I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "divisor 0 get vblank counter failed: %s\n", + strerror(errno)); ++ intel_drm_abort_seq(intel->scrn, seq); ++ swap_info = NULL; + goto blit_fallback; + } + +@@ -1332,7 +1340,6 @@ I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc, + + if (!i830_dri2_add_frame_event(wait_info)) { + free(wait_info); +- wait_info = NULL; + goto out_complete; + } + +@@ -1374,7 +1381,8 @@ I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc, + strerror(errno)); + limit--; + } +- goto out_free; ++ intel_drm_abort_seq(intel->scrn, seq); ++ goto out_complete; + } + + wait_info->frame = intel_sequence_to_crtc_msc(crtc, vbl.reply.sequence); +@@ -1417,7 +1425,8 @@ I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc, + strerror(errno)); + limit--; + } +- goto out_free; ++ intel_drm_abort_seq(intel->scrn, seq); ++ goto out_complete; + } + + wait_info->frame = intel_sequence_to_crtc_msc(crtc, vbl.reply.sequence); +@@ -1440,13 +1449,92 @@ static int has_i830_dri(void) + return access(DRI_DRIVER_PATH "/i830_dri.so", R_OK) == 0; + } + +-static const char *dri_driver_name(intel_screen_private *intel) ++static int ++namecmp(const char *s1, 
const char *s2) ++{ ++ char c1, c2; ++ ++ if (!s1 || *s1 == 0) { ++ if (!s2 || *s2 == 0) ++ return 0; ++ else ++ return 1; ++ } ++ ++ while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') ++ s1++; ++ ++ while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') ++ s2++; ++ ++ c1 = isupper(*s1) ? tolower(*s1) : *s1; ++ c2 = isupper(*s2) ? tolower(*s2) : *s2; ++ while (c1 == c2) { ++ if (c1 == '\0') ++ return 0; ++ ++ s1++; ++ while (*s1 == '_' || *s1 == ' ' || *s1 == '\t') ++ s1++; ++ ++ s2++; ++ while (*s2 == '_' || *s2 == ' ' || *s2 == '\t') ++ s2++; ++ ++ c1 = isupper(*s1) ? tolower(*s1) : *s1; ++ c2 = isupper(*s2) ? tolower(*s2) : *s2; ++ } ++ ++ return c1 - c2; ++} ++ ++static Bool is_level(const char **str) ++{ ++ const char *s = *str; ++ char *end; ++ unsigned val; ++ ++ if (s == NULL || *s == '\0') ++ return TRUE; ++ ++ if (namecmp(s, "on") == 0) ++ return TRUE; ++ if (namecmp(s, "true") == 0) ++ return TRUE; ++ if (namecmp(s, "yes") == 0) ++ return TRUE; ++ ++ if (namecmp(s, "0") == 0) ++ return TRUE; ++ if (namecmp(s, "off") == 0) ++ return TRUE; ++ if (namecmp(s, "false") == 0) ++ return TRUE; ++ if (namecmp(s, "no") == 0) ++ return TRUE; ++ ++ val = strtoul(s, &end, 0); ++ if (val && *end == '\0') ++ return TRUE; ++ if (val && *end == ':') ++ *str = end + 1; ++ return FALSE; ++} ++ ++static const char *options_get_dri(intel_screen_private *intel) + { + #if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) +- const char *s = xf86GetOptValString(intel->Options, OPTION_DRI); +- Bool dummy; ++ return xf86GetOptValString(intel->Options, OPTION_DRI); ++#else ++ return NULL; ++#endif ++} + +- if (s == NULL || xf86getBoolValue(&dummy, s)) { ++static const char *dri_driver_name(intel_screen_private *intel) ++{ ++ const char *s = options_get_dri(intel); ++ ++ if (is_level(&s)) { + if (INTEL_INFO(intel)->gen < 030) + return has_i830_dri() ? 
"i830" : "i915"; + else if (INTEL_INFO(intel)->gen < 040) +@@ -1456,14 +1544,6 @@ static const char *dri_driver_name(intel_screen_private *intel) + } + + return s; +-#else +- if (INTEL_INFO(intel)->gen < 030) +- return has_i830_dri() ? "i830" : "i915"; +- else if (INTEL_INFO(intel)->gen < 040) +- return "i915"; +- else +- return "i965"; +-#endif + } + + Bool I830DRI2ScreenInit(ScreenPtr screen) +@@ -1544,7 +1624,7 @@ Bool I830DRI2ScreenInit(ScreenPtr screen) + info.numDrivers = 2; + info.driverNames = driverNames; + driverNames[0] = info.driverName; +- driverNames[1] = info.driverName; ++ driverNames[1] = "va_gl"; + #endif + + return DRI2ScreenInit(screen, &info); +diff --git a/src/uxa/intel_driver.c b/src/uxa/intel_driver.c +index 2793da5d..3703c412 100644 +--- a/src/uxa/intel_driver.c ++++ b/src/uxa/intel_driver.c +@@ -237,24 +237,17 @@ static Bool I830GetEarlyOptions(ScrnInfoPtr scrn) + return TRUE; + } + +-static Bool intel_option_cast_string_to_bool(intel_screen_private *intel, +- int id, Bool val) +-{ +-#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0) +- xf86getBoolValue(&val, xf86GetOptValString(intel->Options, id)); +- return val; +-#else +- return val; +-#endif +-} +- + static void intel_check_dri_option(ScrnInfoPtr scrn) + { + intel_screen_private *intel = intel_get_screen_private(scrn); ++ unsigned level; + + intel->dri2 = intel->dri3 = DRI_NONE; +- if (!intel_option_cast_string_to_bool(intel, OPTION_DRI, TRUE)) +- intel->dri2 = intel->dri3 = DRI_DISABLED; ++ level = intel_option_cast_to_unsigned(intel->Options, OPTION_DRI, DEFAULT_DRI_LEVEL); ++ if (level < 3 || INTEL_INFO(intel)->gen < 040) ++ intel->dri3 = DRI_DISABLED; ++ if (level < 2) ++ intel->dri2 = DRI_DISABLED; + + if (scrn->depth != 16 && scrn->depth != 24 && scrn->depth != 30) { + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, +@@ -371,8 +364,8 @@ static Bool can_accelerate_blt(struct intel_screen_private *intel) + if (INTEL_INFO(intel)->gen == -1) + return FALSE; + +- if 
(xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_DISABLE, FALSE) || +- !intel_option_cast_string_to_bool(intel, OPTION_ACCEL_METHOD, TRUE)) { ++ if (!xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_ENABLE, TRUE) || ++ !intel_option_cast_to_bool(intel->Options, OPTION_ACCEL_METHOD, TRUE)) { + xf86DrvMsg(intel->scrn->scrnIndex, X_CONFIG, + "Disabling hardware acceleration.\n"); + return FALSE; +@@ -659,8 +652,9 @@ redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty) + } + + static void +-intel_dirty_update(ScreenPtr screen) ++intel_dirty_update(intel_screen_private *intel) + { ++ ScreenPtr screen = xf86ScrnToScreen(intel->scrn); + RegionPtr region; + PixmapDirtyUpdatePtr ent; + +@@ -677,6 +671,7 @@ intel_dirty_update(ScreenPtr screen) + } + #endif + ++#if !HAVE_NOTIFY_FD + static void + I830BlockHandler(BLOCKHANDLER_ARGS_DECL) + { +@@ -694,9 +689,22 @@ I830BlockHandler(BLOCKHANDLER_ARGS_DECL) + intel_uxa_block_handler(intel); + intel_video_block_handler(intel); + #ifdef INTEL_PIXMAP_SHARING +- intel_dirty_update(screen); ++ intel_dirty_update(intel); + #endif + } ++#else ++static void ++I830BlockHandler(void *data, void *timeout) ++{ ++ intel_screen_private *intel = data; ++ ++ intel_uxa_block_handler(intel); ++ intel_video_block_handler(intel); ++#ifdef INTEL_PIXMAP_SHARING ++ intel_dirty_update(intel); ++#endif ++} ++#endif + + static Bool + intel_init_initial_framebuffer(ScrnInfoPtr scrn) +@@ -735,6 +743,8 @@ intel_flush_callback(CallbackListPtr *list, + } + + #if HAVE_UDEV ++#include <sys/stat.h> ++ + static void + I830HandleUEvents(int fd, void *closure) + { +@@ -771,6 +781,15 @@ I830HandleUEvents(int fd, void *closure) + udev_device_unref(dev); + } + ++static int has_randr(void) ++{ ++#if HAS_DIXREGISTERPRIVATEKEY ++ return dixPrivateKeyRegistered(rrPrivKey); ++#else ++ return *rrPrivKey; ++#endif ++} ++ + static void + I830UeventInit(ScrnInfoPtr scrn) + { +@@ -780,6 +799,10 @@ I830UeventInit(ScrnInfoPtr scrn) + Bool hotplug; + MessageType from = 
X_CONFIG; + ++ /* Without RR, nothing we can do here */ ++ if (!has_randr()) ++ return; ++ + if (!xf86GetOptValBool(intel->Options, OPTION_HOTPLUG, &hotplug)) { + from = X_DEFAULT; + hotplug = TRUE; +@@ -939,8 +962,14 @@ I830ScreenInit(SCREEN_INIT_ARGS_DECL) + "Hardware cursor initialization failed\n"); + } + ++#if !HAVE_NOTIFY_FD + intel->BlockHandler = screen->BlockHandler; + screen->BlockHandler = I830BlockHandler; ++#else ++ RegisterBlockAndWakeupHandlers(I830BlockHandler, ++ (ServerWakeupHandlerProcPtr)NoopDDA, ++ intel); ++#endif + + #ifdef INTEL_PIXMAP_SHARING + screen->StartPixmapTracking = PixmapStartDirtyTracking; +@@ -1164,8 +1193,6 @@ static Bool I830CloseScreen(CLOSE_SCREEN_ARGS_DECL) + + intel_sync_close(screen); + +- xf86GARTCloseScreen(scrn->scrnIndex); +- + scrn->vtSema = FALSE; + return TRUE; + } +diff --git a/src/uxa/intel_hwmc.c b/src/uxa/intel_hwmc.c +index 829cb8e0..78540600 100644 +--- a/src/uxa/intel_hwmc.c ++++ b/src/uxa/intel_hwmc.c +@@ -193,7 +193,7 @@ Bool intel_xvmc_adaptor_init(ScreenPtr pScreen) + intel_screen_private *intel = intel_get_screen_private(scrn); + struct pci_device *pci; + static XF86MCAdaptorRec *pAdapt; +- char *name; ++ const char *name; + char buf[64]; + + if (!intel->XvMCEnabled) +diff --git a/src/uxa/intel_memory.c b/src/uxa/intel_memory.c +index 0c6cf30c..b2d7a367 100644 +--- a/src/uxa/intel_memory.c ++++ b/src/uxa/intel_memory.c +@@ -42,7 +42,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * This is the video memory allocator. Our memory allocation is different from + * other graphics chips, where you have a fixed amount of graphics memory + * available that you want to put to the best use. Instead, we have almost no +- * memory pre-allocated, and we have to choose an appropriate amount of sytem ++ * memory pre-allocated, and we have to choose an appropriate amount of system + * memory to use. 
+ * + * The allocations we might do: +diff --git a/src/uxa/intel_present.c b/src/uxa/intel_present.c +index d20043f3..ac028edd 100644 +--- a/src/uxa/intel_present.c ++++ b/src/uxa/intel_present.c +@@ -244,6 +244,7 @@ intel_present_check_flip(RRCrtcPtr crtc, + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + dri_bo *bo; ++ uint32_t tiling, swizzle; + + if (!scrn->vtSema) + return FALSE; +@@ -266,6 +267,12 @@ intel_present_check_flip(RRCrtcPtr crtc, + if (!bo) + return FALSE; + ++ if (drm_intel_bo_get_tiling(bo, &tiling, &swizzle)) ++ return FALSE; ++ ++ if (tiling == I915_TILING_Y) ++ return FALSE; ++ + return TRUE; + } + +@@ -343,29 +350,33 @@ intel_present_unflip(ScreenPtr screen, uint64_t event_id) + { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); +- struct intel_present_vblank_event *event; + PixmapPtr pixmap = screen->GetScreenPixmap(screen); ++ struct intel_present_vblank_event *event = NULL; + dri_bo *bo; +- Bool ret; + + if (!intel_present_check_flip(NULL, screen->root, pixmap, true)) +- return; ++ goto fail; + + bo = intel_get_pixmap_bo(pixmap); + if (!bo) +- return; ++ goto fail; + + event = calloc(1, sizeof(struct intel_present_vblank_event)); + if (!event) +- return; ++ goto fail; + + event->event_id = event_id; + +- ret = intel_do_pageflip(intel, bo, -1, FALSE, event, intel_present_flip_event, intel_present_flip_abort); +- if (!ret) { +- xf86DrvMsg(scrn->scrnIndex, X_ERROR, +- "present unflip failed\n"); +- } ++ if (!intel_do_pageflip(intel, bo, -1, FALSE, event, ++ intel_present_flip_event, ++ intel_present_flip_abort)) ++ goto fail; ++ ++ return; ++fail: ++ xf86SetDesiredModes(scrn); ++ present_event_notify(event_id, 0, 0); ++ free(event); + } + + static present_screen_info_rec intel_present_screen_info = { +diff --git a/src/uxa/intel_uxa.c b/src/uxa/intel_uxa.c +index 590ff5d1..ec32a723 100644 +--- a/src/uxa/intel_uxa.c ++++ 
b/src/uxa/intel_uxa.c +@@ -176,6 +176,24 @@ intel_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask) + return TRUE; + } + ++static Bool ++intel_uxa_check_bo_tiling(intel_screen_private *intel, ++ PixmapPtr pixmap, ++ unsigned *tiling_out) ++{ ++ struct intel_uxa_pixmap *priv; ++ ++ priv = intel_uxa_get_pixmap_private(pixmap); ++ if (!priv) ++ return FALSE; ++ ++ if (priv->tiling == I915_TILING_Y && INTEL_INFO(intel)->gen < 060) ++ return FALSE; ++ ++ *tiling_out = priv->tiling; ++ return TRUE; ++} ++ + /** + * Sets up hardware state for a series of solid fills. + */ +@@ -189,6 +207,9 @@ intel_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg) + intel_uxa_get_pixmap_bo(pixmap), + }; + ++ if (!intel_uxa_check_bo_tiling(intel, pixmap, &intel->BR_tiling[0])) ++ return FALSE; ++ + if (!intel_uxa_check_pitch_2d(pixmap)) + return FALSE; + +@@ -236,7 +257,7 @@ static void intel_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) + + { + int len = INTEL_INFO(intel)->gen >= 0100 ? 7 : 6; +- BEGIN_BATCH_BLT(len); ++ BEGIN_BATCH_BLT_TILED(len); + + cmd = XY_COLOR_BLT_CMD | (len - 2); + +@@ -310,6 +331,10 @@ intel_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, + intel_uxa_get_pixmap_bo(dest), + }; + ++ if (!intel_uxa_check_bo_tiling(intel, dest, &intel->BR_tiling[0]) || ++ !intel_uxa_check_bo_tiling(intel, source, &intel->BR_tiling[1])) ++ return FALSE; ++ + if (!intel_uxa_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) + return FALSE; + +@@ -375,7 +400,7 @@ intel_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, + + { + int len = INTEL_INFO(intel)->gen >= 0100 ? 
10 : 8; +- BEGIN_BATCH_BLT(len); ++ BEGIN_BATCH_BLT_TILED(len); + + cmd = XY_SRC_COPY_BLT_CMD | (len - 2); + +@@ -1068,7 +1093,7 @@ Bool intel_uxa_create_screen_resources(ScreenPtr screen) + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + PixmapPtr pixmap; + intel_screen_private *intel = intel_get_screen_private(scrn); +- dri_bo *bo = intel->front_buffer; ++ dri_bo *bo = intel->front_buffer, *old_bo; + int old_width, old_height, old_pitch; + + if (!uxa_resources_init(screen)) +@@ -1081,6 +1106,7 @@ Bool intel_uxa_create_screen_resources(ScreenPtr screen) + old_width = pixmap->drawable.width; + old_height = pixmap->drawable.height; + old_pitch = pixmap->devKind; ++ old_bo = intel_uxa_get_pixmap_bo(pixmap); + + if (!screen->ModifyPixmapHeader(pixmap, + scrn->virtualX, +@@ -1102,6 +1128,9 @@ Bool intel_uxa_create_screen_resources(ScreenPtr screen) + err: + screen->ModifyPixmapHeader(pixmap, + old_width, old_height, -1, -1, old_pitch, NULL); ++ if (old_bo) ++ intel_uxa_set_pixmap_bo(pixmap, old_bo); ++ + return FALSE; + } + +diff --git a/test/Makefile.am b/test/Makefile.am +index 66ed8ebb..12b5d5d8 100644 +--- a/test/Makefile.am ++++ b/test/Makefile.am +@@ -5,6 +5,7 @@ stress_TESTS = \ + basic-rectangle \ + basic-string \ + basic-copyarea \ ++ basic-copyplane \ + basic-copyarea-size \ + basic-putimage \ + basic-lines \ +@@ -12,8 +13,10 @@ stress_TESTS = \ + DrawSegments \ + cursor-test \ + render-fill \ ++ render-glyphs \ + render-trapezoid \ + render-trapezoid-image \ ++ render-triangle \ + render-fill-copy \ + render-composite-solid \ + render-composite-solid-mask \ +@@ -25,9 +28,16 @@ stress_TESTS = \ + shm-test \ + $(NULL) + ++if X11_VM ++stress_TESTS += \ ++ xvidmode \ ++ $(NULL) ++endif ++ + if DRI2 + stress_TESTS += \ + dri2-race \ ++ dri2-speed \ + dri2-swap \ + dri2-test \ + $(NULL) +@@ -36,8 +46,11 @@ endif + if X11_DRI3 + stress_TESTS += \ + dri3-test \ ++ present-race \ ++ present-speed \ + present-test \ + $(NULL) ++present_speed_CFLAGS = ${AM_CFLAGS} 
-pthread + endif + check_PROGRAMS = $(stress_TESTS) + +diff --git a/test/basic-copyplane.c b/test/basic-copyplane.c +new file mode 100644 +index 00000000..f049b82b +--- /dev/null ++++ b/test/basic-copyplane.c +@@ -0,0 +1,99 @@ ++#include <stdint.h> ++#include <stdio.h> ++#include <stdlib.h> ++ ++#include <X11/Xutil.h> /* for XDestroyImage */ ++#include <pixman.h> /* for pixman blt functions */ ++ ++#include "test.h" ++ ++static uint8_t clock_bits[] = {0x3C, 0x5E, 0xEF, 0xF7, 0x87, 0xFF, 0x7E, 0x3C}; ++ ++/* https://bugs.freedesktop.org/show_bug.cgi?id=91499 */ ++static void draw_clock(struct test_display *t, Drawable d, ++ uint8_t alu, int x, int y, uint32_t fg, uint32_t bg) ++{ ++ Pixmap pixmap; ++ XGCValues val; ++ GC gc; ++ ++ val.graphics_exposures = 0; ++ val.function = alu; ++ val.foreground = fg; ++ val.background = fg; ++ ++ gc = XCreateGC(t->dpy, d, ++ GCGraphicsExposures | GCForeground | GCBackground | GCFunction, ++ &val); ++ pixmap = XCreateBitmapFromData(t->dpy, d, (char *)clock_bits, 8, 8); ++ ++ XCopyPlane(t->dpy, pixmap, d, gc, 0, 0, 8, 8, x, y, 1); ++ ++ XFreePixmap(t->dpy, pixmap); ++ XFreeGC(t->dpy, gc); ++} ++ ++static void clear(struct test_display *dpy, struct test_target *tt) ++{ ++ XRenderColor render_color = {0}; ++ XRenderFillRectangle(dpy->dpy, PictOpClear, tt->picture, &render_color, ++ 0, 0, tt->width, tt->height); ++} ++ ++static void clock_tests(struct test *t, int reps, int sets, enum target target) ++{ ++ struct test_target out, ref; ++ int r, s; ++ ++ printf("Testing clock (%s): ", test_target_name(target)); ++ fflush(stdout); ++ ++ test_target_create_render(&t->out, target, &out); ++ clear(&t->out, &out); ++ ++ test_target_create_render(&t->ref, target, &ref); ++ clear(&t->ref, &ref); ++ ++ for (s = 0; s < sets; s++) { ++ for (r = 0; r < reps; r++) { ++ int x = rand() % (out.width - 8); ++ int y = rand() % (out.height - 8); ++ uint8_t alu = rand() % (GXset + 1); ++ uint32_t bg = rand(); ++ uint32_t fg = rand(); ++ ++ 
draw_clock(&t->out, out.draw, alu, x, y, fg, bg); ++ draw_clock(&t->ref, ref.draw, alu, x, y, fg, bg); ++ } ++ ++ test_compare(t, ++ out.draw, out.format, ++ ref.draw, ref.format, ++ 0, 0, out.width, out.height, ++ ""); ++ } ++ ++ printf("passed [%d iterations x %d]\n", reps, sets); ++ ++ test_target_destroy_render(&t->out, &out); ++ test_target_destroy_render(&t->ref, &ref); ++} ++ ++int main(int argc, char **argv) ++{ ++ struct test test; ++ int i; ++ ++ test_init(&test, argc, argv); ++ ++ for (i = 0; i <= DEFAULT_ITERATIONS; i++) { ++ int reps = REPS(i), sets = SETS(i); ++ enum target t; ++ ++ for (t = TARGET_FIRST; t <= TARGET_LAST; t++) { ++ clock_tests(&test, reps, sets, t); ++ } ++ } ++ ++ return 0; ++} +diff --git a/test/dri2-race.c b/test/dri2-race.c +index 8862c84c..ece624f6 100644 +--- a/test/dri2-race.c ++++ b/test/dri2-race.c +@@ -5,6 +5,11 @@ + #include <X11/Xlib.h> + #include <X11/Xutil.h> + #include <X11/extensions/Xfixes.h> ++#include <X11/extensions/Xcomposite.h> ++#include <X11/Xlib-xcb.h> ++#include <xcb/xcb.h> ++#include <xcb/xcbext.h> ++#include <xcb/dri2.h> + #include <unistd.h> + #include <fcntl.h> + #include <string.h> +@@ -12,11 +17,49 @@ + + #include <xf86drm.h> + #include <drm.h> ++#include <setjmp.h> + + #include "dri2.h" + + #define COUNT 60 + ++#define N_DIVISORS 3 ++static const int divisors[N_DIVISORS] = { 0, 1, 16 }; ++ ++static jmp_buf error_handler[4]; ++static int have_error_handler; ++ ++#define error_get() \ ++ setjmp(error_handler[have_error_handler++]) ++ ++#define error_put() \ ++ have_error_handler-- ++ ++static int (*saved_io_error)(Display *dpy); ++ ++static int io_error(Display *dpy) ++{ ++ if (have_error_handler) ++ longjmp(error_handler[--have_error_handler], 0); ++ ++ return saved_io_error(dpy); ++} ++ ++static int x_error(Display *dpy, XErrorEvent *e) ++{ ++ return Success; ++} ++ ++static uint32_t upper_32_bits(uint64_t val) ++{ ++ return val >> 32; ++} ++ ++static uint32_t lower_32_bits(uint64_t val) ++{ ++ return 
val & 0xffffffff; ++} ++ + static int dri2_open(Display *dpy) + { + drm_auth_t auth; +@@ -41,45 +84,701 @@ static int dri2_open(Display *dpy) + return fd; + } + +-static void run(Display *dpy, int width, int height, +- unsigned int *attachments, int nattachments, +- const char *name) ++static void swap_buffers(Display *dpy, Window win, int divisor, ++ unsigned int *attachments, int nattachments) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ unsigned int seq[2]; ++ ++ seq[0] = xcb_dri2_swap_buffers_unchecked(c, win, ++ 0, 0, 0, divisor, 0, 0).sequence; ++ ++ ++ seq[1] = xcb_dri2_get_buffers_unchecked(c, win, ++ nattachments, nattachments, ++ attachments).sequence; ++ ++ xcb_flush(c); ++ xcb_discard_reply(c, seq[0]); ++ xcb_discard_reply(c, seq[1]); ++} ++ ++#define COMPOSITE 1 ++ ++static int has_composite(Display *dpy) ++{ ++ Display *dummy = NULL; ++ int event, error; ++ int major = -1, minor = -1; ++ ++ if (dpy == NULL) ++ dummy = dpy = XOpenDisplay(NULL); ++ ++ if (XCompositeQueryExtension(dpy, &event, &error)) ++ XCompositeQueryVersion(dpy, &major, &minor); ++ ++ if (dummy) ++ XCloseDisplay(dummy); ++ ++ return major > 0 || minor >= 4; ++} ++ ++static void race_window(Display *dpy, int width, int height, ++ unsigned int *attachments, int nattachments, ++ unsigned flags, const char *name) + { + Window win; + XSetWindowAttributes attr; +- int count, loop; ++ int count, loop, n; + DRI2Buffer *buffers; + ++ if (flags & COMPOSITE && !has_composite(dpy)) ++ return; ++ ++ printf("%s(%s)\n", __func__, name); ++ + /* Be nasty and install a fullscreen window on top so that we + * can guarantee we do not get clipped by children. 
+ */ + attr.override_redirect = 1; +- loop = 100; +- do { ++ for (n = 0; n < N_DIVISORS; n++) { ++ loop = 256 >> ffs(divisors[n]); ++ printf("DRI2SwapBuffers(divisor=%d), loop=%d", divisors[n], loop); ++ fflush(stdout); ++ do { ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ ++ buffers = DRI2GetBuffers(dpy, win, &width, &height, ++ attachments, nattachments, &count); ++ if (count != nattachments) ++ return; ++ ++ free(buffers); ++ for (count = 0; count < loop; count++) ++ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); ++ XDestroyWindow(dpy, win); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { ++ loop = 256 >> ffs(divisors[n]); ++ printf("xcb_dri2_swap_buffers(divisor=%d), loops=%d", divisors[n], loop); ++ fflush(stdout); ++ do { ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ ++ buffers = DRI2GetBuffers(dpy, win, &width, &height, ++ attachments, nattachments, &count); ++ if (count != nattachments) ++ return; ++ ++ free(buffers); ++ for (count = 0; count < loop; count++) ++ swap_buffers(dpy, win, divisors[n], attachments, nattachments); ++ XDestroyWindow(dpy, win); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { ++ loop = 256 >> ffs(divisors[n]); ++ printf("DRI2WaitMsc(divisor=%d), 
loop=%d", divisors[n], loop); ++ fflush(stdout); ++ do { ++ uint64_t ignore, msc; ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); ++ msc++; ++ for (count = 0; count < loop; count++) { ++ xcb_discard_reply(c, ++ xcb_dri2_wait_msc(c, win, ++ upper_32_bits(msc), ++ lower_32_bits(msc), ++ 0, 0, 0, 0).sequence); ++ msc += divisors[n]; ++ } ++ XFlush(dpy); ++ XDestroyWindow(dpy, win); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ XSync(dpy, 1); ++ sleep(2); ++ XSync(dpy, 1); ++} ++ ++static int rand_size(int max) ++{ ++ return 1 + (rand() % (max - 1)); ++} ++ ++static void race_resize(Display *dpy, int width, int height, ++ unsigned int *attachments, int nattachments, ++ unsigned flags, const char *name) ++{ ++ Window win; ++ XSetWindowAttributes attr; ++ int count, loop, n; ++ DRI2Buffer *buffers; ++ ++ if (flags & COMPOSITE && !has_composite(dpy)) ++ return; ++ ++ printf("%s(%s)\n", __func__, name); ++ ++ attr.override_redirect = 1; ++ for (n = 0; n < N_DIVISORS; n++) { ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ ++ loop = 256 >> ffs(divisors[n]); ++ printf("DRI2SwapBuffers(divisor=%d), loop=%d", divisors[n], loop); ++ fflush(stdout); ++ do { ++ int w, h; ++ ++ buffers = DRI2GetBuffers(dpy, win, &w, &h, ++ 
attachments, nattachments, &count); ++ if (count != nattachments) ++ return; ++ ++ free(buffers); ++ for (count = 0; count < loop; count++) ++ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); ++ XResizeWindow(dpy, win, rand_size(width), rand_size(height)); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { + win = XCreateWindow(dpy, DefaultRootWindow(dpy), + 0, 0, width, height, 0, + DefaultDepth(dpy, DefaultScreen(dpy)), + InputOutput, + DefaultVisual(dpy, DefaultScreen(dpy)), + CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); + XMapWindow(dpy, win); + + DRI2CreateDrawable(dpy, win); + +- buffers = DRI2GetBuffers(dpy, win, &width, &height, +- attachments, nattachments, &count); +- if (count != nattachments) +- return; ++ loop = 256 >> ffs(divisors[n]); ++ printf("xcb_dri2_swap_buffers(divisor=%d), loops=%d", divisors[n], loop); ++ fflush(stdout); ++ do { ++ int w, h; ++ ++ buffers = DRI2GetBuffers(dpy, win, &w, &h, ++ attachments, nattachments, &count); ++ if (count != nattachments) ++ return; + +- free(buffers); +- for (count = 0; count < loop; count++) +- DRI2SwapBuffers(dpy, win, 0, 0, 0); ++ free(buffers); ++ for (count = 0; count < loop; count++) ++ swap_buffers(dpy, win, divisors[n], attachments, nattachments); ++ XResizeWindow(dpy, win, rand_size(width), rand_size(height)); ++ printf("."); fflush(stdout); ++ } while (--loop); + XDestroyWindow(dpy, win); +- } while (--loop); ++ XSync(dpy, True); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ 
XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ ++ loop = 256 >> ffs(divisors[n]); ++ printf("DRI2WaitMsc(divisor=%d), loop=%d", divisors[n], loop); ++ fflush(stdout); ++ do { ++ uint64_t ignore, msc; ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ ++ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); ++ msc++; ++ for (count = 0; count < loop; count++) { ++ xcb_discard_reply(c, ++ xcb_dri2_wait_msc(c, win, ++ upper_32_bits(msc), ++ lower_32_bits(msc), ++ 0, 0, 0, 0).sequence); ++ msc += divisors[n]; ++ } ++ XFlush(dpy); ++ XResizeWindow(dpy, win, rand_size(width), rand_size(height)); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ printf("*\n"); ++ } ++ ++ XSync(dpy, 1); ++ sleep(2); ++ XSync(dpy, 1); ++} ++ ++static void race_manager(Display *dpy, int width, int height, ++ unsigned int *attachments, int nattachments, ++ unsigned flags, const char *name) ++{ ++ Display *mgr = XOpenDisplay(NULL); ++ Window win; ++ XSetWindowAttributes attr; ++ int count, loop, n; ++ DRI2Buffer *buffers; ++ ++ if (flags & COMPOSITE && !has_composite(dpy)) ++ return; ++ ++ printf("%s(%s)\n", __func__, name); ++ ++ /* Be nasty and install a fullscreen window on top so that we ++ * can guarantee we do not get clipped by children. 
++ */ ++ attr.override_redirect = 1; ++ for (n = 0; n < N_DIVISORS; n++) { ++ printf("DRI2SwapBuffers(divisor=%d)", divisors[n]); ++ fflush(stdout); ++ loop = 256 >> ffs(divisors[n]); ++ do { ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ ++ buffers = DRI2GetBuffers(dpy, win, &width, &height, ++ attachments, nattachments, &count); ++ if (count != nattachments) ++ return; ++ ++ free(buffers); ++ for (count = 0; count < loop; count++) ++ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); ++ XFlush(dpy); ++ XDestroyWindow(mgr, win); ++ XFlush(mgr); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { ++ printf("xcb_dri2_swap_buffers(divisor=%d)", divisors[n]); ++ fflush(stdout); ++ loop = 256 >> ffs(divisors[n]); ++ do { ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ ++ buffers = DRI2GetBuffers(dpy, win, &width, &height, ++ attachments, nattachments, &count); ++ if (count != nattachments) ++ return; ++ ++ free(buffers); ++ for (count = 0; count < loop; count++) ++ swap_buffers(dpy, win, divisors[n], attachments, nattachments); ++ XFlush(dpy); ++ XDestroyWindow(mgr, win); ++ XFlush(mgr); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { ++ printf("DRI2WaitMsc(divisor=%d)", divisors[n]); ++ 
fflush(stdout); ++ loop = 256 >> ffs(divisors[n]); ++ do { ++ uint64_t ignore, msc; ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); ++ msc++; ++ for (count = 0; count < loop; count++) { ++ xcb_discard_reply(c, ++ xcb_dri2_wait_msc(c, win, ++ upper_32_bits(msc), ++ lower_32_bits(msc), ++ 0, 0, 0, 0).sequence); ++ msc += divisors[n]; ++ } ++ XFlush(dpy); ++ XDestroyWindow(mgr, win); ++ XFlush(mgr); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ printf("*\n"); ++ } + + XSync(dpy, 1); ++ XSync(mgr, 1); + sleep(2); + XSync(dpy, 1); ++ XSync(mgr, 1); ++ ++ XCloseDisplay(mgr); ++} ++ ++static void race_close(int width, int height, ++ unsigned int *attachments, int nattachments, ++ unsigned flags, const char *name) ++{ ++ XSetWindowAttributes attr; ++ int count, loop, n; ++ ++ if (flags & COMPOSITE && !has_composite(NULL)) ++ return; ++ ++ printf("%s(%s)\n", __func__, name); ++ ++ /* Be nasty and install a fullscreen window on top so that we ++ * can guarantee we do not get clipped by children. 
++ */ ++ attr.override_redirect = 1; ++ for (n = 0; n < N_DIVISORS; n++) { ++ printf("DRI2SwapBuffers(divisor=%d)", divisors[n]); ++ fflush(stdout); ++ loop = 256 >> ffs(divisors[n]); ++ do { ++ Display *dpy = XOpenDisplay(NULL); ++ Window win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ free(DRI2GetBuffers(dpy, win, &width, &height, ++ attachments, nattachments, &count)); ++ if (count != nattachments) ++ return; ++ ++ for (count = 0; count < loop; count++) ++ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); ++ XCloseDisplay(dpy); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { ++ printf("xcb_dri2_swap_buffers(divisor=%d)", divisors[n]); ++ fflush(stdout); ++ loop = 256 >> ffs(divisors[n]); ++ do { ++ Display *dpy = XOpenDisplay(NULL); ++ Window win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ free(DRI2GetBuffers(dpy, win, &width, &height, ++ attachments, nattachments, &count)); ++ if (count != nattachments) ++ return; ++ ++ for (count = 0; count < loop; count++) ++ swap_buffers(dpy, win, divisors[n], attachments, nattachments); ++ XCloseDisplay(dpy); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { ++ printf("DRI2WaitMsc(divisor=%d)", divisors[n]); ++ fflush(stdout); ++ loop = 256 >> 
ffs(divisors[n]); ++ do { ++ uint64_t ignore, msc; ++ Display *dpy = XOpenDisplay(NULL); ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Window win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); ++ msc++; ++ for (count = 0; count < loop; count++) { ++ xcb_discard_reply(c, ++ xcb_dri2_wait_msc(c, win, ++ upper_32_bits(msc), ++ lower_32_bits(msc), ++ 0, 0, 0, 0).sequence); ++ msc += divisors[n]; ++ } ++ XFlush(dpy); ++ XCloseDisplay(dpy); ++ printf("."); fflush(stdout); ++ } while (--loop); ++ printf("*\n"); ++ } ++} ++ ++static void race_client(int width, int height, ++ unsigned int *attachments, int nattachments, ++ unsigned flags, const char *name) ++{ ++ Display *mgr = XOpenDisplay(NULL); ++ XSetWindowAttributes attr; ++ int count, loop, n; ++ ++ if (flags & COMPOSITE && !has_composite(NULL)) ++ return; ++ ++ printf("%s(%s)\n", __func__, name); ++ ++ /* Be nasty and install a fullscreen window on top so that we ++ * can guarantee we do not get clipped by children. 
++ */ ++ attr.override_redirect = 1; ++ for (n = 0; n < N_DIVISORS; n++) { ++ printf("DRI2SwapBuffers(divisor=%d)", divisors[n]); ++ fflush(stdout); ++ loop = 256 >> ffs(divisors[n]); ++ do { ++ Display *dpy = XOpenDisplay(NULL); ++ Window win; ++ ++ if (error_get()) { ++ XCloseDisplay(dpy); ++ printf("+"); fflush(stdout); ++ continue; ++ } ++ ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ free(DRI2GetBuffers(dpy, win, &width, &height, ++ attachments, nattachments, &count)); ++ if (count == nattachments) { ++ for (count = 0; count < loop; count++) ++ DRI2SwapBuffers(dpy, win, 0, divisors[n], count & (divisors[n]-1)); ++ } ++ ++ XFlush(dpy); ++ XKillClient(mgr, win); ++ XFlush(mgr); ++ ++ XCloseDisplay(dpy); ++ printf("."); fflush(stdout); ++ ++ error_put(); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { ++ printf("xcb_dri2_swap_buffers(divisor=%d)", divisors[n]); ++ fflush(stdout); ++ loop = 256 >> ffs(divisors[n]); ++ do { ++ Display *dpy = XOpenDisplay(NULL); ++ Window win; ++ ++ if (error_get()) { ++ XCloseDisplay(dpy); ++ printf("+"); fflush(stdout); ++ continue; ++ } ++ ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ free(DRI2GetBuffers(dpy, win, &width, &height, ++ attachments, nattachments, &count)); ++ if (count == nattachments) { ++ for (count = 0; count < loop; count++) ++ swap_buffers(dpy, win, 
divisors[n], attachments, nattachments); ++ } ++ ++ XFlush(dpy); ++ XKillClient(mgr, win); ++ XFlush(mgr); ++ ++ XCloseDisplay(dpy); ++ printf("."); fflush(stdout); ++ ++ error_put(); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ for (n = 0; n < N_DIVISORS; n++) { ++ printf("DRI2WaitMsc(divisor=%d)", divisors[n]); ++ fflush(stdout); ++ loop = 256 >> ffs(divisors[n]); ++ do { ++ Display *dpy = XOpenDisplay(NULL); ++ uint64_t ignore, msc; ++ xcb_connection_t *c; ++ Window win; ++ ++ if (error_get()) { ++ XCloseDisplay(dpy); ++ printf("+"); fflush(stdout); ++ continue; ++ } ++ ++ win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ if (flags & COMPOSITE) ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ XMapWindow(dpy, win); ++ ++ DRI2CreateDrawable(dpy, win); ++ DRI2GetMSC(dpy, win, &ignore, &msc, &ignore); ++ c = XGetXCBConnection(dpy); ++ msc++; ++ for (count = 0; count < loop; count++) { ++ xcb_discard_reply(c, ++ xcb_dri2_wait_msc(c, win, ++ upper_32_bits(msc), ++ lower_32_bits(msc), ++ 0, 0, 0, 0).sequence); ++ msc += divisors[n]; ++ } ++ ++ XFlush(dpy); ++ XKillClient(mgr, win); ++ XFlush(mgr); ++ ++ XCloseDisplay(dpy); ++ printf("."); fflush(stdout); ++ ++ error_put(); ++ } while (--loop); ++ printf("*\n"); ++ } ++ ++ XCloseDisplay(mgr); + } + + int main(void) +@@ -91,7 +790,10 @@ int main(void) + DRI2BufferFrontLeft, + }; + +- dpy = XOpenDisplay (NULL); ++ saved_io_error = XSetIOErrorHandler(io_error); ++ XSetErrorHandler(x_error); ++ ++ dpy = XOpenDisplay(NULL); + if (dpy == NULL) + return 77; + +@@ -101,13 +803,52 @@ int main(void) + + width = WidthOfScreen(DefaultScreenOfDisplay(dpy)); + height = HeightOfScreen(DefaultScreenOfDisplay(dpy)); +- run(dpy, width, height, attachments, 1, "fullscreen"); +- run(dpy, width, height, attachments, 2, "fullscreen (with front)"); ++ 
race_window(dpy, width, height, attachments, 1, 0, "fullscreen"); ++ race_window(dpy, width, height, attachments, 1, COMPOSITE, "composite fullscreen"); ++ race_window(dpy, width, height, attachments, 2, 0, "fullscreen (with front)"); ++ race_window(dpy, width, height, attachments, 2, COMPOSITE, "composite fullscreen (with front)"); ++ ++ race_resize(dpy, width, height, attachments, 1, 0, ""); ++ race_resize(dpy, width, height, attachments, 1, COMPOSITE, "composite"); ++ race_resize(dpy, width, height, attachments, 2, 0, "with front"); ++ race_resize(dpy, width, height, attachments, 2, COMPOSITE, "composite with front"); ++ ++ race_manager(dpy, width, height, attachments, 1, 0, "fullscreen"); ++ race_manager(dpy, width, height, attachments, 1, COMPOSITE, "composite fullscreen"); ++ race_manager(dpy, width, height, attachments, 2, 0, "fullscreen (with front)"); ++ race_manager(dpy, width, height, attachments, 2, COMPOSITE, "composite fullscreen (with front)"); ++ ++ race_close(width, height, attachments, 1, 0, "fullscreen"); ++ race_close(width, height, attachments, 1, COMPOSITE, "composite fullscreen"); ++ race_close(width, height, attachments, 2, 0, "fullscreen (with front)"); ++ race_close(width, height, attachments, 2, COMPOSITE, "composite fullscreen (with front)"); ++ ++ race_client(width, height, attachments, 1, 0, "fullscreen"); ++ race_client(width, height, attachments, 1, COMPOSITE, "composite fullscreen"); ++ race_client(width, height, attachments, 2, 0, "fullscreen (with front)"); ++ race_client(width, height, attachments, 2, COMPOSITE, "composite fullscreen (with front)"); + + width /= 2; + height /= 2; +- run(dpy, width, height, attachments, 1, "windowed"); +- run(dpy, width, height, attachments, 2, "windowed (with front)"); ++ race_window(dpy, width, height, attachments, 1, 0, "windowed"); ++ race_window(dpy, width, height, attachments, 1, COMPOSITE, "composite windowed"); ++ race_window(dpy, width, height, attachments, 2, 0, "windowed (with front)"); 
++ race_window(dpy, width, height, attachments, 2, COMPOSITE, "composite windowed (with front)"); ++ ++ race_manager(dpy, width, height, attachments, 1, 0, "windowed"); ++ race_manager(dpy, width, height, attachments, 1, COMPOSITE, "composite windowed"); ++ race_manager(dpy, width, height, attachments, 2, 0, "windowed (with front)"); ++ race_manager(dpy, width, height, attachments, 2, COMPOSITE, "composite windowed (with front)"); ++ ++ race_close(width, height, attachments, 1, 0, "windowed"); ++ race_close(width, height, attachments, 1, COMPOSITE, "composite windowed"); ++ race_close(width, height, attachments, 2, 0, "windowed (with front)"); ++ race_close(width, height, attachments, 2, COMPOSITE, "composite windowed (with front)"); ++ ++ race_client(width, height, attachments, 1, 0, "windowed"); ++ race_client(width, height, attachments, 1, COMPOSITE, "composite windowed"); ++ race_client(width, height, attachments, 2, 0, "windowed (with front)"); ++ race_client(width, height, attachments, 2, COMPOSITE, "composite windowed (with front)"); + + return 0; + } +diff --git a/test/dri2-speed.c b/test/dri2-speed.c +new file mode 100644 +index 00000000..87b9d0b6 +--- /dev/null ++++ b/test/dri2-speed.c +@@ -0,0 +1,342 @@ ++/* ++ * Copyright (c) 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include <X11/Xlib.h> ++#include <X11/Xatom.h> ++#include <X11/Xlib-xcb.h> ++#include <X11/Xutil.h> ++#include <X11/Xlibint.h> ++#include <X11/extensions/dpms.h> ++#include <X11/extensions/randr.h> ++#include <X11/extensions/Xcomposite.h> ++#include <X11/extensions/Xdamage.h> ++#include <X11/extensions/Xrandr.h> ++#include <xcb/xcb.h> ++#include <xcb/dri2.h> ++#include <xf86drm.h> ++ ++#include <stdio.h> ++#include <string.h> ++#include <fcntl.h> ++#include <unistd.h> ++#include <assert.h> ++#include <errno.h> ++#include <setjmp.h> ++#include <signal.h> ++ ++#include "dri2.h" ++ ++static int _x_error_occurred; ++ ++static int ++_check_error_handler(Display *display, ++ XErrorEvent *event) ++{ ++ printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", ++ DisplayString(display), ++ event->serial, ++ event->error_code, ++ event->request_code, ++ event->minor_code); ++ _x_error_occurred++; ++ return False; /* ignored */ ++} ++ ++static double elapsed(const struct timespec *start, ++ const struct timespec *end) ++{ ++ return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; ++} ++ ++static void run(Display *dpy, Window win, const char *name) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ struct timespec start, end; ++ int n, completed = 0; ++ ++ _x_error_occurred = 0; ++ ++ clock_gettime(CLOCK_MONOTONIC, &start); ++ do { ++ for (n = 0; n < 1000; n++) { ++ 
unsigned int attachments[] = { DRI2BufferBackLeft }; ++ unsigned int seq[2]; ++ ++ seq[0] = xcb_dri2_swap_buffers_unchecked(c, win, ++ 0, 0, 0, 0, 0, 0).sequence; ++ ++ ++ seq[1] = xcb_dri2_get_buffers_unchecked(c, win, ++ 1, 1, attachments).sequence; ++ ++ xcb_flush(c); ++ xcb_discard_reply(c, seq[0]); ++ xcb_discard_reply(c, seq[1]); ++ completed++; ++ } ++ clock_gettime(CLOCK_MONOTONIC, &end); ++ } while (end.tv_sec < start.tv_sec + 10); ++ ++ XSync(dpy, True); ++ if (_x_error_occurred) ++ abort(); ++ ++ printf("%s: Completed %d swaps in %.1fs, %.3fus each (%.1f FPS)\n", ++ name, completed, elapsed(&start, &end) / 1000000, ++ elapsed(&start, &end) / completed, ++ completed / (elapsed(&start, &end) / 1000000)); ++} ++ ++static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) ++{ ++ XRRScreenResources *res; ++ ++ res = XRRGetScreenResourcesCurrent(dpy, window); ++ if (res == NULL) ++ res = XRRGetScreenResources(dpy, window); ++ ++ return res; ++} ++ ++static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) ++{ ++ int i; ++ ++ for (i = 0; i < res->nmode; i++) { ++ if (res->modes[i].id == id) ++ return &res->modes[i]; ++ } ++ ++ return NULL; ++} ++ ++static int dri2_open(Display *dpy) ++{ ++ drm_auth_t auth; ++ char *driver, *device; ++ int fd; ++ ++ if (!DRI2Connect(dpy, DefaultRootWindow(dpy), &driver, &device)) ++ return -1; ++ ++ printf ("Connecting to %s driver on %s\n", driver, device); ++ ++ fd = open(device, O_RDWR); ++ if (fd < 0) ++ return -1; ++ ++ if (drmIoctl(fd, DRM_IOCTL_GET_MAGIC, &auth)) ++ return -1; ++ ++ if (!DRI2Authenticate(dpy, DefaultRootWindow(dpy), auth.magic)) ++ return -1; ++ ++ return fd; ++} ++ ++static void fullscreen(Display *dpy, Window win) ++{ ++ Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); ++ XChangeProperty(dpy, win, ++ XInternAtom(dpy, "_NET_WM_STATE", False), ++ XA_ATOM, 32, PropModeReplace, ++ (unsigned char *)&atom, 1); ++} ++ ++static int has_composite(Display 
*dpy) ++{ ++ int event, error; ++ int major, minor; ++ ++ if (!XDamageQueryExtension (dpy, &event, &error)) ++ return 0; ++ ++ if (!XCompositeQueryExtension(dpy, &event, &error)) ++ return 0; ++ ++ XCompositeQueryVersion(dpy, &major, &minor); ++ ++ return major > 0 || minor >= 4; ++} ++ ++int main(void) ++{ ++ Display *dpy; ++ Window root, win; ++ XRRScreenResources *res; ++ XRRCrtcInfo **original_crtc; ++ XSetWindowAttributes attr; ++ int i, j, fd; ++ ++ attr.override_redirect = 1; ++ ++ dpy = XOpenDisplay(NULL); ++ if (dpy == NULL) ++ return 77; ++ ++ fd = dri2_open(dpy); ++ if (fd < 0) ++ return 77; ++ ++ if (DPMSQueryExtension(dpy, &i, &i)) ++ DPMSDisable(dpy); ++ ++ root = DefaultRootWindow(dpy); ++ ++ signal(SIGALRM, SIG_IGN); ++ XSetErrorHandler(_check_error_handler); ++ ++ res = NULL; ++ if (XRRQueryVersion(dpy, &i, &i)) ++ res = _XRRGetScreenResourcesCurrent(dpy, root); ++ if (res == NULL) ++ return 77; ++ ++ original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); ++ for (i = 0; i < res->ncrtc; i++) ++ original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); ++ ++ printf("noutput=%d, ncrtc=%d\n", res->noutput, res->ncrtc); ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ 0, 0, None, RR_Rotate_0, NULL, 0); ++ ++ DRI2CreateDrawable(dpy, root); ++ DRI2SwapInterval(dpy, root, 0); ++ run(dpy, root, "off"); ++ XSync(dpy, True); ++ ++ for (i = 0; i < res->noutput; i++) { ++ XRROutputInfo *output; ++ XRRModeInfo *mode; ++ ++ output = XRRGetOutputInfo(dpy, res, res->outputs[i]); ++ if (output == NULL) ++ continue; ++ ++ mode = NULL; ++ if (res->nmode) ++ mode = lookup_mode(res, output->modes[0]); ++ ++ for (j = 0; mode && j < 2*output->ncrtc; j++) { ++ int c = j; ++ if (c >= output->ncrtc) ++ c = 2*output->ncrtc - j - 1; ++ ++ printf("[%d, %d] -- OUTPUT:%ld, CRTC:%ld: %dx%d\n", ++ i, c, (long)res->outputs[i], (long)output->crtcs[c], ++ mode->width, mode->height); ++ XRRSetCrtcConfig(dpy, res, output->crtcs[c], 
CurrentTime, ++ 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); ++ ++ run(dpy, root, "root"); ++ XSync(dpy, True); ++ ++ win = XCreateWindow(dpy, root, ++ 0, 0, mode->width, mode->height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ DRI2CreateDrawable(dpy, win); ++ DRI2SwapInterval(dpy, win, 0); ++ fullscreen(dpy, win); ++ XMapWindow(dpy, win); ++ run(dpy, win, "fullscreen"); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ ++ win = XCreateWindow(dpy, root, ++ 0, 0, mode->width, mode->height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ DRI2CreateDrawable(dpy, win); ++ DRI2SwapInterval(dpy, win, 0); ++ XMapWindow(dpy, win); ++ run(dpy, win, "windowed"); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ ++ if (has_composite(dpy)) { ++ Damage damage; ++ ++ _x_error_occurred = 0; ++ win = XCreateWindow(dpy, root, ++ 0, 0, mode->width, mode->height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ damage = XDamageCreate(dpy, win, XDamageReportRawRectangles); ++ DRI2CreateDrawable(dpy, win); ++ DRI2SwapInterval(dpy, win, 0); ++ XMapWindow(dpy, win); ++ XSync(dpy, True); ++ if (!_x_error_occurred) ++ run(dpy, win, "composited"); ++ XDamageDestroy(dpy, damage); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ } ++ ++ win = XCreateWindow(dpy, root, ++ 0, 0, mode->width/2, mode->height/2, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ DRI2CreateDrawable(dpy, win); ++ DRI2SwapInterval(dpy, win, 0); ++ XMapWindow(dpy, win); ++ run(dpy, win, "half"); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ ++ XRRSetCrtcConfig(dpy, res, 
output->crtcs[c], CurrentTime, ++ 0, 0, None, RR_Rotate_0, NULL, 0); ++ } ++ ++ XRRFreeOutputInfo(output); ++ } ++ ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ original_crtc[i]->x, ++ original_crtc[i]->y, ++ original_crtc[i]->mode, ++ original_crtc[i]->rotation, ++ original_crtc[i]->outputs, ++ original_crtc[i]->noutput); ++ ++ if (DPMSQueryExtension(dpy, &i, &i)) ++ DPMSEnable(dpy); ++ return 0; ++} +diff --git a/test/dri2-test.c b/test/dri2-test.c +index dd4179f3..bdf01f38 100644 +--- a/test/dri2-test.c ++++ b/test/dri2-test.c +@@ -6,6 +6,10 @@ + #include <X11/Xutil.h> + #include <X11/extensions/Xfixes.h> + #include <X11/extensions/Xrandr.h> ++#include <X11/Xlib-xcb.h> ++#include <xcb/xcb.h> ++#include <xcb/xcbext.h> ++#include <xcb/dri2.h> + #include <unistd.h> + #include <fcntl.h> + #include <string.h> +@@ -18,6 +22,8 @@ + + #define COUNT 60 + ++static int prime[] = { 0, 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 27, 29, 31, 37, 41, 43, 47, 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131 }; ++ + static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) + { + XRRScreenResources *res; +@@ -101,16 +107,41 @@ static uint64_t check_msc(Display *dpy, Window win, uint64_t last_msc) + return current_msc; + } + ++static void wait_next_vblank(Display *dpy, Window win) ++{ ++ uint64_t msc, ust, sbc; ++ DRI2WaitMSC(dpy, win, 0, 1, 0, &ust, &msc, &sbc); ++} ++ ++static void swap_buffers(xcb_connection_t *c, Window win, ++ unsigned int *attachments, int nattachments) ++{ ++ unsigned int seq[2]; ++ ++ seq[0] = xcb_dri2_swap_buffers_unchecked(c, win, ++ 0, 0, 0, 0, 0, 0).sequence; ++ ++ ++ seq[1] = xcb_dri2_get_buffers_unchecked(c, win, ++ nattachments, nattachments, ++ attachments).sequence; ++ ++ xcb_flush(c); ++ xcb_discard_reply(c, seq[0]); ++ xcb_discard_reply(c, seq[1]); ++} ++ + static void run(Display *dpy, int width, int height, + unsigned int *attachments, int 
nattachments, + const char *name) + { ++ xcb_connection_t *c = XGetXCBConnection(dpy); + Window win; + XSetWindowAttributes attr; +- int count; + DRI2Buffer *buffers; + struct timespec start, end; +- uint64_t msc; ++ uint64_t start_msc, end_msc; ++ int modulus, remainder, count; + + /* Be nasty and install a fullscreen window on top so that we + * can guarantee we do not get clipped by children. +@@ -125,42 +156,99 @@ static void run(Display *dpy, int width, int height, + XMapWindow(dpy, win); + + DRI2CreateDrawable(dpy, win); +- msc = check_msc(dpy, win, 0); ++ DRI2SwapInterval(dpy, win, 1); ++ start_msc = check_msc(dpy, win, 0); + + buffers = DRI2GetBuffers(dpy, win, &width, &height, + attachments, nattachments, &count); + if (count != nattachments) + return; + +- msc = check_msc(dpy, win, msc); ++ swap_buffers(c, win, attachments, nattachments); ++ start_msc = check_msc(dpy, win, start_msc); + clock_gettime(CLOCK_MONOTONIC, &start); + for (count = 0; count < COUNT; count++) +- DRI2SwapBuffers(dpy, win, 0, 0, 0); +- msc = check_msc(dpy, win, msc); ++ swap_buffers(c, win, attachments, nattachments); ++ end_msc = check_msc(dpy, win, start_msc); + clock_gettime(CLOCK_MONOTONIC, &end); +- printf("%d %s (%dx%d) swaps in %fs.\n", +- count, name, width, height, elapsed(&start, &end)); ++ printf("%d [%ld] %s (%dx%d) swaps in %fs.\n", ++ count, (long)(end_msc - start_msc), ++ name, width, height, elapsed(&start, &end)); + +- msc = check_msc(dpy, win, msc); ++ swap_buffers(c, win, attachments, nattachments); ++ start_msc = check_msc(dpy, win, end_msc); + clock_gettime(CLOCK_MONOTONIC, &start); + for (count = 0; count < COUNT; count++) + dri2_copy_swap(dpy, win, width, height, nattachments == 2); +- msc = check_msc(dpy, win, msc); ++ end_msc = check_msc(dpy, win, start_msc); + clock_gettime(CLOCK_MONOTONIC, &end); + +- printf("%d %s (%dx%d) blits in %fs.\n", +- count, name, width, height, elapsed(&start, &end)); ++ printf("%d [%ld] %s (%dx%d) blits in %fs.\n", ++ count, 
(long)(end_msc - start_msc), ++ name, width, height, elapsed(&start, &end)); + + DRI2SwapInterval(dpy, win, 0); ++ wait_next_vblank(dpy, win); ++ ++ swap_buffers(c, win, attachments, nattachments); ++ start_msc = check_msc(dpy, win, end_msc); ++ clock_gettime(CLOCK_MONOTONIC, &start); ++ for (count = 0; count < COUNT; count++) ++ swap_buffers(c, win, attachments, nattachments); ++ end_msc = check_msc(dpy, win, start_msc); ++ clock_gettime(CLOCK_MONOTONIC, &end); ++ printf("%d [%ld] %s (%dx%d) vblank=0 swaps in %fs.\n", ++ count, (long)(end_msc - start_msc), ++ name, width, height, elapsed(&start, &end)); + +- msc = check_msc(dpy, win, msc); ++ start_msc = check_msc(dpy, win, end_msc); + clock_gettime(CLOCK_MONOTONIC, &start); + for (count = 0; count < COUNT; count++) +- DRI2SwapBuffers(dpy, win, 0, 0, 0); +- msc = check_msc(dpy, win, msc); ++ wait_next_vblank(dpy, win); ++ end_msc = check_msc(dpy, win, start_msc); + clock_gettime(CLOCK_MONOTONIC, &end); +- printf("%d %s (%dx%d) vblank=0 swaps in %fs.\n", +- count, name, width, height, elapsed(&start, &end)); ++ printf("%d [%ld] %s waits in %fs.\n", ++ count, (long)(end_msc - start_msc), ++ name, elapsed(&start, &end)); ++ ++ printf("Testing past & future waits\n"); ++ for (modulus = 1; modulus <= 128; modulus <<= 1) { ++ for (count = 0; prime[count] < modulus; count++) { ++ uint64_t msc, ust, sbc; ++ uint64_t target; ++ ++ remainder = prime[count]; ++ ++ DRI2WaitMSC(dpy, win, 0, 1, 0, &ust, &msc, &sbc); ++ ++ target = msc + modulus + 1; ++ target &= -modulus; ++ target += remainder; ++ ++ DRI2WaitMSC(dpy, win, target, modulus, remainder, ++ &ust, &msc, &sbc); ++ if (msc != target) { ++ printf("Missed future MSC (%d, %d): expected=%lld, found=%lld\n", ++ modulus, remainder, ++ (long long)target, (long long)msc); ++ } ++ ++ target = msc; ++ target &= -modulus; ++ target += remainder; ++ if (target <= msc) ++ target += modulus; ++ ++ DRI2WaitMSC(dpy, win, msc, modulus, remainder, ++ &ust, &msc, &sbc); ++ ++ if (msc != 
target) { ++ printf("Missed past MSC (%d, %d): expected=%lld, found=%lld\n", ++ modulus, remainder, ++ (long long)target, (long long)msc); ++ } ++ } ++ } + + XDestroyWindow(dpy, win); + free(buffers); +diff --git a/test/dri3-test.c b/test/dri3-test.c +index c66da313..78e105a8 100644 +--- a/test/dri3-test.c ++++ b/test/dri3-test.c +@@ -93,14 +93,9 @@ static const struct pci_id_match ids[] = { + INTEL_IVB_D_IDS(070), + INTEL_IVB_M_IDS(070), + +- INTEL_HSW_D_IDS(075), +- INTEL_HSW_M_IDS(075), +- +- INTEL_VLV_D_IDS(071), +- INTEL_VLV_M_IDS(071), +- +- INTEL_BDW_D_IDS(0100), +- INTEL_BDW_M_IDS(0100), ++ INTEL_HSW_IDS(075), ++ INTEL_VLV_IDS(071), ++ INTEL_BDW_IDS(0100), + }; + + static int i915_gen(int device) +@@ -1020,6 +1015,67 @@ fail: + return 1; + } + ++static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) ++{ ++ struct drm_i915_gem_set_tiling set_tiling; ++ ++ set_tiling.handle = handle; ++ set_tiling.tiling_mode = tiling; ++ set_tiling.stride = stride; ++ ++ return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0; ++} ++ ++static int test_tiling(Display *dpy, int device) ++{ ++ Window root = RootWindow(dpy, DefaultScreen(dpy)); ++ const int tiling[] = { I915_TILING_NONE, I915_TILING_X, I915_TILING_Y }; ++ int line = -1; ++ int t; ++ ++ _x_error_occurred = 0; ++ ++ for (t = 0; t < sizeof(tiling)/sizeof(tiling[0]); t++) { ++ uint32_t src; ++ int src_fd; ++ Pixmap src_pix; ++ ++ src = gem_create(device, 4*4096); ++ if (!src) { ++ line = __LINE__; ++ goto fail; ++ } ++ ++ gem_set_tiling(device, src, tiling[t], 512); ++ ++ src_fd = gem_export(device, src); ++ if (src_fd < 0) { ++ line = __LINE__; ++ goto fail; ++ } ++ ++ src_pix = dri3_create_pixmap(dpy, root, ++ 128, 32, 32, ++ src_fd, 32, 512, 4*4096); ++ XSync(dpy, True); ++ if (_x_error_occurred) { ++ line = __LINE__; ++ goto fail; ++ } ++ XFreePixmap(dpy, src_pix); ++ _x_error_occurred = 0; ++ ++ close(src_fd); ++ gem_close(device, src); ++ } ++ ++ return 0; ++ ++fail: ++ 
printf("%s failed with tiling %d, line %d\n", __func__, tiling[t], line); ++ return 1; ++} ++ + static int + _check_error_handler(Display *display, + XErrorEvent *event) +@@ -1060,6 +1116,7 @@ int main(void) + + error += test_bad_size(dpy, device); + error += test_bad_pitch(dpy, device); ++ error += test_tiling(dpy, device); + + error += test_shm(dpy, device, 400, 300); + error += test_shm(dpy, device, 300, 400); +diff --git a/test/dri3.c b/test/dri3.c +index 45f3285c..e5644629 100644 +--- a/test/dri3.c ++++ b/test/dri3.c +@@ -29,6 +29,7 @@ + #include <xcb/dri3.h> + #include <xcb/sync.h> + #include <unistd.h> ++#include <stdlib.h> + + #include "dri3.h" + +@@ -109,12 +110,45 @@ void dri3_fence_free(Display *dpy, struct dri3_fence *fence) + xcb_sync_destroy_fence(c, fence->xid); + } + ++static void dri3_query_version(xcb_connection_t *c, int *major, int *minor) ++{ ++ xcb_dri3_query_version_reply_t *reply; ++ ++ reply = xcb_dri3_query_version_reply(c, ++ xcb_dri3_query_version(c, ++ XCB_DRI3_MAJOR_VERSION, ++ XCB_DRI3_MINOR_VERSION), ++ NULL); ++ if (reply != NULL) { ++ *major = reply->major_version; ++ *minor = reply->minor_version; ++ free(reply); ++ } ++} ++ ++static int dri3_exists(xcb_connection_t *c) ++{ ++ const xcb_query_extension_reply_t *ext; ++ int major, minor; ++ ++ major = minor = -1; ++ ++ ext = xcb_get_extension_data(c, &xcb_dri3_id); ++ if (ext != NULL && ext->present) ++ dri3_query_version(c, &major, &minor); ++ ++ return major >= 0; ++} ++ + int dri3_open__full(Display *dpy, Window root, unsigned provider) + { + xcb_connection_t *c = XGetXCBConnection(dpy); + xcb_dri3_open_cookie_t cookie; + xcb_dri3_open_reply_t *reply; + ++ if (!dri3_exists(c)) ++ return -1; ++ + cookie = xcb_dri3_open(c, root, provider); + reply = xcb_dri3_open_reply(c, cookie, NULL); + +diff --git a/test/present-race.c b/test/present-race.c +new file mode 100644 +index 00000000..b2b6aa2b +--- /dev/null ++++ b/test/present-race.c +@@ -0,0 +1,484 @@ ++/* ++ * Copyright (c) 2014 
Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include <X11/Xlib.h> ++#include <X11/Xlib-xcb.h> ++#include <X11/xshmfence.h> ++#include <X11/Xutil.h> ++#include <X11/Xlibint.h> ++#include <X11/extensions/dpms.h> ++#include <X11/extensions/randr.h> ++#include <X11/extensions/Xcomposite.h> ++#include <X11/extensions/Xrandr.h> ++#include <X11/extensions/Xrender.h> ++#include <X11/extensions/XShm.h> ++#if HAVE_X11_EXTENSIONS_SHMPROTO_H ++#include <X11/extensions/shmproto.h> ++#elif HAVE_X11_EXTENSIONS_SHMSTR_H ++#include <X11/extensions/shmstr.h> ++#else ++#error Failed to find the right header for X11 MIT-SHM protocol definitions ++#endif ++#include <xcb/xcb.h> ++#include <xcb/present.h> ++#include <xcb/xfixes.h> ++#include <xcb/dri3.h> ++#include <xf86drm.h> ++#include <i915_drm.h> ++ ++#include <stdio.h> ++#include <string.h> ++#include <fcntl.h> ++#include <unistd.h> ++#include <assert.h> ++#include <errno.h> ++#include <setjmp.h> ++#include <signal.h> ++ ++#include <sys/mman.h> ++#include <sys/ipc.h> ++#include <sys/shm.h> ++#include <pciaccess.h> ++ ++#include "dri3.h" ++ ++static int _x_error_occurred; ++static uint32_t stamp; ++ ++static int ++_check_error_handler(Display *display, ++ XErrorEvent *event) ++{ ++ printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", ++ DisplayString(display), ++ event->serial, ++ event->error_code, ++ event->request_code, ++ event->minor_code); ++ _x_error_occurred++; ++ return False; /* ignored */ ++} ++ ++static int has_composite(Display *dpy) ++{ ++ int event, error; ++ int major, minor; ++ ++ if (!XCompositeQueryExtension(dpy, &event, &error)) ++ return 0; ++ ++ XCompositeQueryVersion(dpy, &major, &minor); ++ ++ return major > 0 || minor >= 4; ++} ++ ++static void *setup_msc(Display *dpy, Window win) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ xcb_void_cookie_t cookie; ++ uint32_t id = xcb_generate_id(c); ++ xcb_generic_error_t *error; ++ void *q; ++ ++ cookie = 
xcb_present_select_input_checked(c, id, win, XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY); ++ q = xcb_register_for_special_xge(c, &xcb_present_id, id, &stamp); ++ ++ error = xcb_request_check(c, cookie); ++ assert(error == NULL); ++ ++ return q; ++} ++ ++static void teardown_msc(Display *dpy, void *q) ++{ ++ xcb_unregister_for_special_event(XGetXCBConnection(dpy), q); ++} ++ ++static uint64_t wait_vblank(Display *dpy, Window win) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ static uint32_t serial = 1; ++ uint64_t msc = 0; ++ int complete = 0; ++ void *q; ++ ++ if (win == 0) ++ win = DefaultRootWindow(dpy); ++ ++ q = setup_msc(dpy, win); ++ ++ xcb_present_notify_msc(c, win, serial ^ 0xdeadbeef, 0, 1, 0); ++ xcb_flush(c); ++ ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && ++ ce->serial == (serial ^ 0xdeadbeef)) { ++ msc = ce->msc; ++ complete = 1; ++ } ++ free(ev); ++ } while (!complete); ++ ++ if (++serial == 0) ++ serial = 1; ++ ++ teardown_msc(dpy, q); ++ ++ return msc; ++} ++ ++static int test_basic(Display *dpy, int dummy) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ XSetWindowAttributes attr; ++ Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy)); ++ Pixmap pixmap; ++ struct dri3_fence fence; ++ Window root, win; ++ unsigned int width, height; ++ unsigned border, depth; ++ int x, y, ret = 1; ++ const char *phase; ++ uint64_t msc; ++ ++ root = DefaultRootWindow(dpy); ++ XGetGeometry(dpy, root, ++ &win, &x, &y, ++ &width, &height, &border, &depth); ++ ++ _x_error_occurred = 0; ++ attr.override_redirect = 1; ++ switch (dummy) { ++ case 0: ++ win = root; ++ phase = "root"; ++ break; ++ case 1: ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, depth, ++ InputOutput, visual, ++ CWOverrideRedirect, &attr); ++ phase = 
"fullscreen"; ++ break; ++ case 2: ++ width /= 2; ++ height /= 2; ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, depth, ++ InputOutput, visual, ++ CWOverrideRedirect, &attr); ++ phase = "window"; ++ break; ++ case 3: ++ if (!has_composite(dpy)) ++ return 0; ++ ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ phase = "composite"; ++ break; ++ ++ default: ++ phase = "broken"; ++ win = root; ++ abort(); ++ break; ++ } ++ ++ XMapWindow(dpy, win); ++ XSync(dpy, True); ++ if (_x_error_occurred) ++ return 1; ++ ++ if (dri3_create_fence(dpy, win, &fence)) ++ return 0; ++ ++ printf("%s: Testing basic flip: %dx%d\n", phase, width, height); ++ fflush(stdout); ++ _x_error_occurred = 0; ++ ++ xshmfence_reset(fence.addr); ++ msc = wait_vblank(dpy, win); ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ xcb_present_pixmap(c, win, pixmap, 0, ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ fence.xid, ++ XCB_PRESENT_OPTION_NONE, ++ (msc + 64) & -64, /* target msc */ ++ 64, /* divisor */ ++ 32, /* remainder */ ++ 0, NULL); ++ XFreePixmap(dpy, pixmap); ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ xcb_present_pixmap(c, win, pixmap, 0, ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, /* sync fence */ ++ XCB_PRESENT_OPTION_NONE, ++ (msc + 64) & -64, /* target msc */ ++ 64, /* divisor */ ++ 48, /* remainder */ ++ 0, NULL); ++ XFreePixmap(dpy, pixmap); ++ XDestroyWindow(dpy, win); ++ XFlush(dpy); ++ ++ ret = !!xshmfence_await(fence.addr); ++ dri3_fence_free(dpy, &fence); ++ ++ XSync(dpy, True); ++ ret += !!_x_error_occurred; ++ ++ return ret; ++} ++ ++static int test_race(Display *dpy, int dummy) ++{ ++ 
Display *mgr = XOpenDisplay(NULL); ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ XSetWindowAttributes attr; ++ Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy)); ++ Pixmap pixmap; ++ struct dri3_fence fence; ++ Window root, win; ++ unsigned int width, height; ++ unsigned border, depth; ++ int x, y, ret = 1; ++ const char *phase; ++ uint64_t msc; ++ ++ root = DefaultRootWindow(dpy); ++ XGetGeometry(dpy, root, ++ &win, &x, &y, ++ &width, &height, &border, &depth); ++ ++ _x_error_occurred = 0; ++ attr.override_redirect = 1; ++ switch (dummy) { ++ case 0: ++ win = root; ++ phase = "root"; ++ break; ++ case 1: ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, depth, ++ InputOutput, visual, ++ CWOverrideRedirect, &attr); ++ phase = "fullscreen"; ++ break; ++ case 2: ++ width /= 2; ++ height /= 2; ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, depth, ++ InputOutput, visual, ++ CWOverrideRedirect, &attr); ++ phase = "window"; ++ break; ++ case 3: ++ if (!has_composite(dpy)) ++ return 0; ++ ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ phase = "composite"; ++ break; ++ ++ default: ++ phase = "broken"; ++ win = root; ++ abort(); ++ break; ++ } ++ ++ XMapWindow(dpy, win); ++ XSync(dpy, True); ++ if (_x_error_occurred) ++ return 1; ++ ++ if (dri3_create_fence(dpy, win, &fence)) ++ return 0; ++ ++ printf("%s: Testing race with manager: %dx%d\n", phase, width, height); ++ fflush(stdout); ++ _x_error_occurred = 0; ++ ++ xshmfence_reset(fence.addr); ++ msc = wait_vblank(dpy, win); ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ xcb_present_pixmap(c, win, pixmap, 0, ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ fence.xid, ++ XCB_PRESENT_OPTION_NONE, ++ 
(msc + 64) & -64, /* target msc */ ++ 64, /* divisor */ ++ 32, /* remainder */ ++ 0, NULL); ++ XFreePixmap(dpy, pixmap); ++ ++ XFlush(dpy); ++ XDestroyWindow(mgr, win); ++ XFlush(mgr); ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ xcb_present_pixmap(c, win, pixmap, 0, ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, /* sync fence */ ++ XCB_PRESENT_OPTION_NONE, ++ (msc + 64) & -64, /* target msc */ ++ 64, /* divisor */ ++ 48, /* remainder */ ++ 0, NULL); ++ XFreePixmap(dpy, pixmap); ++ XFlush(dpy); ++ ++ ret = !!xshmfence_await(fence.addr); ++ dri3_fence_free(dpy, &fence); ++ ++ XSync(dpy, True); ++ ret += !!_x_error_occurred; ++ ++ XCloseDisplay(mgr); ++ ++ return ret; ++} ++ ++static int has_present(Display *dpy) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ xcb_generic_error_t *error = NULL; ++ void *reply; ++ ++ reply = xcb_xfixes_query_version_reply(c, ++ xcb_xfixes_query_version(c, ++ XCB_XFIXES_MAJOR_VERSION, ++ XCB_XFIXES_MINOR_VERSION), ++ &error); ++ free(reply); ++ free(error); ++ if (reply == NULL) { ++ fprintf(stderr, "XFixes not supported on %s\n", DisplayString(dpy)); ++ return 0; ++ } ++ ++ reply = xcb_dri3_query_version_reply(c, ++ xcb_dri3_query_version(c, ++ XCB_DRI3_MAJOR_VERSION, ++ XCB_DRI3_MINOR_VERSION), ++ &error); ++ free(reply); ++ free(error); ++ if (reply == NULL) { ++ fprintf(stderr, "DRI3 not supported on %s\n", DisplayString(dpy)); ++ return 0; ++ } ++ ++ reply = xcb_present_query_version_reply(c, ++ xcb_present_query_version(c, ++ XCB_PRESENT_MAJOR_VERSION, ++ XCB_PRESENT_MINOR_VERSION), ++ &error); ++ ++ free(reply); ++ free(error); ++ if (reply == NULL) { ++ fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy)); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++int main(void) ++{ ++ Display *dpy; ++ int dummy; ++ int error = 0; ++ ++ dpy = XOpenDisplay(NULL); ++ if (dpy == NULL) ++ return 77; ++ ++ if (!has_present(dpy)) ++ 
return 77; ++ ++ if (DPMSQueryExtension(dpy, &dummy, &dummy)) ++ DPMSDisable(dpy); ++ ++ signal(SIGALRM, SIG_IGN); ++ XSetErrorHandler(_check_error_handler); ++ ++ for (dummy = 0; dummy <= 3; dummy++) { ++ error += test_basic(dpy, dummy); ++ error += test_race(dpy, dummy); ++ } ++ ++ if (DPMSQueryExtension(dpy, &dummy, &dummy)) ++ DPMSEnable(dpy); ++ return !!error; ++} +diff --git a/test/present-speed.c b/test/present-speed.c +new file mode 100644 +index 00000000..eccde931 +--- /dev/null ++++ b/test/present-speed.c +@@ -0,0 +1,1015 @@ ++/* ++ * Copyright (c) 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include <X11/Xlib.h> ++#include <X11/Xatom.h> ++#include <X11/Xlib-xcb.h> ++#include <X11/xshmfence.h> ++#include <X11/Xutil.h> ++#include <X11/Xlibint.h> ++#include <X11/extensions/Xcomposite.h> ++#include <X11/extensions/Xdamage.h> ++#include <X11/extensions/dpms.h> ++#include <X11/extensions/randr.h> ++#include <X11/extensions/Xrandr.h> ++#include <xcb/xcb.h> ++#include <xcb/present.h> ++#include <xcb/dri3.h> ++#include <xcb/xfixes.h> ++#include <xf86drm.h> ++#include <i915_drm.h> ++ ++#include <stdio.h> ++#include <string.h> ++#include <fcntl.h> ++#include <unistd.h> ++#include <assert.h> ++#include <errno.h> ++#include <setjmp.h> ++#include <signal.h> ++#include <sys/wait.h> ++ ++#include "dri3.h" ++ ++static int _x_error_occurred; ++static uint32_t stamp; ++ ++struct list { ++ struct list *next, *prev; ++}; ++ ++static void ++list_init(struct list *list) ++{ ++ list->next = list->prev = list; ++} ++ ++static inline void ++__list_add(struct list *entry, ++ struct list *prev, ++ struct list *next) ++{ ++ next->prev = entry; ++ entry->next = next; ++ entry->prev = prev; ++ prev->next = entry; ++} ++ ++static inline void ++list_add(struct list *entry, struct list *head) ++{ ++ __list_add(entry, head, head->next); ++} ++ ++static inline void ++__list_del(struct list *prev, struct list *next) ++{ ++ next->prev = prev; ++ prev->next = next; ++} ++ ++static inline void ++_list_del(struct list *entry) ++{ ++ __list_del(entry->prev, entry->next); ++} ++ ++static inline void ++list_move(struct list *list, struct list *head) ++{ ++ if (list->prev != head) { ++ _list_del(list); ++ list_add(list, head); ++ } ++} ++ ++#define __container_of(ptr, sample, member) \ ++ (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample))) ++ ++#define list_for_each_entry(pos, head, member) \ ++ for (pos = __container_of((head)->next, pos, member); \ ++ &pos->member != (head); \ ++ pos = 
__container_of(pos->member.next, pos, member)) ++ ++static int ++_check_error_handler(Display *display, ++ XErrorEvent *event) ++{ ++ if (_x_error_occurred < 0) ++ return True; ++ ++ printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n", ++ DisplayString(display), ++ event->serial, ++ event->error_code, ++ event->request_code, ++ event->minor_code); ++ _x_error_occurred++; ++ return False; /* ignored */ ++} ++ ++static double elapsed(const struct timespec *start, ++ const struct timespec *end) ++{ ++ return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000; ++} ++ ++struct buffer { ++ struct list link; ++ Pixmap pixmap; ++ struct dri3_fence fence; ++ int fd; ++ int busy; ++ int id; ++}; ++ ++#define DRI3 1 ++#define NOCOPY 2 ++#define ASYNC 4 ++static void run(Display *dpy, Window win, const char *name, unsigned options) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ struct timespec start, end; ++#define N_BACK 8 ++ char test_name[128]; ++ struct buffer buffer[N_BACK]; ++ struct list mru; ++ Window root; ++ unsigned int width, height; ++ unsigned border, depth; ++ unsigned present_flags = 0; ++ xcb_xfixes_region_t update = 0; ++ int completed = 0; ++ int queued = 0; ++ uint32_t eid = 0; ++ void *Q = NULL; ++ int i, n; ++ ++ list_init(&mru); ++ ++ XGetGeometry(dpy, win, ++ &root, &i, &n, &width, &height, &border, &depth); ++ ++ _x_error_occurred = 0; ++ ++ for (n = 0; n < N_BACK; n++) { ++ buffer[n].pixmap = xcb_generate_id(c); ++ xcb_create_pixmap(c, depth, buffer[n].pixmap, win, ++ width, height); ++ buffer[n].fence.xid = 0; ++ buffer[n].fd = -1; ++ buffer[n].id = n; ++ if (options & DRI3) { ++ xcb_dri3_buffer_from_pixmap_reply_t *reply; ++ int *fds; ++ ++ if (dri3_create_fence(dpy, win, &buffer[n].fence)) ++ return; ++ ++ reply = xcb_dri3_buffer_from_pixmap_reply (c, ++ xcb_dri3_buffer_from_pixmap(c, buffer[n].pixmap), ++ NULL); ++ if (reply == NULL) ++ return; ++ ++ fds = xcb_dri3_buffer_from_pixmap_reply_fds (c, 
reply); ++ buffer[n].fd = fds[0]; ++ free(reply); ++ ++ /* start idle */ ++ xshmfence_trigger(buffer[n].fence.addr); ++ } ++ buffer[n].busy = 0; ++ list_add(&buffer[n].link, &mru); ++ } ++ if (options & ASYNC) ++ present_flags |= XCB_PRESENT_OPTION_ASYNC; ++ if (options & NOCOPY) { ++ update = xcb_generate_id(c); ++ xcb_xfixes_create_region(c, update, 0, NULL); ++ present_flags |= XCB_PRESENT_OPTION_COPY; ++ } ++ ++ if (!(options & DRI3)) { ++ eid = xcb_generate_id(c); ++ xcb_present_select_input(c, eid, win, ++ (options & NOCOPY ? 0 : XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY) | ++ XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY); ++ Q = xcb_register_for_special_xge(c, &xcb_present_id, eid, &stamp); ++ } ++ ++ clock_gettime(CLOCK_MONOTONIC, &start); ++ do { ++ for (n = 0; n < 1000; n++) { ++ struct buffer *tmp, *b = NULL; ++retry: ++ list_for_each_entry(tmp, &mru, link) { ++ if (tmp->fence.xid) ++ tmp->busy = !xshmfence_query(tmp->fence.addr); ++ if (!tmp->busy) { ++ b = tmp; ++ break; ++ } ++ } ++ if (options & DRI3) { ++ if (b == NULL) ++ goto retry; ++ ++ xshmfence_reset(b->fence.addr); ++ queued--; ++ completed++; ++ } else while (b == NULL) { ++ xcb_present_generic_event_t *ev; ++ ++ ev = (xcb_present_generic_event_t *) ++ xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ abort(); ++ ++ do { ++ switch (ev->evtype) { ++ case XCB_PRESENT_COMPLETE_NOTIFY: ++ completed++; ++ queued--; ++ break; ++ ++ case XCB_PRESENT_EVENT_IDLE_NOTIFY: ++ { ++ xcb_present_idle_notify_event_t *ie = (xcb_present_idle_notify_event_t *)ev; ++ assert(ie->serial < N_BACK); ++ buffer[ie->serial].busy = 0; ++ if (b == NULL) ++ b = &buffer[ie->serial]; ++ break; ++ } ++ } ++ free(ev); ++ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); ++ } ++ ++ b->busy = (options & NOCOPY) == 0; ++ xcb_present_pixmap(c, win, b->pixmap, b->id, ++ 0, /* valid */ ++ update, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ b->fence.xid, ++ 
present_flags, ++ 0, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ list_move(&b->link, &mru); ++ queued++; ++ xcb_flush(c); ++ } ++ clock_gettime(CLOCK_MONOTONIC, &end); ++ } while (end.tv_sec < start.tv_sec + 10); ++ ++ if (options & DRI3) { ++ struct buffer *b; ++ XID pixmap; ++ ++ pixmap = xcb_generate_id(c); ++ xcb_create_pixmap(c, depth, pixmap, win, width, height); ++ xcb_present_pixmap(c, win, pixmap, 0xdeadbeef, ++ 0, /* valid */ ++ None, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ 0, ++ 0, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_flush(c); ++ ++ list_for_each_entry(b, &mru, link) ++ xshmfence_await(b->fence.addr); ++ ++ xcb_free_pixmap(c, pixmap); ++ completed += queued; ++ } else while (queued) { ++ xcb_present_generic_event_t *ev; ++ ++ ev = (xcb_present_generic_event_t *) ++ xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ abort(); ++ ++ do { ++ switch (ev->evtype) { ++ case XCB_PRESENT_COMPLETE_NOTIFY: ++ completed++; ++ queued--; ++ break; ++ ++ case XCB_PRESENT_EVENT_IDLE_NOTIFY: ++ break; ++ } ++ free(ev); ++ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q))); ++ } ++ clock_gettime(CLOCK_MONOTONIC, &end); ++ ++ if (update) ++ xcb_xfixes_destroy_region(c, update); ++ for (n = 0; n < N_BACK; n++) { ++ if (buffer[n].fence.xid) ++ dri3_fence_free(dpy, &buffer[n].fence); ++ if (buffer[n].fd != -1) ++ close(buffer[n].fd); ++ xcb_free_pixmap(c, buffer[n].pixmap); ++ } ++ ++ if (Q) { ++ xcb_discard_reply(c, xcb_present_select_input_checked(c, eid, win, 0).sequence); ++ XSync(dpy, True); ++ xcb_unregister_for_special_event(c, Q); ++ } ++ ++ test_name[0] = '\0'; ++ if (options) { ++ snprintf(test_name, sizeof(test_name), "(%s%s%s )", ++ options & NOCOPY ? " no-copy" : "", ++ options & DRI3 ? " dri3" : "", ++ options & ASYNC ? 
" async" : ""); ++ } ++ printf("%s%s: Completed %d presents in %.1fs, %.3fus each (%.1f FPS)\n", ++ name, test_name, ++ completed, elapsed(&start, &end) / 1000000, ++ elapsed(&start, &end) / completed, ++ completed / (elapsed(&start, &end) / 1000000)); ++} ++ ++struct perpixel { ++ Window win; ++ struct buffer buffer[N_BACK]; ++ struct list mru; ++ uint32_t eid; ++ void *Q; ++ int queued; ++}; ++ ++static void perpixel(Display *dpy, ++ int max_width, int max_height, unsigned options) ++{ ++ //const int sz = max_width * max_height; ++ const int sz = 1048; ++ struct perpixel *pp; ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ struct timespec start, end; ++ char test_name[128]; ++ unsigned present_flags = 0; ++ xcb_xfixes_region_t update = 0; ++ int completed = 0; ++ int i, n; ++ ++ pp = calloc(sz, sizeof(*pp)); ++ if (!pp) ++ return; ++ ++ for (i = 0; i < sz; i++) { ++ XSetWindowAttributes attr = { .override_redirect = 1 }; ++ int depth = DefaultDepth(dpy, DefaultScreen(dpy)); ++ pp[i].win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ i % max_width, i / max_width, 1, 1, 0, depth, ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XMapWindow(dpy, pp[i].win); ++ list_init(&pp[i].mru); ++ for (n = 0; n < N_BACK; n++) { ++ pp[i].buffer[n].pixmap = xcb_generate_id(c); ++ xcb_create_pixmap(c, depth, pp[i].buffer[n].pixmap, ++ pp[i].win, 1, 1); ++ pp[i].buffer[n].fence.xid = 0; ++ pp[i].buffer[n].fd = -1; ++ pp[i].buffer[n].id = n; ++ if (options & DRI3) { ++ xcb_dri3_buffer_from_pixmap_reply_t *reply; ++ int *fds; ++ ++ if (dri3_create_fence(dpy, pp[i].win, &pp[i].buffer[n].fence)) ++ return; ++ ++ reply = xcb_dri3_buffer_from_pixmap_reply(c, ++ xcb_dri3_buffer_from_pixmap(c, pp[i].buffer[n].pixmap), ++ NULL); ++ if (reply == NULL) ++ return; ++ ++ fds = xcb_dri3_buffer_from_pixmap_reply_fds(c, reply); ++ pp[i].buffer[n].fd = fds[0]; ++ free(reply); ++ ++ /* start idle */ ++ xshmfence_trigger(pp[i].buffer[n].fence.addr); ++ 
} ++ pp[i].buffer[n].busy = 0; ++ list_add(&pp[i].buffer[n].link, &pp[i].mru); ++ } ++ ++ if (!(options & DRI3)) { ++ pp[i].eid = xcb_generate_id(c); ++ xcb_present_select_input(c, pp[i].eid, pp[i].win, ++ (options & NOCOPY ? 0 : XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY) | ++ XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY); ++ pp[i].Q = xcb_register_for_special_xge(c, &xcb_present_id, pp[i].eid, &stamp); ++ } ++ pp[i].queued = 0; ++ } ++ ++ XSync(dpy, True); ++ _x_error_occurred = 0; ++ ++ if (options & ASYNC) ++ present_flags |= XCB_PRESENT_OPTION_ASYNC; ++ if (options & NOCOPY) { ++ update = xcb_generate_id(c); ++ xcb_xfixes_create_region(c, update, 0, NULL); ++ present_flags |= XCB_PRESENT_OPTION_COPY; ++ } ++ ++ clock_gettime(CLOCK_MONOTONIC, &start); ++ do { ++ for (i = 0; i < sz; i++) { ++ struct buffer *tmp, *b = NULL; ++retry: ++ list_for_each_entry(tmp, &pp[i].mru, link) { ++ if (tmp->fence.xid) ++ tmp->busy = !xshmfence_query(tmp->fence.addr); ++ if (!tmp->busy) { ++ b = tmp; ++ break; ++ } ++ } ++ if (options & DRI3) { ++ if (b == NULL) ++ goto retry; ++ ++ xshmfence_reset(b->fence.addr); ++ pp[i].queued--; ++ completed++; ++ } else while (b == NULL) { ++ xcb_present_generic_event_t *ev; ++ ++ ev = (xcb_present_generic_event_t *) ++ xcb_wait_for_special_event(c, pp[i].Q); ++ if (ev == NULL) ++ abort(); ++ ++ do { ++ switch (ev->evtype) { ++ case XCB_PRESENT_COMPLETE_NOTIFY: ++ completed++; ++ pp[i].queued--; ++ break; ++ ++ case XCB_PRESENT_EVENT_IDLE_NOTIFY: ++ { ++ xcb_present_idle_notify_event_t *ie = (xcb_present_idle_notify_event_t *)ev; ++ assert(ie->serial < N_BACK); ++ pp[i].buffer[ie->serial].busy = 0; ++ if (b == NULL) ++ b = &pp[i].buffer[ie->serial]; ++ break; ++ } ++ } ++ free(ev); ++ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, pp[i].Q))); ++ } ++ ++ b->busy = (options & NOCOPY) == 0; ++ xcb_present_pixmap(c, pp[i].win, b->pixmap, b->id, ++ 0, /* valid */ ++ update, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, 
++ None, /* wait fence */ ++ b->fence.xid, ++ present_flags, ++ 0, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ list_move(&b->link, &pp[i].mru); ++ pp[i].queued++; ++ } ++ xcb_flush(c); ++ clock_gettime(CLOCK_MONOTONIC, &end); ++ } while (end.tv_sec < start.tv_sec + 10); ++ ++ for (i = 0; i < sz; i++) { ++ if (options & DRI3) { ++ int depth = DefaultDepth(dpy, DefaultScreen(dpy)); ++ struct buffer *b; ++ XID pixmap; ++ ++ pixmap = xcb_generate_id(c); ++ xcb_create_pixmap(c, depth, pixmap, pp[i].win, 1, 1); ++ xcb_present_pixmap(c, pp[i].win, pixmap, 0xdeadbeef, ++ 0, /* valid */ ++ None, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ 0, ++ 0, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_flush(c); ++ ++ list_for_each_entry(b, &pp[i].mru, link) ++ xshmfence_await(b->fence.addr); ++ ++ xcb_free_pixmap(c, pixmap); ++ completed += pp[i].queued; ++ } else while (pp[i].queued) { ++ xcb_present_generic_event_t *ev; ++ ++ ev = (xcb_present_generic_event_t *) ++ xcb_wait_for_special_event(c, pp[i].Q); ++ if (ev == NULL) ++ abort(); ++ ++ do { ++ switch (ev->evtype) { ++ case XCB_PRESENT_COMPLETE_NOTIFY: ++ completed++; ++ pp[i].queued--; ++ break; ++ ++ case XCB_PRESENT_EVENT_IDLE_NOTIFY: ++ break; ++ } ++ free(ev); ++ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, pp[i].Q))); ++ } ++ } ++ clock_gettime(CLOCK_MONOTONIC, &end); ++ ++ if (update) ++ xcb_xfixes_destroy_region(c, update); ++ ++ for (i = 0; i < sz; i++) { ++ for (n = 0; n < N_BACK; n++) { ++ if (pp[i].buffer[n].fence.xid) ++ dri3_fence_free(dpy, &pp[i].buffer[n].fence); ++ if (pp[i].buffer[n].fd != -1) ++ close(pp[i].buffer[n].fd); ++ xcb_free_pixmap(c, pp[i].buffer[n].pixmap); ++ } ++ ++ if (pp[i].Q) { ++ xcb_discard_reply(c, xcb_present_select_input_checked(c, pp[i].eid, pp[i].win, 0).sequence); ++ XSync(dpy, True); ++ xcb_unregister_for_special_event(c, pp[i].Q); ++ } ++ 
++ XDestroyWindow(dpy, pp[i].win); ++ } ++ free(pp); ++ ++ test_name[0] = '\0'; ++ if (options) { ++ snprintf(test_name, sizeof(test_name), "(%s%s%s )", ++ options & NOCOPY ? " no-copy" : "", ++ options & DRI3 ? " dri3" : "", ++ options & ASYNC ? " async" : ""); ++ } ++ printf("%s%s: Completed %d presents in %.1fs, %.3fus each (%.1f FPS)\n", ++ __func__, test_name, ++ completed, elapsed(&start, &end) / 1000000, ++ elapsed(&start, &end) / completed, ++ completed / (elapsed(&start, &end) / 1000000)); ++} ++ ++static int isqrt(int x) ++{ ++ int i; ++ ++ for (i = 2; i*i < x; i++) ++ ; ++ return i; ++} ++ ++struct sibling { ++ pthread_t thread; ++ Display *dpy; ++ int x, y; ++ int width, height; ++ unsigned options; ++}; ++ ++static void *sibling(void *arg) ++{ ++ struct sibling *s = arg; ++ XSetWindowAttributes attr = { .override_redirect = 1 }; ++ Window win = XCreateWindow(s->dpy, DefaultRootWindow(s->dpy), ++ s->x, s->y, s->width, s->height, 0, ++ DefaultDepth(s->dpy, DefaultScreen(s->dpy)), ++ InputOutput, ++ DefaultVisual(s->dpy, DefaultScreen(s->dpy)), ++ CWOverrideRedirect, &attr); ++ XMapWindow(s->dpy, win); ++ run(s->dpy, win, "sibling", s->options); ++ return NULL; ++} ++ ++static void siblings(Display *dpy, ++ int max_width, int max_height, int ncpus, unsigned options) ++{ ++ int sq_ncpus = isqrt(ncpus); ++ int width = max_width / sq_ncpus; ++ int height = max_height/ sq_ncpus; ++ struct sibling s[ncpus]; ++ int child; ++ ++ if (ncpus <= 1) ++ return; ++ ++ for (child = 0; child < ncpus; child++) { ++ s[child].dpy = dpy; ++ s[child].x = (child % sq_ncpus) * width; ++ s[child].y = (child / sq_ncpus) * height; ++ s[child].width = width; ++ s[child].height = height; ++ s[child].options = options; ++ pthread_create(&s[child].thread, NULL, sibling, &s[child]); ++ } ++ ++ for (child = 0; child < ncpus; child++) ++ pthread_join(s[child].thread, NULL); ++} ++ ++static void cousins(int max_width, int max_height, int ncpus, unsigned options) ++{ ++ int sq_ncpus = 
isqrt(ncpus); ++ int width = max_width / sq_ncpus; ++ int height = max_height/ sq_ncpus; ++ int child; ++ ++ if (ncpus <= 1) ++ return; ++ ++ for (child = 0; child < ncpus; child++) { ++ for (; fork() == 0; exit(0)) { ++ int x = (child % sq_ncpus) * width; ++ int y = (child / sq_ncpus) * height; ++ XSetWindowAttributes attr = { .override_redirect = 1 }; ++ Display *dpy = XOpenDisplay(NULL); ++ Window win = XCreateWindow(dpy, DefaultRootWindow(dpy), ++ x, y, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XMapWindow(dpy, win); ++ run(dpy, win, "cousin", options); ++ } ++ } ++ ++ while (child) { ++ int status = -1; ++ pid_t pid = wait(&status); ++ if (pid == -1) ++ continue; ++ child--; ++ } ++} ++ ++static int has_present(Display *dpy) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ xcb_generic_error_t *error = NULL; ++ void *reply; ++ ++ reply = xcb_present_query_version_reply(c, ++ xcb_present_query_version(c, ++ XCB_PRESENT_MAJOR_VERSION, ++ XCB_PRESENT_MINOR_VERSION), ++ &error); ++ ++ free(reply); ++ free(error); ++ if (reply == NULL) { ++ fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy)); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int has_composite(Display *dpy) ++{ ++ int event, error; ++ int major, minor; ++ ++ if (!XDamageQueryExtension (dpy, &event, &error)) ++ return 0; ++ ++ if (!XCompositeQueryExtension(dpy, &event, &error)) ++ return 0; ++ ++ XCompositeQueryVersion(dpy, &major, &minor); ++ ++ return major > 0 || minor >= 4; ++} ++ ++static int dri3_query_version(Display *dpy, int *major, int *minor) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ xcb_dri3_query_version_reply_t *reply; ++ xcb_generic_error_t *error; ++ ++ *major = *minor = -1; ++ ++ reply = xcb_dri3_query_version_reply(c, ++ xcb_dri3_query_version(c, ++ XCB_DRI3_MAJOR_VERSION, ++ XCB_DRI3_MINOR_VERSION), ++ &error); ++ free(error); ++ if 
(reply == NULL) ++ return -1; ++ ++ *major = reply->major_version; ++ *minor = reply->minor_version; ++ free(reply); ++ ++ return 0; ++} ++ ++static int has_dri3(Display *dpy) ++{ ++ const xcb_query_extension_reply_t *ext; ++ int major, minor; ++ ++ ext = xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id); ++ if (ext == NULL || !ext->present) ++ return 0; ++ ++ if (dri3_query_version(dpy, &major, &minor) < 0) ++ return 0; ++ ++ return major >= 0; ++} ++ ++static int has_xfixes(Display *dpy) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ const xcb_query_extension_reply_t *ext; ++ void *reply; ++ ++ ext = xcb_get_extension_data(c, &xcb_xfixes_id); ++ if (ext == NULL || !ext->present) ++ return 0; ++ ++ reply = xcb_xfixes_query_version_reply(c, ++ xcb_xfixes_query_version(c, ++ XCB_XFIXES_MAJOR_VERSION, ++ XCB_XFIXES_MINOR_VERSION), ++ NULL); ++ free(reply); ++ ++ return reply != NULL; ++} ++ ++static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window) ++{ ++ XRRScreenResources *res; ++ ++ res = XRRGetScreenResourcesCurrent(dpy, window); ++ if (res == NULL) ++ res = XRRGetScreenResources(dpy, window); ++ ++ return res; ++} ++ ++static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id) ++{ ++ int i; ++ ++ for (i = 0; i < res->nmode; i++) { ++ if (res->modes[i].id == id) ++ return &res->modes[i]; ++ } ++ ++ return NULL; ++} ++ ++static void fullscreen(Display *dpy, Window win) ++{ ++ Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False); ++ XChangeProperty(dpy, win, ++ XInternAtom(dpy, "_NET_WM_STATE", False), ++ XA_ATOM, 32, PropModeReplace, ++ (unsigned char *)&atom, 1); ++} ++ ++static void loop(Display *dpy, XRRScreenResources *res, unsigned options) ++{ ++ Window root = DefaultRootWindow(dpy); ++ Window win; ++ XSetWindowAttributes attr; ++ int i, j; ++ ++ attr.override_redirect = 1; ++ ++ run(dpy, root, "off", options); ++ XSync(dpy, True); ++ ++ for (i = 0; i < res->noutput; i++) { ++ 
XRROutputInfo *output; ++ XRRModeInfo *mode; ++ ++ output = XRRGetOutputInfo(dpy, res, res->outputs[i]); ++ if (output == NULL) ++ continue; ++ ++ mode = NULL; ++ if (res->nmode) ++ mode = lookup_mode(res, output->modes[0]); ++ ++ for (j = 0; mode && j < 2*output->ncrtc; j++) { ++ int c = j; ++ if (c >= output->ncrtc) ++ c = 2*output->ncrtc - j - 1; ++ ++ printf("[%d, %d] -- OUTPUT:%ld, CRTC:%ld: %dx%d\n", ++ i, c, (long)res->outputs[i], (long)output->crtcs[c], ++ mode->width, mode->height); ++ XRRSetCrtcConfig(dpy, res, output->crtcs[c], CurrentTime, ++ 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1); ++ ++ run(dpy, root, "root", options); ++ XSync(dpy, True); ++ ++ win = XCreateWindow(dpy, root, ++ 0, 0, mode->width, mode->height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ fullscreen(dpy, win); ++ XMapWindow(dpy, win); ++ run(dpy, win, "fullscreen", options); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ ++ win = XCreateWindow(dpy, root, ++ 0, 0, mode->width, mode->height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XMapWindow(dpy, win); ++ run(dpy, win, "windowed", options); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ ++ if (has_composite(dpy)) { ++ Damage damage; ++ ++ _x_error_occurred = 0; ++ win = XCreateWindow(dpy, root, ++ 0, 0, mode->width, mode->height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ damage = XDamageCreate(dpy, win, XDamageReportNonEmpty); ++ XMapWindow(dpy, win); ++ XSync(dpy, True); ++ if (!_x_error_occurred) ++ run(dpy, win, "composited", options); ++ XDamageDestroy(dpy, damage); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ } ++ ++ win = XCreateWindow(dpy, root, ++ 
0, 0, mode->width/2, mode->height/2, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XMapWindow(dpy, win); ++ run(dpy, win, "half", options); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ ++ perpixel(dpy, mode->width, mode->height, options); ++ ++ siblings(dpy, mode->width, mode->height, ++ sysconf(_SC_NPROCESSORS_ONLN), ++ options); ++ ++ cousins(mode->width, mode->height, ++ sysconf(_SC_NPROCESSORS_ONLN), ++ options); ++ ++ XRRSetCrtcConfig(dpy, res, output->crtcs[c], CurrentTime, ++ 0, 0, None, RR_Rotate_0, NULL, 0); ++ } ++ ++ XRRFreeOutputInfo(output); ++ } ++ ++} ++ ++int main(void) ++{ ++ Display *dpy; ++ XRRScreenResources *res; ++ XRRCrtcInfo **original_crtc; ++ int i; ++ ++ XInitThreads(); ++ ++ dpy = XOpenDisplay(NULL); ++ if (dpy == NULL) ++ return 77; ++ ++ if (!has_present(dpy)) ++ return 77; ++ ++ if (DPMSQueryExtension(dpy, &i, &i)) ++ DPMSDisable(dpy); ++ ++ signal(SIGALRM, SIG_IGN); ++ XSetErrorHandler(_check_error_handler); ++ ++ res = NULL; ++ if (XRRQueryVersion(dpy, &i, &i)) ++ res = _XRRGetScreenResourcesCurrent(dpy, DefaultRootWindow(dpy)); ++ if (res == NULL) ++ return 77; ++ ++ original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc); ++ for (i = 0; i < res->ncrtc; i++) ++ original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); ++ ++ printf("noutput=%d, ncrtc=%d\n", res->noutput, res->ncrtc); ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ 0, 0, None, RR_Rotate_0, NULL, 0); ++ ++ loop(dpy, res, 0); ++ loop(dpy, res, ASYNC); ++ if (has_xfixes(dpy)) ++ loop(dpy, res, NOCOPY); ++ if (has_dri3(dpy)) { ++ loop(dpy, res, DRI3); ++ loop(dpy, res, DRI3 | ASYNC); ++ } ++ ++ for (i = 0; i < res->ncrtc; i++) ++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime, ++ original_crtc[i]->x, ++ original_crtc[i]->y, ++ original_crtc[i]->mode, ++ original_crtc[i]->rotation, ++ original_crtc[i]->outputs, ++ 
original_crtc[i]->noutput); ++ ++ if (DPMSQueryExtension(dpy, &i, &i)) ++ DPMSEnable(dpy); ++ return 0; ++} +diff --git a/test/present-test.c b/test/present-test.c +index 6b562eb0..5a12a24f 100644 +--- a/test/present-test.c ++++ b/test/present-test.c +@@ -31,7 +31,9 @@ + #include <X11/xshmfence.h> + #include <X11/Xutil.h> + #include <X11/Xlibint.h> ++#include <X11/extensions/dpms.h> + #include <X11/extensions/randr.h> ++#include <X11/extensions/Xcomposite.h> + #include <X11/extensions/Xrandr.h> + #include <X11/extensions/Xrender.h> + #include <X11/extensions/XShm.h> +@@ -44,6 +46,8 @@ + #endif + #include <xcb/xcb.h> + #include <xcb/present.h> ++#include <xcb/xfixes.h> ++#include <xcb/dri3.h> + #include <xf86drm.h> + #include <i915_drm.h> + +@@ -134,12 +138,14 @@ static void *setup_msc(Display *dpy, Window win) + return q; + } + +-static uint64_t check_msc(Display *dpy, Window win, void *q, uint64_t last_msc) ++static uint64_t check_msc(Display *dpy, Window win, void *q, uint64_t last_msc, uint64_t *ust) + { + xcb_connection_t *c = XGetXCBConnection(dpy); ++ static uint32_t serial = 1; + uint64_t msc = 0; ++ int complete = 0; + +- xcb_present_notify_msc(c, win, 0, 0, 0, 0); ++ xcb_present_notify_msc(c, win, serial ^ 0xcc00ffee, 0, 0, 0); + xcb_flush(c); + + do { +@@ -151,82 +157,1268 @@ static uint64_t check_msc(Display *dpy, Window win, void *q, uint64_t last_msc) + break; + + ce = (xcb_present_complete_notify_event_t *)ev; +- if (ce->kind != XCB_PRESENT_COMPLETE_KIND_PIXMAP) ++ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && ++ ce->serial == (serial ^ 0xcc00ffee)) { ++ msc = ce->msc; ++ if (ust) ++ *ust = ce->ust; ++ complete = 1; ++ } ++ free(ev); ++ } while (!complete); ++ ++ if ((int64_t)(msc - last_msc) < 0) { ++ printf("Invalid MSC: was %llu, now %llu\n", ++ (long long)last_msc, (long long)msc); ++ } ++ ++ if (++serial == 0) ++ serial = 1; ++ ++ return msc; ++} ++ ++static uint64_t wait_vblank(Display *dpy, Window win, void *q) ++{ ++ xcb_connection_t 
*c = XGetXCBConnection(dpy); ++ static uint32_t serial = 1; ++ uint64_t msc = 0; ++ int complete = 0; ++ ++ xcb_present_notify_msc(c, win, serial ^ 0xdeadbeef, 0, 1, 0); ++ xcb_flush(c); ++ ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && ++ ce->serial == (serial ^ 0xdeadbeef)) { + msc = ce->msc; ++ complete = 1; ++ } ++ free(ev); ++ } while (!complete); ++ ++ if (++serial == 0) ++ serial = 1; ++ ++ return msc; ++} ++ ++static uint64_t msc_interval(Display *dpy, Window win, void *q) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ uint64_t msc, ust; ++ int complete = 0; ++ ++ msc = check_msc(dpy, win, q, 0, NULL); ++ ++ xcb_present_notify_msc(c, win, 0xc0ffee00, msc, 0, 0); ++ xcb_present_notify_msc(c, win, 0xc0ffee01, msc + 10, 0, 0); ++ xcb_flush(c); ++ ++ ust = msc = 0; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && ++ ce->serial == 0xc0ffee00) { ++ msc -= ce->msc; ++ ust -= ce->ust; ++ complete++; ++ } ++ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC && ++ ce->serial == 0xc0ffee01) { ++ msc += ce->msc; ++ ust += ce->ust; ++ complete++; ++ } ++ free(ev); ++ } while (complete != 2); ++ ++ printf("10 frame interval: msc=%lld, ust=%lld\n", ++ (long long)msc, (long long)ust); ++ XSync(dpy, True); ++ if (msc == 0) ++ return 0; ++ ++ return (ust + msc/2) / msc; ++} ++ ++static void teardown_msc(Display *dpy, void *q) ++{ ++ xcb_unregister_for_special_event(XGetXCBConnection(dpy), q); ++} ++ ++static int test_whole(Display *dpy, Window win, const char *phase) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Pixmap 
pixmap; ++ struct dri3_fence fence; ++ Window root; ++ unsigned int width, height; ++ unsigned border, depth; ++ int x, y, ret = 1; ++ ++ XGetGeometry(dpy, win, ++ &root, &x, &y, &width, &height, &border, &depth); ++ ++ if (dri3_create_fence(dpy, win, &fence)) ++ return 0; ++ ++ printf("%s: Testing simple flip: %dx%d\n", phase, width, height); ++ _x_error_occurred = 0; ++ ++ xshmfence_reset(fence.addr); ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ xcb_present_pixmap(c, win, pixmap, 0, ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ fence.xid, ++ XCB_PRESENT_OPTION_NONE, ++ 0, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ XFreePixmap(dpy, pixmap); ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ xcb_present_pixmap(c, win, pixmap, 0, ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, /* sync fence */ ++ XCB_PRESENT_OPTION_NONE, ++ 0, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ XFreePixmap(dpy, pixmap); ++ XFlush(dpy); ++ ++ ret = !!xshmfence_await(fence.addr); ++ dri3_fence_free(dpy, &fence); ++ ++ XSync(dpy, True); ++ ret += !!_x_error_occurred; ++ ++ return ret; ++} ++ ++static uint64_t flush_flips(Display *dpy, Window win, Pixmap pixmap, void *Q, uint64_t *ust) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ uint64_t msc; ++ int complete; ++ ++ msc = check_msc(dpy, win, Q, 0, NULL); ++ xcb_present_pixmap(c, win, pixmap, ++ 0xdeadbeef, /* serial */ ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ msc + 60, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_flush(c); ++ complete = 0; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if 
(ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ complete = (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP && ++ ce->serial == 0xdeadbeef); ++ free(ev); ++ } while (!complete); ++ XSync(dpy, True); ++ ++ return check_msc(dpy, win, Q, msc, ust); ++} ++ ++static int test_double(Display *dpy, Window win, const char *phase, void *Q) ++{ ++#define COUNT (15*60) ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Pixmap pixmap; ++ Window root; ++ unsigned int width, height; ++ unsigned border, depth; ++ int x, y, n, ret; ++ struct { ++ uint64_t msc, ust; ++ } frame[COUNT+1]; ++ int offset = 0; ++ ++ XGetGeometry(dpy, win, ++ &root, &x, &y, &width, &height, &border, &depth); ++ ++ printf("%s: Testing flip double buffering: %dx%d\n", phase, width, height); ++ _x_error_occurred = 0; ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ flush_flips(dpy, win, pixmap, Q, NULL); ++ for (n = 0; n <= COUNT; n++) { ++ int complete; ++ ++ xcb_present_pixmap(c, win, pixmap, n, ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ 0, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_flush(c); ++ ++ complete = 0; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ if (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP && ++ ce->serial == n) { ++ frame[n].msc = ce->msc; ++ frame[n].ust = ce->ust; ++ complete = 1; ++ } ++ free(ev); ++ } while (!complete); ++ } ++ XFreePixmap(dpy, pixmap); ++ ++ XSync(dpy, True); ++ ret = !!_x_error_occurred; ++ ++ if (frame[COUNT].msc - frame[0].msc != COUNT) { ++ printf("Expected %d frames interval, %d elapsed instead\n", ++ COUNT, (int)(frame[COUNT].msc - frame[0].msc)); ++ for (n = 0; n <= COUNT; n++) { ++ if (frame[n].msc - frame[0].msc 
!= n + offset) { ++ printf("frame[%d]: msc=%03lld, ust=%lld\n", n, ++ (long long)(frame[n].msc - frame[0].msc), ++ (long long)(frame[n].ust - frame[0].ust)); ++ offset = frame[n].msc - frame[0].msc - n; ++ ret++; ++ } ++ } ++ } ++ ++ return ret; ++} ++ ++static int test_future(Display *dpy, Window win, const char *phase, void *Q) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Pixmap pixmap; ++ struct dri3_fence fence; ++ Window root; ++ unsigned int width, height; ++ unsigned border, depth; ++ int x, y, ret = 0, n; ++ uint64_t msc, ust; ++ int complete, count; ++ int early = 0, late = 0; ++ int earliest = 0, latest = 0; ++ uint64_t interval; ++ ++ XGetGeometry(dpy, win, ++ &root, &x, &y, &width, &height, &border, &depth); ++ ++ if (dri3_create_fence(dpy, win, &fence)) ++ return 0; ++ ++ printf("%s: Testing flips into the future: %dx%d\n", phase, width, height); ++ _x_error_occurred = 0; ++ ++ interval = msc_interval(dpy, win, Q); ++ if (interval == 0) { ++ printf("Zero delay between frames\n"); ++ return 1; ++ } ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ msc = flush_flips(dpy, win, pixmap, Q, &ust); ++ for (n = 1; n <= 10; n++) ++ xcb_present_pixmap(c, win, pixmap, ++ n, /* serial */ ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ msc + 60 + n*15*60, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_present_pixmap(c, win, pixmap, ++ 0xdeadbeef, /* serial */ ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ msc + 60 + n*15*60, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_flush(c); ++ ++ complete = 0; ++ count = 0; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = 
(xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP); ++ ++ if (ce->serial == 0xdeadbeef) { ++ int64_t time; ++ ++ time = ce->ust - (ust + (60 + 15*60*n) * interval); ++ if (time < -(int64_t)interval) { ++ fprintf(stderr, ++ "\tflips completed too early by %lldms\n", ++ (long long)(-time / 1000)); ++ } else if (time > (int64_t)interval) { ++ fprintf(stderr, ++ "\tflips completed too late by %lldms\n", ++ (long long)(time / 1000)); ++ } ++ complete = 1; ++ } else { ++ int diff = (int64_t)(ce->msc - (15*60*ce->serial + msc + 60)); ++ if (diff < 0) { ++ if (-diff > earliest) { ++ fprintf(stderr, "\tframe %d displayed early by %d frames\n", ce->serial, -diff); ++ earliest = -diff; ++ } ++ early++; ++ ret++; ++ } else if (diff > 0) { ++ if (diff > latest) { ++ fprintf(stderr, "\tframe %d displayed late by %d frames\n", ce->serial, diff); ++ latest = diff; ++ } ++ late++; ++ ret++; ++ } ++ count++; ++ } ++ free(ev); ++ } while (!complete); ++ ++ if (early) ++ printf("\t%d frames shown too early (worst %d)!\n", early, earliest); ++ if (late) ++ printf("\t%d frames shown too late (worst %d)!\n", late, latest); ++ ++ if (count != 10) { ++ fprintf(stderr, "Sentinel frame received too early! 
%d frames outstanding\n", 10 - count); ++ ret++; ++ ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP); ++ free(ev); ++ } while (++count != 10); ++ } ++ ++ ret += !!_x_error_occurred; ++ ++ return ret; ++} ++ ++static int test_exhaustion(Display *dpy, Window win, const char *phase, void *Q) ++{ ++#define N_VBLANKS 256 /* kernel event queue length: 128 vblanks */ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Pixmap pixmap; ++ struct dri3_fence fence[2]; ++ Window root; ++ xcb_xfixes_region_t region; ++ unsigned int width, height; ++ unsigned border, depth; ++ int x, y, ret = 0, n; ++ uint64_t target, final; ++ ++ XGetGeometry(dpy, win, ++ &root, &x, &y, &width, &height, &border, &depth); ++ ++ if (dri3_create_fence(dpy, win, &fence[0]) || ++ dri3_create_fence(dpy, win, &fence[1])) ++ return 0; ++ ++ printf("%s: Testing flips with long vblank queues: %dx%d\n", phase, width, height); ++ _x_error_occurred = 0; ++ ++ region = xcb_generate_id(c); ++ xcb_xfixes_create_region(c, region, 0, NULL); ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ xshmfence_reset(fence[0].addr); ++ xshmfence_reset(fence[1].addr); ++ target = check_msc(dpy, win, Q, 0, NULL); ++ for (n = N_VBLANKS; n--; ) ++ xcb_present_pixmap(c, win, pixmap, 0, ++ 0, /* valid */ ++ region, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ target + N_VBLANKS, /* target msc */ ++ 1, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_present_pixmap(c, win, pixmap, 0, ++ region, /* valid */ ++ region, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ fence[0].xid, ++ XCB_PRESENT_OPTION_NONE, ++ target, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, 
NULL); ++ for (n = 1; n < N_VBLANKS; n++) ++ xcb_present_pixmap(c, win, pixmap, 0, ++ region, /* valid */ ++ region, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ target + n, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_present_pixmap(c, win, pixmap, 0, ++ region, /* valid */ ++ region, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ fence[1].xid, ++ XCB_PRESENT_OPTION_NONE, ++ target + N_VBLANKS, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_flush(c); ++ ++ ret += !!xshmfence_await(fence[0].addr); ++ final = check_msc(dpy, win, Q, 0, NULL); ++ if (final < target) { ++ printf("\tFirst flip too early, MSC was %llu, expected %llu\n", ++ (long long)final, (long long)target); ++ ret++; ++ } else if (final > target + 1) { ++ printf("\tFirst flip too late, MSC was %llu, expected %llu\n", ++ (long long)final, (long long)target); ++ ret++; ++ } ++ ++ ret += !!xshmfence_await(fence[1].addr); ++ final = check_msc(dpy, win, Q, 0, NULL); ++ if (final < target + N_VBLANKS) { ++ printf("\tLast flip too early, MSC was %llu, expected %llu\n", ++ (long long)final, (long long)(target + N_VBLANKS)); ++ ret++; ++ } else if (final > target + N_VBLANKS + 1) { ++ printf("\tLast flip too late, MSC was %llu, expected %llu\n", ++ (long long)final, (long long)(target + N_VBLANKS)); ++ ret++; ++ } ++ ++ flush_flips(dpy, win, pixmap, Q, NULL); ++ ++ XFreePixmap(dpy, pixmap); ++ xcb_xfixes_destroy_region(c, region); ++ dri3_fence_free(dpy, &fence[1]); ++ dri3_fence_free(dpy, &fence[0]); ++ ++ XSync(dpy, True); ++ ret += !!_x_error_occurred; ++ ++ return ret; ++#undef N_VBLANKS ++} ++ ++static int test_accuracy(Display *dpy, Window win, const char *phase, void *Q) ++{ ++#define N_VBLANKS (60 * 120) /* ~2 minutes */ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Pixmap pixmap; ++ Window root; ++ unsigned int 
width, height; ++ unsigned border, depth; ++ int x, y, ret = 0, n; ++ uint64_t target; ++ int early = 0, late = 0; ++ int earliest = 0, latest = 0; ++ int complete, count; ++ ++ XGetGeometry(dpy, win, ++ &root, &x, &y, &width, &height, &border, &depth); ++ ++ printf("%s: Testing flip accuracy: %dx%d\n", phase, width, height); ++ _x_error_occurred = 0; ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ target = flush_flips(dpy, win, pixmap, Q, NULL); ++ for (n = 0; n <= N_VBLANKS; n++) ++ xcb_present_pixmap(c, win, pixmap, ++ n, /* serial */ ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ target + 60 + n, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_present_pixmap(c, win, pixmap, ++ 0xdeadbeef, /* serial */ ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ target + 60 + n, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_flush(c); ++ ++ complete = 0; ++ count = 0; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP); ++ ++ if (ce->serial != 0xdeadbeef) { ++ int diff = (int64_t)(ce->msc - (target + ce->serial + 60)); ++ if (diff < 0) { ++ if (-diff > earliest) { ++ fprintf(stderr, "\tframe %d displayed early by %d frames\n", ce->serial, -diff); ++ earliest = -diff; ++ } ++ early++; ++ ret++; ++ } else if (diff > 0) { ++ if (diff > latest) { ++ fprintf(stderr, "\tframe %d displayed late by %d frames\n", ce->serial, diff); ++ latest = diff; ++ } ++ late++; ++ ret++; ++ } ++ count++; ++ } else ++ complete = 1; + free(ev); +- } while (msc == 0); ++ } while (!complete); ++ ++ if (early) ++ 
printf("\t%d frames shown too early (worst %d)!\n", early, earliest); ++ if (late) ++ printf("\t%d frames shown too late (worst %d)!\n", late, latest); ++ ++ if (count != N_VBLANKS+1) { ++ fprintf(stderr, "Sentinel frame received too early! %d frames outstanding\n", N_VBLANKS+1 - count); ++ ret++; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP); ++ free(ev); ++ } while (++count != N_VBLANKS+1); ++ } ++ ++ XFreePixmap(dpy, pixmap); ++ ++ XSync(dpy, True); ++ ret += !!_x_error_occurred; ++ ++ return ret; ++#undef N_VBLANKS ++} ++ ++static int test_modulus(Display *dpy, Window win, const char *phase, void *Q) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Pixmap pixmap; ++ Window root; ++ unsigned int width, height; ++ unsigned border, depth; ++ xcb_xfixes_region_t region; ++ int x, y, ret = 0; ++ uint64_t target; ++ int early = 0, late = 0; ++ int earliest = 0, latest = 0; ++ int complete, expect, count; ++ ++ XGetGeometry(dpy, win, ++ &root, &x, &y, &width, &height, &border, &depth); ++ ++ printf("%s: Testing flip modulus: %dx%d\n", phase, width, height); ++ _x_error_occurred = 0; ++ ++ region = xcb_generate_id(c); ++ xcb_xfixes_create_region(c, region, 0, NULL); ++ ++ pixmap = XCreatePixmap(dpy, win, width, height, depth); ++ target = flush_flips(dpy, win, pixmap, Q, NULL); ++ expect = 0; ++ for (x = 1; x <= 7; x++) { ++ for (y = 0; y < x; y++) { ++ xcb_present_pixmap(c, win, pixmap, ++ y << 16 | x, /* serial */ ++ region, /* valid */ ++ region, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ 0, /* target msc */ ++ x, /* divisor */ ++ y, /* remainder */ ++ 0, NULL); ++ expect++; ++ } ++ } ++ xcb_present_pixmap(c, win, pixmap, ++ 0xdeadbeef, /* serial */ ++ 0, /* valid 
*/ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ target + 2*x, /* target msc */ ++ 0, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ xcb_flush(c); ++ ++ complete = 0; ++ count = 0; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ if (ce->kind != XCB_PRESENT_COMPLETE_KIND_PIXMAP) ++ break; ++ ++ assert(ce->serial); ++ if (ce->serial != 0xdeadbeef) { ++ uint64_t msc; ++ int diff; ++ ++ x = ce->serial & 0xffff; ++ y = ce->serial >> 16; ++ ++ msc = target; ++ msc -= target % x; ++ msc += y; ++ if (msc <= target) ++ msc += x; ++ ++ diff = (int64_t)(ce->msc - msc); ++ if (diff < 0) { ++ if (-diff > earliest) { ++ fprintf(stderr, "\tframe (%d, %d) displayed early by %d frames\n", y, x, -diff); ++ earliest = -diff; ++ } ++ early++; ++ ret++; ++ } else if (diff > 0) { ++ if (diff > latest) { ++ fprintf(stderr, "\tframe (%d, %d) displayed late by %d frames\n", y, x, diff); ++ latest = diff; ++ } ++ late++; ++ ret++; ++ } ++ count++; ++ } else ++ complete = 1; ++ free(ev); ++ } while (!complete); ++ ++ if (early) ++ printf("\t%d frames shown too early (worst %d)!\n", early, earliest); ++ if (late) ++ printf("\t%d frames shown too late (worst %d)!\n", late, latest); ++ ++ if (count != expect) { ++ fprintf(stderr, "Sentinel frame received too early! 
%d frames outstanding\n", expect - count); ++ ret++; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ free(ev); ++ } while (++count != expect); ++ } ++ ++ XFreePixmap(dpy, pixmap); ++ xcb_xfixes_destroy_region(c, region); ++ ++ XSync(dpy, True); ++ ret += !!_x_error_occurred; ++ ++ return ret; ++} ++ ++static int test_future_msc(Display *dpy, void *Q) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Window root = DefaultRootWindow(dpy); ++ int ret = 0, n; ++ uint64_t msc, ust; ++ int complete, count; ++ int early = 0, late = 0; ++ int earliest = 0, latest = 0; ++ uint64_t interval; ++ ++ printf("Testing notifies into the future\n"); ++ _x_error_occurred = 0; ++ ++ interval = msc_interval(dpy, root, Q); ++ if (interval == 0) { ++ printf("Zero delay between frames\n"); ++ return 1; ++ } ++ msc = check_msc(dpy, root, Q, 0, &ust); ++ printf("Initial msc=%llx, interval between frames %lldus\n", ++ (long long)msc, (long long)interval); ++ ++ for (n = 1; n <= 10; n++) ++ xcb_present_notify_msc(c, root, n, msc + 60 + n*15*60, 0, 0); ++ xcb_present_notify_msc(c, root, 0xdeadbeef, msc + 60 + n*15*60, 0, 0); ++ xcb_flush(c); ++ ++ complete = 0; ++ count = 0; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ ++ if (ce->serial == 0xdeadbeef) { ++ int64_t time, tolerance; ++ ++ tolerance = 60 + 15*60*n/10; ++ if (tolerance < interval) ++ tolerance = interval; ++ ++ time = ce->ust - (ust + (60 + 15*60*n) * interval); ++ if (time < -(int64_t)tolerance) { ++ fprintf(stderr, ++ "\tnotifies completed too early by %lldms, tolerance 
%lldus\n", ++ (long long)(-time / 1000), (long long)tolerance); ++ } else if (time > (int64_t)tolerance) { ++ fprintf(stderr, ++ "\tnotifies completed too late by %lldms, tolerance %lldus\n", ++ (long long)(time / 1000), (long long)tolerance); ++ } ++ complete = 1; ++ } else { ++ int diff = (int64_t)(ce->msc - (15*60*ce->serial + msc + 60)); ++ ++ if (ce->serial != count + 1) { ++ fprintf(stderr, "vblank received out of order! expected %d, received %d\n", ++ count + 1, (int)ce->serial); ++ ret++; ++ } ++ count++; ++ ++ if (diff < 0) { ++ if (-diff > earliest) { ++ fprintf(stderr, "\tnotify %d early by %d msc\n", ce->serial, -diff); ++ earliest = -diff; ++ } ++ early++; ++ ret++; ++ } else if (diff > 0) { ++ if (diff > latest) { ++ fprintf(stderr, "\tnotify %d late by %d msc\n", ce->serial, diff); ++ latest = diff; ++ } ++ late++; ++ ret++; ++ } ++ } ++ free(ev); ++ } while (!complete); ++ ++ if (early) ++ printf("\t%d notifies too early (worst %d)!\n", early, earliest); ++ if (late) ++ printf("\t%d notifies too late (worst %d)!\n", late, latest); ++ ++ if (count != 10) { ++ fprintf(stderr, "Sentinel vblank received too early! 
%d waits outstanding\n", 10 - count); ++ ret++; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ free(ev); ++ } while (++count != 10); ++ } ++ ++ XSync(dpy, True); ++ ret += !!_x_error_occurred; ++ ++ return ret; ++} ++ ++static int test_wrap_msc(Display *dpy) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Window root, win; ++ int x, y; ++ unsigned int width, height; ++ unsigned border, depth; ++ XSetWindowAttributes attr; ++ int ret = 0, n; ++ uint64_t msc, ust; ++ int complete; ++ uint64_t interval; ++ void *Q; ++ ++ XGetGeometry(dpy, DefaultRootWindow(dpy), ++ &root, &x, &y, &width, &height, &border, &depth); ++ ++ attr.override_redirect = 1; ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, depth, ++ InputOutput, DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XMapWindow(dpy, win); ++ XSync(dpy, True); ++ if (_x_error_occurred) ++ return 1; + +- if (msc < last_msc) { +- printf("Invalid MSC: was %llu, now %llu\n", +- (long long)last_msc, (long long)msc); ++ printf("Testing wraparound notifies\n"); ++ _x_error_occurred = 0; ++ ++ Q = setup_msc(dpy, win); ++ interval = msc_interval(dpy, win, Q); ++ if (interval == 0) { ++ printf("Zero delay between frames\n"); ++ return 1; + } ++ msc = check_msc(dpy, win, Q, 0, &ust); ++ printf("Initial msc=%llx, interval between frames %lldus\n", ++ (long long)msc, (long long)interval); ++ ++ for (n = 1; n <= 10; n++) ++ xcb_present_notify_msc(c, win, n, ++ msc + ((long long)n<<32) + n, ++ 0, 0); ++ for (n = 1; n <= 10; n++) ++ xcb_present_notify_msc(c, win, -n, ++ 0, (long long)n << 32, 0); ++ xcb_present_notify_msc(c, win, 0xdeadbeef, msc + 60*10, 0, 0); ++ xcb_flush(c); + +- return msc; ++ complete = 0; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ 
xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ ++ if (ce->serial == 0xdeadbeef) { ++ complete = 1; ++ } else { ++ fprintf(stderr, ++ "\tnotify %d recieved at +%llu\n", ++ ce->serial, ce->msc - msc); ++ ret++; ++ } ++ free(ev); ++ } while (!complete); ++ ++ teardown_msc(dpy, Q); ++ XDestroyWindow(dpy, win); ++ XSync(dpy, True); ++ ++ return ret; + } + +-static void teardown_msc(Display *dpy, void *q) ++static int test_exhaustion_msc(Display *dpy, void *Q) + { +- xcb_unregister_for_special_event(XGetXCBConnection(dpy), q); ++#define N_VBLANKS 256 /* kernel event queue length: 128 vblanks */ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Window root = DefaultRootWindow(dpy); ++ int ret = 0, n, complete; ++ int earliest = 0, early = 0; ++ int latest = 0, late = 0; ++ uint64_t msc; ++ ++ printf("Testing notifies with long queues\n"); ++ _x_error_occurred = 0; ++ ++ msc = check_msc(dpy, root, Q, 0, NULL); ++ for (n = N_VBLANKS; n--; ) ++ xcb_present_notify_msc(c, root, N_VBLANKS, msc + N_VBLANKS, 0, 0); ++ for (n = 1; n <= N_VBLANKS ; n++) ++ xcb_present_notify_msc(c, root, n, msc + n, 0, 0); ++ xcb_flush(c); ++ ++ complete = 2*N_VBLANKS; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ int diff; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ ++ diff = (int64_t)(ce->msc - msc - ce->serial); ++ if (diff < 0) { ++ if (-diff > earliest) { ++ fprintf(stderr, "\tnotify %d early by %d msc\n",(int)ce->serial, -diff); ++ earliest = -diff; ++ } ++ early++; ++ ret++; ++ } else if (diff > 0) { ++ if (diff > latest) { ++ fprintf(stderr, "\tnotify %d late by %d msc\n", (int)ce->serial, diff); ++ latest = diff; ++ } ++ late++; ++ ret++; 
++ } ++ free(ev); ++ } while (--complete); ++ ++ if (early) ++ printf("\t%d notifies too early (worst %d)!\n", early, earliest); ++ if (late) ++ printf("\t%d notifies too late (worst %d)!\n", late, latest); ++ ++ XSync(dpy, True); ++ ret += !!_x_error_occurred; ++ ++ return ret; ++#undef N_VBLANKS + } +-static int test_whole(Display *dpy) ++ ++static int test_accuracy_msc(Display *dpy, void *Q) + { +- Pixmap pixmap; +- struct dri3_fence fence; +- Window root; +- unsigned int width, height; +- unsigned border, depth; +- int x, y, ret = 1; ++#define N_VBLANKS (60 * 120) /* ~2 minutes */ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Window root = DefaultRootWindow(dpy); ++ int ret = 0, n; ++ uint64_t msc; ++ int early = 0, late = 0; ++ int earliest = 0, latest = 0; ++ int complete, count; + +- XGetGeometry(dpy, DefaultRootWindow(dpy), +- &root, &x, &y, &width, &height, &border, &depth); ++ printf("Testing notify accuracy\n"); ++ _x_error_occurred = 0; + +- if (dri3_create_fence(dpy, root, &fence)) +- return 0; ++ msc = check_msc(dpy, root, Q, 0, NULL); ++ for (n = 0; n <= N_VBLANKS; n++) ++ xcb_present_notify_msc(c, root, n, msc + 60 + n, 0, 0); ++ xcb_present_notify_msc(c, root, 0xdeadbeef, msc + 60 + n, 0, 0); ++ xcb_flush(c); ++ ++ complete = 0; ++ count = 0; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ ++ if (ce->serial != 0xdeadbeef) { ++ int diff = (int64_t)(ce->msc - (msc + ce->serial + 60)); ++ if (diff < 0) { ++ if (-diff > earliest) { ++ fprintf(stderr, "\tnotify %d early by %d msc\n", ce->serial, -diff); ++ earliest = -diff; ++ } ++ early++; ++ ret++; ++ } else if (diff > 0) { ++ if (diff > latest) { ++ fprintf(stderr, "\tnotify %d late by %d msc\n", ce->serial, diff); ++ latest = diff; ++ } ++ late++; ++ ret++; ++ } ++ count++; 
++ } else ++ complete = 1; ++ free(ev); ++ } while (!complete); ++ ++ if (early) ++ printf("\t%d notifies too early (worst %d)!\n", early, earliest); ++ if (late) ++ printf("\t%d notifies too late (worst %d)!\n", late, latest); ++ ++ if (count != N_VBLANKS+1) { ++ fprintf(stderr, "Sentinel vblank received too early! %d waits outstanding\n", N_VBLANKS+1 - count); ++ ret++; ++ do { ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ free(ev); ++ } while (++count != N_VBLANKS+1); ++ } ++ ++ XSync(dpy, True); ++ ret += !!_x_error_occurred; ++ ++ return ret; ++#undef N_VBLANKS ++} + +- printf("Testing whole screen flip: %dx%d\n", width, height); ++static int test_modulus_msc(Display *dpy, void *Q) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ Window root = DefaultRootWindow(dpy); ++ xcb_present_complete_notify_event_t *ce; ++ xcb_generic_event_t *ev; ++ int x, y, ret = 0; ++ uint64_t target; ++ int early = 0, late = 0; ++ int earliest = 0, latest = 0; ++ int complete, count, expect; ++ ++ printf("Testing notify modulus\n"); + _x_error_occurred = 0; + +- xshmfence_reset(fence.addr); ++ target = wait_vblank(dpy, root, Q); + +- pixmap = XCreatePixmap(dpy, root, width, height, depth); +- xcb_present_pixmap(XGetXCBConnection(dpy), +- root, pixmap, +- 0, /* sbc */ +- 0, /* valid */ +- 0, /* update */ +- 0, /* x_off */ +- 0, /* y_off */ +- None, +- None, /* wait fence */ +- fence.xid, +- XCB_PRESENT_OPTION_NONE, +- 0, /* target msc */ +- 0, /* divisor */ +- 0, /* remainder */ +- 0, NULL); +- XFreePixmap(dpy, pixmap); ++ expect = 0; ++ xcb_present_notify_msc(c, root, 0, 0, 0, 0); ++ for (x = 1; x <= 19; x++) { ++ for (y = 0; y < x; y++) { ++ xcb_present_notify_msc(c, root, y << 16 | x, 0, x, y); ++ expect++; ++ } ++ } ++ xcb_present_notify_msc(c, root, 
0xdeadbeef, target + 2*x, 0, 0); ++ xcb_flush(c); + +- pixmap = XCreatePixmap(dpy, root, width, height, depth); +- xcb_present_pixmap(XGetXCBConnection(dpy), +- root, pixmap, +- 0, /* sbc */ +- 0, /* valid */ +- 0, /* update */ +- 0, /* x_off */ +- 0, /* y_off */ +- None, +- None, /* wait fence */ +- None, /* sync fence */ +- XCB_PRESENT_OPTION_NONE, +- 0, /* target msc */ +- 0, /* divisor */ +- 0, /* remainder */ +- 0, NULL); +- XFreePixmap(dpy, pixmap); +- XFlush(dpy); ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev) { ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ assert(ce->serial == 0); ++ assert(target == ce->msc); ++ target = ce->msc; ++ } + +- ret = !!xshmfence_await(fence.addr); +- dri3_fence_free(dpy, &fence); ++ complete = 0; ++ count = 0; ++ do { ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ ++ assert(ce->serial); ++ if (ce->serial != 0xdeadbeef) { ++ uint64_t msc; ++ int diff; ++ ++ x = ce->serial & 0xffff; ++ y = ce->serial >> 16; ++ ++ msc = target; ++ msc -= target % x; ++ msc += y; ++ if (msc <= target) ++ msc += x; ++ ++ diff = (int64_t)(ce->msc - msc); ++ if (diff < 0) { ++ if (-diff > earliest) { ++ fprintf(stderr, "\tnotify (%d, %d) early by %d msc (target %lld, reported %lld)\n", y, x, -diff, (long long)msc, (long long)ce->msc); ++ earliest = -diff; ++ } ++ early++; ++ ret++; ++ } else if (diff > 0) { ++ if (diff > latest) { ++ fprintf(stderr, "\tnotify (%d, %d) late by %d msc (target %lld, reported %lld)\n", y, x, diff, (long long)msc, (long long)ce->msc); ++ latest = diff; ++ } ++ late++; ++ ret++; ++ } ++ count++; ++ } else ++ complete = 1; ++ free(ev); ++ } while (!complete); ++ ++ if (early) ++ printf("\t%d notifies too early (worst %d)!\n", early, earliest); ++ if (late) ++ printf("\t%d notifies too late (worst %d)!\n", late, 
latest); ++ ++ if (count != expect) { ++ fprintf(stderr, "Sentinel vblank received too early! %d waits outstanding\n", expect - count); ++ ret++; ++ do { ++ ev = xcb_wait_for_special_event(c, Q); ++ if (ev == NULL) ++ break; ++ ++ ce = (xcb_present_complete_notify_event_t *)ev; ++ assert(ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC); ++ free(ev); ++ } while (++count != expect); ++ } + + XSync(dpy, True); + ret += !!_x_error_occurred; +@@ -279,8 +1471,6 @@ static int for_each_crtc(Display *dpy, + for (i = 0; i < res->ncrtc; i++) + original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]); + +- printf("noutput=%d, ncrtc=%d\n", res->noutput, res->ncrtc); +- + for (i = 0; i < res->noutput; i++) { + XRROutputInfo *output; + XRRModeInfo *mode; +@@ -322,7 +1512,7 @@ static int for_each_crtc(Display *dpy, + free(original_crtc); + XRRFreeScreenResources(res); + +- return j; ++ return err; + } + + struct test_crtc { +@@ -335,6 +1525,7 @@ struct test_crtc { + uint64_t msc; + }; + #define SYNC 0x1 ++#define FUTURE 0x2 + + static int __test_crtc(Display *dpy, RRCrtc crtc, + int width, int height, +@@ -344,7 +1535,7 @@ static int __test_crtc(Display *dpy, RRCrtc crtc, + Pixmap pixmap; + int err = 0; + +- test->msc = check_msc(dpy, test->win, test->queue, test->msc); ++ test->msc = check_msc(dpy, test->win, test->queue, test->msc, NULL); + + if (test->flags & SYNC) + xshmfence_reset(test->fence.addr); +@@ -361,16 +1552,14 @@ static int __test_crtc(Display *dpy, RRCrtc crtc, + None, /* wait fence */ + test->flags & SYNC ? 
test->fence.xid : None, + XCB_PRESENT_OPTION_NONE, +- 0, /* target msc */ ++ test->msc, /* target msc */ + 1, /* divisor */ + 0, /* remainder */ + 0, NULL); +- XFreePixmap(dpy, pixmap); +- + if (test->flags & SYNC) { +- pixmap = XCreatePixmap(dpy, test->win, width, height, test->depth); ++ Pixmap tmp = XCreatePixmap(dpy, test->win, width, height, test->depth); + xcb_present_pixmap(XGetXCBConnection(dpy), +- test->win, pixmap, ++ test->win, tmp, + 1, /* sbc */ + 0, /* valid */ + 0, /* update */ +@@ -380,16 +1569,17 @@ static int __test_crtc(Display *dpy, RRCrtc crtc, + None, /* wait fence */ + None, /* sync fence */ + XCB_PRESENT_OPTION_NONE, +- 1, /* target msc */ ++ test->msc + (test->flags & FUTURE ? 5 * 16 : 1), /* target msc */ + 1, /* divisor */ + 0, /* remainder */ + 0, NULL); +- XFreePixmap(dpy, pixmap); ++ XFreePixmap(dpy, tmp); + XFlush(dpy); + err += !!xshmfence_await(test->fence.addr); + } ++ XFreePixmap(dpy, pixmap); + +- test->msc = check_msc(dpy, test->win, test->queue, test->msc); ++ test->msc = check_msc(dpy, test->win, test->queue, test->msc, NULL); + return err; + } + +@@ -410,15 +1600,23 @@ static int test_crtc(Display *dpy, void *queue, uint64_t last_msc) + + printf("Testing each crtc, without waiting for each flip\n"); + test.flags = 0; ++ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); + err += for_each_crtc(dpy, __test_crtc, &test); ++ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); + + printf("Testing each crtc, waiting for flips to complete\n"); + test.flags = SYNC; ++ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); + err += for_each_crtc(dpy, __test_crtc, &test); ++ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); + +- test.msc = check_msc(dpy, test.win, test.queue, test.msc); +- dri3_fence_free(dpy, &test.fence); ++ printf("Testing each crtc, with future flips\n"); ++ test.flags = FUTURE | SYNC; ++ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); ++ err += 
for_each_crtc(dpy, __test_crtc, &test); ++ test.msc = check_msc(dpy, test.win, test.queue, test.msc, NULL); + ++ dri3_fence_free(dpy, &test.fence); + XSync(dpy, True); + err += !!_x_error_occurred; + +@@ -536,6 +1734,31 @@ static int gem_set_caching(int fd, uint32_t handle, int caching) + return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0; + } + ++static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) ++{ ++ struct drm_i915_gem_set_tiling set_tiling; ++ int err; ++ ++restart: ++ set_tiling.handle = handle; ++ set_tiling.tiling_mode = tiling; ++ set_tiling.stride = stride; ++ ++ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) ++ return 1; ++ ++ err = errno; ++ if (err == EINTR) ++ goto restart; ++ ++ if (err == EAGAIN) { ++ sched_yield(); ++ goto restart; ++ } ++ ++ return 0; ++} ++ + static int gem_export(int fd, uint32_t handle) + { + struct drm_prime_handle args; +@@ -557,6 +1780,126 @@ static void gem_close(int fd, uint32_t handle) + (void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close); + } + ++static int test_dri3_tiling(Display *dpy) ++{ ++ Window win = DefaultRootWindow(dpy); ++ const int tiling[] = { I915_TILING_NONE, I915_TILING_X, I915_TILING_Y }; ++ Window root; ++ unsigned int width, height; ++ unsigned border, depth, bpp; ++ unsigned stride, size; ++ void *Q; ++ int x, y; ++ int device; ++ int line = -1; ++ int t; ++ ++ device = dri3_open(dpy); ++ if (device < 0) ++ return 0; ++ ++ if (!is_intel(device)) ++ return 0; ++ ++ printf("Opened Intel DRI3 device\n"); ++ ++ XGetGeometry(dpy, win, &root, &x, &y, ++ &width, &height, &border, &depth); ++ ++ switch (depth) { ++ case 8: bpp = 8; break; ++ case 15: case 16: bpp = 16; break; ++ case 24: case 32: bpp = 32; break; ++ default: return 0; ++ } ++ ++ stride = ALIGN(width * bpp/8, 512); ++ size = PAGE_ALIGN(stride * ALIGN(height, 32)); ++ printf("Creating DRI3 %dx%d (source stride=%d, size=%d) for GTT\n", ++ width, height, stride, size); ++ ++ _x_error_occurred 
= 0; ++ Q = setup_msc(dpy, root); ++ ++ for (t = 0; t < sizeof(tiling)/sizeof(tiling[0]); t++) { ++ uint64_t msc; ++ uint32_t src; ++ int src_fd; ++ Pixmap src_pix; ++ ++ src = gem_create(device, size); ++ if (!src) { ++ line = __LINE__; ++ goto fail; ++ } ++ ++ gem_set_tiling(device, src, tiling[t], stride); ++ ++ src_fd = gem_export(device, src); ++ if (src_fd < 0) { ++ line = __LINE__; ++ goto fail; ++ } ++ ++ src_pix = dri3_create_pixmap(dpy, root, ++ width, height, depth, ++ src_fd, bpp, stride, size); ++ ++ msc = wait_vblank(dpy, root, Q); ++ ++ xcb_present_pixmap(XGetXCBConnection(dpy), ++ win, src_pix, ++ 0, /* sbc */ ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ msc + 2, /* target msc */ ++ 1, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ ++ xcb_present_pixmap(XGetXCBConnection(dpy), ++ win, src_pix, ++ 0, /* sbc */ ++ 0, /* valid */ ++ 0, /* update */ ++ 0, /* x_off */ ++ 0, /* y_off */ ++ None, ++ None, /* wait fence */ ++ None, ++ XCB_PRESENT_OPTION_NONE, ++ msc + 3, /* target msc */ ++ 1, /* divisor */ ++ 0, /* remainder */ ++ 0, NULL); ++ ++ XSync(dpy, True); ++ if (_x_error_occurred) { ++ line = __LINE__; ++ goto fail; ++ } ++ XFreePixmap(dpy, src_pix); ++ _x_error_occurred = 0; ++ ++ close(src_fd); ++ gem_close(device, src); ++ } ++ ++ teardown_msc(dpy, Q); ++ return 0; ++ ++fail: ++ printf("%s failed with tiling %d, line %d\n", __func__, tiling[t], line); ++ teardown_msc(dpy, Q); ++ return 1; ++} ++ + static int test_dri3(Display *dpy) + { + Window win = DefaultRootWindow(dpy); +@@ -670,8 +2013,32 @@ fail: + static int has_present(Display *dpy) + { + xcb_connection_t *c = XGetXCBConnection(dpy); +- xcb_present_query_version_reply_t *reply; + xcb_generic_error_t *error = NULL; ++ void *reply; ++ ++ reply = xcb_xfixes_query_version_reply(c, ++ xcb_xfixes_query_version(c, ++ XCB_XFIXES_MAJOR_VERSION, ++ XCB_XFIXES_MINOR_VERSION), ++ &error); ++ 
free(reply); ++ free(error); ++ if (reply == NULL) { ++ fprintf(stderr, "XFixes not supported on %s\n", DisplayString(dpy)); ++ return 0; ++ } ++ ++ reply = xcb_dri3_query_version_reply(c, ++ xcb_dri3_query_version(c, ++ XCB_DRI3_MAJOR_VERSION, ++ XCB_DRI3_MINOR_VERSION), ++ &error); ++ free(reply); ++ free(error); ++ if (reply == NULL) { ++ fprintf(stderr, "DRI3 not supported on %s\n", DisplayString(dpy)); ++ return 0; ++ } + + reply = xcb_present_query_version_reply(c, + xcb_present_query_version(c, +@@ -681,14 +2048,32 @@ static int has_present(Display *dpy) + + free(reply); + free(error); ++ if (reply == NULL) { ++ fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy)); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int has_composite(Display *dpy) ++{ ++ int event, error; ++ int major, minor; ++ ++ if (!XCompositeQueryExtension(dpy, &event, &error)) ++ return 0; ++ ++ XCompositeQueryVersion(dpy, &major, &minor); + +- return reply != NULL; ++ return major > 0 || minor >= 4; + } + + int main(void) + { + Display *dpy; + Window root; ++ int dummy; + int error = 0; + uint64_t last_msc; + void *queue; +@@ -700,27 +2085,135 @@ int main(void) + if (!has_present(dpy)) + return 77; + ++ if (DPMSQueryExtension(dpy, &dummy, &dummy)) ++ DPMSDisable(dpy); ++ + root = DefaultRootWindow(dpy); + + signal(SIGALRM, SIG_IGN); + XSetErrorHandler(_check_error_handler); + + queue = setup_msc(dpy, root); +- last_msc = check_msc(dpy, root, queue, 0); ++ last_msc = check_msc(dpy, root, queue, 0, NULL); ++ ++ error += test_future_msc(dpy, queue); ++ last_msc = check_msc(dpy, root, queue, last_msc, NULL); ++ ++ error += test_wrap_msc(dpy); ++ last_msc = check_msc(dpy, root, queue, last_msc, NULL); ++ ++ error += test_accuracy_msc(dpy, queue); ++ last_msc = check_msc(dpy, root, queue, last_msc, NULL); ++ ++ error += test_modulus_msc(dpy, queue); ++ last_msc = check_msc(dpy, root, queue, last_msc, NULL); ++ ++ error += test_exhaustion_msc(dpy, queue); ++ last_msc = 
check_msc(dpy, root, queue, last_msc, NULL); ++ ++ for (dummy = 0; dummy <= 3; dummy++) { ++ Window win; ++ uint64_t msc = 0; ++ XSetWindowAttributes attr; ++ Visual *visual = DefaultVisual(dpy, DefaultScreen(dpy)); ++ unsigned int width, height; ++ unsigned border, depth; ++ const char *phase; ++ int x, y; ++ void *Q; ++ ++ attr.override_redirect = 1; ++ ++ XGetGeometry(dpy, root, &win, &x, &y, ++ &width, &height, &border, &depth); ++ ++ _x_error_occurred = 0; ++ switch (dummy) { ++ case 0: ++ win = root; ++ phase = "root"; ++ break; ++ case 1: ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, depth, ++ InputOutput, visual, ++ CWOverrideRedirect, &attr); ++ phase = "fullscreen"; ++ break; ++ case 2: ++ win = XCreateWindow(dpy, root, ++ 0, 0, width/2, height/2, 0, depth, ++ InputOutput, visual, ++ CWOverrideRedirect, &attr); ++ phase = "window"; ++ break; ++ case 3: ++ if (!has_composite(dpy)) ++ continue; ++ ++ win = XCreateWindow(dpy, root, ++ 0, 0, width, height, 0, ++ DefaultDepth(dpy, DefaultScreen(dpy)), ++ InputOutput, ++ DefaultVisual(dpy, DefaultScreen(dpy)), ++ CWOverrideRedirect, &attr); ++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual); ++ phase = "composite"; ++ break; ++ ++ default: ++ phase = "broken"; ++ win = root; ++ abort(); ++ break; ++ } ++ ++ XMapWindow(dpy, win); ++ XSync(dpy, True); ++ if (_x_error_occurred) ++ continue; ++ ++ Q = setup_msc(dpy, win); ++ msc = check_msc(dpy, win, Q, msc, NULL); + +- error += test_whole(dpy); +- last_msc = check_msc(dpy, root, queue, last_msc); ++ error += test_whole(dpy, win, phase); ++ msc = check_msc(dpy, win, Q, msc, NULL); ++ ++ error += test_double(dpy, win, phase, Q); ++ msc = check_msc(dpy, win, Q, msc, NULL); ++ ++ error += test_future(dpy, win, phase, Q); ++ msc = check_msc(dpy, win, Q, msc, NULL); ++ ++ error += test_accuracy(dpy, win, phase, Q); ++ msc = check_msc(dpy, win, Q, msc, NULL); ++ ++ error += test_modulus(dpy, win, phase, Q); ++ msc = check_msc(dpy, win, Q, msc, 
NULL); ++ ++ error += test_exhaustion(dpy, win, phase, Q); ++ msc = check_msc(dpy, win, Q, msc, NULL); ++ ++ teardown_msc(dpy, Q); ++ if (win != root) ++ XDestroyWindow(dpy, win); ++ } + + error += test_crtc(dpy, queue, last_msc); +- last_msc = check_msc(dpy, root, queue, last_msc); ++ last_msc = check_msc(dpy, root, queue, last_msc, NULL); + + error += test_shm(dpy); +- last_msc = check_msc(dpy, root, queue, last_msc); ++ last_msc = check_msc(dpy, root, queue, last_msc, NULL); + + error += test_dri3(dpy); +- last_msc = check_msc(dpy, root, queue, last_msc); ++ last_msc = check_msc(dpy, root, queue, last_msc, NULL); ++ ++ error += test_dri3_tiling(dpy); ++ last_msc = check_msc(dpy, root, queue, last_msc, NULL); + + teardown_msc(dpy, queue); + ++ if (DPMSQueryExtension(dpy, &dummy, &dummy)) ++ DPMSEnable(dpy); + return !!error; + } +diff --git a/test/render-glyphs.c b/test/render-glyphs.c +new file mode 100644 +index 00000000..8822e36a +--- /dev/null ++++ b/test/render-glyphs.c +@@ -0,0 +1,441 @@ ++#include <stdint.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <stdbool.h> ++#include <stdarg.h> ++#include <string.h> ++ ++#include <X11/Xutil.h> /* for XDestroyImage */ ++#include <pixman.h> /* for pixman blt functions */ ++ ++#include "test.h" ++ ++static const XRenderColor colors[] = { ++ /* red, green, blue, alpha */ ++ { 0 }, ++ { 0, 0, 0, 0xffff }, ++ { 0xffff, 0, 0, 0xffff }, ++ { 0, 0xffff, 0, 0xffff }, ++ { 0, 0, 0xffff, 0xffff }, ++ { 0xffff, 0xffff, 0xffff, 0xffff }, ++}; ++ ++static struct clip { ++ void *func; ++} clips[] = { ++ { NULL }, ++}; ++ ++static int _x_error_occurred; ++ ++static int ++_check_error_handler(Display *display, ++ XErrorEvent *event) ++{ ++ _x_error_occurred = 1; ++ return False; /* ignored */ ++} ++ ++static void clear(struct test_display *dpy, ++ struct test_target *tt, ++ const XRenderColor *c) ++{ ++ XRenderFillRectangle(dpy->dpy, PictOpClear, tt->picture, c, ++ 0, 0, tt->width, tt->height); ++} ++ ++static bool 
check_op(struct test_display *dpy, int op, struct test_target *tt) ++{ ++ XRenderColor render_color = {0}; ++ ++ XSync(dpy->dpy, True); ++ _x_error_occurred = 0; ++ ++ XRenderFillRectangle(dpy->dpy, op, ++ tt->picture, &render_color, ++ 0, 0, 0, 0); ++ ++ XSync(dpy->dpy, True); ++ return _x_error_occurred == 0; ++} ++ ++struct glyph_iter { ++ enum { ++ GLYPHS, OP, DST, SRC, MASK, CLIP, ++ } stage; ++ ++ int glyph_format; ++ int op; ++ int dst_color; ++ int src_color; ++ int mask_format; ++ int clip; ++ ++ struct { ++ struct test_display *dpy; ++ struct test_target tt; ++ GlyphSet glyphset; ++ Picture src; ++ XRenderPictFormat *mask_format; ++ } ref, out; ++}; ++ ++static void glyph_iter_init(struct glyph_iter *gi, ++ struct test *t, enum target target) ++{ ++ memset(gi, 0, sizeof(*gi)); ++ ++ gi->out.dpy = &t->out; ++ test_target_create_render(&t->out, target, &gi->out.tt); ++ ++ gi->ref.dpy = &t->ref; ++ test_target_create_render(&t->ref, target, &gi->ref.tt); ++ ++ gi->stage = GLYPHS; ++ gi->glyph_format = -1; ++ gi->op = -1; ++ gi->dst_color = -1; ++ gi->src_color = -1; ++ gi->mask_format = -1; ++ gi->clip = -1; ++} ++ ++static void render_clear(char *image, int image_size, int bpp) ++{ ++ memset(image, 0, image_size); ++} ++ ++static void render_black(char *image, int image_size, int bpp) ++{ ++ if (bpp == 4) { ++ uint32_t *p = (uint32_t *)image; ++ image_size /= 4; ++ while (image_size--) ++ *p++ = 0x000000ff; ++ } else ++ memset(image, 0x55, image_size); ++} ++ ++static void render_green(char *image, int image_size, int bpp) ++{ ++ if (bpp == 4) { ++ uint32_t *p = (uint32_t *)image; ++ image_size /= 4; ++ while (image_size--) ++ *p++ = 0xffff0000; ++ } else ++ memset(image, 0xaa, image_size); ++} ++ ++static void render_white(char *image, int image_size, int bpp) ++{ ++ memset(image, 0xff, image_size); ++} ++ ++static GlyphSet create_glyphs(Display *dpy, int format_id) ++{ ++#define N_GLYPHS 4 ++ XRenderPictFormat *format; ++ XGlyphInfo glyph = { 8, 8, 0, 0, 
8, 0 }; ++ char image[4*8*8]; ++ GlyphSet glyphset; ++ Glyph gid; ++ int image_size; ++ int bpp; ++ int n; ++ ++ format = XRenderFindStandardFormat(dpy, format_id); ++ if (format == NULL) ++ return 0; ++ ++ switch (format_id) { ++ case PictStandardARGB32: ++ case PictStandardRGB24: ++ image_size = 4 * 8 * 8; ++ bpp = 4; ++ break; ++ case PictStandardA8: ++ case PictStandardA4: ++ image_size = 8 * 8; ++ bpp = 1; ++ break; ++ case PictStandardA1: ++ image_size = 8; ++ bpp = 0; ++ break; ++ default: ++ return 0; ++ } ++ ++ glyphset = XRenderCreateGlyphSet(dpy, format); ++ for (n = 0; n < N_GLYPHS; n++) { ++ gid = n; ++ ++ switch (n) { ++ case 0: render_clear(image, image_size, bpp); break; ++ case 1: render_black(image, image_size, bpp); break; ++ case 2: render_green(image, image_size, bpp); break; ++ case 3: render_white(image, image_size, bpp); break; ++ } ++ ++ XRenderAddGlyphs(dpy, glyphset, ++ &gid, &glyph, 1, image, image_size); ++ } ++ ++ return glyphset; ++} ++ ++static const char *glyph_name(int n) ++{ ++ switch (n) { ++ case 0: return "clear"; ++ case 1: return "black"; ++ case 2: return "green"; ++ case 3: return "white"; ++ default: return "unknown"; ++ } ++} ++ ++static bool glyph_iter_next(struct glyph_iter *gi) ++{ ++restart: ++ if (gi->stage == GLYPHS) { ++ if (++gi->glyph_format == PictStandardNUM) ++ return false; ++ ++ if (gi->out.glyphset) ++ XRenderFreeGlyphSet(gi->out.dpy->dpy, ++ gi->out.glyphset); ++ gi->out.glyphset = create_glyphs(gi->out.dpy->dpy, ++ gi->glyph_format); ++ ++ if (gi->ref.glyphset) ++ XRenderFreeGlyphSet(gi->ref.dpy->dpy, ++ gi->ref.glyphset); ++ gi->ref.glyphset = create_glyphs(gi->ref.dpy->dpy, ++ gi->glyph_format); ++ ++ gi->stage++; ++ } ++ ++ if (gi->stage == OP) { ++ do { ++ if (++gi->op == 255) ++ goto reset_op; ++ } while (!check_op(gi->out.dpy, gi->op, &gi->out.tt) || ++ !check_op(gi->ref.dpy, gi->op, &gi->ref.tt)); ++ ++ gi->stage++; ++ } ++ ++ if (gi->stage == DST) { ++ if (++gi->dst_color == ARRAY_SIZE(colors)) ++ 
goto reset_dst; ++ ++ gi->stage++; ++ } ++ ++ if (gi->stage == SRC) { ++ if (++gi->src_color == ARRAY_SIZE(colors)) ++ goto reset_src; ++ ++ if (gi->ref.src) ++ XRenderFreePicture(gi->ref.dpy->dpy, gi->ref.src); ++ gi->ref.src = XRenderCreateSolidFill(gi->ref.dpy->dpy, ++ &colors[gi->src_color]); ++ ++ if (gi->out.src) ++ XRenderFreePicture(gi->out.dpy->dpy, gi->out.src); ++ gi->out.src = XRenderCreateSolidFill(gi->out.dpy->dpy, ++ &colors[gi->src_color]); ++ ++ gi->stage++; ++ } ++ ++ if (gi->stage == MASK) { ++ if (++gi->mask_format > PictStandardNUM) ++ goto reset_mask; ++ ++ if (gi->mask_format == PictStandardRGB24) ++ gi->mask_format++; ++ ++ if (gi->mask_format < PictStandardNUM) { ++ gi->out.mask_format = XRenderFindStandardFormat(gi->out.dpy->dpy, ++ gi->mask_format); ++ gi->ref.mask_format = XRenderFindStandardFormat(gi->ref.dpy->dpy, ++ gi->mask_format); ++ } else { ++ gi->out.mask_format = NULL; ++ gi->ref.mask_format = NULL; ++ } ++ ++ gi->stage++; ++ } ++ ++ if (gi->stage == CLIP) { ++ if (++gi->clip == ARRAY_SIZE(clips)) ++ goto reset_clip; ++ ++ gi->stage++; ++ } ++ ++ gi->stage--; ++ return true; ++ ++reset_op: ++ gi->op = -1; ++reset_dst: ++ gi->dst_color = -1; ++reset_src: ++ gi->src_color = -1; ++reset_mask: ++ gi->mask_format = -1; ++reset_clip: ++ gi->clip = -1; ++ gi->stage--; ++ goto restart; ++} ++ ++static void glyph_iter_fini(struct glyph_iter *gi) ++{ ++ if (gi->out.glyphset) ++ XRenderFreeGlyphSet (gi->out.dpy->dpy, gi->out.glyphset); ++ if (gi->ref.glyphset) ++ XRenderFreeGlyphSet (gi->ref.dpy->dpy, gi->ref.glyphset); ++ ++ test_target_destroy_render(gi->out.dpy, &gi->out.tt); ++ test_target_destroy_render(gi->ref.dpy, &gi->ref.tt); ++} ++ ++static const char *stdformat_to_str(int id) ++{ ++ switch (id) { ++ case PictStandardARGB32: return "ARGB32"; ++ case PictStandardRGB24: return "RGB24"; ++ case PictStandardA8: return "A8"; ++ case PictStandardA4: return "A4"; ++ case PictStandardA1: return "A1"; ++ default: return "none"; ++ } ++} 
++ ++static char *glyph_iter_to_string(struct glyph_iter *gi, ++ const char *format, ++ ...) ++{ ++ static char buf[100]; ++ va_list ap; ++ int len; ++ ++ len = sprintf(buf, "glyphs=%s, op=%d, dst=%08x, src=%08x, mask=%s", ++ stdformat_to_str(gi->glyph_format), gi->op, ++ xrender_color(&colors[gi->dst_color]), ++ xrender_color(&colors[gi->src_color]), ++ stdformat_to_str(gi->mask_format)); ++ ++ if (format) { ++ buf[len++] = ' '; ++ va_start(ap, format); ++ vsprintf(buf+len, format, ap); ++ va_end(ap); ++ } ++ ++ return buf; ++} ++ ++static void single(struct test *t, enum target target) ++{ ++ struct glyph_iter gi; ++ int n; ++ ++ printf("Testing single glyph (%s): ", test_target_name(target)); ++ fflush(stdout); ++ ++ glyph_iter_init(&gi, t, target); ++ while (glyph_iter_next(&gi)) { ++ XGlyphElt8 elt; ++ char id[N_GLYPHS]; ++ ++ for (n = 0; n < N_GLYPHS; n++) { ++ id[n] = n; ++ ++ elt.chars = &id[n]; ++ elt.nchars = 1; ++ elt.xOff = 0; ++ elt.yOff = 0; ++ ++ clear(gi.out.dpy, &gi.out.tt, &colors[gi.dst_color]); ++ elt.glyphset = gi.out.glyphset; ++ XRenderCompositeText8 (gi.out.dpy->dpy, gi.op, ++ gi.out.src, ++ gi.out.tt.picture, ++ gi.out.mask_format, ++ 0, 0, ++ 0, 8, ++ &elt, 1); ++ ++ clear(gi.ref.dpy, &gi.ref.tt, &colors[gi.dst_color]); ++ elt.glyphset = gi.ref.glyphset; ++ XRenderCompositeText8 (gi.ref.dpy->dpy, gi.op, ++ gi.ref.src, ++ gi.ref.tt.picture, ++ gi.ref.mask_format, ++ 0, 0, ++ 0, 8, ++ &elt, 1); ++ test_compare(t, ++ gi.out.tt.draw, gi.out.tt.format, ++ gi.ref.tt.draw, gi.ref.tt.format, ++ 0, 0, gi.out.tt.width, gi.out.tt.height, ++ glyph_iter_to_string(&gi, ++ "glyph=%s", ++ glyph_name(n))); ++ } ++ ++ elt.chars = &id[0]; ++ elt.nchars = n; ++ clear(gi.out.dpy, &gi.out.tt, &colors[gi.dst_color]); ++ elt.glyphset = gi.out.glyphset; ++ XRenderCompositeText8 (gi.out.dpy->dpy, gi.op, ++ gi.out.src, ++ gi.out.tt.picture, ++ gi.out.mask_format, ++ 0, 0, ++ 0, 8, ++ &elt, 1); ++ ++ clear(gi.ref.dpy, &gi.ref.tt, &colors[gi.dst_color]); ++ 
elt.glyphset = gi.ref.glyphset; ++ XRenderCompositeText8 (gi.ref.dpy->dpy, gi.op, ++ gi.ref.src, ++ gi.ref.tt.picture, ++ gi.ref.mask_format, ++ 0, 0, ++ 0, 8, ++ &elt, 1); ++ test_compare(t, ++ gi.out.tt.draw, gi.out.tt.format, ++ gi.ref.tt.draw, gi.ref.tt.format, ++ 0, 0, gi.out.tt.width, gi.out.tt.height, ++ glyph_iter_to_string(&gi, "all")); ++ } ++ glyph_iter_fini(&gi); ++} ++ ++int main(int argc, char **argv) ++{ ++ struct test test; ++ int t; ++ ++ test_init(&test, argc, argv); ++ XSetErrorHandler(_check_error_handler); ++ ++ for (t = TARGET_FIRST; t <= TARGET_LAST; t++) { ++ single(&test, t); ++ //overlapping(&test, t); ++ //gap(&test, t); ++ //mixed(&test, t); ++ } ++ ++ return 0; ++} +diff --git a/test/render-trapezoid.c b/test/render-trapezoid.c +index cd990143..f15a78e3 100644 +--- a/test/render-trapezoid.c ++++ b/test/render-trapezoid.c +@@ -403,16 +403,141 @@ static void trap_tests(struct test *t, + free(traps); + } + ++enum edge { ++ EDGE_SHARP = PolyEdgeSharp, ++ EDGE_SMOOTH, ++}; ++ ++static const char *edge_name(enum edge edge) ++{ ++ switch (edge) { ++ default: ++ case EDGE_SHARP: return "sharp"; ++ case EDGE_SMOOTH: return "smooth"; ++ } ++} ++ ++static void set_edge(Display *dpy, Picture p, enum edge edge) ++{ ++ XRenderPictureAttributes a; ++ ++ a.poly_edge = edge; ++ XRenderChangePicture(dpy, p, CPPolyEdge, &a); ++} ++ ++static void edge_test(struct test *t, ++ enum mask mask, ++ enum edge edge, ++ enum target target) ++{ ++ struct test_target out, ref; ++ XRenderColor white = { 0xffff, 0xffff, 0xffff, 0xffff }; ++ Picture src_ref, src_out; ++ XTrapezoid trap; ++ int left_or_right, p; ++ ++ test_target_create_render(&t->out, target, &out); ++ set_edge(t->out.dpy, out.picture, edge); ++ src_out = XRenderCreateSolidFill(t->out.dpy, &white); ++ ++ test_target_create_render(&t->ref, target, &ref); ++ set_edge(t->ref.dpy, ref.picture, edge); ++ src_ref = XRenderCreateSolidFill(t->ref.dpy, &white); ++ ++ printf("Testing edges (with mask %s and %s 
edges) (%s): ", ++ mask_name(mask), ++ edge_name(edge), ++ test_target_name(target)); ++ fflush(stdout); ++ ++ for (left_or_right = 0; left_or_right <= 1; left_or_right++) { ++ for (p = -64; p <= out.width + 64; p++) { ++ char buf[80]; ++ ++ if (left_or_right) { ++ trap.left.p1.x = 0; ++ trap.left.p1.y = 0; ++ trap.left.p2.x = 0; ++ trap.left.p2.y = out.height << 16; ++ ++ trap.right.p1.x = p << 16; ++ trap.right.p1.y = 0; ++ trap.right.p2.x = out.width << 16; ++ trap.right.p2.y = out.height << 16; ++ } else { ++ trap.right.p1.x = out.width << 16; ++ trap.right.p1.y = 0; ++ trap.right.p2.x = out.width << 16; ++ trap.right.p2.y = out.height << 16; ++ ++ trap.left.p1.x = 0; ++ trap.left.p1.y = 0; ++ trap.left.p2.x = p << 16; ++ trap.left.p2.y = out.height << 16; ++ } ++ ++ trap.top = 0; ++ trap.bottom = out.height << 16; ++ ++ sprintf(buf, ++ "trap=((%d, %d), (%d, %d)), ((%d, %d), (%d, %d))\n", ++ trap.left.p1.x >> 16, trap.left.p1.y >> 16, ++ trap.left.p2.x >> 16, trap.left.p2.y >> 16, ++ trap.right.p1.x >> 16, trap.right.p1.y >> 16, ++ trap.right.p2.x >> 16, trap.right.p2.y >> 16); ++ ++ clear(&t->out, &out); ++ XRenderCompositeTrapezoids(t->out.dpy, ++ PictOpSrc, ++ src_out, ++ out.picture, ++ mask_format(t->out.dpy, mask), ++ 0, 0, ++ &trap, 1); ++ ++ clear(&t->ref, &ref); ++ XRenderCompositeTrapezoids(t->ref.dpy, ++ PictOpSrc, ++ src_ref, ++ ref.picture, ++ mask_format(t->ref.dpy, mask), ++ 0, 0, ++ &trap, 1); ++ ++ test_compare(t, ++ out.draw, out.format, ++ ref.draw, ref.format, ++ 0, 0, out.width, out.height, ++ buf); ++ } ++ } ++ ++ XRenderFreePicture(t->out.dpy, src_out); ++ test_target_destroy_render(&t->out, &out); ++ ++ XRenderFreePicture(t->ref.dpy, src_ref); ++ test_target_destroy_render(&t->ref, &ref); ++ ++ printf("pass\n"); ++} ++ + int main(int argc, char **argv) + { + struct test test; + int i, dx, dy; + enum target target; + enum mask mask; ++ enum edge edge; + enum trapezoid trapezoid; + + test_init(&test, argc, argv); + ++ for (target = 
TARGET_FIRST; target <= TARGET_LAST; target++) { ++ for (mask = MASK_NONE; mask <= MASK_A8; mask++) ++ for (edge = EDGE_SHARP; edge <= EDGE_SMOOTH; edge++) ++ edge_test(&test, mask, edge, target); ++ } ++ + for (i = 0; i <= DEFAULT_ITERATIONS; i++) { + int reps = REPS(i), sets = SETS(i); + +diff --git a/test/render-triangle.c b/test/render-triangle.c +new file mode 100644 +index 00000000..165834ce +--- /dev/null ++++ b/test/render-triangle.c +@@ -0,0 +1,180 @@ ++#include <stdint.h> ++#include <stdio.h> ++#include <stdlib.h> ++ ++#include "test.h" ++ ++enum edge { ++ EDGE_SHARP = PolyEdgeSharp, ++ EDGE_SMOOTH, ++}; ++ ++static void set_edge(Display *dpy, Picture p, enum edge edge) ++{ ++ XRenderPictureAttributes a; ++ ++ a.poly_edge = edge; ++ XRenderChangePicture(dpy, p, CPPolyEdge, &a); ++} ++ ++static XRenderPictFormat *mask_format(Display *dpy, enum mask mask) ++{ ++ switch (mask) { ++ default: ++ case MASK_NONE: return NULL; ++ case MASK_A1: return XRenderFindStandardFormat(dpy, PictStandardA1); ++ case MASK_A8: return XRenderFindStandardFormat(dpy, PictStandardA8); ++ } ++} ++ ++static const char *mask_name(enum mask mask) ++{ ++ switch (mask) { ++ default: ++ case MASK_NONE: return "none"; ++ case MASK_A1: return "a1"; ++ case MASK_A8: return "a8"; ++ } ++} ++ ++static const char *edge_name(enum edge edge) ++{ ++ switch (edge) { ++ default: ++ case EDGE_SHARP: return "sharp"; ++ case EDGE_SMOOTH: return "smooth"; ++ } ++} ++ ++static void clear(struct test_display *dpy, struct test_target *tt) ++{ ++ XRenderColor render_color = {0}; ++ XRenderFillRectangle(dpy->dpy, PictOpClear, tt->picture, &render_color, ++ 0, 0, tt->width, tt->height); ++} ++ ++static void step_to_point(int step, int width, int height, XPointFixed *p) ++{ ++ do { ++ p->x = (step - 64) << 16; ++ p->y = -64 << 16; ++ ++ step -= width - 128; ++ if (step <= 0) ++ return; ++ ++ p->x = (width + 64) << 16; ++ p->y = (step - 64) << 16; ++ step -= height - 128; ++ ++ if (step <= 0) ++ return; ++ ++ 
p->x = (width + 64 - step) << 16; ++ p->y = (height + 64) << 16; ++ step -= width - 128; ++ ++ if (step <= 0) ++ return; ++ ++ p->x = -64 << 16; ++ p->y = (height + 64 - step) << 16; ++ step -= height - 128; ++ } while (step > 0); ++} ++ ++static void edge_test(struct test *t, ++ enum mask mask, ++ enum edge edge, ++ enum target target) ++{ ++ struct test_target out, ref; ++ XRenderColor white = { 0xffff, 0xffff, 0xffff, 0xffff }; ++ Picture src_ref, src_out; ++ XTriangle tri; ++ unsigned step, max; ++ ++ test_target_create_render(&t->out, target, &out); ++ set_edge(t->out.dpy, out.picture, edge); ++ src_out = XRenderCreateSolidFill(t->out.dpy, &white); ++ ++ test_target_create_render(&t->ref, target, &ref); ++ set_edge(t->ref.dpy, ref.picture, edge); ++ src_ref = XRenderCreateSolidFill(t->ref.dpy, &white); ++ ++ printf("Testing edges (with mask %s and %s edges) (%s): ", ++ mask_name(mask), ++ edge_name(edge), ++ test_target_name(target)); ++ fflush(stdout); ++ ++ max = 2*(out.width + 128 + out.height+128); ++ step = 0; ++ for (step = 0; step <= max; step++) { ++ char buf[80]; ++ ++ step_to_point(step, out.width, out.height, &tri.p1); ++ step_to_point(step + out.width + 128, ++ out.width, out.height, ++ &tri.p2); ++ step_to_point(step + out.height + 128 + 2*(out.width + 128), ++ out.width, out.height, ++ &tri.p3); ++ ++ sprintf(buf, ++ "tri=((%d, %d), (%d, %d), (%d, %d))\n", ++ tri.p1.x >> 16, tri.p1.y >> 16, ++ tri.p2.x >> 16, tri.p2.y >> 16, ++ tri.p3.x >> 16, tri.p3.y >> 16); ++ ++ clear(&t->out, &out); ++ XRenderCompositeTriangles(t->out.dpy, ++ PictOpSrc, ++ src_out, ++ out.picture, ++ mask_format(t->out.dpy, mask), ++ 0, 0, ++ &tri, 1); ++ ++ clear(&t->ref, &ref); ++ XRenderCompositeTriangles(t->ref.dpy, ++ PictOpSrc, ++ src_ref, ++ ref.picture, ++ mask_format(t->ref.dpy, mask), ++ 0, 0, ++ &tri, 1); ++ ++ test_compare(t, ++ out.draw, out.format, ++ ref.draw, ref.format, ++ 0, 0, out.width, out.height, ++ buf); ++ } ++ ++ XRenderFreePicture(t->out.dpy, 
src_out); ++ test_target_destroy_render(&t->out, &out); ++ ++ XRenderFreePicture(t->ref.dpy, src_ref); ++ test_target_destroy_render(&t->ref, &ref); ++ ++ printf("pass\n"); ++} ++ ++int main(int argc, char **argv) ++{ ++ struct test test; ++ enum target target; ++ enum mask mask; ++ enum edge edge; ++ ++ test_init(&test, argc, argv); ++ ++ for (target = TARGET_FIRST; target <= TARGET_LAST; target++) { ++ for (mask = MASK_NONE; mask <= MASK_A8; mask++) ++ for (edge = EDGE_SHARP; edge <= EDGE_SMOOTH; edge++) ++ edge_test(&test, mask, edge, target); ++ } ++ ++ return 0; ++} +diff --git a/test/test.h b/test/test.h +index a3ef979d..9eec1cf9 100644 +--- a/test/test.h ++++ b/test/test.h +@@ -107,6 +107,15 @@ static inline uint32_t color(uint8_t red, uint8_t green, uint8_t blue, uint8_t a + return alpha << 24 | ra >> 8 << 16 | ga >> 8 << 8 | ba >> 8; + } + ++static inline uint32_t xrender_color(const XRenderColor *c) ++{ ++ uint32_t ra = c->red * c->alpha; ++ uint32_t ga = c->green * c->alpha; ++ uint32_t ba = c->blue * c->alpha; ++ ++ return c->alpha >> 8 << 24 | ra >> 24 << 16 | ga >> 24 << 8 | ba >> 24; ++} ++ + void test_timer_start(struct test_display *t, struct timespec *tv); + double test_timer_stop(struct test_display *t, struct timespec *tv); + +diff --git a/test/test_image.c b/test/test_image.c +index d15a8af8..1c076990 100644 +--- a/test/test_image.c ++++ b/test/test_image.c +@@ -197,13 +197,10 @@ void test_compare(struct test *t, + const char *info) + { + XImage out_image, ref_image; +- Pixmap tmp; +- char *out, *ref; ++ uint32_t *out, *ref; + char buf[600]; + uint32_t mask; + int i, j; +- XGCValues gcv; +- GC gc; + + if (w * h * 4 > t->out.max_shm_size) + return test_compare_fallback(t, +@@ -214,37 +211,24 @@ void test_compare(struct test *t, + test_init_image(&out_image, &t->out.shm, out_format, w, h); + test_init_image(&ref_image, &t->ref.shm, ref_format, w, h); + +- gcv.graphics_exposures = 0; +- + die_unless(out_image.depth == ref_image.depth); + 
die_unless(out_image.bits_per_pixel == ref_image.bits_per_pixel); + die_unless(out_image.bits_per_pixel == 32); + +- mask = depth_mask(out_image.depth); ++ XShmGetImage(t->out.dpy, out_draw, &out_image, x, y, AllPlanes); ++ out = (uint32_t *)out_image.data; + +- tmp = XCreatePixmap(t->out.dpy, out_draw, w, h, out_image.depth); +- gc = XCreateGC(t->out.dpy, tmp, GCGraphicsExposures, &gcv); +- XCopyArea(t->out.dpy, out_draw, tmp, gc, x, y, w, h, 0, 0); +- XShmGetImage(t->out.dpy, tmp, &out_image, 0, 0, AllPlanes); +- XFreeGC(t->out.dpy, gc); +- XFreePixmap(t->out.dpy, tmp); +- out = out_image.data; +- +- tmp = XCreatePixmap(t->ref.dpy, ref_draw, w, h, ref_image.depth); +- gc = XCreateGC(t->ref.dpy, tmp, GCGraphicsExposures, &gcv); +- XCopyArea(t->ref.dpy, ref_draw, tmp, gc, x, y, w, h, 0, 0); +- XShmGetImage(t->ref.dpy, tmp, &ref_image, 0, 0, AllPlanes); +- XFreeGC(t->ref.dpy, gc); +- XFreePixmap(t->ref.dpy, tmp); +- ref = ref_image.data; ++ XShmGetImage(t->ref.dpy, ref_draw, &ref_image, x, y, AllPlanes); ++ ref = (uint32_t *)ref_image.data; + + /* Start with an exact comparison. However, one quicky desires + * a fuzzy comparator to hide hardware inaccuracies... 
+ */ ++ mask = depth_mask(out_image.depth); + for (j = 0; j < h; j++) { + for (i = 0; i < w; i++) { +- uint32_t a = ((uint32_t *)out)[i] & mask; +- uint32_t b = ((uint32_t *)ref)[i] & mask; ++ uint32_t a = out[i] & mask; ++ uint32_t b = ref[i] & mask; + if (a != b && pixel_difference(a, b) > MAX_DELTA) { + show_pixels(buf, + &out_image, &ref_image, +@@ -255,8 +239,8 @@ void test_compare(struct test *t, + x,i, y,j, a, b, pixel_difference(a, b), buf, info); + } + } +- out += out_image.bytes_per_line; +- ref += ref_image.bytes_per_line; ++ out = (uint32_t *)((char *)out + out_image.bytes_per_line); ++ ref = (uint32_t *)((char *)ref + ref_image.bytes_per_line); + } + } + +diff --git a/test/xvidmode.c b/test/xvidmode.c +new file mode 100644 +index 00000000..5cde8286 +--- /dev/null ++++ b/test/xvidmode.c +@@ -0,0 +1,54 @@ ++#include <stdlib.h> ++#include <stdio.h> ++#include <string.h> ++#include <X11/Xlib.h> ++#include <X11/extensions/xf86vmode.h> ++ ++int main(void) ++{ ++ Display *dpy; ++ XF86VidModeModeLine current; ++ XF86VidModeModeInfo **modes; ++ int num_modes, i; ++ int saved_mode = -1; ++ int dotclock; ++ ++ dpy = XOpenDisplay(NULL); ++ if (dpy == NULL) ++ dpy = XOpenDisplay(":0"); ++ ++ XF86VidModeGetModeLine(dpy, DefaultScreen(dpy), &dotclock, &current); ++ XF86VidModeGetAllModeLines(dpy, XDefaultScreen(dpy), ++ &num_modes, &modes); ++ for (i = 0; i < num_modes; i++) { ++ int this; ++ ++ this = (current.hdisplay == modes[i]->hdisplay && ++ current.vdisplay == modes[i]->vdisplay && ++ dotclock == modes[i]->dotclock); ++ if (this && saved_mode == -1) ++ saved_mode = i; ++ ++ printf("[%d] %dx%d%s\n", ++ i, ++ modes[i]->hdisplay, ++ modes[i]->vdisplay, ++ this ? 
"*" : ""); ++ } ++ ++ for (i = 0; i < num_modes; i++) { ++ printf("Switching to mode %dx%d\n", ++ modes[i]->hdisplay, ++ modes[i]->vdisplay); ++ XF86VidModeSwitchToMode(dpy, XDefaultScreen(dpy), modes[i]); ++ XSync(dpy, True); ++ } ++ ++ if (saved_mode != -1) { ++ XF86VidModeSwitchToMode(dpy, XDefaultScreen(dpy), ++ modes[saved_mode]); ++ XFlush(dpy); ++ } ++ ++ return 0; ++} +diff --git a/tools/Makefile.am b/tools/Makefile.am +index b5de2c96..92df266b 100644 +--- a/tools/Makefile.am ++++ b/tools/Makefile.am +@@ -26,13 +26,30 @@ AM_CFLAGS = \ + drivermandir = $(DRIVER_MAN_DIR) + policydir = $(datarootdir)/polkit-1/actions + ++bin_PROGRAMS = ++noinst_PROGRAMS = ++libexec_PROGRAMS = ++ + if BUILD_TOOLS +-bin_PROGRAMS = intel-virtual-output ++bin_PROGRAMS += intel-virtual-output + driverman_DATA = intel-virtual-output.$(DRIVER_MAN_SUFFIX) + endif + ++if BUILD_TOOL_CURSOR ++noinst_PROGRAMS += cursor ++cursor_CFLAGS = $(TOOL_CURSOR_CFLAGS) ++cursor_LDADD = $(TOOL_CURSOR_LIBS) ++endif ++ ++if X11_DRI3 ++noinst_PROGRAMS += dri3info ++dri3info_SOURCES = dri3info.c ++dri3info_CFLAGS = $(X11_DRI3_CFLAGS) $(DRI_CFLAGS) ++dri3info_LDADD = $(X11_DRI3_LIBS) $(DRI_LIBS) ++endif ++ + if BUILD_BACKLIGHT_HELPER +-libexec_PROGRAMS = xf86-video-intel-backlight-helper ++libexec_PROGRAMS += xf86-video-intel-backlight-helper + nodist_policy_DATA = org.x.xf86-video-intel.backlight-helper.policy + + backlight_helper = $(libexecdir)/xf86-video-intel-backlight-helper +diff --git a/tools/backlight_helper.c b/tools/backlight_helper.c +index 8b2667dc..aadb8fac 100644 +--- a/tools/backlight_helper.c ++++ b/tools/backlight_helper.c +@@ -1,3 +1,7 @@ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ + #include <stdio.h> + #include <string.h> + #include <stdarg.h> +@@ -9,6 +13,12 @@ + #include <sys/types.h> + #include <sys/stat.h> + ++#if MAJOR_IN_MKDEV ++#include <sys/mkdev.h> ++#elif MAJOR_IN_SYSMACROS ++#include <sys/sysmacros.h> ++#endif ++ + #define DBG 0 + + #if defined(__GNUC__) && 
(__GNUC__ > 3) +diff --git a/tools/cursor.c b/tools/cursor.c +new file mode 100644 +index 00000000..6a2438ad +--- /dev/null ++++ b/tools/cursor.c +@@ -0,0 +1,127 @@ ++/* ++ * Copyright © 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. 
++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include <X11/Xlib.h> ++#include <X11/extensions/Xfixes.h> ++ ++#include <stdint.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <png.h> ++ ++int main(int argc, char **argv) ++{ ++ Display *dpy; ++ XFixesCursorImage *cur; ++ unsigned long *src; /* XXX deep sigh */ ++ unsigned x, y; ++ png_struct *png; ++ png_info *info; ++ png_byte **rows; ++ FILE *file; ++ ++ dpy = XOpenDisplay(NULL); ++ if (dpy == NULL) ++ return 1; ++ ++ if (!XFixesQueryExtension(dpy, (int *)&x, (int *)&y)) ++ return 1; ++ ++ cur = XFixesGetCursorImage(dpy); ++ if (cur == NULL) ++ return 1; ++ ++ printf("Cursor on display '%s': %dx%d, (hotspot %dx%d)\n", ++ DisplayString(dpy), ++ cur->width, cur->height, ++ cur->xhot, cur->yhot); ++ ++ if (1) { ++ int x, y; ++ ++ src = cur->pixels; ++ for (y = 0; y < cur->height; y++) { ++ for (x = 0; x < cur->width; x++) { ++ if (x == cur->xhot && y == cur->yhot) ++ printf("+"); ++ else ++ printf("%c", *src ? *src >> 24 >= 127 ? 'x' : '.' 
: ' '); ++ src++; ++ } ++ printf("\n"); ++ } ++ } ++ ++ file = fopen("cursor.png", "wb"); ++ if (file == NULL) ++ return 2; ++ ++ png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); ++ info = png_create_info_struct(png); ++ png_init_io(png, file); ++ png_set_IHDR(png, info, ++ cur->width, cur->height, 8, ++ PNG_COLOR_TYPE_RGB_ALPHA, ++ PNG_INTERLACE_NONE, ++ PNG_COMPRESSION_TYPE_DEFAULT, ++ PNG_FILTER_TYPE_DEFAULT); ++ png_write_info(png, info); ++ ++ src = cur->pixels; ++ rows = malloc(cur->height*sizeof(png_byte*)); ++ if (rows == NULL) ++ return 3; ++ ++ for (y = 0; y < cur->height; y++) { ++ rows[y] = malloc(cur->width * 4); ++ for (x = 0; x < cur->width; x++) { ++ uint32_t p = *src++; ++ uint8_t r = p >> 0; ++ uint8_t g = p >> 8; ++ uint8_t b = p >> 16; ++ uint8_t a = p >> 24; ++ ++ if (a > 0x00 && a < 0xff) { ++ r = (r * 0xff + a /2) / a; ++ g = (g * 0xff + a /2) / a; ++ b = (b * 0xff + a /2) / a; ++ } ++ ++ rows[y][4*x + 0] = b; ++ rows[y][4*x + 1] = g; ++ rows[y][4*x + 2] = r; ++ rows[y][4*x + 3] = a; ++ } ++ } ++ ++ png_write_image(png, rows); ++ png_write_end(png, NULL); ++ fclose(file); ++ ++ return 0; ++} +diff --git a/tools/dri3info.c b/tools/dri3info.c +new file mode 100644 +index 00000000..0c33fc5a +--- /dev/null ++++ b/tools/dri3info.c +@@ -0,0 +1,329 @@ ++/* ++ * Copyright (c) 2015 Intel Corporation ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * 
Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ * ++ * To compile standalone: gcc -o dri3info dri3info.c `pkg-config --cflags --libs xcb-dri3 x11-xcb xrandr xxf86vm libdrm` ++ */ ++ ++#include <X11/Xlib.h> ++#include <X11/Xlib-xcb.h> ++#include <xcb/xcb.h> ++#include <xcb/dri3.h> ++#include <unistd.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <stdint.h> ++#include <string.h> ++#include <sys/stat.h> ++#include <drm.h> ++#include <xf86drm.h> ++ ++#include <X11/extensions/Xrandr.h> ++#include <X11/extensions/xf86vmode.h> ++ ++static int dri3_query_version(Display *dpy, int *major, int *minor) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ xcb_dri3_query_version_reply_t *reply; ++ xcb_generic_error_t *error; ++ ++ *major = *minor = -1; ++ ++ reply = xcb_dri3_query_version_reply(c, ++ xcb_dri3_query_version(c, ++ XCB_DRI3_MAJOR_VERSION, ++ XCB_DRI3_MINOR_VERSION), ++ &error); ++ free(error); ++ if (reply == NULL) ++ return -1; ++ ++ *major = reply->major_version; ++ *minor = reply->minor_version; ++ free(reply); ++ ++ return 0; ++} ++ ++static int dri3_exists(Display *dpy) ++{ ++ const xcb_query_extension_reply_t *ext; ++ int major, minor; ++ ++ ext = xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id); ++ if (ext == NULL || !ext->present) ++ return 0; ++ ++ if (dri3_query_version(dpy, &major, &minor) < 0) ++ return 0; ++ ++ return major >= 0; ++} ++ ++static int dri3_open(Display *dpy) ++{ ++ xcb_connection_t *c = XGetXCBConnection(dpy); ++ xcb_dri3_open_cookie_t cookie; ++ 
xcb_dri3_open_reply_t *reply; ++ ++ if (!dri3_exists(dpy)) ++ return -1; ++ ++ cookie = xcb_dri3_open(c, RootWindow(dpy, DefaultScreen(dpy)), None); ++ reply = xcb_dri3_open_reply(c, cookie, NULL); ++ ++ if (!reply) ++ return -1; ++ ++ if (reply->nfd != 1) ++ return -1; ++ ++ return xcb_dri3_open_reply_fds(c, reply)[0]; ++} ++ ++static void get_device_path(int fd, char *buf, int len) ++{ ++ struct stat remote, local; ++ int i; ++ ++ if (fstat(fd, &remote)) ++ goto out; ++ ++ for (i = 0; i < 16; i++) { ++ snprintf(buf, len, "/dev/dri/card%d", i); ++ if (stat(buf, &local)) ++ continue; ++ ++ if (local.st_mode == remote.st_mode && ++ local.st_rdev == remote.st_rdev) ++ return; ++ ++ snprintf(buf, len, "/dev/dri/renderD%d", i + 128); ++ if (stat(buf, &local)) ++ continue; ++ ++ if (local.st_mode == remote.st_mode && ++ local.st_rdev == remote.st_rdev) ++ return; ++ } ++ ++out: ++ strncpy(buf, "unknown path", len); ++} ++ ++static void get_driver_name(int fd, char *name, int len) ++{ ++ drm_version_t version; ++ ++ memset(name, 0, len); ++ memset(&version, 0, sizeof(version)); ++ version.name_len = len; ++ version.name = name; ++ ++ (void)drmIoctl(fd, DRM_IOCTL_VERSION, &version); ++} ++ ++static int compute_refresh_rate_from_mode(long n, long d, unsigned flags, ++ int32_t *numerator, ++ int32_t *denominator) ++{ ++ int i; ++ ++ /* The mode flags are only defined privately to the Xserver (in xf86str.h) ++ * but they at least bit compatible between VidMode, RandR and DRM. ++ */ ++# define V_INTERLACE 0x010 ++# define V_DBLSCAN 0x020 ++ ++ if (flags & V_INTERLACE) ++ n *= 2; ++ else if (flags & V_DBLSCAN) ++ d *= 2; ++ ++ /* The OML_sync_control spec requires that if the refresh rate is a ++ * whole number, that the returned numerator be equal to the refresh ++ * rate and the denominator be 1. ++ */ ++ ++ if (n % d == 0) { ++ n /= d; ++ d = 1; ++ } ++ else { ++ static const unsigned f[] = { 13, 11, 7, 5, 3, 2, 0 }; ++ ++ /* This is a poor man's way to reduce a fraction. 
It's far from ++ * perfect, but it will work well enough for this situation. ++ */ ++ ++ for (i = 0; f[i] != 0; i++) { ++ while (n % f[i] == 0 && d % f[i] == 0) { ++ d /= f[i]; ++ n /= f[i]; ++ } ++ } ++ } ++ ++ *numerator = n; ++ *denominator = d; ++ return 1; ++} ++ ++static int RRGetMscRate(Display *dpy, int32_t *numerator, int32_t *denominator) ++{ ++ int ret = 0; ++ Window root = RootWindow(dpy, DefaultScreen(dpy)); ++ XRRScreenResources *res; ++ int rr_event, rr_error; ++ RROutput primary; ++ RRMode mode = 0; ++ int n; ++ ++ if (!XRRQueryExtension(dpy, &rr_event, &rr_error)) ++ return ret; ++ ++ res = XRRGetScreenResourcesCurrent(dpy, root); ++ if (res == NULL) ++ return ret; ++ ++ /* Use the primary output if specified, otherwise ++ * use the mode on the first enabled crtc. ++ */ ++ primary = XRRGetOutputPrimary(dpy, root); ++ if (primary) { ++ XRROutputInfo *output; ++ ++ output = XRRGetOutputInfo(dpy, res, primary); ++ if (output != NULL) { ++ if (output->crtc) { ++ XRRCrtcInfo *crtc; ++ ++ crtc = XRRGetCrtcInfo(dpy, res, output->crtc); ++ if (crtc) { ++ mode = crtc->mode; ++ XRRFreeCrtcInfo(crtc); ++ } ++ } ++ XRRFreeOutputInfo(output); ++ } ++ } ++ ++ for (n = 0; mode == 0 && n < res->ncrtc; n++) { ++ XRRCrtcInfo *crtc; ++ ++ crtc = XRRGetCrtcInfo(dpy, res, res->crtcs[n]); ++ if (crtc) { ++ mode = crtc->mode; ++ XRRFreeCrtcInfo(crtc); ++ } ++ } ++ ++ for (n = 0; n < res->nmode; n++) { ++ if (res->modes[n].id == mode) { ++ ret = compute_refresh_rate_from_mode(res->modes[n].dotClock, ++ res->modes[n].hTotal*res->modes[n].vTotal, ++ res->modes[n].modeFlags, ++ numerator, denominator); ++ break; ++ } ++ } ++ ++ XRRFreeScreenResources(res); ++ return ret; ++} ++ ++static int VMGetMscRate(Display *dpy, int32_t *numerator, int32_t *denominator) ++{ ++ XF86VidModeModeLine mode_line; ++ int dot_clock; ++ int i; ++ ++ if (XF86VidModeQueryVersion(dpy, &i, &i) && ++ XF86VidModeGetModeLine(dpy, DefaultScreen(dpy), &dot_clock, &mode_line)) ++ return 
compute_refresh_rate_from_mode(dot_clock * 1000, ++ mode_line.vtotal * mode_line.htotal, ++ mode_line.flags, ++ numerator, denominator); ++ ++ return 0; ++} ++ ++static int get_refresh_rate(Display *dpy, ++ int32_t *numerator, ++ int32_t *denominator) ++{ ++ if (RRGetMscRate(dpy, numerator, denominator)) ++ return 1; ++ ++ if (VMGetMscRate(dpy, numerator, denominator)) ++ return 1; ++ ++ return 0; ++} ++ ++static void info(const char *dpyname) ++{ ++ Display *dpy; ++ int device; ++ int32_t numerator, denominator; ++ ++ dpy = XOpenDisplay(dpyname); ++ if (dpy == NULL) { ++ printf("Unable to connect to display '%s'\n", ++ dpyname ?: getenv("DISPLAY") ?: "unset"); ++ return; ++ } ++ ++ printf("Display '%s'\n", DisplayString(dpy)); ++ device = dri3_open(dpy); ++ if (device < 0) { ++ printf("\tUnable to connect to DRI3\n"); ++ } else { ++ char device_path[1024]; ++ char driver_name[1024]; ++ ++ get_device_path(device, device_path, sizeof(device_path)); ++ get_driver_name(device, driver_name, sizeof(driver_name)); ++ ++ printf("Connected to DRI3, using fd %d which matches %s, driver %s\n", ++ device, device_path, driver_name); ++ close(device); ++ } ++ ++ if (get_refresh_rate(dpy, &numerator, &denominator)) ++ printf("\tPrimary refresh rate: %d/%d (%.1fHz)\n", ++ numerator, denominator, numerator/(float)denominator); ++ ++ XCloseDisplay(dpy); ++} ++ ++int main(int argc, char **argv) ++{ ++ int i; ++ ++ if (argc > 1) { ++ for (i = 1; i < argc; i++) ++ info(argv[i]); ++ } else ++ info(NULL); ++ ++ return 0; ++} +diff --git a/tools/virtual.c b/tools/virtual.c +index 8e2b4a22..fc8db2b9 100644 +--- a/tools/virtual.c ++++ b/tools/virtual.c +@@ -31,6 +31,7 @@ + + #include <X11/Xlibint.h> + #include <X11/extensions/record.h> ++#include <X11/extensions/scrnsaver.h> + #include <X11/extensions/XShm.h> + #if HAVE_X11_EXTENSIONS_SHMPROTO_H + #include <X11/extensions/shmproto.h> +@@ -79,13 +80,15 @@ static int verbose; + #define DRAW 0x8 + #define DAMAGE 0x10 + #define CURSOR 0x20 
+-#define POLL 0x40 ++#define SCREEN 0x40 ++#define POLL 0x80 + + struct display { + Display *dpy; + struct clone *clone; + struct context *ctx; + ++ int saver_event, saver_error, saver_active; + int damage_event, damage_error; + int xfixes_event, xfixes_error; + int rr_event, rr_error, rr_active; +@@ -98,6 +101,7 @@ struct display { + int width; + int height; + int depth; ++ int active; + + XRenderPictFormat *root_format; + XRenderPictFormat *rgb16_format; +@@ -111,7 +115,7 @@ struct display { + Cursor invisible_cursor; + Cursor visible_cursor; + +- XcursorImage cursor_image; ++ XcursorImage cursor_image; /* first only */ + int cursor_serial; + int cursor_x; + int cursor_y; +@@ -123,6 +127,13 @@ struct display { + int send; + int skip_clone; + int skip_frame; ++ ++ struct { ++ int timeout; ++ int interval; ++ int prefer_blank; ++ int allow_exp; ++ } saver; + }; + + struct output { +@@ -145,6 +156,7 @@ struct output { + XRenderPictFormat *use_render; + + int x, y; ++ int width, height; + XRRModeInfo mode; + Rotation rotation; + }; +@@ -218,6 +230,13 @@ static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Wi + static int _x_error_occurred; + + static int ++_io_error_handler(Display *display) ++{ ++ fprintf(stderr, "XIO error on display %s\n", DisplayString(display)); ++ abort(); ++} ++ ++static int + _check_error_handler(Display *display, + XErrorEvent *event) + { +@@ -243,6 +262,10 @@ can_use_shm(Display *dpy, + XExtCodes *codes; + int major, minor, has_shm, has_pixmap; + ++ *shm_event = 0; ++ *shm_opcode = 0; ++ *shm_pixmap = 0; ++ + if (!XShmQueryExtension(dpy)) + return 0; + +@@ -320,6 +343,7 @@ can_use_shm(Display *dpy, + #include <X11/Xlib-xcb.h> + #include <X11/xshmfence.h> + #include <xcb/xcb.h> ++#include <xcb/xcbext.h> + #include <xcb/dri3.h> + #include <xcb/sync.h> + static Pixmap dri3_create_pixmap(Display *dpy, +@@ -357,6 +381,7 @@ static int dri3_query_version(Display *dpy, int *major, int *minor) + { + xcb_connection_t *c = 
XGetXCBConnection(dpy); + xcb_dri3_query_version_reply_t *reply; ++ xcb_generic_error_t *error; + + *major = *minor = -1; + +@@ -364,7 +389,8 @@ static int dri3_query_version(Display *dpy, int *major, int *minor) + xcb_dri3_query_version(c, + XCB_DRI3_MAJOR_VERSION, + XCB_DRI3_MINOR_VERSION), +- NULL); ++ &error); ++ free(error); + if (reply == NULL) + return -1; + +@@ -377,8 +403,13 @@ static int dri3_query_version(Display *dpy, int *major, int *minor) + + static int dri3_exists(Display *dpy) + { ++ const xcb_query_extension_reply_t *ext; + int major, minor; + ++ ext = xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id); ++ if (ext == NULL || !ext->present) ++ return 0; ++ + if (dri3_query_version(dpy, &major, &minor) < 0) + return 0; + +@@ -809,6 +840,10 @@ static int clone_update_modes__fixed(struct clone *clone) + RRMode id; + int i, j, ret = ENOENT; + ++ DBG(X11, ("%s-%s cloning modes fixed %dx%d\n", ++ DisplayString(clone->dst.dpy), clone->dst.name, ++ clone->dst.width, clone->dst.height)); ++ + assert(clone->src.rr_output); + + res = _XRRGetScreenResourcesCurrent(clone->src.dpy, clone->src.window); +@@ -837,8 +872,8 @@ static int clone_update_modes__fixed(struct clone *clone) + + /* Create matching mode for the real output on the virtual */ + memset(&mode, 0, sizeof(mode)); +- mode.width = clone->width; +- mode.height = clone->height; ++ mode.width = clone->dst.width; ++ mode.height = clone->dst.height; + mode.nameLength = sprintf(mode_name, "FAKE-%dx%d", mode.width, mode.height); + mode.name = mode_name; + +@@ -942,6 +977,35 @@ out: + return rr_output; + } + ++static int check_virtual(struct display *display) ++{ ++ XRRScreenResources *res; ++ int found = -ENOENT; ++ int i; ++ ++ res = _XRRGetScreenResourcesCurrent(display->dpy, display->root); ++ if (res == NULL) ++ return -ENOMEM; ++ ++ for (i = 0; found == -ENOENT && i < res->noutput; i++) { ++ XRROutputInfo *output; ++ ++ output = XRRGetOutputInfo(display->dpy, res, res->outputs[i]); ++ if 
(output == NULL) ++ continue; ++ ++ if (strcmp(output->name, "VIRTUAL1") == 0) ++ found = 0; ++ ++ XRRFreeOutputInfo(output); ++ } ++ XRRFreeScreenResources(res); ++ ++ DBG(XRR, ("%s(%s): has VIRTUAL1? %d\n", ++ __func__, DisplayString(display->dpy), found)); ++ return found; ++} ++ + static int stride_for_depth(int width, int depth) + { + if (depth == 24) +@@ -1082,20 +1146,20 @@ static int clone_init_xfer(struct clone *clone) + width = 0; + height = 0; + } else if (clone->dri3.xid) { +- width = clone->dst.display->width; +- height = clone->dst.display->height; ++ width = clone->dst.width; ++ height = clone->dst.height; + } else { + width = mode_width(&clone->src.mode, clone->src.rotation); + height = mode_height(&clone->src.mode, clone->src.rotation); + } + ++ DBG(DRAW, ("%s-%s create xfer, %dx%d (currently %dx%d)\n", ++ DisplayString(clone->dst.dpy), clone->dst.name, ++ width, height, clone->width, clone->height)); ++ + if (width == clone->width && height == clone->height) + return 0; + +- DBG(DRAW, ("%s-%s create xfer, %dx%d\n", +- DisplayString(clone->dst.dpy), clone->dst.name, +- width, height)); +- + if (clone->shm.shmaddr) { + if (clone->src.use_shm) + XShmDetach(clone->src.dpy, &clone->src.shm); +@@ -1225,6 +1289,56 @@ static void clone_update(struct clone *clone) + clone->rr_update = 0; + } + ++static void screensaver_save(struct display *display) ++{ ++ display->saver_active = ++ XScreenSaverQueryExtension(display->dpy, ++ &display->saver_event, ++ &display->saver_error); ++ DBG(SCREEN, ++ ("%s screen saver active? 
%d [event=%d, error=%d]\n", ++ DisplayString(display->dpy), ++ display->saver_active, ++ display->saver_event, ++ display->saver_error)); ++ ++ XGetScreenSaver(display->dpy, ++ &display->saver.timeout, ++ &display->saver.interval, ++ &display->saver.prefer_blank, ++ &display->saver.allow_exp); ++ ++ DBG(SCREEN, ++ ("%s saving screen saver defaults: timeout=%d interval=%d prefer_blank=%d allow_exp=%d\n", ++ DisplayString(display->dpy), ++ display->saver.timeout, ++ display->saver.interval, ++ display->saver.prefer_blank, ++ display->saver.allow_exp)); ++} ++ ++static void screensaver_disable(struct display *display) ++{ ++ DBG(SCREEN, ++ ("%s disabling screen saver\n", DisplayString(display->dpy))); ++ ++ XSetScreenSaver(display->dpy, 0, 0, DefaultBlanking, DefaultExposures); ++ display_mark_flush(display); ++} ++ ++static void screensaver_restore(struct display *display) ++{ ++ DBG(SCREEN, ++ ("%s restoring screen saver\n", DisplayString(display->dpy))); ++ ++ XSetScreenSaver(display->dpy, ++ display->saver.timeout, ++ display->saver.interval, ++ display->saver.prefer_blank, ++ display->saver.allow_exp); ++ display_mark_flush(display); ++} ++ + static int context_update(struct context *ctx) + { + Display *dpy = ctx->display->dpy; +@@ -1325,8 +1439,19 @@ static int context_update(struct context *ctx) + struct clone *clone; + int x1, x2, y1, y2; + +- if (display->rr_active == 0) ++ if (display->rr_active == 0) { ++ for (clone = display->clone; clone; clone = clone->next) { ++ struct output *output = &clone->src; ++ if (output->mode.id) { ++ clone->dst.mode.id = -1; ++ clone->dst.rr_crtc = -1; ++ } else { ++ clone->dst.mode.id = 0; ++ clone->dst.rr_crtc = 0; ++ } ++ } + continue; ++ } + + x1 = y1 = INT_MAX; + x2 = y2 = INT_MIN; +@@ -1570,6 +1695,13 @@ ungrab: + XUngrabServer(display->dpy); + } + ++ for (n = 1; n < ctx->ndisplay; n++) { ++ struct display *display = &ctx->display[n]; ++ ++ display->active = 0; ++ screensaver_restore(display); ++ } ++ + ctx->active = 
NULL; + for (n = 0; n < ctx->nclone; n++) { + struct clone *clone = &ctx->clones[n]; +@@ -1580,7 +1712,10 @@ ungrab: + continue; + + DBG(XRR, ("%s-%s: added to active list\n", +- DisplayString(clone->dst.display->dpy), clone->dst.name)); ++ DisplayString(clone->dst.display->dpy), clone->dst.name)); ++ ++ if (clone->dst.display->active++ == 0) ++ screensaver_disable(clone->dst.display); + + clone->active = ctx->active; + ctx->active = clone; +@@ -1599,14 +1734,17 @@ static Cursor display_load_invisible_cursor(struct display *display) + + static Cursor display_get_visible_cursor(struct display *display) + { +- if (display->cursor_serial != display->cursor_image.size) { +- DBG(CURSOR, ("%s updating cursor\n", DisplayString(display->dpy))); ++ struct display *first = display->ctx->display; ++ ++ if (display->cursor_serial != first->cursor_serial) { ++ DBG(CURSOR, ("%s updating cursor %dx%d, serial %d\n", ++ DisplayString(display->dpy), first->cursor_image.width, first->cursor_image.height, first->cursor_serial)); + + if (display->visible_cursor) + XFreeCursor(display->dpy, display->visible_cursor); + +- display->visible_cursor = XcursorImageLoadCursor(display->dpy, &display->cursor_image); +- display->cursor_serial = display->cursor_image.size; ++ display->visible_cursor = XcursorImageLoadCursor(display->dpy, &first->cursor_image); ++ display->cursor_serial = first->cursor_serial; + } + + return display->visible_cursor; +@@ -1629,7 +1767,7 @@ static void display_load_visible_cursor(struct display *display, XFixesCursorIma + display->cursor_image.height = cur->height; + display->cursor_image.xhot = cur->xhot; + display->cursor_image.yhot = cur->yhot; +- display->cursor_image.size++; ++ display->cursor_serial++; + + n = cur->width*cur->height; + src = cur->pixels; +@@ -1637,11 +1775,24 @@ static void display_load_visible_cursor(struct display *display, XFixesCursorIma + while (n--) + *dst++ = *src++; + +- DBG(CURSOR, ("%s marking cursor changed\n", 
DisplayString(display->dpy))); +- display->cursor_moved++; +- if (display->cursor != display->invisible_cursor) { +- display->cursor_visible++; +- context_enable_timer(display->ctx); ++ if (verbose & CURSOR) { ++ int x, y; ++ ++ printf("%s cursor image %dx%d, serial %d:\n", ++ DisplayString(display->dpy), ++ cur->width, cur->height, ++ display->cursor_serial); ++ dst = display->cursor_image.pixels; ++ for (y = 0; y < cur->height; y++) { ++ for (x = 0; x < cur->width; x++) { ++ if (x == cur->xhot && y == cur->yhot) ++ printf("+"); ++ else ++ printf("%c", *dst ? *dst >> 24 >= 127 ? 'x' : '.' : ' '); ++ dst++; ++ } ++ printf("\n"); ++ } + } + } + +@@ -1685,6 +1836,8 @@ static void display_flush_cursor(struct display *display) + if (cursor == None) + cursor = display->invisible_cursor; + if (cursor != display->cursor) { ++ DBG(CURSOR, ("%s setting cursor shape %lx\n", ++ DisplayString(display->dpy), (long)cursor)); + XDefineCursor(display->dpy, display->root, cursor); + display->cursor = cursor; + } +@@ -1762,6 +1915,8 @@ static void get_src(struct clone *c, const XRectangle *clip) + c->image.obdata = (char *)&c->src.shm; + + if (c->src.use_render) { ++ DBG(DRAW, ("%s-%s get_src via XRender\n", ++ DisplayString(c->dst.dpy), c->dst.name)); + XRenderComposite(c->src.dpy, PictOpSrc, + c->src.win_picture, 0, c->src.pix_picture, + clip->x, clip->y, +@@ -1782,16 +1937,22 @@ static void get_src(struct clone *c, const XRectangle *clip) + &c->image, 0, 0); + } + } else if (c->src.pixmap) { ++ DBG(DRAW, ("%s-%s get_src XCopyArea (SHM/DRI3)\n", ++ DisplayString(c->dst.dpy), c->dst.name)); + XCopyArea(c->src.dpy, c->src.window, c->src.pixmap, c->src.gc, + clip->x, clip->y, + clip->width, clip->height, + 0, 0); + XSync(c->src.dpy, False); + } else if (c->src.use_shm) { ++ DBG(DRAW, ("%s-%s get_src XShmGetImage\n", ++ DisplayString(c->dst.dpy), c->dst.name)); + ximage_prepare(&c->image, clip->width, clip->height); + XShmGetImage(c->src.dpy, c->src.window, &c->image, + clip->x, 
clip->y, AllPlanes); + } else { ++ DBG(DRAW, ("%s-%s get_src XGetSubImage (slow)\n", ++ DisplayString(c->dst.dpy), c->dst.name)); + ximage_prepare(&c->image, c->width, c->height); + XGetSubImage(c->src.dpy, c->src.window, + clip->x, clip->y, clip->width, clip->height, +@@ -1838,7 +1999,7 @@ static void put_dst(struct clone *c, const XRectangle *clip) + clip->width, clip->height); + c->dst.display->send |= c->dst.use_shm; + } else if (c->dst.pixmap) { +- DBG(DRAW, ("%s-%s using SHM pixmap\n", ++ DBG(DRAW, ("%s-%s using SHM or DRI3 pixmap\n", + DisplayString(c->dst.dpy), c->dst.name)); + c->dst.serial = NextRequest(c->dst.dpy); + XCopyArea(c->dst.dpy, c->dst.pixmap, c->dst.window, c->dst.gc, +@@ -1870,6 +2031,9 @@ static int clone_paint(struct clone *c) + { + XRectangle clip; + ++ if (c->width == 0 || c->height == 0) ++ return 0; ++ + DBG(DRAW, ("%s-%s paint clone, damaged (%d, %d), (%d, %d) [(%d, %d), (%d, %d)]\n", + DisplayString(c->dst.dpy), c->dst.name, + c->damaged.x1, c->damaged.y1, +@@ -1944,6 +2108,10 @@ static int clone_paint(struct clone *c) + clip.height = c->damaged.y2 - c->damaged.y1; + get_src(c, &clip); + ++ DBG(DRAW, ("%s-%s target offset %dx%d\n", ++ DisplayString(c->dst.dpy), c->dst.name, ++ c->dst.x - c->src.x, c->dst.y - c->src.y)); ++ + clip.x += c->dst.x - c->src.x; + clip.y += c->dst.y - c->src.y; + put_dst(c, &clip); +@@ -1969,8 +2137,9 @@ static void clone_damage(struct clone *c, const XRectangle *rec) + if ((v = (int)rec->y + rec->height) > c->damaged.y2) + c->damaged.y2 = v; + +- DBG(DAMAGE, ("%s-%s damaged: (%d, %d), (%d, %d)\n", ++ DBG(DAMAGE, ("%s-%s damaged: +(%d,%d)x(%d, %d) -> (%d, %d), (%d, %d)\n", + DisplayString(c->dst.display->dpy), c->dst.name, ++ rec->x, rec->y, rec->width, rec->height, + c->damaged.x1, c->damaged.y1, + c->damaged.x2, c->damaged.y2)); + } +@@ -2252,6 +2421,8 @@ static int clone_init_depth(struct clone *clone) + if (ret) + return ret; + ++ clone->depth = depth; ++ + DBG(X11, ("%s-%s using depth %d, requires 
xrender for src? %d, for dst? %d\n", + DisplayString(clone->dst.dpy), clone->dst.name, + clone->depth, +@@ -2312,6 +2483,8 @@ static int add_display(struct context *ctx, Display *dpy) + display->depth = DefaultDepth(dpy, DefaultScreen(dpy)); + display->visual = DefaultVisual(dpy, DefaultScreen(dpy)); + ++ XSelectInput(dpy, display->root, ExposureMask); ++ + display->has_shm = can_use_shm(dpy, display->root, + &display->shm_event, + &display->shm_opcode, +@@ -2323,6 +2496,8 @@ static int add_display(struct context *ctx, Display *dpy) + display->shm_opcode, + display->has_shm_pixmap)); + ++ screensaver_save(display); ++ + display->rr_active = XRRQueryExtension(dpy, &display->rr_event, &display->rr_error); + DBG(X11, ("%s: randr_active?=%d, event=%d, error=%d\n", + DisplayString(dpy), +@@ -2592,6 +2767,11 @@ static int last_display_add_clones__randr(struct context *ctx) + return ret; + } + ++ clone->dst.x = 0; ++ clone->dst.y = 0; ++ clone->dst.width = display->width; ++ clone->dst.height = display->height; ++ + ret = clone_update_modes__randr(clone); + if (ret) { + fprintf(stderr, "Failed to clone output \"%s\" from display \"%s\"\n", +@@ -2668,8 +2848,8 @@ static int last_display_add_clones__xinerama(struct context *ctx) + } + + /* Replace the modes on the local VIRTUAL output with the remote Screen */ +- clone->width = xi[n].width; +- clone->height = xi[n].height; ++ clone->dst.width = xi[n].width; ++ clone->dst.height = xi[n].height; + clone->dst.x = xi[n].x_org; + clone->dst.y = xi[n].y_org; + clone->dst.rr_crtc = -1; +@@ -2698,64 +2878,67 @@ static int last_display_add_clones__display(struct context *ctx) + Display *dpy = display->dpy; + struct clone *clone; + Screen *scr; ++ int count, s; + char buf[80]; + int ret; + RROutput id; + ++ count = ScreenCount(dpy); ++ DBG(X11, ("%s(%s) - %d screens\n", __func__, DisplayString(dpy), count)); ++ for (s = 0; s < count; s++) { ++ clone = add_clone(ctx); ++ if (clone == NULL) ++ return -ENOMEM; + +- DBG(X11, ("%s(%s)\n", 
__func__, DisplayString(dpy))); +- clone = add_clone(ctx); +- if (clone == NULL) +- return -ENOMEM; ++ clone->depth = 24; ++ clone->next = display->clone; ++ display->clone = clone; + +- clone->depth = 24; +- clone->next = display->clone; +- display->clone = clone; ++ id = claim_virtual(ctx->display, buf, ctx->nclone); ++ if (id == 0) { ++ fprintf(stderr, "Failed to find available VirtualHead \"%s\" for on display \"%s\"\n", ++ buf, DisplayString(dpy)); ++ } ++ ret = clone_output_init(clone, &clone->src, ctx->display, buf, id); ++ if (ret) { ++ fprintf(stderr, "Failed to add display \"%s\"\n", ++ DisplayString(ctx->display->dpy)); ++ return ret; ++ } + +- id = claim_virtual(ctx->display, buf, ctx->nclone); +- if (id == 0) { +- fprintf(stderr, "Failed to find available VirtualHead \"%s\" for on display \"%s\"\n", +- buf, DisplayString(dpy)); +- } +- ret = clone_output_init(clone, &clone->src, ctx->display, buf, id); +- if (ret) { +- fprintf(stderr, "Failed to add display \"%s\"\n", +- DisplayString(ctx->display->dpy)); +- return ret; +- } ++ sprintf(buf, "SCREEN%d", s); ++ ret = clone_output_init(clone, &clone->dst, display, buf, 0); ++ if (ret) { ++ fprintf(stderr, "Failed to add display \"%s\"\n", ++ DisplayString(dpy)); ++ return ret; ++ } + +- sprintf(buf, "WHOLE"); +- ret = clone_output_init(clone, &clone->dst, display, buf, 0); +- if (ret) { +- fprintf(stderr, "Failed to add display \"%s\"\n", +- DisplayString(dpy)); +- return ret; +- } ++ ret = clone_init_depth(clone); ++ if (ret) { ++ fprintf(stderr, "Failed to negotiate image format for display \"%s\"\n", ++ DisplayString(dpy)); ++ return ret; ++ } + +- ret = clone_init_depth(clone); +- if (ret) { +- fprintf(stderr, "Failed to negotiate image format for display \"%s\"\n", +- DisplayString(dpy)); +- return ret; +- } ++ /* Replace the modes on the local VIRTUAL output with the remote Screen */ ++ scr = ScreenOfDisplay(dpy, s); ++ clone->dst.width = scr->width; ++ clone->dst.height = scr->height; ++ 
clone->dst.x = 0; ++ clone->dst.y = 0; ++ clone->dst.rr_crtc = -1; ++ ret = clone_update_modes__fixed(clone); ++ if (ret) { ++ fprintf(stderr, "Failed to clone display \"%s\"\n", ++ DisplayString(dpy)); ++ return ret; ++ } + +- /* Replace the modes on the local VIRTUAL output with the remote Screen */ +- scr = ScreenOfDisplay(dpy, DefaultScreen(dpy)); +- clone->width = scr->width; +- clone->height = scr->height; +- clone->dst.x = 0; +- clone->dst.y = 0; +- clone->dst.rr_crtc = -1; +- ret = clone_update_modes__fixed(clone); +- if (ret) { +- fprintf(stderr, "Failed to clone display \"%s\"\n", +- DisplayString(dpy)); +- return ret; ++ clone->active = ctx->active; ++ ctx->active = clone; + } + +- clone->active = ctx->active; +- ctx->active = clone; +- + return 0; + } + +@@ -3168,6 +3351,33 @@ static void context_cleanup(struct context *ctx) + XCloseDisplay(dpy); + } + ++static void update_cursor_image(struct context *ctx) ++{ ++ XFixesCursorImage *cur; ++ int i; ++ ++ DBG(CURSOR, ("%s cursor changed\n", ++ DisplayString(ctx->display->dpy))); ++ ++ cur = XFixesGetCursorImage(ctx->display->dpy); ++ if (cur == NULL) ++ return; ++ ++ display_load_visible_cursor(&ctx->display[0], cur); ++ for (i = 1; i < ctx->ndisplay; i++) { ++ struct display *display = &ctx->display[i]; ++ ++ DBG(CURSOR, ("%s marking cursor changed\n", DisplayString(display->dpy))); ++ display->cursor_moved++; ++ if (display->cursor != display->invisible_cursor) { ++ display->cursor_visible++; ++ context_enable_timer(display->ctx); ++ } ++ } ++ ++ XFree(cur); ++} ++ + static int done; + + static void signal_handler(int sig) +@@ -3182,6 +3392,7 @@ int main(int argc, char **argv) + uint64_t count; + int daemonize = 1, bumblebee = 0, siblings = 0, singleton = 1; + int i, ret, open, fail; ++ int idle; + + signal(SIGPIPE, SIG_IGN); + +@@ -3228,6 +3439,7 @@ int main(int argc, char **argv) + return -ret; + + XSetErrorHandler(_check_error_handler); ++ XSetIOErrorHandler(_io_error_handler); + + ret = add_fd(&ctx, 
display_open(&ctx, src_name)); + if (ret) { +@@ -3237,6 +3449,13 @@ int main(int argc, char **argv) + goto out; + } + ++ ret = check_virtual(ctx.display); ++ if (ret) { ++ fprintf(stderr, "No VIRTUAL outputs on \"%s\".\n", ++ DisplayString(ctx.display->dpy)); ++ goto out; ++ } ++ + if (singleton) { + XSelectInput(ctx.display->dpy, ctx.display->root, PropertyChangeMask); + if (first_display_has_singleton(&ctx)) { +@@ -3291,6 +3510,11 @@ int main(int argc, char **argv) + if (ret) + goto out; + ++ if (ctx.display->saver_active) ++ XScreenSaverSelectInput(ctx.display->dpy, ++ ctx.display->root, ++ ScreenSaverNotifyMask); ++ + if ((ctx.display->rr_event | ctx.display->rr_error) == 0) { + fprintf(stderr, "RandR extension not supported by %s\n", DisplayString(ctx.display->dpy)); + ret = EINVAL; +@@ -3348,25 +3572,60 @@ int main(int argc, char **argv) + signal(SIGTERM, signal_handler); + + ctx.command_continuation = 0; ++ update_cursor_image(&ctx); ++ ++ idle = 0; + while (!done) { + XEvent e; + int reconfigure = 0; + int rr_update = 0; + +- DBG(POLL, ("polling - enable timer? %d, nfd=%d, ndisplay=%d\n", ctx.timer_active, ctx.nfd, ctx.ndisplay)); +- ret = poll(ctx.pfd + !ctx.timer_active, ctx.nfd - !ctx.timer_active, -1); +- if (ret <= 0) +- break; ++ if (idle) { ++ DBG(POLL, ("polling - enable timer? 
%d, nfd=%d, ndisplay=%d\n", ctx.timer_active, ctx.nfd, ctx.ndisplay)); ++ ret = poll(ctx.pfd + !ctx.timer_active, ctx.nfd - !ctx.timer_active, -1); ++ if (ret <= 0) ++ break; ++ ++ DBG(POLL, ("poll reports %d fd awake\n", ret)); ++ } ++ idle = 1; + + /* pfd[0] is the timer, pfd[1] is the local display, pfd[2] is the mouse, pfd[3+] are the remotes */ + +- DBG(POLL, ("poll reports %d fd awake\n", ret)); + if (ctx.pfd[1].revents || XPending(ctx.display[0].dpy)) { + DBG(POLL,("%s woken up\n", DisplayString(ctx.display[0].dpy))); ++ ctx.pfd[1].revents = 0; ++ idle = 0; ++ + do { + XNextEvent(ctx.display->dpy, &e); + +- if (e.type == ctx.display->damage_event + XDamageNotify ) { ++ DBG(POLL, ("%s received event %d\n", DisplayString(ctx.display[0].dpy), e.type)); ++ ++ if (e.type == ctx.display->saver_event + ScreenSaverNotify) { ++ const XScreenSaverNotifyEvent *se = (const XScreenSaverNotifyEvent *)&e; ++ DBG(SCREEN, ++ ("%s screen saver: state=%d, kind=%d, forced=%d\n", ++ DisplayString(ctx.display->dpy), ++ se->state, se->kind, se->forced)); ++ for (i = 1; i < ctx.ndisplay; i++) { ++ struct display *display = &ctx.display[i]; ++ ++ if (!display->active) ++ continue; ++ ++ DBG(SCREEN, ++ ("%s %s screen saver\n", ++ DisplayString(display->dpy), ++ se->state == ScreenSaverOn ? 
"activating" : "resetting\n")); ++ ++ if (se->state == ScreenSaverOn) ++ XActivateScreenSaver(display->dpy); ++ else ++ XResetScreenSaver(display->dpy); ++ XFlush(display->dpy); ++ } ++ } else if (e.type == ctx.display->damage_event + XDamageNotify) { + const XDamageNotifyEvent *de = (const XDamageNotifyEvent *)&e; + struct clone *clone; + +@@ -3380,19 +3639,7 @@ int main(int argc, char **argv) + if (ctx.active) + context_enable_timer(&ctx); + } else if (e.type == ctx.display->xfixes_event + XFixesCursorNotify) { +- XFixesCursorImage *cur; +- +- DBG(CURSOR, ("%s cursor changed\n", +- DisplayString(ctx.display->dpy))); +- +- cur = XFixesGetCursorImage(ctx.display->dpy); +- if (cur == NULL) +- continue; +- +- for (i = 1; i < ctx.ndisplay; i++) +- display_load_visible_cursor(&ctx.display[i], cur); +- +- XFree(cur); ++ update_cursor_image(&ctx); + } else if (e.type == ctx.display->rr_event + RRScreenChangeNotify) { + DBG(XRR, ("%s screen changed (reconfigure pending? %d)\n", + DisplayString(ctx.display->dpy), reconfigure)); +@@ -3426,13 +3673,41 @@ int main(int argc, char **argv) + if (ctx.pfd[i+2].revents == 0 && !XPending(ctx.display[i].dpy)) + continue; + ++ ctx.pfd[i+2].revents = 0; ++ idle = 0; ++ + DBG(POLL, ("%s woken up\n", DisplayString(ctx.display[i].dpy))); + do { + XNextEvent(ctx.display[i].dpy, &e); + + DBG(POLL, ("%s received event %d\n", DisplayString(ctx.display[i].dpy), e.type)); +- if (ctx.display[i].rr_active && e.type == ctx.display[i].rr_event + RRNotify) { +- XRRNotifyEvent *re = (XRRNotifyEvent *)&e; ++ if (e.type == Expose) { ++ const XExposeEvent *xe = (XExposeEvent *)&e; ++ struct clone *clone; ++ int damaged = 0; ++ ++ DBG(DAMAGE, ("%s exposed: (%d, %d)x(%d, %d)\n", ++ DisplayString(ctx.display[i].dpy), ++ xe->x, xe->y, xe->width, xe->height)); ++ ++ for (clone = ctx.active; clone; clone = clone->active) { ++ XRectangle r; ++ ++ if (clone->dst.display != &ctx.display[i]) ++ continue; ++ ++ r.x = clone->src.x + xe->x; ++ r.y = clone->src.y + 
xe->y; ++ r.width = xe->width; ++ r.height = xe->height; ++ clone_damage(clone, &r); ++ damaged++; ++ } ++ ++ if (damaged) ++ context_enable_timer(&ctx); ++ } else if (ctx.display[i].rr_active && e.type == ctx.display[i].rr_event + RRNotify) { ++ const XRRNotifyEvent *re = (XRRNotifyEvent *)&e; + + DBG(XRR, ("%s received RRNotify, type %d\n", DisplayString(ctx.display[i].dpy), re->subtype)); + if (re->subtype == RRNotify_OutputChange) { +@@ -3480,6 +3755,7 @@ int main(int argc, char **argv) + + DBG(TIMER, ("%s timer still active? %d\n", DisplayString(ctx.display->dpy), ret != 0)); + ctx.timer_active = ret != 0; ++ idle = 0; + } + } + diff --git a/main/xf86-video-intel/xf86-video-intel-2.99.917-libdrm-kernel-4_0-crash.patch b/main/xf86-video-intel/xf86-video-intel-2.99.917-libdrm-kernel-4_0-crash.patch deleted file mode 100644 index ea3aa30ed1..0000000000 --- a/main/xf86-video-intel/xf86-video-intel-2.99.917-libdrm-kernel-4_0-crash.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 7fe2b2948652443ff43d907855bd7a051d54d309 Mon Sep 17 00:00:00 2001 -From: Chris Wilson <chris@chris-wilson.co.uk> -Date: Thu, 19 Mar 2015 23:14:17 +0000 -Subject: sna: Protect against ABI breakage in recent versions of libdrm - -Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> - -diff --git a/src/sna/kgem.c b/src/sna/kgem.c -index 11f0828..6f16cba 100644 ---- a/src/sna/kgem.c -+++ b/src/sna/kgem.c -@@ -182,6 +182,15 @@ struct local_i915_gem_caching { - #define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching) - #define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching) - -+struct local_i915_gem_mmap { -+ uint32_t handle; -+ uint32_t pad; -+ uint64_t offset; -+ uint64_t size; -+ uint64_t addr_ptr; -+}; -+#define LOCAL_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap) -+ - struct local_i915_gem_mmap2 { - uint32_t 
handle; - uint32_t pad; -@@ -514,15 +523,15 @@ retry_wc: - - static void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) - { -- struct drm_i915_gem_mmap mmap_arg; -+ struct local_i915_gem_mmap arg; - int err; - - retry: -- VG_CLEAR(mmap_arg); -- mmap_arg.handle = bo->handle; -- mmap_arg.offset = 0; -- mmap_arg.size = bytes(bo); -- if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) { -+ VG_CLEAR(arg); -+ arg.handle = bo->handle; -+ arg.offset = 0; -+ arg.size = bytes(bo); -+ if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP, &arg))) { - assert(err != EINVAL); - - if (__kgem_throttle_retire(kgem, 0)) -@@ -536,10 +545,10 @@ retry: - return NULL; - } - -- VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); -+ VG(VALGRIND_MAKE_MEM_DEFINED(arg.addr_ptr, bytes(bo))); - - DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); -- return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr; -+ return bo->map__cpu = (void *)(uintptr_t)arg.addr_ptr; - } - - static int gem_write(int fd, uint32_t handle, --- -cgit v0.10.2 - diff --git a/main/xf86-video-intel/xorg-1.18.patch b/main/xf86-video-intel/xorg-1.18.patch deleted file mode 100644 index ab3fb399a1..0000000000 --- a/main/xf86-video-intel/xorg-1.18.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 2c5063938cc809f624e56efd4673041fa8141e81 Mon Sep 17 00:00:00 2001 -From: Martin Peres <martin.peres@linux.intel.com> -Date: Thu, 9 Jul 2015 11:26:38 +0300 -Subject: uxa: fix the call to PixmapSyncDirtyHelper, broken by xserver's - 90db5ed - -[ickle: switch to HAS_DIRTYTRACKING_ROTATION as suggested by Dave Airlie] -Signed-off-by: Martin Peres <martin.peres@linux.intel.com> - -diff --git a/src/compat-api.h b/src/compat-api.h -index aa93bee..293e9d7 100644 ---- a/src/compat-api.h -+++ b/src/compat-api.h -@@ -247,3 +247,7 @@ static inline void FreePixmap(PixmapPtr pixmap) - #endif - - #endif -+ -+#if HAS_DIRTYTRACKING_ROTATION -+#define PixmapSyncDirtyHelper(d, dd) PixmapSyncDirtyHelper(d) 
-+#endif --- -cgit v0.10.2 - |