summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--testing/linux-xen0/APKBUILD147
-rw-r--r--testing/linux-xen0/kernelconfig.x86_644241
-rw-r--r--testing/linux-xen0/pvops.patch37837
3 files changed, 0 insertions, 42225 deletions
diff --git a/testing/linux-xen0/APKBUILD b/testing/linux-xen0/APKBUILD
deleted file mode 100644
index a08692ad4..000000000
--- a/testing/linux-xen0/APKBUILD
+++ /dev/null
@@ -1,147 +0,0 @@
-# Maintainer: William Pitcock <nenolod@dereferenced.org>
-
-_flavor=xen0
-pkgname=linux-${_flavor}
-pkgver=2.6.32.28
-_kernver=2.6.32
-pkgrel=1
-pkgdesc="Linux kernel with dom0 support (no grsecurity)"
-url=http://grsecurity.net
-depends="mkinitfs linux-firmware xen"
-makedepends="perl installkernel bash xen"
-options="!strip"
-_config=${config:-kernelconfig.${CARCH}}
-install=
-source="ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-$_kernver.tar.bz2
- http://www.kernel.org/pub/linux/kernel/v2.6/longterm/v${pkgver%.*}/patch-$pkgver.bz2
-
- pvops.patch
-
- kernelconfig.x86_64"
-subpackages="$pkgname-dev linux-firmware:firmware"
-arch="x86_64"
-license="GPL-2"
-
-_abi_release=${pkgver}-${_flavor}
-
-prepare() {
- local _patch_failed=
- cd "$srcdir"/linux-$_kernver
- if [ "$_kernver" != "$pkgver" ]; then
- bunzip2 -c < ../patch-$pkgver.bz2 | patch -p1 -N || return 1
- fi
-
- # first apply patches in specified order
- for i in $source; do
- case $i in
- *.patch)
- bn=$(basename $i)
-
- msg "Applying $bn..."
- if ! patch -s -p1 -N -i "$srcdir"/$bn; then
- echo $bn >>failed
- _patch_failed=1
- fi
- ;;
- esac
- done
-
- if ! [ -z "$_patch_failed" ]; then
- error "The following patches failed:"
- cat failed
- return 1
- fi
-
- echo "-xen0" > "$srcdir"/linux-$_kernver/localversion-xen0
-
- mkdir -p "$srcdir"/build
- cp "$srcdir"/$_config "$srcdir"/build/.config || return 1
- make -C "$srcdir"/linux-$_kernver O="$srcdir"/build HOSTCC="${CC:-gcc}" V=1 \
- silentoldconfig
-}
-
-# this is so we can do: 'abuild menuconfig' to reconfigure kernel
-menuconfig() {
- cd "$srcdir"/build || return 1
- make menuconfig
- cp .config "$startdir"/$_config
-}
-
-build() {
- cd "$srcdir"/build
- make CC="${CC:-gcc}" \
- KBUILD_BUILD_VERSION="$((pkgrel + 1 ))-Alpine" V=1 \
- || return 1
-}
-
-package() {
- cd "$srcdir"/build
- mkdir -p "$pkgdir"/boot "$pkgdir"/lib/modules
- make -j1 modules_install firmware_install install \
- INSTALL_MOD_PATH="$pkgdir" \
- INSTALL_PATH="$pkgdir"/boot \
- || return 1
-
- rm -f "$pkgdir"/lib/modules/${_abi_release}/build \
- "$pkgdir"/lib/modules/${_abi_release}/source
- install -D include/config/kernel.release \
- "$pkgdir"/usr/share/kernel/$_flavor/kernel.release
-}
-
-dev() {
- # copy the only the parts that we really need for build 3rd party
- # kernel modules and install those as /usr/src/linux-headers,
- # simlar to what ubuntu does
- #
- # this way you dont need to install the 300-400 kernel sources to
- # build a tiny kernel module
- #
- pkgdesc="Headers and script for third party modules for grsec kernel"
- local dir="$subpkgdir"/usr/src/linux-headers-${_abi_release}
-
- # first we import config, run prepare to set up for building
- # external modules, and create the scripts
- mkdir -p "$dir"
- cp "$srcdir"/$_config "$dir"/.config
- make -j1 -C "$srcdir"/linux-$_kernver O="$dir" HOSTCC="${CC:-gcc}" \
- silentoldconfig prepare scripts
-
- # remove the stuff that poits to real sources. we want 3rd party
- # modules to believe this is the soruces
- rm "$dir"/Makefile "$dir"/source
-
- # copy the needed stuff from real sources
- #
- # this is taken from ubuntu kernel build script
- # http://kernel.ubuntu.com/git?p=ubuntu/ubuntu-jaunty.git;a=blob;f=debian/rules.d/3-binary-indep.mk;hb=HEAD
- cd "$srcdir"/linux-$_kernver
- find . -path './include/*' -prune -o -path './scripts/*' -prune \
- -o -type f \( -name 'Makefile*' -o -name 'Kconfig*' \
- -o -name 'Kbuild*' -o -name '*.sh' -o -name '*.pl' \
- -o -name '*.lds' \) | cpio -pdm "$dir"
- cp -a drivers/media/dvb/dvb-core/*.h "$dir"/drivers/media/dvb/dvb-core
- cp -a drivers/media/video/*.h "$dir"/drivers/media/video
- cp -a drivers/media/dvb/frontends/*.h "$dir"/drivers/media/dvb/frontends
- cp -a scripts include "$dir"
- find $(find arch -name include -type d -print) -type f \
- | cpio -pdm "$dir"
-
- install -Dm644 "$srcdir"/build/Module.symvers \
- "$dir"/Module.symvers
-
- mkdir -p "$subpkgdir"/lib/modules/${_abi_release}
- ln -sf /usr/src/linux-headers-${_abi_release} \
- "$subpkgdir"/lib/modules/${_abi_release}/build
-}
-
-firmware() {
- pkgdesc="Firmware for linux kernel"
- replaces="linux-grsec linux-vserver"
- mkdir -p "$subpkgdir"/lib
- mv "$pkgdir"/lib/firmware "$subpkgdir"/lib/
-}
-
-md5sums="260551284ac224c3a43c4adac7df4879 linux-2.6.32.tar.bz2
-fc8c36b4638d8384a5d26a50413a1d11 patch-2.6.32.28.bz2
-2c678c4610b9d425fd3791e4ebaa0bdd pvops.patch
-22f2c14e9ca592d668fc9aeda989f3e7 kernelconfig.x86_64"
diff --git a/testing/linux-xen0/kernelconfig.x86_64 b/testing/linux-xen0/kernelconfig.x86_64
deleted file mode 100644
index 8749f2637..000000000
--- a/testing/linux-xen0/kernelconfig.x86_64
+++ /dev/null
@@ -1,4241 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.32.27
-# Tue Dec 28 00:27:19 2010
-#
-CONFIG_64BIT=y
-# CONFIG_X86_32 is not set
-CONFIG_X86_64=y
-CONFIG_X86=y
-CONFIG_OUTPUT_FORMAT="elf64-x86-64"
-CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
-CONFIG_GENERIC_TIME=y
-CONFIG_GENERIC_CMOS_UPDATE=y
-CONFIG_CLOCKSOURCE_WATCHDOG=y
-CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
-CONFIG_LOCKDEP_SUPPORT=y
-CONFIG_STACKTRACE_SUPPORT=y
-CONFIG_HAVE_LATENCYTOP_SUPPORT=y
-CONFIG_MMU=y
-CONFIG_ZONE_DMA=y
-CONFIG_GENERIC_ISA_DMA=y
-CONFIG_GENERIC_IOMAP=y
-CONFIG_GENERIC_BUG=y
-CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
-CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_GPIO=y
-CONFIG_ARCH_MAY_HAVE_PC_FDC=y
-# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_TIME_VSYSCALL=y
-CONFIG_ARCH_HAS_CPU_RELAX=y
-CONFIG_ARCH_HAS_DEFAULT_IDLE=y
-CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
-CONFIG_HAVE_SETUP_PER_CPU_AREA=y
-CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
-CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
-CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
-CONFIG_ARCH_HIBERNATION_POSSIBLE=y
-CONFIG_ARCH_SUSPEND_POSSIBLE=y
-CONFIG_ZONE_DMA32=y
-CONFIG_ARCH_POPULATES_NODE_MAP=y
-CONFIG_AUDIT_ARCH=y
-CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
-CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_USE_GENERIC_SMP_HELPERS=y
-CONFIG_X86_64_SMP=y
-CONFIG_X86_HT=y
-CONFIG_X86_TRAMPOLINE=y
-# CONFIG_KTIME_SCALAR is not set
-CONFIG_ARCH_CPU_PROBE_RELEASE=y
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-CONFIG_CONSTRUCTORS=y
-
-#
-# General setup
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-CONFIG_LOCALVERSION=""
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_HAVE_KERNEL_GZIP=y
-CONFIG_HAVE_KERNEL_BZIP2=y
-CONFIG_HAVE_KERNEL_LZMA=y
-CONFIG_KERNEL_GZIP=y
-# CONFIG_KERNEL_BZIP2 is not set
-# CONFIG_KERNEL_LZMA is not set
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-CONFIG_SYSVIPC_SYSCTL=y
-# CONFIG_POSIX_MQUEUE is not set
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BSD_PROCESS_ACCT_V3=y
-# CONFIG_TASKSTATS is not set
-# CONFIG_AUDIT is not set
-
-#
-# RCU Subsystem
-#
-CONFIG_TREE_RCU=y
-# CONFIG_TREE_PREEMPT_RCU is not set
-# CONFIG_RCU_TRACE is not set
-CONFIG_RCU_FANOUT=32
-# CONFIG_RCU_FANOUT_EXACT is not set
-# CONFIG_TREE_RCU_TRACE is not set
-CONFIG_IKCONFIG=m
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
-# CONFIG_GROUP_SCHED is not set
-# CONFIG_CGROUPS is not set
-# CONFIG_SYSFS_DEPRECATED_V2 is not set
-# CONFIG_RELAY is not set
-# CONFIG_NAMESPACES is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_RD_GZIP=y
-CONFIG_RD_BZIP2=y
-CONFIG_RD_LZMA=y
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL=y
-CONFIG_ANON_INODES=y
-CONFIG_EMBEDDED=y
-CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
-# CONFIG_KALLSYMS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_PCSPKR_PLATFORM=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
-CONFIG_SHMEM=y
-CONFIG_AIO=y
-CONFIG_HAVE_PERF_EVENTS=y
-
-#
-# Kernel Performance Events And Counters
-#
-CONFIG_PERF_EVENTS=y
-CONFIG_PERF_COUNTERS=y
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_PCI_QUIRKS=y
-# CONFIG_SLUB_DEBUG is not set
-# CONFIG_COMPAT_BRK is not set
-# CONFIG_SLAB is not set
-CONFIG_SLUB=y
-# CONFIG_SLOB is not set
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
-# CONFIG_OPROFILE_IBS is not set
-# CONFIG_OPROFILE_EVENT_MULTIPLEX is not set
-CONFIG_HAVE_OPROFILE=y
-CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
-CONFIG_HAVE_IOREMAP_PROT=y
-CONFIG_HAVE_KPROBES=y
-CONFIG_HAVE_KRETPROBES=y
-CONFIG_HAVE_ARCH_TRACEHOOK=y
-CONFIG_HAVE_DMA_ATTRS=y
-CONFIG_HAVE_DMA_API_DEBUG=y
-
-#
-# GCOV-based kernel profiling
-#
-# CONFIG_GCOV_KERNEL is not set
-CONFIG_SLOW_WORK=y
-# CONFIG_SLOW_WORK_DEBUG is not set
-# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
-CONFIG_RT_MUTEXES=y
-CONFIG_BASE_SMALL=0
-CONFIG_MODULES=y
-# CONFIG_MODULE_FORCE_LOAD is not set
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_MODVERSIONS=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_STOP_MACHINE=y
-CONFIG_BLOCK=y
-CONFIG_BLK_DEV_BSG=y
-# CONFIG_BLK_DEV_INTEGRITY is not set
-CONFIG_BLOCK_COMPAT=y
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=m
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
-# CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
-CONFIG_PREEMPT_NOTIFIERS=y
-CONFIG_FREEZER=y
-
-#
-# Processor type and features
-#
-CONFIG_TICK_ONESHOT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-CONFIG_SMP=y
-# CONFIG_SPARSE_IRQ is not set
-CONFIG_X86_MPPARSE=y
-CONFIG_X86_EXTENDED_PLATFORM=y
-# CONFIG_X86_VSMP is not set
-CONFIG_SCHED_OMIT_FRAME_POINTER=y
-CONFIG_PARAVIRT_GUEST=y
-CONFIG_XEN=y
-CONFIG_XEN_PVHVM=y
-CONFIG_XEN_MAX_DOMAIN_MEMORY=128
-CONFIG_XEN_SAVE_RESTORE=y
-CONFIG_XEN_DEBUG_FS=y
-CONFIG_SWIOTLB_XEN=y
-CONFIG_MICROCODE_XEN=y
-CONFIG_XEN_DOM0=y
-CONFIG_XEN_PRIVILEGED_GUEST=y
-CONFIG_XEN_DOM0_PCI=y
-CONFIG_XEN_PCI_PASSTHROUGH=y
-CONFIG_KVM_CLOCK=y
-CONFIG_KVM_GUEST=y
-CONFIG_PARAVIRT=y
-# CONFIG_PARAVIRT_SPINLOCKS is not set
-CONFIG_PARAVIRT_CLOCK=y
-# CONFIG_MEMTEST is not set
-# CONFIG_M386 is not set
-# CONFIG_M486 is not set
-# CONFIG_M586 is not set
-# CONFIG_M586TSC is not set
-# CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
-# CONFIG_MPENTIUMII is not set
-# CONFIG_MPENTIUMIII is not set
-# CONFIG_MPENTIUMM is not set
-# CONFIG_MPENTIUM4 is not set
-# CONFIG_MK6 is not set
-# CONFIG_MK7 is not set
-# CONFIG_MK8 is not set
-# CONFIG_MCRUSOE is not set
-# CONFIG_MEFFICEON is not set
-# CONFIG_MWINCHIPC6 is not set
-# CONFIG_MWINCHIP3D is not set
-# CONFIG_MGEODEGX1 is not set
-# CONFIG_MGEODE_LX is not set
-# CONFIG_MCYRIXIII is not set
-# CONFIG_MVIAC3_2 is not set
-# CONFIG_MVIAC7 is not set
-# CONFIG_MPSC is not set
-# CONFIG_MCORE2 is not set
-# CONFIG_MATOM is not set
-CONFIG_GENERIC_CPU=y
-CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=64
-CONFIG_X86_INTERNODE_CACHE_BYTES=64
-CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=6
-CONFIG_X86_XADD=y
-CONFIG_X86_WP_WORKS_OK=y
-CONFIG_X86_TSC=y
-CONFIG_X86_CMPXCHG64=y
-CONFIG_X86_CMOV=y
-CONFIG_X86_MINIMUM_CPU_FAMILY=64
-CONFIG_X86_DEBUGCTLMSR=y
-# CONFIG_PROCESSOR_SELECT is not set
-CONFIG_CPU_SUP_INTEL=y
-CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR=y
-# CONFIG_X86_DS is not set
-CONFIG_HPET_TIMER=y
-CONFIG_HPET_EMULATE_RTC=y
-CONFIG_DMI=y
-CONFIG_GART_IOMMU=y
-CONFIG_CALGARY_IOMMU=y
-CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y
-CONFIG_AMD_IOMMU=y
-# CONFIG_AMD_IOMMU_STATS is not set
-CONFIG_SWIOTLB=y
-CONFIG_IOMMU_HELPER=y
-CONFIG_IOMMU_API=y
-CONFIG_NR_CPUS=8
-CONFIG_SCHED_SMT=y
-CONFIG_SCHED_MC=y
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
-# CONFIG_PREEMPT is not set
-CONFIG_X86_LOCAL_APIC=y
-CONFIG_X86_IO_APIC=y
-# CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS is not set
-# CONFIG_X86_MCE is not set
-CONFIG_I8K=m
-CONFIG_MICROCODE=m
-CONFIG_MICROCODE_INTEL=y
-CONFIG_MICROCODE_AMD=y
-CONFIG_MICROCODE_OLD_INTERFACE=y
-CONFIG_X86_MSR=m
-CONFIG_X86_CPUID=m
-CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
-CONFIG_DIRECT_GBPAGES=y
-# CONFIG_NUMA is not set
-CONFIG_ARCH_SPARSEMEM_DEFAULT=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000
-CONFIG_SELECT_MEMORY_MODEL=y
-# CONFIG_FLATMEM_MANUAL is not set
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-CONFIG_SPARSEMEM_MANUAL=y
-CONFIG_SPARSEMEM=y
-CONFIG_HAVE_MEMORY_PRESENT=y
-CONFIG_SPARSEMEM_EXTREME=y
-CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
-CONFIG_SPARSEMEM_VMEMMAP=y
-# CONFIG_MEMORY_HOTPLUG is not set
-CONFIG_PAGEFLAGS_EXTENDED=y
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_PHYS_ADDR_T_64BIT=y
-CONFIG_ZONE_DMA_FLAG=1
-CONFIG_BOUNCE=y
-CONFIG_VIRT_TO_BUS=y
-CONFIG_HAVE_MLOCK=y
-CONFIG_HAVE_MLOCKED_PAGE_BIT=y
-CONFIG_MMU_NOTIFIER=y
-CONFIG_KSM=y
-CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
-# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set
-CONFIG_X86_RESERVE_LOW_64K=y
-CONFIG_MTRR=y
-CONFIG_MTRR_SANITIZER=y
-CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0
-CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1
-CONFIG_X86_PAT=y
-CONFIG_ARCH_USES_PG_UNCACHED=y
-# CONFIG_EFI is not set
-# CONFIG_SECCOMP is not set
-# CONFIG_CC_STACKPROTECTOR is not set
-# CONFIG_HZ_100 is not set
-# CONFIG_HZ_250 is not set
-CONFIG_HZ_300=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=300
-CONFIG_SCHED_HRTICK=y
-# CONFIG_KEXEC is not set
-# CONFIG_CRASH_DUMP is not set
-CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x1000000
-CONFIG_HOTPLUG_CPU=y
-CONFIG_COMPAT_VDSO=y
-# CONFIG_CMDLINE_BOOL is not set
-CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
-
-#
-# Power management and ACPI options
-#
-CONFIG_PM=y
-# CONFIG_PM_DEBUG is not set
-CONFIG_PM_SLEEP_SMP=y
-CONFIG_PM_SLEEP=y
-CONFIG_SUSPEND=y
-CONFIG_SUSPEND_FREEZER=y
-# CONFIG_HIBERNATION is not set
-# CONFIG_PM_RUNTIME is not set
-CONFIG_ACPI=y
-CONFIG_ACPI_SLEEP=y
-CONFIG_ACPI_PROCFS=y
-CONFIG_ACPI_PROCFS_POWER=y
-# CONFIG_ACPI_POWER_METER is not set
-CONFIG_ACPI_SYSFS_POWER=y
-CONFIG_ACPI_PROC_EVENT=y
-CONFIG_ACPI_AC=m
-CONFIG_ACPI_BATTERY=m
-CONFIG_ACPI_BUTTON=m
-CONFIG_ACPI_VIDEO=m
-CONFIG_ACPI_FAN=m
-CONFIG_ACPI_DOCK=y
-CONFIG_ACPI_PROCESSOR=m
-CONFIG_ACPI_HOTPLUG_CPU=y
-# CONFIG_ACPI_PROCESSOR_AGGREGATOR is not set
-CONFIG_ACPI_THERMAL=m
-# CONFIG_ACPI_CUSTOM_DSDT is not set
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
-CONFIG_ACPI_PCI_SLOT=m
-CONFIG_X86_PM_TIMER=y
-CONFIG_ACPI_CONTAINER=m
-CONFIG_ACPI_SBS=m
-# CONFIG_SFI is not set
-
-#
-# CPU Frequency scaling
-#
-CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_TABLE=m
-# CONFIG_CPU_FREQ_DEBUG is not set
-CONFIG_CPU_FREQ_STAT=m
-# CONFIG_CPU_FREQ_STAT_DETAILS is not set
-CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
-# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
-# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
-# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
-# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
-CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
-CONFIG_CPU_FREQ_GOV_POWERSAVE=m
-CONFIG_CPU_FREQ_GOV_USERSPACE=m
-CONFIG_CPU_FREQ_GOV_ONDEMAND=m
-CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
-
-#
-# CPUFreq processor drivers
-#
-CONFIG_X86_ACPI_CPUFREQ=m
-CONFIG_X86_POWERNOW_K8=m
-CONFIG_X86_SPEEDSTEP_CENTRINO=m
-CONFIG_X86_P4_CLOCKMOD=m
-
-#
-# shared options
-#
-CONFIG_X86_SPEEDSTEP_LIB=m
-CONFIG_CPU_IDLE=y
-CONFIG_CPU_IDLE_GOV_LADDER=y
-CONFIG_CPU_IDLE_GOV_MENU=y
-
-#
-# Memory power savings
-#
-CONFIG_I7300_IDLE_IOAT_CHANNEL=y
-CONFIG_I7300_IDLE=m
-
-#
-# Bus options (PCI etc.)
-#
-CONFIG_PCI=y
-CONFIG_PCI_DIRECT=y
-CONFIG_PCI_MMCONFIG=y
-CONFIG_PCI_XEN=y
-CONFIG_PCI_DOMAINS=y
-# CONFIG_DMAR is not set
-# CONFIG_INTR_REMAP is not set
-CONFIG_PCIEPORTBUS=y
-CONFIG_HOTPLUG_PCI_PCIE=m
-# CONFIG_PCIEAER is not set
-CONFIG_PCIEASPM=y
-# CONFIG_PCIEASPM_DEBUG is not set
-CONFIG_ARCH_SUPPORTS_MSI=y
-CONFIG_PCI_MSI=y
-CONFIG_PCI_LEGACY=y
-CONFIG_PCI_STUB=m
-CONFIG_XEN_PCIDEV_FRONTEND=y
-CONFIG_HT_IRQ=y
-# CONFIG_PCI_IOV is not set
-CONFIG_ISA_DMA_API=y
-CONFIG_K8_NB=y
-CONFIG_PCCARD=m
-# CONFIG_PCMCIA_DEBUG is not set
-CONFIG_PCMCIA=m
-CONFIG_PCMCIA_LOAD_CIS=y
-CONFIG_PCMCIA_IOCTL=y
-CONFIG_CARDBUS=y
-
-#
-# PC-card bridges
-#
-CONFIG_YENTA=m
-CONFIG_YENTA_O2=y
-CONFIG_YENTA_RICOH=y
-CONFIG_YENTA_TI=y
-CONFIG_YENTA_ENE_TUNE=y
-CONFIG_YENTA_TOSHIBA=y
-CONFIG_PD6729=m
-CONFIG_I82092=m
-CONFIG_PCCARD_NONSTATIC=m
-CONFIG_HOTPLUG_PCI=m
-CONFIG_HOTPLUG_PCI_FAKE=m
-CONFIG_HOTPLUG_PCI_ACPI=m
-CONFIG_HOTPLUG_PCI_ACPI_IBM=m
-CONFIG_HOTPLUG_PCI_CPCI=y
-CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m
-CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m
-CONFIG_HOTPLUG_PCI_SHPC=m
-
-#
-# Executable file formats / Emulations
-#
-CONFIG_BINFMT_ELF=y
-CONFIG_COMPAT_BINFMT_ELF=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-# CONFIG_HAVE_AOUT is not set
-CONFIG_BINFMT_MISC=m
-CONFIG_IA32_EMULATION=y
-# CONFIG_IA32_AOUT is not set
-CONFIG_COMPAT=y
-CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
-CONFIG_SYSVIPC_COMPAT=y
-CONFIG_NET=y
-CONFIG_COMPAT_NETLINK_MESSAGES=y
-
-#
-# Networking options
-#
-CONFIG_PACKET=m
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-CONFIG_XFRM=y
-CONFIG_XFRM_USER=m
-CONFIG_XFRM_SUB_POLICY=y
-CONFIG_XFRM_MIGRATE=y
-# CONFIG_XFRM_STATISTICS is not set
-CONFIG_XFRM_IPCOMP=m
-CONFIG_NET_KEY=m
-CONFIG_NET_KEY_MIGRATE=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_ASK_IP_FIB_HASH=y
-# CONFIG_IP_FIB_TRIE is not set
-CONFIG_IP_FIB_HASH=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-# CONFIG_IP_PIMSM_V1 is not set
-CONFIG_IP_PIMSM_V2=y
-CONFIG_ARPD=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_TUNNEL=m
-CONFIG_INET_TUNNEL=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
-CONFIG_INET_LRO=y
-CONFIG_INET_DIAG=m
-CONFIG_INET_TCP_DIAG=m
-CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_BIC=m
-CONFIG_TCP_CONG_CUBIC=y
-CONFIG_TCP_CONG_WESTWOOD=m
-CONFIG_TCP_CONG_HTCP=m
-CONFIG_TCP_CONG_HSTCP=m
-CONFIG_TCP_CONG_HYBLA=m
-CONFIG_TCP_CONG_VEGAS=m
-CONFIG_TCP_CONG_SCALABLE=m
-CONFIG_TCP_CONG_LP=m
-CONFIG_TCP_CONG_VENO=m
-CONFIG_TCP_CONG_YEAH=m
-CONFIG_TCP_CONG_ILLINOIS=m
-# CONFIG_DEFAULT_BIC is not set
-CONFIG_DEFAULT_CUBIC=y
-# CONFIG_DEFAULT_HTCP is not set
-# CONFIG_DEFAULT_VEGAS is not set
-# CONFIG_DEFAULT_WESTWOOD is not set
-# CONFIG_DEFAULT_RENO is not set
-CONFIG_DEFAULT_TCP_CONG="cubic"
-CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6=m
-CONFIG_IPV6_PRIVACY=y
-CONFIG_IPV6_ROUTER_PREF=y
-CONFIG_IPV6_ROUTE_INFO=y
-# CONFIG_IPV6_OPTIMISTIC_DAD is not set
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_MIP6=m
-CONFIG_INET6_XFRM_TUNNEL=m
-CONFIG_INET6_TUNNEL=m
-CONFIG_INET6_XFRM_MODE_TRANSPORT=m
-CONFIG_INET6_XFRM_MODE_TUNNEL=m
-CONFIG_INET6_XFRM_MODE_BEET=m
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
-CONFIG_IPV6_SIT=m
-CONFIG_IPV6_NDISC_NODETYPE=y
-CONFIG_IPV6_TUNNEL=m
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IPV6_SUBTREES=y
-CONFIG_IPV6_MROUTE=y
-CONFIG_IPV6_PIMSM_V2=y
-CONFIG_NETLABEL=y
-CONFIG_NETWORK_SECMARK=y
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
-CONFIG_NETFILTER_ADVANCED=y
-CONFIG_BRIDGE_NETFILTER=y
-
-#
-# Core Netfilter Configuration
-#
-CONFIG_NETFILTER_NETLINK=m
-CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_NETLINK_LOG=m
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CT_ACCT=y
-CONFIG_NF_CONNTRACK_MARK=y
-CONFIG_NF_CONNTRACK_SECMARK=y
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_DCCP=m
-CONFIG_NF_CT_PROTO_GRE=m
-CONFIG_NF_CT_PROTO_SCTP=m
-CONFIG_NF_CT_PROTO_UDPLITE=m
-CONFIG_NF_CONNTRACK_AMANDA=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_H323=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_NETBIOS_NS=m
-CONFIG_NF_CONNTRACK_PPTP=m
-CONFIG_NF_CONNTRACK_SANE=m
-CONFIG_NF_CONNTRACK_SIP=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NF_CT_NETLINK=m
-CONFIG_NETFILTER_TPROXY=m
-CONFIG_NETFILTER_XTABLES=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
-CONFIG_NETFILTER_XT_TARGET_DSCP=m
-CONFIG_NETFILTER_XT_TARGET_HL=m
-CONFIG_NETFILTER_XT_TARGET_LED=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
-CONFIG_NETFILTER_XT_TARGET_RATEEST=m
-CONFIG_NETFILTER_XT_TARGET_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
-CONFIG_NETFILTER_XT_TARGET_SECMARK=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
-CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_HL=m
-CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_OWNER=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_RATEEST=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_RECENT=m
-# CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT is not set
-CONFIG_NETFILTER_XT_MATCH_SCTP=m
-CONFIG_NETFILTER_XT_MATCH_SOCKET=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_TIME=m
-CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NETFILTER_XT_MATCH_OSF=m
-CONFIG_IP_VS=m
-CONFIG_IP_VS_IPV6=y
-# CONFIG_IP_VS_DEBUG is not set
-CONFIG_IP_VS_TAB_BITS=12
-
-#
-# IPVS transport protocol load balancing support
-#
-CONFIG_IP_VS_PROTO_TCP=y
-CONFIG_IP_VS_PROTO_UDP=y
-CONFIG_IP_VS_PROTO_AH_ESP=y
-CONFIG_IP_VS_PROTO_ESP=y
-CONFIG_IP_VS_PROTO_AH=y
-
-#
-# IPVS scheduler
-#
-CONFIG_IP_VS_RR=m
-CONFIG_IP_VS_WRR=m
-CONFIG_IP_VS_LC=m
-CONFIG_IP_VS_WLC=m
-CONFIG_IP_VS_LBLC=m
-CONFIG_IP_VS_LBLCR=m
-CONFIG_IP_VS_DH=m
-CONFIG_IP_VS_SH=m
-CONFIG_IP_VS_SED=m
-CONFIG_IP_VS_NQ=m
-
-#
-# IPVS application helper
-#
-CONFIG_IP_VS_FTP=m
-
-#
-# IP: Netfilter Configuration
-#
-CONFIG_NF_DEFRAG_IPV4=m
-CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_CONNTRACK_PROC_COMPAT=y
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_NF_NAT=m
-CONFIG_NF_NAT_NEEDED=y
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_NETMAP=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
-CONFIG_NF_NAT_SNMP_BASIC=m
-CONFIG_NF_NAT_PROTO_DCCP=m
-CONFIG_NF_NAT_PROTO_GRE=m
-CONFIG_NF_NAT_PROTO_UDPLITE=m
-CONFIG_NF_NAT_PROTO_SCTP=m
-CONFIG_NF_NAT_FTP=m
-CONFIG_NF_NAT_IRC=m
-CONFIG_NF_NAT_TFTP=m
-CONFIG_NF_NAT_AMANDA=m
-CONFIG_NF_NAT_PPTP=m
-CONFIG_NF_NAT_H323=m
-CONFIG_NF_NAT_SIP=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-
-#
-# IPv6: Netfilter Configuration
-#
-CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_AH=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_MH=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP6_NF_SECURITY=m
-
-#
-# DECnet: Netfilter Configuration
-#
-CONFIG_DECNET_NF_GRABULATOR=m
-CONFIG_BRIDGE_NF_EBTABLES=m
-CONFIG_BRIDGE_EBT_BROUTE=m
-CONFIG_BRIDGE_EBT_T_FILTER=m
-CONFIG_BRIDGE_EBT_T_NAT=m
-CONFIG_BRIDGE_EBT_802_3=m
-CONFIG_BRIDGE_EBT_AMONG=m
-CONFIG_BRIDGE_EBT_ARP=m
-CONFIG_BRIDGE_EBT_IP=m
-CONFIG_BRIDGE_EBT_IP6=m
-CONFIG_BRIDGE_EBT_LIMIT=m
-CONFIG_BRIDGE_EBT_MARK=m
-CONFIG_BRIDGE_EBT_PKTTYPE=m
-CONFIG_BRIDGE_EBT_STP=m
-CONFIG_BRIDGE_EBT_VLAN=m
-CONFIG_BRIDGE_EBT_ARPREPLY=m
-CONFIG_BRIDGE_EBT_DNAT=m
-CONFIG_BRIDGE_EBT_MARK_T=m
-CONFIG_BRIDGE_EBT_REDIRECT=m
-CONFIG_BRIDGE_EBT_SNAT=m
-CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_BRIDGE_EBT_ULOG=m
-CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-CONFIG_INET_DCCP_DIAG=m
-
-#
-# DCCP CCIDs Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP_CCID2_DEBUG is not set
-CONFIG_IP_DCCP_CCID3=y
-# CONFIG_IP_DCCP_CCID3_DEBUG is not set
-CONFIG_IP_DCCP_CCID3_RTO=100
-CONFIG_IP_DCCP_TFRC_LIB=y
-CONFIG_IP_SCTP=m
-# CONFIG_SCTP_DBG_MSG is not set
-# CONFIG_SCTP_DBG_OBJCNT is not set
-# CONFIG_SCTP_HMAC_NONE is not set
-CONFIG_SCTP_HMAC_SHA1=y
-# CONFIG_SCTP_HMAC_MD5 is not set
-CONFIG_RDS=m
-# CONFIG_RDS_RDMA is not set
-# CONFIG_RDS_TCP is not set
-# CONFIG_RDS_DEBUG is not set
-CONFIG_TIPC=m
-# CONFIG_TIPC_ADVANCED is not set
-# CONFIG_TIPC_DEBUG is not set
-CONFIG_ATM=m
-CONFIG_ATM_CLIP=m
-# CONFIG_ATM_CLIP_NO_ICMP is not set
-CONFIG_ATM_LANE=m
-CONFIG_ATM_MPOA=m
-CONFIG_ATM_BR2684=m
-# CONFIG_ATM_BR2684_IPFILTER is not set
-CONFIG_STP=m
-CONFIG_BRIDGE=m
-# CONFIG_NET_DSA is not set
-CONFIG_VLAN_8021Q=m
-# CONFIG_VLAN_8021Q_GVRP is not set
-CONFIG_DECNET=m
-CONFIG_DECNET_ROUTER=y
-CONFIG_LLC=m
-CONFIG_LLC2=m
-CONFIG_IPX=m
-# CONFIG_IPX_INTERN is not set
-CONFIG_ATALK=m
-CONFIG_DEV_APPLETALK=m
-CONFIG_IPDDP=m
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP_DECAP=y
-CONFIG_X25=m
-CONFIG_LAPB=m
-CONFIG_ECONET=m
-CONFIG_ECONET_AUNUDP=y
-CONFIG_ECONET_NATIVE=y
-CONFIG_WAN_ROUTER=m
-CONFIG_PHONET=m
-CONFIG_IEEE802154=m
-CONFIG_NET_SCHED=y
-
-#
-# Queueing/Scheduling
-#
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_ATM=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_MULTIQ=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_INGRESS=m
-# CONFIG_NET_SCH_PLUG is not set
-
-#
-# Classification
-#
-CONFIG_NET_CLS=y
-CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_ROUTE=y
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_CLS_U32=m
-CONFIG_CLS_U32_PERF=y
-CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_FLOW=m
-CONFIG_NET_EMATCH=y
-CONFIG_NET_EMATCH_STACK=32
-CONFIG_NET_EMATCH_CMP=m
-CONFIG_NET_EMATCH_NBYTE=m
-CONFIG_NET_EMATCH_U32=m
-CONFIG_NET_EMATCH_META=m
-CONFIG_NET_EMATCH_TEXT=m
-CONFIG_NET_CLS_ACT=y
-CONFIG_NET_ACT_POLICE=m
-CONFIG_NET_ACT_GACT=m
-CONFIG_GACT_PROB=y
-CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
-CONFIG_NET_ACT_NAT=m
-CONFIG_NET_ACT_PEDIT=m
-CONFIG_NET_ACT_SIMP=m
-CONFIG_NET_ACT_SKBEDIT=m
-# CONFIG_NET_CLS_IND is not set
-CONFIG_NET_SCH_FIFO=y
-# CONFIG_DCB is not set
-
-#
-# Network testing
-#
-CONFIG_NET_PKTGEN=m
-# CONFIG_HAMRADIO is not set
-CONFIG_CAN=m
-CONFIG_CAN_RAW=m
-CONFIG_CAN_BCM=m
-
-#
-# CAN Device Drivers
-#
-CONFIG_CAN_VCAN=m
-CONFIG_CAN_DEV=m
-# CONFIG_CAN_CALC_BITTIMING is not set
-CONFIG_CAN_SJA1000=m
-CONFIG_CAN_SJA1000_PLATFORM=m
-CONFIG_CAN_EMS_PCI=m
-CONFIG_CAN_KVASER_PCI=m
-
-#
-# CAN USB interfaces
-#
-# CONFIG_CAN_EMS_USB is not set
-# CONFIG_CAN_DEBUG_DEVICES is not set
-CONFIG_IRDA=m
-
-#
-# IrDA protocols
-#
-CONFIG_IRLAN=m
-CONFIG_IRNET=m
-CONFIG_IRCOMM=m
-CONFIG_IRDA_ULTRA=y
-
-#
-# IrDA options
-#
-CONFIG_IRDA_CACHE_LAST_LSAP=y
-CONFIG_IRDA_FAST_RR=y
-# CONFIG_IRDA_DEBUG is not set
-
-#
-# Infrared-port device drivers
-#
-
-#
-# SIR device drivers
-#
-CONFIG_IRTTY_SIR=m
-
-#
-# Dongle support
-#
-CONFIG_DONGLE=y
-CONFIG_ESI_DONGLE=m
-CONFIG_ACTISYS_DONGLE=m
-CONFIG_TEKRAM_DONGLE=m
-CONFIG_TOIM3232_DONGLE=m
-CONFIG_LITELINK_DONGLE=m
-CONFIG_MA600_DONGLE=m
-CONFIG_GIRBIL_DONGLE=m
-CONFIG_MCP2120_DONGLE=m
-CONFIG_OLD_BELKIN_DONGLE=m
-CONFIG_ACT200L_DONGLE=m
-CONFIG_KINGSUN_DONGLE=m
-CONFIG_KSDAZZLE_DONGLE=m
-CONFIG_KS959_DONGLE=m
-
-#
-# FIR device drivers
-#
-CONFIG_USB_IRDA=m
-CONFIG_SIGMATEL_FIR=m
-CONFIG_NSC_FIR=m
-CONFIG_WINBOND_FIR=m
-CONFIG_SMC_IRCC_FIR=m
-CONFIG_ALI_FIR=m
-CONFIG_VLSI_FIR=m
-CONFIG_VIA_FIR=m
-CONFIG_MCS_FIR=m
-CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
-CONFIG_BT_RFCOMM=m
-CONFIG_BT_RFCOMM_TTY=y
-CONFIG_BT_BNEP=m
-CONFIG_BT_BNEP_MC_FILTER=y
-CONFIG_BT_BNEP_PROTO_FILTER=y
-CONFIG_BT_CMTP=m
-CONFIG_BT_HIDP=m
-
-#
-# Bluetooth device drivers
-#
-CONFIG_BT_HCIBTUSB=m
-CONFIG_BT_HCIBTSDIO=m
-CONFIG_BT_HCIUART=m
-CONFIG_BT_HCIUART_H4=y
-CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_LL=y
-CONFIG_BT_HCIBCM203X=m
-CONFIG_BT_HCIBPA10X=m
-CONFIG_BT_HCIBFUSB=m
-CONFIG_BT_HCIDTL1=m
-CONFIG_BT_HCIBT3C=m
-CONFIG_BT_HCIBLUECARD=m
-CONFIG_BT_HCIBTUART=m
-CONFIG_BT_HCIVHCI=m
-# CONFIG_BT_MRVL is not set
-CONFIG_AF_RXRPC=m
-# CONFIG_AF_RXRPC_DEBUG is not set
-CONFIG_RXKAD=m
-CONFIG_FIB_RULES=y
-CONFIG_WIRELESS=y
-CONFIG_CFG80211=m
-# CONFIG_NL80211_TESTMODE is not set
-# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
-# CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_CFG80211_DEFAULT_PS=y
-CONFIG_CFG80211_DEFAULT_PS_VALUE=1
-# CONFIG_CFG80211_DEBUGFS is not set
-# CONFIG_WIRELESS_OLD_REGULATORY is not set
-CONFIG_WIRELESS_EXT=y
-CONFIG_WIRELESS_EXT_SYSFS=y
-CONFIG_LIB80211=m
-# CONFIG_LIB80211_DEBUG is not set
-CONFIG_MAC80211=m
-CONFIG_MAC80211_HAS_RC=y
-CONFIG_MAC80211_RC_PID=y
-CONFIG_MAC80211_RC_MINSTREL=y
-CONFIG_MAC80211_RC_DEFAULT_PID=y
-# CONFIG_MAC80211_RC_DEFAULT_MINSTREL is not set
-CONFIG_MAC80211_RC_DEFAULT="pid"
-# CONFIG_MAC80211_MESH is not set
-CONFIG_MAC80211_LEDS=y
-# CONFIG_MAC80211_DEBUGFS is not set
-# CONFIG_MAC80211_DEBUG_MENU is not set
-CONFIG_WIMAX=m
-CONFIG_WIMAX_DEBUG_LEVEL=8
-CONFIG_RFKILL=m
-CONFIG_RFKILL_LEDS=y
-# CONFIG_RFKILL_INPUT is not set
-CONFIG_NET_9P=m
-CONFIG_NET_9P_VIRTIO=m
-CONFIG_NET_9P_RDMA=m
-# CONFIG_NET_9P_DEBUG is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_DEVTMPFS is not set
-CONFIG_STANDALONE=y
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-CONFIG_FW_LOADER=m
-# CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_EXTRA_FIRMWARE=""
-CONFIG_SYS_HYPERVISOR=y
-CONFIG_CONNECTOR=m
-CONFIG_MTD=m
-# CONFIG_MTD_DEBUG is not set
-CONFIG_MTD_TESTS=m
-CONFIG_MTD_CONCAT=m
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_REDBOOT_PARTS=m
-CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1
-# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set
-# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set
-CONFIG_MTD_AR7_PARTS=m
-
-#
-# User Modules And Translation Layers
-#
-CONFIG_MTD_CHAR=m
-CONFIG_HAVE_MTD_OTP=y
-CONFIG_MTD_BLKDEVS=m
-CONFIG_MTD_BLOCK=m
-CONFIG_MTD_BLOCK_RO=m
-CONFIG_FTL=m
-CONFIG_NFTL=m
-CONFIG_NFTL_RW=y
-CONFIG_INFTL=m
-CONFIG_RFD_FTL=m
-CONFIG_SSFDC=m
-CONFIG_MTD_OOPS=m
-
-#
-# RAM/ROM/Flash chip drivers
-#
-CONFIG_MTD_CFI=m
-CONFIG_MTD_JEDECPROBE=m
-CONFIG_MTD_GEN_PROBE=m
-# CONFIG_MTD_CFI_ADV_OPTIONS is not set
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-# CONFIG_MTD_CFI_I4 is not set
-# CONFIG_MTD_CFI_I8 is not set
-CONFIG_MTD_CFI_INTELEXT=m
-CONFIG_MTD_CFI_AMDSTD=m
-CONFIG_MTD_CFI_STAA=m
-CONFIG_MTD_CFI_UTIL=m
-CONFIG_MTD_RAM=m
-CONFIG_MTD_ROM=m
-CONFIG_MTD_ABSENT=m
-
-#
-# Mapping drivers for chip access
-#
-CONFIG_MTD_COMPLEX_MAPPINGS=y
-CONFIG_MTD_PHYSMAP=m
-# CONFIG_MTD_PHYSMAP_COMPAT is not set
-CONFIG_MTD_SC520CDP=m
-CONFIG_MTD_NETSC520=m
-CONFIG_MTD_TS5500=m
-CONFIG_MTD_SBC_GXX=m
-CONFIG_MTD_AMD76XROM=m
-CONFIG_MTD_ICHXROM=m
-CONFIG_MTD_ESB2ROM=m
-CONFIG_MTD_CK804XROM=m
-CONFIG_MTD_SCB2_FLASH=m
-CONFIG_MTD_NETtel=m
-CONFIG_MTD_L440GX=m
-CONFIG_MTD_PCI=m
-# CONFIG_MTD_GPIO_ADDR is not set
-CONFIG_MTD_INTEL_VR_NOR=m
-CONFIG_MTD_PLATRAM=m
-
-#
-# Self-contained MTD device drivers
-#
-CONFIG_MTD_PMC551=m
-CONFIG_MTD_PMC551_BUGFIX=y
-# CONFIG_MTD_PMC551_DEBUG is not set
-CONFIG_MTD_DATAFLASH=m
-# CONFIG_MTD_DATAFLASH_WRITE_VERIFY is not set
-# CONFIG_MTD_DATAFLASH_OTP is not set
-CONFIG_MTD_M25P80=m
-CONFIG_M25PXX_USE_FAST_READ=y
-# CONFIG_MTD_SST25L is not set
-CONFIG_MTD_SLRAM=m
-CONFIG_MTD_PHRAM=m
-CONFIG_MTD_MTDRAM=m
-CONFIG_MTDRAM_TOTAL_SIZE=4096
-CONFIG_MTDRAM_ERASE_SIZE=128
-CONFIG_MTD_BLOCK2MTD=m
-
-#
-# Disk-On-Chip Device Drivers
-#
-CONFIG_MTD_DOC2000=m
-CONFIG_MTD_DOC2001=m
-CONFIG_MTD_DOC2001PLUS=m
-CONFIG_MTD_DOCPROBE=m
-CONFIG_MTD_DOCECC=m
-CONFIG_MTD_DOCPROBE_ADVANCED=y
-CONFIG_MTD_DOCPROBE_ADDRESS=0x0000
-# CONFIG_MTD_DOCPROBE_HIGH is not set
-# CONFIG_MTD_DOCPROBE_55AA is not set
-CONFIG_MTD_NAND=m
-# CONFIG_MTD_NAND_VERIFY_WRITE is not set
-CONFIG_MTD_NAND_ECC_SMC=y
-# CONFIG_MTD_NAND_MUSEUM_IDS is not set
-CONFIG_MTD_NAND_IDS=m
-CONFIG_MTD_NAND_DISKONCHIP=m
-# CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED is not set
-CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0
-# CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE is not set
-CONFIG_MTD_NAND_CAFE=m
-CONFIG_MTD_NAND_NANDSIM=m
-CONFIG_MTD_NAND_PLATFORM=m
-CONFIG_MTD_ALAUDA=m
-CONFIG_MTD_ONENAND=m
-# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set
-# CONFIG_MTD_ONENAND_GENERIC is not set
-CONFIG_MTD_ONENAND_OTP=y
-CONFIG_MTD_ONENAND_2X_PROGRAM=y
-CONFIG_MTD_ONENAND_SIM=m
-
-#
-# LPDDR flash memory drivers
-#
-CONFIG_MTD_LPDDR=m
-CONFIG_MTD_QINFO_PROBE=m
-
-#
-# UBI - Unsorted block images
-#
-CONFIG_MTD_UBI=m
-CONFIG_MTD_UBI_WL_THRESHOLD=4096
-CONFIG_MTD_UBI_BEB_RESERVE=1
-# CONFIG_MTD_UBI_GLUEBI is not set
-
-#
-# UBI debugging options
-#
-# CONFIG_MTD_UBI_DEBUG is not set
-CONFIG_PARPORT=m
-CONFIG_PARPORT_PC=m
-CONFIG_PARPORT_SERIAL=m
-# CONFIG_PARPORT_PC_FIFO is not set
-# CONFIG_PARPORT_PC_SUPERIO is not set
-CONFIG_PARPORT_PC_PCMCIA=m
-# CONFIG_PARPORT_GSC is not set
-CONFIG_PARPORT_AX88796=m
-# CONFIG_PARPORT_1284 is not set
-CONFIG_PARPORT_NOT_PC=y
-CONFIG_PNP=y
-# CONFIG_PNP_DEBUG_MESSAGES is not set
-
-#
-# Protocols
-#
-CONFIG_PNPACPI=y
-CONFIG_BLK_DEV=y
-CONFIG_BLK_DEV_FD=m
-# CONFIG_PARIDE is not set
-CONFIG_BLK_CPQ_DA=m
-CONFIG_BLK_CPQ_CISS_DA=m
-CONFIG_CISS_SCSI_TAPE=y
-CONFIG_BLK_DEV_DAC960=m
-CONFIG_BLK_DEV_UMEM=m
-# CONFIG_BLK_DEV_COW_COMMON is not set
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_OSD=m
-CONFIG_BLK_DEV_SX8=m
-CONFIG_BLK_DEV_UB=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-# CONFIG_BLK_DEV_XIP is not set
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_CDROM_PKTCDVD_BUFFERS=8
-# CONFIG_CDROM_PKTCDVD_WCACHE is not set
-CONFIG_ATA_OVER_ETH=m
-CONFIG_XEN_BLKDEV_FRONTEND=y
-CONFIG_VIRTIO_BLK=m
-# CONFIG_BLK_DEV_HD is not set
-CONFIG_MISC_DEVICES=y
-CONFIG_IBM_ASM=m
-CONFIG_PHANTOM=m
-CONFIG_SGI_IOC4=m
-CONFIG_TIFM_CORE=m
-CONFIG_TIFM_7XX1=m
-CONFIG_ICS932S401=m
-CONFIG_ENCLOSURE_SERVICES=m
-CONFIG_HP_ILO=m
-CONFIG_DELL_LAPTOP=m
-CONFIG_ISL29003=m
-CONFIG_C2PORT=m
-CONFIG_C2PORT_DURAMAR_2150=m
-
-#
-# EEPROM support
-#
-CONFIG_EEPROM_AT24=m
-CONFIG_EEPROM_AT25=m
-CONFIG_EEPROM_LEGACY=m
-CONFIG_EEPROM_MAX6875=m
-CONFIG_EEPROM_93CX6=m
-CONFIG_CB710_CORE=m
-# CONFIG_CB710_DEBUG is not set
-CONFIG_CB710_DEBUG_ASSUMPTIONS=y
-CONFIG_HAVE_IDE=y
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=m
-CONFIG_SCSI_DMA=y
-CONFIG_SCSI_TGT=m
-CONFIG_SCSI_NETLINK=y
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
-CONFIG_BLK_DEV_SD=m
-CONFIG_CHR_DEV_ST=m
-CONFIG_CHR_DEV_OSST=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=m
-CONFIG_CHR_DEV_SCH=m
-CONFIG_SCSI_ENCLOSURE=m
-CONFIG_SCSI_MULTI_LUN=y
-# CONFIG_SCSI_CONSTANTS is not set
-# CONFIG_SCSI_LOGGING is not set
-CONFIG_SCSI_SCAN_ASYNC=y
-CONFIG_SCSI_WAIT_SCAN=m
-
-#
-# SCSI Transports
-#
-CONFIG_SCSI_SPI_ATTRS=m
-CONFIG_SCSI_FC_ATTRS=m
-CONFIG_SCSI_FC_TGT_ATTRS=y
-CONFIG_SCSI_ISCSI_ATTRS=m
-CONFIG_SCSI_SAS_ATTRS=m
-CONFIG_SCSI_SAS_LIBSAS=m
-CONFIG_SCSI_SAS_ATA=y
-CONFIG_SCSI_SAS_HOST_SMP=y
-# CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set
-CONFIG_SCSI_SRP_ATTRS=m
-CONFIG_SCSI_SRP_TGT_ATTRS=y
-CONFIG_SCSI_LOWLEVEL=y
-CONFIG_ISCSI_TCP=m
-CONFIG_SCSI_CXGB3_ISCSI=m
-CONFIG_SCSI_BNX2_ISCSI=m
-# CONFIG_BE2ISCSI is not set
-CONFIG_BLK_DEV_3W_XXXX_RAID=m
-CONFIG_SCSI_3W_9XXX=m
-CONFIG_SCSI_ACARD=m
-CONFIG_SCSI_AACRAID=m
-CONFIG_SCSI_AIC7XXX=m
-CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
-CONFIG_AIC7XXX_RESET_DELAY_MS=15000
-# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set
-CONFIG_AIC7XXX_DEBUG_ENABLE=y
-CONFIG_AIC7XXX_DEBUG_MASK=0
-CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
-CONFIG_SCSI_AIC7XXX_OLD=m
-CONFIG_SCSI_AIC79XX=m
-CONFIG_AIC79XX_CMDS_PER_DEVICE=32
-CONFIG_AIC79XX_RESET_DELAY_MS=15000
-# CONFIG_AIC79XX_BUILD_FIRMWARE is not set
-CONFIG_AIC79XX_DEBUG_ENABLE=y
-CONFIG_AIC79XX_DEBUG_MASK=0
-CONFIG_AIC79XX_REG_PRETTY_PRINT=y
-CONFIG_SCSI_AIC94XX=m
-# CONFIG_AIC94XX_DEBUG is not set
-CONFIG_SCSI_MVSAS=m
-CONFIG_SCSI_MVSAS_DEBUG=y
-CONFIG_SCSI_DPT_I2O=m
-CONFIG_SCSI_ADVANSYS=m
-CONFIG_SCSI_ARCMSR=m
-CONFIG_MEGARAID_NEWGEN=y
-CONFIG_MEGARAID_MM=m
-CONFIG_MEGARAID_MAILBOX=m
-CONFIG_MEGARAID_LEGACY=m
-CONFIG_MEGARAID_SAS=m
-CONFIG_SCSI_MPT2SAS=m
-CONFIG_SCSI_MPT2SAS_MAX_SGE=128
-# CONFIG_SCSI_MPT2SAS_LOGGING is not set
-CONFIG_SCSI_HPTIOP=m
-CONFIG_SCSI_BUSLOGIC=m
-CONFIG_LIBFC=m
-CONFIG_LIBFCOE=m
-CONFIG_FCOE=m
-CONFIG_FCOE_FNIC=m
-CONFIG_SCSI_DMX3191D=m
-CONFIG_SCSI_EATA=m
-# CONFIG_SCSI_EATA_TAGGED_QUEUE is not set
-# CONFIG_SCSI_EATA_LINKED_COMMANDS is not set
-CONFIG_SCSI_EATA_MAX_TAGS=16
-CONFIG_SCSI_FUTURE_DOMAIN=m
-CONFIG_SCSI_GDTH=m
-CONFIG_SCSI_IPS=m
-CONFIG_SCSI_INITIO=m
-CONFIG_SCSI_INIA100=m
-CONFIG_SCSI_PPA=m
-CONFIG_SCSI_IMM=m
-# CONFIG_SCSI_IZIP_EPP16 is not set
-# CONFIG_SCSI_IZIP_SLOW_CTR is not set
-CONFIG_SCSI_STEX=m
-CONFIG_SCSI_SYM53C8XX_2=m
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
-CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
-CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
-CONFIG_SCSI_SYM53C8XX_MMIO=y
-CONFIG_SCSI_IPR=m
-CONFIG_SCSI_IPR_TRACE=y
-# CONFIG_SCSI_IPR_DUMP is not set
-CONFIG_SCSI_QLOGIC_1280=m
-CONFIG_SCSI_QLA_FC=m
-CONFIG_SCSI_QLA_ISCSI=m
-CONFIG_SCSI_LPFC=m
-# CONFIG_SCSI_LPFC_DEBUG_FS is not set
-CONFIG_SCSI_DC395x=m
-CONFIG_SCSI_DC390T=m
-CONFIG_SCSI_DEBUG=m
-# CONFIG_SCSI_PMCRAID is not set
-CONFIG_SCSI_SRP=m
-# CONFIG_SCSI_BFA_FC is not set
-CONFIG_SCSI_LOWLEVEL_PCMCIA=y
-CONFIG_PCMCIA_FDOMAIN=m
-CONFIG_PCMCIA_QLOGIC=m
-CONFIG_PCMCIA_SYM53C500=m
-CONFIG_SCSI_DH=m
-CONFIG_SCSI_DH_RDAC=m
-CONFIG_SCSI_DH_HP_SW=m
-CONFIG_SCSI_DH_EMC=m
-CONFIG_SCSI_DH_ALUA=m
-CONFIG_SCSI_OSD_INITIATOR=m
-CONFIG_SCSI_OSD_ULD=m
-CONFIG_SCSI_OSD_DPRINT_SENSE=1
-# CONFIG_SCSI_OSD_DEBUG is not set
-CONFIG_ATA=m
-# CONFIG_ATA_NONSTANDARD is not set
-CONFIG_ATA_VERBOSE_ERROR=y
-CONFIG_ATA_ACPI=y
-CONFIG_SATA_PMP=y
-CONFIG_SATA_AHCI=m
-CONFIG_SATA_SIL24=m
-CONFIG_ATA_SFF=y
-CONFIG_SATA_SVW=m
-CONFIG_ATA_PIIX=m
-CONFIG_SATA_MV=m
-CONFIG_SATA_NV=m
-CONFIG_PDC_ADMA=m
-CONFIG_SATA_QSTOR=m
-CONFIG_SATA_PROMISE=m
-CONFIG_SATA_SX4=m
-CONFIG_SATA_SIL=m
-CONFIG_SATA_SIS=m
-CONFIG_SATA_ULI=m
-CONFIG_SATA_VIA=m
-CONFIG_SATA_VITESSE=m
-CONFIG_SATA_INIC162X=m
-CONFIG_PATA_ACPI=m
-CONFIG_PATA_ALI=m
-CONFIG_PATA_AMD=m
-CONFIG_PATA_ARTOP=m
-CONFIG_PATA_ATP867X=m
-CONFIG_PATA_ATIIXP=m
-CONFIG_PATA_CMD640_PCI=m
-CONFIG_PATA_CMD64X=m
-CONFIG_PATA_CS5520=m
-CONFIG_PATA_CS5530=m
-CONFIG_PATA_CYPRESS=m
-CONFIG_PATA_EFAR=m
-CONFIG_ATA_GENERIC=m
-CONFIG_PATA_HPT366=m
-CONFIG_PATA_HPT37X=m
-CONFIG_PATA_HPT3X2N=m
-CONFIG_PATA_HPT3X3=m
-CONFIG_PATA_HPT3X3_DMA=y
-CONFIG_PATA_IT821X=m
-CONFIG_PATA_IT8213=m
-CONFIG_PATA_JMICRON=m
-CONFIG_PATA_TRIFLEX=m
-CONFIG_PATA_MARVELL=m
-CONFIG_PATA_MPIIX=m
-CONFIG_PATA_OLDPIIX=m
-CONFIG_PATA_NETCELL=m
-CONFIG_PATA_NINJA32=m
-CONFIG_PATA_NS87410=m
-CONFIG_PATA_NS87415=m
-CONFIG_PATA_OPTI=m
-CONFIG_PATA_OPTIDMA=m
-CONFIG_PATA_PCMCIA=m
-CONFIG_PATA_PDC_OLD=m
-CONFIG_PATA_RADISYS=m
-CONFIG_PATA_RDC=m
-CONFIG_PATA_RZ1000=m
-CONFIG_PATA_SC1200=m
-CONFIG_PATA_SERVERWORKS=m
-CONFIG_PATA_PDC2027X=m
-CONFIG_PATA_SIL680=m
-CONFIG_PATA_SIS=m
-CONFIG_PATA_VIA=m
-CONFIG_PATA_WINBOND=m
-CONFIG_PATA_PLATFORM=m
-CONFIG_PATA_SCH=m
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-# CONFIG_MD_AUTODETECT is not set
-CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
-CONFIG_MD_RAID1=m
-CONFIG_MD_RAID10=m
-CONFIG_MD_RAID456=m
-# CONFIG_MULTICORE_RAID456 is not set
-CONFIG_MD_RAID6_PQ=m
-# CONFIG_ASYNC_RAID6_TEST is not set
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
-CONFIG_BLK_DEV_DM=m
-# CONFIG_DM_DEBUG is not set
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_LOG_USERSPACE=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_DM_MULTIPATH_QL=m
-CONFIG_DM_MULTIPATH_ST=m
-CONFIG_DM_DELAY=m
-# CONFIG_DM_UEVENT is not set
-CONFIG_FUSION=y
-CONFIG_FUSION_SPI=m
-CONFIG_FUSION_FC=m
-CONFIG_FUSION_SAS=m
-CONFIG_FUSION_MAX_SGE=128
-CONFIG_FUSION_CTL=m
-# CONFIG_FUSION_LOGGING is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# You can enable one or both FireWire driver stacks.
-#
-
-#
-# See the help texts for more information.
-#
-CONFIG_FIREWIRE=m
-CONFIG_FIREWIRE_OHCI=m
-CONFIG_FIREWIRE_OHCI_DEBUG=y
-CONFIG_FIREWIRE_SBP2=m
-CONFIG_FIREWIRE_NET=m
-CONFIG_IEEE1394=m
-CONFIG_IEEE1394_OHCI1394=m
-CONFIG_IEEE1394_PCILYNX=m
-CONFIG_IEEE1394_SBP2=m
-# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set
-CONFIG_IEEE1394_ETH1394_ROM_ENTRY=y
-CONFIG_IEEE1394_ETH1394=m
-CONFIG_IEEE1394_RAWIO=m
-CONFIG_IEEE1394_VIDEO1394=m
-CONFIG_IEEE1394_DV1394=m
-# CONFIG_IEEE1394_VERBOSEDEBUG is not set
-CONFIG_I2O=m
-CONFIG_I2O_LCT_NOTIFY_ON_CHANGES=y
-CONFIG_I2O_EXT_ADAPTEC=y
-CONFIG_I2O_EXT_ADAPTEC_DMA64=y
-CONFIG_I2O_CONFIG=m
-CONFIG_I2O_CONFIG_OLD_IOCTL=y
-CONFIG_I2O_BUS=m
-CONFIG_I2O_BLOCK=m
-CONFIG_I2O_SCSI=m
-CONFIG_I2O_PROC=m
-# CONFIG_MACINTOSH_DRIVERS is not set
-CONFIG_NETDEVICES=y
-CONFIG_IFB=m
-CONFIG_DUMMY=m
-CONFIG_BONDING=m
-CONFIG_MACVLAN=m
-CONFIG_EQUALIZER=m
-CONFIG_TUN=m
-CONFIG_VETH=m
-CONFIG_NET_SB1000=m
-CONFIG_ARCNET=m
-CONFIG_ARCNET_1201=m
-CONFIG_ARCNET_1051=m
-CONFIG_ARCNET_RAW=m
-CONFIG_ARCNET_CAP=m
-CONFIG_ARCNET_COM90xx=m
-CONFIG_ARCNET_COM90xxIO=m
-CONFIG_ARCNET_RIM_I=m
-CONFIG_ARCNET_COM20020=m
-CONFIG_ARCNET_COM20020_PCI=m
-CONFIG_PHYLIB=m
-
-#
-# MII PHY device drivers
-#
-CONFIG_MARVELL_PHY=m
-CONFIG_DAVICOM_PHY=m
-CONFIG_QSEMI_PHY=m
-CONFIG_LXT_PHY=m
-CONFIG_CICADA_PHY=m
-CONFIG_VITESSE_PHY=m
-CONFIG_SMSC_PHY=m
-CONFIG_BROADCOM_PHY=m
-CONFIG_ICPLUS_PHY=m
-CONFIG_REALTEK_PHY=m
-CONFIG_NATIONAL_PHY=m
-CONFIG_STE10XP=m
-CONFIG_LSI_ET1011C_PHY=m
-CONFIG_MDIO_BITBANG=m
-CONFIG_MDIO_GPIO=m
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=m
-CONFIG_HAPPYMEAL=m
-CONFIG_SUNGEM=m
-CONFIG_CASSINI=m
-CONFIG_NET_VENDOR_3COM=y
-CONFIG_VORTEX=m
-CONFIG_TYPHOON=m
-CONFIG_ENC28J60=m
-# CONFIG_ENC28J60_WRITEVERIFY is not set
-CONFIG_ETHOC=m
-CONFIG_DNET=m
-CONFIG_NET_TULIP=y
-CONFIG_DE2104X=m
-CONFIG_DE2104X_DSL=0
-CONFIG_TULIP=m
-# CONFIG_TULIP_MWI is not set
-# CONFIG_TULIP_MMIO is not set
-# CONFIG_TULIP_NAPI is not set
-CONFIG_DE4X5=m
-CONFIG_WINBOND_840=m
-CONFIG_DM9102=m
-CONFIG_ULI526X=m
-CONFIG_PCMCIA_XIRCOM=m
-CONFIG_HP100=m
-# CONFIG_IBM_NEW_EMAC_ZMII is not set
-# CONFIG_IBM_NEW_EMAC_RGMII is not set
-# CONFIG_IBM_NEW_EMAC_TAH is not set
-# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
-# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
-# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
-# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
-CONFIG_NET_PCI=y
-CONFIG_PCNET32=m
-CONFIG_AMD8111_ETH=m
-CONFIG_ADAPTEC_STARFIRE=m
-CONFIG_B44=m
-CONFIG_B44_PCI_AUTOSELECT=y
-CONFIG_B44_PCICORE_AUTOSELECT=y
-CONFIG_B44_PCI=y
-CONFIG_FORCEDETH=m
-# CONFIG_FORCEDETH_NAPI is not set
-CONFIG_E100=m
-CONFIG_FEALNX=m
-CONFIG_NATSEMI=m
-CONFIG_NE2K_PCI=m
-CONFIG_8139CP=m
-CONFIG_8139TOO=m
-CONFIG_8139TOO_PIO=y
-# CONFIG_8139TOO_TUNE_TWISTER is not set
-# CONFIG_8139TOO_8129 is not set
-# CONFIG_8139_OLD_RX_RESET is not set
-CONFIG_R6040=m
-CONFIG_SIS900=m
-CONFIG_EPIC100=m
-CONFIG_SMSC9420=m
-CONFIG_SUNDANCE=m
-# CONFIG_SUNDANCE_MMIO is not set
-CONFIG_TLAN=m
-CONFIG_KS8842=m
-CONFIG_KS8851=m
-CONFIG_KS8851_MLL=m
-CONFIG_VIA_RHINE=m
-# CONFIG_VIA_RHINE_MMIO is not set
-CONFIG_SC92031=m
-CONFIG_NET_POCKET=y
-CONFIG_ATP=m
-CONFIG_DE600=m
-CONFIG_DE620=m
-CONFIG_ATL2=m
-CONFIG_NETDEV_1000=y
-CONFIG_ACENIC=m
-# CONFIG_ACENIC_OMIT_TIGON_I is not set
-CONFIG_DL2K=m
-CONFIG_E1000=m
-CONFIG_E1000E=m
-CONFIG_IP1000=m
-CONFIG_IGB=m
-CONFIG_IGB_DCA=y
-CONFIG_IGBVF=m
-CONFIG_NS83820=m
-CONFIG_HAMACHI=m
-CONFIG_YELLOWFIN=m
-CONFIG_R8169=m
-CONFIG_R8169_VLAN=y
-CONFIG_SIS190=m
-CONFIG_SKGE=m
-# CONFIG_SKGE_DEBUG is not set
-CONFIG_SKY2=m
-# CONFIG_SKY2_DEBUG is not set
-CONFIG_VIA_VELOCITY=m
-CONFIG_TIGON3=m
-CONFIG_BNX2=m
-CONFIG_CNIC=m
-CONFIG_QLA3XXX=m
-CONFIG_ATL1=m
-CONFIG_ATL1E=m
-CONFIG_ATL1C=m
-CONFIG_JME=m
-CONFIG_NETDEV_10000=y
-CONFIG_MDIO=m
-CONFIG_CHELSIO_T1=m
-CONFIG_CHELSIO_T1_1G=y
-CONFIG_CHELSIO_T3_DEPENDS=y
-CONFIG_CHELSIO_T3=m
-CONFIG_ENIC=m
-CONFIG_IXGBE=m
-CONFIG_IXGBE_DCA=y
-CONFIG_IXGB=m
-CONFIG_S2IO=m
-CONFIG_VXGE=m
-# CONFIG_VXGE_DEBUG_TRACE_ALL is not set
-CONFIG_MYRI10GE=m
-CONFIG_MYRI10GE_DCA=y
-CONFIG_NETXEN_NIC=m
-CONFIG_NIU=m
-CONFIG_MLX4_EN=m
-CONFIG_MLX4_CORE=m
-CONFIG_MLX4_DEBUG=y
-CONFIG_TEHUTI=m
-CONFIG_BNX2X=m
-CONFIG_QLGE=m
-CONFIG_SFC=m
-CONFIG_SFC_MTD=y
-CONFIG_BE2NET=m
-# CONFIG_TR is not set
-CONFIG_WLAN=y
-# CONFIG_WLAN_PRE80211 is not set
-# CONFIG_WLAN_80211 is not set
-
-#
-# WiMAX Wireless Broadband devices
-#
-CONFIG_WIMAX_I2400M=m
-CONFIG_WIMAX_I2400M_USB=m
-CONFIG_WIMAX_I2400M_SDIO=m
-CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8
-
-#
-# USB Network Adapters
-#
-CONFIG_USB_CATC=m
-CONFIG_USB_KAWETH=m
-CONFIG_USB_PEGASUS=m
-CONFIG_USB_RTL8150=m
-CONFIG_USB_USBNET=m
-CONFIG_USB_NET_AX8817X=m
-CONFIG_USB_NET_CDCETHER=m
-CONFIG_USB_NET_CDC_EEM=m
-CONFIG_USB_NET_DM9601=m
-CONFIG_USB_NET_SMSC95XX=m
-CONFIG_USB_NET_GL620A=m
-CONFIG_USB_NET_NET1080=m
-CONFIG_USB_NET_PLUSB=m
-CONFIG_USB_NET_MCS7830=m
-CONFIG_USB_NET_RNDIS_HOST=m
-CONFIG_USB_NET_CDC_SUBSET=m
-CONFIG_USB_ALI_M5632=y
-CONFIG_USB_AN2720=y
-CONFIG_USB_BELKIN=y
-CONFIG_USB_ARMLINUX=y
-CONFIG_USB_EPSON2888=y
-CONFIG_USB_KC2190=y
-CONFIG_USB_NET_ZAURUS=m
-CONFIG_USB_HSO=m
-CONFIG_USB_NET_INT51X1=m
-CONFIG_USB_CDC_PHONET=m
-CONFIG_NET_PCMCIA=y
-CONFIG_PCMCIA_3C589=m
-CONFIG_PCMCIA_3C574=m
-CONFIG_PCMCIA_FMVJ18X=m
-CONFIG_PCMCIA_PCNET=m
-CONFIG_PCMCIA_NMCLAN=m
-CONFIG_PCMCIA_SMC91C92=m
-CONFIG_PCMCIA_XIRC2PS=m
-CONFIG_PCMCIA_AXNET=m
-CONFIG_ARCNET_COM20020_CS=m
-CONFIG_WAN=y
-CONFIG_LANMEDIA=m
-CONFIG_HDLC=m
-CONFIG_HDLC_RAW=m
-CONFIG_HDLC_RAW_ETH=m
-CONFIG_HDLC_CISCO=m
-CONFIG_HDLC_FR=m
-CONFIG_HDLC_PPP=m
-CONFIG_HDLC_X25=m
-CONFIG_PCI200SYN=m
-CONFIG_WANXL=m
-# CONFIG_WANXL_BUILD_FIRMWARE is not set
-CONFIG_PC300TOO=m
-CONFIG_FARSYNC=m
-CONFIG_DSCC4=m
-CONFIG_DSCC4_PCISYNC=y
-CONFIG_DSCC4_PCI_RST=y
-CONFIG_DLCI=m
-CONFIG_DLCI_MAX=8
-CONFIG_WAN_ROUTER_DRIVERS=m
-CONFIG_CYCLADES_SYNC=m
-CONFIG_CYCLOMX_X25=y
-CONFIG_LAPBETHER=m
-CONFIG_X25_ASY=m
-CONFIG_SBNI=m
-CONFIG_SBNI_MULTILINE=y
-CONFIG_ATM_DRIVERS=y
-CONFIG_ATM_DUMMY=m
-CONFIG_ATM_TCP=m
-CONFIG_ATM_LANAI=m
-CONFIG_ATM_ENI=m
-# CONFIG_ATM_ENI_DEBUG is not set
-# CONFIG_ATM_ENI_TUNE_BURST is not set
-CONFIG_ATM_FIRESTREAM=m
-CONFIG_ATM_ZATM=m
-# CONFIG_ATM_ZATM_DEBUG is not set
-CONFIG_ATM_IDT77252=m
-# CONFIG_ATM_IDT77252_DEBUG is not set
-# CONFIG_ATM_IDT77252_RCV_ALL is not set
-CONFIG_ATM_IDT77252_USE_SUNI=y
-CONFIG_ATM_AMBASSADOR=m
-# CONFIG_ATM_AMBASSADOR_DEBUG is not set
-CONFIG_ATM_HORIZON=m
-# CONFIG_ATM_HORIZON_DEBUG is not set
-CONFIG_ATM_IA=m
-# CONFIG_ATM_IA_DEBUG is not set
-CONFIG_ATM_FORE200E=m
-CONFIG_ATM_FORE200E_USE_TASKLET=y
-CONFIG_ATM_FORE200E_TX_RETRY=16
-CONFIG_ATM_FORE200E_DEBUG=0
-CONFIG_ATM_HE=m
-CONFIG_ATM_HE_USE_SUNI=y
-CONFIG_ATM_SOLOS=m
-CONFIG_IEEE802154_DRIVERS=m
-CONFIG_IEEE802154_FAKEHARD=m
-CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_FDDI=y
-CONFIG_DEFXX=m
-# CONFIG_DEFXX_MMIO is not set
-CONFIG_SKFP=m
-CONFIG_HIPPI=y
-CONFIG_ROADRUNNER=m
-# CONFIG_ROADRUNNER_LARGE_RINGS is not set
-CONFIG_PLIP=m
-CONFIG_PPP=m
-CONFIG_PPP_MULTILINK=y
-CONFIG_PPP_FILTER=y
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_MPPE=m
-CONFIG_PPPOE=m
-CONFIG_PPPOATM=m
-CONFIG_PPPOL2TP=m
-CONFIG_SLIP=m
-CONFIG_SLIP_COMPRESSED=y
-CONFIG_SLHC=m
-CONFIG_SLIP_SMART=y
-CONFIG_SLIP_MODE_SLIP6=y
-# CONFIG_NET_FC is not set
-CONFIG_NETCONSOLE=m
-CONFIG_NETCONSOLE_DYNAMIC=y
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
-CONFIG_VIRTIO_NET=m
-CONFIG_VMXNET3=m
-CONFIG_ISDN=y
-# CONFIG_ISDN_I4L is not set
-CONFIG_MISDN=m
-CONFIG_MISDN_DSP=m
-CONFIG_MISDN_L1OIP=m
-
-#
-# mISDN hardware drivers
-#
-CONFIG_MISDN_HFCPCI=m
-CONFIG_MISDN_HFCMULTI=m
-CONFIG_MISDN_HFCUSB=m
-CONFIG_MISDN_AVMFRITZ=m
-# CONFIG_MISDN_SPEEDFAX is not set
-# CONFIG_MISDN_INFINEON is not set
-# CONFIG_MISDN_W6692 is not set
-# CONFIG_MISDN_NETJET is not set
-CONFIG_MISDN_IPAC=m
-CONFIG_ISDN_CAPI=m
-# CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON is not set
-# CONFIG_CAPI_TRACE is not set
-CONFIG_ISDN_CAPI_MIDDLEWARE=y
-CONFIG_ISDN_CAPI_CAPI20=m
-CONFIG_ISDN_CAPI_CAPIFS_BOOL=y
-CONFIG_ISDN_CAPI_CAPIFS=m
-
-#
-# CAPI hardware drivers
-#
-CONFIG_CAPI_AVM=y
-CONFIG_ISDN_DRV_AVMB1_B1PCI=m
-CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
-CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
-CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
-CONFIG_ISDN_DRV_AVMB1_T1PCI=m
-CONFIG_ISDN_DRV_AVMB1_C4=m
-CONFIG_CAPI_EICON=y
-CONFIG_ISDN_DIVAS=m
-CONFIG_ISDN_DIVAS_BRIPCI=y
-CONFIG_ISDN_DIVAS_PRIPCI=y
-CONFIG_ISDN_DIVAS_DIVACAPI=m
-CONFIG_ISDN_DIVAS_USERIDI=m
-CONFIG_ISDN_DIVAS_MAINT=m
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-CONFIG_INPUT_FF_MEMLESS=m
-CONFIG_INPUT_POLLDEV=m
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=m
-CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-CONFIG_INPUT_JOYDEV=m
-CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_EVBUG=m
-CONFIG_XEN_KBDDEV_FRONTEND=m
-
-#
-# Input Device Drivers
-#
-CONFIG_INPUT_KEYBOARD=y
-# CONFIG_KEYBOARD_ADP5588 is not set
-CONFIG_KEYBOARD_ATKBD=y
-# CONFIG_QT2160 is not set
-CONFIG_KEYBOARD_LKKBD=m
-CONFIG_KEYBOARD_GPIO=m
-CONFIG_KEYBOARD_MATRIX=m
-CONFIG_KEYBOARD_LM8323=m
-# CONFIG_KEYBOARD_MAX7359 is not set
-CONFIG_KEYBOARD_NEWTON=m
-# CONFIG_KEYBOARD_OPENCORES is not set
-CONFIG_KEYBOARD_STOWAWAY=m
-CONFIG_KEYBOARD_SUNKBD=m
-CONFIG_KEYBOARD_XTKBD=m
-CONFIG_INPUT_MOUSE=y
-CONFIG_MOUSE_PS2=m
-CONFIG_MOUSE_PS2_ALPS=y
-CONFIG_MOUSE_PS2_LOGIPS2PP=y
-CONFIG_MOUSE_PS2_SYNAPTICS=y
-CONFIG_MOUSE_PS2_LIFEBOOK=y
-CONFIG_MOUSE_PS2_TRACKPOINT=y
-# CONFIG_MOUSE_PS2_ELANTECH is not set
-# CONFIG_MOUSE_PS2_SENTELIC is not set
-# CONFIG_MOUSE_PS2_TOUCHKIT is not set
-CONFIG_MOUSE_SERIAL=m
-CONFIG_MOUSE_APPLETOUCH=m
-CONFIG_MOUSE_BCM5974=m
-CONFIG_MOUSE_VSXXXAA=m
-CONFIG_MOUSE_GPIO=m
-CONFIG_MOUSE_SYNAPTICS_I2C=m
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TABLET is not set
-CONFIG_INPUT_TOUCHSCREEN=y
-CONFIG_TOUCHSCREEN_ADS7846=m
-CONFIG_TOUCHSCREEN_AD7877=m
-CONFIG_TOUCHSCREEN_AD7879_I2C=m
-CONFIG_TOUCHSCREEN_AD7879=m
-CONFIG_TOUCHSCREEN_EETI=m
-CONFIG_TOUCHSCREEN_FUJITSU=m
-CONFIG_TOUCHSCREEN_GUNZE=m
-CONFIG_TOUCHSCREEN_ELO=m
-CONFIG_TOUCHSCREEN_WACOM_W8001=m
-# CONFIG_TOUCHSCREEN_MCS5000 is not set
-CONFIG_TOUCHSCREEN_MTOUCH=m
-CONFIG_TOUCHSCREEN_INEXIO=m
-CONFIG_TOUCHSCREEN_MK712=m
-CONFIG_TOUCHSCREEN_PENMOUNT=m
-CONFIG_TOUCHSCREEN_TOUCHRIGHT=m
-CONFIG_TOUCHSCREEN_TOUCHWIN=m
-CONFIG_TOUCHSCREEN_UCB1400=m
-CONFIG_TOUCHSCREEN_WM97XX=m
-CONFIG_TOUCHSCREEN_WM9705=y
-CONFIG_TOUCHSCREEN_WM9712=y
-CONFIG_TOUCHSCREEN_WM9713=y
-CONFIG_TOUCHSCREEN_USB_COMPOSITE=m
-CONFIG_TOUCHSCREEN_USB_EGALAX=y
-CONFIG_TOUCHSCREEN_USB_PANJIT=y
-CONFIG_TOUCHSCREEN_USB_3M=y
-CONFIG_TOUCHSCREEN_USB_ITM=y
-CONFIG_TOUCHSCREEN_USB_ETURBO=y
-CONFIG_TOUCHSCREEN_USB_GUNZE=y
-CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y
-CONFIG_TOUCHSCREEN_USB_IRTOUCH=y
-CONFIG_TOUCHSCREEN_USB_IDEALTEK=y
-CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y
-CONFIG_TOUCHSCREEN_USB_GOTOP=y
-CONFIG_TOUCHSCREEN_USB_JASTEC=y
-CONFIG_TOUCHSCREEN_USB_E2I=y
-CONFIG_TOUCHSCREEN_TOUCHIT213=m
-CONFIG_TOUCHSCREEN_TSC2007=m
-CONFIG_INPUT_MISC=y
-CONFIG_INPUT_PCSPKR=m
-CONFIG_INPUT_APANEL=m
-CONFIG_INPUT_ATLAS_BTNS=m
-CONFIG_INPUT_ATI_REMOTE=m
-CONFIG_INPUT_ATI_REMOTE2=m
-CONFIG_INPUT_KEYSPAN_REMOTE=m
-CONFIG_INPUT_POWERMATE=m
-CONFIG_INPUT_YEALINK=m
-CONFIG_INPUT_CM109=m
-CONFIG_INPUT_UINPUT=m
-CONFIG_INPUT_WINBOND_CIR=m
-CONFIG_INPUT_PCF50633_PMU=m
-CONFIG_INPUT_GPIO_ROTARY_ENCODER=m
-
-#
-# Hardware I/O ports
-#
-CONFIG_SERIO=y
-CONFIG_SERIO_I8042=y
-CONFIG_SERIO_SERPORT=m
-CONFIG_SERIO_CT82C710=m
-CONFIG_SERIO_PARKBD=m
-CONFIG_SERIO_PCIPS2=m
-CONFIG_SERIO_LIBPS2=y
-CONFIG_SERIO_RAW=m
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_CONSOLE_TRANSLATIONS=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-# CONFIG_VT_HW_CONSOLE_BINDING is not set
-# CONFIG_DEVKMEM is not set
-CONFIG_SERIAL_NONSTANDARD=y
-CONFIG_COMPUTONE=m
-CONFIG_ROCKETPORT=m
-CONFIG_CYCLADES=m
-# CONFIG_CYZ_INTR is not set
-CONFIG_DIGIEPCA=m
-CONFIG_MOXA_INTELLIO=m
-CONFIG_MOXA_SMARTIO=m
-CONFIG_ISI=m
-CONFIG_SYNCLINK=m
-CONFIG_SYNCLINKMP=m
-CONFIG_SYNCLINK_GT=m
-CONFIG_N_HDLC=m
-CONFIG_RISCOM8=m
-CONFIG_SPECIALIX=m
-CONFIG_STALDRV=y
-CONFIG_STALLION=m
-CONFIG_ISTALLION=m
-CONFIG_NOZOMI=m
-
-#
-# Serial drivers
-#
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_FIX_EARLYCON_MEM=y
-CONFIG_SERIAL_8250_PCI=y
-CONFIG_SERIAL_8250_PNP=y
-CONFIG_SERIAL_8250_CS=m
-CONFIG_SERIAL_8250_NR_UARTS=16
-CONFIG_SERIAL_8250_RUNTIME_UARTS=4
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_MANY_PORTS=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-# CONFIG_SERIAL_8250_DETECT_IRQ is not set
-CONFIG_SERIAL_8250_RSA=y
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_MAX3100=m
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_SERIAL_JSM=m
-CONFIG_UNIX98_PTYS=y
-# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_PRINTER=m
-# CONFIG_LP_CONSOLE is not set
-CONFIG_PPDEV=m
-CONFIG_HVC_DRIVER=y
-CONFIG_HVC_IRQ=y
-CONFIG_HVC_XEN=y
-CONFIG_VIRTIO_CONSOLE=m
-CONFIG_IPMI_HANDLER=m
-# CONFIG_IPMI_PANIC_EVENT is not set
-CONFIG_IPMI_DEVICE_INTERFACE=m
-CONFIG_IPMI_SI=m
-CONFIG_IPMI_WATCHDOG=m
-CONFIG_IPMI_POWEROFF=m
-CONFIG_HW_RANDOM=m
-CONFIG_HW_RANDOM_TIMERIOMEM=m
-CONFIG_HW_RANDOM_INTEL=m
-CONFIG_HW_RANDOM_AMD=m
-CONFIG_HW_RANDOM_VIA=m
-CONFIG_HW_RANDOM_VIRTIO=m
-CONFIG_NVRAM=m
-CONFIG_R3964=m
-CONFIG_APPLICOM=m
-
-#
-# PCMCIA character devices
-#
-CONFIG_SYNCLINK_CS=m
-CONFIG_CARDMAN_4000=m
-CONFIG_CARDMAN_4040=m
-CONFIG_IPWIRELESS=m
-CONFIG_MWAVE=m
-CONFIG_PC8736x_GPIO=m
-CONFIG_NSC_GPIO=m
-CONFIG_RAW_DRIVER=m
-CONFIG_MAX_RAW_DEVS=256
-CONFIG_HPET=y
-CONFIG_HPET_MMAP=y
-CONFIG_HANGCHECK_TIMER=m
-CONFIG_TCG_TPM=m
-CONFIG_TCG_TIS=m
-CONFIG_TCG_NSC=m
-CONFIG_TCG_ATMEL=m
-CONFIG_TCG_INFINEON=m
-CONFIG_TELCLOCK=m
-CONFIG_DEVPORT=y
-CONFIG_I2C=m
-CONFIG_I2C_BOARDINFO=y
-CONFIG_I2C_COMPAT=y
-CONFIG_I2C_CHARDEV=m
-CONFIG_I2C_HELPER_AUTO=y
-CONFIG_I2C_ALGOBIT=m
-CONFIG_I2C_ALGOPCA=m
-
-#
-# I2C Hardware Bus support
-#
-
-#
-# PC SMBus host controller drivers
-#
-CONFIG_I2C_ALI1535=m
-CONFIG_I2C_ALI1563=m
-CONFIG_I2C_ALI15X3=m
-CONFIG_I2C_AMD756=m
-CONFIG_I2C_AMD756_S4882=m
-CONFIG_I2C_AMD8111=m
-CONFIG_I2C_I801=m
-CONFIG_I2C_ISCH=m
-CONFIG_I2C_PIIX4=m
-CONFIG_I2C_NFORCE2=m
-CONFIG_I2C_NFORCE2_S4985=m
-CONFIG_I2C_SIS5595=m
-CONFIG_I2C_SIS630=m
-CONFIG_I2C_SIS96X=m
-CONFIG_I2C_VIA=m
-CONFIG_I2C_VIAPRO=m
-
-#
-# ACPI drivers
-#
-CONFIG_I2C_SCMI=m
-
-#
-# I2C system bus drivers (mostly embedded / system-on-chip)
-#
-CONFIG_I2C_GPIO=m
-CONFIG_I2C_OCORES=m
-CONFIG_I2C_SIMTEC=m
-
-#
-# External I2C/SMBus adapter drivers
-#
-CONFIG_I2C_PARPORT=m
-CONFIG_I2C_PARPORT_LIGHT=m
-CONFIG_I2C_TAOS_EVM=m
-CONFIG_I2C_TINY_USB=m
-
-#
-# Graphics adapter I2C/DDC channel drivers
-#
-# CONFIG_I2C_VOODOO3 is not set
-
-#
-# Other I2C/SMBus bus drivers
-#
-CONFIG_I2C_PCA_PLATFORM=m
-CONFIG_I2C_STUB=m
-
-#
-# Miscellaneous I2C Chip support
-#
-CONFIG_DS1682=m
-CONFIG_SENSORS_TSL2550=m
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
-CONFIG_SPI=y
-CONFIG_SPI_MASTER=y
-
-#
-# SPI Master Controller Drivers
-#
-CONFIG_SPI_BITBANG=m
-CONFIG_SPI_BUTTERFLY=m
-CONFIG_SPI_GPIO=m
-CONFIG_SPI_LM70_LLP=m
-
-#
-# SPI Protocol Masters
-#
-CONFIG_SPI_SPIDEV=m
-CONFIG_SPI_TLE62X0=m
-
-#
-# PPS support
-#
-# CONFIG_PPS is not set
-CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
-CONFIG_GPIOLIB=y
-# CONFIG_GPIO_SYSFS is not set
-
-#
-# Memory mapped GPIO expanders:
-#
-
-#
-# I2C GPIO expanders:
-#
-CONFIG_GPIO_MAX732X=m
-CONFIG_GPIO_PCA953X=m
-CONFIG_GPIO_PCF857X=m
-
-#
-# PCI GPIO expanders:
-#
-# CONFIG_GPIO_LANGWELL is not set
-
-#
-# SPI GPIO expanders:
-#
-CONFIG_GPIO_MAX7301=m
-CONFIG_GPIO_MCP23S08=m
-# CONFIG_GPIO_MC33880 is not set
-
-#
-# AC97 GPIO expanders:
-#
-# CONFIG_GPIO_UCB1400 is not set
-CONFIG_W1=m
-CONFIG_W1_CON=y
-
-#
-# 1-wire Bus Masters
-#
-CONFIG_W1_MASTER_MATROX=m
-CONFIG_W1_MASTER_DS2490=m
-CONFIG_W1_MASTER_DS2482=m
-CONFIG_W1_MASTER_GPIO=m
-
-#
-# 1-wire Slaves
-#
-CONFIG_W1_SLAVE_THERM=m
-CONFIG_W1_SLAVE_SMEM=m
-CONFIG_W1_SLAVE_DS2431=m
-CONFIG_W1_SLAVE_DS2433=m
-# CONFIG_W1_SLAVE_DS2433_CRC is not set
-CONFIG_W1_SLAVE_DS2760=m
-CONFIG_W1_SLAVE_BQ27000=m
-CONFIG_POWER_SUPPLY=y
-# CONFIG_POWER_SUPPLY_DEBUG is not set
-CONFIG_PDA_POWER=m
-CONFIG_BATTERY_DS2760=m
-CONFIG_BATTERY_DS2782=m
-CONFIG_BATTERY_BQ27x00=m
-CONFIG_BATTERY_MAX17040=m
-CONFIG_CHARGER_PCF50633=m
-CONFIG_HWMON=m
-CONFIG_HWMON_VID=m
-# CONFIG_HWMON_DEBUG_CHIP is not set
-
-#
-# Native drivers
-#
-CONFIG_SENSORS_ABITUGURU=m
-CONFIG_SENSORS_ABITUGURU3=m
-CONFIG_SENSORS_AD7414=m
-CONFIG_SENSORS_AD7418=m
-CONFIG_SENSORS_ADCXX=m
-CONFIG_SENSORS_ADM1021=m
-CONFIG_SENSORS_ADM1025=m
-CONFIG_SENSORS_ADM1026=m
-CONFIG_SENSORS_ADM1029=m
-CONFIG_SENSORS_ADM1031=m
-CONFIG_SENSORS_ADM9240=m
-CONFIG_SENSORS_ADT7462=m
-CONFIG_SENSORS_ADT7470=m
-# CONFIG_SENSORS_ADT7473 is not set
-CONFIG_SENSORS_ADT7475=m
-CONFIG_SENSORS_K8TEMP=m
-CONFIG_SENSORS_ASB100=m
-CONFIG_SENSORS_ATXP1=m
-CONFIG_SENSORS_DS1621=m
-CONFIG_SENSORS_I5K_AMB=m
-CONFIG_SENSORS_F71805F=m
-CONFIG_SENSORS_F71882FG=m
-CONFIG_SENSORS_F75375S=m
-CONFIG_SENSORS_FSCHMD=m
-CONFIG_SENSORS_G760A=m
-CONFIG_SENSORS_GL518SM=m
-CONFIG_SENSORS_GL520SM=m
-CONFIG_SENSORS_CORETEMP=m
-CONFIG_SENSORS_IBMAEM=m
-CONFIG_SENSORS_IBMPEX=m
-CONFIG_SENSORS_IT87=m
-CONFIG_SENSORS_LM63=m
-CONFIG_SENSORS_LM70=m
-CONFIG_SENSORS_LM75=m
-CONFIG_SENSORS_LM77=m
-CONFIG_SENSORS_LM78=m
-CONFIG_SENSORS_LM80=m
-CONFIG_SENSORS_LM83=m
-CONFIG_SENSORS_LM85=m
-CONFIG_SENSORS_LM87=m
-CONFIG_SENSORS_LM90=m
-CONFIG_SENSORS_LM92=m
-CONFIG_SENSORS_LM93=m
-CONFIG_SENSORS_LTC4215=m
-CONFIG_SENSORS_LTC4245=m
-CONFIG_SENSORS_LM95241=m
-CONFIG_SENSORS_MAX1111=m
-CONFIG_SENSORS_MAX1619=m
-CONFIG_SENSORS_MAX6650=m
-CONFIG_SENSORS_PC87360=m
-CONFIG_SENSORS_PC87427=m
-CONFIG_SENSORS_PCF8591=m
-CONFIG_SENSORS_SHT15=m
-CONFIG_SENSORS_SIS5595=m
-CONFIG_SENSORS_DME1737=m
-CONFIG_SENSORS_SMSC47M1=m
-CONFIG_SENSORS_SMSC47M192=m
-CONFIG_SENSORS_SMSC47B397=m
-CONFIG_SENSORS_ADS7828=m
-CONFIG_SENSORS_THMC50=m
-CONFIG_SENSORS_TMP401=m
-CONFIG_SENSORS_TMP421=m
-CONFIG_SENSORS_VIA686A=m
-CONFIG_SENSORS_VT1211=m
-CONFIG_SENSORS_VT8231=m
-CONFIG_SENSORS_W83781D=m
-CONFIG_SENSORS_W83791D=m
-CONFIG_SENSORS_W83792D=m
-CONFIG_SENSORS_W83793=m
-CONFIG_SENSORS_W83L785TS=m
-CONFIG_SENSORS_W83L786NG=m
-CONFIG_SENSORS_W83627HF=m
-CONFIG_SENSORS_W83627EHF=m
-CONFIG_SENSORS_HDAPS=m
-CONFIG_SENSORS_APPLESMC=m
-
-#
-# ACPI drivers
-#
-CONFIG_SENSORS_ATK0110=m
-CONFIG_SENSORS_LIS3LV02D=m
-CONFIG_THERMAL=y
-CONFIG_WATCHDOG=y
-# CONFIG_WATCHDOG_NOWAYOUT is not set
-
-#
-# Watchdog Device Drivers
-#
-CONFIG_SOFT_WATCHDOG=m
-CONFIG_ACQUIRE_WDT=m
-CONFIG_ADVANTECH_WDT=m
-CONFIG_ALIM1535_WDT=m
-CONFIG_ALIM7101_WDT=m
-CONFIG_SC520_WDT=m
-# CONFIG_SBC_FITPC2_WATCHDOG is not set
-CONFIG_EUROTECH_WDT=m
-CONFIG_IB700_WDT=m
-CONFIG_IBMASR=m
-CONFIG_WAFER_WDT=m
-CONFIG_I6300ESB_WDT=m
-CONFIG_ITCO_WDT=m
-CONFIG_ITCO_VENDOR_SUPPORT=y
-CONFIG_IT8712F_WDT=m
-CONFIG_IT87_WDT=m
-# CONFIG_HP_WATCHDOG is not set
-CONFIG_SC1200_WDT=m
-CONFIG_PC87413_WDT=m
-CONFIG_60XX_WDT=m
-CONFIG_SBC8360_WDT=m
-CONFIG_CPU5_WDT=m
-CONFIG_SMSC_SCH311X_WDT=m
-CONFIG_SMSC37B787_WDT=m
-CONFIG_W83627HF_WDT=m
-CONFIG_W83697HF_WDT=m
-CONFIG_W83697UG_WDT=m
-CONFIG_W83877F_WDT=m
-CONFIG_W83977F_WDT=m
-CONFIG_MACHZ_WDT=m
-CONFIG_SBC_EPX_C3_WATCHDOG=m
-# CONFIG_XEN_WDT is not set
-
-#
-# PCI-based Watchdog Cards
-#
-CONFIG_PCIPCWATCHDOG=m
-CONFIG_WDTPCI=m
-
-#
-# USB-based Watchdog Cards
-#
-CONFIG_USBPCWATCHDOG=m
-CONFIG_SSB_POSSIBLE=y
-
-#
-# Sonics Silicon Backplane
-#
-CONFIG_SSB=m
-CONFIG_SSB_SPROM=y
-CONFIG_SSB_PCIHOST_POSSIBLE=y
-CONFIG_SSB_PCIHOST=y
-# CONFIG_SSB_B43_PCI_BRIDGE is not set
-CONFIG_SSB_PCMCIAHOST_POSSIBLE=y
-CONFIG_SSB_PCMCIAHOST=y
-CONFIG_SSB_SDIOHOST_POSSIBLE=y
-CONFIG_SSB_SDIOHOST=y
-# CONFIG_SSB_SILENT is not set
-# CONFIG_SSB_DEBUG is not set
-CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y
-CONFIG_SSB_DRIVER_PCICORE=y
-
-#
-# Multifunction device drivers
-#
-CONFIG_MFD_CORE=m
-CONFIG_MFD_SM501=m
-# CONFIG_MFD_SM501_GPIO is not set
-CONFIG_HTC_PASIC3=m
-CONFIG_UCB1400_CORE=m
-CONFIG_TPS65010=m
-# CONFIG_MFD_TMIO is not set
-CONFIG_MFD_WM8400=m
-# CONFIG_MFD_WM831X is not set
-# CONFIG_MFD_WM8350_I2C is not set
-CONFIG_MFD_PCF50633=m
-# CONFIG_MFD_MC13783 is not set
-CONFIG_PCF50633_ADC=m
-CONFIG_PCF50633_GPIO=m
-# CONFIG_AB3100_CORE is not set
-# CONFIG_EZX_PCAP is not set
-CONFIG_REGULATOR=y
-# CONFIG_REGULATOR_DEBUG is not set
-# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
-CONFIG_REGULATOR_VIRTUAL_CONSUMER=m
-CONFIG_REGULATOR_USERSPACE_CONSUMER=m
-CONFIG_REGULATOR_BQ24022=m
-CONFIG_REGULATOR_MAX1586=m
-CONFIG_REGULATOR_WM8400=m
-CONFIG_REGULATOR_PCF50633=m
-CONFIG_REGULATOR_LP3971=m
-# CONFIG_REGULATOR_TPS65023 is not set
-# CONFIG_REGULATOR_TPS6507X is not set
-CONFIG_MEDIA_SUPPORT=m
-
-#
-# Multimedia core support
-#
-CONFIG_VIDEO_DEV=m
-CONFIG_VIDEO_V4L2_COMMON=m
-# CONFIG_VIDEO_ALLOW_V4L1 is not set
-CONFIG_VIDEO_V4L1_COMPAT=y
-CONFIG_DVB_CORE=m
-CONFIG_VIDEO_MEDIA=m
-
-#
-# Multimedia drivers
-#
-CONFIG_VIDEO_SAA7146=m
-CONFIG_VIDEO_SAA7146_VV=m
-# CONFIG_MEDIA_ATTACH is not set
-CONFIG_MEDIA_TUNER=m
-# CONFIG_MEDIA_TUNER_CUSTOMISE is not set
-CONFIG_MEDIA_TUNER_SIMPLE=m
-CONFIG_MEDIA_TUNER_TDA8290=m
-CONFIG_MEDIA_TUNER_TDA827X=m
-CONFIG_MEDIA_TUNER_TDA18271=m
-CONFIG_MEDIA_TUNER_TDA9887=m
-CONFIG_MEDIA_TUNER_TEA5761=m
-CONFIG_MEDIA_TUNER_TEA5767=m
-CONFIG_MEDIA_TUNER_MT20XX=m
-CONFIG_MEDIA_TUNER_MT2060=m
-CONFIG_MEDIA_TUNER_MT2266=m
-CONFIG_MEDIA_TUNER_MT2131=m
-CONFIG_MEDIA_TUNER_QT1010=m
-CONFIG_MEDIA_TUNER_XC2028=m
-CONFIG_MEDIA_TUNER_XC5000=m
-CONFIG_MEDIA_TUNER_MXL5005S=m
-CONFIG_MEDIA_TUNER_MXL5007T=m
-CONFIG_MEDIA_TUNER_MC44S803=m
-CONFIG_VIDEO_V4L2=m
-CONFIG_VIDEOBUF_GEN=m
-CONFIG_VIDEOBUF_DMA_SG=m
-CONFIG_VIDEOBUF_VMALLOC=m
-CONFIG_VIDEOBUF_DVB=m
-CONFIG_VIDEO_BTCX=m
-CONFIG_VIDEO_IR=m
-CONFIG_VIDEO_TVEEPROM=m
-CONFIG_VIDEO_TUNER=m
-CONFIG_VIDEO_CAPTURE_DRIVERS=y
-# CONFIG_VIDEO_ADV_DEBUG is not set
-# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set
-# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set
-CONFIG_VIDEO_IR_I2C=m
-
-#
-# Encoders/decoders and other helper chips
-#
-
-#
-# Audio decoders
-#
-CONFIG_VIDEO_TVAUDIO=m
-CONFIG_VIDEO_TDA7432=m
-CONFIG_VIDEO_TDA9840=m
-CONFIG_VIDEO_TDA9875=m
-CONFIG_VIDEO_TEA6415C=m
-CONFIG_VIDEO_TEA6420=m
-CONFIG_VIDEO_MSP3400=m
-CONFIG_VIDEO_CS5345=m
-CONFIG_VIDEO_CS53L32A=m
-CONFIG_VIDEO_M52790=m
-CONFIG_VIDEO_TLV320AIC23B=m
-CONFIG_VIDEO_WM8775=m
-CONFIG_VIDEO_WM8739=m
-CONFIG_VIDEO_VP27SMPX=m
-
-#
-# RDS decoders
-#
-CONFIG_VIDEO_SAA6588=m
-
-#
-# Video decoders
-#
-# CONFIG_VIDEO_ADV7180 is not set
-CONFIG_VIDEO_BT819=m
-CONFIG_VIDEO_BT856=m
-CONFIG_VIDEO_BT866=m
-CONFIG_VIDEO_KS0127=m
-CONFIG_VIDEO_OV7670=m
-CONFIG_VIDEO_MT9V011=m
-CONFIG_VIDEO_TCM825X=m
-CONFIG_VIDEO_SAA7110=m
-CONFIG_VIDEO_SAA711X=m
-CONFIG_VIDEO_SAA717X=m
-CONFIG_VIDEO_TVP514X=m
-CONFIG_VIDEO_TVP5150=m
-CONFIG_VIDEO_VPX3220=m
-
-#
-# Video and audio decoders
-#
-CONFIG_VIDEO_CX25840=m
-
-#
-# MPEG video encoders
-#
-CONFIG_VIDEO_CX2341X=m
-
-#
-# Video encoders
-#
-CONFIG_VIDEO_SAA7127=m
-CONFIG_VIDEO_SAA7185=m
-CONFIG_VIDEO_ADV7170=m
-CONFIG_VIDEO_ADV7175=m
-CONFIG_VIDEO_THS7303=m
-CONFIG_VIDEO_ADV7343=m
-
-#
-# Video improvement chips
-#
-CONFIG_VIDEO_UPD64031A=m
-CONFIG_VIDEO_UPD64083=m
-# CONFIG_VIDEO_VIVI is not set
-CONFIG_VIDEO_BT848=m
-CONFIG_VIDEO_BT848_DVB=y
-CONFIG_VIDEO_SAA5246A=m
-CONFIG_VIDEO_SAA5249=m
-CONFIG_VIDEO_ZORAN=m
-CONFIG_VIDEO_ZORAN_DC30=m
-CONFIG_VIDEO_ZORAN_ZR36060=m
-CONFIG_VIDEO_ZORAN_BUZ=m
-CONFIG_VIDEO_ZORAN_DC10=m
-CONFIG_VIDEO_ZORAN_LML33=m
-CONFIG_VIDEO_ZORAN_LML33R10=m
-CONFIG_VIDEO_ZORAN_AVS6EYES=m
-CONFIG_VIDEO_SAA7134=m
-CONFIG_VIDEO_SAA7134_ALSA=m
-CONFIG_VIDEO_SAA7134_DVB=m
-CONFIG_VIDEO_HEXIUM_ORION=m
-CONFIG_VIDEO_HEXIUM_GEMINI=m
-CONFIG_VIDEO_CX88=m
-CONFIG_VIDEO_CX88_ALSA=m
-CONFIG_VIDEO_CX88_BLACKBIRD=m
-CONFIG_VIDEO_CX88_DVB=m
-CONFIG_VIDEO_CX88_MPEG=m
-CONFIG_VIDEO_CX88_VP3054=m
-CONFIG_VIDEO_CX23885=m
-CONFIG_VIDEO_AU0828=m
-CONFIG_VIDEO_IVTV=m
-CONFIG_VIDEO_FB_IVTV=m
-CONFIG_VIDEO_CX18=m
-CONFIG_VIDEO_SAA7164=m
-CONFIG_VIDEO_CAFE_CCIC=m
-CONFIG_SOC_CAMERA=m
-CONFIG_SOC_CAMERA_MT9M001=m
-CONFIG_SOC_CAMERA_MT9M111=m
-CONFIG_SOC_CAMERA_MT9T031=m
-CONFIG_SOC_CAMERA_MT9V022=m
-CONFIG_SOC_CAMERA_TW9910=m
-CONFIG_SOC_CAMERA_PLATFORM=m
-CONFIG_SOC_CAMERA_OV772X=m
-CONFIG_V4L_USB_DRIVERS=y
-CONFIG_USB_VIDEO_CLASS=m
-CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y
-CONFIG_USB_GSPCA=m
-CONFIG_USB_M5602=m
-CONFIG_USB_STV06XX=m
-CONFIG_USB_GL860=m
-CONFIG_USB_GSPCA_CONEX=m
-CONFIG_USB_GSPCA_ETOMS=m
-CONFIG_USB_GSPCA_FINEPIX=m
-CONFIG_USB_GSPCA_JEILINJ=m
-CONFIG_USB_GSPCA_MARS=m
-CONFIG_USB_GSPCA_MR97310A=m
-CONFIG_USB_GSPCA_OV519=m
-CONFIG_USB_GSPCA_OV534=m
-CONFIG_USB_GSPCA_PAC207=m
-CONFIG_USB_GSPCA_PAC7311=m
-CONFIG_USB_GSPCA_SN9C20X=m
-# CONFIG_USB_GSPCA_SN9C20X_EVDEV is not set
-CONFIG_USB_GSPCA_SONIXB=m
-CONFIG_USB_GSPCA_SONIXJ=m
-CONFIG_USB_GSPCA_SPCA500=m
-CONFIG_USB_GSPCA_SPCA501=m
-CONFIG_USB_GSPCA_SPCA505=m
-CONFIG_USB_GSPCA_SPCA506=m
-CONFIG_USB_GSPCA_SPCA508=m
-CONFIG_USB_GSPCA_SPCA561=m
-CONFIG_USB_GSPCA_SQ905=m
-CONFIG_USB_GSPCA_SQ905C=m
-CONFIG_USB_GSPCA_STK014=m
-CONFIG_USB_GSPCA_SUNPLUS=m
-CONFIG_USB_GSPCA_T613=m
-CONFIG_USB_GSPCA_TV8532=m
-CONFIG_USB_GSPCA_VC032X=m
-CONFIG_USB_GSPCA_ZC3XX=m
-CONFIG_VIDEO_PVRUSB2=m
-CONFIG_VIDEO_PVRUSB2_SYSFS=y
-CONFIG_VIDEO_PVRUSB2_DVB=y
-# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set
-CONFIG_VIDEO_HDPVR=m
-CONFIG_VIDEO_EM28XX=m
-CONFIG_VIDEO_EM28XX_ALSA=m
-CONFIG_VIDEO_EM28XX_DVB=m
-CONFIG_VIDEO_CX231XX=m
-CONFIG_VIDEO_CX231XX_ALSA=m
-CONFIG_VIDEO_CX231XX_DVB=m
-CONFIG_VIDEO_USBVISION=m
-CONFIG_USB_ET61X251=m
-CONFIG_USB_SN9C102=m
-CONFIG_USB_ZC0301=m
-CONFIG_USB_PWC_INPUT_EVDEV=y
-CONFIG_USB_ZR364XX=m
-CONFIG_USB_STKWEBCAM=m
-CONFIG_USB_S2255=m
-CONFIG_RADIO_ADAPTERS=y
-CONFIG_RADIO_GEMTEK_PCI=m
-CONFIG_RADIO_MAXIRADIO=m
-CONFIG_RADIO_MAESTRO=m
-# CONFIG_I2C_SI4713 is not set
-# CONFIG_RADIO_SI4713 is not set
-CONFIG_USB_DSBR=m
-# CONFIG_RADIO_SI470X is not set
-CONFIG_USB_MR800=m
-CONFIG_RADIO_TEA5764=m
-CONFIG_DVB_MAX_ADAPTERS=8
-# CONFIG_DVB_DYNAMIC_MINORS is not set
-CONFIG_DVB_CAPTURE_DRIVERS=y
-
-#
-# Supported SAA7146 based PCI Adapters
-#
-CONFIG_TTPCI_EEPROM=m
-CONFIG_DVB_AV7110=m
-CONFIG_DVB_AV7110_OSD=y
-CONFIG_DVB_BUDGET_CORE=m
-CONFIG_DVB_BUDGET=m
-CONFIG_DVB_BUDGET_CI=m
-CONFIG_DVB_BUDGET_AV=m
-CONFIG_DVB_BUDGET_PATCH=m
-
-#
-# Supported USB Adapters
-#
-CONFIG_DVB_USB=m
-# CONFIG_DVB_USB_DEBUG is not set
-CONFIG_DVB_USB_A800=m
-CONFIG_DVB_USB_DIBUSB_MB=m
-# CONFIG_DVB_USB_DIBUSB_MB_FAULTY is not set
-CONFIG_DVB_USB_DIBUSB_MC=m
-CONFIG_DVB_USB_DIB0700=m
-CONFIG_DVB_USB_UMT_010=m
-CONFIG_DVB_USB_CXUSB=m
-CONFIG_DVB_USB_M920X=m
-CONFIG_DVB_USB_GL861=m
-CONFIG_DVB_USB_AU6610=m
-CONFIG_DVB_USB_DIGITV=m
-CONFIG_DVB_USB_VP7045=m
-CONFIG_DVB_USB_VP702X=m
-CONFIG_DVB_USB_GP8PSK=m
-CONFIG_DVB_USB_NOVA_T_USB2=m
-CONFIG_DVB_USB_TTUSB2=m
-CONFIG_DVB_USB_DTT200U=m
-CONFIG_DVB_USB_OPERA1=m
-CONFIG_DVB_USB_AF9005=m
-CONFIG_DVB_USB_AF9005_REMOTE=m
-CONFIG_DVB_USB_DW2102=m
-CONFIG_DVB_USB_CINERGY_T2=m
-CONFIG_DVB_USB_ANYSEE=m
-CONFIG_DVB_USB_DTV5100=m
-CONFIG_DVB_USB_AF9015=m
-CONFIG_DVB_USB_CE6230=m
-# CONFIG_DVB_USB_FRIIO is not set
-CONFIG_DVB_TTUSB_BUDGET=m
-CONFIG_DVB_TTUSB_DEC=m
-CONFIG_SMS_SIANO_MDTV=m
-
-#
-# Siano module components
-#
-CONFIG_SMS_USB_DRV=m
-CONFIG_SMS_SDIO_DRV=m
-
-#
-# Supported FlexCopII (B2C2) Adapters
-#
-CONFIG_DVB_B2C2_FLEXCOP=m
-CONFIG_DVB_B2C2_FLEXCOP_PCI=m
-CONFIG_DVB_B2C2_FLEXCOP_USB=m
-# CONFIG_DVB_B2C2_FLEXCOP_DEBUG is not set
-
-#
-# Supported BT878 Adapters
-#
-CONFIG_DVB_BT8XX=m
-
-#
-# Supported Pluto2 Adapters
-#
-CONFIG_DVB_PLUTO2=m
-
-#
-# Supported SDMC DM1105 Adapters
-#
-CONFIG_DVB_DM1105=m
-
-#
-# Supported FireWire (IEEE 1394) Adapters
-#
-CONFIG_DVB_FIREDTV=m
-CONFIG_DVB_FIREDTV_IEEE1394=y
-CONFIG_DVB_FIREDTV_INPUT=y
-
-#
-# Supported Earthsoft PT1 Adapters
-#
-# CONFIG_DVB_PT1 is not set
-
-#
-# Supported DVB Frontends
-#
-# CONFIG_DVB_FE_CUSTOMISE is not set
-CONFIG_DVB_STB0899=m
-CONFIG_DVB_STB6100=m
-CONFIG_DVB_STV090x=m
-CONFIG_DVB_STV6110x=m
-CONFIG_DVB_CX24110=m
-CONFIG_DVB_CX24123=m
-CONFIG_DVB_MT312=m
-CONFIG_DVB_ZL10036=m
-CONFIG_DVB_ZL10039=m
-CONFIG_DVB_S5H1420=m
-CONFIG_DVB_STV0288=m
-CONFIG_DVB_STB6000=m
-CONFIG_DVB_STV0299=m
-CONFIG_DVB_STV6110=m
-CONFIG_DVB_STV0900=m
-CONFIG_DVB_TDA8083=m
-CONFIG_DVB_TDA10086=m
-CONFIG_DVB_TDA8261=m
-CONFIG_DVB_VES1X93=m
-CONFIG_DVB_TUNER_ITD1000=m
-CONFIG_DVB_TUNER_CX24113=m
-CONFIG_DVB_TDA826X=m
-CONFIG_DVB_TUA6100=m
-CONFIG_DVB_CX24116=m
-CONFIG_DVB_SI21XX=m
-CONFIG_DVB_SP8870=m
-CONFIG_DVB_SP887X=m
-CONFIG_DVB_CX22700=m
-CONFIG_DVB_CX22702=m
-CONFIG_DVB_L64781=m
-CONFIG_DVB_TDA1004X=m
-CONFIG_DVB_NXT6000=m
-CONFIG_DVB_MT352=m
-CONFIG_DVB_ZL10353=m
-CONFIG_DVB_DIB3000MB=m
-CONFIG_DVB_DIB3000MC=m
-CONFIG_DVB_DIB7000M=m
-CONFIG_DVB_DIB7000P=m
-CONFIG_DVB_TDA10048=m
-CONFIG_DVB_AF9013=m
-CONFIG_DVB_VES1820=m
-CONFIG_DVB_TDA10021=m
-CONFIG_DVB_TDA10023=m
-CONFIG_DVB_STV0297=m
-CONFIG_DVB_NXT200X=m
-CONFIG_DVB_OR51211=m
-CONFIG_DVB_OR51132=m
-CONFIG_DVB_BCM3510=m
-CONFIG_DVB_LGDT330X=m
-CONFIG_DVB_LGDT3305=m
-CONFIG_DVB_S5H1409=m
-CONFIG_DVB_AU8522=m
-CONFIG_DVB_S5H1411=m
-CONFIG_DVB_DIB8000=m
-CONFIG_DVB_PLL=m
-CONFIG_DVB_TUNER_DIB0070=m
-CONFIG_DVB_LNBP21=m
-CONFIG_DVB_ISL6405=m
-CONFIG_DVB_ISL6421=m
-CONFIG_DVB_ISL6423=m
-CONFIG_DVB_LGS8GXX=m
-CONFIG_DAB=y
-CONFIG_USB_DABUSB=m
-
-#
-# Graphics support
-#
-CONFIG_AGP=m
-CONFIG_AGP_AMD64=m
-CONFIG_AGP_INTEL=m
-CONFIG_AGP_SIS=m
-CONFIG_AGP_VIA=m
-# CONFIG_VGA_ARB is not set
-CONFIG_DRM=m
-CONFIG_DRM_KMS_HELPER=m
-CONFIG_DRM_TTM=m
-CONFIG_DRM_TDFX=m
-CONFIG_DRM_R128=m
-CONFIG_DRM_RADEON=m
-CONFIG_DRM_I810=m
-CONFIG_DRM_I830=m
-CONFIG_DRM_I915=m
-# CONFIG_DRM_I915_KMS is not set
-CONFIG_DRM_MGA=m
-CONFIG_DRM_SIS=m
-CONFIG_DRM_VIA=m
-CONFIG_DRM_SAVAGE=m
-CONFIG_VGASTATE=m
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_FB=m
-# CONFIG_FIRMWARE_EDID is not set
-CONFIG_FB_DDC=m
-# CONFIG_FB_BOOT_VESA_SUPPORT is not set
-CONFIG_FB_CFB_FILLRECT=m
-CONFIG_FB_CFB_COPYAREA=m
-CONFIG_FB_CFB_IMAGEBLIT=m
-# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
-CONFIG_FB_SYS_FILLRECT=m
-CONFIG_FB_SYS_COPYAREA=m
-CONFIG_FB_SYS_IMAGEBLIT=m
-# CONFIG_FB_FOREIGN_ENDIAN is not set
-CONFIG_FB_SYS_FOPS=m
-CONFIG_FB_DEFERRED_IO=y
-CONFIG_FB_HECUBA=m
-CONFIG_FB_SVGALIB=m
-# CONFIG_FB_MACMODES is not set
-CONFIG_FB_BACKLIGHT=y
-CONFIG_FB_MODE_HELPERS=y
-CONFIG_FB_TILEBLITTING=y
-
-#
-# Frame buffer hardware drivers
-#
-CONFIG_FB_CIRRUS=m
-CONFIG_FB_PM2=m
-CONFIG_FB_PM2_FIFO_DISCONNECT=y
-CONFIG_FB_CYBER2000=m
-CONFIG_FB_ARC=m
-CONFIG_FB_VGA16=m
-CONFIG_FB_UVESA=m
-CONFIG_FB_N411=m
-CONFIG_FB_HGA=m
-# CONFIG_FB_HGA_ACCEL is not set
-CONFIG_FB_S1D13XXX=m
-CONFIG_FB_NVIDIA=m
-CONFIG_FB_NVIDIA_I2C=y
-# CONFIG_FB_NVIDIA_DEBUG is not set
-CONFIG_FB_NVIDIA_BACKLIGHT=y
-CONFIG_FB_RIVA=m
-CONFIG_FB_RIVA_I2C=y
-# CONFIG_FB_RIVA_DEBUG is not set
-CONFIG_FB_RIVA_BACKLIGHT=y
-CONFIG_FB_LE80578=m
-CONFIG_FB_CARILLO_RANCH=m
-CONFIG_FB_INTEL=m
-# CONFIG_FB_INTEL_DEBUG is not set
-CONFIG_FB_INTEL_I2C=y
-CONFIG_FB_MATROX=m
-CONFIG_FB_MATROX_MILLENIUM=y
-CONFIG_FB_MATROX_MYSTIQUE=y
-CONFIG_FB_MATROX_G=y
-CONFIG_FB_MATROX_I2C=m
-CONFIG_FB_MATROX_MAVEN=m
-CONFIG_FB_RADEON=m
-CONFIG_FB_RADEON_I2C=y
-CONFIG_FB_RADEON_BACKLIGHT=y
-# CONFIG_FB_RADEON_DEBUG is not set
-CONFIG_FB_ATY128=m
-CONFIG_FB_ATY128_BACKLIGHT=y
-CONFIG_FB_ATY=m
-CONFIG_FB_ATY_CT=y
-CONFIG_FB_ATY_GENERIC_LCD=y
-CONFIG_FB_ATY_GX=y
-CONFIG_FB_ATY_BACKLIGHT=y
-CONFIG_FB_S3=m
-CONFIG_FB_SAVAGE=m
-CONFIG_FB_SAVAGE_I2C=y
-CONFIG_FB_SAVAGE_ACCEL=y
-CONFIG_FB_SIS=m
-CONFIG_FB_SIS_300=y
-CONFIG_FB_SIS_315=y
-CONFIG_FB_VIA=m
-CONFIG_FB_NEOMAGIC=m
-CONFIG_FB_KYRO=m
-CONFIG_FB_3DFX=m
-CONFIG_FB_3DFX_ACCEL=y
-CONFIG_FB_3DFX_I2C=y
-CONFIG_FB_VOODOO1=m
-CONFIG_FB_VT8623=m
-CONFIG_FB_TRIDENT=m
-CONFIG_FB_ARK=m
-CONFIG_FB_PM3=m
-CONFIG_FB_CARMINE=m
-CONFIG_FB_CARMINE_DRAM_EVAL=y
-# CONFIG_CARMINE_DRAM_CUSTOM is not set
-CONFIG_FB_GEODE=y
-CONFIG_FB_GEODE_LX=m
-CONFIG_FB_GEODE_GX=m
-CONFIG_FB_GEODE_GX1=m
-CONFIG_FB_TMIO=m
-CONFIG_FB_TMIO_ACCELL=y
-CONFIG_FB_SM501=m
-# CONFIG_FB_VIRTUAL is not set
-CONFIG_XEN_FBDEV_FRONTEND=m
-CONFIG_FB_METRONOME=m
-CONFIG_FB_MB862XX=m
-# CONFIG_FB_MB862XX_PCI_GDC is not set
-CONFIG_FB_BROADSHEET=m
-CONFIG_BACKLIGHT_LCD_SUPPORT=y
-CONFIG_LCD_CLASS_DEVICE=m
-CONFIG_LCD_LMS283GF05=m
-CONFIG_LCD_LTV350QV=m
-CONFIG_LCD_ILI9320=m
-CONFIG_LCD_TDO24M=m
-CONFIG_LCD_VGG2432A4=m
-CONFIG_LCD_PLATFORM=m
-CONFIG_BACKLIGHT_CLASS_DEVICE=m
-CONFIG_BACKLIGHT_GENERIC=m
-CONFIG_BACKLIGHT_PROGEAR=m
-CONFIG_BACKLIGHT_CARILLO_RANCH=m
-CONFIG_BACKLIGHT_MBP_NVIDIA=m
-CONFIG_BACKLIGHT_SAHARA=m
-
-#
-# Display device support
-#
-CONFIG_DISPLAY_SUPPORT=m
-
-#
-# Display hardware drivers
-#
-
-#
-# Console display driver support
-#
-CONFIG_VGA_CONSOLE=y
-# CONFIG_VGACON_SOFT_SCROLLBACK is not set
-CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=m
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
-# CONFIG_FONTS is not set
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-# CONFIG_LOGO is not set
-CONFIG_SOUND=m
-CONFIG_SOUND_OSS_CORE=y
-CONFIG_SOUND_OSS_CORE_PRECLAIM=y
-CONFIG_SND=m
-CONFIG_SND_TIMER=m
-CONFIG_SND_PCM=m
-CONFIG_SND_HWDEP=m
-CONFIG_SND_RAWMIDI=m
-CONFIG_SND_JACK=y
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
-CONFIG_SND_OSSEMUL=y
-CONFIG_SND_MIXER_OSS=m
-CONFIG_SND_PCM_OSS=m
-CONFIG_SND_PCM_OSS_PLUGINS=y
-CONFIG_SND_SEQUENCER_OSS=y
-CONFIG_SND_HRTIMER=m
-CONFIG_SND_SEQ_HRTIMER_DEFAULT=y
-CONFIG_SND_DYNAMIC_MINORS=y
-# CONFIG_SND_SUPPORT_OLD_API is not set
-# CONFIG_SND_VERBOSE_PROCFS is not set
-# CONFIG_SND_VERBOSE_PRINTK is not set
-# CONFIG_SND_DEBUG is not set
-CONFIG_SND_VMASTER=y
-CONFIG_SND_DMA_SGBUF=y
-CONFIG_SND_RAWMIDI_SEQ=m
-CONFIG_SND_OPL3_LIB_SEQ=m
-# CONFIG_SND_OPL4_LIB_SEQ is not set
-# CONFIG_SND_SBAWE_SEQ is not set
-CONFIG_SND_EMU10K1_SEQ=m
-CONFIG_SND_MPU401_UART=m
-CONFIG_SND_OPL3_LIB=m
-CONFIG_SND_VX_LIB=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_DRIVERS=y
-CONFIG_SND_PCSP=m
-CONFIG_SND_DUMMY=m
-CONFIG_SND_VIRMIDI=m
-CONFIG_SND_MTPAV=m
-CONFIG_SND_MTS64=m
-CONFIG_SND_SERIAL_U16550=m
-CONFIG_SND_MPU401=m
-CONFIG_SND_PORTMAN2X4=m
-CONFIG_SND_AC97_POWER_SAVE=y
-CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0
-CONFIG_SND_SB_COMMON=m
-CONFIG_SND_SB16_DSP=m
-CONFIG_SND_PCI=y
-CONFIG_SND_AD1889=m
-CONFIG_SND_ALS300=m
-CONFIG_SND_ALS4000=m
-CONFIG_SND_ALI5451=m
-CONFIG_SND_ATIIXP=m
-CONFIG_SND_ATIIXP_MODEM=m
-CONFIG_SND_AU8810=m
-CONFIG_SND_AU8820=m
-CONFIG_SND_AU8830=m
-CONFIG_SND_AW2=m
-CONFIG_SND_AZT3328=m
-CONFIG_SND_BT87X=m
-# CONFIG_SND_BT87X_OVERCLOCK is not set
-CONFIG_SND_CA0106=m
-CONFIG_SND_CMIPCI=m
-CONFIG_SND_OXYGEN_LIB=m
-CONFIG_SND_OXYGEN=m
-CONFIG_SND_CS4281=m
-CONFIG_SND_CS46XX=m
-CONFIG_SND_CS46XX_NEW_DSP=y
-CONFIG_SND_CS5530=m
-CONFIG_SND_CS5535AUDIO=m
-CONFIG_SND_CTXFI=m
-CONFIG_SND_DARLA20=m
-CONFIG_SND_GINA20=m
-CONFIG_SND_LAYLA20=m
-CONFIG_SND_DARLA24=m
-CONFIG_SND_GINA24=m
-CONFIG_SND_LAYLA24=m
-CONFIG_SND_MONA=m
-CONFIG_SND_MIA=m
-CONFIG_SND_ECHO3G=m
-CONFIG_SND_INDIGO=m
-CONFIG_SND_INDIGOIO=m
-CONFIG_SND_INDIGODJ=m
-CONFIG_SND_INDIGOIOX=m
-CONFIG_SND_INDIGODJX=m
-CONFIG_SND_EMU10K1=m
-CONFIG_SND_EMU10K1X=m
-CONFIG_SND_ENS1370=m
-CONFIG_SND_ENS1371=m
-CONFIG_SND_ES1938=m
-CONFIG_SND_ES1968=m
-CONFIG_SND_FM801=m
-# CONFIG_SND_FM801_TEA575X_BOOL is not set
-CONFIG_SND_HDA_INTEL=m
-CONFIG_SND_HDA_HWDEP=y
-# CONFIG_SND_HDA_RECONFIG is not set
-CONFIG_SND_HDA_INPUT_BEEP=y
-CONFIG_SND_HDA_INPUT_JACK=y
-# CONFIG_SND_HDA_PATCH_LOADER is not set
-CONFIG_SND_HDA_CODEC_REALTEK=y
-CONFIG_SND_HDA_CODEC_ANALOG=y
-CONFIG_SND_HDA_CODEC_SIGMATEL=y
-CONFIG_SND_HDA_CODEC_VIA=y
-CONFIG_SND_HDA_CODEC_ATIHDMI=y
-CONFIG_SND_HDA_CODEC_NVHDMI=y
-CONFIG_SND_HDA_CODEC_INTELHDMI=y
-CONFIG_SND_HDA_ELD=y
-CONFIG_SND_HDA_CODEC_CIRRUS=y
-CONFIG_SND_HDA_CODEC_CONEXANT=y
-CONFIG_SND_HDA_CODEC_CA0110=y
-CONFIG_SND_HDA_CODEC_CMEDIA=y
-CONFIG_SND_HDA_CODEC_SI3054=y
-CONFIG_SND_HDA_GENERIC=y
-# CONFIG_SND_HDA_POWER_SAVE is not set
-CONFIG_SND_HDSP=m
-CONFIG_SND_HDSPM=m
-CONFIG_SND_HIFIER=m
-CONFIG_SND_ICE1712=m
-CONFIG_SND_ICE1724=m
-CONFIG_SND_INTEL8X0=m
-CONFIG_SND_INTEL8X0M=m
-CONFIG_SND_KORG1212=m
-CONFIG_SND_LX6464ES=m
-CONFIG_SND_MAESTRO3=m
-CONFIG_SND_MIXART=m
-CONFIG_SND_NM256=m
-CONFIG_SND_PCXHR=m
-CONFIG_SND_RIPTIDE=m
-CONFIG_SND_RME32=m
-CONFIG_SND_RME96=m
-CONFIG_SND_RME9652=m
-CONFIG_SND_SONICVIBES=m
-CONFIG_SND_TRIDENT=m
-CONFIG_SND_VIA82XX=m
-CONFIG_SND_VIA82XX_MODEM=m
-CONFIG_SND_VIRTUOSO=m
-CONFIG_SND_VX222=m
-CONFIG_SND_YMFPCI=m
-CONFIG_SND_SPI=y
-CONFIG_SND_USB=y
-CONFIG_SND_USB_AUDIO=m
-CONFIG_SND_USB_USX2Y=m
-CONFIG_SND_USB_CAIAQ=m
-# CONFIG_SND_USB_CAIAQ_INPUT is not set
-CONFIG_SND_USB_US122L=m
-CONFIG_SND_PCMCIA=y
-CONFIG_SND_VXPOCKET=m
-CONFIG_SND_PDAUDIOCF=m
-CONFIG_SND_SOC=m
-CONFIG_SND_SOC_I2C_AND_SPI=m
-CONFIG_SND_SOC_ALL_CODECS=m
-CONFIG_SND_SOC_WM_HUBS=m
-CONFIG_SND_SOC_AD1836=m
-CONFIG_SND_SOC_AD1938=m
-CONFIG_SND_SOC_AD73311=m
-CONFIG_SND_SOC_AK4104=m
-CONFIG_SND_SOC_AK4535=m
-CONFIG_SND_SOC_AK4642=m
-CONFIG_SND_SOC_CS4270=m
-CONFIG_SND_SOC_L3=m
-CONFIG_SND_SOC_PCM3008=m
-CONFIG_SND_SOC_SPDIF=m
-CONFIG_SND_SOC_SSM2602=m
-CONFIG_SND_SOC_TLV320AIC23=m
-CONFIG_SND_SOC_TLV320AIC26=m
-CONFIG_SND_SOC_TLV320AIC3X=m
-CONFIG_SND_SOC_UDA134X=m
-CONFIG_SND_SOC_UDA1380=m
-CONFIG_SND_SOC_WM8400=m
-CONFIG_SND_SOC_WM8510=m
-CONFIG_SND_SOC_WM8523=m
-CONFIG_SND_SOC_WM8580=m
-CONFIG_SND_SOC_WM8728=m
-CONFIG_SND_SOC_WM8731=m
-CONFIG_SND_SOC_WM8750=m
-CONFIG_SND_SOC_WM8753=m
-CONFIG_SND_SOC_WM8776=m
-CONFIG_SND_SOC_WM8900=m
-CONFIG_SND_SOC_WM8903=m
-CONFIG_SND_SOC_WM8940=m
-CONFIG_SND_SOC_WM8960=m
-CONFIG_SND_SOC_WM8961=m
-CONFIG_SND_SOC_WM8971=m
-CONFIG_SND_SOC_WM8974=m
-CONFIG_SND_SOC_WM8988=m
-CONFIG_SND_SOC_WM8990=m
-CONFIG_SND_SOC_WM8993=m
-CONFIG_SND_SOC_WM9081=m
-CONFIG_SND_SOC_MAX9877=m
-# CONFIG_SOUND_PRIME is not set
-CONFIG_AC97_BUS=m
-CONFIG_HID_SUPPORT=y
-CONFIG_HID=m
-CONFIG_HIDRAW=y
-
-#
-# USB Input Devices
-#
-CONFIG_USB_HID=m
-# CONFIG_HID_PID is not set
-# CONFIG_USB_HIDDEV is not set
-
-#
-# USB HID Boot Protocol drivers
-#
-CONFIG_USB_KBD=m
-CONFIG_USB_MOUSE=m
-
-#
-# Special HID drivers
-#
-# CONFIG_HID_A4TECH is not set
-# CONFIG_HID_APPLE is not set
-# CONFIG_HID_BELKIN is not set
-# CONFIG_HID_CHERRY is not set
-# CONFIG_HID_CHICONY is not set
-# CONFIG_HID_CYPRESS is not set
-# CONFIG_HID_DRAGONRISE is not set
-# CONFIG_HID_EZKEY is not set
-# CONFIG_HID_KYE is not set
-# CONFIG_HID_GYRATION is not set
-# CONFIG_HID_TWINHAN is not set
-# CONFIG_HID_KENSINGTON is not set
-# CONFIG_HID_LOGITECH is not set
-# CONFIG_HID_MICROSOFT is not set
-# CONFIG_HID_MONTEREY is not set
-# CONFIG_HID_NTRIG is not set
-# CONFIG_HID_PANTHERLORD is not set
-# CONFIG_HID_PETALYNX is not set
-# CONFIG_HID_SAMSUNG is not set
-# CONFIG_HID_SONY is not set
-# CONFIG_HID_SUNPLUS is not set
-# CONFIG_HID_GREENASIA is not set
-# CONFIG_HID_SMARTJOYPLUS is not set
-# CONFIG_HID_TOPSEED is not set
-# CONFIG_HID_THRUSTMASTER is not set
-# CONFIG_HID_WACOM is not set
-# CONFIG_HID_ZEROPLUS is not set
-CONFIG_USB_SUPPORT=y
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
-CONFIG_USB_ARCH_HAS_EHCI=y
-CONFIG_USB=m
-# CONFIG_USB_DEBUG is not set
-CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-
-#
-# Miscellaneous USB options
-#
-CONFIG_USB_DEVICEFS=y
-CONFIG_USB_DEVICE_CLASS=y
-# CONFIG_USB_DYNAMIC_MINORS is not set
-# CONFIG_USB_SUSPEND is not set
-# CONFIG_USB_OTG is not set
-# CONFIG_USB_OTG_WHITELIST is not set
-# CONFIG_USB_OTG_BLACKLIST_HUB is not set
-CONFIG_USB_MON=m
-CONFIG_USB_WUSB=m
-CONFIG_USB_WUSB_CBAF=m
-# CONFIG_USB_WUSB_CBAF_DEBUG is not set
-
-#
-# USB Host Controller Drivers
-#
-CONFIG_USB_C67X00_HCD=m
-CONFIG_USB_XHCI_HCD=m
-# CONFIG_USB_XHCI_HCD_DEBUGGING is not set
-CONFIG_USB_EHCI_HCD=m
-# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
-CONFIG_USB_OXU210HP_HCD=m
-CONFIG_USB_ISP116X_HCD=m
-CONFIG_USB_ISP1760_HCD=m
-CONFIG_USB_ISP1362_HCD=m
-CONFIG_USB_OHCI_HCD=m
-CONFIG_USB_OHCI_HCD_SSB=y
-# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
-# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
-CONFIG_USB_OHCI_LITTLE_ENDIAN=y
-CONFIG_USB_UHCI_HCD=m
-CONFIG_USB_U132_HCD=m
-CONFIG_USB_SL811_HCD=m
-CONFIG_USB_SL811_CS=m
-CONFIG_USB_R8A66597_HCD=m
-CONFIG_USB_WHCI_HCD=m
-CONFIG_USB_HWA_HCD=m
-
-#
-# Enable Host or Gadget support to see Inventra options
-#
-
-#
-# USB Device Class drivers
-#
-CONFIG_USB_ACM=m
-CONFIG_USB_PRINTER=m
-CONFIG_USB_WDM=m
-CONFIG_USB_TMC=m
-
-#
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
-#
-
-#
-# also be needed; see USB_STORAGE Help for more info
-#
-CONFIG_USB_STORAGE=m
-# CONFIG_USB_STORAGE_DEBUG is not set
-CONFIG_USB_STORAGE_DATAFAB=m
-CONFIG_USB_STORAGE_FREECOM=m
-CONFIG_USB_STORAGE_ISD200=m
-CONFIG_USB_STORAGE_USBAT=m
-CONFIG_USB_STORAGE_SDDR09=m
-CONFIG_USB_STORAGE_SDDR55=m
-CONFIG_USB_STORAGE_JUMPSHOT=m
-CONFIG_USB_STORAGE_ALAUDA=m
-CONFIG_USB_STORAGE_ONETOUCH=m
-CONFIG_USB_STORAGE_KARMA=m
-CONFIG_USB_STORAGE_CYPRESS_ATACB=m
-CONFIG_USB_LIBUSUAL=y
-
-#
-# USB Imaging devices
-#
-# CONFIG_USB_MDC800 is not set
-# CONFIG_USB_MICROTEK is not set
-
-#
-# USB port drivers
-#
-CONFIG_USB_USS720=m
-CONFIG_USB_SERIAL=m
-CONFIG_USB_EZUSB=y
-CONFIG_USB_SERIAL_GENERIC=y
-CONFIG_USB_SERIAL_AIRCABLE=m
-CONFIG_USB_SERIAL_ARK3116=m
-CONFIG_USB_SERIAL_BELKIN=m
-CONFIG_USB_SERIAL_CH341=m
-CONFIG_USB_SERIAL_WHITEHEAT=m
-CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
-CONFIG_USB_SERIAL_CP210X=m
-CONFIG_USB_SERIAL_CYPRESS_M8=m
-CONFIG_USB_SERIAL_EMPEG=m
-CONFIG_USB_SERIAL_FTDI_SIO=m
-CONFIG_USB_SERIAL_FUNSOFT=m
-CONFIG_USB_SERIAL_VISOR=m
-CONFIG_USB_SERIAL_IPAQ=m
-CONFIG_USB_SERIAL_IR=m
-CONFIG_USB_SERIAL_EDGEPORT=m
-CONFIG_USB_SERIAL_EDGEPORT_TI=m
-CONFIG_USB_SERIAL_GARMIN=m
-CONFIG_USB_SERIAL_IPW=m
-CONFIG_USB_SERIAL_IUU=m
-CONFIG_USB_SERIAL_KEYSPAN_PDA=m
-CONFIG_USB_SERIAL_KEYSPAN=m
-CONFIG_USB_SERIAL_KLSI=m
-CONFIG_USB_SERIAL_KOBIL_SCT=m
-CONFIG_USB_SERIAL_MCT_U232=m
-CONFIG_USB_SERIAL_MOS7720=m
-CONFIG_USB_SERIAL_MOS7840=m
-CONFIG_USB_SERIAL_MOTOROLA=m
-CONFIG_USB_SERIAL_NAVMAN=m
-CONFIG_USB_SERIAL_PL2303=m
-CONFIG_USB_SERIAL_OTI6858=m
-CONFIG_USB_SERIAL_QUALCOMM=m
-CONFIG_USB_SERIAL_SPCP8X5=m
-CONFIG_USB_SERIAL_HP4X=m
-CONFIG_USB_SERIAL_SAFE=m
-CONFIG_USB_SERIAL_SAFE_PADDED=y
-CONFIG_USB_SERIAL_SIEMENS_MPI=m
-CONFIG_USB_SERIAL_SIERRAWIRELESS=m
-CONFIG_USB_SERIAL_SYMBOL=m
-CONFIG_USB_SERIAL_TI=m
-CONFIG_USB_SERIAL_CYBERJACK=m
-CONFIG_USB_SERIAL_XIRCOM=m
-CONFIG_USB_SERIAL_OPTION=m
-CONFIG_USB_SERIAL_OMNINET=m
-CONFIG_USB_SERIAL_OPTICON=m
-CONFIG_USB_SERIAL_DEBUG=m
-
-#
-# USB Miscellaneous drivers
-#
-CONFIG_USB_EMI62=m
-CONFIG_USB_EMI26=m
-CONFIG_USB_ADUTUX=m
-CONFIG_USB_SEVSEG=m
-CONFIG_USB_RIO500=m
-# CONFIG_USB_LEGOTOWER is not set
-CONFIG_USB_LCD=m
-# CONFIG_USB_BERRY_CHARGE is not set
-CONFIG_USB_LED=m
-CONFIG_USB_CYPRESS_CY7C63=m
-CONFIG_USB_CYTHERM=m
-CONFIG_USB_IDMOUSE=m
-CONFIG_USB_FTDI_ELAN=m
-# CONFIG_USB_APPLEDISPLAY is not set
-CONFIG_USB_SISUSBVGA=m
-CONFIG_USB_SISUSBVGA_CON=y
-CONFIG_USB_LD=m
-# CONFIG_USB_TRANCEVIBRATOR is not set
-CONFIG_USB_IOWARRIOR=m
-CONFIG_USB_TEST=m
-CONFIG_USB_ISIGHTFW=m
-# CONFIG_USB_VST is not set
-CONFIG_USB_ATM=m
-CONFIG_USB_SPEEDTOUCH=m
-CONFIG_USB_CXACRU=m
-CONFIG_USB_UEAGLEATM=m
-CONFIG_USB_XUSBATM=m
-# CONFIG_USB_GADGET is not set
-
-#
-# OTG and related infrastructure
-#
-CONFIG_USB_OTG_UTILS=y
-CONFIG_USB_GPIO_VBUS=m
-CONFIG_NOP_USB_XCEIV=m
-CONFIG_UWB=m
-CONFIG_UWB_HWA=m
-CONFIG_UWB_WHCI=m
-CONFIG_UWB_WLP=m
-CONFIG_UWB_I1480U=m
-CONFIG_UWB_I1480U_WLP=m
-CONFIG_MMC=m
-# CONFIG_MMC_DEBUG is not set
-# CONFIG_MMC_UNSAFE_RESUME is not set
-
-#
-# MMC/SD/SDIO Card Drivers
-#
-CONFIG_MMC_BLOCK=m
-CONFIG_MMC_BLOCK_BOUNCE=y
-CONFIG_SDIO_UART=m
-CONFIG_MMC_TEST=m
-
-#
-# MMC/SD/SDIO Host Controller Drivers
-#
-CONFIG_MMC_SDHCI=m
-CONFIG_MMC_SDHCI_PCI=m
-CONFIG_MMC_RICOH_MMC=m
-CONFIG_MMC_SDHCI_PLTFM=m
-CONFIG_MMC_WBSD=m
-# CONFIG_MMC_AT91 is not set
-# CONFIG_MMC_ATMELMCI is not set
-CONFIG_MMC_TIFM_SD=m
-# CONFIG_MMC_SPI is not set
-CONFIG_MMC_SDRICOH_CS=m
-CONFIG_MMC_CB710=m
-CONFIG_MMC_VIA_SDMMC=m
-CONFIG_MEMSTICK=m
-# CONFIG_MEMSTICK_DEBUG is not set
-
-#
-# MemoryStick drivers
-#
-# CONFIG_MEMSTICK_UNSAFE_RESUME is not set
-CONFIG_MSPRO_BLOCK=m
-
-#
-# MemoryStick Host Controller Drivers
-#
-CONFIG_MEMSTICK_TIFM_MS=m
-CONFIG_MEMSTICK_JMICRON_38X=m
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=m
-
-#
-# LED drivers
-#
-CONFIG_LEDS_ALIX2=m
-CONFIG_LEDS_PCA9532=m
-CONFIG_LEDS_GPIO=m
-CONFIG_LEDS_GPIO_PLATFORM=y
-CONFIG_LEDS_LP3944=m
-CONFIG_LEDS_CLEVO_MAIL=m
-CONFIG_LEDS_PCA955X=m
-CONFIG_LEDS_DAC124S085=m
-CONFIG_LEDS_BD2802=m
-
-#
-# LED Triggers
-#
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_TIMER=m
-CONFIG_LEDS_TRIGGER_HEARTBEAT=m
-CONFIG_LEDS_TRIGGER_BACKLIGHT=m
-CONFIG_LEDS_TRIGGER_GPIO=m
-CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
-
-#
-# iptables trigger is under Netfilter config (LED target)
-#
-CONFIG_ACCESSIBILITY=y
-# CONFIG_A11Y_BRAILLE_CONSOLE is not set
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_USER_MEM=y
-CONFIG_INFINIBAND_ADDR_TRANS=y
-CONFIG_INFINIBAND_MTHCA=m
-# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
-# CONFIG_INFINIBAND_IPATH is not set
-CONFIG_INFINIBAND_AMSO1100=m
-# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set
-CONFIG_INFINIBAND_CXGB3=m
-# CONFIG_INFINIBAND_CXGB3_DEBUG is not set
-CONFIG_MLX4_INFINIBAND=m
-CONFIG_INFINIBAND_NES=m
-# CONFIG_INFINIBAND_NES_DEBUG is not set
-CONFIG_INFINIBAND_IPOIB=m
-# CONFIG_INFINIBAND_IPOIB_CM is not set
-# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
-CONFIG_INFINIBAND_SRP=m
-CONFIG_INFINIBAND_ISER=m
-# CONFIG_EDAC is not set
-CONFIG_RTC_LIB=m
-CONFIG_RTC_CLASS=m
-
-#
-# RTC interfaces
-#
-CONFIG_RTC_INTF_SYSFS=y
-CONFIG_RTC_INTF_PROC=y
-CONFIG_RTC_INTF_DEV=y
-CONFIG_RTC_INTF_DEV_UIE_EMUL=y
-CONFIG_RTC_DRV_TEST=m
-
-#
-# I2C RTC drivers
-#
-CONFIG_RTC_DRV_DS1307=m
-CONFIG_RTC_DRV_DS1374=m
-CONFIG_RTC_DRV_DS1672=m
-CONFIG_RTC_DRV_MAX6900=m
-CONFIG_RTC_DRV_RS5C372=m
-CONFIG_RTC_DRV_ISL1208=m
-CONFIG_RTC_DRV_X1205=m
-CONFIG_RTC_DRV_PCF8563=m
-CONFIG_RTC_DRV_PCF8583=m
-CONFIG_RTC_DRV_M41T80=m
-CONFIG_RTC_DRV_M41T80_WDT=y
-CONFIG_RTC_DRV_S35390A=m
-CONFIG_RTC_DRV_FM3130=m
-CONFIG_RTC_DRV_RX8581=m
-CONFIG_RTC_DRV_RX8025=m
-
-#
-# SPI RTC drivers
-#
-CONFIG_RTC_DRV_M41T94=m
-CONFIG_RTC_DRV_DS1305=m
-CONFIG_RTC_DRV_DS1390=m
-CONFIG_RTC_DRV_MAX6902=m
-CONFIG_RTC_DRV_R9701=m
-CONFIG_RTC_DRV_RS5C348=m
-CONFIG_RTC_DRV_DS3234=m
-CONFIG_RTC_DRV_PCF2123=m
-
-#
-# Platform RTC drivers
-#
-CONFIG_RTC_DRV_CMOS=m
-CONFIG_RTC_DRV_DS1286=m
-CONFIG_RTC_DRV_DS1511=m
-CONFIG_RTC_DRV_DS1553=m
-CONFIG_RTC_DRV_DS1742=m
-CONFIG_RTC_DRV_STK17TA8=m
-CONFIG_RTC_DRV_M48T86=m
-CONFIG_RTC_DRV_M48T35=m
-CONFIG_RTC_DRV_M48T59=m
-CONFIG_RTC_DRV_BQ4802=m
-CONFIG_RTC_DRV_V3020=m
-CONFIG_RTC_DRV_PCF50633=m
-
-#
-# on-CPU RTC drivers
-#
-CONFIG_DMADEVICES=y
-
-#
-# DMA Devices
-#
-CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y
-CONFIG_INTEL_IOATDMA=m
-CONFIG_DMA_ENGINE=y
-
-#
-# DMA Clients
-#
-CONFIG_NET_DMA=y
-# CONFIG_ASYNC_TX_DMA is not set
-CONFIG_DMATEST=m
-CONFIG_DCA=m
-CONFIG_AUXDISPLAY=y
-CONFIG_KS0108=m
-CONFIG_KS0108_PORT=0x378
-CONFIG_KS0108_DELAY=2
-CONFIG_CFAG12864B=m
-CONFIG_CFAG12864B_RATE=20
-CONFIG_UIO=m
-CONFIG_UIO_CIF=m
-CONFIG_UIO_PDRV=m
-CONFIG_UIO_PDRV_GENIRQ=m
-# CONFIG_UIO_SMX is not set
-CONFIG_UIO_AEC=m
-CONFIG_UIO_SERCOS3=m
-# CONFIG_UIO_PCI_GENERIC is not set
-
-#
-# TI VLYNQ
-#
-CONFIG_XEN_BALLOON=y
-CONFIG_XEN_SCRUB_PAGES=y
-CONFIG_XEN_DEV_EVTCHN=y
-CONFIG_XEN_BACKEND=y
-# CONFIG_XEN_NETDEV_BACKEND is not set
-# CONFIG_XEN_BLKDEV_BACKEND is not set
-# CONFIG_XEN_BLKDEV_TAP is not set
-CONFIG_XEN_PCIDEV_BACKEND=y
-CONFIG_XEN_PCIDEV_BACKEND_VPCI=y
-# CONFIG_XEN_PCIDEV_BACKEND_PASS is not set
-# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
-# CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER is not set
-# CONFIG_XEN_PCIDEV_BE_DEBUG is not set
-CONFIG_XENFS=y
-CONFIG_XEN_COMPAT_XENFS=y
-CONFIG_XEN_SYS_HYPERVISOR=y
-CONFIG_XEN_XENBUS_FRONTEND=y
-# CONFIG_XEN_GNTDEV is not set
-CONFIG_XEN_S3=y
-CONFIG_ACPI_PROCESSOR_XEN=m
-CONFIG_XEN_PLATFORM_PCI=m
-CONFIG_STAGING=y
-# CONFIG_STAGING_EXCLUDE_BUILD is not set
-# CONFIG_ET131X is not set
-# CONFIG_SLICOSS is not set
-# CONFIG_VIDEO_GO7007 is not set
-# CONFIG_VIDEO_CX25821 is not set
-# CONFIG_USB_IP_COMMON is not set
-# CONFIG_W35UND is not set
-# CONFIG_PRISM2_USB is not set
-# CONFIG_ECHO is not set
-# CONFIG_POCH is not set
-# CONFIG_OTUS is not set
-# CONFIG_RT2860 is not set
-# CONFIG_RT2870 is not set
-# CONFIG_RT3090 is not set
-# CONFIG_COMEDI is not set
-# CONFIG_ASUS_OLED is not set
-# CONFIG_PANEL is not set
-# CONFIG_ALTERA_PCIE_CHDMA is not set
-# CONFIG_RTL8187SE is not set
-# CONFIG_RTL8192SU is not set
-# CONFIG_RTL8192E is not set
-# CONFIG_TRANZPORT is not set
-
-#
-# Android
-#
-
-#
-# Qualcomm MSM Camera And Video
-#
-
-#
-# Camera Sensor Selection
-#
-# CONFIG_INPUT_GPIO is not set
-# CONFIG_DST is not set
-# CONFIG_POHMELFS is not set
-# CONFIG_B3DFG is not set
-# CONFIG_IDE_PHISON is not set
-# CONFIG_PLAN9AUTH is not set
-# CONFIG_LINE6_USB is not set
-# CONFIG_DRM_RADEON_KMS is not set
-# CONFIG_USB_SERIAL_QUATECH2 is not set
-# CONFIG_USB_SERIAL_QUATECH_USB2 is not set
-# CONFIG_VT6655 is not set
-# CONFIG_VT6656 is not set
-# CONFIG_FB_UDL is not set
-CONFIG_HYPERV=m
-CONFIG_HYPERV_STORAGE=m
-CONFIG_HYPERV_BLOCK=m
-CONFIG_HYPERV_NET=m
-# CONFIG_VME_BUS is not set
-
-#
-# RAR Register Driver
-#
-# CONFIG_RAR_REGISTER is not set
-# CONFIG_IIO is not set
-CONFIG_X86_PLATFORM_DEVICES=y
-CONFIG_ACER_WMI=m
-CONFIG_ASUS_LAPTOP=m
-CONFIG_DELL_WMI=m
-CONFIG_FUJITSU_LAPTOP=m
-# CONFIG_FUJITSU_LAPTOP_DEBUG is not set
-CONFIG_HP_WMI=m
-CONFIG_MSI_LAPTOP=m
-CONFIG_PANASONIC_LAPTOP=m
-CONFIG_COMPAL_LAPTOP=m
-CONFIG_SONY_LAPTOP=m
-# CONFIG_SONYPI_COMPAT is not set
-CONFIG_THINKPAD_ACPI=m
-# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set
-# CONFIG_THINKPAD_ACPI_DEBUG is not set
-# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set
-CONFIG_THINKPAD_ACPI_VIDEO=y
-CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y
-CONFIG_INTEL_MENLOW=m
-CONFIG_EEEPC_LAPTOP=m
-CONFIG_ACPI_WMI=m
-CONFIG_ACPI_ASUS=m
-# CONFIG_TOPSTAR_LAPTOP is not set
-CONFIG_ACPI_TOSHIBA=m
-
-#
-# Firmware Drivers
-#
-CONFIG_EDD=m
-# CONFIG_EDD_OFF is not set
-CONFIG_FIRMWARE_MEMMAP=y
-CONFIG_DELL_RBU=m
-CONFIG_DCDBAS=m
-CONFIG_DMIID=y
-# CONFIG_ISCSI_IBFT_FIND is not set
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=m
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=m
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_XATTR=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_EXT4_FS=m
-CONFIG_EXT4_FS_XATTR=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
-# CONFIG_EXT4_DEBUG is not set
-CONFIG_FS_XIP=y
-CONFIG_JBD=m
-# CONFIG_JBD_DEBUG is not set
-CONFIG_JBD2=m
-# CONFIG_JBD2_DEBUG is not set
-CONFIG_FS_MBCACHE=m
-CONFIG_REISERFS_FS=m
-# CONFIG_REISERFS_CHECK is not set
-CONFIG_REISERFS_PROC_INFO=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-# CONFIG_REISERFS_FS_SECURITY is not set
-CONFIG_JFS_FS=m
-CONFIG_JFS_POSIX_ACL=y
-CONFIG_JFS_SECURITY=y
-# CONFIG_JFS_DEBUG is not set
-CONFIG_JFS_STATISTICS=y
-CONFIG_FS_POSIX_ACL=y
-CONFIG_XFS_FS=m
-CONFIG_XFS_QUOTA=y
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_XFS_RT=y
-# CONFIG_XFS_DEBUG is not set
-CONFIG_GFS2_FS=m
-CONFIG_GFS2_FS_LOCKING_DLM=y
-CONFIG_OCFS2_FS=m
-CONFIG_OCFS2_FS_O2CB=m
-CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m
-CONFIG_OCFS2_FS_STATS=y
-CONFIG_OCFS2_DEBUG_MASKLOG=y
-# CONFIG_OCFS2_DEBUG_FS is not set
-# CONFIG_OCFS2_FS_POSIX_ACL is not set
-CONFIG_BTRFS_FS=m
-CONFIG_BTRFS_FS_POSIX_ACL=y
-CONFIG_NILFS2_FS=m
-CONFIG_FILE_LOCKING=y
-CONFIG_FSNOTIFY=y
-# CONFIG_DNOTIFY is not set
-CONFIG_INOTIFY=y
-CONFIG_INOTIFY_USER=y
-CONFIG_QUOTA=y
-CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_QUOTA_TREE=m
-CONFIG_QFMT_V1=m
-CONFIG_QFMT_V2=m
-CONFIG_QUOTACTL=y
-CONFIG_AUTOFS_FS=m
-CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=m
-# CONFIG_CUSE is not set
-
-#
-# Caches
-#
-CONFIG_FSCACHE=m
-CONFIG_FSCACHE_STATS=y
-CONFIG_FSCACHE_HISTOGRAM=y
-# CONFIG_FSCACHE_DEBUG is not set
-# CONFIG_FSCACHE_OBJECT_LIST is not set
-CONFIG_CACHEFILES=m
-# CONFIG_CACHEFILES_DEBUG is not set
-# CONFIG_CACHEFILES_HISTOGRAM is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_UDF_NLS=y
-
-#
-# DOS/FAT/NT Filesystems
-#
-CONFIG_FAT_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_FAT_DEFAULT_CODEPAGE=437
-CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
-CONFIG_NTFS_FS=m
-# CONFIG_NTFS_DEBUG is not set
-CONFIG_NTFS_RW=y
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-# CONFIG_PROC_KCORE is not set
-CONFIG_PROC_SYSCTL=y
-CONFIG_PROC_PAGE_MONITOR=y
-CONFIG_SYSFS=y
-CONFIG_TMPFS=y
-# CONFIG_TMPFS_POSIX_ACL is not set
-# CONFIG_HUGETLBFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_CONFIGFS_FS=m
-CONFIG_MISC_FILESYSTEMS=y
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-CONFIG_ECRYPT_FS=m
-CONFIG_HFS_FS=m
-CONFIG_HFSPLUS_FS=m
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-CONFIG_EFS_FS=m
-CONFIG_JFFS2_FS=m
-CONFIG_JFFS2_FS_DEBUG=0
-CONFIG_JFFS2_FS_WRITEBUFFER=y
-# CONFIG_JFFS2_FS_WBUF_VERIFY is not set
-CONFIG_JFFS2_SUMMARY=y
-CONFIG_JFFS2_FS_XATTR=y
-CONFIG_JFFS2_FS_POSIX_ACL=y
-CONFIG_JFFS2_FS_SECURITY=y
-CONFIG_JFFS2_COMPRESSION_OPTIONS=y
-CONFIG_JFFS2_ZLIB=y
-CONFIG_JFFS2_LZO=y
-CONFIG_JFFS2_RTIME=y
-CONFIG_JFFS2_RUBIN=y
-# CONFIG_JFFS2_CMODE_NONE is not set
-CONFIG_JFFS2_CMODE_PRIORITY=y
-# CONFIG_JFFS2_CMODE_SIZE is not set
-# CONFIG_JFFS2_CMODE_FAVOURLZO is not set
-CONFIG_UBIFS_FS=m
-# CONFIG_UBIFS_FS_XATTR is not set
-# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set
-CONFIG_UBIFS_FS_LZO=y
-CONFIG_UBIFS_FS_ZLIB=y
-# CONFIG_UBIFS_FS_DEBUG is not set
-CONFIG_CRAMFS=m
-CONFIG_SQUASHFS=m
-# CONFIG_SQUASHFS_EMBEDDED is not set
-CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3
-# CONFIG_VXFS_FS is not set
-CONFIG_MINIX_FS=m
-CONFIG_OMFS_FS=m
-CONFIG_HPFS_FS=m
-# CONFIG_QNX4FS_FS is not set
-CONFIG_ROMFS_FS=m
-CONFIG_ROMFS_BACKED_BY_BLOCK=y
-# CONFIG_ROMFS_BACKED_BY_MTD is not set
-# CONFIG_ROMFS_BACKED_BY_BOTH is not set
-CONFIG_ROMFS_ON_BLOCK=y
-CONFIG_SYSV_FS=m
-CONFIG_UFS_FS=m
-# CONFIG_UFS_FS_WRITE is not set
-# CONFIG_UFS_DEBUG is not set
-CONFIG_EXOFS_FS=m
-# CONFIG_EXOFS_DEBUG is not set
-CONFIG_NETWORK_FILESYSTEMS=y
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
-CONFIG_NFS_V4=y
-# CONFIG_NFS_V4_1 is not set
-# CONFIG_NFS_FSCACHE is not set
-CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-# CONFIG_NFSD_V3_ACL is not set
-CONFIG_NFSD_V4=y
-CONFIG_LOCKD=m
-CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=m
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=m
-CONFIG_SUNRPC_GSS=m
-CONFIG_SUNRPC_XPRT_RDMA=m
-CONFIG_RPCSEC_GSS_KRB5=m
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-CONFIG_CIFS=m
-# CONFIG_CIFS_STATS is not set
-# CONFIG_CIFS_WEAK_PW_HASH is not set
-# CONFIG_CIFS_UPCALL is not set
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-# CONFIG_CIFS_DEBUG2 is not set
-CONFIG_CIFS_DFS_UPCALL=y
-CONFIG_CIFS_EXPERIMENTAL=y
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
-CONFIG_EFI_PARTITION=y
-# CONFIG_SYSV68_PARTITION is not set
-CONFIG_NLS=m
-CONFIG_NLS_DEFAULT="iso8859-1"
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_NLS_UTF8=m
-CONFIG_DLM=m
-# CONFIG_DLM_DEBUG is not set
-
-#
-# Kernel hacking
-#
-CONFIG_TRACE_IRQFLAGS_SUPPORT=y
-CONFIG_PRINTK_TIME=y
-CONFIG_ENABLE_WARN_DEPRECATED=y
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_FRAME_WARN=1024
-# CONFIG_MAGIC_SYSRQ is not set
-# CONFIG_STRIP_ASM_SYMS is not set
-# CONFIG_UNUSED_SYMBOLS is not set
-CONFIG_DEBUG_FS=y
-# CONFIG_HEADERS_CHECK is not set
-# CONFIG_DEBUG_KERNEL is not set
-# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_DEBUG_MEMORY_INIT is not set
-CONFIG_ARCH_WANT_FRAME_POINTERS=y
-CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-# CONFIG_LATENCYTOP is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_USER_STACKTRACE_SUPPORT=y
-CONFIG_HAVE_FUNCTION_TRACER=y
-CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
-CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y
-CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
-CONFIG_HAVE_DYNAMIC_FTRACE=y
-CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
-CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
-CONFIG_RING_BUFFER=y
-CONFIG_RING_BUFFER_ALLOW_SWAP=y
-CONFIG_TRACING_SUPPORT=y
-# CONFIG_FTRACE is not set
-# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
-# CONFIG_FIREWIRE_OHCI_REMOTE_DMA is not set
-# CONFIG_DYNAMIC_DEBUG is not set
-# CONFIG_DMA_API_DEBUG is not set
-# CONFIG_SAMPLES is not set
-CONFIG_HAVE_ARCH_KGDB=y
-CONFIG_HAVE_ARCH_KMEMCHECK=y
-CONFIG_STRICT_DEVMEM=y
-# CONFIG_X86_VERBOSE_BOOTUP is not set
-# CONFIG_EARLY_PRINTK is not set
-# CONFIG_IOMMU_STRESS is not set
-CONFIG_HAVE_MMIOTRACE_SUPPORT=y
-CONFIG_IO_DELAY_TYPE_0X80=0
-CONFIG_IO_DELAY_TYPE_0XED=1
-CONFIG_IO_DELAY_TYPE_UDELAY=2
-CONFIG_IO_DELAY_TYPE_NONE=3
-CONFIG_IO_DELAY_0X80=y
-# CONFIG_IO_DELAY_0XED is not set
-# CONFIG_IO_DELAY_UDELAY is not set
-# CONFIG_IO_DELAY_NONE is not set
-CONFIG_DEFAULT_IO_DELAY_TYPE=0
-# CONFIG_OPTIMIZE_INLINING is not set
-
-#
-# Security options
-#
-CONFIG_KEYS=y
-# CONFIG_KEYS_DEBUG_PROC_KEYS is not set
-CONFIG_SECURITY=y
-CONFIG_SECURITYFS=y
-# CONFIG_SECURITY_NETWORK is not set
-# CONFIG_SECURITY_PATH is not set
-# CONFIG_SECURITY_FILE_CAPABILITIES is not set
-# CONFIG_SECURITY_TOMOYO is not set
-# CONFIG_IMA is not set
-CONFIG_XOR_BLOCKS=m
-CONFIG_ASYNC_CORE=m
-CONFIG_ASYNC_MEMCPY=m
-CONFIG_ASYNC_XOR=m
-CONFIG_ASYNC_PQ=m
-CONFIG_ASYNC_RAID6_RECOV=m
-CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA=y
-CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA=y
-CONFIG_CRYPTO=y
-
-#
-# Crypto core or helper
-#
-CONFIG_CRYPTO_FIPS=y
-CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_ALGAPI2=y
-CONFIG_CRYPTO_AEAD=m
-CONFIG_CRYPTO_AEAD2=y
-CONFIG_CRYPTO_BLKCIPHER=m
-CONFIG_CRYPTO_BLKCIPHER2=y
-CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_HASH2=y
-CONFIG_CRYPTO_RNG=m
-CONFIG_CRYPTO_RNG2=y
-CONFIG_CRYPTO_PCOMP=y
-CONFIG_CRYPTO_MANAGER=m
-CONFIG_CRYPTO_MANAGER2=y
-CONFIG_CRYPTO_GF128MUL=m
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_WORKQUEUE=y
-CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_AUTHENC=m
-CONFIG_CRYPTO_TEST=m
-
-#
-# Authenticated Encryption with Associated Data
-#
-CONFIG_CRYPTO_CCM=m
-CONFIG_CRYPTO_GCM=m
-CONFIG_CRYPTO_SEQIV=m
-
-#
-# Block modes
-#
-CONFIG_CRYPTO_CBC=m
-CONFIG_CRYPTO_CTR=m
-CONFIG_CRYPTO_CTS=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_XTS=m
-
-#
-# Hash modes
-#
-CONFIG_CRYPTO_HMAC=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-
-#
-# Digest
-#
-CONFIG_CRYPTO_CRC32C=m
-CONFIG_CRYPTO_CRC32C_INTEL=m
-CONFIG_CRYPTO_GHASH=m
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
-CONFIG_CRYPTO_SHA1=m
-CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-
-#
-# Ciphers
-#
-CONFIG_CRYPTO_AES=m
-# CONFIG_CRYPTO_AES_X86_64 is not set
-# CONFIG_CRYPTO_AES_NI_INTEL is not set
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_DES=m
-CONFIG_CRYPTO_FCRYPT=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
-# CONFIG_CRYPTO_SALSA20_X86_64 is not set
-CONFIG_CRYPTO_SEED=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_TWOFISH_COMMON=m
-# CONFIG_CRYPTO_TWOFISH_X86_64 is not set
-
-#
-# Compression
-#
-CONFIG_CRYPTO_DEFLATE=m
-CONFIG_CRYPTO_ZLIB=m
-CONFIG_CRYPTO_LZO=m
-
-#
-# Random Number Generation
-#
-CONFIG_CRYPTO_ANSI_CPRNG=m
-CONFIG_CRYPTO_HW=y
-CONFIG_CRYPTO_DEV_PADLOCK=m
-CONFIG_CRYPTO_DEV_PADLOCK_AES=m
-CONFIG_CRYPTO_DEV_PADLOCK_SHA=m
-CONFIG_CRYPTO_DEV_HIFN_795X=m
-CONFIG_CRYPTO_DEV_HIFN_795X_RNG=y
-CONFIG_HAVE_KVM=y
-CONFIG_HAVE_KVM_IRQCHIP=y
-CONFIG_HAVE_KVM_EVENTFD=y
-CONFIG_KVM_APIC_ARCHITECTURE=y
-CONFIG_VIRTUALIZATION=y
-CONFIG_KVM=m
-CONFIG_KVM_INTEL=m
-CONFIG_KVM_AMD=m
-CONFIG_VIRTIO=m
-CONFIG_VIRTIO_RING=m
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
-# CONFIG_BINARY_PRINTF is not set
-
-#
-# Library routines
-#
-CONFIG_BITREVERSE=y
-CONFIG_GENERIC_FIND_FIRST_BIT=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_GENERIC_FIND_LAST_BIT=y
-CONFIG_CRC_CCITT=m
-CONFIG_CRC16=m
-CONFIG_CRC_T10DIF=m
-CONFIG_CRC_ITU_T=m
-CONFIG_CRC32=y
-CONFIG_CRC7=m
-CONFIG_LIBCRC32C=m
-CONFIG_ZLIB_INFLATE=y
-CONFIG_ZLIB_DEFLATE=m
-CONFIG_LZO_COMPRESS=m
-CONFIG_LZO_DECOMPRESS=m
-CONFIG_DECOMPRESS_GZIP=y
-CONFIG_DECOMPRESS_BZIP2=y
-CONFIG_DECOMPRESS_LZMA=y
-CONFIG_GENERIC_ALLOCATOR=y
-CONFIG_REED_SOLOMON=m
-CONFIG_REED_SOLOMON_DEC16=y
-CONFIG_TEXTSEARCH=y
-CONFIG_TEXTSEARCH_KMP=m
-CONFIG_TEXTSEARCH_BM=m
-CONFIG_TEXTSEARCH_FSM=m
-CONFIG_HAS_IOMEM=y
-CONFIG_HAS_IOPORT=y
-CONFIG_HAS_DMA=y
-CONFIG_CHECK_SIGNATURE=y
-CONFIG_NLATTR=y
diff --git a/testing/linux-xen0/pvops.patch b/testing/linux-xen0/pvops.patch
deleted file mode 100644
index 49969705b..000000000
--- a/testing/linux-xen0/pvops.patch
+++ /dev/null
@@ -1,37837 +0,0 @@
-diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
-index 5f6aa11..9ec8558 100644
---- a/Documentation/kernel-parameters.txt
-+++ b/Documentation/kernel-parameters.txt
-@@ -113,6 +113,7 @@ parameter is applicable:
- More X86-64 boot options can be found in
- Documentation/x86/x86_64/boot-options.txt .
- X86 Either 32bit or 64bit x86 (same as X86-32+X86-64)
-+ XEN Xen support is enabled
-
- In addition, the following text indicates that the option:
-
-@@ -2760,6 +2761,18 @@ and is between 256 and 4096 characters. It is defined in the file
- xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
- xd_geo= See header of drivers/block/xd.c.
-
-+ xen_emul_unplug= [HW,X86,XEN]
-+ Unplug Xen emulated devices
-+ Format: [unplug0,][unplug1]
-+ ide-disks -- unplug primary master IDE devices
-+ aux-ide-disks -- unplug non-primary-master IDE devices
-+ nics -- unplug network devices
-+ all -- unplug all emulated devices (NICs and IDE disks)
-+ unnecessary -- unplugging emulated devices is
-+ unnecessary even if the host did not respond to
-+ the unplug protocol
-+ never -- do not unplug even if version check succeeds
-+
- xirc2ps_cs= [NET,PCMCIA]
- Format:
- <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
-diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
-index 29a6ff8..81f9b94 100644
---- a/Documentation/x86/x86_64/boot-options.txt
-+++ b/Documentation/x86/x86_64/boot-options.txt
-@@ -267,10 +267,14 @@ IOMMU (input/output memory management unit)
-
- iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU
- implementation:
-- swiotlb=<pages>[,force]
-+ swiotlb=[npages=<pages>]
-+ swiotlb=[force]
-+ swiotlb=[overflow=<size>]
-+
- <pages> Prereserve that many 128K pages for the software IO
- bounce buffering.
- force Force all IO through the software TLB.
-+ <size> Size in bytes of the overflow buffer.
-
- Settings for the IBM Calgary hardware IOMMU currently found in IBM
- pSeries and xSeries machines:
-diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
-index 8d3c79c..7d09a09 100644
---- a/arch/ia64/include/asm/dma-mapping.h
-+++ b/arch/ia64/include/asm/dma-mapping.h
-@@ -73,7 +73,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
- if (!dev->dma_mask)
- return 0;
-
-- return addr + size <= *dev->dma_mask;
-+ return addr + size - 1 <= *dev->dma_mask;
- }
-
- static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h
-index dcbaea7..f0acde6 100644
---- a/arch/ia64/include/asm/swiotlb.h
-+++ b/arch/ia64/include/asm/swiotlb.h
-@@ -4,8 +4,6 @@
- #include <linux/dma-mapping.h>
- #include <linux/swiotlb.h>
-
--extern int swiotlb_force;
--
- #ifdef CONFIG_SWIOTLB
- extern int swiotlb;
- extern void pci_swiotlb_init(void);
-diff --git a/arch/ia64/include/asm/xen/events.h b/arch/ia64/include/asm/xen/events.h
-index b8370c8..baa74c8 100644
---- a/arch/ia64/include/asm/xen/events.h
-+++ b/arch/ia64/include/asm/xen/events.h
-@@ -36,10 +36,6 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
- return !(ia64_psr(regs)->i);
- }
-
--static inline void handle_irq(int irq, struct pt_regs *regs)
--{
-- __do_IRQ(irq);
--}
- #define irq_ctx_init(cpu) do { } while (0)
-
- #endif /* _ASM_IA64_XEN_EVENTS_H */
-diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
-index 285aae8..53292ab 100644
---- a/arch/ia64/kernel/pci-swiotlb.c
-+++ b/arch/ia64/kernel/pci-swiotlb.c
-@@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = {
- void __init swiotlb_dma_init(void)
- {
- dma_ops = &swiotlb_dma_ops;
-- swiotlb_init();
-+ swiotlb_init(1);
- }
-
- void __init pci_swiotlb_init(void)
-@@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void)
- swiotlb = 1;
- printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
- machvec_init("dig");
-- swiotlb_init();
-+ swiotlb_init(1);
- dma_ops = &swiotlb_dma_ops;
- #else
- panic("Unable to find Intel IOMMU");
-diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
-index e281dae..80a973b 100644
---- a/arch/powerpc/include/asm/dma-mapping.h
-+++ b/arch/powerpc/include/asm/dma-mapping.h
-@@ -197,7 +197,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
- if (!dev->dma_mask)
- return 0;
-
-- return addr + size <= *dev->dma_mask;
-+ return addr + size - 1 <= *dev->dma_mask;
- }
-
- static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
-index 53bcf3d..b152de3 100644
---- a/arch/powerpc/kernel/setup_32.c
-+++ b/arch/powerpc/kernel/setup_32.c
-@@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p)
-
- #ifdef CONFIG_SWIOTLB
- if (ppc_swiotlb_enable)
-- swiotlb_init();
-+ swiotlb_init(1);
- #endif
-
- paging_init();
-diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
-index 04f638d..df2c9e9 100644
---- a/arch/powerpc/kernel/setup_64.c
-+++ b/arch/powerpc/kernel/setup_64.c
-@@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p)
-
- #ifdef CONFIG_SWIOTLB
- if (ppc_swiotlb_enable)
-- swiotlb_init();
-+ swiotlb_init(1);
- #endif
-
- paging_init();
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index cb5a57c..a3b7475 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -1885,6 +1885,10 @@ config PCI_OLPC
- def_bool y
- depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY)
-
-+config PCI_XEN
-+ bool
-+ select SWIOTLB
-+
- config PCI_DOMAINS
- def_bool y
- depends on PCI
-diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
-index 18aa3f8..4413ba4 100644
---- a/arch/x86/include/asm/amd_iommu.h
-+++ b/arch/x86/include/asm/amd_iommu.h
-@@ -23,20 +23,16 @@
- #include <linux/irqreturn.h>
-
- #ifdef CONFIG_AMD_IOMMU
--extern int amd_iommu_init(void);
- extern int amd_iommu_init_dma_ops(void);
- extern int amd_iommu_init_passthrough(void);
- extern void amd_iommu_detect(void);
- extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
- extern void amd_iommu_flush_all_domains(void);
- extern void amd_iommu_flush_all_devices(void);
--extern void amd_iommu_shutdown(void);
- extern void amd_iommu_apply_erratum_63(u16 devid);
- extern void amd_iommu_init_api(void);
- #else
--static inline int amd_iommu_init(void) { return -ENODEV; }
- static inline void amd_iommu_detect(void) { }
--static inline void amd_iommu_shutdown(void) { }
- #endif
-
- #endif /* _ASM_X86_AMD_IOMMU_H */
-diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
-index b03bedb..0918654 100644
---- a/arch/x86/include/asm/calgary.h
-+++ b/arch/x86/include/asm/calgary.h
-@@ -62,10 +62,8 @@ struct cal_chipset_ops {
- extern int use_calgary;
-
- #ifdef CONFIG_CALGARY_IOMMU
--extern int calgary_iommu_init(void);
- extern void detect_calgary(void);
- #else
--static inline int calgary_iommu_init(void) { return 1; }
- static inline void detect_calgary(void) { return; }
- #endif
-
-diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
-index 6a25d5d..ac91eed 100644
---- a/arch/x86/include/asm/dma-mapping.h
-+++ b/arch/x86/include/asm/dma-mapping.h
-@@ -20,7 +20,8 @@
- # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32)
- #endif
-
--extern dma_addr_t bad_dma_address;
-+#define DMA_ERROR_CODE 0
-+
- extern int iommu_merge;
- extern struct device x86_dma_fallback_dev;
- extern int panic_on_overflow;
-@@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
- if (ops->mapping_error)
- return ops->mapping_error(dev, dma_addr);
-
-- return (dma_addr == bad_dma_address);
-+ return (dma_addr == DMA_ERROR_CODE);
- }
-
- #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-@@ -66,7 +67,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
- if (!dev->dma_mask)
- return 0;
-
-- return addr + size <= *dev->dma_mask;
-+ return addr + size - 1 <= *dev->dma_mask;
- }
-
- static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
-index 40b4e61..fa3fd43 100644
---- a/arch/x86/include/asm/e820.h
-+++ b/arch/x86/include/asm/e820.h
-@@ -109,6 +109,8 @@ extern void reserve_early(u64 start, u64 end, char *name);
- extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
- extern void free_early(u64 start, u64 end);
- extern void early_res_to_bootmem(u64 start, u64 end);
-+extern u64 early_res_next_free(u64 start);
-+extern u64 early_res_next_reserved(u64 addr, u64 max);
- extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
-
- extern unsigned long e820_end_of_ram_pfn(void);
-diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
-index 6cfdafa..4ac5b0f 100644
---- a/arch/x86/include/asm/gart.h
-+++ b/arch/x86/include/asm/gart.h
-@@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed;
- extern int gart_iommu_aperture_disabled;
-
- extern void early_gart_iommu_check(void);
--extern void gart_iommu_init(void);
--extern void gart_iommu_shutdown(void);
-+extern int gart_iommu_init(void);
- extern void __init gart_parse_options(char *);
- extern void gart_iommu_hole_init(void);
-
-@@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void);
- static inline void early_gart_iommu_check(void)
- {
- }
--static inline void gart_iommu_init(void)
--{
--}
--static inline void gart_iommu_shutdown(void)
--{
--}
- static inline void gart_parse_options(char *options)
- {
- }
-diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
-index 3251e23..fa152cb 100644
---- a/arch/x86/include/asm/hpet.h
-+++ b/arch/x86/include/asm/hpet.h
-@@ -68,6 +68,7 @@ extern unsigned long force_hpet_address;
- extern int hpet_force_user;
- extern u8 hpet_msi_disable;
- extern int is_hpet_enabled(void);
-+extern int disable_hpet(char *);
- extern int hpet_enable(void);
- extern void hpet_disable(void);
- extern unsigned long hpet_readl(unsigned long a);
-@@ -108,6 +109,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
- #else /* CONFIG_HPET_TIMER */
-
- static inline int hpet_enable(void) { return 0; }
-+static inline int disable_hpet(char *s) { return 0; }
- static inline int is_hpet_enabled(void) { return 0; }
- #define hpet_readl(a) 0
-
-diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
-index 439a9ac..bf88684 100644
---- a/arch/x86/include/asm/hugetlb.h
-+++ b/arch/x86/include/asm/hugetlb.h
-@@ -36,16 +36,28 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- free_pgd_range(tlb, addr, end, floor, ceiling);
- }
-
-+static inline pte_t huge_ptep_get(pte_t *ptep)
-+{
-+ return *ptep;
-+}
-+
- static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
- {
-- set_pte_at(mm, addr, ptep, pte);
-+#if PAGETABLE_LEVELS >= 3
-+ set_pmd((pmd_t *)ptep, native_make_pmd(native_pte_val(pte)));
-+#else
-+ set_pgd((pgd_t *)ptep, native_make_pgd(native_pte_val(pte)));
-+#endif
- }
-
- static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
- {
-- return ptep_get_and_clear(mm, addr, ptep);
-+ pte_t pte = huge_ptep_get(ptep);
-+
-+ set_huge_pte_at(mm, addr, ptep, __pte(0));
-+ return pte;
- }
-
- static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-@@ -66,19 +78,25 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
- static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
- {
-- ptep_set_wrprotect(mm, addr, ptep);
-+ pte_t pte = huge_ptep_get(ptep);
-+
-+ pte = pte_wrprotect(pte);
-+ set_huge_pte_at(mm, addr, ptep, pte);
- }
-
- static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep,
- pte_t pte, int dirty)
- {
-- return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
--}
-+ pte_t oldpte = huge_ptep_get(ptep);
-+ int changed = !pte_same(oldpte, pte);
-
--static inline pte_t huge_ptep_get(pte_t *ptep)
--{
-- return *ptep;
-+ if (changed && dirty) {
-+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
-+ flush_tlb_page(vma, addr);
-+ }
-+
-+ return changed;
- }
-
- static inline int arch_prepare_hugepage(struct page *page)
-diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
-index 6a63b86..9ad387e 100644
---- a/arch/x86/include/asm/io.h
-+++ b/arch/x86/include/asm/io.h
-@@ -7,6 +7,10 @@
- #include <asm-generic/int-ll64.h>
- #include <asm/page.h>
-
-+#include <xen/xen.h>
-+
-+extern int isapnp_disable;
-+
- #define build_mmio_read(name, size, type, reg, barrier) \
- static inline type name(const volatile void __iomem *addr) \
- { type ret; asm volatile("mov" size " %1,%0":reg (ret) \
-@@ -199,6 +203,18 @@ extern void __iomem *early_ioremap(resource_size_t phys_addr,
- extern void __iomem *early_memremap(resource_size_t phys_addr,
- unsigned long size);
- extern void early_iounmap(void __iomem *addr, unsigned long size);
-+extern bool is_early_ioremap_ptep(pte_t *ptep);
-+
-+#ifdef CONFIG_XEN
-+struct bio_vec;
-+
-+extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
-+ const struct bio_vec *vec2);
-+
-+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
-+ (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
-+ (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
-+#endif /* CONFIG_XEN */
-
- #define IO_SPACE_LIMIT 0xffff
-
-diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
-index 5f61f6e..b852da9 100644
---- a/arch/x86/include/asm/io_apic.h
-+++ b/arch/x86/include/asm/io_apic.h
-@@ -172,6 +172,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
- extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
-
- extern void probe_nr_irqs_gsi(void);
-+extern int get_nr_irqs_gsi(void);
-
- extern int setup_ioapic_entry(int apic, int irq,
- struct IO_APIC_route_entry *entry,
-@@ -201,4 +202,6 @@ static inline void probe_nr_irqs_gsi(void) { }
-
- #endif
-
-+void xen_io_apic_init(void);
-+
- #endif /* _ASM_X86_IO_APIC_H */
-diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
-index fd6d21b..345c99c 100644
---- a/arch/x86/include/asm/iommu.h
-+++ b/arch/x86/include/asm/iommu.h
-@@ -1,8 +1,6 @@
- #ifndef _ASM_X86_IOMMU_H
- #define _ASM_X86_IOMMU_H
-
--extern void pci_iommu_shutdown(void);
--extern void no_iommu_init(void);
- extern struct dma_map_ops nommu_dma_ops;
- extern int force_iommu, no_iommu;
- extern int iommu_detected;
-diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
-index 6e90a04..ba4dc7b 100644
---- a/arch/x86/include/asm/irq_vectors.h
-+++ b/arch/x86/include/asm/irq_vectors.h
-@@ -120,6 +120,12 @@
- */
- #define MCE_SELF_VECTOR 0xeb
-
-+#ifdef CONFIG_XEN
-+/* Xen vector callback to receive events in a HVM domain */
-+#define XEN_HVM_EVTCHN_CALLBACK 0xe9
-+#endif
-+
-+
- /*
- * First APIC vector available to drivers: (vectors 0x30-0xee) we
- * start at 0x31(0x41) to spread out vectors evenly between priority
-@@ -157,6 +163,14 @@ static inline int invalid_vm86_irq(int irq)
- #define CPU_VECTOR_LIMIT ( 8 * NR_CPUS )
- #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
-
-+#ifndef __ASSEMBLY__
-+# if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SPARSE_IRQ)
-+extern int nr_dynamic_irqs;
-+# else
-+# define NR_DYNAMIC_IRQS 256
-+# endif
-+#endif
-+
- #ifdef CONFIG_X86_IO_APIC
- # ifdef CONFIG_SPARSE_IRQ
- # define NR_IRQS \
-@@ -165,13 +179,13 @@ static inline int invalid_vm86_irq(int irq)
- (NR_VECTORS + IO_APIC_VECTOR_LIMIT))
- # else
- # if NR_CPUS < MAX_IO_APICS
--# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT)
-+# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) + NR_DYNAMIC_IRQS
- # else
--# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT)
-+# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) + NR_DYNAMIC_IRQS
- # endif
- # endif
- #else /* !CONFIG_X86_IO_APIC: */
--# define NR_IRQS NR_IRQS_LEGACY
-+# define NR_IRQS NR_IRQS_LEGACY + NR_DYNAMIC_IRQS
- #endif
-
- #endif /* _ASM_X86_IRQ_VECTORS_H */
-diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
-index ef51b50..e15fca1 100644
---- a/arch/x86/include/asm/microcode.h
-+++ b/arch/x86/include/asm/microcode.h
-@@ -55,4 +55,13 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
- }
- #endif
-
-+#ifdef CONFIG_MICROCODE_XEN
-+extern struct microcode_ops * __init init_xen_microcode(void);
-+#else
-+static inline struct microcode_ops * __init init_xen_microcode(void)
-+{
-+ return NULL;
-+}
-+#endif
-+
- #endif /* _ASM_X86_MICROCODE_H */
-diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
-index 80a1dee..67eaa91 100644
---- a/arch/x86/include/asm/mmu.h
-+++ b/arch/x86/include/asm/mmu.h
-@@ -13,6 +13,9 @@ typedef struct {
- int size;
- struct mutex lock;
- void *vdso;
-+#ifdef CONFIG_XEN
-+ int has_foreign_mappings;
-+#endif
- } mm_context_t;
-
- #ifdef CONFIG_SMP
-diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
-index efb3899..e571db4 100644
---- a/arch/x86/include/asm/paravirt.h
-+++ b/arch/x86/include/asm/paravirt.h
-@@ -330,11 +330,18 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
- {
- PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
- }
-+
- static inline void set_iopl_mask(unsigned mask)
- {
- PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
- }
-
-+static inline void set_io_bitmap(struct thread_struct *thread,
-+ unsigned long bytes_updated)
-+{
-+ PVOP_VCALL2(pv_cpu_ops.set_io_bitmap, thread, bytes_updated);
-+}
-+
- /* The paravirtualized I/O functions */
- static inline void slow_down_io(void)
- {
-@@ -770,15 +777,28 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
- #define PV_RESTORE_REGS "popl %edx; popl %ecx;"
-
- /* save and restore all caller-save registers, except return value */
--#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;"
--#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;"
-+#define __PV_SAVE_ALL_CALLER_REGS "pushl %ecx;"
-+#define __PV_RESTORE_ALL_CALLER_REGS "popl %ecx;"
-+
-+#ifdef CONFIG_FRAME_POINTER
-+#define PV_SAVE_ALL_CALLER_REGS \
-+ "push %ebp;" \
-+ "mov %esp, %ebp;" \
-+ __PV_SAVE_ALL_CALLER_REGS
-+#define PV_RESTORE_ALL_CALLER_REGS \
-+ __PV_RESTORE_ALL_CALLER_REGS \
-+ "leave;"
-+#else
-+#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS
-+#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS
-+#endif
-
- #define PV_FLAGS_ARG "0"
- #define PV_EXTRA_CLOBBERS
- #define PV_VEXTRA_CLOBBERS
- #else
- /* save and restore all caller-save registers, except return value */
--#define PV_SAVE_ALL_CALLER_REGS \
-+#define __PV_SAVE_ALL_CALLER_REGS \
- "push %rcx;" \
- "push %rdx;" \
- "push %rsi;" \
-@@ -787,7 +807,7 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
- "push %r9;" \
- "push %r10;" \
- "push %r11;"
--#define PV_RESTORE_ALL_CALLER_REGS \
-+#define __PV_RESTORE_ALL_CALLER_REGS \
- "pop %r11;" \
- "pop %r10;" \
- "pop %r9;" \
-@@ -797,6 +817,19 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
- "pop %rdx;" \
- "pop %rcx;"
-
-+#ifdef CONFIG_FRAME_POINTER
-+#define PV_SAVE_ALL_CALLER_REGS \
-+ "push %rbp;" \
-+ "mov %rsp, %rbp;" \
-+ __PV_SAVE_ALL_CALLER_REGS
-+#define PV_RESTORE_ALL_CALLER_REGS \
-+ __PV_RESTORE_ALL_CALLER_REGS \
-+ "leaveq;"
-+#else
-+#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS
-+#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS
-+#endif
-+
- /* We save some registers, but all of them, that's too much. We clobber all
- * caller saved registers but the argument parameter */
- #define PV_SAVE_REGS "pushq %%rdi;"
-diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
-index 9357473..3202dcc 100644
---- a/arch/x86/include/asm/paravirt_types.h
-+++ b/arch/x86/include/asm/paravirt_types.h
-@@ -135,6 +135,8 @@ struct pv_cpu_ops {
- void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
-
- void (*set_iopl_mask)(unsigned mask);
-+ void (*set_io_bitmap)(struct thread_struct *thread,
-+ unsigned long bytes_updated);
-
- void (*wbinvd)(void);
- void (*io_delay)(void);
-diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
-index ada8c20..faa0af1 100644
---- a/arch/x86/include/asm/pci.h
-+++ b/arch/x86/include/asm/pci.h
-@@ -21,6 +21,7 @@ struct pci_sysdata {
- extern int pci_routeirq;
- extern int noioapicquirk;
- extern int noioapicreroute;
-+extern int pci_scan_all_fns;
-
- /* scan a bus after allocating a pci_sysdata for it */
- extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
-@@ -49,6 +50,11 @@ extern unsigned int pcibios_assign_all_busses(void);
- #define pcibios_assign_all_busses() 0
- #endif
-
-+static inline int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
-+{
-+ return pci_scan_all_fns;
-+}
-+
- extern unsigned long pci_mem_start;
- #define PCIBIOS_MIN_IO 0x1000
- #define PCIBIOS_MIN_MEM (pci_mem_start)
-@@ -87,6 +93,7 @@ extern void pci_iommu_alloc(void);
-
- /* MSI arch hook */
- #define arch_setup_msi_irqs arch_setup_msi_irqs
-+#define arch_teardown_msi_irqs arch_teardown_msi_irqs
-
- #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-
-@@ -128,6 +135,7 @@ extern void pci_iommu_alloc(void);
- #include <asm-generic/pci-dma-compat.h>
-
- /* generic pci stuff */
-+#define HAVE_ARCH_PCIBIOS_SCAN_ALL_FNS
- #include <asm-generic/pci.h>
- #define PCIBIOS_MAX_MEM_32 0xffffffff
-
-diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
-index b399988..30cbf49 100644
---- a/arch/x86/include/asm/pci_x86.h
-+++ b/arch/x86/include/asm/pci_x86.h
-@@ -45,6 +45,7 @@ enum pci_bf_sort_state {
- extern unsigned int pcibios_max_latency;
-
- void pcibios_resource_survey(void);
-+void pcibios_set_cache_line_size(void);
-
- /* pci-pc.c */
-
-@@ -106,6 +107,7 @@ extern int pci_direct_probe(void);
- extern void pci_direct_init(int type);
- extern void pci_pcbios_init(void);
- extern int pci_olpc_init(void);
-+extern int pci_xen_init(void);
- extern void __init dmi_check_pciprobe(void);
- extern void __init dmi_check_skip_isa_align(void);
-
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index af6fd36..430e3cc 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -15,7 +15,6 @@
- : (prot))
-
- #ifndef __ASSEMBLY__
--
- /*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
-@@ -26,6 +25,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
- extern spinlock_t pgd_lock;
- extern struct list_head pgd_list;
-
-+extern struct mm_struct *pgd_page_get_mm(struct page *page);
-+
- #ifdef CONFIG_PARAVIRT
- #include <asm/paravirt.h>
- #else /* !CONFIG_PARAVIRT */
-@@ -76,6 +77,11 @@ extern struct list_head pgd_list;
-
- #endif /* CONFIG_PARAVIRT */
-
-+static inline pteval_t pte_flags(pte_t pte)
-+{
-+ return pte_val(pte) & PTE_FLAGS_MASK;
-+}
-+
- /*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
-@@ -397,6 +403,9 @@ static inline unsigned long pages_to_mb(unsigned long npg)
- #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
-
-+#define arch_vm_get_page_prot arch_vm_get_page_prot
-+extern pgprot_t arch_vm_get_page_prot(unsigned vm_flags);
-+
- #if PAGETABLE_LEVELS > 2
- static inline int pud_none(pud_t pud)
- {
-@@ -616,6 +625,9 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
- memcpy(dst, src, count * sizeof(pgd_t));
- }
-
-+int create_lookup_pte_addr(struct mm_struct *mm,
-+ unsigned long address,
-+ uint64_t *ptep);
-
- #include <asm-generic/pgtable.h>
- #endif /* __ASSEMBLY__ */
-diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
-index c57a301..4e46931 100644
---- a/arch/x86/include/asm/pgtable_64.h
-+++ b/arch/x86/include/asm/pgtable_64.h
-@@ -160,7 +160,7 @@ extern void cleanup_highmap(void);
- #define pgtable_cache_init() do { } while (0)
- #define check_pgt_cache() do { } while (0)
-
--#define PAGE_AGP PAGE_KERNEL_NOCACHE
-+#define PAGE_AGP PAGE_KERNEL_IO_NOCACHE
- #define HAVE_PAGE_AGP 1
-
- /* fs/proc/kcore.c */
-diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
-index d1f4a76..a81b0ed 100644
---- a/arch/x86/include/asm/pgtable_types.h
-+++ b/arch/x86/include/asm/pgtable_types.h
-@@ -265,11 +265,6 @@ static inline pteval_t native_pte_val(pte_t pte)
- return pte.pte;
- }
-
--static inline pteval_t pte_flags(pte_t pte)
--{
-- return native_pte_val(pte) & PTE_FLAGS_MASK;
--}
--
- #define pgprot_val(x) ((x).pgprot)
- #define __pgprot(x) ((pgprot_t) { (x) } )
-
-diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
-index 78bb4d7..2232bd2 100644
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -551,6 +551,9 @@ static inline void native_set_iopl_mask(unsigned mask)
- #endif
- }
-
-+extern void native_set_io_bitmap(struct thread_struct *thread,
-+ unsigned long updated_bytes);
-+
- static inline void
- native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
- {
-@@ -592,6 +595,7 @@ static inline void load_sp0(struct tss_struct *tss,
- }
-
- #define set_iopl_mask native_set_iopl_mask
-+#define set_io_bitmap native_set_io_bitmap
- #endif /* CONFIG_PARAVIRT */
-
- /*
-diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
-index 53235fd..daaacab 100644
---- a/arch/x86/include/asm/pvclock.h
-+++ b/arch/x86/include/asm/pvclock.h
-@@ -10,5 +10,6 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
- void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
- struct pvclock_vcpu_time_info *vcpu,
- struct timespec *ts);
-+void pvclock_resume(void);
-
- #endif /* _ASM_X86_PVCLOCK_H */
-diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
-index 18e496c..154a5f1 100644
---- a/arch/x86/include/asm/setup.h
-+++ b/arch/x86/include/asm/setup.h
-@@ -95,6 +95,11 @@ void *extend_brk(size_t size, size_t align);
- : : "i" (sz)); \
- }
-
-+/* Helper for reserving space for arrays of things */
-+#define RESERVE_BRK_ARRAY(type, name, entries) \
-+ type *name; \
-+ RESERVE_BRK(name, sizeof(type) * entries)
-+
- #ifdef __i386__
-
- void __init i386_start_kernel(void);
-diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
-index b9e4e20..8085277 100644
---- a/arch/x86/include/asm/swiotlb.h
-+++ b/arch/x86/include/asm/swiotlb.h
-@@ -3,15 +3,16 @@
-
- #include <linux/swiotlb.h>
-
--/* SWIOTLB interface */
--
--extern int swiotlb_force;
--
- #ifdef CONFIG_SWIOTLB
- extern int swiotlb;
--extern void pci_swiotlb_init(void);
-+extern int __init pci_swiotlb_detect(void);
-+extern void __init pci_swiotlb_init(void);
- #else
- #define swiotlb 0
-+static inline int pci_swiotlb_detect(void)
-+{
-+ return 0;
-+}
- static inline void pci_swiotlb_init(void)
- {
- }
-diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
-index 1bb6e39..ef0fa4d 100644
---- a/arch/x86/include/asm/syscalls.h
-+++ b/arch/x86/include/asm/syscalls.h
-@@ -33,11 +33,11 @@ long sys_rt_sigreturn(struct pt_regs *);
- asmlinkage int sys_set_thread_area(struct user_desc __user *);
- asmlinkage int sys_get_thread_area(struct user_desc __user *);
-
--/* X86_32 only */
--#ifdef CONFIG_X86_32
- /* kernel/ioport.c */
--long sys_iopl(struct pt_regs *);
-+asmlinkage long sys_iopl(unsigned int);
-
-+/* X86_32 only */
-+#ifdef CONFIG_X86_32
- /* kernel/process_32.c */
- int sys_clone(struct pt_regs *);
- int sys_execve(struct pt_regs *);
-@@ -68,8 +68,6 @@ int sys_vm86(struct pt_regs *);
- #else /* CONFIG_X86_32 */
-
- /* X86_64 only */
--/* kernel/ioport.c */
--asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
-
- /* kernel/process_64.c */
- asmlinkage long sys_clone(unsigned long, unsigned long,
-diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
-index 7f3eba0..e4fc8ea 100644
---- a/arch/x86/include/asm/tlbflush.h
-+++ b/arch/x86/include/asm/tlbflush.h
-@@ -89,6 +89,10 @@ static inline void __flush_tlb_one(unsigned long addr)
-
- #ifndef CONFIG_SMP
-
-+static inline void __init init_smp_flush(void)
-+{
-+}
-+
- #define flush_tlb() __flush_tlb()
- #define flush_tlb_all() __flush_tlb_all()
- #define local_flush_tlb() __flush_tlb()
-@@ -129,6 +133,8 @@ static inline void reset_lazy_tlbstate(void)
-
- #define local_flush_tlb() __flush_tlb()
-
-+extern void init_smp_flush(void);
-+
- extern void flush_tlb_all(void);
- extern void flush_tlb_current_task(void);
- extern void flush_tlb_mm(struct mm_struct *);
-diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
-index 2c756fd..d8e7145 100644
---- a/arch/x86/include/asm/x86_init.h
-+++ b/arch/x86/include/asm/x86_init.h
-@@ -91,6 +91,14 @@ struct x86_init_timers {
- };
-
- /**
-+ * struct x86_init_iommu - platform specific iommu setup
-+ * @iommu_init: platform specific iommu setup
-+ */
-+struct x86_init_iommu {
-+ int (*iommu_init)(void);
-+};
-+
-+/**
- * struct x86_init_ops - functions for platform specific setup
- *
- */
-@@ -101,6 +109,7 @@ struct x86_init_ops {
- struct x86_init_oem oem;
- struct x86_init_paging paging;
- struct x86_init_timers timers;
-+ struct x86_init_iommu iommu;
- };
-
- /**
-@@ -121,6 +130,7 @@ struct x86_platform_ops {
- unsigned long (*calibrate_tsc)(void);
- unsigned long (*get_wallclock)(void);
- int (*set_wallclock)(unsigned long nowtime);
-+ void (*iommu_shutdown)(void);
- };
-
- extern struct x86_init_ops x86_init;
-diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
-index 9c371e4..41c4be0 100644
---- a/arch/x86/include/asm/xen/hypercall.h
-+++ b/arch/x86/include/asm/xen/hypercall.h
-@@ -45,6 +45,8 @@
- #include <xen/interface/xen.h>
- #include <xen/interface/sched.h>
- #include <xen/interface/physdev.h>
-+#include <xen/interface/platform.h>
-+#include <xen/interface/xen-mca.h>
-
- /*
- * The hypercall asms have to meet several constraints:
-@@ -200,6 +202,23 @@ extern struct { char _entry[32]; } hypercall_page[];
- (type)__res; \
- })
-
-+static inline long
-+privcmd_call(unsigned call,
-+ unsigned long a1, unsigned long a2,
-+ unsigned long a3, unsigned long a4,
-+ unsigned long a5)
-+{
-+ __HYPERCALL_DECLS;
-+ __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
-+
-+ asm volatile("call *%[call]"
-+ : __HYPERCALL_5PARAM
-+ : [call] "a" (&hypercall_page[call])
-+ : __HYPERCALL_CLOBBER5);
-+
-+ return (long)__res;
-+}
-+
- static inline int
- HYPERVISOR_set_trap_table(struct trap_info *table)
- {
-@@ -282,6 +301,20 @@ HYPERVISOR_set_timer_op(u64 timeout)
- }
-
- static inline int
-+HYPERVISOR_mca(struct xen_mc *mc_op)
-+{
-+ mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
-+ return _hypercall1(int, mca, mc_op);
-+}
-+
-+static inline int
-+HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
-+{
-+ platform_op->interface_version = XENPF_INTERFACE_VERSION;
-+ return _hypercall1(int, dom0_op, platform_op);
-+}
-+
-+static inline int
- HYPERVISOR_set_debugreg(int reg, unsigned long value)
- {
- return _hypercall2(int, set_debugreg, reg, value);
-@@ -417,6 +450,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
- return _hypercall2(int, nmi_op, op, arg);
- }
-
-+static inline unsigned long __must_check
-+HYPERVISOR_hvm_op(int op, void *arg)
-+{
-+ return _hypercall2(unsigned long, hvm_op, op, arg);
-+}
-+
- static inline void
- MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
- {
-@@ -424,6 +463,14 @@ MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
- mcl->args[0] = set;
- }
-
-+#if defined(CONFIG_X86_64)
-+#define MULTI_UVMFLAGS_INDEX 2
-+#define MULTI_UVMDOMID_INDEX 3
-+#else
-+#define MULTI_UVMFLAGS_INDEX 3
-+#define MULTI_UVMDOMID_INDEX 4
-+#endif
-+
- static inline void
- MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
- pte_t new_val, unsigned long flags)
-@@ -432,12 +479,11 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
- mcl->args[0] = va;
- if (sizeof(new_val) == sizeof(long)) {
- mcl->args[1] = new_val.pte;
-- mcl->args[2] = flags;
- } else {
- mcl->args[1] = new_val.pte;
- mcl->args[2] = new_val.pte >> 32;
-- mcl->args[3] = flags;
- }
-+ mcl->args[MULTI_UVMFLAGS_INDEX] = flags;
- }
-
- static inline void
-diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
-index d5b7e90..396ff4c 100644
---- a/arch/x86/include/asm/xen/hypervisor.h
-+++ b/arch/x86/include/asm/xen/hypervisor.h
-@@ -37,31 +37,4 @@
- extern struct shared_info *HYPERVISOR_shared_info;
- extern struct start_info *xen_start_info;
-
--enum xen_domain_type {
-- XEN_NATIVE, /* running on bare hardware */
-- XEN_PV_DOMAIN, /* running in a PV domain */
-- XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
--};
--
--#ifdef CONFIG_XEN
--extern enum xen_domain_type xen_domain_type;
--#else
--#define xen_domain_type XEN_NATIVE
--#endif
--
--#define xen_domain() (xen_domain_type != XEN_NATIVE)
--#define xen_pv_domain() (xen_domain() && \
-- xen_domain_type == XEN_PV_DOMAIN)
--#define xen_hvm_domain() (xen_domain() && \
-- xen_domain_type == XEN_HVM_DOMAIN)
--
--#ifdef CONFIG_XEN_DOM0
--#include <xen/interface/xen.h>
--
--#define xen_initial_domain() (xen_pv_domain() && \
-- xen_start_info->flags & SIF_INITDOMAIN)
--#else /* !CONFIG_XEN_DOM0 */
--#define xen_initial_domain() (0)
--#endif /* CONFIG_XEN_DOM0 */
--
- #endif /* _ASM_X86_XEN_HYPERVISOR_H */
-diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
-index e8506c1..9539998 100644
---- a/arch/x86/include/asm/xen/interface.h
-+++ b/arch/x86/include/asm/xen/interface.h
-@@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void);
- #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
- #endif
-
--#ifndef machine_to_phys_mapping
--#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
--#endif
-+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
-+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
-+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT)
-
- /* Maximum number of virtual CPUs in multi-processor guests. */
- #define MAX_VIRT_CPUS 32
-@@ -97,6 +97,8 @@ DEFINE_GUEST_HANDLE(void);
- #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2))
-
- #ifndef __ASSEMBLY__
-+#include <linux/types.h>
-+
- struct trap_info {
- uint8_t vector; /* exception vector */
- uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
-diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
-index 42a7e00..8413688 100644
---- a/arch/x86/include/asm/xen/interface_32.h
-+++ b/arch/x86/include/asm/xen/interface_32.h
-@@ -32,6 +32,11 @@
- /* And the trap vector is... */
- #define TRAP_INSTR "int $0x82"
-
-+#define __MACH2PHYS_VIRT_START 0xF5800000
-+#define __MACH2PHYS_VIRT_END 0xF6800000
-+
-+#define __MACH2PHYS_SHIFT 2
-+
- /*
- * Virtual addresses beyond this are not modifiable by guest OSes. The
- * machine->physical mapping table starts at this address, read-only.
-diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
-index 100d266..839a481 100644
---- a/arch/x86/include/asm/xen/interface_64.h
-+++ b/arch/x86/include/asm/xen/interface_64.h
-@@ -39,18 +39,7 @@
- #define __HYPERVISOR_VIRT_END 0xFFFF880000000000
- #define __MACH2PHYS_VIRT_START 0xFFFF800000000000
- #define __MACH2PHYS_VIRT_END 0xFFFF804000000000
--
--#ifndef HYPERVISOR_VIRT_START
--#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
--#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
--#endif
--
--#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
--#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
--#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
--#ifndef machine_to_phys_mapping
--#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
--#endif
-+#define __MACH2PHYS_SHIFT 3
-
- /*
- * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
-diff --git a/arch/x86/include/asm/xen/iommu.h b/arch/x86/include/asm/xen/iommu.h
-new file mode 100644
-index 0000000..75df312
---- /dev/null
-+++ b/arch/x86/include/asm/xen/iommu.h
-@@ -0,0 +1,12 @@
-+#ifndef ASM_X86__XEN_IOMMU_H
-+
-+#ifdef CONFIG_PCI_XEN
-+extern void xen_iommu_init(void);
-+#else
-+static inline void xen_iommu_init(void)
-+{
-+}
-+#endif
-+
-+#endif
-+
-diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
-index 018a0a4..05c5cf5 100644
---- a/arch/x86/include/asm/xen/page.h
-+++ b/arch/x86/include/asm/xen/page.h
-@@ -5,6 +5,7 @@
- #include <linux/types.h>
- #include <linux/spinlock.h>
- #include <linux/pfn.h>
-+#include <linux/mm.h>
-
- #include <asm/uaccess.h>
- #include <asm/page.h>
-@@ -28,23 +29,32 @@ typedef struct xpaddr {
-
- /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
- #define INVALID_P2M_ENTRY (~0UL)
--#define FOREIGN_FRAME_BIT (1UL<<31)
-+#define FOREIGN_FRAME_BIT (1UL << (sizeof(unsigned long) * 8 - 1))
- #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
-
- /* Maximum amount of memory we can handle in a domain in pages */
- #define MAX_DOMAIN_PAGES \
- ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
-
-+extern unsigned long *machine_to_phys_mapping;
-+extern unsigned int machine_to_phys_order;
-
- extern unsigned long get_phys_to_machine(unsigned long pfn);
--extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-+extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-
- static inline unsigned long pfn_to_mfn(unsigned long pfn)
- {
-+ unsigned long mfn;
-+
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return pfn;
-
-- return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
-+ mfn = get_phys_to_machine(pfn);
-+
-+ if (mfn != INVALID_P2M_ENTRY)
-+ mfn &= ~FOREIGN_FRAME_BIT;
-+
-+ return mfn;
- }
-
- static inline int phys_to_machine_mapping_valid(unsigned long pfn)
-@@ -62,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return mfn;
-
--#if 0
- if (unlikely((mfn >> machine_to_phys_order) != 0))
-- return max_mapnr;
--#endif
-+ return ~0;
-
- pfn = 0;
- /*
-@@ -112,13 +120,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
- */
- static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
- {
-- extern unsigned long max_mapnr;
- unsigned long pfn = mfn_to_pfn(mfn);
-- if ((pfn < max_mapnr)
-- && !xen_feature(XENFEAT_auto_translated_physmap)
-- && (get_phys_to_machine(pfn) != mfn))
-- return max_mapnr; /* force !pfn_valid() */
-- /* XXX fixme; not true with sparsemem */
-+ if (get_phys_to_machine(pfn) != mfn)
-+ return -1; /* force !pfn_valid() */
- return pfn;
- }
-
-@@ -163,6 +167,7 @@ static inline pte_t __pte_ma(pteval_t x)
-
- #define pgd_val_ma(x) ((x).pgd)
-
-+void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);
-
- xmaddr_t arbitrary_virt_to_machine(void *address);
- unsigned long arbitrary_virt_to_mfn(void *vaddr);
-diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
-new file mode 100644
-index 0000000..6683196
---- /dev/null
-+++ b/arch/x86/include/asm/xen/pci.h
-@@ -0,0 +1,104 @@
-+#ifndef _ASM_X86_XEN_PCI_H
-+#define _ASM_X86_XEN_PCI_H
-+
-+#if defined(CONFIG_PCI_MSI)
-+#if defined(CONFIG_PCI_XEN)
-+int xen_register_pirq(u32 gsi, int triggering);
-+int xen_register_gsi(u32 gsi, int triggering, int polarity);
-+int xen_create_msi_irq(struct pci_dev *dev,
-+ struct msi_desc *msidesc,
-+ int type);
-+void xen_pci_teardown_msi_dev(struct pci_dev *dev);
-+void xen_pci_teardown_msi_irq(int irq);
-+int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
-+
-+/* The drivers/pci/xen-pcifront.c sets this structure to
-+ * its own functions.
-+ */
-+struct xen_pci_frontend_ops {
-+ int (*enable_msi)(struct pci_dev *dev, int **vectors);
-+ void (*disable_msi)(struct pci_dev *dev);
-+ int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
-+ void (*disable_msix)(struct pci_dev *dev);
-+};
-+
-+extern struct xen_pci_frontend_ops *xen_pci_frontend;
-+
-+static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
-+ int **vectors)
-+{
-+ if (xen_pci_frontend && xen_pci_frontend->enable_msi)
-+ return xen_pci_frontend->enable_msi(dev, vectors);
-+ return -ENODEV;
-+}
-+static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
-+{
-+ if (xen_pci_frontend && xen_pci_frontend->disable_msi)
-+ xen_pci_frontend->disable_msi(dev);
-+}
-+static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
-+ int **vectors, int nvec)
-+{
-+ if (xen_pci_frontend && xen_pci_frontend->enable_msix)
-+ return xen_pci_frontend->enable_msix(dev, vectors, nvec);
-+ return -ENODEV;
-+}
-+static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev)
-+{
-+ if (xen_pci_frontend && xen_pci_frontend->disable_msix)
-+ xen_pci_frontend->disable_msix(dev);
-+}
-+#else
-+static inline int xen_create_msi_irq(struct pci_dev *dev,
-+ struct msi_desc *msidesc,
-+ int type)
-+{
-+ return -1;
-+}
-+static inline void xen_pci_teardown_msi_dev(struct pci_dev *dev) { }
-+static inline void xen_pci_teardown_msi_irq(int irq) { }
-+static inline int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-+{
-+ return -ENODEV;
-+}
-+#endif /* CONFIG_PCI_XEN */
-+
-+#endif /* CONFIG_PCI_MSI */
-+
-+#ifdef CONFIG_XEN_DOM0_PCI
-+int xen_register_gsi(u32 gsi, int triggering, int polarity);
-+int xen_find_device_domain_owner(struct pci_dev *dev);
-+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
-+int xen_unregister_device_domain_owner(struct pci_dev *dev);
-+
-+#else
-+static inline int xen_register_gsi(u32 gsi, int triggering, int polarity)
-+{
-+ return -1;
-+}
-+
-+static inline int xen_find_device_domain_owner(struct pci_dev *dev)
-+{
-+ return -1;
-+}
-+static inline int xen_register_device_domain_owner(struct pci_dev *dev,
-+ uint16_t domain)
-+{
-+ return -1;
-+}
-+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
-+{
-+ return -1;
-+}
-+#endif
-+
-+#if defined(CONFIG_PCI_MSI) && defined(CONFIG_XEN_DOM0_PCI)
-+int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
-+#else
-+static inline int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-+{
-+ return -1;
-+}
-+#endif
-+
-+#endif /* _ASM_X86_XEN_PCI_H */
-diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
-new file mode 100644
-index 0000000..e4fe299
---- /dev/null
-+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
-@@ -0,0 +1,14 @@
-+#ifndef _ASM_X86_SWIOTLB_XEN_H
-+#define _ASM_X86_SWIOTLB_XEN_H
-+
-+#ifdef CONFIG_PCI_XEN
-+extern int xen_swiotlb;
-+extern int __init pci_xen_swiotlb_detect(void);
-+extern void __init pci_xen_swiotlb_init(void);
-+#else
-+#define xen_swiotlb 0
-+static inline int __init pci_xen_swiotlb_detect(void) { return 0; }
-+static inline void __init pci_xen_swiotlb_init(void) { }
-+#endif
-+
-+#endif /* _ASM_X86_SWIOTLB_XEN_H */
-diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
-index d1911ab..cfe00bc 100644
---- a/arch/x86/kernel/Makefile
-+++ b/arch/x86/kernel/Makefile
-@@ -113,6 +113,7 @@ obj-$(CONFIG_X86_MRST) += mrst.o
- microcode-y := microcode_core.o
- microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
- microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
-+microcode-$(CONFIG_MICROCODE_XEN) += microcode_xen.o
- obj-$(CONFIG_MICROCODE) += microcode.o
-
- obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
-diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
-index 23c2da8..a2a5125 100644
---- a/arch/x86/kernel/acpi/boot.c
-+++ b/arch/x86/kernel/acpi/boot.c
-@@ -42,6 +42,10 @@
- #include <asm/mpspec.h>
- #include <asm/smp.h>
-
-+#include <asm/xen/pci.h>
-+
-+#include <asm/xen/hypervisor.h>
-+
- static int __initdata acpi_force = 0;
- u32 acpi_rsdt_forced;
- int acpi_disabled;
-@@ -149,6 +153,10 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
- {
- unsigned int ver = 0;
-
-+ /* We don't want to register lapics when in Xen dom0 */
-+ if (xen_initial_domain())
-+ return;
-+
- if (!enabled) {
- ++disabled_cpus;
- return;
-@@ -461,9 +469,13 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
- */
- int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
- {
-- unsigned int irq;
-+ int irq;
- unsigned int plat_gsi = gsi;
-
-+ irq = xen_register_gsi(gsi, trigger, polarity);
-+ if (irq >= 0)
-+ return irq;
-+
- #ifdef CONFIG_PCI
- /*
- * Make sure all (legacy) PCI IRQs are set as level-triggered.
-@@ -740,6 +752,10 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
-
- static void __init acpi_register_lapic_address(unsigned long address)
- {
-+ /* Xen dom0 doesn't have usable lapics */
-+ if (xen_initial_domain())
-+ return;
-+
- mp_lapic_addr = address;
-
- set_fixmap_nocache(FIX_APIC_BASE, address);
-@@ -860,6 +876,9 @@ int __init acpi_probe_gsi(void)
- max_gsi = gsi;
- }
-
-+ if (xen_initial_domain())
-+ max_gsi += 255; /* Plus maximum entries of an ioapic. */
-+
- return max_gsi + 1;
- }
-
-diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
-index d85d1b2..8aabedd 100644
---- a/arch/x86/kernel/acpi/processor.c
-+++ b/arch/x86/kernel/acpi/processor.c
-@@ -12,6 +12,8 @@
- #include <acpi/processor.h>
- #include <asm/acpi.h>
-
-+#include <xen/xen.h>
-+
- static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
- {
- struct acpi_object_list *obj_list;
-@@ -59,7 +61,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
- /*
- * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
- */
-- if (!cpu_has(c, X86_FEATURE_MWAIT))
-+ if (!cpu_has(c, X86_FEATURE_MWAIT) && !xen_initial_domain())
- buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
-
- obj->type = ACPI_TYPE_BUFFER;
-@@ -88,6 +90,19 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
-
- EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
-
-+/* Initialize _PDC data based on the CPU vendor */
-+void xen_arch_acpi_processor_init_pdc(struct acpi_processor *pr)
-+{
-+ struct cpuinfo_x86 *c = &cpu_data(0);
-+
-+ pr->pdc = NULL;
-+ if (c->x86_vendor == X86_VENDOR_INTEL)
-+ init_intel_pdc(pr, c);
-+
-+ return;
-+}
-+EXPORT_SYMBOL(xen_arch_acpi_processor_init_pdc);
-+
- void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr)
- {
- if (pr->pdc) {
-diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
-index ca93638..9eff23c 100644
---- a/arch/x86/kernel/acpi/sleep.c
-+++ b/arch/x86/kernel/acpi/sleep.c
-@@ -12,6 +12,8 @@
- #include <asm/segment.h>
- #include <asm/desc.h>
-
-+#include <xen/acpi.h>
-+
- #include "realmode/wakeup.h"
- #include "sleep.h"
-
-diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
-index 7cd33f7..b8497c6 100644
---- a/arch/x86/kernel/amd_iommu.c
-+++ b/arch/x86/kernel/amd_iommu.c
-@@ -928,7 +928,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
- }
-
- if (unlikely(address == -1))
-- address = bad_dma_address;
-+ address = DMA_ERROR_CODE;
-
- WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
-
-@@ -1545,7 +1545,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
-
- pte = dma_ops_get_pte(dom, address);
- if (!pte)
-- return bad_dma_address;
-+ return DMA_ERROR_CODE;
-
- __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
-
-@@ -1626,7 +1626,7 @@ static dma_addr_t __map_single(struct device *dev,
- retry:
- address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
- dma_mask);
-- if (unlikely(address == bad_dma_address)) {
-+ if (unlikely(address == DMA_ERROR_CODE)) {
- /*
- * setting next_address here will let the address
- * allocator only scan the new allocated range in the
-@@ -1647,7 +1647,7 @@ retry:
- start = address;
- for (i = 0; i < pages; ++i) {
- ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
-- if (ret == bad_dma_address)
-+ if (ret == DMA_ERROR_CODE)
- goto out_unmap;
-
- paddr += PAGE_SIZE;
-@@ -1675,7 +1675,7 @@ out_unmap:
-
- dma_ops_free_addresses(dma_dom, address, pages);
-
-- return bad_dma_address;
-+ return DMA_ERROR_CODE;
- }
-
- /*
-@@ -1692,7 +1692,7 @@ static void __unmap_single(struct amd_iommu *iommu,
- dma_addr_t i, start;
- unsigned int pages;
-
-- if ((dma_addr == bad_dma_address) ||
-+ if ((dma_addr == DMA_ERROR_CODE) ||
- (dma_addr + size > dma_dom->aperture_size))
- return;
-
-@@ -1735,7 +1735,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
- INC_STATS_COUNTER(cnt_map_single);
-
- if (!check_device(dev))
-- return bad_dma_address;
-+ return DMA_ERROR_CODE;
-
- dma_mask = *dev->dma_mask;
-
-@@ -1746,12 +1746,12 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
- return (dma_addr_t)paddr;
-
- if (!dma_ops_domain(domain))
-- return bad_dma_address;
-+ return DMA_ERROR_CODE;
-
- spin_lock_irqsave(&domain->lock, flags);
- addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
- dma_mask);
-- if (addr == bad_dma_address)
-+ if (addr == DMA_ERROR_CODE)
- goto out;
-
- iommu_completion_wait(iommu);
-@@ -1960,7 +1960,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
- *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
- size, DMA_BIDIRECTIONAL, true, dma_mask);
-
-- if (*dma_addr == bad_dma_address) {
-+ if (*dma_addr == DMA_ERROR_CODE) {
- spin_unlock_irqrestore(&domain->lock, flags);
- goto out_free;
- }
-@@ -2122,8 +2122,7 @@ int __init amd_iommu_init_dma_ops(void)
- prealloc_protection_domains();
-
- iommu_detected = 1;
-- force_iommu = 1;
-- bad_dma_address = 0;
-+ swiotlb = 0;
- #ifdef CONFIG_GART_IOMMU
- gart_iommu_aperture_disabled = 1;
- gart_iommu_aperture = 0;
-diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
-index 400be99..0069df5 100644
---- a/arch/x86/kernel/amd_iommu_init.c
-+++ b/arch/x86/kernel/amd_iommu_init.c
-@@ -29,6 +29,7 @@
- #include <asm/amd_iommu.h>
- #include <asm/iommu.h>
- #include <asm/gart.h>
-+#include <asm/x86_init.h>
-
- /*
- * definitions for the ACPI scanning code
-@@ -1206,19 +1207,10 @@ static struct sys_device device_amd_iommu = {
- * functions. Finally it prints some information about AMD IOMMUs and
- * the driver state and enables the hardware.
- */
--int __init amd_iommu_init(void)
-+static int __init amd_iommu_init(void)
- {
- int i, ret = 0;
-
--
-- if (no_iommu) {
-- printk(KERN_INFO "AMD-Vi disabled by kernel command line\n");
-- return 0;
-- }
--
-- if (!amd_iommu_detected)
-- return -ENODEV;
--
- /*
- * First parse ACPI tables to find the largest Bus/Dev/Func
- * we need to handle. Upon this information the shared data
-@@ -1333,6 +1325,7 @@ int __init amd_iommu_init(void)
- else
- printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
-
-+ x86_platform.iommu_shutdown = disable_iommus;
- out:
- return ret;
-
-@@ -1361,11 +1354,6 @@ free:
- goto out;
- }
-
--void amd_iommu_shutdown(void)
--{
-- disable_iommus();
--}
--
- /****************************************************************************
- *
- * Early detect code. This code runs at IOMMU detection time in the DMA
-@@ -1380,16 +1368,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
-
- void __init amd_iommu_detect(void)
- {
-- if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
-+ if (no_iommu || (iommu_detected && !gart_iommu_aperture))
- return;
-
- if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
- iommu_detected = 1;
- amd_iommu_detected = 1;
--#ifdef CONFIG_GART_IOMMU
-- gart_iommu_aperture_disabled = 1;
-- gart_iommu_aperture = 0;
--#endif
-+ x86_init.iommu.iommu_init = amd_iommu_init;
- }
- }
-
-diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
-index 082089e..8d34362 100644
---- a/arch/x86/kernel/aperture_64.c
-+++ b/arch/x86/kernel/aperture_64.c
-@@ -28,6 +28,7 @@
- #include <asm/pci-direct.h>
- #include <asm/dma.h>
- #include <asm/k8.h>
-+#include <asm/x86_init.h>
-
- int gart_iommu_aperture;
- int gart_iommu_aperture_disabled __initdata;
-@@ -401,6 +402,7 @@ void __init gart_iommu_hole_init(void)
-
- iommu_detected = 1;
- gart_iommu_aperture = 1;
-+ x86_init.iommu.iommu_init = gart_iommu_init;
-
- ctl = read_pci_config(bus, slot, 3,
- AMD64_GARTAPERTURECTL);
-@@ -469,7 +471,7 @@ out:
-
- if (aper_alloc) {
- /* Got the aperture from the AGP bridge */
-- } else if (swiotlb && !valid_agp) {
-+ } else if (!valid_agp) {
- /* Do nothing */
- } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
- force_iommu ||
-diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
-index 8928d97..4848d5d 100644
---- a/arch/x86/kernel/apic/io_apic.c
-+++ b/arch/x86/kernel/apic/io_apic.c
-@@ -63,7 +63,12 @@
- #include <asm/uv/uv_hub.h>
- #include <asm/uv/uv_irq.h>
-
-+#include <asm/xen/hypervisor.h>
- #include <asm/apic.h>
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/pci.h>
-+
-+#include <asm/xen/pci.h>
-
- #define __apicdebuginit(type) static type __init
- #define for_each_irq_pin(entry, head) \
-@@ -395,14 +400,18 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
-
- static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
- {
-- struct io_apic __iomem *io_apic = io_apic_base(apic);
-+ struct io_apic __iomem *io_apic;
-+
-+ io_apic = io_apic_base(apic);
- writel(reg, &io_apic->index);
- return readl(&io_apic->data);
- }
-
- static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
- {
-- struct io_apic __iomem *io_apic = io_apic_base(apic);
-+ struct io_apic __iomem *io_apic;
-+
-+ io_apic = io_apic_base(apic);
- writel(reg, &io_apic->index);
- writel(value, &io_apic->data);
- }
-@@ -415,7 +424,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
- */
- static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
- {
-- struct io_apic __iomem *io_apic = io_apic_base(apic);
-+ struct io_apic __iomem *io_apic;
-+
-+ io_apic = io_apic_base(apic);
-
- if (sis_apic_bug)
- writel(reg, &io_apic->index);
-@@ -3494,6 +3505,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
-
-+ if (xen_pv_domain())
-+ return xen_pci_setup_msi_irqs(dev, nvec, type);
-+
- node = dev_to_node(&dev->dev);
- irq_want = nr_irqs_gsi;
- sub_handle = 0;
-@@ -3543,7 +3557,29 @@ error:
-
- void arch_teardown_msi_irq(unsigned int irq)
- {
-- destroy_irq(irq);
-+ if (xen_domain())
-+ xen_pci_teardown_msi_irq(irq);
-+ else
-+ destroy_irq(irq);
-+}
-+
-+void arch_teardown_msi_irqs(struct pci_dev *dev)
-+{
-+ struct msi_desc *entry;
-+
-+ /* If we are non-privileged PV domain, we have to
-+ * to call xen_teardown_msi_dev first. */
-+ if (xen_domain())
-+ xen_pci_teardown_msi_dev(dev);
-+
-+ list_for_each_entry(entry, &dev->msi_list, list) {
-+ int i, nvec;
-+ if (entry->irq == 0)
-+ continue;
-+ nvec = 1 << entry->msi_attrib.multiple;
-+ for (i = 0; i < nvec; i++)
-+ arch_teardown_msi_irq(entry->irq + i);
-+ }
- }
-
- #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
-@@ -3860,7 +3896,14 @@ void __init probe_nr_irqs_gsi(void)
- printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
- }
-
-+int get_nr_irqs_gsi(void)
-+{
-+ return nr_irqs_gsi;
-+}
-+
- #ifdef CONFIG_SPARSE_IRQ
-+int nr_dynamic_irqs;
-+
- int __init arch_probe_nr_irqs(void)
- {
- int nr;
-@@ -3878,6 +3921,8 @@ int __init arch_probe_nr_irqs(void)
- if (nr < nr_irqs)
- nr_irqs = nr;
-
-+ nr_irqs += nr_dynamic_irqs;
-+
- return 0;
- }
- #endif
-diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
-index 7ff61d6..d1e6e60 100644
---- a/arch/x86/kernel/apic/nmi.c
-+++ b/arch/x86/kernel/apic/nmi.c
-@@ -558,6 +558,9 @@ void arch_trigger_all_cpu_backtrace(void)
- {
- int i;
-
-+ if (!cpu_has_apic)
-+ return;
-+
- cpumask_copy(&backtrace_mask, cpu_online_mask);
-
- printk(KERN_INFO "sending NMI to all CPUs:\n");
-diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
-index f4361b5..404e458 100644
---- a/arch/x86/kernel/cpu/mtrr/Makefile
-+++ b/arch/x86/kernel/cpu/mtrr/Makefile
-@@ -1,3 +1,4 @@
- obj-y := main.o if.o generic.o state.o cleanup.o
- obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
-+obj-$(CONFIG_XEN_DOM0) += xen.o
-
-diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
-index 33af141..378f8dc 100644
---- a/arch/x86/kernel/cpu/mtrr/amd.c
-+++ b/arch/x86/kernel/cpu/mtrr/amd.c
-@@ -108,6 +108,11 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
- return 0;
- }
-
-+static int amd_num_var_ranges(void)
-+{
-+ return 2;
-+}
-+
- static struct mtrr_ops amd_mtrr_ops = {
- .vendor = X86_VENDOR_AMD,
- .set = amd_set_mtrr,
-@@ -115,6 +120,7 @@ static struct mtrr_ops amd_mtrr_ops = {
- .get_free_region = generic_get_free_region,
- .validate_add_page = amd_validate_add_page,
- .have_wrcomb = positive_have_wrcomb,
-+ .num_var_ranges = amd_num_var_ranges,
- };
-
- int __init amd_init_mtrr(void)
-diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
-index de89f14..7c686a0 100644
---- a/arch/x86/kernel/cpu/mtrr/centaur.c
-+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
-@@ -110,6 +110,11 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
- return 0;
- }
-
-+static int centaur_num_var_ranges(void)
-+{
-+ return 8;
-+}
-+
- static struct mtrr_ops centaur_mtrr_ops = {
- .vendor = X86_VENDOR_CENTAUR,
- .set = centaur_set_mcr,
-@@ -117,6 +122,7 @@ static struct mtrr_ops centaur_mtrr_ops = {
- .get_free_region = centaur_get_free_region,
- .validate_add_page = centaur_validate_add_page,
- .have_wrcomb = positive_have_wrcomb,
-+ .num_var_ranges = centaur_num_var_ranges,
- };
-
- int __init centaur_init_mtrr(void)
-diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
-index 228d982..fd6edcc 100644
---- a/arch/x86/kernel/cpu/mtrr/cyrix.c
-+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
-@@ -265,6 +265,11 @@ static void cyrix_set_all(void)
- post_set();
- }
-
-+static int cyrix_num_var_ranges(void)
-+{
-+ return 8;
-+}
-+
- static struct mtrr_ops cyrix_mtrr_ops = {
- .vendor = X86_VENDOR_CYRIX,
- .set_all = cyrix_set_all,
-@@ -273,6 +278,7 @@ static struct mtrr_ops cyrix_mtrr_ops = {
- .get_free_region = cyrix_get_free_region,
- .validate_add_page = generic_validate_add_page,
- .have_wrcomb = positive_have_wrcomb,
-+ .num_var_ranges = cyrix_num_var_ranges,
- };
-
- int __init cyrix_init_mtrr(void)
-diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
-index 55da0c5..42f30cd 100644
---- a/arch/x86/kernel/cpu/mtrr/generic.c
-+++ b/arch/x86/kernel/cpu/mtrr/generic.c
-@@ -749,8 +749,16 @@ int positive_have_wrcomb(void)
- return 1;
- }
-
--/*
-- * Generic structure...
-+static int generic_num_var_ranges(void)
-+{
-+ unsigned long config = 0, dummy;
-+
-+ rdmsr(MSR_MTRRcap, config, dummy);
-+
-+ return config & 0xff;
-+}
-+
-+/* generic structure...
- */
- struct mtrr_ops generic_mtrr_ops = {
- .use_intel_if = 1,
-@@ -760,4 +768,5 @@ struct mtrr_ops generic_mtrr_ops = {
- .set = generic_set_mtrr,
- .validate_add_page = generic_validate_add_page,
- .have_wrcomb = generic_have_wrcomb,
-+ .num_var_ranges = generic_num_var_ranges,
- };
-diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
-index 84e83de..c8cb9ed 100644
---- a/arch/x86/kernel/cpu/mtrr/main.c
-+++ b/arch/x86/kernel/cpu/mtrr/main.c
-@@ -110,21 +110,6 @@ static int have_wrcomb(void)
- return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
- }
-
--/* This function returns the number of variable MTRRs */
--static void __init set_num_var_ranges(void)
--{
-- unsigned long config = 0, dummy;
--
-- if (use_intel())
-- rdmsr(MSR_MTRRcap, config, dummy);
-- else if (is_cpu(AMD))
-- config = 2;
-- else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
-- config = 8;
--
-- num_var_ranges = config & 0xff;
--}
--
- static void __init init_table(void)
- {
- int i, max;
-@@ -711,8 +696,11 @@ void __init mtrr_bp_init(void)
- }
- }
-
-+ /* Let Xen code override the above if it wants */
-+ xen_init_mtrr();
-+
- if (mtrr_if) {
-- set_num_var_ranges();
-+ num_var_ranges = mtrr_if->num_var_ranges();
- init_table();
- if (use_intel()) {
- get_mtrr_state();
-diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
-index a501dee..98569c3 100644
---- a/arch/x86/kernel/cpu/mtrr/mtrr.h
-+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
-@@ -5,6 +5,8 @@
- #include <linux/types.h>
- #include <linux/stddef.h>
-
-+#include <asm/mtrr.h>
-+
- #define MTRR_CHANGE_MASK_FIXED 0x01
- #define MTRR_CHANGE_MASK_VARIABLE 0x02
- #define MTRR_CHANGE_MASK_DEFTYPE 0x04
-@@ -25,6 +27,8 @@ struct mtrr_ops {
- int (*validate_add_page)(unsigned long base, unsigned long size,
- unsigned int type);
- int (*have_wrcomb)(void);
-+
-+ int (*num_var_ranges)(void);
- };
-
- extern int generic_get_free_region(unsigned long base, unsigned long size,
-@@ -73,6 +77,13 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned);
- int amd_init_mtrr(void);
- int cyrix_init_mtrr(void);
- int centaur_init_mtrr(void);
-+#ifdef CONFIG_XEN_DOM0
-+void xen_init_mtrr(void);
-+#else
-+static inline void xen_init_mtrr(void)
-+{
-+}
-+#endif
-
- extern int changed_by_mtrr_cleanup;
- extern int mtrr_cleanup(unsigned address_bits);
-diff --git a/arch/x86/kernel/cpu/mtrr/xen.c b/arch/x86/kernel/cpu/mtrr/xen.c
-new file mode 100644
-index 0000000..852018b
---- /dev/null
-+++ b/arch/x86/kernel/cpu/mtrr/xen.c
-@@ -0,0 +1,109 @@
-+#include <linux/init.h>
-+#include <linux/mm.h>
-+
-+#include <asm/pat.h>
-+
-+#include "mtrr.h"
-+
-+#include <xen/xen.h>
-+#include <xen/interface/platform.h>
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/hypercall.h>
-+
-+static void xen_set_mtrr(unsigned int reg, unsigned long base,
-+ unsigned long size, mtrr_type type)
-+{
-+ struct xen_platform_op op;
-+ int error;
-+
-+ /* mtrr_ops->set() is called once per CPU,
-+ * but Xen's ops apply to all CPUs.
-+ */
-+ if (smp_processor_id())
-+ return;
-+
-+ if (size == 0) {
-+ op.cmd = XENPF_del_memtype;
-+ op.u.del_memtype.handle = 0;
-+ op.u.del_memtype.reg = reg;
-+ } else {
-+ op.cmd = XENPF_add_memtype;
-+ op.u.add_memtype.mfn = base;
-+ op.u.add_memtype.nr_mfns = size;
-+ op.u.add_memtype.type = type;
-+ }
-+
-+ error = HYPERVISOR_dom0_op(&op);
-+ BUG_ON(error != 0);
-+}
-+
-+static void xen_get_mtrr(unsigned int reg, unsigned long *base,
-+ unsigned long *size, mtrr_type *type)
-+{
-+ struct xen_platform_op op;
-+
-+ op.cmd = XENPF_read_memtype;
-+ op.u.read_memtype.reg = reg;
-+ if (HYPERVISOR_dom0_op(&op) != 0) {
-+ *base = 0;
-+ *size = 0;
-+ *type = 0;
-+ return;
-+ }
-+
-+ *size = op.u.read_memtype.nr_mfns;
-+ *base = op.u.read_memtype.mfn;
-+ *type = op.u.read_memtype.type;
-+}
-+
-+static int __init xen_num_var_ranges(void)
-+{
-+ int ranges;
-+ struct xen_platform_op op;
-+
-+ op.cmd = XENPF_read_memtype;
-+
-+ for (ranges = 0; ; ranges++) {
-+ op.u.read_memtype.reg = ranges;
-+ if (HYPERVISOR_dom0_op(&op) != 0)
-+ break;
-+ }
-+ return ranges;
-+}
-+
-+/*
-+ * DOM0 TODO: Need to fill in the remaining mtrr methods to have full
-+ * working userland mtrr support.
-+ */
-+static struct mtrr_ops xen_mtrr_ops = {
-+ .vendor = X86_VENDOR_UNKNOWN,
-+ .get_free_region = generic_get_free_region,
-+ .set = xen_set_mtrr,
-+ .get = xen_get_mtrr,
-+ .have_wrcomb = positive_have_wrcomb,
-+ .validate_add_page = generic_validate_add_page,
-+ .use_intel_if = 0,
-+ .num_var_ranges = xen_num_var_ranges,
-+};
-+
-+void __init xen_init_mtrr(void)
-+{
-+ /*
-+ * Check that we're running under Xen, and privileged enough
-+ * to play with MTRRs.
-+ */
-+ if (!xen_initial_domain())
-+ return;
-+
-+ /*
-+ * Check that the CPU has an MTRR implementation we can
-+ * support.
-+ */
-+ if (cpu_has_mtrr ||
-+ cpu_has_k6_mtrr ||
-+ cpu_has_cyrix_arr ||
-+ cpu_has_centaur_mcr) {
-+ mtrr_if = &xen_mtrr_ops;
-+ pat_init();
-+ }
-+}
-diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
-index ff95824..ebd4c51 100644
---- a/arch/x86/kernel/crash.c
-+++ b/arch/x86/kernel/crash.c
-@@ -28,7 +28,6 @@
- #include <asm/reboot.h>
- #include <asm/virtext.h>
-
--
- #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
-
- static void kdump_nmi_callback(int cpu, struct die_args *args)
-diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
-index d17d482..4d0aded 100644
---- a/arch/x86/kernel/e820.c
-+++ b/arch/x86/kernel/e820.c
-@@ -750,6 +750,36 @@ static int __init find_overlapped_early(u64 start, u64 end)
- return i;
- }
-
-+u64 __init early_res_next_free(u64 addr)
-+{
-+ int i;
-+ u64 end = addr;
-+ struct early_res *r;
-+
-+ for (i = 0; i < MAX_EARLY_RES; i++) {
-+ r = &early_res[i];
-+ if (addr >= r->start && addr < r->end) {
-+ end = r->end;
-+ break;
-+ }
-+ }
-+ return end;
-+}
-+
-+u64 __init early_res_next_reserved(u64 addr, u64 max)
-+{
-+ int i;
-+ struct early_res *r;
-+ u64 next_res = max;
-+
-+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-+ r = &early_res[i];
-+ if ((r->start >= addr) && (r->start < next_res))
-+ next_res = r->start;
-+ }
-+ return next_res;
-+}
-+
- /*
- * Drop the i-th range from the early reservation map,
- * by copying any higher ranges down one over it, and
-diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
-index c097e7d..7764118 100644
---- a/arch/x86/kernel/entry_32.S
-+++ b/arch/x86/kernel/entry_32.S
-@@ -1088,6 +1088,9 @@ ENTRY(xen_failsafe_callback)
- .previous
- ENDPROC(xen_failsafe_callback)
-
-+BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
-+ xen_evtchn_do_upcall)
-+
- #endif /* CONFIG_XEN */
-
- #ifdef CONFIG_FUNCTION_TRACER
-diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
-index b5c061f..a626344 100644
---- a/arch/x86/kernel/entry_64.S
-+++ b/arch/x86/kernel/entry_64.S
-@@ -1364,6 +1364,9 @@ ENTRY(xen_failsafe_callback)
- CFI_ENDPROC
- END(xen_failsafe_callback)
-
-+apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
-+ xen_hvm_callback_vector xen_evtchn_do_upcall
-+
- #endif /* CONFIG_XEN */
-
- /*
-diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
-index 0b06cd7..f59b07a 100644
---- a/arch/x86/kernel/head64.c
-+++ b/arch/x86/kernel/head64.c
-@@ -79,6 +79,8 @@ void __init x86_64_start_kernel(char * real_mode_data)
- /* Cleanup the over mapped high alias */
- cleanup_highmap();
-
-+ max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
-+
- for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
- #ifdef CONFIG_EARLY_PRINTK
- set_intr_gate(i, &early_idt_handlers[i]);
-diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
-index c771e1a..8b970b8 100644
---- a/arch/x86/kernel/hpet.c
-+++ b/arch/x86/kernel/hpet.c
-@@ -98,7 +98,7 @@ static int __init hpet_setup(char *str)
- }
- __setup("hpet=", hpet_setup);
-
--static int __init disable_hpet(char *str)
-+int __init disable_hpet(char *str)
- {
- boot_hpet_disable = 1;
- return 1;
-diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
-index 99c4d30..919c1a8 100644
---- a/arch/x86/kernel/ioport.c
-+++ b/arch/x86/kernel/ioport.c
-@@ -30,13 +30,29 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base,
- }
- }
-
-+void native_set_io_bitmap(struct thread_struct *t,
-+ unsigned long bytes_updated)
-+{
-+ struct tss_struct *tss;
-+
-+ if (!bytes_updated)
-+ return;
-+
-+ tss = &__get_cpu_var(init_tss);
-+
-+ /* Update the TSS: */
-+ if (t->io_bitmap_ptr)
-+ memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
-+ else
-+ memset(tss->io_bitmap, 0xff, bytes_updated);
-+}
-+
- /*
- * this changes the io permissions bitmap in the current task.
- */
- asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
- {
- struct thread_struct *t = &current->thread;
-- struct tss_struct *tss;
- unsigned int i, max_long, bytes, bytes_updated;
-
- if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
-@@ -61,13 +77,13 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
- }
-
- /*
-- * do it in the per-thread copy and in the TSS ...
-+ * do it in the per-thread copy
- *
-- * Disable preemption via get_cpu() - we must not switch away
-+ * Disable preemption - we must not switch away
- * because the ->io_bitmap_max value must match the bitmap
- * contents:
- */
-- tss = &per_cpu(init_tss, get_cpu());
-+ preempt_disable();
-
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
-
-@@ -85,10 +101,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-
- t->io_bitmap_max = bytes;
-
-- /* Update the TSS: */
-- memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
-+ set_io_bitmap(t, bytes_updated);
-
-- put_cpu();
-+ preempt_enable();
-
- return 0;
- }
-@@ -119,11 +134,10 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
- return 0;
- }
-
--#ifdef CONFIG_X86_32
--long sys_iopl(struct pt_regs *regs)
-+asmlinkage long sys_iopl(unsigned int level)
- {
-- unsigned int level = regs->bx;
- struct thread_struct *t = &current->thread;
-+ struct pt_regs *regs = task_pt_regs(current);
- int rc;
-
- rc = do_iopl(level, regs);
-@@ -135,9 +149,3 @@ long sys_iopl(struct pt_regs *regs)
- out:
- return rc;
- }
--#else
--asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
--{
-- return do_iopl(level, regs);
--}
--#endif
-diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
-index ec6ef60..fa5b061 100644
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -109,6 +109,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-
- mutex_init(&mm->context.lock);
- mm->context.size = 0;
-+#ifdef CONFIG_XEN
-+ mm->context.has_foreign_mappings = 0;
-+#endif
- old_mm = current->mm;
- if (old_mm && old_mm->context.size > 0) {
- mutex_lock(&old_mm->context.lock);
-diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
-index 378e9a8..86ca771 100644
---- a/arch/x86/kernel/microcode_core.c
-+++ b/arch/x86/kernel/microcode_core.c
-@@ -81,6 +81,8 @@
- #include <linux/fs.h>
- #include <linux/mm.h>
-
-+#include <xen/xen.h>
-+#include <asm/xen/hypervisor.h>
- #include <asm/microcode.h>
- #include <asm/processor.h>
-
-@@ -503,7 +505,9 @@ static int __init microcode_init(void)
- struct cpuinfo_x86 *c = &cpu_data(0);
- int error;
-
-- if (c->x86_vendor == X86_VENDOR_INTEL)
-+ if (xen_pv_domain())
-+ microcode_ops = init_xen_microcode();
-+ else if (c->x86_vendor == X86_VENDOR_INTEL)
- microcode_ops = init_intel_microcode();
- else if (c->x86_vendor == X86_VENDOR_AMD)
- microcode_ops = init_amd_microcode();
-diff --git a/arch/x86/kernel/microcode_xen.c b/arch/x86/kernel/microcode_xen.c
-new file mode 100644
-index 0000000..16c742e
---- /dev/null
-+++ b/arch/x86/kernel/microcode_xen.c
-@@ -0,0 +1,201 @@
-+/*
-+ * Xen microcode update driver
-+ *
-+ * Xen does most of the work here. We just pass the whole blob into
-+ * Xen, and it will apply it to all CPUs as appropriate. Xen will
-+ * worry about how different CPU models are actually updated.
-+ */
-+#include <linux/sched.h>
-+#include <linux/module.h>
-+#include <linux/firmware.h>
-+#include <linux/vmalloc.h>
-+#include <linux/uaccess.h>
-+
-+#include <asm/microcode.h>
-+
-+#include <xen/xen.h>
-+#include <xen/interface/platform.h>
-+#include <xen/interface/xen.h>
-+
-+#include <asm/xen/hypercall.h>
-+#include <asm/xen/hypervisor.h>
-+
-+MODULE_DESCRIPTION("Xen microcode update driver");
-+MODULE_LICENSE("GPL");
-+
-+struct xen_microcode {
-+ size_t len;
-+ char data[0];
-+};
-+
-+static int xen_microcode_update(int cpu)
-+{
-+ int err;
-+ struct xen_platform_op op;
-+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-+ struct xen_microcode *uc = uci->mc;
-+
-+ if (uc == NULL || uc->len == 0) {
-+ /*
-+ * We do all cpus at once, so we don't need to do
-+ * other cpus explicitly (besides, these vcpu numbers
-+ * have no relationship to underlying physical cpus).
-+ */
-+ return 0;
-+ }
-+
-+ op.cmd = XENPF_microcode_update;
-+ set_xen_guest_handle(op.u.microcode.data, uc->data);
-+ op.u.microcode.length = uc->len;
-+
-+ err = HYPERVISOR_dom0_op(&op);
-+
-+ if (err != 0)
-+ printk(KERN_WARNING "microcode_xen: microcode update failed: %d\n", err);
-+
-+ return err;
-+}
-+
-+static enum ucode_state xen_request_microcode_fw(int cpu, struct device *device)
-+{
-+ char name[30];
-+ struct cpuinfo_x86 *c = &cpu_data(cpu);
-+ const struct firmware *firmware;
-+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-+ enum ucode_state ret;
-+ struct xen_microcode *uc;
-+ size_t size;
-+ int err;
-+
-+ switch (c->x86_vendor) {
-+ case X86_VENDOR_INTEL:
-+ snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x",
-+ c->x86, c->x86_model, c->x86_mask);
-+ break;
-+
-+ case X86_VENDOR_AMD:
-+ snprintf(name, sizeof(name), "amd-ucode/microcode_amd.bin");
-+ break;
-+
-+ default:
-+ return UCODE_NFOUND;
-+ }
-+
-+ err = request_firmware(&firmware, name, device);
-+ if (err) {
-+ pr_debug("microcode: data file %s load failed\n", name);
-+ return UCODE_NFOUND;
-+ }
-+
-+ /*
-+ * Only bother getting real firmware for cpu 0; the others get
-+ * dummy placeholders.
-+ */
-+ if (cpu == 0)
-+ size = firmware->size;
-+ else
-+ size = 0;
-+
-+ if (uci->mc != NULL) {
-+ vfree(uci->mc);
-+ uci->mc = NULL;
-+ }
-+
-+ ret = UCODE_ERROR;
-+ uc = vmalloc(sizeof(*uc) + size);
-+ if (uc == NULL)
-+ goto out;
-+
-+ ret = UCODE_OK;
-+ uc->len = size;
-+ memcpy(uc->data, firmware->data, uc->len);
-+
-+ uci->mc = uc;
-+
-+out:
-+ release_firmware(firmware);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Stage a userspace-supplied microcode image for @cpu (only cpu 0 keeps
-+ * real data; others get an empty placeholder).  Returns UCODE_OK, or
-+ * UCODE_ERROR if vmalloc() fails, or UCODE_NFOUND on a short copy.
-+ */
-+static enum ucode_state xen_request_microcode_user(int cpu,
-+				const void __user *buf, size_t size)
-+{
-+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-+	struct xen_microcode *uc;
-+	enum ucode_state ret;
-+	size_t unread;
-+
-+	/* No real firmware for non-zero cpus; just store a placeholder */
-+	if (cpu != 0)
-+		size = 0;
-+
-+	/* Drop any previously staged image; vfree(NULL) is a no-op */
-+	vfree(uci->mc);
-+	uci->mc = NULL;
-+
-+	ret = UCODE_ERROR;
-+	uc = vmalloc(sizeof(*uc) + size);
-+	if (uc == NULL)
-+		goto out;
-+
-+	uc->len = size;
-+	ret = UCODE_NFOUND;
-+
-+	/* XXX This sporadically returns uncopied bytes, in which case we
-+	   give up with UCODE_NFOUND.  As far as I can see, the usermode
-+	   code (microcode_ctl) isn't doing anything wrong... */
-+	unread = copy_from_user(uc->data, buf, size);
-+	if (unread != 0) {
-+		printk(KERN_WARNING "failed to read %zu of %zu bytes at %p -> %p\n",
-+		       unread, size, buf, uc->data);
-+		goto out;
-+	}
-+
-+	ret = UCODE_OK;
-+
-+out:
-+	/* Keep the buffer only on success (UCODE_OK is not guaranteed to be 0) */
-+	if (ret == UCODE_OK)
-+		uci->mc = uc;
-+	else
-+		vfree(uc);
-+	return ret;
-+}
-+
-+static void xen_microcode_fini_cpu(int cpu)
-+{
-+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-+
-+ vfree(uci->mc);
-+ uci->mc = NULL;
-+}
-+
-+static int xen_collect_cpu_info(int cpu, struct cpu_signature *sig)
-+{
-+ sig->sig = 0;
-+ sig->pf = 0;
-+ sig->rev = 0;
-+
-+ return 0;
-+}
-+
-+static struct microcode_ops microcode_xen_ops = {
-+ .request_microcode_user = xen_request_microcode_user,
-+ .request_microcode_fw = xen_request_microcode_fw,
-+ .collect_cpu_info = xen_collect_cpu_info,
-+ .apply_microcode = xen_microcode_update,
-+ .microcode_fini_cpu = xen_microcode_fini_cpu,
-+};
-+
-+struct microcode_ops * __init init_xen_microcode(void)
-+{
-+ if (!xen_initial_domain())
-+ return NULL;
-+ return &microcode_xen_ops;
-+}
-diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
-index 1b1739d..f7e115c 100644
---- a/arch/x86/kernel/paravirt.c
-+++ b/arch/x86/kernel/paravirt.c
-@@ -376,6 +376,7 @@ struct pv_cpu_ops pv_cpu_ops = {
- .swapgs = native_swapgs,
-
- .set_iopl_mask = native_set_iopl_mask,
-+ .set_io_bitmap = native_set_io_bitmap,
- .io_delay = native_io_delay,
-
- .start_context_switch = paravirt_nop,
-diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
-index 1a2d4b1..2f158a5 100644
---- a/arch/x86/kernel/pci-calgary_64.c
-+++ b/arch/x86/kernel/pci-calgary_64.c
-@@ -46,6 +46,7 @@
- #include <asm/dma.h>
- #include <asm/rio.h>
- #include <asm/bios_ebda.h>
-+#include <asm/x86_init.h>
-
- #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
- int use_calgary __read_mostly = 1;
-@@ -249,7 +250,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
- if (panic_on_overflow)
- panic("Calgary: fix the allocator.\n");
- else
-- return bad_dma_address;
-+ return DMA_ERROR_CODE;
- }
- }
-
-@@ -265,11 +266,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
- void *vaddr, unsigned int npages, int direction)
- {
- unsigned long entry;
-- dma_addr_t ret = bad_dma_address;
-+ dma_addr_t ret = DMA_ERROR_CODE;
-
- entry = iommu_range_alloc(dev, tbl, npages);
-
-- if (unlikely(entry == bad_dma_address))
-+ if (unlikely(entry == DMA_ERROR_CODE))
- goto error;
-
- /* set the return dma address */
-@@ -284,7 +285,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
- error:
- printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
- "iommu %p\n", npages, tbl);
-- return bad_dma_address;
-+ return DMA_ERROR_CODE;
- }
-
- static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
-@@ -295,8 +296,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
- unsigned long flags;
-
- /* were we called with bad_dma_address? */
-- badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE);
-- if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) {
-+ badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE);
-+ if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) {
- WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
- "address 0x%Lx\n", dma_addr);
- return;
-@@ -380,7 +381,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
- npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE);
-
- entry = iommu_range_alloc(dev, tbl, npages);
-- if (entry == bad_dma_address) {
-+ if (entry == DMA_ERROR_CODE) {
- /* makes sure unmap knows to stop */
- s->dma_length = 0;
- goto error;
-@@ -398,7 +399,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
- error:
- calgary_unmap_sg(dev, sg, nelems, dir, NULL);
- for_each_sg(sg, s, nelems, i) {
-- sg->dma_address = bad_dma_address;
-+ sg->dma_address = DMA_ERROR_CODE;
- sg->dma_length = 0;
- }
- return 0;
-@@ -453,7 +454,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
-
- /* set up tces to cover the allocated range */
- mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
-- if (mapping == bad_dma_address)
-+ if (mapping == DMA_ERROR_CODE)
- goto free;
- *dma_handle = mapping;
- return ret;
-@@ -734,7 +735,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev)
- struct iommu_table *tbl = pci_iommu(dev->bus);
-
- /* reserve EMERGENCY_PAGES from bad_dma_address and up */
-- iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES);
-+ iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES);
-
- /* avoid the BIOS/VGA first 640KB-1MB region */
- /* for CalIOC2 - avoid the entire first MB */
-@@ -1349,6 +1350,23 @@ static void __init get_tce_space_from_tar(void)
- return;
- }
-
-+static int __init calgary_iommu_init(void)
-+{
-+ int ret;
-+
-+ /* ok, we're trying to use Calgary - let's roll */
-+ printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
-+
-+ ret = calgary_init();
-+ if (ret) {
-+ printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
-+ "falling back to no_iommu\n", ret);
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
- void __init detect_calgary(void)
- {
- int bus;
-@@ -1362,7 +1380,7 @@ void __init detect_calgary(void)
- * if the user specified iommu=off or iommu=soft or we found
- * another HW IOMMU already, bail out.
- */
-- if (swiotlb || no_iommu || iommu_detected)
-+ if (no_iommu || iommu_detected)
- return;
-
- if (!use_calgary)
-@@ -1447,9 +1465,7 @@ void __init detect_calgary(void)
- printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
- specified_table_size);
-
-- /* swiotlb for devices that aren't behind the Calgary. */
-- if (max_pfn > MAX_DMA32_PFN)
-- swiotlb = 1;
-+ x86_init.iommu.iommu_init = calgary_iommu_init;
- }
- return;
-
-@@ -1462,35 +1478,6 @@ cleanup:
- }
- }
-
--int __init calgary_iommu_init(void)
--{
-- int ret;
--
-- if (no_iommu || (swiotlb && !calgary_detected))
-- return -ENODEV;
--
-- if (!calgary_detected)
-- return -ENODEV;
--
-- /* ok, we're trying to use Calgary - let's roll */
-- printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
--
-- ret = calgary_init();
-- if (ret) {
-- printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
-- "falling back to no_iommu\n", ret);
-- return ret;
-- }
--
-- force_iommu = 1;
-- bad_dma_address = 0x0;
-- /* dma_ops is set to swiotlb or nommu */
-- if (!dma_ops)
-- dma_ops = &nommu_dma_ops;
--
-- return 0;
--}
--
- static int __init calgary_parse_options(char *p)
- {
- unsigned int bridge;
-diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
-index 6ac3931..3e57c58 100644
---- a/arch/x86/kernel/pci-dma.c
-+++ b/arch/x86/kernel/pci-dma.c
-@@ -11,10 +11,12 @@
- #include <asm/gart.h>
- #include <asm/calgary.h>
- #include <asm/amd_iommu.h>
-+#include <asm/x86_init.h>
-+#include <asm/xen/swiotlb-xen.h>
-
- static int forbid_dac __read_mostly;
-
--struct dma_map_ops *dma_ops;
-+struct dma_map_ops *dma_ops = &nommu_dma_ops;
- EXPORT_SYMBOL(dma_ops);
-
- static int iommu_sac_force __read_mostly;
-@@ -42,9 +44,6 @@ int iommu_detected __read_mostly = 0;
- */
- int iommu_pass_through __read_mostly;
-
--dma_addr_t bad_dma_address __read_mostly = 0;
--EXPORT_SYMBOL(bad_dma_address);
--
- /* Dummy device used for NULL arguments (normally ISA). */
- struct device x86_dma_fallback_dev = {
- .init_name = "fallback device",
-@@ -126,18 +125,19 @@ void __init pci_iommu_alloc(void)
- /* free the range so iommu could get some range less than 4G */
- dma32_free_bootmem();
- #endif
-+ if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())
-+ goto out;
-
-- /*
-- * The order of these functions is important for
-- * fall-back/fail-over reasons
-- */
- gart_iommu_hole_init();
-
- detect_calgary();
-
- detect_intel_iommu();
-
-+ /* needs to be called after gart_iommu_hole_init */
- amd_iommu_detect();
-+out:
-+ pci_xen_swiotlb_init();
-
- pci_swiotlb_init();
- }
-@@ -289,25 +289,17 @@ static int __init pci_iommu_init(void)
- #ifdef CONFIG_PCI
- dma_debug_add_bus(&pci_bus_type);
- #endif
-+ x86_init.iommu.iommu_init();
-
-- calgary_iommu_init();
--
-- intel_iommu_init();
--
-- amd_iommu_init();
-+ if (swiotlb || xen_swiotlb) {
-+ printk(KERN_INFO "PCI-DMA: "
-+ "Using software bounce buffering for IO (SWIOTLB)\n");
-+ swiotlb_print_info();
-+ } else
-+ swiotlb_free();
-
-- gart_iommu_init();
--
-- no_iommu_init();
- return 0;
- }
--
--void pci_iommu_shutdown(void)
--{
-- gart_iommu_shutdown();
--
-- amd_iommu_shutdown();
--}
- /* Must execute after PCI subsystem */
- rootfs_initcall(pci_iommu_init);
-
-diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
-index 1c76691..8c9dd05 100644
---- a/arch/x86/kernel/pci-gart_64.c
-+++ b/arch/x86/kernel/pci-gart_64.c
-@@ -39,6 +39,7 @@
- #include <asm/swiotlb.h>
- #include <asm/dma.h>
- #include <asm/k8.h>
-+#include <asm/x86_init.h>
-
- static unsigned long iommu_bus_base; /* GART remapping area (physical) */
- static unsigned long iommu_size; /* size of remapping area bytes */
-@@ -46,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */
-
- static u32 *iommu_gatt_base; /* Remapping table */
-
-+static dma_addr_t bad_dma_addr;
-+
- /*
- * If this is disabled the IOMMU will use an optimized flushing strategy
- * of only flushing when an mapping is reused. With it true the GART is
-@@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
- if (panic_on_overflow)
- panic("dma_map_area overflow %lu bytes\n", size);
- iommu_full(dev, size, dir);
-- return bad_dma_address;
-+ return bad_dma_addr;
- }
-
- for (i = 0; i < npages; i++) {
-@@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
-
- if (nonforced_iommu(dev, addr, s->length)) {
- addr = dma_map_area(dev, addr, s->length, dir, 0);
-- if (addr == bad_dma_address) {
-+ if (addr == bad_dma_addr) {
- if (i > 0)
- gart_unmap_sg(dev, sg, i, dir, NULL);
- nents = 0;
-@@ -455,7 +458,7 @@ error:
-
- iommu_full(dev, pages << PAGE_SHIFT, dir);
- for_each_sg(sg, s, nents, i)
-- s->dma_address = bad_dma_address;
-+ s->dma_address = bad_dma_addr;
- return 0;
- }
-
-@@ -479,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
- DMA_BIDIRECTIONAL, align_mask);
-
- flush_gart();
-- if (paddr != bad_dma_address) {
-+ if (paddr != bad_dma_addr) {
- *dma_addr = paddr;
- return page_address(page);
- }
-@@ -499,6 +502,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
- free_pages((unsigned long)vaddr, get_order(size));
- }
-
-+static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
-+{
-+ return (dma_addr == bad_dma_addr);
-+}
-+
- static int no_agp;
-
- static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
-@@ -689,14 +697,15 @@ static struct dma_map_ops gart_dma_ops = {
- .unmap_page = gart_unmap_page,
- .alloc_coherent = gart_alloc_coherent,
- .free_coherent = gart_free_coherent,
-+ .mapping_error = gart_mapping_error,
- };
-
--void gart_iommu_shutdown(void)
-+static void gart_iommu_shutdown(void)
- {
- struct pci_dev *dev;
- int i;
-
-- if (no_agp && (dma_ops != &gart_dma_ops))
-+ if (no_agp)
- return;
-
- for (i = 0; i < num_k8_northbridges; i++) {
-@@ -711,7 +720,7 @@ void gart_iommu_shutdown(void)
- }
- }
-
--void __init gart_iommu_init(void)
-+int __init gart_iommu_init(void)
- {
- struct agp_kern_info info;
- unsigned long iommu_start;
-@@ -721,7 +730,7 @@ void __init gart_iommu_init(void)
- long i;
-
- if (num_k8_northbridges == 0)
-- return;
-+ return 0;
-
- #ifndef CONFIG_AGP_AMD64
- no_agp = 1;
-@@ -733,13 +742,6 @@ void __init gart_iommu_init(void)
- (agp_copy_info(agp_bridge, &info) < 0);
- #endif
-
-- if (swiotlb)
-- return;
--
-- /* Did we detect a different HW IOMMU? */
-- if (iommu_detected && !gart_iommu_aperture)
-- return;
--
- if (no_iommu ||
- (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
- !gart_iommu_aperture ||
-@@ -749,7 +751,7 @@ void __init gart_iommu_init(void)
- "but GART IOMMU not available.\n");
- printk(KERN_WARNING "falling back to iommu=soft.\n");
- }
-- return;
-+ return 0;
- }
-
- /* need to map that range */
-@@ -794,7 +796,7 @@ void __init gart_iommu_init(void)
-
- iommu_start = aper_size - iommu_size;
- iommu_bus_base = info.aper_base + iommu_start;
-- bad_dma_address = iommu_bus_base;
-+ bad_dma_addr = iommu_bus_base;
- iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
-
- /*
-@@ -841,6 +843,10 @@ void __init gart_iommu_init(void)
-
- flush_gart();
- dma_ops = &gart_dma_ops;
-+ x86_platform.iommu_shutdown = gart_iommu_shutdown;
-+ swiotlb = 0;
-+
-+ return 0;
- }
-
- void __init gart_parse_options(char *p)
-diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
-index a3933d4..22be12b 100644
---- a/arch/x86/kernel/pci-nommu.c
-+++ b/arch/x86/kernel/pci-nommu.c
-@@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
- dma_addr_t bus = page_to_phys(page) + offset;
- WARN_ON(size == 0);
- if (!check_addr("map_single", dev, bus, size))
-- return bad_dma_address;
-+ return DMA_ERROR_CODE;
- flush_write_buffers();
- return bus;
- }
-@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = {
- .sync_sg_for_device = nommu_sync_sg_for_device,
- .is_phys = 1,
- };
--
--void __init no_iommu_init(void)
--{
-- if (dma_ops)
-- return;
--
-- force_iommu = 0; /* no HW IOMMU */
-- dma_ops = &nommu_dma_ops;
--}
-diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
-index aaa6b78..7d2829d 100644
---- a/arch/x86/kernel/pci-swiotlb.c
-+++ b/arch/x86/kernel/pci-swiotlb.c
-@@ -42,18 +42,31 @@ static struct dma_map_ops swiotlb_dma_ops = {
- .dma_supported = NULL,
- };
-
--void __init pci_swiotlb_init(void)
-+/*
-+ * pci_swiotlb_detect - set swiotlb to 1 if necessary
-+ *
-+ * This returns non-zero if we are forced to use swiotlb (by the boot
-+ * option).
-+ */
-+int __init pci_swiotlb_detect(void)
- {
-+ int use_swiotlb = swiotlb | swiotlb_force;
-+
- /* don't initialize swiotlb if iommu=off (no_iommu=1) */
- #ifdef CONFIG_X86_64
-- if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN))
-+ if (!no_iommu && max_pfn > MAX_DMA32_PFN)
- swiotlb = 1;
- #endif
- if (swiotlb_force)
- swiotlb = 1;
-+
-+ return use_swiotlb;
-+}
-+
-+void __init pci_swiotlb_init(void)
-+{
- if (swiotlb) {
-- printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
-- swiotlb_init();
-+ swiotlb_init(0);
- dma_ops = &swiotlb_dma_ops;
- }
- }
-diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
-index 5fd5b07..11d8667 100644
---- a/arch/x86/kernel/process.c
-+++ b/arch/x86/kernel/process.c
-@@ -73,16 +73,12 @@ void exit_thread(void)
- unsigned long *bp = t->io_bitmap_ptr;
-
- if (bp) {
-- struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
--
-+ preempt_disable();
- t->io_bitmap_ptr = NULL;
- clear_thread_flag(TIF_IO_BITMAP);
-- /*
-- * Careful, clear this in the TSS too:
-- */
-- memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
-+ set_io_bitmap(t, t->io_bitmap_max);
- t->io_bitmap_max = 0;
-- put_cpu();
-+ preempt_enable();
- kfree(bp);
- }
- }
-@@ -199,19 +195,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- hard_enable_TSC();
- }
-
-- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
-- /*
-- * Copy the relevant range of the IO bitmap.
-- * Normally this is 128 bytes or less:
-- */
-- memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-- max(prev->io_bitmap_max, next->io_bitmap_max));
-- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
-- /*
-- * Clear any possible leftover bits:
-- */
-- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
-- }
-+ if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP) ||
-+ test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
-+ set_io_bitmap(next,
-+ max(prev->io_bitmap_max, next->io_bitmap_max));
- }
-
- int sys_fork(struct pt_regs *regs)
-diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
-index dfdfe46..b12fe8d 100644
---- a/arch/x86/kernel/pvclock.c
-+++ b/arch/x86/kernel/pvclock.c
-@@ -111,6 +111,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
-
- static atomic64_t last_value = ATOMIC64_INIT(0);
-
-+void pvclock_resume(void)
-+{
-+ atomic64_set(&last_value, 0);
-+}
-+
- cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
- {
- struct pvclock_shadow_time shadow;
-diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
-index 200fcde..ff8cc40 100644
---- a/arch/x86/kernel/reboot.c
-+++ b/arch/x86/kernel/reboot.c
-@@ -23,7 +23,7 @@
- # include <linux/ctype.h>
- # include <linux/mc146818rtc.h>
- #else
--# include <asm/iommu.h>
-+# include <asm/x86_init.h>
- #endif
-
- /*
-@@ -647,7 +647,7 @@ void native_machine_shutdown(void)
- #endif
-
- #ifdef CONFIG_X86_64
-- pci_iommu_shutdown();
-+ x86_platform.iommu_shutdown();
- #endif
- }
-
-diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
-index 5449a26..56b4707 100644
---- a/arch/x86/kernel/setup.c
-+++ b/arch/x86/kernel/setup.c
-@@ -70,6 +70,7 @@
- #include <linux/tboot.h>
-
- #include <video/edid.h>
-+#include <xen/xen.h>
-
- #include <asm/mtrr.h>
- #include <asm/apic.h>
-@@ -89,6 +90,7 @@
- #include <asm/cacheflush.h>
- #include <asm/processor.h>
- #include <asm/bugs.h>
-+#include <asm/tlbflush.h>
-
- #include <asm/system.h>
- #include <asm/vsyscall.h>
-@@ -909,7 +911,6 @@ void __init setup_arch(char **cmdline_p)
- max_low_pfn = max_pfn;
-
- high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
-- max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
- #endif
-
- #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
-@@ -967,6 +968,9 @@ void __init setup_arch(char **cmdline_p)
-
- initmem_init(0, max_pfn);
-
-+ /* Initialize cross-cpu tlb flushes */
-+ init_smp_flush();
-+
- #ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
-@@ -1037,6 +1041,7 @@ void __init setup_arch(char **cmdline_p)
- probe_nr_irqs_gsi();
-
- kvm_guest_init();
-+ xen_hvm_guest_init();
-
- e820_reserve_resources();
- e820_mark_nosave_regions(max_low_pfn);
-diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
-index 4449a4a..d11c5ff 100644
---- a/arch/x86/kernel/x86_init.c
-+++ b/arch/x86/kernel/x86_init.c
-@@ -14,10 +14,13 @@
- #include <asm/time.h>
- #include <asm/irq.h>
- #include <asm/tsc.h>
-+#include <asm/iommu.h>
-
- void __cpuinit x86_init_noop(void) { }
- void __init x86_init_uint_noop(unsigned int unused) { }
- void __init x86_init_pgd_noop(pgd_t *unused) { }
-+int __init iommu_init_noop(void) { return 0; }
-+void iommu_shutdown_noop(void) { }
-
- /*
- * The platform setup functions are preset with the default functions
-@@ -62,6 +65,10 @@ struct x86_init_ops x86_init __initdata = {
- .tsc_pre_init = x86_init_noop,
- .timer_init = hpet_time_init,
- },
-+
-+ .iommu = {
-+ .iommu_init = iommu_init_noop,
-+ },
- };
-
- struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
-@@ -72,4 +79,5 @@ struct x86_platform_ops x86_platform = {
- .calibrate_tsc = native_calibrate_tsc,
- .get_wallclock = mach_get_cmos_time,
- .set_wallclock = mach_set_rtc_mmss,
-+ .iommu_shutdown = iommu_shutdown_noop,
- };
-diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
-index 06630d2..ad895ae 100644
---- a/arch/x86/mm/Makefile
-+++ b/arch/x86/mm/Makefile
-@@ -6,6 +6,11 @@ nostackp := $(call cc-option, -fno-stack-protector)
- CFLAGS_physaddr.o := $(nostackp)
- CFLAGS_setup_nx.o := $(nostackp)
-
-+# Make sure __phys_addr has no stackprotector
-+nostackp := $(call cc-option, -fno-stack-protector)
-+CFLAGS_ioremap.o := $(nostackp)
-+CFLAGS_init.o := $(nostackp)
-+
- obj-$(CONFIG_SMP) += tlb.o
-
- obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
-diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
-index 1739358..e003b83 100644
---- a/arch/x86/mm/fault.c
-+++ b/arch/x86/mm/fault.c
-@@ -228,7 +228,16 @@ void vmalloc_sync_all(void)
-
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
-- if (!vmalloc_sync_one(page_address(page), address))
-+ spinlock_t *pgt_lock;
-+ int ret;
-+
-+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
-+
-+ spin_lock(pgt_lock);
-+ ret = vmalloc_sync_one(page_address(page), address);
-+ spin_unlock(pgt_lock);
-+
-+ if (!ret)
- break;
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
-@@ -340,11 +349,19 @@ void vmalloc_sync_all(void)
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
-+ spinlock_t *pgt_lock;
-+
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
-+
-+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
-+ spin_lock(pgt_lock);
-+
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-+
-+ spin_unlock(pgt_lock);
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
-diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
-index 71da1bc..892b8eb 100644
---- a/arch/x86/mm/gup.c
-+++ b/arch/x86/mm/gup.c
-@@ -313,6 +313,11 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- goto slow_irqon;
- #endif
-
-+#ifdef CONFIG_XEN
-+ if (unlikely(mm->context.has_foreign_mappings))
-+ goto slow_irqon;
-+#endif
-+
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
-diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
-index 30938c1..10c3719 100644
---- a/arch/x86/mm/init_32.c
-+++ b/arch/x86/mm/init_32.c
-@@ -430,22 +430,45 @@ static int __init add_highpages_work_fn(unsigned long start_pfn,
- {
- int node_pfn;
- struct page *page;
-+ phys_addr_t chunk_end, chunk_max;
- unsigned long final_start_pfn, final_end_pfn;
-- struct add_highpages_data *data;
--
-- data = (struct add_highpages_data *)datax;
-+ struct add_highpages_data *data = (struct add_highpages_data *)datax;
-
- final_start_pfn = max(start_pfn, data->start_pfn);
- final_end_pfn = min(end_pfn, data->end_pfn);
- if (final_start_pfn >= final_end_pfn)
- return 0;
-
-- for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
-- node_pfn++) {
-- if (!pfn_valid(node_pfn))
-- continue;
-- page = pfn_to_page(node_pfn);
-- add_one_highpage_init(page, node_pfn);
-+ chunk_end = PFN_PHYS(final_start_pfn);
-+ chunk_max = PFN_PHYS(final_end_pfn);
-+
-+ /*
-+ * Check for reserved areas.
-+ */
-+ for (;;) {
-+ phys_addr_t chunk_start;
-+ chunk_start = early_res_next_free(chunk_end);
-+
-+ /*
-+ * Reserved area. Just count high mem pages.
-+ */
-+ for (node_pfn = PFN_DOWN(chunk_end);
-+ node_pfn < PFN_DOWN(chunk_start); node_pfn++) {
-+ if (pfn_valid(node_pfn))
-+ totalhigh_pages++;
-+ }
-+
-+ if (chunk_start >= chunk_max)
-+ break;
-+
-+ chunk_end = early_res_next_reserved(chunk_start, chunk_max);
-+ for (node_pfn = PFN_DOWN(chunk_start);
-+ node_pfn < PFN_DOWN(chunk_end); node_pfn++) {
-+ if (!pfn_valid(node_pfn))
-+ continue;
-+ page = pfn_to_page(node_pfn);
-+ add_one_highpage_init(page, node_pfn);
-+ }
- }
-
- return 0;
-@@ -459,7 +482,6 @@ void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
-
- data.start_pfn = start_pfn;
- data.end_pfn = end_pfn;
--
- work_with_active_regions(nid, add_highpages_work_fn, &data);
- }
-
-diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
-index 2feb9bd..2601df2 100644
---- a/arch/x86/mm/ioremap.c
-+++ b/arch/x86/mm/ioremap.c
-@@ -425,6 +425,11 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr)
- return &bm_pte[pte_index(addr)];
- }
-
-+bool __init is_early_ioremap_ptep(pte_t *ptep)
-+{
-+ return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
-+}
-+
- static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
-
- void __init early_ioremap_init(void)
-diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
-index e78cd0e..fb91994 100644
---- a/arch/x86/mm/pat.c
-+++ b/arch/x86/mm/pat.c
-@@ -666,7 +666,7 @@ void io_free_memtype(resource_size_t start, resource_size_t end)
- pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
- unsigned long size, pgprot_t vma_prot)
- {
-- return vma_prot;
-+ return __pgprot(pgprot_val(vma_prot) | _PAGE_IOMAP);
- }
-
- #ifdef CONFIG_STRICT_DEVMEM
-diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
-index c9ba9de..1fcc191 100644
---- a/arch/x86/mm/pgtable.c
-+++ b/arch/x86/mm/pgtable.c
-@@ -4,6 +4,9 @@
- #include <asm/tlb.h>
- #include <asm/fixmap.h>
-
-+#include <xen/xen.h>
-+#include <asm/xen/hypervisor.h>
-+
- #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
-
- #ifdef CONFIG_HIGHPTE
-@@ -14,6 +17,16 @@
-
- gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
-
-+pgprot_t arch_vm_get_page_prot(unsigned vm_flags)
-+{
-+ pgprot_t ret = __pgprot(0);
-+
-+ if (vm_flags & VM_IO)
-+ ret = __pgprot(_PAGE_IOMAP);
-+
-+ return ret;
-+}
-+
- pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
- {
- return (pte_t *)__get_free_page(PGALLOC_GFP);
-@@ -86,7 +99,19 @@ static inline void pgd_list_del(pgd_t *pgd)
- #define UNSHARED_PTRS_PER_PGD \
- (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-
--static void pgd_ctor(pgd_t *pgd)
-+
-+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
-+{
-+ BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
-+ virt_to_page(pgd)->index = (pgoff_t)mm;
-+}
-+
-+struct mm_struct *pgd_page_get_mm(struct page *page)
-+{
-+ return (struct mm_struct *)page->index;
-+}
-+
-+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
- {
- /* If the pgd points to a shared pagetable level (either the
- ptes in non-PAE, or shared PMD in PAE), then just copy the
-@@ -104,8 +129,10 @@ static void pgd_ctor(pgd_t *pgd)
- }
-
- /* list required to sync kernel mapping updates */
-- if (!SHARED_KERNEL_PMD)
-+ if (!SHARED_KERNEL_PMD) {
-+ pgd_set_mm(pgd, mm);
- pgd_list_add(pgd);
-+ }
- }
-
- static void pgd_dtor(pgd_t *pgd)
-@@ -271,7 +298,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
- */
- spin_lock_irqsave(&pgd_lock, flags);
-
-- pgd_ctor(pgd);
-+ pgd_ctor(mm, pgd);
- pgd_prepopulate_pmd(mm, pgd, pmds);
-
- spin_unlock_irqrestore(&pgd_lock, flags);
-@@ -288,6 +315,12 @@ out:
-
- void pgd_free(struct mm_struct *mm, pgd_t *pgd)
- {
-+#ifdef CONFIG_XEN
-+ /* EEW */
-+ extern void xen_late_unpin_pgd(struct mm_struct *mm, pgd_t *pgd);
-+ if (xen_pv_domain())
-+ xen_late_unpin_pgd(mm, pgd);
-+#endif
- pgd_mop_up_pmds(mm, pgd);
- pgd_dtor(pgd);
- paravirt_pgd_free(mm, pgd);
-diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
-index 36fe08e..7317947 100644
---- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -148,13 +148,25 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
- * BUG();
- */
-
-- if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
-- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-+ if (f->flush_mm == NULL ||
-+ f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
-+ int tlbstate = percpu_read(cpu_tlbstate.state);
-+
-+ /*
-+ * flush_mm == NULL means flush everything, including
-+ * global tlbs, which will only happen when flushing
-+ * kernel mappings.
-+ */
-+ if (f->flush_mm == NULL)
-+ __flush_tlb_all();
-+ else if (tlbstate == TLBSTATE_OK) {
- if (f->flush_va == TLB_FLUSH_ALL)
- local_flush_tlb();
- else
- __flush_tlb_one(f->flush_va);
-- } else
-+ }
-+
-+ if (tlbstate == TLBSTATE_LAZY)
- leave_mm(cpu);
- }
- out:
-@@ -217,16 +229,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
- flush_tlb_others_ipi(cpumask, mm, va);
- }
-
--static int __cpuinit init_smp_flush(void)
-+void __init init_smp_flush(void)
- {
- int i;
-
- for (i = 0; i < ARRAY_SIZE(flush_state); i++)
- spin_lock_init(&flush_state[i].tlbstate_lock);
--
-- return 0;
- }
--core_initcall(init_smp_flush);
-
- void flush_tlb_current_task(void)
- {
-@@ -274,17 +283,19 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
-
- preempt_enable();
- }
-+EXPORT_SYMBOL_GPL(flush_tlb_page);
-
--static void do_flush_tlb_all(void *info)
-+void flush_tlb_all(void)
- {
-- unsigned long cpu = smp_processor_id();
-+ /* flush_tlb_others expects preempt to be disabled */
-+ int cpu = get_cpu();
-+
-+ flush_tlb_others(cpu_online_mask, NULL, TLB_FLUSH_ALL);
-
- __flush_tlb_all();
- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
- leave_mm(cpu);
--}
-
--void flush_tlb_all(void)
--{
-- on_each_cpu(do_flush_tlb_all, NULL, 1);
-+ put_cpu();
- }
-+EXPORT_SYMBOL_GPL(flush_tlb_all);
-diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
-index d49202e..64182c5 100644
---- a/arch/x86/pci/Makefile
-+++ b/arch/x86/pci/Makefile
-@@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o
- obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o
- obj-$(CONFIG_PCI_DIRECT) += direct.o
- obj-$(CONFIG_PCI_OLPC) += olpc.o
-+obj-$(CONFIG_PCI_XEN) += xen.o
-
- obj-y += fixup.o
- obj-$(CONFIG_ACPI) += acpi.o
-diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
-index 1331fcf..30a9808 100644
---- a/arch/x86/pci/common.c
-+++ b/arch/x86/pci/common.c
-@@ -22,6 +22,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
- unsigned int pci_early_dump_regs;
- static int pci_bf_sort;
- int pci_routeirq;
-+int pci_scan_all_fns;
- int noioapicquirk;
- #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS
- int noioapicreroute = 0;
-@@ -412,26 +413,31 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
-
- extern u8 pci_cache_line_size;
-
--int __init pcibios_init(void)
-+void __init pcibios_set_cache_line_size(void)
- {
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
-- if (!raw_pci_ops) {
-- printk(KERN_WARNING "PCI: System does not support PCI\n");
-- return 0;
-- }
--
- /*
- * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8
- * and P4. It's also good for 386/486s (which actually have 16)
- * as quite a few PCI devices do not support smaller values.
- */
-+
- pci_cache_line_size = 32 >> 2;
- if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
- pci_cache_line_size = 64 >> 2; /* K7 & K8 */
- else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
- pci_cache_line_size = 128 >> 2; /* P4 */
-+}
-+
-+int __init pcibios_init(void)
-+{
-+ if (!raw_pci_ops) {
-+ printk(KERN_WARNING "PCI: System does not support PCI\n");
-+ return 0;
-+ }
-
-+ pcibios_set_cache_line_size();
- pcibios_resource_survey();
-
- if (pci_bf_sort >= pci_force_bf)
-diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
-index a672f12..91d040e 100644
---- a/arch/x86/pci/i386.c
-+++ b/arch/x86/pci/i386.c
-@@ -283,6 +283,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
-
- prot = pgprot_val(vma->vm_page_prot);
-
-+ prot |= _PAGE_IOMAP; /* creating a mapping for IO */
-+
- /*
- * Return error if pat is not enabled and write_combine is requested.
- * Caller can followup with UC MINUS request and add a WC mtrr if there
-diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
-index 25a1f8e..4e2f90a 100644
---- a/arch/x86/pci/init.c
-+++ b/arch/x86/pci/init.c
-@@ -15,10 +15,16 @@ static __init int pci_arch_init(void)
- if (!(pci_probe & PCI_PROBE_NOEARLY))
- pci_mmcfg_early_init();
-
-+#ifdef CONFIG_PCI_XEN
-+ if (!pci_xen_init())
-+ return 0;
-+#endif
-+
- #ifdef CONFIG_PCI_OLPC
- if (!pci_olpc_init())
- return 0; /* skip additional checks if it's an XO */
- #endif
-+
- #ifdef CONFIG_PCI_BIOS
- pci_pcbios_init();
- #endif
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-new file mode 100644
-index 0000000..67fa926
---- /dev/null
-+++ b/arch/x86/pci/xen.c
-@@ -0,0 +1,154 @@
-+/*
-+ * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux
-+ * x86 PCI core to support the Xen PCI Frontend
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/pci.h>
-+#include <linux/acpi.h>
-+
-+#include <asm/io.h>
-+#include <asm/pci_x86.h>
-+
-+#include <asm/xen/hypervisor.h>
-+
-+#include <xen/events.h>
-+#include <asm/xen/pci.h>
-+
-+#if defined(CONFIG_PCI_MSI)
-+#include <linux/msi.h>
-+
-+struct xen_pci_frontend_ops *xen_pci_frontend;
-+EXPORT_SYMBOL_GPL(xen_pci_frontend);
-+
-+/*
-+ * For MSI interrupts we have to use drivers/xen/event.s functions to
-+ * allocate an irq_desc and setup the right */
-+
-+
-+int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-+{
-+ int irq, ret, i;
-+ struct msi_desc *msidesc;
-+ int *v;
-+
-+ /* Dom0 has another mechanism for this. The exit path
-+ * (xen_pci_teardown_msi_irq) is shared with Dom0.
-+ */
-+ if (xen_initial_domain())
-+ return xen_setup_msi_irqs(dev, nvec, type);
-+
-+ v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
-+ if (!v)
-+ return -ENOMEM;
-+
-+ if (!xen_initial_domain()) {
-+ if (type == PCI_CAP_ID_MSIX)
-+ ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
-+ else
-+ ret = xen_pci_frontend_enable_msi(dev, &v);
-+ if (ret)
-+ goto error;
-+ }
-+ i = 0;
-+ list_for_each_entry(msidesc, &dev->msi_list, list) {
-+ irq = xen_allocate_pirq(v[i], 0, /* not sharable */
-+ (type == PCI_CAP_ID_MSIX) ?
-+ "pcifront-msi-x":"pcifront-msi");
-+ if (irq < 0)
-+ return -1;
-+
-+ ret = set_irq_msi(irq, msidesc);
-+ if (ret)
-+ goto error_while;
-+ i++;
-+ }
-+ kfree(v);
-+ return 0;
-+
-+error_while:
-+ unbind_from_irqhandler(irq, NULL);
-+error:
-+ if (ret == -ENODEV)
-+ dev_err(&dev->dev,"Xen PCI frontend has not registered" \
-+ " MSI/MSI-X support!\n");
-+
-+ kfree(v);
-+ return ret;
-+}
-+
-+void xen_pci_teardown_msi_dev(struct pci_dev *dev)
-+{
-+ /* Only do this when were are in non-privileged mode.*/
-+ if (!xen_initial_domain()) {
-+ struct msi_desc *msidesc;
-+
-+ msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
-+ if (msidesc->msi_attrib.is_msix)
-+ xen_pci_frontend_disable_msix(dev);
-+ else
-+ xen_pci_frontend_disable_msi(dev);
-+ }
-+
-+}
-+
-+void xen_pci_teardown_msi_irq(int irq)
-+{
-+ xen_destroy_irq(irq);
-+}
-+#endif
-+
-+static int xen_pcifront_enable_irq(struct pci_dev *dev)
-+{
-+ int rc;
-+ int share = 1;
-+
-+ dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
-+
-+ if (dev->irq < 0)
-+ return -EINVAL;
-+
-+ if (dev->irq < NR_IRQS_LEGACY)
-+ share = 0;
-+
-+ rc = xen_allocate_pirq(dev->irq, share, "pcifront");
-+ if (rc < 0) {
-+ dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
-+ dev->irq, rc);
-+ return rc;
-+ }
-+ return 0;
-+}
-+
-+int __init pci_xen_init(void)
-+{
-+ if (!xen_pv_domain() || xen_initial_domain())
-+ return -ENODEV;
-+
-+ printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
-+
-+ pcibios_set_cache_line_size();
-+
-+ pcibios_enable_irq = xen_pcifront_enable_irq;
-+ pcibios_disable_irq = NULL;
-+
-+#ifdef CONFIG_ACPI
-+ /* Keep ACPI out of the picture */
-+ acpi_noirq = 1;
-+#endif
-+
-+#ifdef CONFIG_ISAPNP
-+ /* Stop isapnp from probing */
-+ isapnp_disable = 1;
-+#endif
-+
-+ /* Ensure a device still gets scanned even if it's fn number
-+ * is non-zero.
-+ */
-+ pci_scan_all_fns = 1;
-+
-+ return 0;
-+}
-+
-diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
-index b83e119..3f9f4a0 100644
---- a/arch/x86/xen/Kconfig
-+++ b/arch/x86/xen/Kconfig
-@@ -13,16 +13,18 @@ config XEN
- kernel to boot in a paravirtualized environment under the
- Xen hypervisor.
-
-+config XEN_PVHVM
-+ def_bool y
-+ depends on XEN
-+ depends on X86_LOCAL_APIC
-+
- config XEN_MAX_DOMAIN_MEMORY
-- int "Maximum allowed size of a domain in gigabytes"
-- default 8 if X86_32
-- default 32 if X86_64
-+ int
-+ default 128
- depends on XEN
- help
-- The pseudo-physical to machine address array is sized
-- according to the maximum possible memory size of a Xen
-- domain. This array uses 1 page per gigabyte, so there's no
-- need to be too stingy here.
-+ This only affects the sizing of some bss arrays, the unused
-+ portions of which are freed.
-
- config XEN_SAVE_RESTORE
- bool
-@@ -36,3 +38,40 @@ config XEN_DEBUG_FS
- help
- Enable statistics output and various tuning options in debugfs.
- Enabling this option may incur a significant performance overhead.
-+
-+config SWIOTLB_XEN
-+ def_bool y
-+ depends on XEN && SWIOTLB
-+
-+config MICROCODE_XEN
-+ def_bool y
-+ depends on XEN_DOM0 && MICROCODE
-+
-+config XEN_DOM0
-+ bool "Enable Xen privileged domain support"
-+ depends on XEN && X86_IO_APIC && ACPI
-+ help
-+ The Xen hypervisor requires a privileged domain ("dom0") to
-+ actually manage the machine, provide devices drivers, etc.
-+ This option enables dom0 support. A dom0 kernel can also
-+ run as an unprivileged domU kernel, or a kernel running
-+ native on bare hardware.
-+
-+# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST
-+# name in tools.
-+config XEN_PRIVILEGED_GUEST
-+ def_bool XEN_DOM0
-+
-+config XEN_DOM0_PCI
-+ def_bool y
-+ depends on XEN_DOM0 && PCI
-+ select PCI_XEN
-+
-+config XEN_PCI_PASSTHROUGH
-+ bool "Enable support for Xen PCI passthrough devices"
-+ depends on XEN && PCI
-+ select PCI_XEN
-+ select SWIOTLB_XEN
-+ help
-+ Enable support for passing PCI devices through to
-+ unprivileged domains. (COMPLETELY UNTESTED)
-diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
-index 3bb4fc2..13ca65c 100644
---- a/arch/x86/xen/Makefile
-+++ b/arch/x86/xen/Makefile
-@@ -12,9 +12,12 @@ CFLAGS_mmu.o := $(nostackp)
-
- obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
- time.o xen-asm.o xen-asm_$(BITS).o \
-- grant-table.o suspend.o
-+ grant-table.o suspend.o platform-pci-unplug.o
-
- obj-$(CONFIG_SMP) += smp.o
- obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
- obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
--
-+obj-$(CONFIG_XEN_DOM0) += vga.o
-+obj-$(CONFIG_XEN_DOM0) += apic.o
-+obj-$(CONFIG_SWIOTLB) += pci-swiotlb-xen.o
-+obj-$(CONFIG_XEN_DOM0_PCI) += pci.o
-\ No newline at end of file
-diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
-new file mode 100644
-index 0000000..21a3089
---- /dev/null
-+++ b/arch/x86/xen/apic.c
-@@ -0,0 +1,33 @@
-+#include <linux/kernel.h>
-+#include <linux/threads.h>
-+#include <linux/bitmap.h>
-+
-+#include <asm/io_apic.h>
-+#include <asm/acpi.h>
-+#include <asm/hw_irq.h>
-+
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/hypercall.h>
-+
-+#include <xen/xen.h>
-+#include <xen/interface/xen.h>
-+#include <xen/interface/physdev.h>
-+
-+void __init xen_io_apic_init(void)
-+{
-+ enable_IO_APIC();
-+}
-+
-+void xen_init_apic(void)
-+{
-+ if (!xen_initial_domain())
-+ return;
-+
-+#ifdef CONFIG_ACPI
-+ /*
-+ * Pretend ACPI found our lapic even though we've disabled it,
-+ * to prevent MP tables from setting up lapics.
-+ */
-+ acpi_lapic = 1;
-+#endif
-+}
-diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
-index 0087b00..070f138 100644
---- a/arch/x86/xen/enlighten.c
-+++ b/arch/x86/xen/enlighten.c
-@@ -11,6 +11,7 @@
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-
-+#include <linux/cpu.h>
- #include <linux/kernel.h>
- #include <linux/init.h>
- #include <linux/smp.h>
-@@ -28,12 +29,15 @@
- #include <linux/highmem.h>
- #include <linux/console.h>
-
-+#include <xen/xen.h>
- #include <xen/interface/xen.h>
- #include <xen/interface/version.h>
- #include <xen/interface/physdev.h>
- #include <xen/interface/vcpu.h>
-+#include <xen/interface/memory.h>
- #include <xen/features.h>
- #include <xen/page.h>
-+#include <xen/hvm.h>
- #include <xen/hvc-console.h>
-
- #include <asm/paravirt.h>
-@@ -53,6 +57,7 @@
- #include <asm/tlbflush.h>
- #include <asm/reboot.h>
- #include <asm/stackprotector.h>
-+#include <asm/hypervisor.h>
-
- #include "xen-ops.h"
- #include "mmu.h"
-@@ -66,6 +71,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
- enum xen_domain_type xen_domain_type = XEN_NATIVE;
- EXPORT_SYMBOL_GPL(xen_domain_type);
-
-+unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
-+EXPORT_SYMBOL(machine_to_phys_mapping);
-+unsigned int machine_to_phys_order;
-+EXPORT_SYMBOL(machine_to_phys_order);
-+
- struct start_info *xen_start_info;
- EXPORT_SYMBOL_GPL(xen_start_info);
-
-@@ -73,6 +83,9 @@ struct shared_info xen_dummy_shared_info;
-
- void *xen_initial_gdt;
-
-+__read_mostly int xen_have_vector_callback;
-+EXPORT_SYMBOL_GPL(xen_have_vector_callback);
-+
- /*
- * Point at some empty memory to start with. We map the real shared_info
- * page as soon as fixmap is up and running.
-@@ -94,6 +107,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
- */
- static int have_vcpu_info_placement = 1;
-
-+static void clamp_max_cpus(void)
-+{
-+#ifdef CONFIG_SMP
-+ if (setup_max_cpus > MAX_VIRT_CPUS)
-+ setup_max_cpus = MAX_VIRT_CPUS;
-+#endif
-+}
-+
- static void xen_vcpu_setup(int cpu)
- {
- struct vcpu_register_vcpu_info info;
-@@ -101,19 +122,20 @@ static void xen_vcpu_setup(int cpu)
- struct vcpu_info *vcpup;
-
- BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
-
-- if (!have_vcpu_info_placement)
-- return; /* already tested, not available */
-+ if (cpu < MAX_VIRT_CPUS)
-+ per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
-
-- vcpup = &per_cpu(xen_vcpu_info, cpu);
-+ if (!have_vcpu_info_placement) {
-+ if (cpu >= MAX_VIRT_CPUS)
-+ clamp_max_cpus();
-+ return;
-+ }
-
-+ vcpup = &per_cpu(xen_vcpu_info, cpu);
- info.mfn = arbitrary_virt_to_mfn(vcpup);
- info.offset = offset_in_page(vcpup);
-
-- printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
-- cpu, vcpup, info.mfn, info.offset);
--
- /* Check to see if the hypervisor will put the vcpu_info
- structure where we want it, which allows direct access via
- a percpu-variable. */
-@@ -122,13 +144,11 @@ static void xen_vcpu_setup(int cpu)
- if (err) {
- printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
- have_vcpu_info_placement = 0;
-+ clamp_max_cpus();
- } else {
- /* This cpu is using the registered vcpu info, even if
- later ones fail to. */
- per_cpu(xen_vcpu, cpu) = vcpup;
--
-- printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
-- cpu, vcpup);
- }
- }
-
-@@ -167,13 +187,16 @@ static void __init xen_banner(void)
-
- printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
- pv_info.name);
-- printk(KERN_INFO "Xen version: %d.%d%s%s\n",
-+ printk(KERN_INFO "Xen version: %d.%d%s%s%s\n",
- version >> 16, version & 0xffff, extra.extraversion,
-- xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
-+ xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ?
-+ " (preserve-AD)" : "",
-+ xen_initial_domain() ? " (dom0)" : "");
- }
-
- static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
- static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
-+static __read_mostly unsigned int cpuid_leaf81_edx_mask = ~0;
-
- static void xen_cpuid(unsigned int *ax, unsigned int *bx,
- unsigned int *cx, unsigned int *dx)
-@@ -187,7 +210,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
- * unsupported kernel subsystems as possible.
- */
- switch (*ax) {
-- case 1:
-+ case 0x1:
- maskecx = cpuid_leaf1_ecx_mask;
- maskedx = cpuid_leaf1_edx_mask;
- break;
-@@ -196,6 +219,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
- /* Suppress extended topology stuff */
- maskebx = 0;
- break;
-+
-+ case 0x80000001:
-+ maskedx = cpuid_leaf81_edx_mask;
-+ break;
- }
-
- asm(XEN_EMULATE_PREFIX "cpuid"
-@@ -213,34 +240,29 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
- static __init void xen_init_cpuid_mask(void)
- {
- unsigned int ax, bx, cx, dx;
-+ unsigned int xsave_mask;
-
- cpuid_leaf1_edx_mask =
-- ~((1 << X86_FEATURE_MCE) | /* disable MCE */
-- (1 << X86_FEATURE_MCA) | /* disable MCA */
-- (1 << X86_FEATURE_ACC)); /* thermal monitoring */
-+ ~(1 << X86_FEATURE_ACC); /* thermal monitoring */
-+
-+ cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32));
-
- if (!xen_initial_domain())
- cpuid_leaf1_edx_mask &=
-- ~((1 << X86_FEATURE_APIC) | /* disable local APIC */
-+ ~((1 << X86_FEATURE_MCE) | /* disable MCE */
-+ (1 << X86_FEATURE_MCA) | /* disable MCA */
-+ (1 << X86_FEATURE_APIC) | /* disable local APIC */
- (1 << X86_FEATURE_ACPI)); /* disable ACPI */
--
- ax = 1;
-- cx = 0;
- xen_cpuid(&ax, &bx, &cx, &dx);
-
-- /* cpuid claims we support xsave; try enabling it to see what happens */
-- if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
-- unsigned long cr4;
--
-- set_in_cr4(X86_CR4_OSXSAVE);
--
-- cr4 = read_cr4();
--
-- if ((cr4 & X86_CR4_OSXSAVE) == 0)
-- cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
-+ xsave_mask =
-+ (1 << (X86_FEATURE_XSAVE % 32)) |
-+ (1 << (X86_FEATURE_OSXSAVE % 32));
-
-- clear_in_cr4(X86_CR4_OSXSAVE);
-- }
-+ /* Xen will set CR4.OSXSAVE if supported and not disabled by force */
-+ if ((cx & xsave_mask) != xsave_mask)
-+ cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
- }
-
- static void xen_set_debugreg(int reg, unsigned long val)
-@@ -406,7 +428,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
-
- pte = pfn_pte(pfn, PAGE_KERNEL_RO);
-
-- if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
-+ if (HYPERVISOR_update_va_mapping(va, pte, 0))
- BUG();
-
- frames[f] = mfn;
-@@ -517,13 +539,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
- return 0;
- #ifdef CONFIG_X86_MCE
- } else if (addr == (unsigned long)machine_check) {
-- return 0;
-+ /* We can use the original machine_check handler,
-+ despite IST. */
- #endif
-- } else {
-- /* Some other trap using IST? */
-- if (WARN_ON(val->ist != 0))
-- return 0;
-- }
-+ } else if (WARN(val->ist != 0,
-+ "Unknown IST-using trap: vector %d, %pF, val->ist=%d\n",
-+ vector, (void *)addr, val->ist))
-+ return 0;
- #endif /* CONFIG_X86_64 */
- info->address = addr;
-
-@@ -679,6 +701,18 @@ static void xen_set_iopl_mask(unsigned mask)
- HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- }
-
-+static void xen_set_io_bitmap(struct thread_struct *thread,
-+ unsigned long bytes_updated)
-+{
-+ struct physdev_set_iobitmap set_iobitmap;
-+
-+ set_xen_guest_handle(set_iobitmap.bitmap,
-+ (char *)thread->io_bitmap_ptr);
-+ set_iobitmap.nr_ports = thread->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
-+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap,
-+ &set_iobitmap));
-+}
-+
- static void xen_io_delay(void)
- {
- }
-@@ -716,7 +750,7 @@ static u32 xen_safe_apic_wait_icr_idle(void)
- return 0;
- }
-
--static void set_xen_basic_apic_ops(void)
-+static __init void set_xen_basic_apic_ops(void)
- {
- apic->read = xen_apic_read;
- apic->write = xen_apic_write;
-@@ -728,7 +762,6 @@ static void set_xen_basic_apic_ops(void)
-
- #endif
-
--
- static void xen_clts(void)
- {
- struct multicall_space mcs;
-@@ -811,6 +844,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
- Xen console noise. */
- break;
-
-+ case MSR_IA32_CR_PAT:
-+ if (smp_processor_id() == 0)
-+ xen_set_pat(((u64)high << 32) | low);
-+ break;
-+
- default:
- ret = native_write_msr_safe(msr, low, high);
- }
-@@ -849,8 +887,6 @@ void xen_setup_vcpu_info_placement(void)
- /* xen_vcpu_setup managed to place the vcpu_info within the
- percpu area for all cpus, so make use of it */
- if (have_vcpu_info_placement) {
-- printk(KERN_INFO "Xen: using vcpu_info placement\n");
--
- pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
- pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
- pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
-@@ -923,10 +959,6 @@ static const struct pv_init_ops xen_init_ops __initdata = {
- .patch = xen_patch,
- };
-
--static const struct pv_time_ops xen_time_ops __initdata = {
-- .sched_clock = xen_clocksource_read,
--};
--
- static const struct pv_cpu_ops xen_cpu_ops __initdata = {
- .cpuid = xen_cpuid,
-
-@@ -978,6 +1010,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
- .load_sp0 = xen_load_sp0,
-
- .set_iopl_mask = xen_set_iopl_mask,
-+ .set_io_bitmap = xen_set_io_bitmap,
- .io_delay = xen_io_delay,
-
- /* Xen takes care of %gs when switching to usermode for us */
-@@ -1016,15 +1049,40 @@ static void xen_machine_halt(void)
- xen_reboot(SHUTDOWN_poweroff);
- }
-
-+static void xen_machine_power_off(void)
-+{
-+ if (pm_power_off)
-+ pm_power_off();
-+ else
-+ xen_reboot(SHUTDOWN_poweroff);
-+}
-+
- static void xen_crash_shutdown(struct pt_regs *regs)
- {
- xen_reboot(SHUTDOWN_crash);
- }
-
-+static int
-+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
-+{
-+ xen_reboot(SHUTDOWN_crash);
-+ return NOTIFY_DONE;
-+}
-+
-+static struct notifier_block xen_panic_block = {
-+ .notifier_call= xen_panic_event,
-+};
-+
-+int xen_panic_handler_init(void)
-+{
-+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
-+ return 0;
-+}
-+
- static const struct machine_ops __initdata xen_machine_ops = {
- .restart = xen_restart,
- .halt = xen_machine_halt,
-- .power_off = xen_machine_halt,
-+ .power_off = xen_machine_power_off,
- .shutdown = xen_machine_halt,
- .crash_shutdown = xen_crash_shutdown,
- .emergency_restart = xen_emergency_restart,
-@@ -1057,10 +1115,11 @@ asmlinkage void __init xen_start_kernel(void)
-
- xen_domain_type = XEN_PV_DOMAIN;
-
-+ xen_setup_machphys_mapping();
-+
- /* Install Xen paravirt ops */
- pv_info = xen_info;
- pv_init_ops = xen_init_ops;
-- pv_time_ops = xen_time_ops;
- pv_cpu_ops = xen_cpu_ops;
- pv_apic_ops = xen_apic_ops;
-
-@@ -1068,13 +1127,7 @@ asmlinkage void __init xen_start_kernel(void)
- x86_init.oem.arch_setup = xen_arch_setup;
- x86_init.oem.banner = xen_banner;
-
-- x86_init.timers.timer_init = xen_time_init;
-- x86_init.timers.setup_percpu_clockev = x86_init_noop;
-- x86_cpuinit.setup_percpu_clockev = x86_init_noop;
--
-- x86_platform.calibrate_tsc = xen_tsc_khz;
-- x86_platform.get_wallclock = xen_get_wallclock;
-- x86_platform.set_wallclock = xen_set_wallclock;
-+ xen_init_time_ops();
-
- /*
- * Set up some pagetable state before starting to set any ptes.
-@@ -1112,6 +1165,10 @@ asmlinkage void __init xen_start_kernel(void)
- */
- xen_setup_stackprotector();
-
-+#ifdef CONFIG_SPARSE_IRQ
-+ nr_dynamic_irqs += 256;
-+#endif
-+
- xen_init_irq_ops();
- xen_init_cpuid_mask();
-
-@@ -1138,8 +1195,19 @@ asmlinkage void __init xen_start_kernel(void)
-
- xen_smp_init();
-
-+#ifdef CONFIG_ACPI_NUMA
-+ /*
-+ * The pages we get from Xen are not related to machine pages, so
-+ * any NUMA information the kernel tries to get from ACPI will
-+ * be meaningless. Prevent it from trying.
-+ */
-+ acpi_numa = -1;
-+#endif
-+
- pgd = (pgd_t *)xen_start_info->pt_base;
-
-+ __supported_pte_mask |= _PAGE_IOMAP;
-+
- /* Don't do the full vcpu_info placement stuff until we have a
- possible map and a non-dummy shared_info. */
- per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-@@ -1149,6 +1217,10 @@ asmlinkage void __init xen_start_kernel(void)
-
- xen_raw_console_write("mapping kernel into physical memory\n");
- pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
-+ xen_ident_map_ISA();
-+
-+ /* Allocate and initialize top and mid mfn levels for p2m structure */
-+ xen_build_mfn_list_list();
-
- init_mm.pgd = pgd;
-
-@@ -1158,6 +1230,14 @@ asmlinkage void __init xen_start_kernel(void)
- if (xen_feature(XENFEAT_supervisor_mode_kernel))
- pv_info.kernel_rpl = 0;
-
-+ if (xen_initial_domain()) {
-+ struct physdev_set_iopl set_iopl;
-+ set_iopl.iopl = 1;
-+ if (HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl) == -1)
-+ BUG();
-+ xen_init_apic();
-+ }
-+
- /* set the limit of our address space */
- xen_reserve_top();
-
-@@ -1180,6 +1260,16 @@ asmlinkage void __init xen_start_kernel(void)
- add_preferred_console("xenboot", 0, NULL);
- add_preferred_console("tty", 0, NULL);
- add_preferred_console("hvc", 0, NULL);
-+
-+ boot_params.screen_info.orig_video_isVGA = 0;
-+ } else {
-+ const struct dom0_vga_console_info *info =
-+ (void *)((char *)xen_start_info +
-+ xen_start_info->console.dom0.info_off);
-+
-+ xen_init_vga(info, xen_start_info->console.dom0.info_size);
-+ xen_start_info->console.domU.mfn = 0;
-+ xen_start_info->console.domU.evtchn = 0;
- }
-
- xen_raw_console_write("about to get started...\n");
-@@ -1193,3 +1283,126 @@ asmlinkage void __init xen_start_kernel(void)
- x86_64_start_reservations((char *)__pa_symbol(&boot_params));
- #endif
- }
-+
-+static uint32_t xen_cpuid_base(void)
-+{
-+ uint32_t base, eax, ebx, ecx, edx;
-+ char signature[13];
-+
-+ for (base = 0x40000000; base < 0x40010000; base += 0x100) {
-+ cpuid(base, &eax, &ebx, &ecx, &edx);
-+ *(uint32_t *)(signature + 0) = ebx;
-+ *(uint32_t *)(signature + 4) = ecx;
-+ *(uint32_t *)(signature + 8) = edx;
-+ signature[12] = 0;
-+
-+ if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
-+ return base;
-+ }
-+
-+ return 0;
-+}
-+
-+static int init_hvm_pv_info(int *major, int *minor)
-+{
-+ uint32_t eax, ebx, ecx, edx, pages, msr, base;
-+ u64 pfn;
-+
-+ base = xen_cpuid_base();
-+ if (!base)
-+ return -EINVAL;
-+
-+ cpuid(base + 1, &eax, &ebx, &ecx, &edx);
-+
-+ *major = eax >> 16;
-+ *minor = eax & 0xffff;
-+ printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor);
-+
-+ cpuid(base + 2, &pages, &msr, &ecx, &edx);
-+
-+ pfn = __pa(hypercall_page);
-+ wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-+
-+ xen_setup_features();
-+
-+ pv_info = xen_info;
-+ pv_info.kernel_rpl = 0;
-+
-+ xen_domain_type = XEN_HVM_DOMAIN;
-+
-+ return 0;
-+}
-+
-+void xen_hvm_init_shared_info(void)
-+{
-+ int cpu;
-+ struct xen_add_to_physmap xatp;
-+ static struct shared_info *shared_info_page = 0;
-+
-+ if (!shared_info_page)
-+ shared_info_page = (struct shared_info *) alloc_bootmem_pages(PAGE_SIZE);
-+ xatp.domid = DOMID_SELF;
-+ xatp.idx = 0;
-+ xatp.space = XENMAPSPACE_shared_info;
-+ xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
-+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
-+ BUG();
-+
-+ HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
-+
-+ /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
-+ * page, we use it in the event channel upcall and in some pvclock
-+ * related functions. We don't need the vcpu_info placement
-+ * optimizations because we don't use any pv_mmu or pv_irq op on
-+ * HVM.
-+ * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
-+ * online but xen_hvm_init_shared_info is run at resume time too and
-+ * in that case multiple vcpus might be online. */
-+ for_each_online_cpu(cpu) {
-+ per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
-+ }
-+}
-+
-+#ifdef CONFIG_XEN_PVHVM
-+static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
-+ unsigned long action, void *hcpu)
-+{
-+ int cpu = (long)hcpu;
-+ switch (action) {
-+ case CPU_UP_PREPARE:
-+ per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
-+ break;
-+ default:
-+ break;
-+ }
-+ return NOTIFY_OK;
-+}
-+
-+static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
-+ .notifier_call = xen_hvm_cpu_notify,
-+};
-+
-+void __init xen_hvm_guest_init(void)
-+{
-+ int r;
-+ int major, minor;
-+
-+ if (xen_pv_domain())
-+ return;
-+
-+ r = init_hvm_pv_info(&major, &minor);
-+ if (r < 0)
-+ return;
-+
-+ xen_hvm_init_shared_info();
-+
-+ if (xen_feature(XENFEAT_hvm_callback_vector))
-+ xen_have_vector_callback = 1;
-+ register_cpu_notifier(&xen_hvm_cpu_notifier);
-+ xen_unplug_emulated_devices();
-+ have_vcpu_info_placement = 0;
-+ x86_init.irqs.intr_init = xen_init_IRQ;
-+ xen_hvm_init_time_ops();
-+ xen_hvm_init_mmu_ops();
-+}
-+#endif
-diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
-index 350a3de..32a1c65 100644
---- a/arch/x86/xen/mmu.c
-+++ b/arch/x86/xen/mmu.c
-@@ -42,6 +42,7 @@
- #include <linux/highmem.h>
- #include <linux/debugfs.h>
- #include <linux/bug.h>
-+#include <linux/vmalloc.h>
- #include <linux/module.h>
-
- #include <asm/pgtable.h>
-@@ -50,14 +51,20 @@
- #include <asm/mmu_context.h>
- #include <asm/setup.h>
- #include <asm/paravirt.h>
-+#include <asm/e820.h>
- #include <asm/linkage.h>
-+#include <asm/pat.h>
-+#include <asm/init.h>
-+#include <asm/page.h>
-
- #include <asm/xen/hypercall.h>
- #include <asm/xen/hypervisor.h>
-
- #include <xen/page.h>
- #include <xen/interface/xen.h>
-+#include <xen/interface/hvm/hvm_op.h>
- #include <xen/interface/version.h>
-+#include <xen/interface/memory.h>
- #include <xen/hvc-console.h>
-
- #include "multicalls.h"
-@@ -66,6 +73,13 @@
-
- #define MMU_UPDATE_HISTO 30
-
-+/*
-+ * Protects atomic reservation decrease/increase against concurrent increases.
-+ * Also protects non-atomic updates of current_pages and driver_pages, and
-+ * balloon lists.
-+ */
-+DEFINE_SPINLOCK(xen_reservation_lock);
-+
- #ifdef CONFIG_XEN_DEBUG_FS
-
- static struct {
-@@ -124,7 +138,8 @@ static inline void check_zero(void)
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
--static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
-+#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
-+static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
-
- #ifdef CONFIG_X86_64
- /* l3 pud for userspace vsyscall mapping */
-@@ -155,49 +170,202 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
- */
- #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
-
-+/*
-+ * Xen leaves the responsibility for maintaining p2m mappings to the
-+ * guests themselves, but it must also access and update the p2m array
-+ * during suspend/resume when all the pages are reallocated.
-+ *
-+ * The p2m table is logically a flat array, but we implement it as a
-+ * three-level tree to allow the address space to be sparse.
-+ *
-+ *               Xen
-+ *                |
-+ *     p2m_top              p2m_top_mfn
-+ *       /  \                   /   \
-+ * p2m_mid p2m_mid        p2m_mid_mfn p2m_mid_mfn
-+ *    / \      / \         /           /
-+ *  p2m p2m p2m p2m p2m p2m p2m ...
-+ *
-+ * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
-+ *
-+ * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
-+ * maximum representable pseudo-physical address space is:
-+ * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
-+ *
-+ * P2M_PER_PAGE depends on the architecture, as a mfn is always
-+ * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
-+ * 512 and 1024 entries respectively.
-+ */
-+
-+unsigned long xen_max_p2m_pfn __read_mostly;
-
--#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
--#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
-+#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
-+#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
-+#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
-
--/* Placeholder for holes in the address space */
--static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
-- { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
-+#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
-- /* Array of pointers to pages containing p2m entries */
--static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
-- { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
-+/* Placeholders for holes in the address space */
-+static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
-+static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
-+static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
-
--/* Arrays of p2m arrays expressed in mfns used for save/restore */
--static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
-+static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
-+static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
-+static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
-
--static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
-- __page_aligned_bss;
-+RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-+RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-
- static inline unsigned p2m_top_index(unsigned long pfn)
- {
-- BUG_ON(pfn >= MAX_DOMAIN_PAGES);
-- return pfn / P2M_ENTRIES_PER_PAGE;
-+ BUG_ON(pfn >= MAX_P2M_PFN);
-+ return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
-+}
-+
-+static inline unsigned p2m_mid_index(unsigned long pfn)
-+{
-+ return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
- }
-
- static inline unsigned p2m_index(unsigned long pfn)
- {
-- return pfn % P2M_ENTRIES_PER_PAGE;
-+ return pfn % P2M_PER_PAGE;
-+}
-+
-+static void p2m_top_init(unsigned long ***top)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-+ top[i] = p2m_mid_missing;
-+}
-+
-+static void p2m_top_mfn_init(unsigned long *top)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-+ top[i] = virt_to_mfn(p2m_mid_missing_mfn);
-+}
-+
-+static void p2m_top_mfn_p_init(unsigned long **top)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-+ top[i] = p2m_mid_missing_mfn;
-+}
-+
-+static void p2m_mid_init(unsigned long **mid)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < P2M_MID_PER_PAGE; i++)
-+ mid[i] = p2m_missing;
-+}
-+
-+static void p2m_mid_mfn_init(unsigned long *mid)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < P2M_MID_PER_PAGE; i++)
-+ mid[i] = virt_to_mfn(p2m_missing);
-+}
-+
-+static void p2m_init(unsigned long *p2m)
-+{
-+ unsigned i;
-+
-+ for (i = 0; i < P2M_MID_PER_PAGE; i++)
-+ p2m[i] = INVALID_P2M_ENTRY;
-+}
-+
-+static int lookup_pte_fn(
-+ pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
-+{
-+ uint64_t *ptep = (uint64_t *)data;
-+ if (ptep)
-+ *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pmd_page)) <<
-+ PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
-+ return 0;
- }
-
--/* Build the parallel p2m_top_mfn structures */
-+int create_lookup_pte_addr(struct mm_struct *mm,
-+ unsigned long address,
-+ uint64_t *ptep)
-+{
-+ return apply_to_page_range(mm, address, PAGE_SIZE,
-+ lookup_pte_fn, ptep);
-+}
-+
-+EXPORT_SYMBOL(create_lookup_pte_addr);
-+
-+/*
-+ * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
-+ *
-+ * This is called both at boot time, and after resuming from suspend:
-+ * - At boot time we're called very early, and must use extend_brk()
-+ * to allocate memory.
-+ *
-+ * - After resume we're called from within stop_machine, but the mfn
-+ * tree should already be completely allocated.
-+ */
- void xen_build_mfn_list_list(void)
- {
-- unsigned pfn, idx;
-+ unsigned long pfn;
-
-- for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
-- unsigned topidx = p2m_top_index(pfn);
-+ /* Pre-initialize p2m_top_mfn to be completely missing */
-+ if (p2m_top_mfn == NULL) {
-+ p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
-+ p2m_mid_mfn_init(p2m_mid_missing_mfn);
-+
-+ p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
-+ p2m_top_mfn_p_init(p2m_top_mfn_p);
-
-- p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
-+ p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
-+ p2m_top_mfn_init(p2m_top_mfn);
-+ } else {
-+ /* Reinitialise, mfn's all change after migration */
-+ p2m_mid_mfn_init(p2m_mid_missing_mfn);
- }
-
-- for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
-- unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
-- p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
-+ for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
-+ unsigned topidx = p2m_top_index(pfn);
-+ unsigned mididx = p2m_mid_index(pfn);
-+ unsigned long **mid;
-+ unsigned long *mid_mfn_p;
-+
-+ mid = p2m_top[topidx];
-+ mid_mfn_p = p2m_top_mfn_p[topidx];
-+
-+ /* Don't bother allocating any mfn mid levels if
-+ * they're just missing, just update the stored mfn,
-+ * since all could have changed over a migrate.
-+ */
-+ if (mid == p2m_mid_missing) {
-+ BUG_ON(mididx);
-+ BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
-+ p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
-+ pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
-+ continue;
-+ }
-+
-+ if (mid_mfn_p == p2m_mid_missing_mfn) {
-+ /*
-+ * XXX boot-time only! We should never find
-+ * missing parts of the mfn tree after
-+ * runtime. extend_brk() will BUG if we call
-+ * it too late.
-+ */
-+ mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
-+ p2m_mid_mfn_init(mid_mfn_p);
-+
-+ p2m_top_mfn_p[topidx] = mid_mfn_p;
-+ }
-+
-+ p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
-+ mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
- }
- }
-
-@@ -206,8 +374,8 @@ void xen_setup_mfn_list_list(void)
- BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
-- virt_to_mfn(p2m_top_mfn_list);
-- HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
-+ virt_to_mfn(p2m_top_mfn);
-+ HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
- }
-
- /* Set up p2m_top to point to the domain-builder provided p2m pages */
-@@ -215,98 +383,176 @@ void __init xen_build_dynamic_phys_to_machine(void)
- {
- unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
- unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
-- unsigned pfn;
-+ unsigned long pfn;
-+
-+ xen_max_p2m_pfn = max_pfn;
-
-- for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
-+ p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-+ p2m_init(p2m_missing);
-+
-+ p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-+ p2m_mid_init(p2m_mid_missing);
-+
-+ p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
-+ p2m_top_init(p2m_top);
-+
-+ /*
-+ * The domain builder gives us a pre-constructed p2m array in
-+ * mfn_list for all the pages initially given to us, so we just
-+ * need to graft that into our tree structure.
-+ */
-+ for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
- unsigned topidx = p2m_top_index(pfn);
-+ unsigned mididx = p2m_mid_index(pfn);
-
-- p2m_top[topidx] = &mfn_list[pfn];
-- }
-+ if (p2m_top[topidx] == p2m_mid_missing) {
-+ unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-+ p2m_mid_init(mid);
-+
-+ p2m_top[topidx] = mid;
-+ }
-
-- xen_build_mfn_list_list();
-+ p2m_top[topidx][mididx] = &mfn_list[pfn];
-+ }
- }
-
- unsigned long get_phys_to_machine(unsigned long pfn)
- {
-- unsigned topidx, idx;
-+ unsigned topidx, mididx, idx;
-
-- if (unlikely(pfn >= MAX_DOMAIN_PAGES))
-+ if (unlikely(pfn >= MAX_P2M_PFN))
- return INVALID_P2M_ENTRY;
-
- topidx = p2m_top_index(pfn);
-+ mididx = p2m_mid_index(pfn);
- idx = p2m_index(pfn);
-- return p2m_top[topidx][idx];
-+
-+ return p2m_top[topidx][mididx][idx];
- }
- EXPORT_SYMBOL_GPL(get_phys_to_machine);
-
--/* install a new p2m_top page */
--bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
-+static void *alloc_p2m_page(void)
- {
-- unsigned topidx = p2m_top_index(pfn);
-- unsigned long **pfnp, *mfnp;
-- unsigned i;
-+ return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
-+}
-
-- pfnp = &p2m_top[topidx];
-- mfnp = &p2m_top_mfn[topidx];
-+static void free_p2m_page(void *p)
-+{
-+ free_page((unsigned long)p);
-+}
-
-- for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
-- p[i] = INVALID_P2M_ENTRY;
-+/*
-+ * Fully allocate the p2m structure for a given pfn. We need to check
-+ * that both the top and mid levels are allocated, and make sure the
-+ * parallel mfn tree is kept in sync. We may race with other cpus, so
-+ * the new pages are installed with cmpxchg; if we lose the race then
-+ * simply free the page we allocated and use the one that's there.
-+ */
-+static bool alloc_p2m(unsigned long pfn)
-+{
-+ unsigned topidx, mididx;
-+ unsigned long ***top_p, **mid;
-+ unsigned long *top_mfn_p, *mid_mfn;
-
-- if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
-- *mfnp = virt_to_mfn(p);
-- return true;
-+ topidx = p2m_top_index(pfn);
-+ mididx = p2m_mid_index(pfn);
-+
-+ top_p = &p2m_top[topidx];
-+ mid = *top_p;
-+
-+ if (mid == p2m_mid_missing) {
-+ /* Mid level is missing, allocate a new one */
-+ mid = alloc_p2m_page();
-+ if (!mid)
-+ return false;
-+
-+ p2m_mid_init(mid);
-+
-+ if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
-+ free_p2m_page(mid);
- }
-
-- return false;
--}
-+ top_mfn_p = &p2m_top_mfn[topidx];
-+ mid_mfn = p2m_top_mfn_p[topidx];
-
--static void alloc_p2m(unsigned long pfn)
--{
-- unsigned long *p;
-+ BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
-+
-+ if (mid_mfn == p2m_mid_missing_mfn) {
-+ /* Separately check the mid mfn level */
-+ unsigned long missing_mfn;
-+ unsigned long mid_mfn_mfn;
-+
-+ mid_mfn = alloc_p2m_page();
-+ if (!mid_mfn)
-+ return false;
-
-- p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
-- BUG_ON(p == NULL);
-+ p2m_mid_mfn_init(mid_mfn);
-
-- if (!install_p2mtop_page(pfn, p))
-- free_page((unsigned long)p);
-+ missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
-+ mid_mfn_mfn = virt_to_mfn(mid_mfn);
-+ if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
-+ free_p2m_page(mid_mfn);
-+ else
-+ p2m_top_mfn_p[topidx] = mid_mfn;
-+ }
-+
-+ if (p2m_top[topidx][mididx] == p2m_missing) {
-+ /* p2m leaf page is missing */
-+ unsigned long *p2m;
-+
-+ p2m = alloc_p2m_page();
-+ if (!p2m)
-+ return false;
-+
-+ p2m_init(p2m);
-+
-+ if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
-+ free_p2m_page(p2m);
-+ else
-+ mid_mfn[mididx] = virt_to_mfn(p2m);
-+ }
-+
-+ return true;
- }
-
- /* Try to install p2m mapping; fail if intermediate bits missing */
- bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
- {
-- unsigned topidx, idx;
-+ unsigned topidx, mididx, idx;
-
-- if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
-+ if (unlikely(pfn >= MAX_P2M_PFN)) {
- BUG_ON(mfn != INVALID_P2M_ENTRY);
- return true;
- }
-
- topidx = p2m_top_index(pfn);
-- if (p2m_top[topidx] == p2m_missing) {
-- if (mfn == INVALID_P2M_ENTRY)
-- return true;
-- return false;
-- }
--
-+ mididx = p2m_mid_index(pfn);
- idx = p2m_index(pfn);
-- p2m_top[topidx][idx] = mfn;
-+
-+ if (p2m_top[topidx][mididx] == p2m_missing)
-+ return mfn == INVALID_P2M_ENTRY;
-+
-+ p2m_top[topidx][mididx][idx] = mfn;
-
- return true;
- }
-
--void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-+bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
- {
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-- return;
-+ return true;
- }
-
- if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
-- alloc_p2m(pfn);
-+ if (!alloc_p2m(pfn))
-+ return false;
-
- if (!__set_phys_to_machine(pfn, mfn))
-- BUG();
-+ return false;
- }
-+
-+ return true;
- }
-
- unsigned long arbitrary_virt_to_mfn(void *vaddr)
-@@ -315,6 +561,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr)
-
- return PFN_DOWN(maddr.maddr);
- }
-+EXPORT_SYMBOL_GPL(set_phys_to_machine);
-
- xmaddr_t arbitrary_virt_to_machine(void *vaddr)
- {
-@@ -345,7 +592,8 @@ void make_lowmem_page_readonly(void *vaddr)
- unsigned int level;
-
- pte = lookup_address(address, &level);
-- BUG_ON(pte == NULL);
-+ if (pte == NULL)
-+ return; /* vaddr missing */
-
- ptev = pte_wrprotect(*pte);
-
-@@ -360,7 +608,8 @@ void make_lowmem_page_readwrite(void *vaddr)
- unsigned int level;
-
- pte = lookup_address(address, &level);
-- BUG_ON(pte == NULL);
-+ if (pte == NULL)
-+ return; /* vaddr missing */
-
- ptev = pte_mkwrite(*pte);
-
-@@ -376,6 +625,24 @@ static bool xen_page_pinned(void *ptr)
- return PagePinned(page);
- }
-
-+void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
-+{
-+ struct multicall_space mcs;
-+ struct mmu_update *u;
-+
-+ mcs = xen_mc_entry(sizeof(*u));
-+ u = mcs.args;
-+
-+ /* ptep might be kmapped when using 32-bit HIGHPTE */
-+ u->ptr = arbitrary_virt_to_machine(ptep).maddr;
-+ u->val = pte_val_ma(pteval);
-+
-+ MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
-+
-+ xen_mc_issue(PARAVIRT_LAZY_MMU);
-+}
-+EXPORT_SYMBOL_GPL(xen_set_domain_pte);
-+
- static void xen_extend_mmu_update(const struct mmu_update *update)
- {
- struct multicall_space mcs;
-@@ -516,7 +783,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
- if (val & _PAGE_PRESENT) {
- unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
- pteval_t flags = val & PTE_FLAGS_MASK;
-- val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
-+ unsigned long mfn = pfn_to_mfn(pfn);
-+
-+ /*
-+ * If there's no mfn for the pfn, then just create an
-+ * empty non-present pte. Unfortunately this loses
-+ * information about the original pfn, so
-+ * pte_mfn_to_pfn is asymmetric.
-+ */
-+ if (unlikely(mfn == INVALID_P2M_ENTRY)) {
-+ mfn = 0;
-+ flags = 0;
-+ }
-+
-+ val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
-+ }
-+
-+ return val;
-+}
-+
-+static pteval_t iomap_pte(pteval_t val)
-+{
-+ if (val & _PAGE_PRESENT) {
-+ unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
-+ pteval_t flags = val & PTE_FLAGS_MASK;
-+
-+ /* We assume the pte frame number is a MFN, so
-+ just use it as-is. */
-+ val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
- }
-
- return val;
-@@ -524,7 +818,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
-
- pteval_t xen_pte_val(pte_t pte)
- {
-- return pte_mfn_to_pfn(pte.pte);
-+ pteval_t pteval = pte.pte;
-+
-+ /* If this is a WC pte, convert back from Xen WC to Linux WC */
-+ if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
-+ WARN_ON(!pat_enabled);
-+ pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
-+ }
-+
-+ if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
-+ return pteval;
-+
-+ return pte_mfn_to_pfn(pteval);
- }
- PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
-
-@@ -534,9 +839,62 @@ pgdval_t xen_pgd_val(pgd_t pgd)
- }
- PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
-
-+/*
-+ * Xen's PAT setup is part of its ABI, though I assume entries 6 & 7
-+ * are reserved for now, to correspond to the Intel-reserved PAT
-+ * types.
-+ *
-+ * We expect Linux's PAT set as follows:
-+ *
-+ * Idx  PTE flags        Linux    Xen    Default
-+ * 0                     WB       WB     WB
-+ * 1            PWT      WC       WT     WT
-+ * 2        PCD          UC-      UC-    UC-
-+ * 3        PCD PWT      UC       UC     UC
-+ * 4    PAT              WB       WC     WB
-+ * 5    PAT     PWT      WC       WP     WT
-+ * 6    PAT PCD          UC-      UC     UC-
-+ * 7    PAT PCD PWT      UC       UC     UC
-+ */
-+
-+void xen_set_pat(u64 pat)
-+{
-+ /* We expect Linux to use a PAT setting of
-+ * UC UC- WC WB (ignoring the PAT flag) */
-+ WARN_ON(pat != 0x0007010600070106ull);
-+}
-+
- pte_t xen_make_pte(pteval_t pte)
- {
-- pte = pte_pfn_to_mfn(pte);
-+ phys_addr_t addr = (pte & PTE_PFN_MASK);
-+
-+ /* If Linux is trying to set a WC pte, then map to the Xen WC.
-+ * If _PAGE_PAT is set, then it probably means it is really
-+ * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
-+ * things work out OK...
-+ *
-+ * (We should never see kernel mappings with _PAGE_PSE set,
-+ * but we could see hugetlbfs mappings, I think.)
-+ */
-+ if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) {
-+ if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
-+ pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
-+ }
-+
-+ /*
-+ * Unprivileged domains are allowed to do IOMAPpings for
-+ * PCI passthrough, but not map ISA space. The ISA
-+ * mappings are just dummy local mappings to keep other
-+ * parts of the kernel happy.
-+ */
-+ if (unlikely(pte & _PAGE_IOMAP) &&
-+ (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
-+ pte = iomap_pte(pte);
-+ } else {
-+ pte &= ~_PAGE_IOMAP;
-+ pte = pte_pfn_to_mfn(pte);
-+ }
-+
- return native_make_pte(pte);
- }
- PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-@@ -934,8 +1292,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
- read-only, and can be pinned. */
- static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
- {
-- vm_unmap_aliases();
--
- xen_mc_batch();
-
- if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
-@@ -1219,7 +1575,7 @@ void xen_exit_mmap(struct mm_struct *mm)
- spin_lock(&mm->page_table_lock);
-
- /* pgd may not be pinned in the error exit path of execve */
-- if (xen_page_pinned(mm->pgd))
-+ if (xen_page_pinned(mm->pgd) && !mm->context.has_foreign_mappings)
- xen_pgd_unpin(mm);
-
- spin_unlock(&mm->page_table_lock);
-@@ -1288,12 +1644,19 @@ static void xen_flush_tlb_single(unsigned long addr)
- preempt_enable();
- }
-
-+/*
-+ * Flush tlb on other cpus. Xen can do this via a single hypercall
-+ * rather than explicit IPIs, which has the nice property of avoiding
-+ * any cpus which don't actually have dirty tlbs. Unfortunately it
-+ * doesn't give us an opportunity to kick out cpus which are in lazy
-+ * tlb state, so we may end up reflushing some cpus unnecessarily.
-+ */
- static void xen_flush_tlb_others(const struct cpumask *cpus,
- struct mm_struct *mm, unsigned long va)
- {
- struct {
- struct mmuext_op op;
-- DECLARE_BITMAP(mask, NR_CPUS);
-+ DECLARE_BITMAP(mask, num_processors);
- } *args;
- struct multicall_space mcs;
-
-@@ -1417,6 +1780,13 @@ static int xen_pgd_alloc(struct mm_struct *mm)
- return ret;
- }
-
-+void xen_late_unpin_pgd(struct mm_struct *mm, pgd_t *pgd)
-+{
-+ if (xen_page_pinned(pgd))
-+ __xen_pgd_unpin(mm, pgd);
-+
-+}
-+
- static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
- {
- #ifdef CONFIG_X86_64
-@@ -1445,13 +1815,29 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
- }
- #endif
-
--#ifdef CONFIG_X86_32
- static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
- {
-- /* If there's an existing pte, then don't allow _PAGE_RW to be set */
-- if (pte_val_ma(*ptep) & _PAGE_PRESENT)
-+ unsigned long pfn = pte_pfn(pte);
-+ pte_t oldpte = *ptep;
-+
-+ if (pte_flags(oldpte) & _PAGE_PRESENT) {
-+ /* Don't allow existing IO mappings to be overridden */
-+ if (pte_flags(oldpte) & _PAGE_IOMAP)
-+ pte = oldpte;
-+
-+ /* Don't allow _PAGE_RW to be set on existing pte */
- pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
- pte_val_ma(pte));
-+ }
-+
-+ /*
-+ * If the new pfn is within the range of the newly allocated
-+ * kernel pagetable, and it isn't being mapped into an
-+ * early_ioremap fixmap slot, make sure it is RO.
-+ */
-+ if (!is_early_ioremap_ptep(ptep) &&
-+ pfn >= e820_table_start && pfn < e820_table_end)
-+ pte = pte_wrprotect(pte);
-
- return pte;
- }
-@@ -1464,7 +1850,6 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
-
- xen_set_pte(ptep, pte);
- }
--#endif
-
- static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
- {
-@@ -1517,7 +1902,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
- if (PagePinned(virt_to_page(mm->pgd))) {
- SetPagePinned(page);
-
-- vm_unmap_aliases();
- if (!PageHighMem(page)) {
- make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
- if (level == PT_PTE && USE_SPLIT_PTLOCKS)
-@@ -1620,6 +2004,7 @@ static void *m2v(phys_addr_t maddr)
- return __ka(m2p(maddr));
- }
-
-+/* Set the page permissions on an identity-mapped page */
- static void set_page_prot(void *addr, pgprot_t prot)
- {
- unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
-@@ -1635,6 +2020,9 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
- unsigned ident_pte;
- unsigned long pfn;
-
-+ level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
-+ PAGE_SIZE);
-+
- ident_pte = 0;
- pfn = 0;
- for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
-@@ -1645,7 +2033,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
- pte_page = m2v(pmd[pmdidx].pmd);
- else {
- /* Check for free pte pages */
-- if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
-+ if (ident_pte == LEVEL1_IDENT_ENTRIES)
- break;
-
- pte_page = &level1_ident_pgt[ident_pte];
-@@ -1675,6 +2063,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
- set_page_prot(pmd, PAGE_KERNEL_RO);
- }
-
-+void __init xen_setup_machphys_mapping(void)
-+{
-+ struct xen_machphys_mapping mapping;
-+ unsigned long machine_to_phys_nr_ents;
-+
-+ if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
-+ machine_to_phys_mapping = (unsigned long *)mapping.v_start;
-+ machine_to_phys_nr_ents = mapping.max_mfn + 1;
-+ } else {
-+ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
-+ }
-+ machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
-+}
-+
- #ifdef CONFIG_X86_64
- static void convert_pfn_mfn(void *v)
- {
-@@ -1760,12 +2162,15 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
- return pgd;
- }
- #else /* !CONFIG_X86_64 */
--static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
-+static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD);
-
- __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
- unsigned long max_pfn)
- {
- pmd_t *kernel_pmd;
-+ int i;
-+
-+ level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
-
- max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
- xen_start_info->nr_pt_frames * PAGE_SIZE +
-@@ -1777,6 +2182,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
- xen_map_identity_early(level2_kernel_pgt, max_pfn);
-
- memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
-+
-+ /*
-+ * When running a 32 bit domain 0 on a 64 bit hypervisor a
-+ * pinned L3 (such as the initial pgd here) contains bits
-+ * which are reserved in the PAE layout but not in the 64 bit
-+ * layout. Unfortunately some versions of the hypervisor
-+ * (incorrectly) validate compat mode guests against the PAE
-+ * layout and hence will not allow such a pagetable to be
-+ * pinned by the guest. Therefore we mask off only the PFN and
-+ * Present bits of the supplied L3.
-+ */
-+ for (i = 0; i < PTRS_PER_PGD; i++)
-+ swapper_pg_dir[i].pgd &= (PTE_PFN_MASK | _PAGE_PRESENT);
-+
- set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
- __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
-
-@@ -1799,6 +2218,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
- }
- #endif /* CONFIG_X86_64 */
-
-+static unsigned char dummy_ioapic_mapping[PAGE_SIZE] __page_aligned_bss;
-+
- static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
- {
- pte_t pte;
-@@ -1828,9 +2249,26 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
- pte = pfn_pte(phys, prot);
- break;
-
-- default:
-+#ifdef CONFIG_X86_IO_APIC
-+ case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
-+ /*
-+ * We just don't map the IO APIC - all access is via
-+ * hypercalls. Keep the address in the pte for reference.
-+ */
-+ pte = pfn_pte(PFN_DOWN(__pa(dummy_ioapic_mapping)), PAGE_KERNEL);
-+ break;
-+#endif
-+
-+ case FIX_PARAVIRT_BOOTMAP:
-+ /* This is an MFN, but it isn't an IO mapping from the
-+ IO domain */
- pte = mfn_pte(phys, prot);
- break;
-+
-+ default:
-+ /* By default, set_fixmap is used for hardware mappings */
-+ pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
-+ break;
- }
-
- __native_set_fixmap(idx, pte);
-@@ -1845,6 +2283,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
- #endif
- }
-
-+__init void xen_ident_map_ISA(void)
-+{
-+ unsigned long pa;
-+
-+ /*
-+ * If we're dom0, then linear map the ISA machine addresses into
-+ * the kernel's address space.
-+ */
-+ if (!xen_initial_domain())
-+ return;
-+
-+ xen_raw_printk("Xen: setup ISA identity maps\n");
-+
-+ for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) {
-+ pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO);
-+
-+ if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0))
-+ BUG();
-+ }
-+
-+ xen_flush_tlb();
-+}
-+
- static __init void xen_post_allocator_init(void)
- {
- pv_mmu_ops.set_pte = xen_set_pte;
-@@ -1907,11 +2368,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
- .kmap_atomic_pte = xen_kmap_atomic_pte,
- #endif
-
--#ifdef CONFIG_X86_64
-- .set_pte = xen_set_pte,
--#else
- .set_pte = xen_set_pte_init,
--#endif
- .set_pte_at = xen_set_pte_at,
- .set_pmd = xen_set_pmd_hyper,
-
-@@ -1960,8 +2417,305 @@ void __init xen_init_mmu_ops(void)
- x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
- x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
- pv_mmu_ops = xen_mmu_ops;
-+
-+ vmap_lazy_unmap = false;
-+}
-+
-+/* Protected by xen_reservation_lock. */
-+#define MAX_CONTIG_ORDER 9 /* 2MB */
-+static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
-+
-+#define VOID_PTE (mfn_pte(0, __pgprot(0)))
-+static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
-+ unsigned long *in_frames,
-+ unsigned long *out_frames)
-+{
-+ int i;
-+ struct multicall_space mcs;
-+
-+ xen_mc_batch();
-+ for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
-+ mcs = __xen_mc_entry(0);
-+
-+ if (in_frames)
-+ in_frames[i] = virt_to_mfn(vaddr);
-+
-+ MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
-+ set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
-+
-+ if (out_frames)
-+ out_frames[i] = virt_to_pfn(vaddr);
-+ }
-+ xen_mc_issue(0);
-+}
-+
-+/*
-+ * Update the pfn-to-mfn mappings for a virtual address range, either to
-+ * point to an array of mfns, or contiguously from a single starting
-+ * mfn.
-+ */
-+static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
-+ unsigned long *mfns,
-+ unsigned long first_mfn)
-+{
-+ unsigned i, limit;
-+ unsigned long mfn;
-+
-+ xen_mc_batch();
-+
-+ limit = 1u << order;
-+ for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
-+ struct multicall_space mcs;
-+ unsigned flags;
-+
-+ mcs = __xen_mc_entry(0);
-+ if (mfns)
-+ mfn = mfns[i];
-+ else
-+ mfn = first_mfn + i;
-+
-+ if (i < (limit - 1))
-+ flags = 0;
-+ else {
-+ if (order == 0)
-+ flags = UVMF_INVLPG | UVMF_ALL;
-+ else
-+ flags = UVMF_TLB_FLUSH | UVMF_ALL;
-+ }
-+
-+ MULTI_update_va_mapping(mcs.mc, vaddr,
-+ mfn_pte(mfn, PAGE_KERNEL), flags);
-+
-+ set_phys_to_machine(virt_to_pfn(vaddr), mfn);
-+ }
-+
-+ xen_mc_issue(0);
-+}
-+
-+/*
-+ * Perform the hypercall to exchange a region of our pfns to point to
-+ * memory with the required contiguous alignment. Takes the pfns as
-+ * input, and populates mfns as output.
-+ *
-+ * Returns a success code indicating whether the hypervisor was able to
-+ * satisfy the request or not.
-+ */
-+static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
-+ unsigned long *pfns_in,
-+ unsigned long extents_out, unsigned int order_out,
-+ unsigned long *mfns_out,
-+ unsigned int address_bits)
-+{
-+ long rc;
-+ int success;
-+
-+ struct xen_memory_exchange exchange = {
-+ .in = {
-+ .nr_extents = extents_in,
-+ .extent_order = order_in,
-+ .extent_start = pfns_in,
-+ .domid = DOMID_SELF
-+ },
-+ .out = {
-+ .nr_extents = extents_out,
-+ .extent_order = order_out,
-+ .extent_start = mfns_out,
-+ .address_bits = address_bits,
-+ .domid = DOMID_SELF
-+ }
-+ };
-+
-+ BUG_ON(extents_in << order_in != extents_out << order_out);
-+
-+ rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
-+ success = (exchange.nr_exchanged == extents_in);
-+
-+ BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
-+ BUG_ON(success && (rc != 0));
-+
-+ return success;
- }
-
-+int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
-+ unsigned int address_bits)
-+{
-+ unsigned long *in_frames = discontig_frames, out_frame;
-+ unsigned long flags;
-+ int success;
-+
-+ /*
-+ * Currently an auto-translated guest will not perform I/O, nor will
-+ * it require PAE page directories below 4GB. Therefore any calls to
-+ * this function are redundant and can be ignored.
-+ */
-+
-+ if (xen_feature(XENFEAT_auto_translated_physmap))
-+ return 0;
-+
-+ if (unlikely(order > MAX_CONTIG_ORDER))
-+ return -ENOMEM;
-+
-+ memset((void *) vstart, 0, PAGE_SIZE << order);
-+
-+ spin_lock_irqsave(&xen_reservation_lock, flags);
-+
-+ /* 1. Zap current PTEs, remembering MFNs. */
-+ xen_zap_pfn_range(vstart, order, in_frames, NULL);
-+
-+ /* 2. Get a new contiguous memory extent. */
-+ out_frame = virt_to_pfn(vstart);
-+ success = xen_exchange_memory(1UL << order, 0, in_frames,
-+ 1, order, &out_frame,
-+ address_bits);
-+
-+ /* 3. Map the new extent in place of old pages. */
-+ if (success)
-+ xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
-+ else
-+ xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
-+
-+ spin_unlock_irqrestore(&xen_reservation_lock, flags);
-+
-+ return success ? 0 : -ENOMEM;
-+}
-+EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
-+
-+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
-+{
-+ unsigned long *out_frames = discontig_frames, in_frame;
-+ unsigned long flags;
-+ int success;
-+
-+ if (xen_feature(XENFEAT_auto_translated_physmap))
-+ return;
-+
-+ if (unlikely(order > MAX_CONTIG_ORDER))
-+ return;
-+
-+ memset((void *) vstart, 0, PAGE_SIZE << order);
-+
-+ spin_lock_irqsave(&xen_reservation_lock, flags);
-+
-+ /* 1. Find start MFN of contiguous extent. */
-+ in_frame = virt_to_mfn(vstart);
-+
-+ /* 2. Zap current PTEs. */
-+ xen_zap_pfn_range(vstart, order, NULL, out_frames);
-+
-+ /* 3. Do the exchange for non-contiguous MFNs. */
-+ success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
-+ 0, out_frames, 0);
-+
-+ /* 4. Map new pages in place of old pages. */
-+ if (success)
-+ xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
-+ else
-+ xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
-+
-+ spin_unlock_irqrestore(&xen_reservation_lock, flags);
-+}
-+EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
-+
-+#define REMAP_BATCH_SIZE 16
-+
-+struct remap_data {
-+ unsigned long mfn;
-+ pgprot_t prot;
-+ struct mmu_update *mmu_update;
-+};
-+
-+static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
-+ unsigned long addr, void *data)
-+{
-+ struct remap_data *rmd = data;
-+ pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
-+
-+ rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
-+ rmd->mmu_update->val = pte_val_ma(pte);
-+ rmd->mmu_update++;
-+
-+ return 0;
-+}
-+
-+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
-+ unsigned long addr,
-+ unsigned long mfn, int nr,
-+ pgprot_t prot, unsigned domid)
-+{
-+ struct remap_data rmd;
-+ struct mmu_update mmu_update[REMAP_BATCH_SIZE];
-+ int batch;
-+ unsigned long range;
-+ int err = 0;
-+
-+ prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
-+
-+ vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
-+
-+ rmd.mfn = mfn;
-+ rmd.prot = prot;
-+
-+ while (nr) {
-+ batch = min(REMAP_BATCH_SIZE, nr);
-+ range = (unsigned long)batch << PAGE_SHIFT;
-+
-+ rmd.mmu_update = mmu_update;
-+ err = apply_to_page_range(vma->vm_mm, addr, range,
-+ remap_area_mfn_pte_fn, &rmd);
-+ if (err)
-+ goto out;
-+
-+ err = -EFAULT;
-+ if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
-+ goto out;
-+
-+ nr -= batch;
-+ addr += range;
-+ }
-+
-+ err = 0;
-+out:
-+
-+ flush_tlb_all();
-+
-+ return err;
-+}
-+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
-+
-+#ifdef CONFIG_XEN_PVHVM
-+static void xen_hvm_exit_mmap(struct mm_struct *mm)
-+{
-+ struct xen_hvm_pagetable_dying a;
-+ int rc;
-+
-+ a.domid = DOMID_SELF;
-+ a.gpa = __pa(mm->pgd);
-+ rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
-+ WARN_ON_ONCE(rc < 0);
-+}
-+
-+static int is_pagetable_dying_supported(void)
-+{
-+ struct xen_hvm_pagetable_dying a;
-+ int rc = 0;
-+
-+ a.domid = DOMID_SELF;
-+ a.gpa = 0x00;
-+ rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
-+ if (rc < 0) {
-+ printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+void __init xen_hvm_init_mmu_ops(void)
-+{
-+ if (is_pagetable_dying_supported())
-+ pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
-+}
-+#endif
-+
- #ifdef CONFIG_XEN_DEBUG_FS
-
- static struct dentry *d_mmu_debug;
-diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
-index 5fe6bc7..537bb9a 100644
---- a/arch/x86/xen/mmu.h
-+++ b/arch/x86/xen/mmu.h
-@@ -12,7 +12,6 @@ enum pt_level {
-
-
- bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
--bool install_p2mtop_page(unsigned long pfn, unsigned long *p);
-
- void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
-@@ -60,4 +59,5 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
- unsigned long xen_read_cr2_direct(void);
-
- extern void xen_init_mmu_ops(void);
-+extern void xen_hvm_init_mmu_ops(void);
- #endif /* _XEN_MMU_H */
-diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
-new file mode 100644
-index 0000000..4d55524
---- /dev/null
-+++ b/arch/x86/xen/pci-swiotlb-xen.c
-@@ -0,0 +1,52 @@
-+/* Glue code to lib/swiotlb-xen.c */
-+
-+#include <linux/dma-mapping.h>
-+#include <linux/swiotlb.h>
-+
-+#include <asm/xen/hypervisor.h>
-+
-+int xen_swiotlb __read_mostly;
-+
-+static struct dma_map_ops xen_swiotlb_dma_ops = {
-+ .mapping_error = xen_swiotlb_dma_mapping_error,
-+ .alloc_coherent = xen_swiotlb_alloc_coherent,
-+ .free_coherent = xen_swiotlb_free_coherent,
-+ .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
-+ .sync_single_for_device = xen_swiotlb_sync_single_for_device,
-+ .sync_single_range_for_cpu = xen_swiotlb_sync_single_range_for_cpu,
-+ .sync_single_range_for_device = xen_swiotlb_sync_single_range_for_device,
-+ .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
-+ .sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
-+ .map_sg = xen_swiotlb_map_sg_attrs,
-+ .unmap_sg = xen_swiotlb_unmap_sg_attrs,
-+ .map_page = xen_swiotlb_map_page,
-+ .unmap_page = xen_swiotlb_unmap_page,
-+ .dma_supported = xen_swiotlb_dma_supported,
-+};
-+
-+/*
-+ * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary
-+ *
-+ * xen_swiotlb is set for a Xen PV domain that is the initial domain or
-+ * already has swiotlb set. Returns the resulting xen_swiotlb value.
-+ */
-+int __init pci_xen_swiotlb_detect(void)
-+{
-+
-+ if (xen_pv_domain() && (xen_initial_domain() || swiotlb))
-+ xen_swiotlb = 1;
-+
-+ /* If we are running under Xen, we MUST disable the native SWIOTLB */
-+ if (xen_pv_domain())
-+ swiotlb = 0;
-+
-+ return xen_swiotlb;
-+}
-+
-+void __init pci_xen_swiotlb_init(void)
-+{
-+ if (xen_swiotlb) {
-+ xen_swiotlb_init(1);
-+ dma_ops = &xen_swiotlb_dma_ops;
-+ }
-+}
-diff --git a/arch/x86/xen/pci.c b/arch/x86/xen/pci.c
-new file mode 100644
-index 0000000..8ca31f1
---- /dev/null
-+++ b/arch/x86/xen/pci.c
-@@ -0,0 +1,296 @@
-+#include <linux/kernel.h>
-+#include <linux/acpi.h>
-+#include <linux/pci.h>
-+#include <linux/msi.h>
-+#include <linux/slab.h>
-+
-+#include <asm/mpspec.h>
-+#include <asm/io_apic.h>
-+#include <asm/pci_x86.h>
-+
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/pci.h>
-+
-+#include <xen/interface/xen.h>
-+#include <xen/events.h>
-+
-+#include "xen-ops.h"
-+
-+int xen_register_pirq(u32 gsi, int triggering)
-+{
-+ int rc, irq;
-+ struct physdev_map_pirq map_irq;
-+ int shareable = 0;
-+ char *name;
-+
-+ if (!xen_pv_domain())
-+ return -1;
-+
-+ if (triggering == ACPI_EDGE_SENSITIVE) {
-+ shareable = 0;
-+ name = "ioapic-edge";
-+ } else {
-+ shareable = 1;
-+ name = "ioapic-level";
-+ }
-+
-+ irq = xen_allocate_pirq(gsi, shareable, name);
-+
-+ printk(KERN_DEBUG "xen: --> irq=%d\n", irq);
-+
-+ if (irq < 0)
-+ goto out;
-+
-+ map_irq.domid = DOMID_SELF;
-+ map_irq.type = MAP_PIRQ_TYPE_GSI;
-+ map_irq.index = gsi;
-+ map_irq.pirq = irq;
-+
-+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
-+ if (rc) {
-+ printk(KERN_WARNING "xen map irq failed %d\n", rc);
-+ return -1;
-+ }
-+
-+out:
-+ return irq;
-+}
-+
-+int xen_register_gsi(u32 gsi, int triggering, int polarity)
-+{
-+ int rc, irq;
-+ struct physdev_setup_gsi setup_gsi;
-+
-+ if (!xen_pv_domain())
-+ return -1;
-+
-+ printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
-+ gsi, triggering, polarity);
-+
-+ irq = xen_register_pirq(gsi, triggering);
-+
-+ setup_gsi.gsi = gsi;
-+ setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
-+ setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
-+
-+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
-+ if (rc == -EEXIST)
-+ printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
-+ else if (rc) {
-+ printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
-+ gsi, rc);
-+ }
-+
-+ return irq;
-+}
-+
-+#ifdef CONFIG_ACPI
-+#define BAD_MADT_ENTRY(entry, end) ( \
-+ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
-+ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
-+
-+
-+static int __init
-+xen_acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
-+ const unsigned long end)
-+{
-+ struct acpi_madt_interrupt_override *intsrc = NULL;
-+
-+ intsrc = (struct acpi_madt_interrupt_override *)header;
-+
-+ if (BAD_MADT_ENTRY(intsrc, end))
-+ return -EINVAL;
-+
-+ acpi_table_print_madt_entry(header);
-+
-+ if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) {
-+ int gsi;
-+ int trigger, polarity;
-+
-+ trigger = intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK;
-+ polarity = intsrc->inti_flags & ACPI_MADT_POLARITY_MASK;
-+
-+ /* Command-line over-ride via acpi_sci= */
-+ if (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)
-+ trigger = acpi_sci_flags & ACPI_MADT_TRIGGER_MASK;
-+
-+ if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
-+ polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
-+
-+ printk("xen: sci override: source_irq=%d global_irq=%d trigger=%x polarity=%x\n",
-+ intsrc->source_irq, intsrc->global_irq,
-+ trigger, polarity);
-+
-+ switch (polarity) {
-+ case ACPI_MADT_POLARITY_CONFORMS:
-+ case ACPI_MADT_POLARITY_ACTIVE_LOW:
-+ polarity = ACPI_ACTIVE_LOW;
-+ break;
-+
-+ case ACPI_MADT_POLARITY_ACTIVE_HIGH:
-+ polarity = ACPI_ACTIVE_HIGH;
-+ break;
-+
-+ default:
-+ return 0;
-+ }
-+
-+ switch (trigger) {
-+ case ACPI_MADT_TRIGGER_CONFORMS:
-+ case ACPI_MADT_TRIGGER_LEVEL:
-+ trigger = ACPI_LEVEL_SENSITIVE;
-+ break;
-+
-+ case ACPI_MADT_TRIGGER_EDGE:
-+ trigger = ACPI_EDGE_SENSITIVE;
-+ break;
-+
-+ default:
-+ return 0;
-+ }
-+
-+ gsi = xen_register_gsi(intsrc->global_irq,
-+ trigger, polarity);
-+ /*
-+ * stash over-ride to indicate we've been here
-+ * and for later update of acpi_gbl_FADT
-+ */
-+ acpi_sci_override_gsi = gsi;
-+
-+ printk("xen: acpi sci %d\n", gsi);
-+ }
-+
-+ return 0;
-+}
-+
-+static __init void xen_setup_acpi_sci(void)
-+{
-+ acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
-+ xen_acpi_parse_int_src_ovr,
-+ nr_irqs);
-+}
-+#else
-+static __init void xen_setup_acpi_sci(void)
-+{
-+}
-+#endif
-+
-+void __init xen_setup_pirqs(void)
-+{
-+ int irq;
-+
-+ if (0 == nr_ioapics) {
-+ for (irq = 0; irq < NR_IRQS_LEGACY; irq++)
-+ xen_allocate_pirq(irq, 0, "xt-pic");
-+ return;
-+ }
-+
-+ /* Pre-allocate legacy irqs */
-+ for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
-+ int trigger, polarity;
-+
-+ if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
-+ continue;
-+
-+ xen_register_pirq(irq,
-+ trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE);
-+ }
-+
-+ xen_setup_acpi_sci();
-+}
-+
-+#ifdef CONFIG_PCI_MSI
-+int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-+{
-+ int irq, ret;
-+ struct msi_desc *msidesc;
-+
-+ list_for_each_entry(msidesc, &dev->msi_list, list) {
-+ irq = xen_create_msi_irq(dev, msidesc, type);
-+ if (irq < 0)
-+ return -1;
-+
-+ ret = set_irq_msi(irq, msidesc);
-+ if (ret)
-+ goto error;
-+ }
-+ return 0;
-+
-+error:
-+ xen_destroy_irq(irq);
-+ return ret;
-+}
-+#endif
-+
-+struct xen_device_domain_owner {
-+ domid_t domain;
-+ struct pci_dev *dev;
-+ struct list_head list;
-+};
-+
-+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
-+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
-+
-+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
-+{
-+ struct xen_device_domain_owner *owner;
-+
-+ list_for_each_entry(owner, &dev_domain_list, list) {
-+ if (owner->dev == dev)
-+ return owner;
-+ }
-+ return NULL;
-+}
-+
-+int xen_find_device_domain_owner(struct pci_dev *dev)
-+{
-+ struct xen_device_domain_owner *owner;
-+ int domain = -ENODEV;
-+
-+ spin_lock(&dev_domain_list_spinlock);
-+ owner = find_device(dev);
-+ if (owner)
-+ domain = owner->domain;
-+ spin_unlock(&dev_domain_list_spinlock);
-+ return domain;
-+}
-+EXPORT_SYMBOL(xen_find_device_domain_owner);
-+
-+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
-+{
-+ struct xen_device_domain_owner *owner;
-+
-+ owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
-+ if (!owner)
-+ return -ENODEV;
-+
-+ spin_lock(&dev_domain_list_spinlock);
-+ if (find_device(dev)) {
-+ spin_unlock(&dev_domain_list_spinlock);
-+ kfree(owner);
-+ return -EEXIST;
-+ }
-+ owner->domain = domain;
-+ owner->dev = dev;
-+ list_add_tail(&owner->list, &dev_domain_list);
-+ spin_unlock(&dev_domain_list_spinlock);
-+ return 0;
-+}
-+EXPORT_SYMBOL(xen_register_device_domain_owner);
-+
-+int xen_unregister_device_domain_owner(struct pci_dev *dev)
-+{
-+ struct xen_device_domain_owner *owner;
-+
-+ spin_lock(&dev_domain_list_spinlock);
-+ owner = find_device(dev);
-+ if (!owner) {
-+ spin_unlock(&dev_domain_list_spinlock);
-+ return -ENODEV;
-+ }
-+ list_del(&owner->list);
-+ spin_unlock(&dev_domain_list_spinlock);
-+ kfree(owner);
-+ return 0;
-+}
-+EXPORT_SYMBOL(xen_unregister_device_domain_owner);
-diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
-new file mode 100644
-index 0000000..0f45638
---- /dev/null
-+++ b/arch/x86/xen/platform-pci-unplug.c
-@@ -0,0 +1,143 @@
-+/******************************************************************************
-+ * platform-pci-unplug.c
-+ *
-+ * Xen platform PCI device driver
-+ * Copyright (c) 2010, Citrix
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms and conditions of the GNU General Public License,
-+ * version 2, as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-+ * more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along with
-+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-+ * Place - Suite 330, Boston, MA 02111-1307 USA.
-+ *
-+ */
-+
-+#include <linux/init.h>
-+#include <linux/io.h>
-+#include <linux/module.h>
-+
-+#include <xen/platform_pci.h>
-+
-+#define XEN_PLATFORM_ERR_MAGIC -1
-+#define XEN_PLATFORM_ERR_PROTOCOL -2
-+#define XEN_PLATFORM_ERR_BLACKLIST -3
-+
-+/* store the value of xen_emul_unplug after the unplug is done */
-+int xen_platform_pci_unplug;
-+EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
-+#ifdef CONFIG_XEN_PVHVM
-+static int xen_emul_unplug;
-+
-+static int __init check_platform_magic(void)
-+{
-+ short magic;
-+ char protocol;
-+
-+ magic = inw(XEN_IOPORT_MAGIC);
-+ if (magic != XEN_IOPORT_MAGIC_VAL) {
-+ printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n");
-+ return XEN_PLATFORM_ERR_MAGIC;
-+ }
-+
-+ protocol = inb(XEN_IOPORT_PROTOVER);
-+
-+ printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n",
-+ protocol);
-+
-+ switch (protocol) {
-+ case 1: /* write the PRODNUM and DRVVER ports, then re-read the magic */
-+ outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM);
-+ outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER);
-+ if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) {
-+ printk(KERN_ERR "Xen Platform: blacklisted by host\n");
-+ return XEN_PLATFORM_ERR_BLACKLIST;
-+ }
-+ break;
-+ default: /* any other protocol version is rejected */
-+ printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version\n");
-+ return XEN_PLATFORM_ERR_PROTOCOL;
-+ }
-+
-+ return 0; /* magic matched and the protocol 1 re-check passed */
-+}
-+
-+void __init xen_unplug_emulated_devices(void)
-+{
-+ int r;
-+
-+ /* user explicitly requested no unplug */
-+ if (xen_emul_unplug & XEN_UNPLUG_NEVER)
-+ return;
-+ /* check the version of the xen platform PCI device */
-+ r = check_platform_magic();
-+ /* If the version matches enable the Xen platform PCI driver.
-+ * Also enable the Xen platform PCI driver if the host does
-+ * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC)
-+ * but the user told us that unplugging is unnecessary. */
-+ if (r && !(r == XEN_PLATFORM_ERR_MAGIC &&
-+ (xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)))
-+ return;
-+ /* Set the default value of xen_emul_unplug depending on whether or
-+ * not the Xen PV frontends and the Xen platform PCI driver have
-+ * been compiled for this kernel (modules or built-in are both OK). */
-+ if (!xen_emul_unplug) {
-+ if (xen_must_unplug_nics()) {
-+ printk(KERN_INFO "Netfront and the Xen platform PCI driver have "
-+ "been compiled for this kernel: unplug emulated NICs.\n");
-+ xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
-+ }
-+ if (xen_must_unplug_disks()) {
-+ printk(KERN_INFO "Blkfront and the Xen platform PCI driver have "
-+ "been compiled for this kernel: unplug emulated disks.\n"
-+ "You might have to change the root device\n"
-+ "from /dev/hd[a-d] to /dev/xvd[a-d]\n"
-+ "in your root= kernel command line option\n");
-+ xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
-+ }
-+ }
-+ /* Now unplug the emulated devices */
-+ if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))
-+ outw(xen_emul_unplug, XEN_IOPORT_UNPLUG);
-+ xen_platform_pci_unplug = xen_emul_unplug;
-+}
-+
-+static int __init parse_xen_emul_unplug(char *arg)
-+{
-+ char *p, *q;
-+ int l;
-+
-+ for (p = arg; p; p = q) { /* walk the comma-separated tokens; l is the token length */
-+ q = strchr(p, ',');
-+ if (q) {
-+ l = q - p;
-+ q++;
-+ } else {
-+ l = strlen(p);
-+ }
-+ if (l == strlen("all") && !strncmp(p, "all", l)) /* exact match only: a prefix or empty token must not select an option */
-+ xen_emul_unplug |= XEN_UNPLUG_ALL;
-+ else if (l == strlen("ide-disks") && !strncmp(p, "ide-disks", l))
-+ xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS;
-+ else if (l == strlen("aux-ide-disks") && !strncmp(p, "aux-ide-disks", l))
-+ xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS;
-+ else if (l == strlen("nics") && !strncmp(p, "nics", l))
-+ xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
-+ else if (l == strlen("unnecessary") && !strncmp(p, "unnecessary", l))
-+ xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY;
-+ else if (l == strlen("never") && !strncmp(p, "never", l))
-+ xen_emul_unplug |= XEN_UNPLUG_NEVER;
-+ else
-+ printk(KERN_WARNING "unrecognised option '%.*s' "
-+ "in parameter 'xen_emul_unplug'\n", l, p);
-+ }
-+ return 0; /* always 0, including when a token was not recognised */
-+}
-+early_param("xen_emul_unplug", parse_xen_emul_unplug);
-+#endif
-diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
-index ad0047f..1a1934a 100644
---- a/arch/x86/xen/setup.c
-+++ b/arch/x86/xen/setup.c
-@@ -10,6 +10,7 @@
- #include <linux/pm.h>
-
- #include <asm/elf.h>
-+#include <asm/hpet.h>
- #include <asm/vdso.h>
- #include <asm/e820.h>
- #include <asm/setup.h>
-@@ -19,7 +20,9 @@
-
- #include <xen/page.h>
- #include <xen/interface/callback.h>
-+#include <xen/interface/memory.h>
- #include <xen/interface/physdev.h>
-+#include <xen/interface/memory.h>
- #include <xen/features.h>
-
- #include "xen-ops.h"
-@@ -32,25 +35,178 @@ extern void xen_sysenter_target(void);
- extern void xen_syscall_target(void);
- extern void xen_syscall32_target(void);
-
-+/* Amount of extra memory space we add to the e820 ranges */
-+phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
-+
-+/*
-+ * The maximum amount of extra memory compared to the base size. The
-+ * main scaling factor is the size of struct page. At extreme ratios
-+ * of base:extra, all the base memory can be filled with page
-+ * structures for the extra memory, leaving no space for anything
-+ * else.
-+ *
-+ * 10x seems like a reasonable balance between scaling flexibility and
-+ * leaving a practically usable system.
-+ */
-+#define EXTRA_MEM_RATIO (10)
-+
-+static __init void xen_add_extra_mem(unsigned long pages)
-+{
-+ u64 size = (u64)pages * PAGE_SIZE;
-+ u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
-+
-+ if (!pages)
-+ return;
-+
-+ e820_add_region(extra_start, size, E820_RAM);
-+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-+
-+ reserve_early(extra_start, extra_start + size, "XEN EXTRA");
-+
-+ xen_extra_mem_size += size;
-+
-+ xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
-+}
-+
-+static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
-+ phys_addr_t end_addr)
-+{
-+ struct xen_memory_reservation reservation = {
-+ .address_bits = 0,
-+ .extent_order = 0,
-+ .domid = DOMID_SELF
-+ };
-+ unsigned long start, end;
-+ unsigned long len = 0;
-+ unsigned long pfn;
-+ int ret;
-+
-+ start = PFN_UP(start_addr);
-+ end = PFN_DOWN(end_addr);
-+
-+ if (end <= start)
-+ return 0;
-+
-+ printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
-+ start, end);
-+ for(pfn = start; pfn < end; pfn++) {
-+ unsigned long mfn = pfn_to_mfn(pfn);
-+
-+ /* Make sure pfn exists to start with */
-+ if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
-+ continue;
-+
-+ set_xen_guest_handle(reservation.extent_start, &mfn);
-+ reservation.nr_extents = 1;
-+
-+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-+ &reservation);
-+ WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
-+ start, end, ret);
-+ if (ret == 1) {
-+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-+ len++;
-+ }
-+ }
-+ printk(KERN_CONT "%ld pages freed\n", len);
-+
-+ return len;
-+}
-+
-+static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
-+ const struct e820map *e820)
-+{
-+ phys_addr_t max_addr = PFN_PHYS(max_pfn);
-+ phys_addr_t last_end = ISA_END_ADDRESS;
-+ unsigned long released = 0;
-+ int i;
-+
-+ /* Free any unused memory above the low 1Mbyte. */
-+ for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
-+ phys_addr_t end = e820->map[i].addr;
-+ end = min(max_addr, end);
-+
-+ if (last_end < end)
-+ released += xen_release_chunk(last_end, end);
-+ last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
-+ }
-+
-+ if (last_end < max_addr)
-+ released += xen_release_chunk(last_end, max_addr);
-+
-+ printk(KERN_INFO "released %ld pages of unused memory\n", released);
-+ return released;
-+}
-
- /**
- * machine_specific_memory_setup - Hook for machine specific memory setup.
- **/
--
- char * __init xen_memory_setup(void)
- {
-+ static struct e820entry map[E820MAX] __initdata;
-+
- unsigned long max_pfn = xen_start_info->nr_pages;
-+ unsigned long long mem_end;
-+ int rc;
-+ struct xen_memory_map memmap;
-+ unsigned long extra_pages = 0;
-+ unsigned long extra_limit;
-+ int op;
-+ int i;
-
- max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
-+ mem_end = PFN_PHYS(max_pfn);
-+
-+ memmap.nr_entries = E820MAX;
-+ set_xen_guest_handle(memmap.buffer, map);
-+
-+ op = xen_initial_domain() ?
-+ XENMEM_machine_memory_map :
-+ XENMEM_memory_map;
-+ rc = HYPERVISOR_memory_op(op, &memmap);
-+ if (rc == -ENOSYS) {
-+ BUG_ON(xen_initial_domain());
-+ memmap.nr_entries = 1;
-+ map[0].addr = 0ULL;
-+ map[0].size = mem_end;
-+ /* 8MB slack (to balance backend allocations). */
-+ map[0].size += 8ULL << 20;
-+ map[0].type = E820_RAM;
-+ rc = 0;
-+ }
-+ BUG_ON(rc);
-
- e820.nr_map = 0;
-+ xen_extra_mem_start = mem_end;
-+ for (i = 0; i < memmap.nr_entries; i++) {
-+ unsigned long long end = map[i].addr + map[i].size;
-+
-+ if (map[i].type == E820_RAM && end > mem_end) {
-+ /* RAM off the end - may be partially included */
-+ u64 delta = min(map[i].size, end - mem_end);
-
-- e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
-+ map[i].size -= delta;
-+ end -= delta;
-+
-+ extra_pages += PFN_DOWN(delta);
-+ }
-+
-+ if (map[i].size > 0 && end > xen_extra_mem_start)
-+ xen_extra_mem_start = end;
-+
-+ /* Add region if any remains */
-+ if (map[i].size > 0)
-+ e820_add_region(map[i].addr, map[i].size, map[i].type);
-+ }
-
- /*
-- * Even though this is normal, usable memory under Xen, reserve
-- * ISA memory anyway because too many things think they can poke
-+ * In domU, the ISA region is normal, usable memory, but we
-+ * reserve ISA memory anyway because too many things poke
- * about in there.
-+ *
-+ * In Dom0, the host E820 information can leave gaps in the
-+ * ISA range, which would cause us to release those pages. To
-+ * avoid this, we unconditionally reserve them here.
- */
- e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
- E820_RESERVED);
-@@ -67,21 +223,30 @@ char * __init xen_memory_setup(void)
-
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
-- return "Xen";
--}
-+ extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
--static void xen_idle(void)
--{
-- local_irq_disable();
--
-- if (need_resched())
-- local_irq_enable();
-- else {
-- current_thread_info()->status &= ~TS_POLLING;
-- smp_mb__after_clear_bit();
-- safe_halt();
-- current_thread_info()->status |= TS_POLLING;
-- }
-+ /*
-+ * Clamp the amount of extra memory to an EXTRA_MEM_RATIO
-+ * factor of the base size. On non-highmem systems, the base
-+ * size is the full initial memory allocation; on highmem it
-+ * is limited to the max size of lowmem, so that it doesn't
-+ * get completely filled.
-+ *
-+ * In principle there could be a problem in lowmem systems if
-+ * the initial memory is also very large with respect to
-+ * lowmem, but we won't try to deal with that here.
-+ */
-+ extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
-+ max_pfn + extra_pages);
-+
-+ if (extra_limit >= max_pfn)
-+ extra_pages = extra_limit - max_pfn;
-+ else
-+ extra_pages = 0;
-+
-+ xen_add_extra_mem(extra_pages);
-+
-+ return "Xen";
- }
-
- /*
-@@ -156,6 +321,8 @@ void __init xen_arch_setup(void)
- struct physdev_set_iopl set_iopl;
- int rc;
-
-+ xen_panic_handler_init();
-+
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
-
-@@ -182,13 +349,21 @@ void __init xen_arch_setup(void)
- }
- #endif
-
-+ /*
-+ * The Xen hypervisor uses the HPET to wake up CPUs from deep C-states,
-+ * so HPET usage in dom0 must be forbidden.
-+ */
-+ disable_hpet(NULL);
-+
- memcpy(boot_command_line, xen_start_info->cmd_line,
- MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
- COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
-
-- pm_idle = xen_idle;
--
-- paravirt_disable_iospace();
-+ /* Set up idle, making sure it calls safe_halt() pvop */
-+#ifdef CONFIG_X86_32
-+ boot_cpu_data.hlt_works_ok = 1;
-+#endif
-+ pm_idle = default_idle;
-
- fiddle_vdso();
- }
-diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
-index ca5f56e..3e06a9e 100644
---- a/arch/x86/xen/smp.c
-+++ b/arch/x86/xen/smp.c
-@@ -178,11 +178,18 @@ static void __init xen_smp_prepare_boot_cpu(void)
- static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
- {
- unsigned cpu;
-+ unsigned int i;
-
- xen_init_lock_cpu(0);
-
- smp_store_cpu_info(0);
- cpu_data(0).x86_max_cores = 1;
-+
-+ for_each_possible_cpu(i) {
-+ zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
-+ zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
-+ zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
-+ }
- set_cpu_sibling_map(0);
-
- if (xen_smp_intr_init(0))
-@@ -299,6 +306,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
- xen_setup_timer(cpu);
- xen_init_lock_cpu(cpu);
-
-+ cpumask_set_cpu(cpu, cpu_callout_mask);
-+
- per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-
- /* make sure interrupts start blocked */
-@@ -392,6 +401,8 @@ static void stop_self(void *v)
- load_cr3(swapper_pg_dir);
- /* should set up a minimal gdt */
-
-+ set_cpu_online(cpu, false);
-+
- HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
- BUG();
- }
-diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
-index a9c6611..1d789d5 100644
---- a/arch/x86/xen/suspend.c
-+++ b/arch/x86/xen/suspend.c
-@@ -26,6 +26,18 @@ void xen_pre_suspend(void)
- BUG();
- }
-
-+void xen_hvm_post_suspend(int suspend_cancelled)
-+{
-+ int cpu;
-+ xen_hvm_init_shared_info();
-+ xen_callback_vector();
-+ if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
-+ for_each_online_cpu(cpu) {
-+ xen_setup_runstate_info(cpu);
-+ }
-+ }
-+}
-+
- void xen_post_suspend(int suspend_cancelled)
- {
- xen_build_mfn_list_list();
-diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
-index 8e04980..ab35140 100644
---- a/arch/x86/xen/time.c
-+++ b/arch/x86/xen/time.c
-@@ -19,6 +19,7 @@
- #include <asm/xen/hypercall.h>
-
- #include <xen/events.h>
-+#include <xen/features.h>
- #include <xen/interface/xen.h>
- #include <xen/interface/vcpu.h>
-
-@@ -155,7 +156,7 @@ static void do_stolen_accounting(void)
- }
-
- /* Get the TSC speed from Xen */
--unsigned long xen_tsc_khz(void)
-+static unsigned long xen_tsc_khz(void)
- {
- struct pvclock_vcpu_time_info *info =
- &HYPERVISOR_shared_info->vcpu_info[0].time;
-@@ -190,7 +191,7 @@ static void xen_read_wallclock(struct timespec *ts)
- put_cpu_var(xen_vcpu);
- }
-
--unsigned long xen_get_wallclock(void)
-+static unsigned long xen_get_wallclock(void)
- {
- struct timespec ts;
-
-@@ -198,10 +199,24 @@ unsigned long xen_get_wallclock(void)
- return ts.tv_sec;
- }
-
--int xen_set_wallclock(unsigned long now)
-+static int xen_set_wallclock(unsigned long now)
- {
-+ struct xen_platform_op op;
-+ int rc;
-+
- /* do nothing for domU */
-- return -1;
-+ if (!xen_initial_domain())
-+ return -1;
-+
-+ op.cmd = XENPF_settime;
-+ op.u.settime.secs = now;
-+ op.u.settime.nsecs = 0;
-+ op.u.settime.system_time = xen_clocksource_read();
-+
-+ rc = HYPERVISOR_dom0_op(&op);
-+ WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now);
-+
-+ return rc;
- }
-
- static struct clocksource xen_clocksource __read_mostly = {
-@@ -403,6 +418,8 @@ void xen_setup_timer(int cpu)
-
- evt->cpumask = cpumask_of(cpu);
- evt->irq = irq;
-+
-+ xen_setup_runstate_info(cpu);
- }
-
- void xen_teardown_timer(int cpu)
-@@ -424,6 +441,8 @@ void xen_timer_resume(void)
- {
- int cpu;
-
-+ pvclock_resume();
-+
- if (xen_clockevent != &xen_vcpuop_clockevent)
- return;
-
-@@ -433,7 +452,7 @@ void xen_timer_resume(void)
- }
- }
-
--__init void xen_time_init(void)
-+static __init void xen_time_init(void)
- {
- int cpu = smp_processor_id();
-
-@@ -457,3 +476,51 @@ __init void xen_time_init(void)
- xen_setup_timer(cpu);
- xen_setup_cpu_clockevents();
- }
-+
-+static const struct pv_time_ops xen_time_ops __initdata = {
-+ .sched_clock = xen_clocksource_read,
-+};
-+
-+__init void xen_init_time_ops(void)
-+{
-+ pv_time_ops = xen_time_ops;
-+
-+ x86_init.timers.timer_init = xen_time_init;
-+ x86_init.timers.setup_percpu_clockev = x86_init_noop;
-+ x86_cpuinit.setup_percpu_clockev = x86_init_noop;
-+
-+ x86_platform.calibrate_tsc = xen_tsc_khz;
-+ x86_platform.get_wallclock = xen_get_wallclock;
-+ x86_platform.set_wallclock = xen_set_wallclock;
-+}
-+
-+#ifdef CONFIG_XEN_PVHVM
-+static void xen_hvm_setup_cpu_clockevents(void)
-+{
-+ int cpu = smp_processor_id();
-+ xen_setup_runstate_info(cpu);
-+ xen_setup_timer(cpu);
-+ xen_setup_cpu_clockevents();
-+}
-+
-+__init void xen_hvm_init_time_ops(void)
-+{
-+ /* vector callback is needed otherwise we cannot receive interrupts
-+ * on cpu > 0 */
-+ if (!xen_have_vector_callback && num_present_cpus() > 1)
-+ return;
-+ if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
-+ printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
-+ "disable pv timer\n");
-+ return;
-+ }
-+
-+ pv_time_ops = xen_time_ops;
-+ x86_init.timers.setup_percpu_clockev = xen_time_init;
-+ x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
-+
-+ x86_platform.calibrate_tsc = xen_tsc_khz;
-+ x86_platform.get_wallclock = xen_get_wallclock;
-+ x86_platform.set_wallclock = xen_set_wallclock;
-+}
-+#endif
-diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
-new file mode 100644
-index 0000000..1cd7f4d
---- /dev/null
-+++ b/arch/x86/xen/vga.c
-@@ -0,0 +1,67 @@
-+#include <linux/screen_info.h>
-+#include <linux/init.h>
-+
-+#include <asm/bootparam.h>
-+#include <asm/setup.h>
-+
-+#include <xen/interface/xen.h>
-+
-+#include "xen-ops.h"
-+
-+void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
-+{
-+ struct screen_info *screen_info = &boot_params.screen_info;
-+
-+ /* This is drawn from a dump from vgacon:startup in
-+ * standard Linux. */
-+ screen_info->orig_video_mode = 3;
-+ screen_info->orig_video_isVGA = 1;
-+ screen_info->orig_video_lines = 25;
-+ screen_info->orig_video_cols = 80;
-+ screen_info->orig_video_ega_bx = 3;
-+ screen_info->orig_video_points = 16;
-+ screen_info->orig_y = screen_info->orig_video_lines - 1;
-+
-+ switch (info->video_type) {
-+ case XEN_VGATYPE_TEXT_MODE_3:
-+ if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3)
-+ + sizeof(info->u.text_mode_3))
-+ break;
-+ screen_info->orig_video_lines = info->u.text_mode_3.rows;
-+ screen_info->orig_video_cols = info->u.text_mode_3.columns;
-+ screen_info->orig_x = info->u.text_mode_3.cursor_x;
-+ screen_info->orig_y = info->u.text_mode_3.cursor_y;
-+ screen_info->orig_video_points =
-+ info->u.text_mode_3.font_height;
-+ break;
-+
-+ case XEN_VGATYPE_VESA_LFB:
-+ if (size < offsetof(struct dom0_vga_console_info,
-+ u.vesa_lfb.gbl_caps))
-+ break;
-+ screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB;
-+ screen_info->lfb_width = info->u.vesa_lfb.width;
-+ screen_info->lfb_height = info->u.vesa_lfb.height;
-+ screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel;
-+ screen_info->lfb_base = info->u.vesa_lfb.lfb_base;
-+ screen_info->lfb_size = info->u.vesa_lfb.lfb_size;
-+ screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line;
-+ screen_info->red_size = info->u.vesa_lfb.red_size;
-+ screen_info->red_pos = info->u.vesa_lfb.red_pos;
-+ screen_info->green_size = info->u.vesa_lfb.green_size;
-+ screen_info->green_pos = info->u.vesa_lfb.green_pos;
-+ screen_info->blue_size = info->u.vesa_lfb.blue_size;
-+ screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
-+ screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
-+ screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
-+ if (size >= offsetof(struct dom0_vga_console_info,
-+ u.vesa_lfb.gbl_caps)
-+ + sizeof(info->u.vesa_lfb.gbl_caps))
-+ screen_info->capabilities = info->u.vesa_lfb.gbl_caps;
-+ if (size >= offsetof(struct dom0_vga_console_info,
-+ u.vesa_lfb.mode_attrs)
-+ + sizeof(info->u.vesa_lfb.mode_attrs))
-+ screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
-+ break;
-+ }
-+}
-diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
-index f9153a3..ebbee21 100644
---- a/arch/x86/xen/xen-ops.h
-+++ b/arch/x86/xen/xen-ops.h
-@@ -30,6 +30,10 @@ void xen_setup_machphys_mapping(void);
- pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
- void xen_ident_map_ISA(void);
- void xen_reserve_top(void);
-+void xen_ident_map_ISA(void);
-+extern unsigned long xen_max_p2m_pfn;
-+
-+void xen_set_pat(u64);
-
- char * __init xen_memory_setup(void);
- void __init xen_arch_setup(void);
-@@ -38,6 +42,10 @@ void xen_enable_sysenter(void);
- void xen_enable_syscall(void);
- void xen_vcpu_restore(void);
-
-+void xen_callback_vector(void);
-+void xen_hvm_init_shared_info(void);
-+void __init xen_unplug_emulated_devices(void);
-+
- void __init xen_build_dynamic_phys_to_machine(void);
-
- void xen_init_irq_ops(void);
-@@ -46,11 +54,8 @@ void xen_setup_runstate_info(int cpu);
- void xen_teardown_timer(int cpu);
- cycle_t xen_clocksource_read(void);
- void xen_setup_cpu_clockevents(void);
--unsigned long xen_tsc_khz(void);
--void __init xen_time_init(void);
--unsigned long xen_get_wallclock(void);
--int xen_set_wallclock(unsigned long time);
--unsigned long long xen_sched_clock(void);
-+void __init xen_init_time_ops(void);
-+void __init xen_hvm_init_time_ops(void);
-
- irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
-
-@@ -82,6 +87,23 @@ static inline void xen_uninit_lock_cpu(int cpu)
- }
- #endif
-
-+struct dom0_vga_console_info;
-+
-+#ifdef CONFIG_XEN_DOM0
-+void xen_init_vga(const struct dom0_vga_console_info *, size_t size);
-+#else
-+static inline void xen_init_vga(const struct dom0_vga_console_info *info,
-+ size_t size)
-+{
-+}
-+#endif
-+
-+#ifdef CONFIG_XEN_DOM0
-+void xen_init_apic(void);
-+#else
-+static inline void xen_init_apic(void) {}
-+#endif
-+
- /* Declare an asm function, along with symbols needed to make it
- inlineable */
- #define DECL_ASM(ret, name, ...) \
-@@ -101,4 +123,6 @@ void xen_sysret32(void);
- void xen_sysret64(void);
- void xen_adjust_exception_frame(void);
-
-+extern int xen_panic_handler_init(void);
-+
- #endif /* XEN_OPS_H */
-diff --git a/block/blk-core.c b/block/blk-core.c
-index 71da511..32d305c 100644
---- a/block/blk-core.c
-+++ b/block/blk-core.c
-@@ -439,6 +439,7 @@ void blk_put_queue(struct request_queue *q)
- {
- kobject_put(&q->kobj);
- }
-+EXPORT_SYMBOL_GPL(blk_put_queue);
-
- void blk_cleanup_queue(struct request_queue *q)
- {
-@@ -612,6 +613,7 @@ int blk_get_queue(struct request_queue *q)
-
- return 1;
- }
-+EXPORT_SYMBOL_GPL(blk_get_queue);
-
- static inline void blk_free_request(struct request_queue *q, struct request *rq)
- {
-diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
-index 7702118..1be123c 100644
---- a/drivers/acpi/Makefile
-+++ b/drivers/acpi/Makefile
-@@ -61,6 +61,7 @@ obj-$(CONFIG_ACPI_POWER_METER) += power_meter.o
- # processor has its own "processor." module_param namespace
- processor-y := processor_core.o processor_throttling.o
- processor-y += processor_idle.o processor_thermal.o
-+processor-y += processor_xen.o
- processor-$(CONFIG_CPU_FREQ) += processor_perflib.o
-
- obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
-diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
-index 28ccdbc..b0f9ed6 100644
---- a/drivers/acpi/acpi_memhotplug.c
-+++ b/drivers/acpi/acpi_memhotplug.c
-@@ -31,6 +31,7 @@
- #include <linux/types.h>
- #include <linux/memory_hotplug.h>
- #include <acpi/acpi_drivers.h>
-+#include <xen/acpi.h>
-
- #define ACPI_MEMORY_DEVICE_CLASS "memory"
- #define ACPI_MEMORY_DEVICE_HID "PNP0C80"
-@@ -70,21 +71,6 @@ static struct acpi_driver acpi_memory_device_driver = {
- },
- };
-
--struct acpi_memory_info {
-- struct list_head list;
-- u64 start_addr; /* Memory Range start physical addr */
-- u64 length; /* Memory Range length */
-- unsigned short caching; /* memory cache attribute */
-- unsigned short write_protect; /* memory read/write attribute */
-- unsigned int enabled:1;
--};
--
--struct acpi_memory_device {
-- struct acpi_device * device;
-- unsigned int state; /* State of the memory device */
-- struct list_head res_list;
--};
--
- static int acpi_hotmem_initialized;
-
- static acpi_status
-@@ -228,6 +214,9 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
- return result;
- }
-
-+ if (xen_initial_domain())
-+ return xen_hotadd_memory(mem_device);
-+
- node = acpi_get_node(mem_device->device->handle);
- /*
- * Tell the VM there is more memory here...
-diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
-index cc22f9a..747d96f 100644
---- a/drivers/acpi/acpica/hwsleep.c
-+++ b/drivers/acpi/acpica/hwsleep.c
-@@ -47,6 +47,9 @@
- #include "actables.h"
- #include <linux/tboot.h>
-
-+#include <xen/acpi.h>
-+#include <asm/xen/hypervisor.h>
-+
- #define _COMPONENT ACPI_HARDWARE
- ACPI_MODULE_NAME("hwsleep")
-
-@@ -346,6 +349,19 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
- tboot_sleep(sleep_state, pm1a_control, pm1b_control);
-
- /* Write #2: Write both SLP_TYP + SLP_EN */
-+ if (xen_pv_acpi()) {
-+ int err;
-+
-+ err = acpi_notify_hypervisor_state(sleep_state,
-+ pm1a_control, pm1b_control);
-+ if (err) {
-+ ACPI_DEBUG_PRINT((ACPI_DB_INIT,
-+ "Hypervisor failure [%d]\n", err));
-+ return_ACPI_STATUS(AE_ERROR);
-+ }
-+
-+ return_ACPI_STATUS(AE_OK);
-+ }
-
- status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
- if (ACPI_FAILURE(status)) {
-diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
-index 7102474..2428cc0 100644
---- a/drivers/acpi/processor_core.c
-+++ b/drivers/acpi/processor_core.c
-@@ -58,6 +58,7 @@
- #include <acpi/acpi_bus.h>
- #include <acpi/acpi_drivers.h>
- #include <acpi/processor.h>
-+#include <xen/acpi.h>
-
- #define PREFIX "ACPI: "
-
-@@ -81,11 +82,9 @@ MODULE_DESCRIPTION("ACPI Processor Driver");
- MODULE_LICENSE("GPL");
-
- static int acpi_processor_add(struct acpi_device *device);
--static int acpi_processor_remove(struct acpi_device *device, int type);
- #ifdef CONFIG_ACPI_PROCFS
- static int acpi_processor_info_open_fs(struct inode *inode, struct file *file);
- #endif
--static void acpi_processor_notify(struct acpi_device *device, u32 event);
- static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu);
- static int acpi_processor_handle_eject(struct acpi_processor *pr);
-
-@@ -247,7 +246,7 @@ static int acpi_processor_errata_piix4(struct pci_dev *dev)
- return 0;
- }
-
--static int acpi_processor_errata(struct acpi_processor *pr)
-+int acpi_processor_errata(struct acpi_processor *pr)
- {
- int result = 0;
- struct pci_dev *dev = NULL;
-@@ -278,7 +277,7 @@ static int acpi_processor_errata(struct acpi_processor *pr)
- * _PDC is required for a BIOS-OS handshake for most of the newer
- * ACPI processor features.
- */
--static int acpi_processor_set_pdc(struct acpi_processor *pr)
-+int acpi_processor_set_pdc(struct acpi_processor *pr)
- {
- struct acpi_object_list *pdc_in = pr->pdc;
- acpi_status status = AE_OK;
-@@ -347,7 +346,7 @@ static int acpi_processor_info_open_fs(struct inode *inode, struct file *file)
- PDE(inode)->data);
- }
-
--static int acpi_processor_add_fs(struct acpi_device *device)
-+int acpi_processor_add_fs(struct acpi_device *device)
- {
- struct proc_dir_entry *entry = NULL;
-
-@@ -386,7 +385,7 @@ static int acpi_processor_add_fs(struct acpi_device *device)
- return -EIO;
- return 0;
- }
--static int acpi_processor_remove_fs(struct acpi_device *device)
-+int acpi_processor_remove_fs(struct acpi_device *device)
- {
-
- if (acpi_device_dir(device)) {
-@@ -402,15 +401,6 @@ static int acpi_processor_remove_fs(struct acpi_device *device)
-
- return 0;
- }
--#else
--static inline int acpi_processor_add_fs(struct acpi_device *device)
--{
-- return 0;
--}
--static inline int acpi_processor_remove_fs(struct acpi_device *device)
--{
-- return 0;
--}
- #endif
-
- /* Use the acpiid in MADT to map cpus in case of SMP */
-@@ -705,7 +695,7 @@ static int acpi_processor_get_info(struct acpi_device *device)
-
- static DEFINE_PER_CPU(void *, processor_device_array);
-
--static void acpi_processor_notify(struct acpi_device *device, u32 event)
-+void acpi_processor_notify(struct acpi_device *device, u32 event)
- {
- struct acpi_processor *pr = acpi_driver_data(device);
- int saved;
-@@ -873,7 +863,7 @@ err_free_cpumask:
- return result;
- }
-
--static int acpi_processor_remove(struct acpi_device *device, int type)
-+int acpi_processor_remove(struct acpi_device *device, int type)
- {
- struct acpi_processor *pr = NULL;
-
-@@ -1148,7 +1138,11 @@ static int __init acpi_processor_init(void)
- if (result < 0)
- goto out_proc;
-
-- result = acpi_bus_register_driver(&acpi_processor_driver);
-+ if (xen_initial_domain())
-+ result = xen_acpi_processor_init();
-+ else
-+ result = acpi_bus_register_driver(&acpi_processor_driver);
-+
- if (result < 0)
- goto out_cpuidle;
-
-@@ -1184,7 +1178,10 @@ static void __exit acpi_processor_exit(void)
-
- acpi_processor_uninstall_hotplug_notify();
-
-- acpi_bus_unregister_driver(&acpi_processor_driver);
-+ if (xen_initial_domain())
-+ xen_acpi_processor_exit();
-+ else
-+ acpi_bus_unregister_driver(&acpi_processor_driver);
-
- cpuidle_unregister_driver(&acpi_idle_driver);
-
-diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
-index a6ad608..3c32e87 100644
---- a/drivers/acpi/processor_idle.c
-+++ b/drivers/acpi/processor_idle.c
-@@ -58,6 +58,7 @@
-
- #include <acpi/acpi_bus.h>
- #include <acpi/processor.h>
-+#include <xen/acpi.h>
- #include <asm/processor.h>
-
- #define PREFIX "ACPI: "
-@@ -439,7 +440,8 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
- cx.entry_method = ACPI_CSTATE_HALT;
- snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
- } else {
-- continue;
-+ if (!xen_initial_domain())
-+ continue;
- }
- if (cx.type == ACPI_STATE_C1 &&
- (idle_halt || idle_nomwait)) {
-@@ -477,6 +479,9 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
-
- cx.power = obj->integer.value;
-
-+ /* cache control methods to notify xen */
-+ processor_cntl_xen_power_cache(pr->acpi_id, i, reg);
-+
- current_count++;
- memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
-
-@@ -653,7 +658,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
- return (working);
- }
-
--static int acpi_processor_get_power_info(struct acpi_processor *pr)
-+int acpi_processor_get_power_info(struct acpi_processor *pr)
- {
- unsigned int i;
- int result;
-@@ -1223,9 +1228,14 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
- * platforms that only support C1.
- */
- if (pr->flags.power) {
-- acpi_processor_setup_cpuidle(pr);
-- if (cpuidle_register_device(&pr->power.dev))
-- return -EIO;
-+ if (xen_initial_domain()) {
-+ processor_cntl_xen_notify(pr,
-+ PROCESSOR_PM_INIT, PM_TYPE_IDLE);
-+ } else {
-+ acpi_processor_setup_cpuidle(pr);
-+ if (cpuidle_register_device(&pr->power.dev))
-+ return -EIO;
-+ }
- }
- #ifdef CONFIG_ACPI_PROCFS
- /* 'power' [R] */
-diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
-index 40d395e..7ba143d 100644
---- a/drivers/acpi/processor_perflib.c
-+++ b/drivers/acpi/processor_perflib.c
-@@ -332,7 +332,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
- return result;
- }
-
--static int acpi_processor_get_performance_info(struct acpi_processor *pr)
-+int acpi_processor_get_performance_info(struct acpi_processor *pr)
- {
- int result = 0;
- acpi_status status = AE_OK;
-@@ -438,7 +438,7 @@ int acpi_processor_notify_smm(struct module *calling_module)
-
- EXPORT_SYMBOL(acpi_processor_notify_smm);
-
--static int acpi_processor_get_psd(struct acpi_processor *pr)
-+int acpi_processor_get_psd(struct acpi_processor *pr)
- {
- int result = 0;
- acpi_status status = AE_OK;
-diff --git a/drivers/acpi/processor_xen.c b/drivers/acpi/processor_xen.c
-new file mode 100644
-index 0000000..305398d
---- /dev/null
-+++ b/drivers/acpi/processor_xen.c
-@@ -0,0 +1,651 @@
-+/*
-+ * processor_xen.c - ACPI Processor Driver for xen
-+ *
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at
-+ * your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along
-+ * with this program; if not, write to the Free Software Foundation, Inc.,
-+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
-+ *
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/types.h>
-+#include <linux/pci.h>
-+#include <linux/pm.h>
-+#include <linux/cpufreq.h>
-+#include <linux/cpu.h>
-+#include <linux/proc_fs.h>
-+#include <linux/seq_file.h>
-+#include <linux/dmi.h>
-+#include <linux/moduleparam.h>
-+#include <linux/cpuidle.h>
-+#include <linux/acpi.h>
-+
-+#include <acpi/acpi_bus.h>
-+#include <acpi/acpi_drivers.h>
-+#include <acpi/processor.h>
-+#include <xen/acpi.h>
-+#include <xen/pcpu.h>
-+
-+#define PREFIX "ACPI: "
-+
-+#define ACPI_PROCESSOR_CLASS "processor"
-+#define ACPI_PROCESSOR_DEVICE_NAME "Processor"
-+#define ACPI_PROCESSOR_FILE_INFO "info"
-+#define ACPI_PROCESSOR_FILE_THROTTLING "throttling"
-+#define ACPI_PROCESSOR_FILE_LIMIT "limit"
-+#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
-+#define ACPI_PROCESSOR_NOTIFY_POWER 0x81
-+#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82
-+
-+#define _COMPONENT ACPI_PROCESSOR_COMPONENT
-+ACPI_MODULE_NAME("processor_xen");
-+
-+static const struct acpi_device_id processor_device_ids[] = {
-+ {ACPI_PROCESSOR_OBJECT_HID, 0},
-+ {"ACPI0007", 0},
-+ {"", 0},
-+};
-+
-+/*
-+ * Xen ACPI processor driver
-+ */
-+
-+/* from processor_core.c */
-+
-+static int xen_acpi_processor_add(struct acpi_device *device);
-+static void xen_acpi_processor_notify(struct acpi_device *device, u32 event);
-+
-+struct acpi_driver xen_acpi_processor_driver = {
-+ .name = "processor",
-+ .class = ACPI_PROCESSOR_CLASS,
-+ .ids = processor_device_ids,
-+ .ops = {
-+ .add = xen_acpi_processor_add,
-+ .remove = acpi_processor_remove,
-+ .suspend = acpi_processor_suspend,
-+ .resume = acpi_processor_resume,
-+ .notify = xen_acpi_processor_notify,
-+ },
-+};
-+
-+static int is_processor_present(acpi_handle handle)
-+{
-+ acpi_status status;
-+ unsigned long long sta = 0;
-+
-+
-+ status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
-+
-+ if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT))
-+ return 1;
-+
-+ /*
-+ * _STA is mandatory for a processor that supports hot plug
-+ */
-+ if (status == AE_NOT_FOUND)
-+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-+ "Processor does not support hot plug\n"));
-+ else
-+ ACPI_EXCEPTION((AE_INFO, status,
-+ "Processor Device is not present"));
-+ return 0;
-+}
-+
-+static acpi_status
-+xen_acpi_processor_hotadd_init(struct acpi_processor *pr, int *p_cpu)
-+{
-+ if (!is_processor_present(pr->handle))
-+ return AE_ERROR;
-+
-+ if (processor_cntl_xen_notify(pr,
-+ PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD))
-+ return AE_ERROR;
-+
-+ return AE_OK;
-+}
-+
-+static int xen_acpi_processor_get_info(struct acpi_device *device)
-+{
-+ acpi_status status = 0;
-+ union acpi_object object = { 0 };
-+ struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-+ struct acpi_processor *pr;
-+ int cpu_index, device_declaration = 0;
-+ static int cpu0_initialized;
-+
-+ pr = acpi_driver_data(device);
-+ if (!pr)
-+ return -EINVAL;
-+
-+ if (num_online_cpus() > 1)
-+ errata.smp = TRUE;
-+
-+ acpi_processor_errata(pr);
-+
-+ /*
-+ * Check to see if we have bus mastering arbitration control. This
-+ * is required for proper C3 usage (to maintain cache coherency).
-+ */
-+ if (acpi_gbl_FADT.pm2_control_block &&
-+ acpi_gbl_FADT.pm2_control_length) {
-+ pr->flags.bm_control = 1;
-+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-+ "Bus mastering arbitration control present\n"
-+ ));
-+ } else
-+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-+ "No bus mastering arbitration control\n"));
-+
-+ if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_OBJECT_HID)) {
-+ /* Declared with "Processor" statement; match ProcessorID */
-+ status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer);
-+ if (ACPI_FAILURE(status)) {
-+ printk(KERN_ERR PREFIX "Evaluating processor object\n");
-+ return -ENODEV;
-+ }
-+
-+ /*
-+ * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP.
-+ * >>> 'acpi_get_processor_id(acpi_id, &id)' in
-+ * arch/xxx/acpi.c
-+ */
-+ pr->acpi_id = object.processor.proc_id;
-+ } else {
-+ /*
-+ * Declared with "Device" statement; match _UID.
-+ * Note that we don't handle string _UIDs yet.
-+ */
-+ unsigned long long value;
-+ status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID,
-+ NULL, &value);
-+ if (ACPI_FAILURE(status)) {
-+ printk(KERN_ERR PREFIX
-+ "Evaluating processor _UID [%#x]\n", status);
-+ return -ENODEV;
-+ }
-+ device_declaration = 1;
-+ pr->acpi_id = value;
-+ }
-+
-+ /* TBD: add Xen specific code to query cpu_index */
-+ cpu_index = -1;
-+
-+ /* Handle UP system running SMP kernel, with no LAPIC in MADT */
-+ if (!cpu0_initialized && (cpu_index == -1) &&
-+ (num_online_cpus() == 1)) {
-+ cpu_index = 0;
-+ }
-+
-+ cpu0_initialized = 1;
-+
-+ pr->id = cpu_index;
-+
-+ /*
-+ * Extra Processor objects may be enumerated on MP systems with
-+ * less than the max # of CPUs, or Xen vCPU < pCPU.
-+ * They should be ignored _iff they are physically not present.
-+ *
-+ */
-+ if (xen_pcpu_index(pr->acpi_id, 1) == -1) {
-+ if (ACPI_FAILURE
-+ (xen_acpi_processor_hotadd_init(pr, &pr->id))) {
-+ return -ENODEV;
-+ }
-+ }
-+
-+ /*
-+ * On some boxes several processors use the same processor bus id,
-+ * but they are located in different scopes. For example:
-+ * \_SB.SCK0.CPU0
-+ * \_SB.SCK1.CPU0
-+ * Rename the processor device bus id. The new bus id is
-+ * generated in the following format:
-+ * CPU+CPU ID.
-+ */
-+ sprintf(acpi_device_bid(device), "CPU%X", pr->id);
-+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id,
-+ pr->acpi_id));
-+
-+ if (!object.processor.pblk_address)
-+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No PBLK (NULL address)\n"));
-+ else if (object.processor.pblk_length != 6)
-+ printk(KERN_ERR PREFIX "Invalid PBLK length [%d]\n",
-+ object.processor.pblk_length);
-+ else {
-+ pr->throttling.address = object.processor.pblk_address;
-+ pr->throttling.duty_offset = acpi_gbl_FADT.duty_offset;
-+ pr->throttling.duty_width = acpi_gbl_FADT.duty_width;
-+
-+ pr->pblk = object.processor.pblk_address;
-+
-+ /*
-+ * We don't care about error returns - we just try to mark
-+ * these reserved so that nobody else is confused into thinking
-+ * that this region might be unused..
-+ *
-+ * (In particular, allocating the IO range for Cardbus)
-+ */
-+ request_region(pr->throttling.address, 6, "ACPI CPU throttle");
-+ }
-+
-+ /*
-+ * If ACPI describes a slot number for this CPU, we can use it
-+ * to ensure we get the right value in the "physical id" field
-+ * of /proc/cpuinfo
-+ */
-+ status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer);
-+ if (ACPI_SUCCESS(status))
-+ arch_fix_phys_package_id(pr->id, object.integer.value);
-+
-+ return 0;
-+}
-+
-+static struct acpi_device *processor_device_array[XEN_MAX_ACPI_ID + 1];
-+
-+static int __cpuinit xen_acpi_processor_add(struct acpi_device *device)
-+{
-+ struct acpi_processor *pr = NULL;
-+ int result = 0;
-+ struct sys_device *sysdev;
-+
-+ pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
-+ if (!pr)
-+ return -ENOMEM;
-+
-+ if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
-+ kfree(pr);
-+ return -ENOMEM;
-+ }
-+
-+ pr->handle = device->handle;
-+ strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME);
-+ strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
-+ device->driver_data = pr;
-+
-+ result = xen_acpi_processor_get_info(device);
-+ if (result) {
-+ /* Processor is physically not present */
-+ return 0;
-+ }
-+
-+ /*
-+ * Buggy BIOS check
-+ * ACPI id of processors can be reported wrongly by the BIOS.
-+ * Don't trust it blindly
-+ */
-+ if (pr->acpi_id > XEN_MAX_ACPI_ID ||
-+ (processor_device_array[pr->acpi_id] != NULL &&
-+ processor_device_array[pr->acpi_id] != device)) {
-+ printk(KERN_WARNING "BIOS reported wrong ACPI id "
-+ "for the processor\n");
-+ result = -ENODEV;
-+ goto err_free_cpumask;
-+ }
-+
-+ processor_device_array[pr->acpi_id] = device;
-+
-+ if (pr->id != -1) {
-+ per_cpu(processors, pr->id) = pr;
-+
-+ result = acpi_processor_add_fs(device);
-+ if (result)
-+ goto err_free_cpumask;
-+
-+ sysdev = get_cpu_sysdev(pr->id);
-+ if (sysdev != NULL && sysfs_create_link(&device->dev.kobj,
-+ &sysdev->kobj, "sysdev")) {
-+ result = -EFAULT;
-+ goto err_remove_fs;
-+ }
-+ }
-+
-+ /* _PDC call should be done before doing anything else (if reqd.). */
-+ xen_arch_acpi_processor_init_pdc(pr);
-+ acpi_processor_set_pdc(pr);
-+ arch_acpi_processor_cleanup_pdc(pr);
-+
-+#ifdef CONFIG_CPU_FREQ
-+ xen_acpi_processor_ppc_has_changed(pr);
-+ result = xen_acpi_processor_get_performance(pr);
-+ if (result)
-+ goto err_remove_fs;
-+#endif
-+
-+ if (pr->id != -1) {
-+ acpi_processor_get_throttling_info(pr);
-+ acpi_processor_get_limit_info(pr);
-+ }
-+
-+ xen_acpi_processor_power_init(pr, device);
-+
-+ if (pr->id != -1) {
-+ pr->cdev = thermal_cooling_device_register("Processor", device,
-+ &processor_cooling_ops);
-+ if (IS_ERR(pr->cdev)) {
-+ result = PTR_ERR(pr->cdev);
-+ goto err_power_exit;
-+ }
-+
-+ dev_info(&device->dev, "registered as cooling_device%d\n",
-+ pr->cdev->id);
-+
-+ result = sysfs_create_link(&device->dev.kobj,
-+ &pr->cdev->device.kobj,
-+ "thermal_cooling");
-+ if (result) {
-+ printk(KERN_ERR PREFIX "Create sysfs link\n");
-+ goto err_thermal_unregister;
-+ }
-+ result = sysfs_create_link(&pr->cdev->device.kobj,
-+ &device->dev.kobj,
-+ "device");
-+ if (result) {
-+ printk(KERN_ERR PREFIX "Create sysfs link\n");
-+ goto err_remove_sysfs;
-+ }
-+ }
-+
-+ return 0;
-+
-+err_remove_sysfs:
-+ sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
-+err_thermal_unregister:
-+ thermal_cooling_device_unregister(pr->cdev);
-+err_power_exit:
-+ acpi_processor_power_exit(pr, device);
-+err_remove_fs:
-+ acpi_processor_remove_fs(device);
-+err_free_cpumask:
-+ free_cpumask_var(pr->throttling.shared_cpu_map);
-+
-+ return result;
-+}
-+
-+static void xen_acpi_processor_notify(struct acpi_device *device, u32 event)
-+{
-+ struct acpi_processor *pr = acpi_driver_data(device);
-+ int saved;
-+
-+ if (!pr)
-+ return;
-+
-+ switch (event) {
-+ case ACPI_PROCESSOR_NOTIFY_PERFORMANCE:
-+ saved = pr->performance_platform_limit;
-+ xen_acpi_processor_ppc_has_changed(pr);
-+ if (saved == pr->performance_platform_limit)
-+ break;
-+ acpi_bus_generate_proc_event(device, event,
-+ pr->performance_platform_limit);
-+ acpi_bus_generate_netlink_event(device->pnp.device_class,
-+ dev_name(&device->dev), event,
-+ pr->performance_platform_limit);
-+ break;
-+ case ACPI_PROCESSOR_NOTIFY_POWER:
-+ xen_acpi_processor_cst_has_changed(pr);
-+ acpi_bus_generate_proc_event(device, event, 0);
-+ acpi_bus_generate_netlink_event(device->pnp.device_class,
-+ dev_name(&device->dev), event, 0);
-+ break;
-+ case ACPI_PROCESSOR_NOTIFY_THROTTLING:
-+ acpi_processor_tstate_has_changed(pr);
-+ acpi_bus_generate_proc_event(device, event, 0);
-+ acpi_bus_generate_netlink_event(device->pnp.device_class,
-+ dev_name(&device->dev), event, 0);
-+ default:
-+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-+ "Unsupported event [0x%x]\n", event));
-+ break;
-+ }
-+
-+ return;
-+}
-+
-+/* from processor_idle.c */
-+
-+static int xen_acpi_processor_get_power_info(struct acpi_processor *pr)
-+{
-+ int ret;
-+ int invalid_pr_id = 0;
-+
-+ /*
-+ * acpi_processor_get_power_info need valid pr->id
-+ * so set pr->id=0 temporarily
-+ */
-+ if (pr->id == -1) {
-+ invalid_pr_id = 1;
-+ pr->id = 0;
-+ }
-+
-+ ret = acpi_processor_get_power_info(pr);
-+
-+ if (invalid_pr_id)
-+ pr->id = -1;
-+
-+ return ret;
-+}
-+
-+int xen_acpi_processor_cst_has_changed(struct acpi_processor *pr)
-+{
-+ if (!pr)
-+ return -EINVAL;
-+
-+ if (!pr->flags.power_setup_done)
-+ return -ENODEV;
-+
-+ xen_acpi_processor_get_power_info(pr);
-+
-+ processor_cntl_xen_notify(pr,
-+ PROCESSOR_PM_CHANGE, PM_TYPE_IDLE);
-+
-+ return 0;
-+}
-+
-+
-+int __cpuinit xen_acpi_processor_power_init(struct acpi_processor *pr,
-+ struct acpi_device *device)
-+{
-+ acpi_status status = 0;
-+ unsigned int i;
-+
-+ if (!pr)
-+ return -EINVAL;
-+
-+ if (acpi_gbl_FADT.cst_control) {
-+ status = acpi_os_write_port(acpi_gbl_FADT.smi_command,
-+ acpi_gbl_FADT.cst_control, 8);
-+ if (ACPI_FAILURE(status)) {
-+ ACPI_EXCEPTION((AE_INFO, status,
-+ "Notifying BIOS of _CST ability failed"));
-+ }
-+ }
-+
-+ xen_acpi_processor_get_power_info(pr);
-+
-+ pr->flags.power_setup_done = 1;
-+
-+ if (pr->flags.power) {
-+ processor_cntl_xen_notify(pr,
-+ PROCESSOR_PM_INIT, PM_TYPE_IDLE);
-+
-+ printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
-+ for (i = 1; i <= pr->power.count; i++)
-+ if (pr->power.states[i].valid)
-+ printk(" C%d[C%d]", i,
-+ pr->power.states[i].type);
-+ printk(")\n");
-+ }
-+
-+ return 0;
-+}
-+
-+/* from processor_perflib.c */
-+
-+#ifdef CONFIG_CPU_FREQ
-+static int xen_processor_notify_smm(void)
-+{
-+ acpi_status status;
-+ static int is_done;
-+
-+ /* only need successfully notify BIOS once */
-+ /* avoid double notification which may lead to unexpected result */
-+ if (is_done)
-+ return 0;
-+
-+ /* Can't write pstate_cnt to smi_cmd if either value is zero */
-+ if ((!acpi_gbl_FADT.smi_command) || (!acpi_gbl_FADT.pstate_control)) {
-+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No SMI port or pstate_cnt\n"));
-+ return 0;
-+ }
-+
-+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-+ "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n",
-+ acpi_gbl_FADT.pstate_control, acpi_gbl_FADT.smi_command));
-+
-+ status = acpi_os_write_port(acpi_gbl_FADT.smi_command,
-+ (u32) acpi_gbl_FADT.pstate_control, 8);
-+ if (ACPI_FAILURE(status))
-+ return status;
-+
-+ is_done = 1;
-+
-+ return 0;
-+}
-+
-+static int xen_acpi_processor_get_platform_limit(struct acpi_processor *pr)
-+{
-+ acpi_status status = 0;
-+ unsigned long long ppc = 0;
-+
-+ if (!pr)
-+ return -EINVAL;
-+
-+ /*
-+ * _PPC indicates the maximum state currently supported by the platform
-+ * (e.g. 0 = states 0..n; 1 = states 1..n; etc.
-+ */
-+ status = acpi_evaluate_integer(pr->handle, "_PPC", NULL, &ppc);
-+
-+ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
-+ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PPC"));
-+ return -ENODEV;
-+ }
-+
-+ pr->performance_platform_limit = (int)ppc;
-+
-+ return 0;
-+}
-+
-+int xen_acpi_processor_ppc_has_changed(struct acpi_processor *pr)
-+{
-+ int ret;
-+
-+ ret = xen_acpi_processor_get_platform_limit(pr);
-+
-+ if (ret < 0)
-+ return ret;
-+ else
-+ return processor_cntl_xen_notify(pr,
-+ PROCESSOR_PM_CHANGE, PM_TYPE_PERF);
-+}
-+
-+/*
-+ * Existing ACPI module does parse performance states at some point,
-+ * when acpi-cpufreq driver is loaded which however is something
-+ * we'd like to disable to avoid confliction with xen PM
-+ * logic. So we have to collect raw performance information here
-+ * when ACPI processor object is found and started.
-+ */
-+int xen_acpi_processor_get_performance(struct acpi_processor *pr)
-+{
-+ int ret;
-+ struct acpi_processor_performance *perf;
-+ struct acpi_psd_package *pdomain;
-+
-+ if (pr->performance)
-+ return -EBUSY;
-+
-+ perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL);
-+ if (!perf)
-+ return -ENOMEM;
-+
-+ pr->performance = perf;
-+ /* Get basic performance state information */
-+ ret = acpi_processor_get_performance_info(pr);
-+ if (ret < 0)
-+ goto err_out;
-+
-+ /*
-+ * Well, here we need retrieve performance dependency information
-+ * from _PSD object. The reason why existing interface is not used
-+ * is due to the reason that existing interface sticks to Linux cpu
-+ * id to construct some bitmap, however we want to split ACPI
-+ * processor objects from Linux cpu id logic. For example, even
-+ * when Linux is configured as UP, we still want to parse all ACPI
-+ * processor objects to xen. In this case, it's preferred
-+ * to use ACPI ID instead.
-+ */
-+ pdomain = &pr->performance->domain_info;
-+ pdomain->num_processors = 0;
-+ ret = acpi_processor_get_psd(pr);
-+ if (ret < 0) {
-+ /*
-+ * _PSD is optional - assume no coordination if absent (or
-+ * broken), matching native kernels' behavior.
-+ */
-+ pdomain->num_entries = ACPI_PSD_REV0_ENTRIES;
-+ pdomain->revision = ACPI_PSD_REV0_REVISION;
-+ pdomain->domain = pr->acpi_id;
-+ pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL;
-+ pdomain->num_processors = 1;
-+ }
-+
-+ /* Some sanity check */
-+ if ((pdomain->revision != ACPI_PSD_REV0_REVISION) ||
-+ (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) ||
-+ ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) &&
-+ (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) &&
-+ (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) {
-+ ret = -EINVAL;
-+ goto err_out;
-+ }
-+
-+ /* Last step is to notify BIOS that xen exists */
-+ xen_processor_notify_smm();
-+
-+ processor_cntl_xen_notify(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF);
-+
-+ return 0;
-+err_out:
-+ pr->performance = NULL;
-+ kfree(perf);
-+ return ret;
-+}
-+#endif /* CONFIG_CPU_FREQ */
-+
-+/* init and exit */
-+
-+int xen_acpi_processor_init(void)
-+{
-+ return acpi_bus_register_driver(&xen_acpi_processor_driver);
-+}
-+
-+void xen_acpi_processor_exit(void)
-+{
-+ acpi_bus_unregister_driver(&xen_acpi_processor_driver);
-+}
-diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
-index 0458094..85a1308 100644
---- a/drivers/acpi/sleep.c
-+++ b/drivers/acpi/sleep.c
-@@ -19,6 +19,8 @@
-
- #include <asm/io.h>
-
-+#include <xen/acpi.h>
-+
- #include <acpi/acpi_bus.h>
- #include <acpi/acpi_drivers.h>
-
-@@ -200,6 +202,21 @@ static int acpi_suspend_begin(suspend_state_t pm_state)
- return error;
- }
-
-+static void do_suspend(void)
-+{
-+ if (!xen_pv_acpi()) {
-+ do_suspend_lowlevel();
-+ return;
-+ }
-+
-+ /*
-+ * Xen will save and restore CPU context, so
-+ * we can skip that and just go straight to
-+ * the suspend.
-+ */
-+ acpi_enter_sleep_state(ACPI_STATE_S3);
-+}
-+
- /**
- * acpi_suspend_enter - Actually enter a sleep state.
- * @pm_state: ignored
-@@ -233,7 +250,7 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
- break;
-
- case ACPI_STATE_S3:
-- do_suspend_lowlevel();
-+ do_suspend();
- break;
- }
-
-diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
-index 1d886e0..f4a2b10 100644
---- a/drivers/block/Kconfig
-+++ b/drivers/block/Kconfig
-@@ -462,6 +462,7 @@ config XEN_BLKDEV_FRONTEND
- tristate "Xen virtual block device support"
- depends on XEN
- default y
-+ select XEN_XENBUS_FRONTEND
- help
- This driver implements the front-end of the Xen virtual
- block device driver. It communicates with a back-end driver
-diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
-index b8578bb..9679ffa 100644
---- a/drivers/block/xen-blkfront.c
-+++ b/drivers/block/xen-blkfront.c
-@@ -42,10 +42,12 @@
- #include <linux/module.h>
- #include <linux/scatterlist.h>
-
-+#include <xen/xen.h>
- #include <xen/xenbus.h>
- #include <xen/grant_table.h>
- #include <xen/events.h>
- #include <xen/page.h>
-+#include <xen/platform_pci.h>
-
- #include <xen/interface/grant_table.h>
- #include <xen/interface/io/blkif.h>
-@@ -67,7 +69,7 @@ struct blk_shadow {
-
- static const struct block_device_operations xlvbd_block_fops;
-
--#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
-+#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
-
- /*
- * We have one of these per vbd, whether ide, scsi or 'other'. They
-@@ -76,6 +78,7 @@ static const struct block_device_operations xlvbd_block_fops;
- */
- struct blkfront_info
- {
-+ struct mutex mutex;
- struct xenbus_device *xbdev;
- struct gendisk *gd;
- int vdevice;
-@@ -85,6 +88,7 @@ struct blkfront_info
- struct blkif_front_ring ring;
- struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- unsigned int evtchn, irq;
-+ struct tasklet_struct tasklet;
- struct request_queue *rq;
- struct work_struct work;
- struct gnttab_free_callback callback;
-@@ -93,14 +97,12 @@ struct blkfront_info
- int feature_barrier;
- int is_ready;
-
-- /**
-- * The number of people holding this device open. We won't allow a
-- * hot-unplug unless this is 0.
-- */
-- int users;
-+ spinlock_t io_lock;
- };
-
--static DEFINE_SPINLOCK(blkif_io_lock);
-+static unsigned int nr_minors;
-+static unsigned long *minors;
-+static DEFINE_SPINLOCK(minor_lock);
-
- #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
- (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-@@ -116,6 +118,10 @@ static DEFINE_SPINLOCK(blkif_io_lock);
- #define EXTENDED (1<<EXT_SHIFT)
- #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
- #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
-+#define EMULATED_HD_DISK_MINOR_OFFSET (0)
-+#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
-+#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
-+#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
-
- #define DEV_NAME "xvd" /* name in /dev */
-
-@@ -136,6 +142,55 @@ static void add_id_to_freelist(struct blkfront_info *info,
- info->shadow_free = id;
- }
-
-+static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
-+{
-+ unsigned int end = minor + nr;
-+ int rc;
-+
-+ if (end > nr_minors) {
-+ unsigned long *bitmap, *old;
-+
-+ bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
-+ GFP_KERNEL);
-+ if (bitmap == NULL)
-+ return -ENOMEM;
-+
-+ spin_lock(&minor_lock);
-+ if (end > nr_minors) {
-+ old = minors;
-+ memcpy(bitmap, minors,
-+ BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
-+ minors = bitmap;
-+ nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
-+ } else
-+ old = bitmap;
-+ spin_unlock(&minor_lock);
-+ kfree(old);
-+ }
-+
-+ spin_lock(&minor_lock);
-+ if (find_next_bit(minors, end, minor) >= end) {
-+ for (; minor < end; ++minor)
-+ __set_bit(minor, minors);
-+ rc = 0;
-+ } else
-+ rc = -EBUSY;
-+ spin_unlock(&minor_lock);
-+
-+ return rc;
-+}
-+
-+static void xlbd_release_minors(unsigned int minor, unsigned int nr)
-+{
-+ unsigned int end = minor + nr;
-+
-+ BUG_ON(end > nr_minors);
-+ spin_lock(&minor_lock);
-+ for (; minor < end; ++minor)
-+ __clear_bit(minor, minors);
-+ spin_unlock(&minor_lock);
-+}
-+
- static void blkif_restart_queue_callback(void *arg)
- {
- struct blkfront_info *info = (struct blkfront_info *)arg;
-@@ -333,11 +388,12 @@ wait:
- flush_requests(info);
- }
-
--static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
-+static int xlvbd_init_blk_queue(struct blkfront_info *info,
-+ struct gendisk *gd, u16 sector_size)
- {
- struct request_queue *rq;
-
-- rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
-+ rq = blk_init_queue(do_blkif_request, &info->io_lock);
- if (rq == NULL)
- return -1;
-
-@@ -370,20 +426,84 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
- static int xlvbd_barrier(struct blkfront_info *info)
- {
- int err;
-+ const char *barrier;
-+
-+ switch (info->feature_barrier) {
-+ case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break;
-+ case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break;
-+ case QUEUE_ORDERED_NONE: barrier = "disabled"; break;
-+ default: return -EINVAL;
-+ }
-
-- err = blk_queue_ordered(info->rq,
-- info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
-- NULL);
-+ err = blk_queue_ordered(info->rq, info->feature_barrier, NULL);
-
- if (err)
- return err;
-
- printk(KERN_INFO "blkfront: %s: barriers %s\n",
-- info->gd->disk_name,
-- info->feature_barrier ? "enabled" : "disabled");
-+ info->gd->disk_name, barrier);
- return 0;
- }
-
-+static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
-+{
-+ int major;
-+ major = BLKIF_MAJOR(vdevice);
-+ *minor = BLKIF_MINOR(vdevice);
-+ switch (major) {
-+ case XEN_IDE0_MAJOR:
-+ *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
-+ *minor = ((*minor / 64) * PARTS_PER_DISK) +
-+ EMULATED_HD_DISK_MINOR_OFFSET;
-+ break;
-+ case XEN_IDE1_MAJOR:
-+ *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
-+ *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
-+ EMULATED_HD_DISK_MINOR_OFFSET;
-+ break;
-+ case XEN_SCSI_DISK0_MAJOR:
-+ *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
-+ *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
-+ break;
-+ case XEN_SCSI_DISK1_MAJOR:
-+ case XEN_SCSI_DISK2_MAJOR:
-+ case XEN_SCSI_DISK3_MAJOR:
-+ case XEN_SCSI_DISK4_MAJOR:
-+ case XEN_SCSI_DISK5_MAJOR:
-+ case XEN_SCSI_DISK6_MAJOR:
-+ case XEN_SCSI_DISK7_MAJOR:
-+ *offset = (*minor / PARTS_PER_DISK) +
-+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
-+ EMULATED_SD_DISK_NAME_OFFSET;
-+ *minor = *minor +
-+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
-+ EMULATED_SD_DISK_MINOR_OFFSET;
-+ break;
-+ case XEN_SCSI_DISK8_MAJOR:
-+ case XEN_SCSI_DISK9_MAJOR:
-+ case XEN_SCSI_DISK10_MAJOR:
-+ case XEN_SCSI_DISK11_MAJOR:
-+ case XEN_SCSI_DISK12_MAJOR:
-+ case XEN_SCSI_DISK13_MAJOR:
-+ case XEN_SCSI_DISK14_MAJOR:
-+ case XEN_SCSI_DISK15_MAJOR:
-+ *offset = (*minor / PARTS_PER_DISK) +
-+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
-+ EMULATED_SD_DISK_NAME_OFFSET;
-+ *minor = *minor +
-+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
-+ EMULATED_SD_DISK_MINOR_OFFSET;
-+ break;
-+ case XENVBD_MAJOR:
-+ *offset = *minor / PARTS_PER_DISK;
-+ break;
-+ default:
-+ printk(KERN_WARNING "blkfront: your disk configuration is "
-+ "incorrect, please use an xvd device instead\n");
-+ return -ENODEV;
-+ }
-+ return 0;
-+}
-
- static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- struct blkfront_info *info,
-@@ -391,7 +511,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- {
- struct gendisk *gd;
- int nr_minors = 1;
-- int err = -ENODEV;
-+ int err;
- unsigned int offset;
- int minor;
- int nr_parts;
-@@ -406,21 +526,33 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- }
-
- if (!VDEV_IS_EXTENDED(info->vdevice)) {
-- minor = BLKIF_MINOR(info->vdevice);
-- nr_parts = PARTS_PER_DISK;
-+ err = xen_translate_vdev(info->vdevice, &minor, &offset);
-+ if (err)
-+ return err;
-+ nr_parts = PARTS_PER_DISK;
- } else {
- minor = BLKIF_MINOR_EXT(info->vdevice);
- nr_parts = PARTS_PER_EXT_DISK;
-+ offset = minor / nr_parts;
-+ if (xen_hvm_domain() && minor >= EMULATED_HD_DISK_MINOR_OFFSET) {
-+ printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
-+ "emulated IDE and SCSI disks; ignoring", info->vdevice);
-+ return -ENODEV;
-+ }
- }
-+ err = -ENODEV;
-
- if ((minor % nr_parts) == 0)
- nr_minors = nr_parts;
-
-- gd = alloc_disk(nr_minors);
-- if (gd == NULL)
-+ err = xlbd_reserve_minors(minor, nr_minors);
-+ if (err)
- goto out;
-+ err = -ENODEV;
-
-- offset = minor / nr_parts;
-+ gd = alloc_disk(nr_minors);
-+ if (gd == NULL)
-+ goto release;
-
- if (nr_minors > 1) {
- if (offset < 26)
-@@ -447,16 +579,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- gd->driverfs_dev = &(info->xbdev->dev);
- set_capacity(gd, capacity);
-
-- if (xlvbd_init_blk_queue(gd, sector_size)) {
-+ if (xlvbd_init_blk_queue(info, gd, sector_size)) {
- del_gendisk(gd);
-- goto out;
-+ goto release;
- }
-
- info->rq = gd->queue;
- info->gd = gd;
-
-- if (info->feature_barrier)
-- xlvbd_barrier(info);
-+ xlvbd_barrier(info);
-
- if (vdisk_info & VDISK_READONLY)
- set_disk_ro(gd, 1);
-@@ -469,10 +600,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
-
- return 0;
-
-+ release:
-+ xlbd_release_minors(minor, nr_minors);
- out:
- return err;
- }
-
-+static void xlvbd_release_gendisk(struct blkfront_info *info)
-+{
-+ unsigned int minor, nr_minors;
-+ unsigned long flags;
-+
-+ if (info->rq == NULL)
-+ return;
-+
-+ spin_lock_irqsave(&info->io_lock, flags);
-+
-+ /* No more blkif_request(). */
-+ blk_stop_queue(info->rq);
-+
-+ /* No more gnttab callback work. */
-+ gnttab_cancel_free_callback(&info->callback);
-+ spin_unlock_irqrestore(&info->io_lock, flags);
-+
-+ /* Flush gnttab callback work. Must be done with no locks held. */
-+ flush_scheduled_work();
-+
-+ del_gendisk(info->gd);
-+
-+ minor = info->gd->first_minor;
-+ nr_minors = info->gd->minors;
-+ xlbd_release_minors(minor, nr_minors);
-+
-+ blk_cleanup_queue(info->rq);
-+ info->rq = NULL;
-+
-+ put_disk(info->gd);
-+ info->gd = NULL;
-+}
-+
- static void kick_pending_request_queues(struct blkfront_info *info)
- {
- if (!RING_FULL(&info->ring)) {
-@@ -487,16 +653,16 @@ static void blkif_restart_queue(struct work_struct *work)
- {
- struct blkfront_info *info = container_of(work, struct blkfront_info, work);
-
-- spin_lock_irq(&blkif_io_lock);
-+ spin_lock_irq(&info->io_lock);
- if (info->connected == BLKIF_STATE_CONNECTED)
- kick_pending_request_queues(info);
-- spin_unlock_irq(&blkif_io_lock);
-+ spin_unlock_irq(&info->io_lock);
- }
-
- static void blkif_free(struct blkfront_info *info, int suspend)
- {
- /* Prevent new requests being issued until we fix things up. */
-- spin_lock_irq(&blkif_io_lock);
-+ spin_lock_irq(&info->io_lock);
- info->connected = suspend ?
- BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
- /* No more blkif_request(). */
-@@ -504,7 +670,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
- blk_stop_queue(info->rq);
- /* No more gnttab callback work. */
- gnttab_cancel_free_callback(&info->callback);
-- spin_unlock_irq(&blkif_io_lock);
-+ spin_unlock_irq(&info->io_lock);
-
- /* Flush gnttab callback work. Must be done with no locks held. */
- flush_scheduled_work();
-@@ -529,21 +695,20 @@ static void blkif_completion(struct blk_shadow *s)
- gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
- }
-
--static irqreturn_t blkif_interrupt(int irq, void *dev_id)
-+static void
-+blkif_do_interrupt(unsigned long data)
- {
-+ struct blkfront_info *info = (struct blkfront_info *)data;
- struct request *req;
- struct blkif_response *bret;
- RING_IDX i, rp;
- unsigned long flags;
-- struct blkfront_info *info = (struct blkfront_info *)dev_id;
- int error;
-
-- spin_lock_irqsave(&blkif_io_lock, flags);
-+ spin_lock_irqsave(&info->io_lock, flags);
-
-- if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
-- spin_unlock_irqrestore(&blkif_io_lock, flags);
-- return IRQ_HANDLED;
-- }
-+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
-+ goto out;
-
- again:
- rp = info->ring.sring->rsp_prod;
-@@ -567,7 +732,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
- printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
- info->gd->disk_name);
- error = -EOPNOTSUPP;
-- info->feature_barrier = 0;
-+ info->feature_barrier = QUEUE_ORDERED_NONE;
- xlvbd_barrier(info);
- }
- /* fall through */
-@@ -596,7 +761,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
-
- kick_pending_request_queues(info);
-
-- spin_unlock_irqrestore(&blkif_io_lock, flags);
-+out:
-+ spin_unlock_irqrestore(&info->io_lock, flags);
-+}
-+
-+
-+static irqreturn_t
-+blkif_interrupt(int irq, void *dev_id)
-+{
-+ struct blkfront_info *info = (struct blkfront_info *)dev_id;
-+
-+ tasklet_schedule(&info->tasklet);
-
- return IRQ_HANDLED;
- }
-@@ -650,7 +825,7 @@ fail:
-
-
- /* Common code used when first setting up, and when resuming. */
--static int talk_to_backend(struct xenbus_device *dev,
-+static int talk_to_blkback(struct xenbus_device *dev,
- struct blkfront_info *info)
- {
- const char *message = NULL;
-@@ -710,7 +885,6 @@ again:
- return err;
- }
-
--
- /**
- * Entry point to this code when a new device is created. Allocate the basic
- * structures and the ring buffer for communication with the backend, and
-@@ -736,16 +910,48 @@ static int blkfront_probe(struct xenbus_device *dev,
- }
- }
-
-+ if (xen_hvm_domain()) {
-+ char *type;
-+ int len;
-+ /* no unplug has been done: do not hook devices != xen vbds */
-+ if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
-+ int major;
-+
-+ if (!VDEV_IS_EXTENDED(vdevice))
-+ major = BLKIF_MAJOR(vdevice);
-+ else
-+ major = XENVBD_MAJOR;
-+
-+ if (major != XENVBD_MAJOR) {
-+ printk(KERN_INFO
-+ "%s: HVM does not support vbd %d as xen block device\n",
-+ __FUNCTION__, vdevice);
-+ return -ENODEV;
-+ }
-+ }
-+ /* do not create a PV cdrom device if we are an HVM guest */
-+ type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
-+ if (IS_ERR(type))
-+ return -ENODEV;
-+ if (strncmp(type, "cdrom", 5) == 0) {
-+ kfree(type);
-+ return -ENODEV;
-+ }
-+ kfree(type);
-+ }
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info) {
- xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
- return -ENOMEM;
- }
-
-+ mutex_init(&info->mutex);
- info->xbdev = dev;
- info->vdevice = vdevice;
- info->connected = BLKIF_STATE_DISCONNECTED;
- INIT_WORK(&info->work, blkif_restart_queue);
-+ spin_lock_init(&info->io_lock);
-+ tasklet_init(&info->tasklet, blkif_do_interrupt, (unsigned long)info);
-
- for (i = 0; i < BLK_RING_SIZE; i++)
- info->shadow[i].req.id = i+1;
-@@ -755,7 +961,7 @@ static int blkfront_probe(struct xenbus_device *dev,
- info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
- dev_set_drvdata(&dev->dev, info);
-
-- err = talk_to_backend(dev, info);
-+ err = talk_to_blkback(dev, info);
- if (err) {
- kfree(info);
- dev_set_drvdata(&dev->dev, NULL);
-@@ -819,7 +1025,7 @@ static int blkif_recover(struct blkfront_info *info)
-
- xenbus_switch_state(info->xbdev, XenbusStateConnected);
-
-- spin_lock_irq(&blkif_io_lock);
-+ spin_lock_irq(&info->io_lock);
-
- /* Now safe for us to use the shared ring */
- info->connected = BLKIF_STATE_CONNECTED;
-@@ -830,7 +1036,7 @@ static int blkif_recover(struct blkfront_info *info)
- /* Kick any other new requests queued since we resumed */
- kick_pending_request_queues(info);
-
-- spin_unlock_irq(&blkif_io_lock);
-+ spin_unlock_irq(&info->io_lock);
-
- return 0;
- }
-@@ -850,13 +1056,50 @@ static int blkfront_resume(struct xenbus_device *dev)
-
- blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
-
-- err = talk_to_backend(dev, info);
-+ err = talk_to_blkback(dev, info);
- if (info->connected == BLKIF_STATE_SUSPENDED && !err)
- err = blkif_recover(info);
-
- return err;
- }
-
-+static void
-+blkfront_closing(struct blkfront_info *info)
-+{
-+ struct xenbus_device *xbdev = info->xbdev;
-+ struct block_device *bdev = NULL;
-+
-+ mutex_lock(&info->mutex);
-+
-+ if (xbdev->state == XenbusStateClosing) {
-+ mutex_unlock(&info->mutex);
-+ return;
-+ }
-+
-+ if (info->gd)
-+ bdev = bdget_disk(info->gd, 0);
-+
-+ mutex_unlock(&info->mutex);
-+
-+ if (!bdev) {
-+ xenbus_frontend_closed(xbdev);
-+ return;
-+ }
-+
-+ mutex_lock(&bdev->bd_mutex);
-+
-+ if (bdev->bd_openers) {
-+ xenbus_dev_error(xbdev, -EBUSY,
-+ "Device in use; refusing to close");
-+ xenbus_switch_state(xbdev, XenbusStateClosing);
-+ } else {
-+ xlvbd_release_gendisk(info);
-+ xenbus_frontend_closed(xbdev);
-+ }
-+
-+ mutex_unlock(&bdev->bd_mutex);
-+ bdput(bdev);
-+}
-
- /*
- * Invoked when the backend is finally 'ready' (and has told produced
-@@ -868,11 +1111,31 @@ static void blkfront_connect(struct blkfront_info *info)
- unsigned long sector_size;
- unsigned int binfo;
- int err;
--
-- if ((info->connected == BLKIF_STATE_CONNECTED) ||
-- (info->connected == BLKIF_STATE_SUSPENDED) )
-+ int barrier;
-+
-+ switch (info->connected) {
-+ case BLKIF_STATE_CONNECTED:
-+ /*
-+ * Potentially, the back-end may be signalling
-+ * a capacity change; update the capacity.
-+ */
-+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
-+ "sectors", "%Lu", &sectors);
-+ if (XENBUS_EXIST_ERR(err))
-+ return;
-+ printk(KERN_INFO "Setting capacity to %Lu\n",
-+ sectors);
-+ set_capacity(info->gd, sectors);
-+ revalidate_disk(info->gd);
-+
-+ /* fall through */
-+ case BLKIF_STATE_SUSPENDED:
- return;
-
-+ default:
-+ break;
-+ }
-+
- dev_dbg(&info->xbdev->dev, "%s:%s.\n",
- __func__, info->xbdev->otherend);
-
-@@ -889,10 +1152,26 @@ static void blkfront_connect(struct blkfront_info *info)
- }
-
- err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-- "feature-barrier", "%lu", &info->feature_barrier,
-+ "feature-barrier", "%lu", &barrier,
- NULL);
-+
-+ /*
-+ * If there's no "feature-barrier" defined, then it means
-+ * we're dealing with a very old backend which writes
-+ * synchronously; draining will do what needs to get done.
-+ *
-+ * If there are barriers, then we can do full queued writes
-+ * with tagged barriers.
-+ *
-+ * If barriers are not supported, then there's no much we can
-+ * do, so just set ordering to NONE.
-+ */
- if (err)
-- info->feature_barrier = 0;
-+ info->feature_barrier = QUEUE_ORDERED_DRAIN;
-+ else if (barrier)
-+ info->feature_barrier = QUEUE_ORDERED_TAG;
-+ else
-+ info->feature_barrier = QUEUE_ORDERED_NONE;
-
- err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
- if (err) {
-@@ -904,10 +1183,10 @@ static void blkfront_connect(struct blkfront_info *info)
- xenbus_switch_state(info->xbdev, XenbusStateConnected);
-
- /* Kick pending requests. */
-- spin_lock_irq(&blkif_io_lock);
-+ spin_lock_irq(&info->io_lock);
- info->connected = BLKIF_STATE_CONNECTED;
- kick_pending_request_queues(info);
-- spin_unlock_irq(&blkif_io_lock);
-+ spin_unlock_irq(&info->io_lock);
-
- add_disk(info->gd);
-
-@@ -915,57 +1194,21 @@ static void blkfront_connect(struct blkfront_info *info)
- }
-
- /**
-- * Handle the change of state of the backend to Closing. We must delete our
-- * device-layer structures now, to ensure that writes are flushed through to
-- * the backend. Once is this done, we can switch to Closed in
-- * acknowledgement.
-- */
--static void blkfront_closing(struct xenbus_device *dev)
--{
-- struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-- unsigned long flags;
--
-- dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
--
-- if (info->rq == NULL)
-- goto out;
--
-- spin_lock_irqsave(&blkif_io_lock, flags);
--
-- /* No more blkif_request(). */
-- blk_stop_queue(info->rq);
--
-- /* No more gnttab callback work. */
-- gnttab_cancel_free_callback(&info->callback);
-- spin_unlock_irqrestore(&blkif_io_lock, flags);
--
-- /* Flush gnttab callback work. Must be done with no locks held. */
-- flush_scheduled_work();
--
-- blk_cleanup_queue(info->rq);
-- info->rq = NULL;
--
-- del_gendisk(info->gd);
--
-- out:
-- xenbus_frontend_closed(dev);
--}
--
--/**
- * Callback received when the backend's state changes.
- */
--static void backend_changed(struct xenbus_device *dev,
-+static void blkback_changed(struct xenbus_device *dev,
- enum xenbus_state backend_state)
- {
- struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-- struct block_device *bd;
-
-- dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
-+ dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
-
- switch (backend_state) {
- case XenbusStateInitialising:
- case XenbusStateInitWait:
- case XenbusStateInitialised:
-+ case XenbusStateReconfiguring:
-+ case XenbusStateReconfigured:
- case XenbusStateUnknown:
- case XenbusStateClosed:
- break;
-@@ -975,35 +1218,56 @@ static void backend_changed(struct xenbus_device *dev,
- break;
-
- case XenbusStateClosing:
-- if (info->gd == NULL) {
-- xenbus_frontend_closed(dev);
-- break;
-- }
-- bd = bdget_disk(info->gd, 0);
-- if (bd == NULL)
-- xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
--
-- mutex_lock(&bd->bd_mutex);
-- if (info->users > 0)
-- xenbus_dev_error(dev, -EBUSY,
-- "Device in use; refusing to close");
-- else
-- blkfront_closing(dev);
-- mutex_unlock(&bd->bd_mutex);
-- bdput(bd);
-+ blkfront_closing(info);
- break;
- }
- }
-
--static int blkfront_remove(struct xenbus_device *dev)
-+static int blkfront_remove(struct xenbus_device *xbdev)
- {
-- struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-+ struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
-+ struct block_device *bdev = NULL;
-+ struct gendisk *disk;
-
-- dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
-+ dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
-
- blkif_free(info, 0);
-
-- kfree(info);
-+ mutex_lock(&info->mutex);
-+
-+ disk = info->gd;
-+ if (disk)
-+ bdev = bdget_disk(disk, 0);
-+
-+ info->xbdev = NULL;
-+ mutex_unlock(&info->mutex);
-+
-+ if (!bdev) {
-+ kfree(info);
-+ return 0;
-+ }
-+
-+ /*
-+ * The xbdev was removed before we reached the Closed
-+ * state. See if it's safe to remove the disk. If the bdev
-+ * isn't closed yet, we let release take care of it.
-+ */
-+
-+ mutex_lock(&bdev->bd_mutex);
-+ info = disk->private_data;
-+
-+ dev_warn(disk_to_dev(disk),
-+ "%s was hot-unplugged, %d stale handles\n",
-+ xbdev->nodename, bdev->bd_openers);
-+
-+ if (info && !bdev->bd_openers) {
-+ xlvbd_release_gendisk(info);
-+ disk->private_data = NULL;
-+ kfree(info);
-+ }
-+
-+ mutex_unlock(&bdev->bd_mutex);
-+ bdput(bdev);
-
- return 0;
- }
-@@ -1012,30 +1276,68 @@ static int blkfront_is_ready(struct xenbus_device *dev)
- {
- struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-
-- return info->is_ready;
-+ return info->is_ready && info->xbdev;
- }
-
- static int blkif_open(struct block_device *bdev, fmode_t mode)
- {
-- struct blkfront_info *info = bdev->bd_disk->private_data;
-- info->users++;
-- return 0;
-+ struct gendisk *disk = bdev->bd_disk;
-+ struct blkfront_info *info;
-+ int err = 0;
-+
-+ info = disk->private_data;
-+ if (!info)
-+ /* xbdev gone */
-+ return -ERESTARTSYS;
-+
-+ mutex_lock(&info->mutex);
-+
-+ if (!info->gd)
-+ /* xbdev is closed */
-+ err = -ERESTARTSYS;
-+
-+ mutex_unlock(&info->mutex);
-+
-+ return err;
- }
-
- static int blkif_release(struct gendisk *disk, fmode_t mode)
- {
- struct blkfront_info *info = disk->private_data;
-- info->users--;
-- if (info->users == 0) {
-- /* Check whether we have been instructed to close. We will
-- have ignored this request initially, as the device was
-- still mounted. */
-- struct xenbus_device *dev = info->xbdev;
-- enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
--
-- if (state == XenbusStateClosing && info->is_ready)
-- blkfront_closing(dev);
-+ struct block_device *bdev;
-+ struct xenbus_device *xbdev;
-+
-+ bdev = bdget_disk(disk, 0);
-+ bdput(bdev);
-+
-+ if (bdev->bd_openers)
-+ return 0;
-+
-+ /*
-+ * Check if we have been instructed to close. We will have
-+ * deferred this request, because the bdev was still open.
-+ */
-+
-+ mutex_lock(&info->mutex);
-+ xbdev = info->xbdev;
-+
-+ if (xbdev && xbdev->state == XenbusStateClosing) {
-+ /* pending switch to state closed */
-+ dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
-+ xlvbd_release_gendisk(info);
-+ xenbus_frontend_closed(info->xbdev);
- }
-+
-+ mutex_unlock(&info->mutex);
-+
-+ if (!xbdev) {
-+ /* sudden device removal */
-+ dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
-+ xlvbd_release_gendisk(info);
-+ disk->private_data = NULL;
-+ kfree(info);
-+ }
-+
- return 0;
- }
-
-@@ -1061,7 +1363,7 @@ static struct xenbus_driver blkfront = {
- .probe = blkfront_probe,
- .remove = blkfront_remove,
- .resume = blkfront_resume,
-- .otherend_changed = backend_changed,
-+ .otherend_changed = blkback_changed,
- .is_ready = blkfront_is_ready,
- };
-
-diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
-index c496c8a..4064d95 100644
---- a/drivers/char/agp/amd64-agp.c
-+++ b/drivers/char/agp/amd64-agp.c
-@@ -18,6 +18,8 @@
- #include <asm/k8.h>
- #include <asm/gart.h>
- #include "agp.h"
-+#include <xen/page.h>
-+#include <asm/xen/page.h>
-
- /* NVIDIA K8 registers */
- #define NVIDIA_X86_64_0_APBASE 0x10
-@@ -78,8 +80,21 @@ static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
- }
-
- for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
-+ phys_addr_t phys = page_to_phys(mem->pages[i]);
-+ if (xen_pv_domain()) {
-+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
-+ page_to_pfn(mem->pages[i])));
-+ if (phys != xen_phys) {
-+ printk(KERN_ERR "Fixing up GART: (0x%lx->0x%lx)." \
-+ " CODE UNTESTED!\n",
-+ (unsigned long)phys,
-+ (unsigned long)xen_phys);
-+ WARN_ON_ONCE(phys != xen_phys);
-+ phys = xen_phys;
-+ }
-+ }
- tmp = agp_bridge->driver->mask_memory(agp_bridge,
-- page_to_phys(mem->pages[i]),
-+ phys,
- mask_type);
-
- BUG_ON(tmp & 0xffffff0000000ffcULL);
-@@ -181,6 +196,20 @@ static int amd_8151_configure(void)
- unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
- int i;
-
-+ if (xen_pv_domain()) {
-+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
-+ virt_to_pfn(agp_bridge->gatt_table_real)));
-+ /* Future thoughts: Perhaps use the gatt_table_bus that
-+ * agp_generic_create_gatt_table has setup instead of
-+ * doing the virt_to_phys once more? */
-+ if (gatt_bus != xen_phys) {
-+ printk(KERN_ERR "Fixing up GATT: (0x%lx->0x%lx)." \
-+ " CODE UNTESTED!\n", gatt_bus,
-+ (unsigned long)xen_phys);
-+ WARN_ON_ONCE(gatt_bus != xen_phys);
-+ gatt_bus = xen_phys;
-+ }
-+ }
- /* Configure AGP regs in each x86-64 host bridge. */
- for (i = 0; i < num_k8_northbridges; i++) {
- agp_bridge->gart_bus_addr =
-diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
-index a56ca08..30fc4b6 100644
---- a/drivers/char/agp/backend.c
-+++ b/drivers/char/agp/backend.c
-@@ -38,6 +38,8 @@
- #include <linux/vmalloc.h>
- #include <asm/io.h>
- #include "agp.h"
-+#include <xen/page.h>
-+#include <asm/xen/page.h>
-
- /* Due to XFree86 brain-damage, we can't go to 1.0 until they
- * fix some real stupidity. It's only by chance we can bump
-@@ -160,8 +162,13 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge)
- }
- } else {
- bridge->scratch_page_dma = page_to_phys(page);
-+ if (xen_pv_domain()) {
-+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
-+ page_to_pfn(page)));
-+ if (bridge->scratch_page_dma != xen_phys)
-+ bridge->scratch_page_dma = xen_phys;
-+ }
- }
--
- bridge->scratch_page = bridge->driver->mask_memory(bridge,
- bridge->scratch_page_dma, 0);
- }
-diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
-index c505439..2434c91 100644
---- a/drivers/char/agp/generic.c
-+++ b/drivers/char/agp/generic.c
-@@ -42,6 +42,8 @@
- #include <asm/cacheflush.h>
- #include <asm/pgtable.h>
- #include "agp.h"
-+#include <xen/page.h>
-+#include <asm/xen/page.h>
-
- __u32 *agp_gatt_table;
- int agp_memory_reserved;
-@@ -1002,6 +1004,14 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
- return -ENOMEM;
- }
- bridge->gatt_bus_addr = virt_to_phys(bridge->gatt_table_real);
-+ /* KRW: virt_to_phys under Xen is not safe. */
-+ if (xen_pv_domain()) {
-+ /* Use back-door to get the "real" PFN. */
-+ phys_addr_t pfn = virt_to_pfn(bridge->gatt_table_real);
-+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(pfn));
-+ if (bridge->gatt_bus_addr != xen_phys)
-+ bridge->gatt_bus_addr = xen_phys;
-+ }
-
- /* AK: bogus, should encode addresses > 4GB */
- for (i = 0; i < num_entries; i++) {
-@@ -1141,8 +1151,17 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type)
- }
-
- for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
-+ phys_addr_t phys = page_to_phys(mem->pages[i]);
-+
-+ /* HACK: Via a back-door we get the bus address. */
-+ if (xen_pv_domain()) {
-+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
-+ page_to_pfn(mem->pages[i])));
-+ if (phys != xen_phys)
-+ phys = xen_phys;
-+ }
- writel(bridge->driver->mask_memory(bridge,
-- page_to_phys(mem->pages[i]),
-+ phys,
- mask_type),
- bridge->gatt_table+j);
- }
-@@ -1235,7 +1254,16 @@ int agp_generic_alloc_pages(struct agp_bridge_data *bridge, struct agp_memory *m
- int i, ret = -ENOMEM;
-
- for (i = 0; i < num_pages; i++) {
-- page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
-+ if (xen_pv_domain()) {
-+ void *addr;
-+ dma_addr_t _d;
-+
-+ addr = dma_alloc_coherent(NULL, PAGE_SIZE, &_d, GFP_KERNEL);
-+ if (!addr)
-+ goto out;
-+ page = virt_to_page(addr);
-+ } else
-+ page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
- /* agp_free_memory() needs gart address */
- if (page == NULL)
- goto out;
-@@ -1263,7 +1291,17 @@ struct page *agp_generic_alloc_page(struct agp_bridge_data *bridge)
- {
- struct page * page;
-
-- page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
-+ if (xen_pv_domain()) {
-+ void *addr;
-+ dma_addr_t _d;
-+
-+ addr = dma_alloc_coherent(NULL, PAGE_SIZE, &_d, GFP_KERNEL);
-+ if (!addr)
-+ return NULL;
-+ page = virt_to_page(addr);
-+ } else
-+ page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
-+
- if (page == NULL)
- return NULL;
-
-@@ -1294,7 +1332,12 @@ void agp_generic_destroy_pages(struct agp_memory *mem)
- unmap_page_from_agp(page);
- #endif
- put_page(page);
-- __free_page(page);
-+ if (xen_pv_domain()) {
-+ void *addr = page_address(page);
-+ dma_free_coherent(NULL, PAGE_SIZE, addr,
-+ virt_to_bus(addr));
-+ } else
-+ __free_page(page);
- atomic_dec(&agp_bridge->current_memory_agp);
- mem->pages[i] = NULL;
- }
-@@ -1311,7 +1354,12 @@ void agp_generic_destroy_page(struct page *page, int flags)
-
- if (flags & AGP_PAGE_DESTROY_FREE) {
- put_page(page);
-- __free_page(page);
-+ if (xen_pv_domain()) {
-+ void *addr = page_address(page);
-+ dma_free_coherent(NULL, PAGE_SIZE, addr,
-+ virt_to_bus(addr));
-+ } else
-+ __free_page(page);
- atomic_dec(&agp_bridge->current_memory_agp);
- }
- }
-diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
-index b8e0219..7a62c3c 100644
---- a/drivers/char/agp/intel-agp.c
-+++ b/drivers/char/agp/intel-agp.c
-@@ -10,14 +10,20 @@
- #include <linux/agp_backend.h>
- #include <asm/smp.h>
- #include "agp.h"
-+#include <xen/page.h>
-+#include <asm/xen/page.h>
-
- /*
- * If we have Intel graphics, we're not going to have anything other than
- * an Intel IOMMU. So make the correct use of the PCI DMA API contingent
- * on the Intel IOMMU support (CONFIG_DMAR).
- * Only newer chipsets need to bother with this, of course.
-+ *
-+ * Xen guests accessing graphics hardware also need proper translation
-+ * between pseudo-physical addresses and real machine addresses, which
-+ * is also achieved by using the DMA API.
- */
--#ifdef CONFIG_DMAR
-+#if defined(CONFIG_DMAR) || defined(CONFIG_XEN)
- #define USE_PCI_DMA_API 1
- #endif
-
-@@ -296,8 +302,20 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem,
- int i, j;
-
- for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
-+ phys_addr_t phys = page_to_phys(mem->pages[i]);
-+ if (xen_pv_domain()) {
-+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
-+ page_to_pfn(mem->pages[i])));
-+ if (xen_phys != phys) {
-+ printk(KERN_ERR "Compile kernel with " \
-+ "CONFIG_DMAR to get rid of this " \
-+ "warning!\n");
-+ WARN_ON_ONCE(xen_phys != phys);
-+ /* Fixup: */
-+ phys = xen_phys;
-+ }
- writel(agp_bridge->driver->mask_memory(agp_bridge,
-- page_to_phys(mem->pages[i]), mask_type),
-+ phys, mask_type),
- intel_private.gtt+j);
- }
-
-@@ -395,15 +413,19 @@ static void intel_i810_agp_enable(struct agp_bridge_data *bridge, u32 mode)
- /* Exists to support ARGB cursors */
- static struct page *i8xx_alloc_pages(void)
- {
-+ void *addr;
-+ dma_addr_t _d;
- struct page *page;
-
-- page = alloc_pages(GFP_KERNEL | GFP_DMA32, 2);
-- if (page == NULL)
-+ addr = dma_alloc_coherent(NULL, 4 * PAGE_SIZE, &_d, GFP_KERNEL);
-+ if (addr == NULL)
- return NULL;
-
-+ page = virt_to_page(addr);
-+
- if (set_pages_uc(page, 4) < 0) {
- set_pages_wb(page, 4);
-- __free_pages(page, 2);
-+ dma_free_coherent(NULL, 4 * PAGE_SIZE, addr, _d);
- return NULL;
- }
- get_page(page);
-@@ -413,12 +435,17 @@ static struct page *i8xx_alloc_pages(void)
-
- static void i8xx_destroy_pages(struct page *page)
- {
-+ void *addr;
-+
- if (page == NULL)
- return;
-
- set_pages_wb(page, 4);
- put_page(page);
-- __free_pages(page, 2);
-+
-+ addr = page_address(page);
-+
-+ dma_free_coherent(NULL, 4 * PAGE_SIZE, addr, virt_to_bus(addr));
- atomic_dec(&agp_bridge->current_memory_agp);
- }
-
-@@ -478,8 +505,16 @@ static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start,
- if (!mem->is_flushed)
- global_cache_flush();
- for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
-+ phys_addr_t phys = page_to_phys(mem->pages[i]);
-+ if (xen_pv_domain()) {
-+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
-+ page_to_pfn(mem->pages[i])));
-+ /* Fixup: */
-+ if (xen_phys != phys)
-+ phys = xen_phys;
-+ }
- writel(agp_bridge->driver->mask_memory(agp_bridge,
-- page_to_phys(mem->pages[i]), mask_type),
-+ phys, mask_type),
- intel_private.registers+I810_PTE_BASE+(j*4));
- }
- readl(intel_private.registers+I810_PTE_BASE+((j-1)*4));
-@@ -552,6 +587,12 @@ static struct agp_memory *alloc_agpphysmem_i8xx(size_t pg_count, int type)
- new->num_scratch_pages = pg_count;
- new->type = AGP_PHYS_MEMORY;
- new->physical = page_to_phys(new->pages[0]);
-+ if (xen_pv_domain()) {
-+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
-+ page_to_pfn(new->pages[0])));
-+ if (xen_phys != new->physical)
-+ new->physical = xen_phys;
-+ }
- return new;
- }
-
-@@ -992,8 +1033,16 @@ static int intel_i830_insert_entries(struct agp_memory *mem, off_t pg_start,
- global_cache_flush();
-
- for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
-+ phys_addr_t phys = page_to_phys(mem->pages[i]);
-+ if (xen_pv_domain()) {
-+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(
-+ page_to_pfn(mem->pages[i])));
-+ /* Fixup: */
-+ if (xen_phys != phys)
-+ phys = xen_phys;
-+ }
- writel(agp_bridge->driver->mask_memory(agp_bridge,
-- page_to_phys(mem->pages[i]), mask_type),
-+ phys, mask_type),
- intel_private.registers+I810_PTE_BASE+(j*4));
- }
- readl(intel_private.registers+I810_PTE_BASE+((j-1)*4));
-diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
-index a6ee32b..a7c6529 100644
---- a/drivers/char/hvc_xen.c
-+++ b/drivers/char/hvc_xen.c
-@@ -25,6 +25,8 @@
- #include <linux/types.h>
-
- #include <asm/xen/hypervisor.h>
-+
-+#include <xen/xen.h>
- #include <xen/page.h>
- #include <xen/events.h>
- #include <xen/interface/io/console.h>
-@@ -72,11 +74,12 @@ static int __write_console(const char *data, int len)
- wmb(); /* write ring before updating pointer */
- intf->out_prod = prod;
-
-- notify_daemon();
-+ if (sent)
-+ notify_daemon();
- return sent;
- }
-
--static int write_console(uint32_t vtermno, const char *data, int len)
-+static int domU_write_console(uint32_t vtermno, const char *data, int len)
- {
- int ret = len;
-
-@@ -99,7 +102,7 @@ static int write_console(uint32_t vtermno, const char *data, int len)
- return ret;
- }
-
--static int read_console(uint32_t vtermno, char *buf, int len)
-+static int domU_read_console(uint32_t vtermno, char *buf, int len)
- {
- struct xencons_interface *intf = xencons_interface();
- XENCONS_RING_IDX cons, prod;
-@@ -120,28 +123,63 @@ static int read_console(uint32_t vtermno, char *buf, int len)
- return recv;
- }
-
--static struct hv_ops hvc_ops = {
-- .get_chars = read_console,
-- .put_chars = write_console,
-+static struct hv_ops domU_hvc_ops = {
-+ .get_chars = domU_read_console,
-+ .put_chars = domU_write_console,
- .notifier_add = notifier_add_irq,
- .notifier_del = notifier_del_irq,
- .notifier_hangup = notifier_hangup_irq,
- };
-
--static int __init xen_init(void)
-+static int dom0_read_console(uint32_t vtermno, char *buf, int len)
-+{
-+ return HYPERVISOR_console_io(CONSOLEIO_read, len, buf);
-+}
-+
-+/*
-+ * Either for a dom0 to write to the system console, or a domU with a
-+ * debug version of Xen
-+ */
-+static int dom0_write_console(uint32_t vtermno, const char *str, int len)
-+{
-+ int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str);
-+ if (rc < 0)
-+ return 0;
-+
-+ return len;
-+}
-+
-+static struct hv_ops dom0_hvc_ops = {
-+ .get_chars = dom0_read_console,
-+ .put_chars = dom0_write_console,
-+ .notifier_add = notifier_add_irq,
-+ .notifier_del = notifier_del_irq,
-+ .notifier_hangup = notifier_hangup_irq,
-+};
-+
-+static int __init xen_hvc_init(void)
- {
- struct hvc_struct *hp;
-+ struct hv_ops *ops;
-
-- if (!xen_pv_domain() ||
-- xen_initial_domain() ||
-- !xen_start_info->console.domU.evtchn)
-+ if (!xen_pv_domain())
- return -ENODEV;
-
-- xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
-+ if (xen_initial_domain()) {
-+ ops = &dom0_hvc_ops;
-+ xencons_irq = bind_virq_to_irq(VIRQ_CONSOLE, 0);
-+ } else {
-+ if (!xen_start_info->console.domU.evtchn)
-+ return -ENODEV;
-+
-+ ops = &domU_hvc_ops;
-+ xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
-+ }
-+
- if (xencons_irq < 0)
- xencons_irq = 0; /* NO_IRQ */
-
-- hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
-+ hp = hvc_alloc(HVC_COOKIE, xencons_irq, ops, 256);
- if (IS_ERR(hp))
- return PTR_ERR(hp);
-
-@@ -158,7 +196,7 @@ void xen_console_resume(void)
- rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq);
- }
-
--static void __exit xen_fini(void)
-+static void __exit xen_hvc_fini(void)
- {
- if (hvc)
- hvc_remove(hvc);
-@@ -166,29 +204,24 @@ static void __exit xen_fini(void)
-
- static int xen_cons_init(void)
- {
-+ struct hv_ops *ops;
-+
- if (!xen_pv_domain())
- return 0;
-
-- hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);
-+ ops = &domU_hvc_ops;
-+ if (xen_initial_domain())
-+ ops = &dom0_hvc_ops;
-+
-+ hvc_instantiate(HVC_COOKIE, 0, ops);
-+
- return 0;
- }
-
--module_init(xen_init);
--module_exit(xen_fini);
-+module_init(xen_hvc_init);
-+module_exit(xen_hvc_fini);
- console_initcall(xen_cons_init);
-
--static void raw_console_write(const char *str, int len)
--{
-- while(len > 0) {
-- int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str);
-- if (rc <= 0)
-- break;
--
-- str += rc;
-- len -= rc;
-- }
--}
--
- #ifdef CONFIG_EARLY_PRINTK
- static void xenboot_write_console(struct console *console, const char *string,
- unsigned len)
-@@ -196,19 +229,22 @@ static void xenboot_write_console(struct console *console, const char *string,
- unsigned int linelen, off = 0;
- const char *pos;
-
-- raw_console_write(string, len);
-+ dom0_write_console(0, string, len);
-+
-+ if (xen_initial_domain())
-+ return;
-
-- write_console(0, "(early) ", 8);
-+ domU_write_console(0, "(early) ", 8);
- while (off < len && NULL != (pos = strchr(string+off, '\n'))) {
- linelen = pos-string+off;
- if (off + linelen > len)
- break;
-- write_console(0, string+off, linelen);
-- write_console(0, "\r\n", 2);
-+ domU_write_console(0, string+off, linelen);
-+ domU_write_console(0, "\r\n", 2);
- off += linelen + 1;
- }
- if (off < len)
-- write_console(0, string+off, len-off);
-+ domU_write_console(0, string+off, len-off);
- }
-
- struct console xenboot_console = {
-@@ -220,7 +256,7 @@ struct console xenboot_console = {
-
- void xen_raw_console_write(const char *str)
- {
-- raw_console_write(str, strlen(str));
-+ dom0_write_console(0, str, strlen(str));
- }
-
- void xen_raw_printk(const char *fmt, ...)
-diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
-index cbaf420..163459d 100644
---- a/drivers/firewire/net.c
-+++ b/drivers/firewire/net.c
-@@ -8,7 +8,6 @@
-
- #include <linux/bug.h>
- #include <linux/device.h>
--#include <linux/ethtool.h>
- #include <linux/firewire.h>
- #include <linux/firewire-constants.h>
- #include <linux/highmem.h>
-@@ -1333,17 +1332,6 @@ static int fwnet_change_mtu(struct net_device *net, int new_mtu)
- return 0;
- }
-
--static void fwnet_get_drvinfo(struct net_device *net,
-- struct ethtool_drvinfo *info)
--{
-- strcpy(info->driver, KBUILD_MODNAME);
-- strcpy(info->bus_info, "ieee1394");
--}
--
--static const struct ethtool_ops fwnet_ethtool_ops = {
-- .get_drvinfo = fwnet_get_drvinfo,
--};
--
- static const struct net_device_ops fwnet_netdev_ops = {
- .ndo_open = fwnet_open,
- .ndo_stop = fwnet_stop,
-@@ -1362,7 +1350,6 @@ static void fwnet_init_dev(struct net_device *net)
- net->hard_header_len = FWNET_HLEN;
- net->type = ARPHRD_IEEE1394;
- net->tx_queue_len = 10;
-- SET_ETHTOOL_OPS(net, &fwnet_ethtool_ops);
- }
-
- /* caller must hold fwnet_device_mutex */
-diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
-index 0e27d98..f5e2572 100644
---- a/drivers/gpu/drm/drm_drv.c
-+++ b/drivers/gpu/drm/drm_drv.c
-@@ -201,7 +201,7 @@ int drm_lastclose(struct drm_device * dev)
- }
- if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg &&
- !drm_core_check_feature(dev, DRIVER_MODESET)) {
-- drm_sg_cleanup(dev->sg);
-+ drm_sg_cleanup(dev, dev->sg);
- dev->sg = NULL;
- }
-
-diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
-index 8bf3770..dde5f66 100644
---- a/drivers/gpu/drm/drm_gem.c
-+++ b/drivers/gpu/drm/drm_gem.c
-@@ -539,7 +539,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
- vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
- vma->vm_ops = obj->dev->driver->gem_vm_ops;
- vma->vm_private_data = map->handle;
-- vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
-+ vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
-
- /* Take a ref for this mapping of the object, so that the fault
- * handler can dereference the mmap offset's pointer to the object.
-diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c
-index c7823c8..95ffb8a 100644
---- a/drivers/gpu/drm/drm_scatter.c
-+++ b/drivers/gpu/drm/drm_scatter.c
-@@ -32,20 +32,73 @@
- */
-
- #include <linux/vmalloc.h>
-+#include <linux/mm.h>
- #include "drmP.h"
-
- #define DEBUG_SCATTER 0
-
--static inline void *drm_vmalloc_dma(unsigned long size)
-+static void *drm_vmalloc_dma(struct drm_device *drmdev, unsigned long size)
- {
- #if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
- return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL | _PAGE_NO_CACHE);
- #else
-- return vmalloc_32(size);
-+ struct device *dev = &drmdev->pdev->dev;
-+ struct page **pages;
-+ void *addr;
-+ const int npages = PFN_UP(size);
-+ int i;
-+
-+ pages = kmalloc(npages * sizeof(*pages), GFP_KERNEL);
-+ if (!pages)
-+ goto fail;
-+
-+ for (i = 0; i < npages; i++) {
-+ dma_addr_t phys;
-+ void *addr;
-+ addr = dma_alloc_coherent(dev, PAGE_SIZE, &phys, GFP_KERNEL);
-+ if (addr == NULL)
-+ goto out_free_pages;
-+
-+ pages[i] = virt_to_page(addr);
-+ }
-+
-+ addr = vmap(pages, npages, VM_MAP | VM_IOREMAP, PAGE_KERNEL);
-+
-+ kfree(pages);
-+
-+ return addr;
-+
-+out_free_pages:
-+ while (i > 0) {
-+ void *addr = page_address(pages[--i]);
-+ dma_free_coherent(dev, PAGE_SIZE, addr, virt_to_bus(addr));
-+ }
-+
-+ kfree(pages);
-+
-+fail:
-+ return NULL;
-+#endif
-+}
-+
-+static void drm_vfree_dma(struct drm_device *drmdev, void *addr, int npages,
-+ struct page **pages)
-+{
-+#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
-+ vfree(addr);
-+#else
-+ struct device *dev = &drmdev->pdev->dev;
-+ int i;
-+
-+ for (i = 0; i < npages; i++) {
-+ void *addr = page_address(pages[i]);
-+ dma_free_coherent(dev, PAGE_SIZE, addr, virt_to_bus(addr));
-+ }
-+ vunmap(addr);
- #endif
- }
-
--void drm_sg_cleanup(struct drm_sg_mem * entry)
-+void drm_sg_cleanup(struct drm_device *drmdev, struct drm_sg_mem * entry)
- {
- struct page *page;
- int i;
-@@ -56,7 +109,7 @@ void drm_sg_cleanup(struct drm_sg_mem * entry)
- ClearPageReserved(page);
- }
-
-- vfree(entry->virtual);
-+ drm_vfree_dma(drmdev, entry->virtual, entry->pages, entry->pagelist);
-
- kfree(entry->busaddr);
- kfree(entry->pagelist);
-@@ -107,7 +160,7 @@ int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request)
- }
- memset((void *)entry->busaddr, 0, pages * sizeof(*entry->busaddr));
-
-- entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT);
-+ entry->virtual = drm_vmalloc_dma(dev, pages << PAGE_SHIFT);
- if (!entry->virtual) {
- kfree(entry->busaddr);
- kfree(entry->pagelist);
-@@ -180,7 +233,7 @@ int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request)
- return 0;
-
- failed:
-- drm_sg_cleanup(entry);
-+ drm_sg_cleanup(dev, entry);
- return -ENOMEM;
- }
- EXPORT_SYMBOL(drm_sg_alloc);
-@@ -212,7 +265,7 @@ int drm_sg_free(struct drm_device *dev, void *data,
-
- DRM_DEBUG("virtual = %p\n", entry->virtual);
-
-- drm_sg_cleanup(entry);
-+ drm_sg_cleanup(dev, entry);
-
- return 0;
- }
-diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
-index 1c040d0..e3555bf 100644
---- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
-+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
-@@ -87,6 +87,9 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
- bool is_iomem;
- unsigned long address = (unsigned long)vmf->virtual_address;
- int retval = VM_FAULT_NOPAGE;
-+ bool vm_io = (vma->vm_flags & VM_IO) && VM_IO;
-+ bool pte_iomap = (pgprot_val(vma->vm_page_prot) & _PAGE_IOMAP)
-+ && _PAGE_IOMAP;
-
- /*
- * Work around locking order reversal in fault / nopfn
-@@ -158,11 +161,30 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
- if (is_iomem) {
- vma->vm_page_prot = ttm_io_prot(bo->mem.placement,
- vma->vm_page_prot);
-+ if (!vm_io || !pte_iomap) {
-+ vma->vm_flags |= VM_IO;
-+ pgprot_val(vma->vm_page_prot) |= _PAGE_IOMAP;
-+ }
- } else {
- ttm = bo->ttm;
- vma->vm_page_prot = (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
- vm_get_page_prot(vma->vm_flags) :
- ttm_io_prot(bo->mem.placement, vma->vm_page_prot);
-+ /*
-+ * During PCI suspend the graphic cards purge their VRAM and
-+ * move their graphic objects to the TT. They also unmap all
-+ * of the objects, meaning that when an user application is
-+ * unfrozen it will re-fault and call here.
-+ *
-+ * What this means is that the VMA for the graphic object might
-+ * have been set for VRAM TTM but now it is with the TT
-+ * (normal RAM) meaning that the vma->vm_flags could be
-+ * inappropiate (say, VM_IO on TT - no good).
-+ */
-+ if (vm_io || pte_iomap) {
-+ vma->vm_flags &= ~VM_IO;
-+ pgprot_val(vma->vm_page_prot) &= ~_PAGE_IOMAP;
-+ }
- }
-
- /*
-@@ -239,6 +261,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
- {
- struct ttm_bo_driver *driver;
- struct ttm_buffer_object *bo;
-+ struct ttm_mem_type_manager *man;
- int ret;
-
- read_lock(&bdev->vm_lock);
-@@ -271,7 +294,11 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
- */
-
- vma->vm_private_data = bo;
-- vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
-+ vma->vm_flags |= VM_RESERVED | VM_MIXEDMAP | VM_DONTEXPAND;
-+ man = &bdev->man[bo->mem.mem_type];
-+ if (man->flags & TTM_MEMTYPE_FLAG_NEEDS_IOREMAP)
-+ vma->vm_flags |= VM_IO;
-+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
- return 0;
- out_unref:
- ttm_bo_unref(&bo);
-diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
-index 3d5b8b0..8b05e38 100644
---- a/drivers/gpu/drm/ttm/ttm_tt.c
-+++ b/drivers/gpu/drm/ttm/ttm_tt.c
-@@ -38,7 +38,8 @@
- #include "ttm/ttm_module.h"
- #include "ttm/ttm_bo_driver.h"
- #include "ttm/ttm_placement.h"
--
-+#include <linux/dma-mapping.h>
-+#include <xen/xen.h>
- static int ttm_tt_swapin(struct ttm_tt *ttm);
-
- /**
-@@ -84,6 +85,16 @@ static struct page *ttm_tt_alloc_page(unsigned page_flags)
- else
- gfp_flags |= __GFP_HIGHMEM;
-
-+ if ((page_flags & TTM_PAGE_FLAG_DMA32) && xen_pv_domain())
-+ {
-+ void *addr;
-+ dma_addr_t _d;
-+
-+ addr = dma_alloc_coherent(NULL, PAGE_SIZE, &_d, GFP_KERNEL);
-+ if (addr == NULL)
-+ return NULL;
-+ return virt_to_page(addr);
-+ }
- return alloc_page(gfp_flags);
- }
-
-@@ -286,6 +297,7 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
- int i;
- struct page *cur_page;
- struct ttm_backend *be = ttm->be;
-+ void *addr;
-
- if (be)
- be->func->clear(be);
-@@ -300,7 +312,16 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
- "Leaking pages.\n");
- ttm_mem_global_free_page(ttm->glob->mem_glob,
- cur_page);
-- __free_page(cur_page);
-+
-+ if ((ttm->page_flags & TTM_PAGE_FLAG_DMA32) &&
-+ xen_pv_domain()) {
-+ addr = page_address(cur_page);
-+ WARN_ON(!addr);
-+ if (addr)
-+ dma_free_coherent(NULL, PAGE_SIZE, addr,
-+ virt_to_bus(addr));
-+ } else
-+ __free_page(cur_page);
- }
- }
- ttm->state = tt_unpopulated;
-diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c
-index a4e9dcb..62ab09e 100644
---- a/drivers/ieee1394/eth1394.c
-+++ b/drivers/ieee1394/eth1394.c
-@@ -58,7 +58,6 @@
- #include <linux/tcp.h>
- #include <linux/skbuff.h>
- #include <linux/bitops.h>
--#include <linux/ethtool.h>
- #include <asm/uaccess.h>
- #include <asm/delay.h>
- #include <asm/unaligned.h>
-@@ -173,8 +172,6 @@ static netdev_tx_t ether1394_tx(struct sk_buff *skb,
- struct net_device *dev);
- static void ether1394_iso(struct hpsb_iso *iso);
-
--static const struct ethtool_ops ethtool_ops;
--
- static int ether1394_write(struct hpsb_host *host, int srcid, int destid,
- quadlet_t *data, u64 addr, size_t len, u16 flags);
- static void ether1394_add_host(struct hpsb_host *host);
-@@ -525,8 +522,6 @@ static void ether1394_init_dev(struct net_device *dev)
- dev->header_ops = &ether1394_header_ops;
- dev->netdev_ops = &ether1394_netdev_ops;
-
-- SET_ETHTOOL_OPS(dev, &ethtool_ops);
--
- dev->watchdog_timeo = ETHER1394_TIMEOUT;
- dev->flags = IFF_BROADCAST | IFF_MULTICAST;
- dev->features = NETIF_F_HIGHDMA;
-@@ -1698,17 +1693,6 @@ fail:
- return NETDEV_TX_OK;
- }
-
--static void ether1394_get_drvinfo(struct net_device *dev,
-- struct ethtool_drvinfo *info)
--{
-- strcpy(info->driver, driver_name);
-- strcpy(info->bus_info, "ieee1394"); /* FIXME provide more detail? */
--}
--
--static const struct ethtool_ops ethtool_ops = {
-- .get_drvinfo = ether1394_get_drvinfo
--};
--
- static int __init ether1394_init_module(void)
- {
- int err;
-diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
-index b115726..80a072e 100644
---- a/drivers/input/xen-kbdfront.c
-+++ b/drivers/input/xen-kbdfront.c
-@@ -21,7 +21,10 @@
- #include <linux/errno.h>
- #include <linux/module.h>
- #include <linux/input.h>
-+
- #include <asm/xen/hypervisor.h>
-+
-+#include <xen/xen.h>
- #include <xen/events.h>
- #include <xen/page.h>
- #include <xen/interface/io/fbif.h>
-@@ -272,6 +275,8 @@ static void xenkbd_backend_changed(struct xenbus_device *dev,
- switch (backend_state) {
- case XenbusStateInitialising:
- case XenbusStateInitialised:
-+ case XenbusStateReconfiguring:
-+ case XenbusStateReconfigured:
- case XenbusStateUnknown:
- case XenbusStateClosed:
- break;
-@@ -335,7 +340,7 @@ static struct xenbus_driver xenkbd_driver = {
-
- static int __init xenkbd_init(void)
- {
-- if (!xen_domain())
-+ if (!xen_domain() || xen_hvm_domain())
- return -ENODEV;
-
- /* Nothing to do if running in dom0. */
-diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
-index b2f71f7..b7feb84 100644
---- a/drivers/net/Kconfig
-+++ b/drivers/net/Kconfig
-@@ -2787,6 +2787,7 @@ source "drivers/s390/net/Kconfig"
- config XEN_NETDEV_FRONTEND
- tristate "Xen network device frontend driver"
- depends on XEN
-+ select XEN_XENBUS_FRONTEND
- default y
- help
- The network device frontend driver allows the kernel to
-diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c
-index 406f064..c063b53 100644
---- a/drivers/net/bmac.c
-+++ b/drivers/net/bmac.c
-@@ -1236,15 +1236,8 @@ static void bmac_reset_and_enable(struct net_device *dev)
- }
- spin_unlock_irqrestore(&bp->lock, flags);
- }
--static void bmac_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
--{
-- struct bmac_data *bp = netdev_priv(dev);
-- strcpy(info->driver, "bmac");
-- strcpy(info->bus_info, dev_name(&bp->mdev->ofdev.dev));
--}
-
- static const struct ethtool_ops bmac_ethtool_ops = {
-- .get_drvinfo = bmac_get_drvinfo,
- .get_link = ethtool_op_get_link,
- };
-
-diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c
-index 66dace6..8238fa2 100644
---- a/drivers/net/fec_mpc52xx.c
-+++ b/drivers/net/fec_mpc52xx.c
-@@ -772,11 +772,6 @@ static void mpc52xx_fec_reset(struct net_device *dev)
-
-
- /* ethtool interface */
--static void mpc52xx_fec_get_drvinfo(struct net_device *dev,
-- struct ethtool_drvinfo *info)
--{
-- strcpy(info->driver, DRIVER_NAME);
--}
-
- static int mpc52xx_fec_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
- {
-@@ -811,7 +806,6 @@ static void mpc52xx_fec_set_msglevel(struct net_device *dev, u32 level)
- }
-
- static const struct ethtool_ops mpc52xx_fec_ethtool_ops = {
-- .get_drvinfo = mpc52xx_fec_get_drvinfo,
- .get_settings = mpc52xx_fec_get_settings,
- .set_settings = mpc52xx_fec_set_settings,
- .get_link = ethtool_op_get_link,
-diff --git a/drivers/net/pasemi_mac_ethtool.c b/drivers/net/pasemi_mac_ethtool.c
-index 28a8622..29ff9ad 100644
---- a/drivers/net/pasemi_mac_ethtool.c
-+++ b/drivers/net/pasemi_mac_ethtool.c
-@@ -77,21 +77,6 @@ pasemi_mac_ethtool_get_settings(struct net_device *netdev,
- return phy_ethtool_gset(phydev, cmd);
- }
-
--static void
--pasemi_mac_ethtool_get_drvinfo(struct net_device *netdev,
-- struct ethtool_drvinfo *drvinfo)
--{
-- struct pasemi_mac *mac;
-- mac = netdev_priv(netdev);
--
-- /* clear and fill out info */
-- memset(drvinfo, 0, sizeof(struct ethtool_drvinfo));
-- strncpy(drvinfo->driver, "pasemi_mac", 12);
-- strcpy(drvinfo->version, "N/A");
-- strcpy(drvinfo->fw_version, "N/A");
-- strncpy(drvinfo->bus_info, pci_name(mac->pdev), 32);
--}
--
- static u32
- pasemi_mac_ethtool_get_msglevel(struct net_device *netdev)
- {
-@@ -150,7 +135,6 @@ static void pasemi_mac_get_strings(struct net_device *netdev, u32 stringset,
-
- const struct ethtool_ops pasemi_mac_ethtool_ops = {
- .get_settings = pasemi_mac_ethtool_get_settings,
-- .get_drvinfo = pasemi_mac_ethtool_get_drvinfo,
- .get_msglevel = pasemi_mac_ethtool_get_msglevel,
- .set_msglevel = pasemi_mac_ethtool_set_msglevel,
- .get_link = ethtool_op_get_link,
-diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
-index b58965a..7f9a4f4 100644
---- a/drivers/net/pcmcia/3c574_cs.c
-+++ b/drivers/net/pcmcia/3c574_cs.c
-@@ -83,7 +83,6 @@ earlier 3Com products.
- #include <linux/skbuff.h>
- #include <linux/if_arp.h>
- #include <linux/ioport.h>
--#include <linux/ethtool.h>
- #include <linux/bitops.h>
- #include <linux/mii.h>
-
-@@ -249,7 +248,6 @@ static int el3_rx(struct net_device *dev, int worklimit);
- static int el3_close(struct net_device *dev);
- static void el3_tx_timeout(struct net_device *dev);
- static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
--static const struct ethtool_ops netdev_ethtool_ops;
- static void set_rx_mode(struct net_device *dev);
- static void set_multicast_list(struct net_device *dev);
-
-@@ -300,7 +298,6 @@ static int tc574_probe(struct pcmcia_device *link)
- link->conf.ConfigIndex = 1;
-
- dev->netdev_ops = &el3_netdev_ops;
-- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
- dev->watchdog_timeo = TX_TIMEOUT;
-
- return tc574_config(link);
-@@ -1083,16 +1080,6 @@ static int el3_rx(struct net_device *dev, int worklimit)
- return worklimit;
- }
-
--static void netdev_get_drvinfo(struct net_device *dev,
-- struct ethtool_drvinfo *info)
--{
-- strcpy(info->driver, "3c574_cs");
--}
--
--static const struct ethtool_ops netdev_ethtool_ops = {
-- .get_drvinfo = netdev_get_drvinfo,
--};
--
- /* Provide ioctl() calls to examine the MII xcvr state. */
- static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
- {
-diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
-index 3131a59..40e5e7c 100644
---- a/drivers/net/pcmcia/axnet_cs.c
-+++ b/drivers/net/pcmcia/axnet_cs.c
-@@ -33,7 +33,6 @@
- #include <linux/timer.h>
- #include <linux/delay.h>
- #include <linux/spinlock.h>
--#include <linux/ethtool.h>
- #include <linux/netdevice.h>
- #include <linux/etherdevice.h>
- #include <linux/crc32.h>
-@@ -98,7 +97,6 @@ static netdev_tx_t axnet_start_xmit(struct sk_buff *skb,
- static struct net_device_stats *get_stats(struct net_device *dev);
- static void set_multicast_list(struct net_device *dev);
- static void axnet_tx_timeout(struct net_device *dev);
--static const struct ethtool_ops netdev_ethtool_ops;
- static irqreturn_t ei_irq_wrapper(int irq, void *dev_id);
- static void ei_watchdog(u_long arg);
- static void axnet_reset_8390(struct net_device *dev);
-@@ -186,7 +184,6 @@ static int axnet_probe(struct pcmcia_device *link)
-
- dev->netdev_ops = &axnet_netdev_ops;
-
-- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
- dev->watchdog_timeo = TX_TIMEOUT;
-
- return axnet_config(link);
-@@ -683,16 +680,6 @@ reschedule:
- add_timer(&info->watchdog);
- }
-
--static void netdev_get_drvinfo(struct net_device *dev,
-- struct ethtool_drvinfo *info)
--{
-- strcpy(info->driver, "axnet_cs");
--}
--
--static const struct ethtool_ops netdev_ethtool_ops = {
-- .get_drvinfo = netdev_get_drvinfo,
--};
--
- /*====================================================================*/
-
- static int axnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c
-index 06618af..db0c890 100644
---- a/drivers/net/pcmcia/ibmtr_cs.c
-+++ b/drivers/net/pcmcia/ibmtr_cs.c
-@@ -52,7 +52,6 @@
- #include <linux/string.h>
- #include <linux/timer.h>
- #include <linux/module.h>
--#include <linux/ethtool.h>
- #include <linux/netdevice.h>
- #include <linux/trdevice.h>
- #include <linux/ibmtr.h>
-@@ -120,16 +119,6 @@ typedef struct ibmtr_dev_t {
- struct tok_info *ti;
- } ibmtr_dev_t;
-
--static void netdev_get_drvinfo(struct net_device *dev,
-- struct ethtool_drvinfo *info)
--{
-- strcpy(info->driver, "ibmtr_cs");
--}
--
--static const struct ethtool_ops netdev_ethtool_ops = {
-- .get_drvinfo = netdev_get_drvinfo,
--};
--
- /*======================================================================
-
- ibmtr_attach() creates an "instance" of the driver, allocating
-@@ -170,8 +159,6 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link)
-
- link->irq.Instance = info->dev = dev;
-
-- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
--
- return ibmtr_config(link);
- } /* ibmtr_attach */
-
-diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
-index 94c9ad2..1b673b0 100644
---- a/drivers/net/pcmcia/pcnet_cs.c
-+++ b/drivers/net/pcmcia/pcnet_cs.c
-@@ -36,7 +36,6 @@
- #include <linux/string.h>
- #include <linux/timer.h>
- #include <linux/delay.h>
--#include <linux/ethtool.h>
- #include <linux/netdevice.h>
- #include <linux/log2.h>
- #include <linux/etherdevice.h>
-@@ -111,7 +110,6 @@ static void pcnet_release(struct pcmcia_device *link);
- static int pcnet_open(struct net_device *dev);
- static int pcnet_close(struct net_device *dev);
- static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
--static const struct ethtool_ops netdev_ethtool_ops;
- static irqreturn_t ei_irq_wrapper(int irq, void *dev_id);
- static void ei_watchdog(u_long arg);
- static void pcnet_reset_8390(struct net_device *dev);
-@@ -654,8 +652,6 @@ static int pcnet_config(struct pcmcia_device *link)
- ei_status.word16 = 1;
- ei_status.reset_8390 = &pcnet_reset_8390;
-
-- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
--
- if (info->flags & (IS_DL10019|IS_DL10022))
- mii_phy_probe(dev);
-
-@@ -1175,18 +1171,6 @@ reschedule:
-
- /*====================================================================*/
-
--static void netdev_get_drvinfo(struct net_device *dev,
-- struct ethtool_drvinfo *info)
--{
-- strcpy(info->driver, "pcnet_cs");
--}
--
--static const struct ethtool_ops netdev_ethtool_ops = {
-- .get_drvinfo = netdev_get_drvinfo,
--};
--
--/*====================================================================*/
--
-
- static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
- {
-diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c
-index 8d60300..0926832 100644
---- a/drivers/net/sc92031.c
-+++ b/drivers/net/sc92031.c
-@@ -1255,16 +1255,6 @@ static int sc92031_ethtool_set_settings(struct net_device *dev,
- return 0;
- }
-
--static void sc92031_ethtool_get_drvinfo(struct net_device *dev,
-- struct ethtool_drvinfo *drvinfo)
--{
-- struct sc92031_priv *priv = netdev_priv(dev);
-- struct pci_dev *pdev = priv->pdev;
--
-- strcpy(drvinfo->driver, SC92031_NAME);
-- strcpy(drvinfo->bus_info, pci_name(pdev));
--}
--
- static void sc92031_ethtool_get_wol(struct net_device *dev,
- struct ethtool_wolinfo *wolinfo)
- {
-@@ -1386,7 +1376,6 @@ static void sc92031_ethtool_get_ethtool_stats(struct net_device *dev,
- static const struct ethtool_ops sc92031_ethtool_ops = {
- .get_settings = sc92031_ethtool_get_settings,
- .set_settings = sc92031_ethtool_set_settings,
-- .get_drvinfo = sc92031_ethtool_get_drvinfo,
- .get_wol = sc92031_ethtool_get_wol,
- .set_wol = sc92031_ethtool_set_wol,
- .nway_reset = sc92031_ethtool_nway_reset,
-diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c
-index 0f2ca598..44159be 100644
---- a/drivers/net/tulip/xircom_cb.c
-+++ b/drivers/net/tulip/xircom_cb.c
-@@ -27,7 +27,6 @@
- #include <linux/skbuff.h>
- #include <linux/delay.h>
- #include <linux/init.h>
--#include <linux/ethtool.h>
- #include <linux/bitops.h>
-
- #include <asm/uaccess.h>
-@@ -179,19 +178,6 @@ static void print_binary(unsigned int number)
- }
- #endif
-
--static void netdev_get_drvinfo(struct net_device *dev,
-- struct ethtool_drvinfo *info)
--{
-- struct xircom_private *private = netdev_priv(dev);
--
-- strcpy(info->driver, "xircom_cb");
-- strcpy(info->bus_info, pci_name(private->pdev));
--}
--
--static const struct ethtool_ops netdev_ethtool_ops = {
-- .get_drvinfo = netdev_get_drvinfo,
--};
--
- static const struct net_device_ops netdev_ops = {
- .ndo_open = xircom_open,
- .ndo_stop = xircom_close,
-@@ -277,7 +263,6 @@ static int __devinit xircom_probe(struct pci_dev *pdev, const struct pci_device_
- setup_descriptors(private);
-
- dev->netdev_ops = &netdev_ops;
-- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
- pci_set_drvdata(pdev, dev);
-
- if (register_netdev(dev)) {
-diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
-index f450bc9..2109514 100644
---- a/drivers/net/usb/hso.c
-+++ b/drivers/net/usb/hso.c
-@@ -820,17 +820,7 @@ static netdev_tx_t hso_net_start_xmit(struct sk_buff *skb,
- return NETDEV_TX_OK;
- }
-
--static void hso_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info)
--{
-- struct hso_net *odev = netdev_priv(net);
--
-- strncpy(info->driver, driver_name, ETHTOOL_BUSINFO_LEN);
-- strncpy(info->version, DRIVER_VERSION, ETHTOOL_BUSINFO_LEN);
-- usb_make_path(odev->parent->usb, info->bus_info, sizeof info->bus_info);
--}
--
- static const struct ethtool_ops ops = {
-- .get_drvinfo = hso_get_drvinfo,
- .get_link = ethtool_op_get_link
- };
-
-diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
-index e391ef9..47d1926 100644
---- a/drivers/net/usb/kaweth.c
-+++ b/drivers/net/usb/kaweth.c
-@@ -767,14 +767,6 @@ static int kaweth_close(struct net_device *net)
- return 0;
- }
-
--static void kaweth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
--{
-- struct kaweth_device *kaweth = netdev_priv(dev);
--
-- strlcpy(info->driver, driver_name, sizeof(info->driver));
-- usb_make_path(kaweth->dev, info->bus_info, sizeof (info->bus_info));
--}
--
- static u32 kaweth_get_link(struct net_device *dev)
- {
- struct kaweth_device *kaweth = netdev_priv(dev);
-@@ -783,7 +775,6 @@ static u32 kaweth_get_link(struct net_device *dev)
- }
-
- static const struct ethtool_ops ops = {
-- .get_drvinfo = kaweth_get_drvinfo,
- .get_link = kaweth_get_link
- };
-
-diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
-index 1c88c2e..2e65100 100644
---- a/drivers/net/wireless/ray_cs.c
-+++ b/drivers/net/wireless/ray_cs.c
-@@ -44,7 +44,6 @@
- #include <linux/if_arp.h>
- #include <linux/ioport.h>
- #include <linux/skbuff.h>
--#include <linux/ethtool.h>
- #include <linux/ieee80211.h>
-
- #include <pcmcia/cs_types.h>
-@@ -101,8 +100,6 @@ static int ray_dev_config(struct net_device *dev, struct ifmap *map);
- static struct net_device_stats *ray_get_stats(struct net_device *dev);
- static int ray_dev_init(struct net_device *dev);
-
--static const struct ethtool_ops netdev_ethtool_ops;
--
- static int ray_open(struct net_device *dev);
- static netdev_tx_t ray_dev_start_xmit(struct sk_buff *skb,
- struct net_device *dev);
-@@ -362,7 +359,6 @@ static int ray_probe(struct pcmcia_device *p_dev)
-
- /* Raylink entries in the device structure */
- dev->netdev_ops = &ray_netdev_ops;
-- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
- dev->wireless_handlers = &ray_handler_def;
- #ifdef WIRELESS_SPY
- local->wireless_data.spy_data = &local->spy_data;
-@@ -1106,18 +1102,6 @@ AP to AP 1 1 dest AP src AP dest source
- }
- } /* end encapsulate_frame */
-
--/*===========================================================================*/
--
--static void netdev_get_drvinfo(struct net_device *dev,
-- struct ethtool_drvinfo *info)
--{
-- strcpy(info->driver, "ray_cs");
--}
--
--static const struct ethtool_ops netdev_ethtool_ops = {
-- .get_drvinfo = netdev_get_drvinfo,
--};
--
- /*====================================================================*/
-
- /*------------------------------------------------------------------*/
-diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
-index 4f1e0cf..22b2b43 100644
---- a/drivers/net/wireless/wl3501_cs.c
-+++ b/drivers/net/wireless/wl3501_cs.c
-@@ -29,7 +29,6 @@
-
- #include <linux/delay.h>
- #include <linux/types.h>
--#include <linux/ethtool.h>
- #include <linux/init.h>
- #include <linux/interrupt.h>
- #include <linux/in.h>
-@@ -1436,15 +1435,6 @@ static struct iw_statistics *wl3501_get_wireless_stats(struct net_device *dev)
- return wstats;
- }
-
--static void wl3501_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
--{
-- strlcpy(info->driver, wl3501_dev_info, sizeof(info->driver));
--}
--
--static const struct ethtool_ops ops = {
-- .get_drvinfo = wl3501_get_drvinfo
--};
--
- /**
- * wl3501_detach - deletes a driver "instance"
- * @link - FILL_IN
-@@ -1936,7 +1926,6 @@ static int wl3501_probe(struct pcmcia_device *p_dev)
- this->p_dev = p_dev;
- dev->wireless_data = &this->wireless_data;
- dev->wireless_handlers = &wl3501_handler_def;
-- SET_ETHTOOL_OPS(dev, &ops);
- netif_stop_queue(dev);
- p_dev->priv = p_dev->irq.Instance = dev;
-
-diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
-index 1a11d95..3f71199 100644
---- a/drivers/net/xen-netfront.c
-+++ b/drivers/net/xen-netfront.c
-@@ -42,6 +42,7 @@
- #include <linux/mm.h>
- #include <net/ip.h>
-
-+#include <xen/xen.h>
- #include <xen/xenbus.h>
- #include <xen/events.h>
- #include <xen/page.h>
-@@ -53,19 +54,36 @@
-
- static const struct ethtool_ops xennet_ethtool_ops;
-
-+static int use_smartpoll = 0;
-+module_param(use_smartpoll, int, 0600);
-+MODULE_PARM_DESC (use_smartpoll, "Use smartpoll mechanism if available");
-+
- struct netfront_cb {
- struct page *page;
- unsigned offset;
- };
-
-+#define MICRO_SECOND 1000000UL
-+#define NANO_SECOND 1000000000UL
-+#define DEFAULT_SMART_POLL_FREQ 1000UL
-+
-+struct netfront_smart_poll {
-+ struct hrtimer timer;
-+ struct net_device *netdev;
-+ unsigned int smart_poll_freq;
-+ unsigned int feature_smart_poll;
-+ unsigned int active;
-+ unsigned long counter;
-+};
-+
- #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
-
- #define RX_COPY_THRESHOLD 256
-
- #define GRANT_INVALID_REF 0
-
--#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
--#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
-+#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
-+#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
- #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
-
- struct netfront_info {
-@@ -104,7 +122,7 @@ struct netfront_info {
-
- /* Receive-ring batched refills. */
- #define RX_MIN_TARGET 8
--#define RX_DFL_MIN_TARGET 64
-+#define RX_DFL_MIN_TARGET 80
- #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
- unsigned rx_min_target, rx_max_target, rx_target;
- struct sk_buff_head rx_batch;
-@@ -118,6 +136,8 @@ struct netfront_info {
- unsigned long rx_pfn_array[NET_RX_RING_SIZE];
- struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
- struct mmu_update rx_mmu[NET_RX_RING_SIZE];
-+
-+ struct netfront_smart_poll smart_poll;
- };
-
- struct netfront_rx_info {
-@@ -337,15 +357,17 @@ static int xennet_open(struct net_device *dev)
- return 0;
- }
-
--static void xennet_tx_buf_gc(struct net_device *dev)
-+static int xennet_tx_buf_gc(struct net_device *dev)
- {
- RING_IDX cons, prod;
-+ RING_IDX cons_begin, cons_end;
- unsigned short id;
- struct netfront_info *np = netdev_priv(dev);
- struct sk_buff *skb;
-
- BUG_ON(!netif_carrier_ok(dev));
-
-+ cons_begin = np->tx.rsp_cons;
- do {
- prod = np->tx.sring->rsp_prod;
- rmb(); /* Ensure we see responses up to 'rp'. */
-@@ -390,7 +412,11 @@ static void xennet_tx_buf_gc(struct net_device *dev)
- mb(); /* update shared area */
- } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
-
-+ cons_end = np->tx.rsp_cons;
-+
- xennet_maybe_wake_tx(dev);
-+
-+ return (cons_begin == cons_end);
- }
-
- static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
-@@ -1267,6 +1293,14 @@ static void xennet_disconnect_backend(struct netfront_info *info)
- info->rx.sring = NULL;
- }
-
-+static int netfront_suspend(struct xenbus_device *dev, pm_message_t state)
-+{
-+ struct netfront_info *info = dev_get_drvdata(&dev->dev);
-+ struct hrtimer *timer = &info->smart_poll.timer;
-+ hrtimer_cancel(timer);
-+ return 0;
-+}
-+
- /**
- * We are reconnecting to the backend, due to a suspend/resume, or a backend
- * driver restart. We tear down our netif structure and recreate it, but
-@@ -1305,6 +1339,59 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
- return 0;
- }
-
-+static enum hrtimer_restart smart_poll_function(struct hrtimer *timer)
-+{
-+ struct netfront_smart_poll *psmart_poll;
-+ struct net_device *dev;
-+ struct netfront_info *np;
-+ unsigned long flags;
-+ unsigned int tx_active = 0, rx_active = 0;
-+
-+ psmart_poll = container_of(timer, struct netfront_smart_poll, timer);
-+ dev = psmart_poll->netdev;
-+ np = netdev_priv(dev);
-+
-+ spin_lock_irqsave(&np->tx_lock, flags);
-+
-+ if (!np->rx.sring)
-+ goto end;
-+
-+ np->smart_poll.counter++;
-+
-+ if (likely(netif_carrier_ok(dev))) {
-+ tx_active = !(xennet_tx_buf_gc(dev));
-+ /* Under tx_lock: protects access to rx shared-ring indexes. */
-+ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
-+ rx_active = 1;
-+ napi_schedule(&np->napi);
-+ }
-+ }
-+
-+ np->smart_poll.active |= (tx_active || rx_active);
-+ if (np->smart_poll.counter %
-+ (np->smart_poll.smart_poll_freq / 10) == 0) {
-+ if (!np->smart_poll.active) {
-+ np->rx.sring->private.netif.smartpoll_active = 0;
-+ goto end;
-+ }
-+ np->smart_poll.active = 0;
-+ }
-+
-+ if (np->rx.sring->private.netif.smartpoll_active) {
-+ if ( hrtimer_start(timer,
-+ ktime_set(0, NANO_SECOND/psmart_poll->smart_poll_freq),
-+ HRTIMER_MODE_REL) ) {
-+ printk(KERN_DEBUG "Failed to start hrtimer,"
-+ "use interrupt mode for this packet\n");
-+ np->rx.sring->private.netif.smartpoll_active = 0;
-+ }
-+ }
-+
-+end:
-+ spin_unlock_irqrestore(&np->tx_lock, flags);
-+ return HRTIMER_NORESTART;
-+}
-+
- static irqreturn_t xennet_interrupt(int irq, void *dev_id)
- {
- struct net_device *dev = dev_id;
-@@ -1320,6 +1407,16 @@ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
- napi_schedule(&np->napi);
- }
-
-+ if (np->smart_poll.feature_smart_poll) {
-+ if ( hrtimer_start(&np->smart_poll.timer,
-+ ktime_set(0,NANO_SECOND/np->smart_poll.smart_poll_freq),
-+ HRTIMER_MODE_REL) ) {
-+ printk(KERN_DEBUG "Failed to start hrtimer,"
-+ "use interrupt mode for this packet\n");
-+ np->rx.sring->private.netif.smartpoll_active = 0;
-+ }
-+ }
-+
- spin_unlock_irqrestore(&np->tx_lock, flags);
-
- return IRQ_HANDLED;
-@@ -1393,7 +1490,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
- }
-
- /* Common code used when first setting up, and when resuming. */
--static int talk_to_backend(struct xenbus_device *dev,
-+static int talk_to_netback(struct xenbus_device *dev,
- struct netfront_info *info)
- {
- const char *message;
-@@ -1456,6 +1553,12 @@ again:
- goto abort_transaction;
- }
-
-+ err = xenbus_printf(xbt, dev->nodename, "feature-smart-poll", "%d", use_smartpoll);
-+ if (err) {
-+ message = "writing feature-smart-poll";
-+ goto abort_transaction;
-+ }
-+
- err = xenbus_transaction_end(xbt, 0);
- if (err) {
- if (err == -EAGAIN)
-@@ -1543,7 +1646,26 @@ static int xennet_connect(struct net_device *dev)
- return -ENODEV;
- }
-
-- err = talk_to_backend(np->xbdev, np);
-+ np->smart_poll.feature_smart_poll = 0;
-+ if (use_smartpoll) {
-+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
-+ "feature-smart-poll", "%u",
-+ &np->smart_poll.feature_smart_poll);
-+ if (err != 1)
-+ np->smart_poll.feature_smart_poll = 0;
-+ }
-+
-+ hrtimer_init(&np->smart_poll.timer, CLOCK_MONOTONIC,
-+ HRTIMER_MODE_REL);
-+ if (np->smart_poll.feature_smart_poll) {
-+ np->smart_poll.timer.function = smart_poll_function;
-+ np->smart_poll.netdev = dev;
-+ np->smart_poll.smart_poll_freq = DEFAULT_SMART_POLL_FREQ;
-+ np->smart_poll.active = 0;
-+ np->smart_poll.counter = 0;
-+ }
-+
-+ err = talk_to_netback(np->xbdev, np);
- if (err)
- return err;
-
-@@ -1597,7 +1719,7 @@ static int xennet_connect(struct net_device *dev)
- /**
- * Callback received when the backend's state changes.
- */
--static void backend_changed(struct xenbus_device *dev,
-+static void netback_changed(struct xenbus_device *dev,
- enum xenbus_state backend_state)
- {
- struct netfront_info *np = dev_get_drvdata(&dev->dev);
-@@ -1608,6 +1730,8 @@ static void backend_changed(struct xenbus_device *dev,
- switch (backend_state) {
- case XenbusStateInitialising:
- case XenbusStateInitialised:
-+ case XenbusStateReconfiguring:
-+ case XenbusStateReconfigured:
- case XenbusStateConnected:
- case XenbusStateUnknown:
- case XenbusStateClosed:
-@@ -1628,12 +1752,30 @@ static void backend_changed(struct xenbus_device *dev,
- }
- }
-
-+static int xennet_get_coalesce(struct net_device *netdev,
-+ struct ethtool_coalesce *ec)
-+{
-+ struct netfront_info *np = netdev_priv(netdev);
-+ ec->rx_coalesce_usecs = MICRO_SECOND / np->smart_poll.smart_poll_freq;
-+ return 0;
-+}
-+
-+static int xennet_set_coalesce(struct net_device *netdev,
-+ struct ethtool_coalesce *ec)
-+{
-+ struct netfront_info *np = netdev_priv(netdev);
-+ np->smart_poll.smart_poll_freq = MICRO_SECOND / ec->rx_coalesce_usecs;
-+ return 0;
-+}
-+
- static const struct ethtool_ops xennet_ethtool_ops =
- {
- .set_tx_csum = ethtool_op_set_tx_csum,
- .set_sg = xennet_set_sg,
- .set_tso = xennet_set_tso,
- .get_link = ethtool_op_get_link,
-+ .get_coalesce = xennet_get_coalesce,
-+ .set_coalesce = xennet_set_coalesce,
- };
-
- #ifdef CONFIG_SYSFS
-@@ -1798,8 +1940,9 @@ static struct xenbus_driver netfront_driver = {
- .ids = netfront_ids,
- .probe = netfront_probe,
- .remove = __devexit_p(xennet_remove),
-+ .suspend = netfront_suspend,
- .resume = netfront_resume,
-- .otherend_changed = backend_changed,
-+ .otherend_changed = netback_changed,
- };
-
- static int __init netif_init(void)
-diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
-index fdc864f..7802fcd 100644
---- a/drivers/pci/Kconfig
-+++ b/drivers/pci/Kconfig
-@@ -51,6 +51,16 @@ config PCI_STUB
-
- When in doubt, say N.
-
-+config XEN_PCIDEV_FRONTEND
-+ tristate "Xen PCI Frontend"
-+ depends on XEN && PCI && X86
-+ select HOTPLUG
-+ select XEN_XENBUS_FRONTEND
-+ default y
-+ help
-+ The PCI device frontend driver allows the kernel to import arbitrary
-+ PCI devices from a PCI backend to support PCI driver domains.
-+
- config HT_IRQ
- bool "Interrupts on hypertransport devices"
- default y
-diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
-index 4a7f11d..b70aa4d 100644
---- a/drivers/pci/Makefile
-+++ b/drivers/pci/Makefile
-@@ -31,6 +31,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
- # Build Intel IOMMU support
- obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
-
-+# Build Xen IOMMU support
-+obj-$(CONFIG_PCI_XEN) += xen-iommu.o
- obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
-
- obj-$(CONFIG_PCI_IOV) += iov.o
-@@ -60,6 +62,8 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
-
- obj-$(CONFIG_PCI_STUB) += pci-stub.o
-
-+obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
-+
- ifeq ($(CONFIG_PCI_DEBUG),y)
- EXTRA_CFLAGS += -DDEBUG
- endif
-diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
-index cef28a7..1940183 100644
---- a/drivers/pci/bus.c
-+++ b/drivers/pci/bus.c
-@@ -249,6 +249,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
- up_read(&pci_bus_sem);
- }
-
-+EXPORT_SYMBOL_GPL(pci_walk_bus);
- EXPORT_SYMBOL(pci_bus_alloc_resource);
- EXPORT_SYMBOL_GPL(pci_bus_add_device);
- EXPORT_SYMBOL(pci_bus_add_devices);
-diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
-index 91d0390..24f6f28 100644
---- a/drivers/pci/dmar.c
-+++ b/drivers/pci/dmar.c
-@@ -673,10 +673,13 @@ void __init detect_intel_iommu(void)
- "x2apic and Intr-remapping.\n");
- #endif
- #ifdef CONFIG_DMAR
-- if (ret && !no_iommu && !iommu_detected && !swiotlb &&
-- !dmar_disabled)
-+ if (ret && !no_iommu && !iommu_detected && !dmar_disabled)
- iommu_detected = 1;
- #endif
-+#ifdef CONFIG_X86
-+ if (ret)
-+ x86_init.iommu.iommu_init = intel_iommu_init;
-+#endif
- }
- early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
- dmar_tbl = NULL;
-diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
-index ba83495..1506d4a 100644
---- a/drivers/pci/intel-iommu.c
-+++ b/drivers/pci/intel-iommu.c
-@@ -3278,7 +3278,7 @@ int __init intel_iommu_init(void)
- * Check the need for DMA-remapping initialization now.
- * Above initialization will also be used by Interrupt-remapping.
- */
-- if (no_iommu || swiotlb || dmar_disabled)
-+ if (no_iommu || dmar_disabled)
- return -ENODEV;
-
- iommu_init_mempool();
-@@ -3299,7 +3299,9 @@ int __init intel_iommu_init(void)
- "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
-
- init_timer(&unmap_timer);
-- force_iommu = 1;
-+#ifdef CONFIG_SWIOTLB
-+ swiotlb = 0;
-+#endif
- dma_ops = &intel_dma_ops;
-
- init_iommu_sysfs();
-diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
-index e03fe98..f9db891 100644
---- a/drivers/pci/iov.c
-+++ b/drivers/pci/iov.c
-@@ -706,6 +706,21 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev)
- }
- EXPORT_SYMBOL_GPL(pci_sriov_migration);
-
-+/**
-+ * pci_num_vf - return number of VFs associated with a PF device_release_driver
-+ * @dev: the PCI device
-+ *
-+ * Returns number of VFs, or 0 if SR-IOV is not enabled.
-+ */
-+int pci_num_vf(struct pci_dev *dev)
-+{
-+ if (!dev || !dev->is_physfn)
-+ return 0;
-+ else
-+ return dev->sriov->nr_virtfn;
-+}
-+EXPORT_SYMBOL_GPL(pci_num_vf);
-+
- static int ats_alloc_one(struct pci_dev *dev, int ps)
- {
- int pos;
-diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
-index 0fb1d05..c7e8a69 100644
---- a/drivers/pci/msi.c
-+++ b/drivers/pci/msi.c
-@@ -19,6 +19,9 @@
- #include <linux/errno.h>
- #include <linux/io.h>
-
-+#include <asm/xen/hypercall.h>
-+#include <asm/xen/hypervisor.h>
-+
- #include "pci.h"
- #include "msi.h"
-
-@@ -391,6 +394,20 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
-
- void pci_restore_msi_state(struct pci_dev *dev)
- {
-+ if (xen_initial_domain()) {
-+ struct physdev_restore_msi physdev;
-+
-+ if (!dev->msi_enabled && !dev->msix_enabled)
-+ return;
-+
-+ pci_intx_for_msi(dev, 0);
-+
-+ physdev.bus = dev->bus->number;
-+ physdev.devfn = dev->devfn;
-+ HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &physdev);
-+
-+ return;
-+ }
- __pci_restore_msi_state(dev);
- __pci_restore_msix_state(dev);
- }
-diff --git a/drivers/pci/xen-iommu.c b/drivers/pci/xen-iommu.c
-new file mode 100644
-index 0000000..ac6bcdb
---- /dev/null
-+++ b/drivers/pci/xen-iommu.c
-@@ -0,0 +1,271 @@
-+#include <linux/types.h>
-+#include <linux/mm.h>
-+#include <linux/string.h>
-+#include <linux/pci.h>
-+#include <linux/module.h>
-+#include <linux/version.h>
-+#include <linux/scatterlist.h>
-+#include <linux/io.h>
-+#include <linux/bug.h>
-+
-+#include <xen/interface/xen.h>
-+#include <xen/grant_table.h>
-+#include <xen/page.h>
-+#include <xen/xen-ops.h>
-+
-+#include <asm/iommu.h>
-+#include <asm/swiotlb.h>
-+#include <asm/tlbflush.h>
-+
-+#define IOMMU_BUG_ON(test) \
-+do { \
-+ if (unlikely(test)) { \
-+ printk(KERN_ALERT "Fatal DMA error! " \
-+ "Please use 'swiotlb=force'\n"); \
-+ BUG(); \
-+ } \
-+} while (0)
-+
-+/* Print address range with message */
-+#define PAR(msg, addr, size) \
-+do { \
-+ printk(msg "[%#llx - %#llx]\n", \
-+ (unsigned long long)addr, \
-+ (unsigned long long)addr + size); \
-+} while (0)
-+
-+static inline int address_needs_mapping(struct device *hwdev,
-+ dma_addr_t addr)
-+{
-+ dma_addr_t mask = DMA_BIT_MASK(32);
-+ int ret;
-+
-+ /* If the device has a mask, use it, otherwise default to 32 bits */
-+ if (hwdev)
-+ mask = *hwdev->dma_mask;
-+
-+ ret = (addr & ~mask) != 0;
-+
-+ if (ret) {
-+ printk(KERN_ERR "dma address needs mapping\n");
-+ printk(KERN_ERR "mask: %#llx\n address: [%#llx]\n", mask, addr);
-+ }
-+ return ret;
-+}
-+
-+static int check_pages_physically_contiguous(unsigned long pfn,
-+ unsigned int offset,
-+ size_t length)
-+{
-+ unsigned long next_mfn;
-+ int i;
-+ int nr_pages;
-+
-+ next_mfn = pfn_to_mfn(pfn);
-+ nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
-+
-+ for (i = 1; i < nr_pages; i++) {
-+ if (pfn_to_mfn(++pfn) != ++next_mfn)
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+static int range_straddles_page_boundary(phys_addr_t p, size_t size)
-+{
-+ unsigned long pfn = PFN_DOWN(p);
-+ unsigned int offset = p & ~PAGE_MASK;
-+
-+ if (offset + size <= PAGE_SIZE)
-+ return 0;
-+ if (check_pages_physically_contiguous(pfn, offset, size))
-+ return 0;
-+ return 1;
-+}
-+
-+static inline void xen_dma_unmap_page(struct page *page)
-+{
-+ /* Xen TODO: 2.6.18 xen calls __gnttab_dma_unmap_page here
-+ * to deal with foreign pages. We'll need similar logic here at
-+ * some point.
-+ */
-+}
-+
-+/* Gets dma address of a page */
-+static inline dma_addr_t xen_dma_map_page(struct page *page)
-+{
-+ /* Xen TODO: 2.6.18 xen calls __gnttab_dma_map_page here to deal
-+ * with foreign pages. We'll need similar logic here at some
-+ * point.
-+ */
-+ return ((dma_addr_t)pfn_to_mfn(page_to_pfn(page))) << PAGE_SHIFT;
-+}
-+
-+static int xen_map_sg(struct device *hwdev, struct scatterlist *sg,
-+ int nents,
-+ enum dma_data_direction direction,
-+ struct dma_attrs *attrs)
-+{
-+ struct scatterlist *s;
-+ struct page *page;
-+ int i, rc;
-+
-+ BUG_ON(direction == DMA_NONE);
-+ WARN_ON(nents == 0 || sg[0].length == 0);
-+
-+ for_each_sg(sg, s, nents, i) {
-+ BUG_ON(!sg_page(s));
-+ page = sg_page(s);
-+ s->dma_address = xen_dma_map_page(page) + s->offset;
-+ s->dma_length = s->length;
-+ IOMMU_BUG_ON(range_straddles_page_boundary(
-+ page_to_phys(page), s->length));
-+ }
-+
-+ rc = nents;
-+
-+ flush_write_buffers();
-+ return rc;
-+}
-+
-+static void xen_unmap_sg(struct device *hwdev, struct scatterlist *sg,
-+ int nents,
-+ enum dma_data_direction direction,
-+ struct dma_attrs *attrs)
-+{
-+ struct scatterlist *s;
-+ struct page *page;
-+ int i;
-+
-+ for_each_sg(sg, s, nents, i) {
-+ page = pfn_to_page(mfn_to_pfn(PFN_DOWN(s->dma_address)));
-+ xen_dma_unmap_page(page);
-+ }
-+}
-+
-+static void *xen_alloc_coherent(struct device *dev, size_t size,
-+ dma_addr_t *dma_handle, gfp_t gfp)
-+{
-+ void *ret;
-+ unsigned int order = get_order(size);
-+ unsigned long vstart;
-+ u64 mask;
-+
-+ /* ignore region specifiers */
-+ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
-+
-+ if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
-+ return ret;
-+
-+ if (dev == NULL || (dev->coherent_dma_mask < DMA_BIT_MASK(32)))
-+ gfp |= GFP_DMA;
-+
-+ vstart = __get_free_pages(gfp, order);
-+ ret = (void *)vstart;
-+
-+ if (dev != NULL && dev->coherent_dma_mask)
-+ mask = dev->coherent_dma_mask;
-+ else
-+ mask = DMA_BIT_MASK(32);
-+
-+ if (ret != NULL) {
-+ if (xen_create_contiguous_region(vstart, order,
-+ fls64(mask)) != 0) {
-+ free_pages(vstart, order);
-+ return NULL;
-+ }
-+ memset(ret, 0, size);
-+ *dma_handle = virt_to_machine(ret).maddr;
-+ }
-+ return ret;
-+}
-+
-+static void xen_free_coherent(struct device *dev, size_t size,
-+ void *vaddr, dma_addr_t dma_addr)
-+{
-+ int order = get_order(size);
-+
-+ if (dma_release_from_coherent(dev, order, vaddr))
-+ return;
-+
-+ xen_destroy_contiguous_region((unsigned long)vaddr, order);
-+ free_pages((unsigned long)vaddr, order);
-+}
-+
-+static dma_addr_t xen_map_page(struct device *dev, struct page *page,
-+ unsigned long offset, size_t size,
-+ enum dma_data_direction direction,
-+ struct dma_attrs *attrs)
-+{
-+ dma_addr_t dma;
-+
-+ BUG_ON(direction == DMA_NONE);
-+
-+ WARN_ON(size == 0);
-+
-+ dma = xen_dma_map_page(page) + offset;
-+
-+ IOMMU_BUG_ON(address_needs_mapping(dev, dma));
-+ flush_write_buffers();
-+ return dma;
-+}
-+
-+static void xen_unmap_page(struct device *dev, dma_addr_t dma_addr,
-+ size_t size,
-+ enum dma_data_direction direction,
-+ struct dma_attrs *attrs)
-+{
-+ BUG_ON(direction == DMA_NONE);
-+ xen_dma_unmap_page(pfn_to_page(mfn_to_pfn(PFN_DOWN(dma_addr))));
-+}
-+
-+static struct dma_map_ops xen_dma_ops = {
-+ .dma_supported = NULL,
-+
-+ .alloc_coherent = xen_alloc_coherent,
-+ .free_coherent = xen_free_coherent,
-+
-+ .map_page = xen_map_page,
-+ .unmap_page = xen_unmap_page,
-+
-+ .map_sg = xen_map_sg,
-+ .unmap_sg = xen_unmap_sg,
-+
-+ .mapping_error = NULL,
-+
-+ .is_phys = 0,
-+};
-+
-+static struct dma_map_ops xen_swiotlb_dma_ops = {
-+ .dma_supported = swiotlb_dma_supported,
-+
-+ .alloc_coherent = xen_alloc_coherent,
-+ .free_coherent = xen_free_coherent,
-+
-+ .map_page = swiotlb_map_page,
-+ .unmap_page = swiotlb_unmap_page,
-+
-+ .map_sg = swiotlb_map_sg_attrs,
-+ .unmap_sg = swiotlb_unmap_sg_attrs,
-+
-+ .mapping_error = swiotlb_dma_mapping_error,
-+
-+ .is_phys = 0,
-+};
-+
-+void __init xen_iommu_init(void)
-+{
-+ if (!xen_pv_domain())
-+ return;
-+
-+ printk(KERN_INFO "Xen: Initializing Xen DMA ops\n");
-+
-+ force_iommu = 0;
-+ dma_ops = &xen_dma_ops;
-+
-+ if (swiotlb) {
-+ printk(KERN_INFO "Xen: Enabling DMA fallback to swiotlb\n");
-+ dma_ops = &xen_swiotlb_dma_ops;
-+ }
-+}
-+
-diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
-new file mode 100644
-index 0000000..76d0bdd
---- /dev/null
-+++ b/drivers/pci/xen-pcifront.c
-@@ -0,0 +1,1157 @@
-+/*
-+ * PCI Frontend Xenbus Setup - handles setup with backend (imports page/evtchn)
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/mm.h>
-+#include <xen/xenbus.h>
-+#include <xen/events.h>
-+#include <xen/grant_table.h>
-+#include <xen/page.h>
-+#include <linux/spinlock.h>
-+#include <linux/pci.h>
-+#include <linux/msi.h>
-+#include <xen/xenbus.h>
-+#include <xen/interface/io/pciif.h>
-+#include <asm/xen/pci.h>
-+#include <linux/interrupt.h>
-+#include <asm/atomic.h>
-+#include <linux/workqueue.h>
-+#include <linux/bitops.h>
-+#include <linux/time.h>
-+
-+
-+#ifndef __init_refok
-+#define __init_refok
-+#endif
-+
-+#define INVALID_GRANT_REF (0)
-+#define INVALID_EVTCHN (-1)
-+
-+
-+struct pci_bus_entry {
-+ struct list_head list;
-+ struct pci_bus *bus;
-+};
-+
-+#define _PDEVB_op_active (0)
-+#define PDEVB_op_active (1 << (_PDEVB_op_active))
-+
-+struct pcifront_device {
-+ struct xenbus_device *xdev;
-+ struct list_head root_buses;
-+
-+ int evtchn;
-+ int gnt_ref;
-+
-+ int irq;
-+
-+ /* Lock this when doing any operations in sh_info */
-+ spinlock_t sh_info_lock;
-+ struct xen_pci_sharedinfo *sh_info;
-+ struct work_struct op_work;
-+ unsigned long flags;
-+
-+};
-+
-+struct pcifront_sd {
-+ int domain;
-+ struct pcifront_device *pdev;
-+};
-+
-+static inline struct pcifront_device *
-+pcifront_get_pdev(struct pcifront_sd *sd)
-+{
-+ return sd->pdev;
-+}
-+
-+static inline void pcifront_init_sd(struct pcifront_sd *sd,
-+ unsigned int domain, unsigned int bus,
-+ struct pcifront_device *pdev)
-+{
-+ sd->domain = domain;
-+ sd->pdev = pdev;
-+}
-+
-+static inline void pcifront_setup_root_resources(struct pci_bus *bus,
-+ struct pcifront_sd *sd)
-+{
-+}
-+
-+
-+DEFINE_SPINLOCK(pcifront_dev_lock);
-+static struct pcifront_device *pcifront_dev;
-+
-+static int verbose_request;
-+module_param(verbose_request, int, 0644);
-+
-+static int errno_to_pcibios_err(int errno)
-+{
-+ switch (errno) {
-+ case XEN_PCI_ERR_success:
-+ return PCIBIOS_SUCCESSFUL;
-+
-+ case XEN_PCI_ERR_dev_not_found:
-+ return PCIBIOS_DEVICE_NOT_FOUND;
-+
-+ case XEN_PCI_ERR_invalid_offset:
-+ case XEN_PCI_ERR_op_failed:
-+ return PCIBIOS_BAD_REGISTER_NUMBER;
-+
-+ case XEN_PCI_ERR_not_implemented:
-+ return PCIBIOS_FUNC_NOT_SUPPORTED;
-+
-+ case XEN_PCI_ERR_access_denied:
-+ return PCIBIOS_SET_FAILED;
-+ }
-+ return errno;
-+}
-+
-+static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
-+{
-+ if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
-+ && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) {
-+ dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
-+ schedule_work(&pdev->op_work);
-+ }
-+}
-+
-+static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
-+{
-+ int err = 0;
-+ struct xen_pci_op *active_op = &pdev->sh_info->op;
-+ unsigned long irq_flags;
-+ evtchn_port_t port = pdev->evtchn;
-+ unsigned irq = pdev->irq;
-+ s64 ns, ns_timeout;
-+ struct timeval tv;
-+
-+ spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
-+
-+ memcpy(active_op, op, sizeof(struct xen_pci_op));
-+
-+ /* Go */
-+ wmb();
-+ set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
-+ notify_remote_via_evtchn(port);
-+
-+ /*
-+ * We set a poll timeout of 3 seconds but give up on return after
-+ * 2 seconds. It is better to time out too late rather than too early
-+ * (in the latter case we end up continually re-executing poll() with a
-+ * timeout in the past). 1s difference gives plenty of slack for error.
-+ */
-+ do_gettimeofday(&tv);
-+ ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
-+
-+ xen_clear_irq_pending(irq);
-+
-+ while (test_bit(_XEN_PCIF_active,
-+ (unsigned long *)&pdev->sh_info->flags)) {
-+ xen_poll_irq_timeout(irq, jiffies + 3*HZ);
-+ xen_clear_irq_pending(irq);
-+ do_gettimeofday(&tv);
-+ ns = timeval_to_ns(&tv);
-+ if (ns > ns_timeout) {
-+ dev_err(&pdev->xdev->dev,
-+ "pciback not responding!!!\n");
-+ clear_bit(_XEN_PCIF_active,
-+ (unsigned long *)&pdev->sh_info->flags);
-+ err = XEN_PCI_ERR_dev_not_found;
-+ goto out;
-+ }
-+ }
-+
-+ /*
-+ * We might lose backend service request since we
-+ * reuse same evtchn with pci_conf backend response. So re-schedule
-+ * aer pcifront service.
-+ */
-+ if (test_bit(_XEN_PCIB_active,
-+ (unsigned long *)&pdev->sh_info->flags)) {
-+ dev_err(&pdev->xdev->dev,
-+ "schedule aer pcifront service\n");
-+ schedule_pcifront_aer_op(pdev);
-+ }
-+
-+ memcpy(op, active_op, sizeof(struct xen_pci_op));
-+
-+ err = op->err;
-+out:
-+ spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
-+ return err;
-+}
-+
-+/* Access to this function is spinlocked in drivers/pci/access.c */
-+static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
-+ int where, int size, u32 *val)
-+{
-+ int err = 0;
-+ struct xen_pci_op op = {
-+ .cmd = XEN_PCI_OP_conf_read,
-+ .domain = pci_domain_nr(bus),
-+ .bus = bus->number,
-+ .devfn = devfn,
-+ .offset = where,
-+ .size = size,
-+ };
-+ struct pcifront_sd *sd = bus->sysdata;
-+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
-+
-+ if (verbose_request)
-+ dev_info(&pdev->xdev->dev,
-+ "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
-+ pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
-+ PCI_FUNC(devfn), where, size);
-+
-+ err = do_pci_op(pdev, &op);
-+
-+ if (likely(!err)) {
-+ if (verbose_request)
-+ dev_info(&pdev->xdev->dev, "read got back value %x\n",
-+ op.value);
-+
-+ *val = op.value;
-+ } else if (err == -ENODEV) {
-+ /* No device here, pretend that it just returned 0 */
-+ err = 0;
-+ *val = 0;
-+ }
-+
-+ return errno_to_pcibios_err(err);
-+}
-+
-+/* Access to this function is spinlocked in drivers/pci/access.c */
-+static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
-+ int where, int size, u32 val)
-+{
-+ struct xen_pci_op op = {
-+ .cmd = XEN_PCI_OP_conf_write,
-+ .domain = pci_domain_nr(bus),
-+ .bus = bus->number,
-+ .devfn = devfn,
-+ .offset = where,
-+ .size = size,
-+ .value = val,
-+ };
-+ struct pcifront_sd *sd = bus->sysdata;
-+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
-+
-+ if (verbose_request)
-+ dev_info(&pdev->xdev->dev,
-+ "write dev=%04x:%02x:%02x.%01x - "
-+ "offset %x size %d val %x\n",
-+ pci_domain_nr(bus), bus->number,
-+ PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
-+
-+ return errno_to_pcibios_err(do_pci_op(pdev, &op));
-+}
-+
-+struct pci_ops pcifront_bus_ops = {
-+ .read = pcifront_bus_read,
-+ .write = pcifront_bus_write,
-+};
-+
-+#ifdef CONFIG_PCI_MSI
-+static int pci_frontend_enable_msix(struct pci_dev *dev,
-+ int **vector, int nvec)
-+{
-+ int err;
-+ int i;
-+ struct xen_pci_op op = {
-+ .cmd = XEN_PCI_OP_enable_msix,
-+ .domain = pci_domain_nr(dev->bus),
-+ .bus = dev->bus->number,
-+ .devfn = dev->devfn,
-+ .value = nvec,
-+ };
-+ struct pcifront_sd *sd = dev->bus->sysdata;
-+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
-+ struct msi_desc *entry;
-+
-+ if (nvec > SH_INFO_MAX_VEC) {
-+ dev_err(&dev->dev, "too much vector for pci frontend: %x."
-+ " Increase SH_INFO_MAX_VEC.\n", nvec);
-+ return -EINVAL;
-+ }
-+
-+ i = 0;
-+ list_for_each_entry(entry, &dev->msi_list, list) {
-+ op.msix_entries[i].entry = entry->msi_attrib.entry_nr;
-+ /* Vector is useless at this point. */
-+ op.msix_entries[i].vector = -1;
-+ i++;
-+ }
-+
-+ err = do_pci_op(pdev, &op);
-+
-+ if (likely(!err)) {
-+ if (likely(!op.value)) {
-+ /* we get the result */
-+ for (i = 0; i < nvec; i++)
-+ *(*vector+i) = op.msix_entries[i].vector;
-+ return 0;
-+ } else {
-+ printk(KERN_DEBUG "enable msix get value %x\n",
-+ op.value);
-+ return op.value;
-+ }
-+ } else {
-+ dev_err(&dev->dev, "enable msix get err %x\n", err);
-+ return err;
-+ }
-+}
-+
-+static void pci_frontend_disable_msix(struct pci_dev *dev)
-+{
-+ int err;
-+ struct xen_pci_op op = {
-+ .cmd = XEN_PCI_OP_disable_msix,
-+ .domain = pci_domain_nr(dev->bus),
-+ .bus = dev->bus->number,
-+ .devfn = dev->devfn,
-+ };
-+ struct pcifront_sd *sd = dev->bus->sysdata;
-+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
-+
-+ err = do_pci_op(pdev, &op);
-+
-+ /* What should do for error ? */
-+ if (err)
-+ dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
-+}
-+
-+static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
-+{
-+ int err;
-+ struct xen_pci_op op = {
-+ .cmd = XEN_PCI_OP_enable_msi,
-+ .domain = pci_domain_nr(dev->bus),
-+ .bus = dev->bus->number,
-+ .devfn = dev->devfn,
-+ };
-+ struct pcifront_sd *sd = dev->bus->sysdata;
-+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
-+
-+ err = do_pci_op(pdev, &op);
-+ if (likely(!err)) {
-+ *(*vector) = op.value;
-+ } else {
-+ dev_err(&dev->dev, "pci frontend enable msi failed for dev "
-+ "%x:%x \n", op.bus, op.devfn);
-+ err = -EINVAL;
-+ }
-+ return err;
-+}
-+
-+static void pci_frontend_disable_msi(struct pci_dev *dev)
-+{
-+ int err;
-+ struct xen_pci_op op = {
-+ .cmd = XEN_PCI_OP_disable_msi,
-+ .domain = pci_domain_nr(dev->bus),
-+ .bus = dev->bus->number,
-+ .devfn = dev->devfn,
-+ };
-+ struct pcifront_sd *sd = dev->bus->sysdata;
-+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
-+
-+ err = do_pci_op(pdev, &op);
-+ if (err == XEN_PCI_ERR_dev_not_found) {
-+ /* XXX No response from backend, what shall we do? */
-+ printk(KERN_DEBUG "get no response from backend for disable MSI\n");
-+ return;
-+ }
-+ if (err)
-+ /* how can pciback notify us fail? */
-+ printk(KERN_DEBUG "get fake response frombackend \n");
-+}
-+
-+static struct xen_pci_frontend_ops pci_frontend_ops = {
-+ .enable_msi = pci_frontend_enable_msi,
-+ .disable_msi = pci_frontend_disable_msi,
-+ .enable_msix = pci_frontend_enable_msix,
-+ .disable_msix = pci_frontend_disable_msix,
-+};
-+
-+static void pci_frontend_registrar(int enable)
-+{
-+ if (enable)
-+ xen_pci_frontend = &pci_frontend_ops;
-+ else
-+ xen_pci_frontend = NULL;
-+};
-+#else
-+static inline void pci_frontend_registrar(int enable) { };
-+#endif /* CONFIG_PCI_MSI */
-+
-+/* Claim resources for the PCI frontend as-is, backend won't allow changes */
-+static int pcifront_claim_resource(struct pci_dev *dev, void *data)
-+{
-+ struct pcifront_device *pdev = data;
-+ int i;
-+ struct resource *r;
-+
-+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-+ r = &dev->resource[i];
-+
-+ if (!r->parent && r->start && r->flags) {
-+ dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
-+ pci_name(dev), i);
-+ if (pci_claim_resource(dev, i)) {
-+ dev_err(&pdev->xdev->dev, "Could not claim "
-+ "resource %s/%d! Device offline. Try "
-+ "giving less than 4GB to domain.\n",
-+ pci_name(dev), i);
-+ }
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
-+ unsigned int domain, unsigned int bus,
-+ struct pci_bus *b)
-+{
-+ struct pci_dev *d;
-+ unsigned int devfn;
-+ int err;
-+
-+ /* Scan the bus for functions and add.
-+ * We omit handling of PCI bridge attachment because pciback prevents
-+ * bridges from being exported.
-+ */
-+ for (devfn = 0; devfn < 0x100; devfn++) {
-+ d = pci_get_slot(b, devfn);
-+ if (d) {
-+ /* Device is already known. */
-+ pci_dev_put(d);
-+ continue;
-+ }
-+
-+ d = pci_scan_single_device(b, devfn);
-+ if (d)
-+ dev_info(&pdev->xdev->dev, "New device on "
-+ "%04x:%02x:%02x.%02x found.\n", domain, bus,
-+ PCI_SLOT(devfn), PCI_FUNC(devfn));
-+ }
-+
-+ return 0;
-+}
-+
-+int __devinit pcifront_scan_root(struct pcifront_device *pdev,
-+ unsigned int domain, unsigned int bus)
-+{
-+ struct pci_bus *b;
-+ struct pcifront_sd *sd = NULL;
-+ struct pci_bus_entry *bus_entry = NULL;
-+ int err = 0;
-+
-+#ifndef CONFIG_PCI_DOMAINS
-+ if (domain != 0) {
-+ dev_err(&pdev->xdev->dev,
-+ "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
-+ dev_err(&pdev->xdev->dev,
-+ "Please compile with CONFIG_PCI_DOMAINS\n");
-+ err = -EINVAL;
-+ goto err_out;
-+ }
-+#endif
-+
-+ dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
-+ domain, bus);
-+
-+ bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
-+ sd = kmalloc(sizeof(*sd), GFP_KERNEL);
-+ if (!bus_entry || !sd) {
-+ err = -ENOMEM;
-+ goto err_out;
-+ }
-+ pcifront_init_sd(sd, domain, bus, pdev);
-+
-+ b = pci_scan_bus_parented(&pdev->xdev->dev, bus,
-+ &pcifront_bus_ops, sd);
-+ if (!b) {
-+ dev_err(&pdev->xdev->dev,
-+ "Error creating PCI Frontend Bus!\n");
-+ err = -ENOMEM;
-+ goto err_out;
-+ }
-+
-+ pcifront_setup_root_resources(b, sd);
-+ bus_entry->bus = b;
-+
-+ list_add(&bus_entry->list, &pdev->root_buses);
-+
-+ /* pci_scan_bus_parented skips devices which do not have a have
-+ * devfn==0. The pcifront_scan_bus enumerates all devfn. */
-+ err = pcifront_scan_bus(pdev, domain, bus, b);
-+
-+ /* Claim resources before going "live" with our devices */
-+ pci_walk_bus(b, pcifront_claim_resource, pdev);
-+
-+ /* Create SysFS and notify udev of the devices. Aka: "going live" */
-+ pci_bus_add_devices(b);
-+
-+ return err;
-+
-+err_out:
-+ kfree(bus_entry);
-+ kfree(sd);
-+
-+ return err;
-+}
-+
-+int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
-+ unsigned int domain, unsigned int bus)
-+{
-+ int err;
-+ struct pci_bus *b;
-+
-+#ifndef CONFIG_PCI_DOMAINS
-+ if (domain != 0) {
-+ dev_err(&pdev->xdev->dev,
-+ "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
-+ dev_err(&pdev->xdev->dev,
-+ "Please compile with CONFIG_PCI_DOMAINS\n");
-+ return -EINVAL;
-+ }
-+#endif
-+
-+ dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
-+ domain, bus);
-+
-+ b = pci_find_bus(domain, bus);
-+ if (!b)
-+ /* If the bus is unknown, create it. */
-+ return pcifront_scan_root(pdev, domain, bus);
-+
-+ err = pcifront_scan_bus(pdev, domain, bus, b);
-+
-+ /* Claim resources before going "live" with our devices */
-+ pci_walk_bus(b, pcifront_claim_resource, pdev);
-+
-+ /* Create SysFS and notify udev of the devices. Aka: "going live" */
-+ pci_bus_add_devices(b);
-+
-+ return err;
-+}
-+
-+static void free_root_bus_devs(struct pci_bus *bus)
-+{
-+ struct pci_dev *dev;
-+
-+ while (!list_empty(&bus->devices)) {
-+ dev = container_of(bus->devices.next, struct pci_dev,
-+ bus_list);
-+ dev_dbg(&dev->dev, "removing device\n");
-+ pci_remove_bus_device(dev);
-+ }
-+}
-+
-+void pcifront_free_roots(struct pcifront_device *pdev)
-+{
-+ struct pci_bus_entry *bus_entry, *t;
-+
-+ dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
-+
-+ list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
-+ list_del(&bus_entry->list);
-+
-+ free_root_bus_devs(bus_entry->bus);
-+
-+ kfree(bus_entry->bus->sysdata);
-+
-+ device_unregister(bus_entry->bus->bridge);
-+ pci_remove_bus(bus_entry->bus);
-+
-+ kfree(bus_entry);
-+ }
-+}
-+
-+static pci_ers_result_t pcifront_common_process(int cmd,
-+ struct pcifront_device *pdev,
-+ pci_channel_state_t state)
-+{
-+ pci_ers_result_t result;
-+ struct pci_driver *pdrv;
-+ int bus = pdev->sh_info->aer_op.bus;
-+ int devfn = pdev->sh_info->aer_op.devfn;
-+ struct pci_dev *pcidev;
-+ int flag = 0;
-+
-+ dev_dbg(&pdev->xdev->dev,
-+ "pcifront AER process: cmd %x (bus:%x, devfn%x)",
-+ cmd, bus, devfn);
-+ result = PCI_ERS_RESULT_NONE;
-+
-+ pcidev = pci_get_bus_and_slot(bus, devfn);
-+ if (!pcidev || !pcidev->driver) {
-+ dev_err(&pcidev->dev,
-+ "device or driver is NULL\n");
-+ return result;
-+ }
-+ pdrv = pcidev->driver;
-+
-+ if (get_driver(&pdrv->driver)) {
-+ if (pdrv->err_handler && pdrv->err_handler->error_detected) {
-+ dev_dbg(&pcidev->dev,
-+ "trying to call AER service\n");
-+ if (pcidev) {
-+ flag = 1;
-+ switch (cmd) {
-+ case XEN_PCI_OP_aer_detected:
-+ result = pdrv->err_handler->
-+ error_detected(pcidev, state);
-+ break;
-+ case XEN_PCI_OP_aer_mmio:
-+ result = pdrv->err_handler->
-+ mmio_enabled(pcidev);
-+ break;
-+ case XEN_PCI_OP_aer_slotreset:
-+ result = pdrv->err_handler->
-+ slot_reset(pcidev);
-+ break;
-+ case XEN_PCI_OP_aer_resume:
-+ pdrv->err_handler->resume(pcidev);
-+ break;
-+ default:
-+ dev_err(&pdev->xdev->dev,
-+ "bad request in aer recovery "
-+ "operation!\n");
-+
-+ }
-+ }
-+ }
-+ put_driver(&pdrv->driver);
-+ }
-+ if (!flag)
-+ result = PCI_ERS_RESULT_NONE;
-+
-+ return result;
-+}
-+
-+
-+void pcifront_do_aer(struct work_struct *data)
-+{
-+ struct pcifront_device *pdev =
-+ container_of(data, struct pcifront_device, op_work);
-+ int cmd = pdev->sh_info->aer_op.cmd;
-+ pci_channel_state_t state =
-+ (pci_channel_state_t)pdev->sh_info->aer_op.err;
-+
-+ /*If a pci_conf op is in progress,
-+ we have to wait until it is done before service aer op*/
-+ dev_dbg(&pdev->xdev->dev,
-+ "pcifront service aer bus %x devfn %x\n",
-+ pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
-+
-+ pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state);
-+
-+ wmb();
-+ clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags);
-+ notify_remote_via_evtchn(pdev->evtchn);
-+
-+ /*in case of we lost an aer request in four lines time_window*/
-+ smp_mb__before_clear_bit();
-+ clear_bit(_PDEVB_op_active, &pdev->flags);
-+ smp_mb__after_clear_bit();
-+
-+ schedule_pcifront_aer_op(pdev);
-+
-+}
-+
-+irqreturn_t pcifront_handler_aer(int irq, void *dev)
-+{
-+ struct pcifront_device *pdev = dev;
-+ schedule_pcifront_aer_op(pdev);
-+ return IRQ_HANDLED;
-+}
-+int pcifront_connect(struct pcifront_device *pdev)
-+{
-+ int err = 0;
-+
-+ spin_lock(&pcifront_dev_lock);
-+
-+ if (!pcifront_dev) {
-+ dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
-+ pcifront_dev = pdev;
-+ } else {
-+ dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
-+ err = -EEXIST;
-+ }
-+
-+ spin_unlock(&pcifront_dev_lock);
-+
-+ return err;
-+}
-+
-+void pcifront_disconnect(struct pcifront_device *pdev)
-+{
-+ spin_lock(&pcifront_dev_lock);
-+
-+ if (pdev == pcifront_dev) {
-+ dev_info(&pdev->xdev->dev,
-+ "Disconnecting PCI Frontend Buses\n");
-+ pcifront_dev = NULL;
-+ }
-+
-+ spin_unlock(&pcifront_dev_lock);
-+}
-+static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
-+{
-+ struct pcifront_device *pdev;
-+
-+ pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
-+ if (pdev == NULL)
-+ goto out;
-+
-+ pdev->sh_info =
-+ (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
-+ if (pdev->sh_info == NULL) {
-+ kfree(pdev);
-+ pdev = NULL;
-+ goto out;
-+ }
-+ pdev->sh_info->flags = 0;
-+
-+ /*Flag for registering PV AER handler*/
-+ set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags);
-+
-+ dev_set_drvdata(&xdev->dev, pdev);
-+ pdev->xdev = xdev;
-+
-+ INIT_LIST_HEAD(&pdev->root_buses);
-+
-+ spin_lock_init(&pdev->sh_info_lock);
-+
-+ pdev->evtchn = INVALID_EVTCHN;
-+ pdev->gnt_ref = INVALID_GRANT_REF;
-+ pdev->irq = -1;
-+
-+ INIT_WORK(&pdev->op_work, pcifront_do_aer);
-+
-+ dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
-+ pdev, pdev->sh_info);
-+out:
-+ return pdev;
-+}
-+
-+static void free_pdev(struct pcifront_device *pdev)
-+{
-+ dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
-+
-+ pcifront_free_roots(pdev);
-+
-+ /*For PCIE_AER error handling job*/
-+ flush_scheduled_work();
-+ unbind_from_irqhandler(pdev->irq, pdev);
-+
-+ if (pdev->evtchn != INVALID_EVTCHN)
-+ xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
-+
-+ if (pdev->gnt_ref != INVALID_GRANT_REF)
-+ gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */,
-+ (unsigned long)pdev->sh_info);
-+
-+ dev_set_drvdata(&pdev->xdev->dev, NULL);
-+ kfree(pdev);
-+}
-+
-+static int pcifront_publish_info(struct pcifront_device *pdev)
-+{
-+ int err = 0;
-+ struct xenbus_transaction trans;
-+
-+ err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
-+ if (err < 0)
-+ goto out;
-+
-+ pdev->gnt_ref = err;
-+
-+ err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
-+ if (err)
-+ goto out;
-+
-+ err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer,
-+ 0, "pcifront", pdev);
-+ if (err < 0) {
-+ xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
-+ xenbus_dev_fatal(pdev->xdev, err, "Failed to bind evtchn to "
-+ "irqhandler.\n");
-+ return err;
-+ }
-+ pdev->irq = err;
-+
-+do_publish:
-+ err = xenbus_transaction_start(&trans);
-+ if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error writing configuration for backend "
-+ "(start transaction)");
-+ goto out;
-+ }
-+
-+ err = xenbus_printf(trans, pdev->xdev->nodename,
-+ "pci-op-ref", "%u", pdev->gnt_ref);
-+ if (!err)
-+ err = xenbus_printf(trans, pdev->xdev->nodename,
-+ "event-channel", "%u", pdev->evtchn);
-+ if (!err)
-+ err = xenbus_printf(trans, pdev->xdev->nodename,
-+ "magic", XEN_PCI_MAGIC);
-+
-+ if (err) {
-+ xenbus_transaction_end(trans, 1);
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error writing configuration for backend");
-+ goto out;
-+ } else {
-+ err = xenbus_transaction_end(trans, 0);
-+ if (err == -EAGAIN)
-+ goto do_publish;
-+ else if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error completing transaction "
-+ "for backend");
-+ goto out;
-+ }
-+ }
-+
-+ xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
-+
-+ dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
-+
-+out:
-+ return err;
-+}
-+
-+static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
-+{
-+ int err = -EFAULT;
-+ int i, num_roots, len;
-+ char str[64];
-+ unsigned int domain, bus;
-+
-+
-+ /* Only connect once */
-+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-+ XenbusStateInitialised)
-+ goto out;
-+
-+ err = pcifront_connect(pdev);
-+ if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error connecting PCI Frontend");
-+ goto out;
-+ }
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
-+ "root_num", "%d", &num_roots);
-+ if (err == -ENOENT) {
-+ xenbus_dev_error(pdev->xdev, err,
-+ "No PCI Roots found, trying 0000:00");
-+ err = pcifront_scan_root(pdev, 0, 0);
-+ num_roots = 0;
-+ } else if (err != 1) {
-+ if (err == 0)
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading number of PCI roots");
-+ goto out;
-+ }
-+
-+ for (i = 0; i < num_roots; i++) {
-+ len = snprintf(str, sizeof(str), "root-%d", i);
-+ if (unlikely(len >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
-+ "%x:%x", &domain, &bus);
-+ if (err != 2) {
-+ if (err >= 0)
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading PCI root %d", i);
-+ goto out;
-+ }
-+
-+ err = pcifront_scan_root(pdev, domain, bus);
-+ if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error scanning PCI root %04x:%02x",
-+ domain, bus);
-+ goto out;
-+ }
-+ }
-+
-+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
-+
-+out:
-+ return err;
-+}
-+
-+static int pcifront_try_disconnect(struct pcifront_device *pdev)
-+{
-+ int err = 0;
-+ enum xenbus_state prev_state;
-+
-+
-+ prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
-+
-+ if (prev_state >= XenbusStateClosing)
-+ goto out;
-+
-+ if (prev_state == XenbusStateConnected) {
-+ pcifront_free_roots(pdev);
-+ pcifront_disconnect(pdev);
-+ }
-+
-+ err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
-+
-+out:
-+
-+ return err;
-+}
-+
-+static int __devinit pcifront_attach_devices(struct pcifront_device *pdev)
-+{
-+ int err = -EFAULT;
-+ int i, num_roots, len;
-+ unsigned int domain, bus;
-+ char str[64];
-+
-+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-+ XenbusStateReconfiguring)
-+ goto out;
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
-+ "root_num", "%d", &num_roots);
-+ if (err == -ENOENT) {
-+ xenbus_dev_error(pdev->xdev, err,
-+ "No PCI Roots found, trying 0000:00");
-+ err = pcifront_rescan_root(pdev, 0, 0);
-+ num_roots = 0;
-+ } else if (err != 1) {
-+ if (err == 0)
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading number of PCI roots");
-+ goto out;
-+ }
-+
-+ for (i = 0; i < num_roots; i++) {
-+ len = snprintf(str, sizeof(str), "root-%d", i);
-+ if (unlikely(len >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
-+ "%x:%x", &domain, &bus);
-+ if (err != 2) {
-+ if (err >= 0)
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading PCI root %d", i);
-+ goto out;
-+ }
-+
-+ err = pcifront_rescan_root(pdev, domain, bus);
-+ if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error scanning PCI root %04x:%02x",
-+ domain, bus);
-+ goto out;
-+ }
-+ }
-+
-+ xenbus_switch_state(pdev->xdev, XenbusStateConnected);
-+
-+out:
-+ return err;
-+}
-+
-+static int pcifront_detach_devices(struct pcifront_device *pdev)
-+{
-+ int err = 0;
-+ int i, num_devs;
-+ unsigned int domain, bus, slot, func;
-+ struct pci_bus *pci_bus;
-+ struct pci_dev *pci_dev;
-+ char str[64];
-+
-+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-+ XenbusStateConnected)
-+ goto out;
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
-+ &num_devs);
-+ if (err != 1) {
-+ if (err >= 0)
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading number of PCI devices");
-+ goto out;
-+ }
-+
-+ /* Find devices being detached and remove them. */
-+ for (i = 0; i < num_devs; i++) {
-+ int l, state;
-+ l = snprintf(str, sizeof(str), "state-%d", i);
-+ if (unlikely(l >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d",
-+ &state);
-+ if (err != 1)
-+ state = XenbusStateUnknown;
-+
-+ if (state != XenbusStateClosing)
-+ continue;
-+
-+ /* Remove device. */
-+ l = snprintf(str, sizeof(str), "vdev-%d", i);
-+ if (unlikely(l >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
-+ "%x:%x:%x.%x", &domain, &bus, &slot, &func);
-+ if (err != 4) {
-+ if (err >= 0)
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading PCI device %d", i);
-+ goto out;
-+ }
-+
-+ pci_bus = pci_find_bus(domain, bus);
-+ if (!pci_bus) {
-+ dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n",
-+ domain, bus);
-+ continue;
-+ }
-+ pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
-+ if (!pci_dev) {
-+ dev_dbg(&pdev->xdev->dev,
-+ "Cannot get PCI device %04x:%02x:%02x.%02x\n",
-+ domain, bus, slot, func);
-+ continue;
-+ }
-+ pci_remove_bus_device(pci_dev);
-+ pci_dev_put(pci_dev);
-+
-+ dev_dbg(&pdev->xdev->dev,
-+ "PCI device %04x:%02x:%02x.%02x removed.\n",
-+ domain, bus, slot, func);
-+ }
-+
-+ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
-+
-+out:
-+ return err;
-+}
-+
-+static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
-+ enum xenbus_state be_state)
-+{
-+ struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
-+
-+ switch (be_state) {
-+ case XenbusStateUnknown:
-+ case XenbusStateInitialising:
-+ case XenbusStateInitWait:
-+ case XenbusStateInitialised:
-+ case XenbusStateClosed:
-+ break;
-+
-+ case XenbusStateConnected:
-+ pcifront_try_connect(pdev);
-+ break;
-+
-+ case XenbusStateClosing:
-+ dev_warn(&xdev->dev, "backend going away!\n");
-+ pcifront_try_disconnect(pdev);
-+ break;
-+
-+ case XenbusStateReconfiguring:
-+ pcifront_detach_devices(pdev);
-+ break;
-+
-+ case XenbusStateReconfigured:
-+ pcifront_attach_devices(pdev);
-+ break;
-+ }
-+}
-+
-+static int pcifront_xenbus_probe(struct xenbus_device *xdev,
-+ const struct xenbus_device_id *id)
-+{
-+ int err = 0;
-+ struct pcifront_device *pdev = alloc_pdev(xdev);
-+
-+ if (pdev == NULL) {
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(xdev, err,
-+ "Error allocating pcifront_device struct");
-+ goto out;
-+ }
-+
-+ err = pcifront_publish_info(pdev);
-+
-+out:
-+ return err;
-+}
-+
-+static int pcifront_xenbus_remove(struct xenbus_device *xdev)
-+{
-+ struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
-+
-+ if (pdev)
-+ free_pdev(pdev);
-+
-+ return 0;
-+}
-+
-+static const struct xenbus_device_id xenpci_ids[] = {
-+ {"pci"},
-+ {""},
-+};
-+
-+static struct xenbus_driver xenbus_pcifront_driver = {
-+ .name = "pcifront",
-+ .owner = THIS_MODULE,
-+ .ids = xenpci_ids,
-+ .probe = pcifront_xenbus_probe,
-+ .remove = pcifront_xenbus_remove,
-+ .otherend_changed = pcifront_backend_changed,
-+};
-+
-+static int __init pcifront_init(void)
-+{
-+ if (!xen_domain())
-+ return -ENODEV;
-+
-+ pci_frontend_registrar(1 /* enable */);
-+
-+ return xenbus_register_frontend(&xenbus_pcifront_driver);
-+}
-+
-+static void __exit pcifront_cleanup(void)
-+{
-+ xenbus_unregister_driver(&xenbus_pcifront_driver);
-+ pci_frontend_registrar(0 /* disable */);
-+}
-+module_init(pcifront_init);
-+module_exit(pcifront_cleanup);
-+
-+MODULE_DESCRIPTION("Xen PCI passthrough frontend.");
-+MODULE_LICENSE("GPL");
-+MODULE_ALIAS("xen:pci");
-diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
-index 188e1ba..efac9e3 100644
---- a/drivers/video/Kconfig
-+++ b/drivers/video/Kconfig
-@@ -2063,6 +2063,7 @@ config XEN_FBDEV_FRONTEND
- select FB_SYS_IMAGEBLIT
- select FB_SYS_FOPS
- select FB_DEFERRED_IO
-+ select XEN_XENBUS_FRONTEND
- default y
- help
- This driver implements the front-end of the Xen virtual
-diff --git a/drivers/video/broadsheetfb.c b/drivers/video/broadsheetfb.c
-index 509cb92..df9ccb9 100644
---- a/drivers/video/broadsheetfb.c
-+++ b/drivers/video/broadsheetfb.c
-@@ -470,7 +470,7 @@ static int __devinit broadsheetfb_probe(struct platform_device *dev)
- par->read_reg = broadsheet_read_reg;
- init_waitqueue_head(&par->waitq);
-
-- info->flags = FBINFO_FLAG_DEFAULT;
-+ info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB;
-
- info->fbdefio = &broadsheetfb_defio;
- fb_deferred_io_init(info);
-diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
-index c27ab1e..94414fc 100644
---- a/drivers/video/fb_defio.c
-+++ b/drivers/video/fb_defio.c
-@@ -144,7 +144,9 @@ static const struct address_space_operations fb_deferred_io_aops = {
- static int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
- {
- vma->vm_ops = &fb_deferred_io_vm_ops;
-- vma->vm_flags |= ( VM_IO | VM_RESERVED | VM_DONTEXPAND );
-+ vma->vm_flags |= ( VM_RESERVED | VM_DONTEXPAND );
-+ if (!(info->flags & FBINFO_VIRTFB))
-+ vma->vm_flags |= VM_IO;
- vma->vm_private_data = info;
- return 0;
- }
-diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
-index 99bbd28..057433a 100644
---- a/drivers/video/fbmem.c
-+++ b/drivers/video/fbmem.c
-@@ -1362,6 +1362,7 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
- vma->vm_pgoff = off >> PAGE_SHIFT;
- /* This is an IO map - tell maydump to skip this VMA */
- vma->vm_flags |= VM_IO | VM_RESERVED;
-+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
- fb_pgprotect(file, vma, off);
- if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start, vma->vm_page_prot))
-diff --git a/drivers/video/hecubafb.c b/drivers/video/hecubafb.c
-index 0b4bffb..f9d77ad 100644
---- a/drivers/video/hecubafb.c
-+++ b/drivers/video/hecubafb.c
-@@ -253,7 +253,7 @@ static int __devinit hecubafb_probe(struct platform_device *dev)
- par->send_command = apollo_send_command;
- par->send_data = apollo_send_data;
-
-- info->flags = FBINFO_FLAG_DEFAULT;
-+ info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB;
-
- info->fbdefio = &hecubafb_defio;
- fb_deferred_io_init(info);
-diff --git a/drivers/video/metronomefb.c b/drivers/video/metronomefb.c
-index df1f757..661bfd2 100644
---- a/drivers/video/metronomefb.c
-+++ b/drivers/video/metronomefb.c
-@@ -700,7 +700,7 @@ static int __devinit metronomefb_probe(struct platform_device *dev)
- if (retval < 0)
- goto err_free_irq;
-
-- info->flags = FBINFO_FLAG_DEFAULT;
-+ info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB;
-
- info->fbdefio = &metronomefb_defio;
- fb_deferred_io_init(info);
-diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
-index 54cd916..dc72563 100644
---- a/drivers/video/xen-fbfront.c
-+++ b/drivers/video/xen-fbfront.c
-@@ -25,7 +25,10 @@
- #include <linux/module.h>
- #include <linux/vmalloc.h>
- #include <linux/mm.h>
-+
- #include <asm/xen/hypervisor.h>
-+
-+#include <xen/xen.h>
- #include <xen/events.h>
- #include <xen/page.h>
- #include <xen/interface/io/fbif.h>
-@@ -440,7 +443,7 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
- fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
- fb_info->fix.accel = FB_ACCEL_NONE;
-
-- fb_info->flags = FBINFO_FLAG_DEFAULT;
-+ fb_info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB;
-
- ret = fb_alloc_cmap(&fb_info->cmap, 256, 0);
- if (ret < 0) {
-@@ -627,6 +630,8 @@ static void xenfb_backend_changed(struct xenbus_device *dev,
- switch (backend_state) {
- case XenbusStateInitialising:
- case XenbusStateInitialised:
-+ case XenbusStateReconfiguring:
-+ case XenbusStateReconfigured:
- case XenbusStateUnknown:
- case XenbusStateClosed:
- break;
-@@ -680,7 +685,7 @@ static struct xenbus_driver xenfb_driver = {
-
- static int __init xenfb_init(void)
- {
-- if (!xen_domain())
-+ if (!xen_domain() || xen_hvm_domain())
- return -ENODEV;
-
- /* Nothing to do if running in dom0. */
-diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
-index 3711b88..4fcb4c5 100644
---- a/drivers/watchdog/Kconfig
-+++ b/drivers/watchdog/Kconfig
-@@ -975,6 +975,16 @@ config WATCHDOG_RIO
-
- # XTENSA Architecture
-
-+# Xen Architecture
-+
-+config XEN_WDT
-+ tristate "Xen Watchdog support"
-+ depends on XEN
-+ help
-+ Say Y here to support the hypervisor watchdog capability provided
-+ by Xen 4.0 and newer. The watchdog timeout period is normally one
-+ minute but can be changed with a boot-time parameter.
-+
- #
- # ISA-based Watchdog Cards
- #
-diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
-index 699199b..2f6739a 100644
---- a/drivers/watchdog/Makefile
-+++ b/drivers/watchdog/Makefile
-@@ -141,6 +141,9 @@ obj-$(CONFIG_WATCHDOG_CP1XXX) += cpwd.o
-
- # XTENSA Architecture
-
-+# Xen
-+obj-$(CONFIG_XEN_WDT) += xen_wdt.o
-+
- # Architecture Independant
- obj-$(CONFIG_WM831X_WATCHDOG) += wm831x_wdt.o
- obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o
-diff --git a/drivers/watchdog/xen_wdt.c b/drivers/watchdog/xen_wdt.c
-new file mode 100644
-index 0000000..bcfaafb
---- /dev/null
-+++ b/drivers/watchdog/xen_wdt.c
-@@ -0,0 +1,359 @@
-+/*
-+ * Xen Watchdog Driver
-+ *
-+ * (c) Copyright 2010 Novell, Inc.
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version
-+ * 2 of the License, or (at your option) any later version.
-+ */
-+
-+#define DRV_NAME "wdt"
-+#define DRV_VERSION "0.01"
-+#define PFX DRV_NAME ": "
-+
-+#include <linux/bug.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/hrtimer.h>
-+#include <linux/kernel.h>
-+#include <linux/ktime.h>
-+#include <linux/init.h>
-+#include <linux/miscdevice.h>
-+#include <linux/module.h>
-+#include <linux/moduleparam.h>
-+#include <linux/platform_device.h>
-+#include <linux/spinlock.h>
-+#include <linux/uaccess.h>
-+#include <linux/watchdog.h>
-+#include <xen/xen.h>
-+#include <asm/xen/hypercall.h>
-+#include <xen/interface/sched.h>
-+
-+static struct platform_device *platform_device;
-+static DEFINE_SPINLOCK(wdt_lock);
-+static struct sched_watchdog wdt;
-+static __kernel_time_t wdt_expires;
-+static bool is_active, expect_release;
-+
-+#define WATCHDOG_TIMEOUT 60 /* in seconds */
-+static unsigned int timeout = WATCHDOG_TIMEOUT;
-+module_param(timeout, uint, S_IRUGO);
-+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds "
-+ "(default=" __MODULE_STRING(WATCHDOG_TIMEOUT) ")");
-+
-+static bool nowayout = WATCHDOG_NOWAYOUT;
-+module_param(nowayout, bool, S_IRUGO);
-+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
-+ "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-+
-+static inline __kernel_time_t set_timeout(void)
-+{
-+ wdt.timeout = timeout;
-+ return ktime_to_timespec(ktime_get()).tv_sec + timeout;
-+}
-+
-+static int xen_wdt_start(void)
-+{
-+ __kernel_time_t expires;
-+ int err;
-+
-+ spin_lock(&wdt_lock);
-+
-+ expires = set_timeout();
-+ if (!wdt.id)
-+ err = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wdt);
-+ else
-+ err = -EBUSY;
-+ if (err > 0) {
-+ wdt.id = err;
-+ wdt_expires = expires;
-+ err = 0;
-+ } else
-+ BUG_ON(!err);
-+
-+ spin_unlock(&wdt_lock);
-+
-+ return err;
-+}
-+
-+static int xen_wdt_stop(void)
-+{
-+ int err = 0;
-+
-+ spin_lock(&wdt_lock);
-+
-+ wdt.timeout = 0;
-+ if (wdt.id)
-+ err = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wdt);
-+ if (!err)
-+ wdt.id = 0;
-+
-+ spin_unlock(&wdt_lock);
-+
-+ return err;
-+}
-+
-+static int xen_wdt_kick(void)
-+{
-+ __kernel_time_t expires;
-+ int err;
-+
-+ spin_lock(&wdt_lock);
-+
-+ expires = set_timeout();
-+ if (wdt.id)
-+ err = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wdt);
-+ else
-+ err = -ENXIO;
-+ if (!err)
-+ wdt_expires = expires;
-+
-+ spin_unlock(&wdt_lock);
-+
-+ return err;
-+}
-+
-+static int xen_wdt_open(struct inode *inode, struct file *file)
-+{
-+ int err;
-+
-+ /* /dev/watchdog can only be opened once; a failed open gives it back */
-+ if (xchg(&is_active, true))
-+ return -EBUSY;
-+ err = xen_wdt_start();
-+ if (err == -EBUSY)
-+ err = xen_wdt_kick();
-+ is_active = !err; /* release() never runs after a failed open */
-+ return err ?: nonseekable_open(inode, file);
-+}
-+
-+static int xen_wdt_release(struct inode *inode, struct file *file)
-+{
-+ if (expect_release)
-+ xen_wdt_stop();
-+ else {
-+ printk(KERN_CRIT PFX
-+ "unexpected close, not stopping watchdog!\n");
-+ xen_wdt_kick();
-+ }
-+ is_active = false;
-+ expect_release = false;
-+ return 0;
-+}
-+
-+static ssize_t xen_wdt_write(struct file *file, const char __user *data,
-+ size_t len, loff_t *ppos)
-+{
-+ /* See if we got the magic character 'V' and reload the timer */
-+ if (len) {
-+ if (!nowayout) {
-+ size_t i;
-+
-+ /* in case it was set long ago */
-+ expect_release = false;
-+
-+ /* scan to see whether or not we got the magic
-+ character */
-+ for (i = 0; i != len; i++) {
-+ char c;
-+ if (get_user(c, data + i))
-+ return -EFAULT;
-+ if (c == 'V')
-+ expect_release = true;
-+ }
-+ }
-+
-+ /* someone wrote to us, we should reload the timer */
-+ xen_wdt_kick();
-+ }
-+ return len;
-+}
-+
-+static long xen_wdt_ioctl(struct file *file, unsigned int cmd,
-+ unsigned long arg)
-+{
-+ int new_options, retval = -EINVAL;
-+ int new_timeout;
-+ int __user *argp = (void __user *)arg;
-+ static const struct watchdog_info ident = {
-+ .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE,
-+ .firmware_version = 0,
-+ .identity = DRV_NAME,
-+ };
-+
-+ switch (cmd) {
-+ case WDIOC_GETSUPPORT:
-+ return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
-+
-+ case WDIOC_GETSTATUS:
-+ case WDIOC_GETBOOTSTATUS:
-+ return put_user(0, argp);
-+
-+ case WDIOC_SETOPTIONS:
-+ if (get_user(new_options, argp))
-+ return -EFAULT;
-+
-+ if (new_options & WDIOS_DISABLECARD)
-+ retval = xen_wdt_stop();
-+ if (new_options & WDIOS_ENABLECARD) {
-+ retval = xen_wdt_start();
-+ if (retval == -EBUSY)
-+ retval = xen_wdt_kick();
-+ }
-+ return retval;
-+
-+ case WDIOC_KEEPALIVE:
-+ xen_wdt_kick();
-+ return 0;
-+
-+ case WDIOC_SETTIMEOUT:
-+ if (get_user(new_timeout, argp))
-+ return -EFAULT;
-+ if (!new_timeout)
-+ return -EINVAL;
-+ timeout = new_timeout;
-+ xen_wdt_kick();
-+ /* fall through */
-+ case WDIOC_GETTIMEOUT:
-+ return put_user(timeout, argp);
-+
-+ case WDIOC_GETTIMELEFT:
-+ retval = wdt_expires - ktime_to_timespec(ktime_get()).tv_sec;
-+ return put_user(retval, argp);
-+ }
-+
-+ return -ENOTTY;
-+}
-+
-+static const struct file_operations xen_wdt_fops = {
-+ .owner = THIS_MODULE,
-+ .llseek = no_llseek,
-+ .write = xen_wdt_write,
-+ .unlocked_ioctl = xen_wdt_ioctl,
-+ .open = xen_wdt_open,
-+ .release = xen_wdt_release,
-+};
-+
-+static struct miscdevice xen_wdt_miscdev = {
-+ .minor = WATCHDOG_MINOR,
-+ .name = "watchdog",
-+ .fops = &xen_wdt_fops,
-+};
-+
-+static int __devinit xen_wdt_probe(struct platform_device *dev)
-+{
-+ struct sched_watchdog wd = { .id = ~0 };
-+ int ret = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);
-+
-+ switch (ret) {
-+ case -EINVAL:
-+ if (!timeout) {
-+ timeout = WATCHDOG_TIMEOUT;
-+ printk(KERN_INFO PFX
-+ "timeout value invalid, using %d\n", timeout);
-+ }
-+
-+ ret = misc_register(&xen_wdt_miscdev);
-+ if (ret) {
-+ printk(KERN_ERR PFX
-+ "cannot register miscdev on minor=%d (%d)\n",
-+ WATCHDOG_MINOR, ret);
-+ break;
-+ }
-+
-+ printk(KERN_INFO PFX
-+ "initialized (timeout=%ds, nowayout=%d)\n",
-+ timeout, nowayout);
-+ break;
-+
-+ case -ENOSYS:
-+ printk(KERN_INFO PFX "not supported\n");
-+ ret = -ENODEV;
-+ break;
-+
-+ default:
-+ printk(KERN_INFO PFX "bogus return value %d\n", ret);
-+ break;
-+ }
-+
-+ return ret;
-+}
-+
-+static int __devexit xen_wdt_remove(struct platform_device *dev)
-+{
-+ /* Stop the timer before we leave */
-+ if (!nowayout)
-+ xen_wdt_stop();
-+
-+ misc_deregister(&xen_wdt_miscdev);
-+
-+ return 0;
-+}
-+
-+static void xen_wdt_shutdown(struct platform_device *dev)
-+{
-+ xen_wdt_stop();
-+}
-+
-+static int xen_wdt_suspend(struct platform_device *dev, pm_message_t state)
-+{
-+ return xen_wdt_stop();
-+}
-+
-+static int xen_wdt_resume(struct platform_device *dev)
-+{
-+ return xen_wdt_start();
-+}
-+
-+static struct platform_driver xen_wdt_driver = {
-+ .probe = xen_wdt_probe,
-+ .remove = __devexit_p(xen_wdt_remove),
-+ .shutdown = xen_wdt_shutdown,
-+ .suspend = xen_wdt_suspend,
-+ .resume = xen_wdt_resume,
-+ .driver = {
-+ .owner = THIS_MODULE,
-+ .name = DRV_NAME,
-+ },
-+};
-+
-+static int __init xen_wdt_init_module(void)
-+{
-+ int err;
-+
-+ if (!xen_domain())
-+ return -ENODEV;
-+
-+ printk(KERN_INFO PFX "Xen WatchDog Timer Driver v%s\n", DRV_VERSION);
-+
-+ err = platform_driver_register(&xen_wdt_driver);
-+ if (err)
-+ return err;
-+
-+ platform_device = platform_device_register_simple(DRV_NAME,
-+ -1, NULL, 0);
-+ if (IS_ERR(platform_device)) {
-+ err = PTR_ERR(platform_device);
-+ platform_driver_unregister(&xen_wdt_driver);
-+ }
-+
-+ return err;
-+}
-+
-+static void __exit xen_wdt_cleanup_module(void)
-+{
-+ platform_device_unregister(platform_device);
-+ platform_driver_unregister(&xen_wdt_driver);
-+ printk(KERN_INFO PFX "module unloaded\n");
-+}
-+
-+module_init(xen_wdt_init_module);
-+module_exit(xen_wdt_cleanup_module);
-+
-+MODULE_AUTHOR("Jen Beulich <jbeulich@novell.com>");
-+MODULE_DESCRIPTION("Xen WatchDog Timer Driver");
-+MODULE_VERSION(DRV_VERSION);
-+MODULE_LICENSE("GPL");
-+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
-diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index cab100a..fa9982e 100644
---- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -28,6 +28,110 @@ config XEN_DEV_EVTCHN
- firing.
- If in doubt, say yes.
-
-+config XEN_BACKEND
-+ bool "Backend driver support"
-+ depends on XEN_DOM0
-+ default y
-+ help
-+ Support for backend device drivers that provide I/O services
-+ to other virtual machines.
-+
-+config XEN_NETDEV_BACKEND
-+ tristate "Xen backend network device"
-+ depends on XEN_BACKEND && NET
-+ help
-+ Implement the network backend driver, which passes packets
-+ from the guest domain's frontend drivers to the network.
-+
-+config XEN_BLKDEV_BACKEND
-+ tristate "Block-device backend driver"
-+ depends on XEN_BACKEND && BLOCK
-+ help
-+ The block-device backend driver allows the kernel to export its
-+ block devices to other guests via a high-performance shared-memory
-+ interface.
-+
-+
-+config XEN_BLKDEV_TAP
-+ tristate "Block-device tap backend driver"
-+ depends on XEN_BACKEND && BLOCK
-+ help
-+ The block tap driver is an alternative to the block back driver
-+ and allows VM block requests to be redirected to userspace through
-+ a device interface. The tap allows user-space development of
-+ high-performance block backends, where disk images may be implemented
-+ as files, in memory, or on other hosts across the network. This
-+ driver can safely coexist with the existing blockback driver.
-+
-+config XEN_BLKBACK_PAGEMAP
-+ tristate
-+ depends on XEN_BLKDEV_BACKEND != n && XEN_BLKDEV_TAP != n
-+ default XEN_BLKDEV_BACKEND || XEN_BLKDEV_TAP
-+
-+config XEN_PCIDEV_BACKEND
-+ tristate "PCI-device backend driver"
-+ depends on PCI && XEN_BACKEND
-+ default XEN_BACKEND
-+ help
-+ The PCI device backend driver allows the kernel to export arbitrary
-+ PCI devices to other guests. If you select this to be a module, you
-+ will need to make sure no other driver has bound to the device(s)
-+ you want to make visible to other guests.
-+
-+choice
-+ prompt "PCI Backend Mode"
-+ depends on XEN_PCIDEV_BACKEND
-+ default XEN_PCIDEV_BACKEND_VPCI if !IA64
-+ default XEN_PCIDEV_BACKEND_CONTROLLER if IA64
-+
-+config XEN_PCIDEV_BACKEND_VPCI
-+ bool "Virtual PCI"
-+ ---help---
-+ This PCI Backend hides the true PCI topology and makes the frontend
-+ think there is a single PCI bus with only the exported devices on it.
-+ For example, a device at 03:05.0 will be re-assigned to 00:00.0. A
-+ second device at 02:1a.1 will be re-assigned to 00:01.1.
-+
-+config XEN_PCIDEV_BACKEND_PASS
-+ bool "Passthrough"
-+ ---help---
-+ This PCI Backend provides a real view of the PCI topology to the
-+ frontend (for example, a device at 06:01.b will still appear at
-+ 06:01.b to the frontend). This is similar to how Xen 2.0.x exposed
-+ PCI devices to its driver domains. This may be required for drivers
-+ which depend on finding their hardware in certain bus/slot
-+ locations.
-+
-+config XEN_PCIDEV_BACKEND_SLOT
-+ bool "Slot"
-+ ---help---
-+ This PCI Backend hides the true PCI topology and makes the frontend
-+ think there is a single PCI bus with only the exported devices on it.
-+ Contrary to the virtual PCI backend, a function becomes a new slot.
-+ For example, a device at 03:05.2 will be re-assigned to 00:00.0. A
-+ second device at 02:1a.1 will be re-assigned to 00:01.0.
-+
-+config XEN_PCIDEV_BACKEND_CONTROLLER
-+ bool "Controller"
-+ depends on IA64
-+ ---help---
-+ This PCI backend virtualizes the PCI bus topology by providing a
-+ virtual bus per PCI root device. Devices which are physically under
-+ the same root bus will appear on the same virtual bus. For systems
-+ with complex I/O addressing, this is the only backend which supports
-+ extended I/O port spaces and MMIO translation offsets. This backend
-+ also supports slot virtualization. For example, a device at
-+ 0000:01:02.1 will be re-assigned to 0000:00:00.0. A second device
-+ at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be
-+ re-assigned to 0000:00:01.0. A third device at 0000:16:05.0 (under
-+ a different PCI root bus) will be re-assigned to 0000:01:00.0.
-+
-+endchoice
-+
-+config XEN_PCIDEV_BE_DEBUG
-+ bool "PCI Backend Debugging"
-+ depends on XEN_PCIDEV_BACKEND
-+
- config XENFS
- tristate "Xen filesystem"
- depends on XEN
-@@ -60,4 +164,37 @@ config XEN_SYS_HYPERVISOR
- Create entries under /sys/hypervisor describing the Xen
- hypervisor environment. When running native or in another
- virtual environment, /sys/hypervisor will still be present,
-- but will have no xen contents.
-\ No newline at end of file
-+ but will have no xen contents.
-+
-+config XEN_MCE
-+ def_bool y
-+ depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
-+
-+config XEN_XENBUS_FRONTEND
-+ tristate
-+
-+config XEN_GNTDEV
-+ tristate "userspace grant access device driver"
-+ depends on XEN
-+ select MMU_NOTIFIER
-+ help
-+ Allows userspace processes to use grants.
-+
-+config XEN_S3
-+ def_bool y
-+ depends on XEN_DOM0 && ACPI
-+
-+config ACPI_PROCESSOR_XEN
-+ tristate
-+ depends on XEN_DOM0 && ACPI_PROCESSOR && CPU_FREQ
-+ default y
-+
-+config XEN_PLATFORM_PCI
-+ tristate "xen platform pci device driver"
-+ depends on XEN_PVHVM
-+ default m
-+ help
-+ Driver for the Xen PCI Platform device: it is responsible for
-+ initializing xenbus and grant_table when running in a Xen HVM
-+ domain. As a consequence this driver is required to run any Xen PV
-+ frontend on Xen HVM.
-diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index 7c28434..ef1ea63 100644
---- a/drivers/xen/Makefile
-+++ b/drivers/xen/Makefile
-@@ -1,12 +1,27 @@
--obj-y += grant-table.o features.o events.o manage.o
-+obj-y += grant-table.o features.o events.o manage.o biomerge.o pcpu.o
- obj-y += xenbus/
-
- nostackp := $(call cc-option, -fno-stack-protector)
- CFLAGS_features.o := $(nostackp)
-
--obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
--obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
--obj-$(CONFIG_XEN_BALLOON) += balloon.o
--obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
--obj-$(CONFIG_XENFS) += xenfs/
--obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
-\ No newline at end of file
-+obj-$(CONFIG_PCI) += pci.o
-+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
-+obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
-+obj-$(CONFIG_XEN_BALLOON) += balloon.o
-+obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
-+obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
-+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
-+obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
-+obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
-+obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
-+obj-$(CONFIG_XENFS) += xenfs/
-+obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
-+obj-$(CONFIG_XEN_MCE) += mce.o
-+
-+obj-$(CONFIG_XEN_S3) += acpi.o
-+obj-$(CONFIG_ACPI_PROCESSOR_XEN) += acpi_processor.o
-+obj-$(CONFIG_ACPI_HOTPLUG_MEMORY) += xen_acpi_memhotplug.o
-+obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
-+
-+xen-evtchn-y := evtchn.o
-+xen-gntdev-y := gntdev.o
-diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
-new file mode 100644
-index 0000000..e6d3d0e
---- /dev/null
-+++ b/drivers/xen/acpi.c
-@@ -0,0 +1,23 @@
-+#include <xen/acpi.h>
-+
-+#include <xen/interface/platform.h>
-+#include <asm/xen/hypercall.h>
-+#include <asm/xen/hypervisor.h>
-+
-+int acpi_notify_hypervisor_state(u8 sleep_state,
-+ u32 pm1a_cnt, u32 pm1b_cnt)
-+{
-+ struct xen_platform_op op = {
-+ .cmd = XENPF_enter_acpi_sleep,
-+ .interface_version = XENPF_INTERFACE_VERSION,
-+ .u = {
-+ .enter_acpi_sleep = {
-+ .pm1a_cnt_val = (u16)pm1a_cnt,
-+ .pm1b_cnt_val = (u16)pm1b_cnt,
-+ .sleep_state = sleep_state,
-+ },
-+ },
-+ };
-+
-+ return HYPERVISOR_dom0_op(&op);
-+}
-diff --git a/drivers/xen/acpi_processor.c b/drivers/xen/acpi_processor.c
-new file mode 100644
-index 0000000..e83b615
---- /dev/null
-+++ b/drivers/xen/acpi_processor.c
-@@ -0,0 +1,417 @@
-+/*
-+ * acpi_processor.c - interface to notify Xen on acpi processor object
-+ * info parsing
-+ *
-+ * Copyright (C) 2008, Intel corporation
-+ *
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at
-+ * your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along
-+ * with this program; if not, write to the Free Software Foundation, Inc.,
-+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
-+ *
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/init.h>
-+#include <linux/types.h>
-+#include <linux/acpi.h>
-+#include <linux/pm.h>
-+#include <linux/cpu.h>
-+
-+#include <linux/cpufreq.h>
-+#include <acpi/processor.h>
-+#include <xen/acpi.h>
-+#include <xen/pcpu.h>
-+
-+#include <asm/xen/hypercall.h>
-+#include <asm/xen/hypervisor.h>
-+
-+static int xen_hotplug_notifier(struct acpi_processor *pr, int event);
-+
-+static struct processor_cntl_xen_ops xen_ops = {
-+ .hotplug = xen_hotplug_notifier,
-+};
-+
-+static struct acpi_power_register *power_registers[XEN_MAX_ACPI_ID + 1];
-+
-+int processor_cntl_xen_power_cache(int cpu, int cx,
-+ struct acpi_power_register *reg)
-+{
-+ struct acpi_power_register *buf;
-+
-+ /* cpu indexes power_registers[]; cx is 1-based and stored at slot cx-1 */
-+ if (cpu < 0 || cpu > XEN_MAX_ACPI_ID ||
-+ cx < 1 || cx > ACPI_PROCESSOR_MAX_POWER)
-+ return -EINVAL;
-+
-+ if (power_registers[cpu] == NULL) {
-+ /* size by the element type actually stored in the cache */
-+ buf = kcalloc(ACPI_PROCESSOR_MAX_POWER, sizeof(*buf), GFP_KERNEL);
-+ if (buf == NULL)
-+ return -ENOMEM;
-+
-+ power_registers[cpu] = buf;
-+ }
-+
-+ memcpy(power_registers[cpu]+cx-1, reg, sizeof(*reg));
-+
-+ return 0;
-+}
-+EXPORT_SYMBOL(processor_cntl_xen_power_cache);
-+
-+#ifdef CONFIG_ACPI_HOTPLUG_CPU
-+static int xen_get_apic_id(acpi_handle handle)
-+{
-+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-+ union acpi_object *obj;
-+ struct acpi_madt_local_apic *lapic;
-+ u8 physid;
-+
-+ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
-+ return -EINVAL;
-+
-+ if (!buffer.length || !buffer.pointer)
-+ return -EINVAL;
-+
-+ obj = buffer.pointer;
-+ if (obj->type != ACPI_TYPE_BUFFER ||
-+ obj->buffer.length < sizeof(*lapic)) {
-+ kfree(buffer.pointer);
-+ return -EINVAL;
-+ }
-+
-+ lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer;
-+
-+ if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC ||
-+ !(lapic->lapic_flags & ACPI_MADT_ENABLED)) {
-+ kfree(buffer.pointer);
-+ return -EINVAL;
-+ }
-+
-+ physid = lapic->id;
-+ kfree(buffer.pointer);
-+ buffer.length = ACPI_ALLOCATE_BUFFER;
-+ buffer.pointer = NULL;
-+
-+ return physid;
-+}
-+#else
-+static int xen_get_apic_id(acpi_handle handle)
-+{
-+ return -1;
-+}
-+#endif
-+
-+int processor_cntl_xen_notify(struct acpi_processor *pr, int event, int type)
-+{
-+ int ret = -EINVAL;
-+
-+ switch (event) {
-+ case PROCESSOR_PM_INIT:
-+ case PROCESSOR_PM_CHANGE:
-+ /* type indexes pm_ops[]: reject negative values as well */
-+ if (type < 0 || type >= PM_TYPE_MAX || !xen_ops.pm_ops[type])
-+ break;
-+
-+ ret = xen_ops.pm_ops[type](pr, event);
-+ break;
-+ case PROCESSOR_HOTPLUG:
-+ {
-+ int apic_id;
-+
-+ apic_id = xen_get_apic_id(pr->handle);
-+ if (apic_id < 0)
-+ break;
-+ if (xen_ops.hotplug)
-+ ret = xen_ops.hotplug(pr, type);
-+ xen_pcpu_hotplug(type, apic_id);
-+ break;
-+ }
-+ default:
-+ printk(KERN_ERR "Unsupport processor events %d.\n", event);
-+ break;
-+ }
-+
-+ return ret;
-+}
-+EXPORT_SYMBOL(processor_cntl_xen_notify);
-+
-+static inline void xen_convert_pct_reg(struct xen_pct_register *xpct,
-+ struct acpi_pct_register *apct)
-+{
-+ xpct->descriptor = apct->descriptor;
-+ xpct->length = apct->length;
-+ xpct->space_id = apct->space_id;
-+ xpct->bit_width = apct->bit_width;
-+ xpct->bit_offset = apct->bit_offset;
-+ xpct->reserved = apct->reserved;
-+ xpct->address = apct->address;
-+}
-+
-+static inline void xen_convert_pss_states(struct xen_processor_px *xpss,
-+ struct acpi_processor_px *apss, int state_count)
-+{
-+ int i;
-+ for (i = 0; i < state_count; i++) {
-+ xpss->core_frequency = apss->core_frequency;
-+ xpss->power = apss->power;
-+ xpss->transition_latency = apss->transition_latency;
-+ xpss->bus_master_latency = apss->bus_master_latency;
-+ xpss->control = apss->control;
-+ xpss->status = apss->status;
-+ xpss++;
-+ apss++;
-+ }
-+}
-+
-+static inline void xen_convert_psd_pack(struct xen_psd_package *xpsd,
-+ struct acpi_psd_package *apsd)
-+{
-+ xpsd->num_entries = apsd->num_entries;
-+ xpsd->revision = apsd->revision;
-+ xpsd->domain = apsd->domain;
-+ xpsd->coord_type = apsd->coord_type;
-+ xpsd->num_processors = apsd->num_processors;
-+}
-+
-+static int xen_cx_notifier(struct acpi_processor *pr, int action)
-+{
-+ int ret, count = 0, i;
-+ xen_platform_op_t op = {
-+ .cmd = XENPF_set_processor_pminfo,
-+ .interface_version = XENPF_INTERFACE_VERSION,
-+ .u.set_pminfo.id = pr->acpi_id,
-+ .u.set_pminfo.type = XEN_PM_CX,
-+ };
-+ struct xen_processor_cx *data, *buf;
-+ struct acpi_processor_cx *cx;
-+ struct acpi_power_register *reg;
-+
-+ if (action == PROCESSOR_PM_CHANGE)
-+ return -EINVAL;
-+
-+ if (power_registers[pr->acpi_id] == NULL) {
-+ printk(KERN_WARNING "No C state info for acpi processor %d\n",
-+ pr->acpi_id);
-+ return -EINVAL;
-+ }
-+
-+ /* Convert to Xen defined structure and hypercall */
-+ buf = kzalloc(pr->power.count * sizeof(struct xen_processor_cx),
-+ GFP_KERNEL);
-+ if (!buf)
-+ return -ENOMEM;
-+
-+ data = buf;
-+ for (i = 1; i <= pr->power.count; i++) {
-+ cx = &pr->power.states[i];
-+ reg = power_registers[pr->acpi_id]+i-1;
-+ /* Skip invalid cstate entry */
-+ if (!cx->valid)
-+ continue;
-+
-+ data->type = cx->type;
-+ data->latency = cx->latency;
-+ data->power = cx->power;
-+ data->reg.space_id = reg->space_id;
-+ data->reg.bit_width = reg->bit_width;
-+ data->reg.bit_offset = reg->bit_offset;
-+ data->reg.access_size = reg->access_size;
-+ data->reg.address = reg->address;
-+
-+ /* Get dependency relationships, _CSD is not supported yet */
-+ data->dpcnt = 0;
-+ set_xen_guest_handle(data->dp, NULL);
-+
-+ data++;
-+ count++;
-+ }
-+
-+ if (!count) {
-+ printk(KERN_ERR "No available Cx info for cpu %d\n",
-+ pr->acpi_id);
-+ kfree(buf);
-+ return -EINVAL;
-+ }
-+
-+ op.u.set_pminfo.power.count = count;
-+ op.u.set_pminfo.power.flags.bm_control = pr->flags.bm_control;
-+ op.u.set_pminfo.power.flags.bm_check = pr->flags.bm_check;
-+ op.u.set_pminfo.power.flags.has_cst = pr->flags.has_cst;
-+ op.u.set_pminfo.power.flags.power_setup_done =
-+ pr->flags.power_setup_done;
-+
-+ set_xen_guest_handle(op.u.set_pminfo.power.states, buf);
-+ ret = HYPERVISOR_dom0_op(&op);
-+ kfree(buf);
-+ return ret;
-+}
-+
-+/* Push ACPI Px (performance) data for @pr to Xen (XENPF_set_processor_pminfo). */
-+static int xen_px_notifier(struct acpi_processor *pr, int action)
-+{
-+ int ret = -EINVAL;
-+ xen_platform_op_t op = {
-+ .cmd = XENPF_set_processor_pminfo,
-+ .interface_version = XENPF_INTERFACE_VERSION,
-+ .u.set_pminfo.type = XEN_PM_PX,
-+ };
-+ struct xen_processor_performance *perf;
-+ struct xen_processor_px *states = NULL;
-+ struct acpi_processor_performance *px;
-+ struct acpi_psd_package *pdomain;
-+
-+ if (!pr)
-+ return -EINVAL;
-+
-+ op.u.set_pminfo.id = pr->acpi_id; /* only touch pr after the NULL check */
-+ perf = &op.u.set_pminfo.perf;
-+ px = pr->performance;
-+
-+ switch (action) {
-+ case PROCESSOR_PM_CHANGE:
-+ /* ppc dynamic handle */
-+ perf->flags = XEN_PX_PPC;
-+ perf->platform_limit = pr->performance_platform_limit;
-+
-+ ret = HYPERVISOR_dom0_op(&op);
-+ break;
-+
-+ case PROCESSOR_PM_INIT:
-+ /* px normal init */
-+ perf->flags = XEN_PX_PPC |
-+ XEN_PX_PCT |
-+ XEN_PX_PSS |
-+ XEN_PX_PSD;
-+
-+ /* ppc */
-+ perf->platform_limit = pr->performance_platform_limit;
-+
-+ /* pct */
-+ xen_convert_pct_reg(&perf->control_register,
-+ &px->control_register);
-+ xen_convert_pct_reg(&perf->status_register,
-+ &px->status_register);
-+
-+ /* pss */
-+ perf->state_count = px->state_count;
-+ states = kcalloc(px->state_count, sizeof(*states), GFP_KERNEL);
-+ if (!states)
-+ return -ENOMEM;
-+ xen_convert_pss_states(states, px->states, px->state_count);
-+ set_xen_guest_handle(perf->states, states);
-+
-+ /* psd */
-+ pdomain = &px->domain_info;
-+ xen_convert_psd_pack(&perf->domain_info, pdomain);
-+ if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL)
-+ perf->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-+ else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY)
-+ perf->shared_type = CPUFREQ_SHARED_TYPE_ANY;
-+ else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL)
-+ perf->shared_type = CPUFREQ_SHARED_TYPE_HW;
-+ else {
-+ ret = -ENODEV;
-+ kfree(states);
-+ break;
-+ }
-+
-+ ret = HYPERVISOR_dom0_op(&op);
-+ kfree(states);
-+ break;
-+
-+ default:
-+ break;
-+ }
-+
-+ return ret;
-+}
-+
-+static int xen_tx_notifier(struct acpi_processor *pr, int action)
-+{
-+ return -EINVAL;
-+}
-+
-+#ifdef CONFIG_ACPI_HOTPLUG_CPU
-+/* Forward an ACPI CPU hotplug event (HOTPLUG_TYPE_*) for @pr to Xen. */
-+static int xen_hotplug_notifier(struct acpi_processor *pr, int event)
-+{
-+ int ret = -EINVAL;
-+ /* signed, so the negative error from xen_get_apic_id() is detectable */
-+ int apic_id;
-+ unsigned long long pxm;
-+ acpi_status status = 0;
-+ xen_platform_op_t op = {
-+ .interface_version = XENPF_INTERFACE_VERSION,
-+ };
-+
-+ apic_id = xen_get_apic_id(pr->handle);
-+ if (apic_id < 0) {
-+ printk(KERN_WARNING "Can't get apic_id for acpi_id %x\n",
-+ pr->acpi_id);
-+ return -1;
-+ }
-+
-+ status = acpi_evaluate_integer(pr->handle, "_PXM", NULL, &pxm);
-+ if (ACPI_FAILURE(status)) {
-+ printk(KERN_WARNING "can't get pxm for acpi_id %x\n",
-+ pr->acpi_id);
-+ return -1;
-+ }
-+
-+ switch (event) {
-+ case HOTPLUG_TYPE_ADD:
-+ op.cmd = XENPF_cpu_hotadd;
-+ op.u.cpu_add.apic_id = apic_id;
-+ op.u.cpu_add.acpi_id = pr->acpi_id;
-+ op.u.cpu_add.pxm = pxm;
-+ ret = HYPERVISOR_dom0_op(&op);
-+ break;
-+ case HOTPLUG_TYPE_REMOVE:
-+ printk(KERN_WARNING "Xen not support CPU hotremove\n");
-+ ret = -ENOSYS;
-+ break;
-+ }
-+
-+ return ret;
-+}
-+#else
-+static int xen_hotplug_notifier(struct acpi_processor *pr, int event)
-+{
-+ return -ENOSYS;
-+}
-+#endif
-+
-+static int __init xen_acpi_processor_extcntl_init(void)
-+{
-+ unsigned int pmbits;
-+
-+ /* Only xen dom0 is allowed to handle ACPI processor info */
-+ if (!xen_initial_domain())
-+ return 0;
-+
-+ pmbits = (xen_start_info->flags & SIF_PM_MASK) >> 8;
-+
-+ if (pmbits & XEN_PROCESSOR_PM_CX)
-+ xen_ops.pm_ops[PM_TYPE_IDLE] = xen_cx_notifier;
-+ if (pmbits & XEN_PROCESSOR_PM_PX)
-+ xen_ops.pm_ops[PM_TYPE_PERF] = xen_px_notifier;
-+ if (pmbits & XEN_PROCESSOR_PM_TX)
-+ xen_ops.pm_ops[PM_TYPE_THR] = xen_tx_notifier;
-+
-+ return 0;
-+}
-+
-+subsys_initcall(xen_acpi_processor_extcntl_init);
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
-index 4204336..158cdd1 100644
---- a/drivers/xen/balloon.c
-+++ b/drivers/xen/balloon.c
-@@ -43,22 +43,26 @@
- #include <linux/mutex.h>
- #include <linux/list.h>
- #include <linux/sysdev.h>
-+#include <linux/swap.h>
-
- #include <asm/page.h>
- #include <asm/pgalloc.h>
- #include <asm/pgtable.h>
- #include <asm/uaccess.h>
- #include <asm/tlb.h>
-+#include <asm/e820.h>
-
- #include <asm/xen/hypervisor.h>
- #include <asm/xen/hypercall.h>
-+
-+#include <xen/xen.h>
- #include <xen/interface/xen.h>
- #include <xen/interface/memory.h>
- #include <xen/xenbus.h>
- #include <xen/features.h>
- #include <xen/page.h>
-
--#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
-+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10))
-
- #define BALLOON_CLASS_NAME "xen_memory"
-
-@@ -82,14 +86,15 @@ static struct sys_device balloon_sysdev;
-
- static int register_balloon(struct sys_device *sysdev);
-
-+static struct balloon_stats balloon_stats;
-+
- /*
-- * Protects atomic reservation decrease/increase against concurrent increases.
-- * Also protects non-atomic updates of current_pages and driver_pages, and
-- * balloon lists.
-+ * Work in pages of this order. Can be either 0 for normal pages
-+ * or 9 for hugepages.
- */
--static DEFINE_SPINLOCK(balloon_lock);
--
--static struct balloon_stats balloon_stats;
-+static int balloon_order;
-+static unsigned long balloon_npages;
-+static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)];
-
- /* We increase/decrease in batches which fit in a page */
- static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
-@@ -118,12 +123,43 @@ static struct timer_list balloon_timer;
- static void scrub_page(struct page *page)
- {
- #ifdef CONFIG_XEN_SCRUB_PAGES
-- clear_highpage(page);
-+ int i;
-+
-+ for (i = 0; i < balloon_npages; i++)
-+ clear_highpage(page++);
- #endif
- }
-
-+static void free_discontig_frame(void)
-+{
-+ int rc;
-+ struct xen_memory_reservation reservation = {
-+ .address_bits = 0,
-+ .domid = DOMID_SELF,
-+ .nr_extents = balloon_npages,
-+ .extent_order = 0
-+ };
-+
-+ set_xen_guest_handle(reservation.extent_start, discontig_frame_list);
-+ rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-+ BUG_ON(rc != balloon_npages);
-+}
-+
-+static unsigned long shrink_frame(unsigned long nr_pages)
-+{
-+ unsigned long i, j;
-+
-+ for (i = 0, j = 0; i < nr_pages; i++, j++) {
-+ if (frame_list[i] == 0)
-+ j++;
-+ if (i != j)
-+ frame_list[i] = frame_list[j];
-+ }
-+ return i;
-+}
-+
- /* balloon_append: add the given page to the balloon. */
--static void balloon_append(struct page *page)
-+static void __balloon_append(struct page *page)
- {
- /* Lowmem is re-populated first, so highmem pages go at list tail. */
- if (PageHighMem(page)) {
-@@ -134,7 +170,11 @@ static void balloon_append(struct page *page)
- list_add(&page->lru, &ballooned_pages);
- balloon_stats.balloon_low++;
- }
-+}
-
-+static void balloon_append(struct page *page)
-+{
-+ __balloon_append(page);
- totalram_pages--;
- }
-
-@@ -195,20 +235,17 @@ static unsigned long current_target(void)
-
- static int increase_reservation(unsigned long nr_pages)
- {
-- unsigned long pfn, i, flags;
-+ unsigned long pfn, mfn, i, j;
- struct page *page;
- long rc;
- struct xen_memory_reservation reservation = {
- .address_bits = 0,
-- .extent_order = 0,
- .domid = DOMID_SELF
- };
-
- if (nr_pages > ARRAY_SIZE(frame_list))
- nr_pages = ARRAY_SIZE(frame_list);
-
-- spin_lock_irqsave(&balloon_lock, flags);
--
- page = balloon_first_page();
- for (i = 0; i < nr_pages; i++) {
- BUG_ON(page == NULL);
-@@ -218,6 +255,8 @@ static int increase_reservation(unsigned long nr_pages)
-
- set_xen_guest_handle(reservation.extent_start, frame_list);
- reservation.nr_extents = nr_pages;
-+ reservation.extent_order = balloon_order;
-+
- rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
- if (rc < 0)
- goto out;
-@@ -227,19 +266,22 @@ static int increase_reservation(unsigned long nr_pages)
- BUG_ON(page == NULL);
-
- pfn = page_to_pfn(page);
-+ mfn = frame_list[i];
- BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
- phys_to_machine_mapping_valid(pfn));
-
-- set_phys_to_machine(pfn, frame_list[i]);
--
-- /* Link back into the page tables if not highmem. */
-- if (pfn < max_low_pfn) {
-- int ret;
-- ret = HYPERVISOR_update_va_mapping(
-- (unsigned long)__va(pfn << PAGE_SHIFT),
-- mfn_pte(frame_list[i], PAGE_KERNEL),
-- 0);
-- BUG_ON(ret);
-+ for (j = 0; j < balloon_npages; j++, pfn++, mfn++) {
-+ set_phys_to_machine(pfn, mfn);
-+
-+ /* Link back into the page tables if not highmem. */
-+ if (pfn < max_low_pfn) {
-+ int ret;
-+ ret = HYPERVISOR_update_va_mapping(
-+ (unsigned long)__va(pfn << PAGE_SHIFT),
-+ mfn_pte(mfn, PAGE_KERNEL),
-+ 0);
-+ BUG_ON(ret);
-+ }
- }
-
- /* Relinquish the page back to the allocator. */
-@@ -251,20 +293,18 @@ static int increase_reservation(unsigned long nr_pages)
- balloon_stats.current_pages += rc;
-
- out:
-- spin_unlock_irqrestore(&balloon_lock, flags);
--
- return rc < 0 ? rc : rc != nr_pages;
- }
-
- static int decrease_reservation(unsigned long nr_pages)
- {
-- unsigned long pfn, i, flags;
-- struct page *page;
-+ unsigned long pfn, lpfn, mfn, i, j;
-+ struct page *page = NULL;
- int need_sleep = 0;
-- int ret;
-+ int discontig, discontig_free;
-+ int ret;
- struct xen_memory_reservation reservation = {
- .address_bits = 0,
-- .extent_order = 0,
- .domid = DOMID_SELF
- };
-
-@@ -272,7 +312,7 @@ static int decrease_reservation(unsigned long nr_pages)
- nr_pages = ARRAY_SIZE(frame_list);
-
- for (i = 0; i < nr_pages; i++) {
-- if ((page = alloc_page(GFP_BALLOON)) == NULL) {
-+ if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) {
- nr_pages = i;
- need_sleep = 1;
- break;
-@@ -282,38 +322,49 @@ static int decrease_reservation(unsigned long nr_pages)
- frame_list[i] = pfn_to_mfn(pfn);
-
- scrub_page(page);
--
-- if (!PageHighMem(page)) {
-- ret = HYPERVISOR_update_va_mapping(
-- (unsigned long)__va(pfn << PAGE_SHIFT),
-- __pte_ma(0), 0);
-- BUG_ON(ret);
-- }
--
- }
-
- /* Ensure that ballooned highmem pages don't have kmaps. */
- kmap_flush_unused();
- flush_tlb_all();
-
-- spin_lock_irqsave(&balloon_lock, flags);
--
- /* No more mappings: invalidate P2M and add to balloon. */
- for (i = 0; i < nr_pages; i++) {
-- pfn = mfn_to_pfn(frame_list[i]);
-- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-+ mfn = frame_list[i];
-+ lpfn = pfn = mfn_to_pfn(mfn);
- balloon_append(pfn_to_page(pfn));
-+ discontig_free = 0;
-+ for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) {
-+ if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) != mfn)
-+ discontig_free = 1;
-+
-+ set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
-+ page = pfn_to_page(lpfn);
-+
-+ if (!PageHighMem(page)) {
-+ ret = HYPERVISOR_update_va_mapping(
-+ (unsigned long)__va(lpfn << PAGE_SHIFT),
-+ __pte_ma(0), 0);
-+ BUG_ON(ret);
-+ }
-+ }
-+ if (discontig_free) {
-+ free_discontig_frame();
-+ frame_list[i] = 0;
-+ discontig = 1;
-+ }
- }
-+ balloon_stats.current_pages -= nr_pages;
-+
-+ if (discontig)
-+ nr_pages = shrink_frame(nr_pages);
-
- set_xen_guest_handle(reservation.extent_start, frame_list);
- reservation.nr_extents = nr_pages;
-+ reservation.extent_order = balloon_order;
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
- BUG_ON(ret != nr_pages);
-
-- balloon_stats.current_pages -= nr_pages;
--
-- spin_unlock_irqrestore(&balloon_lock, flags);
--
- return need_sleep;
- }
-
-@@ -379,7 +430,7 @@ static void watch_target(struct xenbus_watch *watch,
- /* The given memory/target value is in KiB, so it needs converting to
- * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
- */
-- balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
-+ balloon_set_new_target(new_target >> ((PAGE_SHIFT - 10) + balloon_order));
- }
-
- static int balloon_init_watcher(struct notifier_block *notifier,
-@@ -399,15 +450,18 @@ static struct notifier_block xenstore_notifier;
-
- static int __init balloon_init(void)
- {
-- unsigned long pfn;
-+ unsigned long pfn, extra_pfn_end;
- struct page *page;
-
- if (!xen_pv_domain())
- return -ENODEV;
-
-- pr_info("xen_balloon: Initialising balloon driver.\n");
-+ pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
-+ balloon_order);
-+
-+ balloon_npages = 1 << balloon_order;
-
-- balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
-+ balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
- balloon_stats.target_pages = balloon_stats.current_pages;
- balloon_stats.balloon_low = 0;
- balloon_stats.balloon_high = 0;
-@@ -419,11 +473,24 @@ static int __init balloon_init(void)
-
- register_balloon(&balloon_sysdev);
-
-- /* Initialise the balloon with excess memory space. */
-- for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
-+ /*
-+ * Initialise the balloon with excess memory space. We need
-+ * to make sure we don't add memory which doesn't exist or
-+ * logically exist. The E820 map can be trimmed to be smaller
-+ * than the amount of physical memory due to the mem= command
-+ * line parameter. And if this is a 32-bit non-HIGHMEM kernel
-+ * on a system with memory which requires highmem to access,
-+ * don't try to use it.
-+ */
-+ extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
-+ (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
-+ for (pfn = PFN_UP(xen_extra_mem_start);
-+ pfn < extra_pfn_end;
-+ pfn += balloon_npages) {
- page = pfn_to_page(pfn);
-- if (!PageReserved(page))
-- balloon_append(page);
-+ /* totalram_pages doesn't include the boot-time
-+ balloon extension, so don't subtract from it. */
-+ __balloon_append(page);
- }
-
- target_watch.callback = watch_target;
-@@ -444,6 +511,121 @@ static void balloon_exit(void)
-
- module_exit(balloon_exit);
-
-+static int __init balloon_parse_huge(char *s)
-+{
-+ balloon_order = 9;
-+ return 1;
-+}
-+
-+__setup("balloon_hugepages", balloon_parse_huge);
-+
-+static int dealloc_pte_fn(pte_t *pte, struct page *pmd_page,
-+ unsigned long addr, void *data)
-+{
-+ unsigned long mfn = pte_mfn(*pte);
-+ int ret;
-+ struct xen_memory_reservation reservation = {
-+ .nr_extents = 1,
-+ .extent_order = 0,
-+ .domid = DOMID_SELF
-+ };
-+
-+ set_xen_guest_handle(reservation.extent_start, &mfn);
-+ set_pte_at(&init_mm, addr, pte, __pte_ma(0));
-+ set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
-+
-+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-+ BUG_ON(ret != 1);
-+
-+ return 0;
-+}
-+
-+struct page **alloc_empty_pages_and_pagevec(int nr_pages)
-+{
-+ struct page *page, **pagevec;
-+ int npages;
-+ int i, j, ret;
-+
-+ /* Round up to next number of balloon_order pages */
-+ npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
-+
-+ pagevec = kmalloc(sizeof(page) * nr_pages << balloon_order, GFP_KERNEL);
-+ if (pagevec == NULL)
-+ return NULL;
-+
-+ for (i = 0; i < nr_pages; i++) {
-+ void *v;
-+
-+ page = alloc_pages(GFP_KERNEL|__GFP_COLD, balloon_order);
-+ if (page == NULL)
-+ goto err;
-+
-+ scrub_page(page);
-+
-+ mutex_lock(&balloon_mutex);
-+
-+ v = page_address(page);
-+
-+ ret = apply_to_page_range(&init_mm, (unsigned long)v,
-+ PAGE_SIZE << balloon_order,
-+ dealloc_pte_fn, NULL);
-+
-+ if (ret != 0) {
-+ mutex_unlock(&balloon_mutex);
-+ //balloon_free_page(page); /* tries to use free_cold_page */
-+ __free_page(page);
-+ goto err;
-+ }
-+ for (j = 0; j < balloon_npages; j++)
-+ pagevec[(i<<balloon_order)+j] = page++;
-+
-+ totalram_pages = balloon_stats.current_pages -= balloon_npages;
-+
-+ mutex_unlock(&balloon_mutex);
-+ }
-+
-+ out:
-+ schedule_work(&balloon_worker);
-+ flush_tlb_all();
-+ return pagevec;
-+
-+ err:
-+ mutex_lock(&balloon_mutex);
-+ while (--i >= 0)
-+ balloon_append(pagevec[i << balloon_order]);
-+ mutex_unlock(&balloon_mutex);
-+ kfree(pagevec);
-+ pagevec = NULL;
-+ goto out;
-+}
-+EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
-+
-+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
-+{
-+ struct page *page;
-+ int i;
-+ int npages;
-+
-+ if (pagevec == NULL)
-+ return;
-+
-+ /* Round up to next number of balloon_order pages */
-+ npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
-+
-+ mutex_lock(&balloon_mutex);
-+ for (i = 0; i < nr_pages; i++) {
-+ page = pagevec[i << balloon_order];
-+ BUG_ON(page_count(page) != 1);
-+ balloon_append(page);
-+ }
-+ mutex_unlock(&balloon_mutex);
-+
-+ kfree(pagevec);
-+
-+ schedule_work(&balloon_worker);
-+}
-+EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
-+
- #define BALLOON_SHOW(name, format, args...) \
- static ssize_t show_##name(struct sys_device *dev, \
- struct sysdev_attribute *attr, \
-@@ -477,7 +659,7 @@ static ssize_t store_target_kb(struct sys_device *dev,
-
- target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
-
-- balloon_set_new_target(target_bytes >> PAGE_SHIFT);
-+ balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
-
- return count;
- }
-@@ -491,7 +673,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
- {
- return sprintf(buf, "%llu\n",
- (unsigned long long)balloon_stats.target_pages
-- << PAGE_SHIFT);
-+ << (PAGE_SHIFT + balloon_order));
- }
-
- static ssize_t store_target(struct sys_device *dev,
-@@ -507,7 +689,7 @@ static ssize_t store_target(struct sys_device *dev,
-
- target_bytes = memparse(buf, &endchar);
-
-- balloon_set_new_target(target_bytes >> PAGE_SHIFT);
-+ balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
-
- return count;
- }
-diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c
-new file mode 100644
-index 0000000..d40f534
---- /dev/null
-+++ b/drivers/xen/biomerge.c
-@@ -0,0 +1,14 @@
-+#include <linux/bio.h>
-+#include <asm/io.h>
-+#include <xen/page.h>
-+
-+bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
-+ const struct bio_vec *vec2)
-+{
-+ unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page));
-+ unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page));
-+
-+ return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&
-+ ((mfn1 == mfn2) || ((mfn1+1) == mfn2));
-+}
-+
-diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile
-new file mode 100644
-index 0000000..dee55ba
---- /dev/null
-+++ b/drivers/xen/blkback/Makefile
-@@ -0,0 +1,4 @@
-+obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o
-+obj-$(CONFIG_XEN_BLKBACK_PAGEMAP) += blkback-pagemap.o
-+
-+xen-blkback-y := blkback.o xenbus.o interface.o vbd.o
-diff --git a/drivers/xen/blkback/blkback-pagemap.c b/drivers/xen/blkback/blkback-pagemap.c
-new file mode 100644
-index 0000000..45f6eb2
---- /dev/null
-+++ b/drivers/xen/blkback/blkback-pagemap.c
-@@ -0,0 +1,109 @@
-+#include <linux/module.h>
-+#include "blkback-pagemap.h"
-+
-+static int blkback_pagemap_size;
-+static struct blkback_pagemap *blkback_pagemap;
-+
-+static inline int
-+blkback_pagemap_entry_clear(struct blkback_pagemap *map)
-+{
-+ static struct blkback_pagemap zero;
-+ return !memcmp(map, &zero, sizeof(zero));
-+}
-+
-+int
-+blkback_pagemap_init(int pages)
-+{
-+ blkback_pagemap = kzalloc(pages * sizeof(struct blkback_pagemap),
-+ GFP_KERNEL);
-+ if (!blkback_pagemap)
-+ return -ENOMEM;
-+
-+ blkback_pagemap_size = pages;
-+ return 0;
-+}
-+EXPORT_SYMBOL_GPL(blkback_pagemap_init);
-+
-+void
-+blkback_pagemap_set(int idx, struct page *page,
-+ domid_t domid, busid_t busid, grant_ref_t gref)
-+{
-+ struct blkback_pagemap *entry;
-+
-+ BUG_ON(!blkback_pagemap);
-+ BUG_ON(idx >= blkback_pagemap_size);
-+
-+ set_page_private(page, idx);
-+
-+ entry = blkback_pagemap + idx;
-+ if (!blkback_pagemap_entry_clear(entry)) {
-+ printk("overwriting pagemap %d: d %u b %u g %u\n",
-+ idx, entry->domid, entry->busid, entry->gref);
-+ BUG();
-+ }
-+
-+ entry->page = page;
-+ entry->domid = domid;
-+ entry->busid = busid;
-+ entry->gref = gref;
-+}
-+EXPORT_SYMBOL_GPL(blkback_pagemap_set);
-+
-+void
-+blkback_pagemap_clear(struct page *page)
-+{
-+ int idx;
-+ struct blkback_pagemap *entry;
-+
-+ idx = (int)page_private(page);
-+
-+ BUG_ON(!blkback_pagemap);
-+ BUG_ON(idx >= blkback_pagemap_size);
-+
-+ entry = blkback_pagemap + idx;
-+ if (blkback_pagemap_entry_clear(entry)) {
-+ printk("clearing empty pagemap %d\n", idx);
-+ BUG();
-+ }
-+
-+ memset(entry, 0, sizeof(*entry));
-+}
-+EXPORT_SYMBOL_GPL(blkback_pagemap_clear);
-+
-+struct blkback_pagemap
-+blkback_pagemap_read(struct page *page)
-+{
-+ int idx;
-+ struct blkback_pagemap *entry;
-+
-+ idx = (int)page_private(page);
-+
-+ BUG_ON(!blkback_pagemap);
-+ BUG_ON(idx >= blkback_pagemap_size);
-+
-+ entry = blkback_pagemap + idx;
-+ if (blkback_pagemap_entry_clear(entry)) {
-+ printk("reading empty pagemap %d\n", idx);
-+ BUG();
-+ }
-+
-+ return *entry;
-+}
-+EXPORT_SYMBOL(blkback_pagemap_read);
-+
-+MODULE_LICENSE("Dual BSD/GPL");
-+
-+int
-+blkback_pagemap_contains_page(struct page *page)
-+{
-+ struct blkback_pagemap *entry;
-+ int idx = (int)page_private(page);
-+
-+ if (idx < 0 || idx >= blkback_pagemap_size)
-+ return 0;
-+
-+ entry = blkback_pagemap + idx;
-+
-+ return (entry->page == page);
-+}
-+EXPORT_SYMBOL(blkback_pagemap_contains_page);
-diff --git a/drivers/xen/blkback/blkback-pagemap.h b/drivers/xen/blkback/blkback-pagemap.h
-new file mode 100644
-index 0000000..7f97d15
---- /dev/null
-+++ b/drivers/xen/blkback/blkback-pagemap.h
-@@ -0,0 +1,36 @@
-+#ifndef _BLKBACK_PAGEMAP_H_
-+#define _BLKBACK_PAGEMAP_H_
-+
-+#include <linux/mm.h>
-+#include <xen/interface/xen.h>
-+#include <xen/interface/grant_table.h>
-+
-+typedef unsigned int busid_t;
-+
-+struct blkback_pagemap {
-+ struct page *page;
-+ domid_t domid;
-+ busid_t busid;
-+ grant_ref_t gref;
-+};
-+
-+#if defined(CONFIG_XEN_BLKBACK_PAGEMAP) || defined(CONFIG_XEN_BLKBACK_PAGEMAP_MODULE)
-+
-+int blkback_pagemap_init(int);
-+void blkback_pagemap_set(int, struct page *, domid_t, busid_t, grant_ref_t);
-+void blkback_pagemap_clear(struct page *);
-+struct blkback_pagemap blkback_pagemap_read(struct page *);
-+int blkback_pagemap_contains_page(struct page *page);
-+
-+#else /* CONFIG_XEN_BLKBACK_PAGEMAP */
-+
-+static inline int blkback_pagemap_init(int pages) { return 0; }
-+static inline void blkback_pagemap_set(int idx, struct page *page, domid_t dom,
-+ busid_t bus, grant_ref_t gnt) {}
-+static inline void blkback_pagemap_clear(struct page *page) {}
-+#define blkback_pagemap_read(_page) ({ BUG(); (struct blkback_pagemap){0}; })
-+static inline int blkback_pagemap_contains_page(struct page *page) { return 0; }
-+
-+#endif /* CONFIG_XEN_BLKBACK_PAGEMAP */
-+
-+#endif
-diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c
-new file mode 100644
-index 0000000..0bef445
---- /dev/null
-+++ b/drivers/xen/blkback/blkback.c
-@@ -0,0 +1,675 @@
-+/******************************************************************************
-+ * arch/xen/drivers/blkif/backend/main.c
-+ *
-+ * Back-end of the driver for virtual block devices. This portion of the
-+ * driver exports a 'unified' block-device interface that can be accessed
-+ * by any operating system that implements a compatible front end. A
-+ * reference front-end implementation can be found in:
-+ * arch/xen/drivers/blkif/frontend
-+ *
-+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
-+ * Copyright (c) 2005, Christopher Clark
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#include <linux/spinlock.h>
-+#include <linux/kthread.h>
-+#include <linux/list.h>
-+#include <linux/delay.h>
-+#include <linux/freezer.h>
-+
-+#include <xen/balloon.h>
-+#include <xen/events.h>
-+#include <xen/page.h>
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/hypercall.h>
-+#include "common.h"
-+
-+/*
-+ * These are rather arbitrary. They are fairly large because adjacent requests
-+ * pulled from a communication ring are quite likely to end up being part of
-+ * the same scatter/gather request at the disc.
-+ *
-+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
-+ *
-+ * This will increase the chances of being able to write whole tracks.
-+ * 64 should be enough to keep us competitive with Linux.
-+ */
-+static int blkif_reqs = 64;
-+module_param_named(reqs, blkif_reqs, int, 0);
-+MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
-+
-+/* Run-time switchable: /sys/module/blkback/parameters/ */
-+static unsigned int log_stats = 0;
-+static unsigned int debug_lvl = 0;
-+module_param(log_stats, int, 0644);
-+module_param(debug_lvl, int, 0644);
-+
-+/*
-+ * Each outstanding request that we've passed to the lower device layers has a
-+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
-+ * the pendcnt towards zero. When it hits zero, the specified domain has a
-+ * response queued for it, with the saved 'id' passed back.
-+ */
-+typedef struct {
-+ blkif_t *blkif;
-+ u64 id;
-+ int nr_pages;
-+ atomic_t pendcnt;
-+ unsigned short operation;
-+ int status;
-+ struct list_head free_list;
-+} pending_req_t;
-+
-+static pending_req_t *pending_reqs;
-+static struct list_head pending_free;
-+static DEFINE_SPINLOCK(pending_free_lock);
-+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
-+
-+#define BLKBACK_INVALID_HANDLE (~0)
-+
-+static struct page **pending_pages;
-+static grant_handle_t *pending_grant_handles;
-+
-+static inline int vaddr_pagenr(pending_req_t *req, int seg)
-+{
-+ return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
-+}
-+
-+#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
-+
-+static inline unsigned long vaddr(pending_req_t *req, int seg)
-+{
-+ unsigned long pfn = page_to_pfn(pending_page(req, seg));
-+ return (unsigned long)pfn_to_kaddr(pfn);
-+}
-+
-+#define pending_handle(_req, _seg) \
-+ (pending_grant_handles[vaddr_pagenr(_req, _seg)])
-+
-+
-+static int do_block_io_op(blkif_t *blkif);
-+static void dispatch_rw_block_io(blkif_t *blkif,
-+ struct blkif_request *req,
-+ pending_req_t *pending_req);
-+static void make_response(blkif_t *blkif, u64 id,
-+ unsigned short op, int st);
-+
-+/******************************************************************
-+ * misc small helpers
-+ */
-+static pending_req_t* alloc_req(void)
-+{
-+ pending_req_t *req = NULL;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&pending_free_lock, flags);
-+ if (!list_empty(&pending_free)) {
-+ req = list_entry(pending_free.next, pending_req_t, free_list);
-+ list_del(&req->free_list);
-+ }
-+ spin_unlock_irqrestore(&pending_free_lock, flags);
-+ return req;
-+}
-+
-+static void free_req(pending_req_t *req)
-+{
-+ unsigned long flags;
-+ int was_empty;
-+
-+ spin_lock_irqsave(&pending_free_lock, flags);
-+ was_empty = list_empty(&pending_free);
-+ list_add(&req->free_list, &pending_free);
-+ spin_unlock_irqrestore(&pending_free_lock, flags);
-+ if (was_empty)
-+ wake_up(&pending_free_wq);
-+}
-+
-+static void unplug_queue(blkif_t *blkif)
-+{
-+ if (blkif->plug == NULL)
-+ return;
-+ if (blkif->plug->unplug_fn)
-+ blkif->plug->unplug_fn(blkif->plug);
-+ blk_put_queue(blkif->plug);
-+ blkif->plug = NULL;
-+}
-+
-+static void plug_queue(blkif_t *blkif, struct block_device *bdev)
-+{
-+ struct request_queue *q = bdev_get_queue(bdev);
-+
-+ if (q == blkif->plug)
-+ return;
-+ unplug_queue(blkif);
-+ blk_get_queue(q);
-+ blkif->plug = q;
-+}
-+
-+static void fast_flush_area(pending_req_t *req)
-+{
-+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-+ unsigned int i, invcount = 0;
-+ grant_handle_t handle;
-+ int ret;
-+
-+ for (i = 0; i < req->nr_pages; i++) {
-+ handle = pending_handle(req, i);
-+ if (handle == BLKBACK_INVALID_HANDLE)
-+ continue;
-+ blkback_pagemap_clear(pending_page(req, i));
-+ gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
-+ GNTMAP_host_map, handle);
-+ pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
-+ invcount++;
-+ }
-+
-+ ret = HYPERVISOR_grant_table_op(
-+ GNTTABOP_unmap_grant_ref, unmap, invcount);
-+ BUG_ON(ret);
-+}
-+
-+/******************************************************************
-+ * SCHEDULER FUNCTIONS
-+ */
-+
-+static void print_stats(blkif_t *blkif)
-+{
-+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
-+ current->comm, blkif->st_oo_req,
-+ blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
-+ blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
-+ blkif->st_rd_req = 0;
-+ blkif->st_wr_req = 0;
-+ blkif->st_oo_req = 0;
-+}
-+
-+int blkif_schedule(void *arg)
-+{
-+ blkif_t *blkif = arg;
-+ struct vbd *vbd = &blkif->vbd;
-+
-+ blkif_get(blkif);
-+
-+ if (debug_lvl)
-+ printk(KERN_DEBUG "%s: started\n", current->comm);
-+
-+ while (!kthread_should_stop()) {
-+ if (try_to_freeze())
-+ continue;
-+ if (unlikely(vbd->size != vbd_size(vbd)))
-+ vbd_resize(blkif);
-+
-+ wait_event_interruptible(
-+ blkif->wq,
-+ blkif->waiting_reqs || kthread_should_stop());
-+ wait_event_interruptible(
-+ pending_free_wq,
-+ !list_empty(&pending_free) || kthread_should_stop());
-+
-+ blkif->waiting_reqs = 0;
-+ smp_mb(); /* clear flag *before* checking for work */
-+
-+ if (do_block_io_op(blkif))
-+ blkif->waiting_reqs = 1;
-+ unplug_queue(blkif);
-+
-+ if (log_stats && time_after(jiffies, blkif->st_print))
-+ print_stats(blkif);
-+ }
-+
-+ if (log_stats)
-+ print_stats(blkif);
-+ if (debug_lvl)
-+ printk(KERN_DEBUG "%s: exiting\n", current->comm);
-+
-+ blkif->xenblkd = NULL;
-+ blkif_put(blkif);
-+
-+ return 0;
-+}
-+
-+/******************************************************************
-+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
-+ */
-+
-+static void __end_block_io_op(pending_req_t *pending_req, int error)
-+{
-+ /* An error fails the entire request. */
-+ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
-+ (error == -EOPNOTSUPP)) {
-+ DPRINTK("blkback: write barrier op failed, not supported\n");
-+ blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
-+ pending_req->status = BLKIF_RSP_EOPNOTSUPP;
-+ } else if (error) {
-+ DPRINTK("Buffer not up-to-date at end of operation, "
-+ "error=%d\n", error);
-+ pending_req->status = BLKIF_RSP_ERROR;
-+ }
-+
-+ if (atomic_dec_and_test(&pending_req->pendcnt)) {
-+ fast_flush_area(pending_req);
-+ make_response(pending_req->blkif, pending_req->id,
-+ pending_req->operation, pending_req->status);
-+ blkif_put(pending_req->blkif);
-+ free_req(pending_req);
-+ }
-+}
-+
-+static void end_block_io_op(struct bio *bio, int error)
-+{
-+ __end_block_io_op(bio->bi_private, error);
-+ bio_put(bio);
-+}
-+
-+
-+/******************************************************************************
-+ * NOTIFICATION FROM GUEST OS.
-+ */
-+
-+static void blkif_notify_work(blkif_t *blkif)
-+{
-+ blkif->waiting_reqs = 1;
-+ wake_up(&blkif->wq);
-+}
-+
-+irqreturn_t blkif_be_int(int irq, void *dev_id)
-+{
-+ blkif_notify_work(dev_id);
-+ return IRQ_HANDLED;
-+}
-+
-+
-+
-+/******************************************************************
-+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
-+ */
-+
-+static int do_block_io_op(blkif_t *blkif)
-+{
-+ union blkif_back_rings *blk_rings = &blkif->blk_rings;
-+ struct blkif_request req;
-+ pending_req_t *pending_req;
-+ RING_IDX rc, rp;
-+ int more_to_do = 0;
-+
-+ rc = blk_rings->common.req_cons;
-+ rp = blk_rings->common.sring->req_prod;
-+ rmb(); /* Ensure we see queued requests up to 'rp'. */
-+
-+ while (rc != rp) {
-+
-+ if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
-+ break;
-+
-+ if (kthread_should_stop()) {
-+ more_to_do = 1;
-+ break;
-+ }
-+
-+ pending_req = alloc_req();
-+ if (NULL == pending_req) {
-+ blkif->st_oo_req++;
-+ more_to_do = 1;
-+ break;
-+ }
-+
-+ switch (blkif->blk_protocol) {
-+ case BLKIF_PROTOCOL_NATIVE:
-+ memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
-+ break;
-+ case BLKIF_PROTOCOL_X86_32:
-+ blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
-+ break;
-+ case BLKIF_PROTOCOL_X86_64:
-+ blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
-+ break;
-+ default:
-+ BUG();
-+ }
-+ blk_rings->common.req_cons = ++rc; /* before make_response() */
-+
-+ /* Apply all sanity checks to /private copy/ of request. */
-+ barrier();
-+
-+ switch (req.operation) {
-+ case BLKIF_OP_READ:
-+ blkif->st_rd_req++;
-+ dispatch_rw_block_io(blkif, &req, pending_req);
-+ break;
-+ case BLKIF_OP_WRITE_BARRIER:
-+ blkif->st_br_req++;
-+ /* fall through */
-+ case BLKIF_OP_WRITE:
-+ blkif->st_wr_req++;
-+ dispatch_rw_block_io(blkif, &req, pending_req);
-+ break;
-+ default:
-+ /* A good sign something is wrong: sleep for a while to
-+ * avoid excessive CPU consumption by a bad guest. */
-+ msleep(1);
-+ DPRINTK("error: unknown block io operation [%d]\n",
-+ req.operation);
-+ make_response(blkif, req.id, req.operation,
-+ BLKIF_RSP_ERROR);
-+ free_req(pending_req);
-+ break;
-+ }
-+
-+ /* Yield point for this unbounded loop. */
-+ cond_resched();
-+ }
-+
-+ return more_to_do;
-+}
-+
-+static void dispatch_rw_block_io(blkif_t *blkif,
-+ struct blkif_request *req,
-+ pending_req_t *pending_req)
-+{
-+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-+ struct phys_req preq;
-+ struct {
-+ unsigned long buf; unsigned int nsec;
-+ } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-+ unsigned int nseg;
-+ struct bio *bio = NULL;
-+ int ret, i;
-+ int operation;
-+
-+ switch (req->operation) {
-+ case BLKIF_OP_READ:
-+ operation = READ;
-+ break;
-+ case BLKIF_OP_WRITE:
-+ operation = WRITE;
-+ break;
-+ case BLKIF_OP_WRITE_BARRIER:
-+ operation = WRITE_BARRIER;
-+ break;
-+ default:
-+ operation = 0; /* make gcc happy */
-+ BUG();
-+ }
-+
-+ /* Check that number of segments is sane. */
-+ nseg = req->nr_segments;
-+ if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
-+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
-+ DPRINTK("Bad number of segments in request (%d)\n", nseg);
-+ goto fail_response;
-+ }
-+
-+ preq.dev = req->handle;
-+ preq.sector_number = req->sector_number;
-+ preq.nr_sects = 0;
-+
-+ pending_req->blkif = blkif;
-+ pending_req->id = req->id;
-+ pending_req->operation = req->operation;
-+ pending_req->status = BLKIF_RSP_OKAY;
-+ pending_req->nr_pages = nseg;
-+
-+ for (i = 0; i < nseg; i++) {
-+ uint32_t flags;
-+
-+ seg[i].nsec = req->seg[i].last_sect -
-+ req->seg[i].first_sect + 1;
-+
-+ if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
-+ (req->seg[i].last_sect < req->seg[i].first_sect))
-+ goto fail_response;
-+ preq.nr_sects += seg[i].nsec;
-+
-+ flags = GNTMAP_host_map;
-+ if (operation != READ)
-+ flags |= GNTMAP_readonly;
-+ gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
-+ req->seg[i].gref, blkif->domid);
-+ }
-+
-+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
-+ BUG_ON(ret);
-+
-+ for (i = 0; i < nseg; i++) {
-+ if (unlikely(map[i].status != 0)) {
-+ DPRINTK("invalid buffer -- could not remap it\n");
-+ map[i].handle = BLKBACK_INVALID_HANDLE;
-+ ret |= 1;
-+ continue;
-+ }
-+
-+ set_phys_to_machine(
-+ page_to_pfn(pending_page(pending_req, i)),
-+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
-+ seg[i].buf = map[i].dev_bus_addr |
-+ (req->seg[i].first_sect << 9);
-+ blkback_pagemap_set(vaddr_pagenr(pending_req, i),
-+ pending_page(pending_req, i),
-+ blkif->domid, req->handle,
-+ req->seg[i].gref);
-+ pending_handle(pending_req, i) = map[i].handle;
-+ }
-+
-+ if (ret)
-+ goto fail_flush;
-+
-+ if (vbd_translate(&preq, blkif, operation) != 0) {
-+ DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
-+ operation == READ ? "read" : "write",
-+ preq.sector_number,
-+ preq.sector_number + preq.nr_sects, preq.dev);
-+ goto fail_flush;
-+ }
-+
-+ plug_queue(blkif, preq.bdev);
-+ atomic_set(&pending_req->pendcnt, 1);
-+ blkif_get(blkif);
-+
-+ for (i = 0; i < nseg; i++) {
-+ if (((int)preq.sector_number|(int)seg[i].nsec) &
-+ ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
-+ DPRINTK("Misaligned I/O request from domain %d",
-+ blkif->domid);
-+ goto fail_put_bio;
-+ }
-+
-+ while ((bio == NULL) ||
-+ (bio_add_page(bio,
-+ pending_page(pending_req, i),
-+ seg[i].nsec << 9,
-+ seg[i].buf & ~PAGE_MASK) == 0)) {
-+ if (bio) {
-+ atomic_inc(&pending_req->pendcnt);
-+ submit_bio(operation, bio);
-+ }
-+
-+ bio = bio_alloc(GFP_KERNEL, nseg-i);
-+ if (unlikely(bio == NULL))
-+ goto fail_put_bio;
-+
-+ bio->bi_bdev = preq.bdev;
-+ bio->bi_private = pending_req;
-+ bio->bi_end_io = end_block_io_op;
-+ bio->bi_sector = preq.sector_number;
-+ }
-+
-+ preq.sector_number += seg[i].nsec;
-+ }
-+
-+ if (!bio) {
-+ BUG_ON(operation != WRITE_BARRIER);
-+ bio = bio_alloc(GFP_KERNEL, 0);
-+ if (unlikely(bio == NULL))
-+ goto fail_put_bio;
-+
-+ bio->bi_bdev = preq.bdev;
-+ bio->bi_private = pending_req;
-+ bio->bi_end_io = end_block_io_op;
-+ bio->bi_sector = -1;
-+ }
-+
-+ submit_bio(operation, bio);
-+
-+ if (operation == READ)
-+ blkif->st_rd_sect += preq.nr_sects;
-+ else if (operation == WRITE || operation == WRITE_BARRIER)
-+ blkif->st_wr_sect += preq.nr_sects;
-+
-+ return;
-+
-+ fail_flush:
-+ fast_flush_area(pending_req);
-+ fail_response:
-+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
-+ free_req(pending_req);
-+ msleep(1); /* back off a bit */
-+ return;
-+
-+ fail_put_bio:
-+ __end_block_io_op(pending_req, -EINVAL);
-+ if (bio)
-+ bio_put(bio);
-+ unplug_queue(blkif);
-+ msleep(1); /* back off a bit */
-+ return;
-+}
-+
-+
-+
-+/******************************************************************
-+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
-+ */
-+
-+
-+/*
-+ * Queue one response on @blkif's response ring and notify as needed.
-+ *
-+ * @blkif: interface whose ring receives the response
-+ * @id: request identifier copied into the response
-+ * @op: operation code copied into the response
-+ * @st: status code copied into the response
-+ *
-+ * The ring is modified with blk_ring_lock held (irqsave); the two
-+ * notification calls at the end run after the lock has been released.
-+ */
-+static void make_response(blkif_t *blkif, u64 id,
-+ unsigned short op, int st)
-+{
-+ struct blkif_response resp;
-+ unsigned long flags;
-+ union blkif_back_rings *blk_rings = &blkif->blk_rings;
-+ int more_to_do = 0;
-+ int notify;
-+
-+ resp.id = id;
-+ resp.operation = op;
-+ resp.status = st;
-+
-+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
-+ /* Place on the response ring for the relevant domain. */
-+ switch (blkif->blk_protocol) {
-+ case BLKIF_PROTOCOL_NATIVE:
-+ memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
-+ &resp, sizeof(resp));
-+ break;
-+ case BLKIF_PROTOCOL_X86_32:
-+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
-+ &resp, sizeof(resp));
-+ break;
-+ case BLKIF_PROTOCOL_X86_64:
-+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
-+ &resp, sizeof(resp));
-+ break;
-+ default:
-+ BUG();
-+ }
-+ /* The slot is filled; advance the private producer index and publish it. */
-+ blk_rings->common.rsp_prod_pvt++;
-+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
-+ if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
-+ /*
-+ * Tail check for pending requests. Allows frontend to avoid
-+ * notifications if requests are already in flight (lower
-+ * overheads and promotes batching).
-+ */
-+ RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
-+
-+ } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
-+ more_to_do = 1;
-+ }
-+
-+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
-+
-+ /* Both notifications are issued outside the ring lock. */
-+ if (more_to_do)
-+ blkif_notify_work(blkif);
-+ if (notify)
-+ notify_remote_via_irq(blkif->irq);
-+}
-+
-+/*
-+ * Module initialisation for the block backend.
-+ *
-+ * Allocates the pending-request array, the grant-handle array and the page
-+ * vector (blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST pages), initialises the
-+ * page map, the interface cache and the xenbus side, and threads every
-+ * pending request onto the pending_free list.
-+ *
-+ * Returns 0 on success, -ENODEV when not running as a Xen PV domain,
-+ * -ENOMEM when an allocation fails, or the error returned by
-+ * blkif_interface_init() / blkif_xenbus_init().
-+ *
-+ * NOTE(review): state set up by blkback_pagemap_init() and
-+ * blkif_interface_init() is not torn down on the later failure paths; no
-+ * teardown helper for either is visible from this function.
-+ */
-+static int __init blkif_init(void)
-+{
-+	int i, mmap_pages;
-+	int rc = 0;
-+
-+	if (!xen_pv_domain())
-+		return -ENODEV;
-+
-+	mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-+
-+	pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
-+			       blkif_reqs, GFP_KERNEL);
-+	pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
-+					mmap_pages, GFP_KERNEL);
-+	pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
-+
-+	/* Check our own allocations before asking for any more memory. */
-+	if (!pending_reqs || !pending_grant_handles || !pending_pages) {
-+		rc = -ENOMEM;
-+		goto out_of_memory;
-+	}
-+
-+	/*
-+	 * A page map failure must not be reported as success: rc is still 0
-+	 * here, so set it explicitly before bailing out.
-+	 */
-+	if (blkback_pagemap_init(mmap_pages)) {
-+		rc = -ENOMEM;
-+		goto out_of_memory;
-+	}
-+
-+	for (i = 0; i < mmap_pages; i++)
-+		pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-+
-+	rc = blkif_interface_init();
-+	if (rc)
-+		goto failed_init;
-+
-+	/*
-+	 * pending_reqs is a pointer, so sizeof(pending_reqs) would only
-+	 * cover pointer-size bytes; clear the whole kmalloc'ed array.
-+	 */
-+	memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
-+	INIT_LIST_HEAD(&pending_free);
-+
-+	for (i = 0; i < blkif_reqs; i++)
-+		list_add_tail(&pending_reqs[i].free_list, &pending_free);
-+
-+	rc = blkif_xenbus_init();
-+	if (rc)
-+		goto failed_init;
-+
-+	return 0;
-+
-+ out_of_memory:
-+	printk(KERN_ERR "%s: out of memory\n", __func__);
-+ failed_init:
-+	kfree(pending_reqs);
-+	kfree(pending_grant_handles);
-+	free_empty_pages_and_pagevec(pending_pages, mmap_pages);
-+	return rc;
-+}
-+
-+module_init(blkif_init);
-+
-+MODULE_LICENSE("Dual BSD/GPL");
-diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h
-new file mode 100644
-index 0000000..531ba81
---- /dev/null
-+++ b/drivers/xen/blkback/common.h
-@@ -0,0 +1,143 @@
-+/*
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#ifndef __BLKIF__BACKEND__COMMON_H__
-+#define __BLKIF__BACKEND__COMMON_H__
-+
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/interrupt.h>
-+#include <linux/slab.h>
-+#include <linux/blkdev.h>
-+#include <linux/vmalloc.h>
-+#include <linux/wait.h>
-+#include <asm/io.h>
-+#include <asm/setup.h>
-+#include <asm/pgalloc.h>
-+#include <asm/hypervisor.h>
-+#include <xen/blkif.h>
-+#include <xen/grant_table.h>
-+#include <xen/xenbus.h>
-+#include "blkback-pagemap.h"
-+
-+
-+/* Debug print through pr_debug(), prefixed with the source file and line. */
-+#define DPRINTK(_f, _a...) \
-+ pr_debug("(file=%s, line=%d) " _f, \
-+ __FILE__ , __LINE__ , ## _a )
-+
-+/* Description of the virtual block device attached to one interface. */
-+struct vbd {
-+ blkif_vdev_t handle; /* what the domain refers to this vbd as */
-+ unsigned char readonly; /* Non-zero -> read-only */
-+ unsigned char type; /* VDISK_xxx */
-+ u32 pdevice; /* phys device that this vbd maps to */
-+ struct block_device *bdev; /* opened backing device; NULL when not open */
-+ sector_t size; /* Cached size parameter */
-+};
-+
-+struct backend_info;
-+
-+/* State kept for one block interface; used throughout as blkif_t. */
-+typedef struct blkif_st {
-+ /* Unique identifier for this interface. */
-+ domid_t domid;
-+ unsigned int handle;
-+ /* Physical parameters of the comms window. */
-+ unsigned int irq;
-+ /* Comms information. */
-+ enum blkif_protocol blk_protocol;
-+ union blkif_back_rings blk_rings;
-+ struct vm_struct *blk_ring_area;
-+ /* The VBD attached to this interface. */
-+ struct vbd vbd;
-+ /* Back pointer to the backend_info. */
-+ struct backend_info *be;
-+ /* Private fields. */
-+ spinlock_t blk_ring_lock;
-+ /* Reference count; manipulated through blkif_get()/blkif_put(). */
-+ atomic_t refcnt;
-+
-+ wait_queue_head_t wq;
-+ /* Kernel thread started with blkif_schedule(); NULL when not running. */
-+ struct task_struct *xenblkd;
-+ unsigned int waiting_reqs;
-+ struct request_queue *plug;
-+
-+ /* statistics */
-+ unsigned long st_print;
-+ int st_rd_req;
-+ int st_wr_req;
-+ int st_oo_req;
-+ int st_br_req;
-+ int st_rd_sect;
-+ int st_wr_sect;
-+
-+ /* Woken by blkif_put() when refcnt drops to zero. */
-+ wait_queue_head_t waiting_to_free;
-+
-+ /* Grant handle and reference of the mapped shared ring page. */
-+ grant_handle_t shmem_handle;
-+ grant_ref_t shmem_ref;
-+} blkif_t;
-+
-+blkif_t *blkif_alloc(domid_t domid);
-+void blkif_disconnect(blkif_t *blkif);
-+void blkif_free(blkif_t *blkif);
-+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
-+void vbd_resize(blkif_t *blkif);
-+
-+/* Take a reference on interface _b by incrementing its refcnt. */
-+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
-+/*
-+ * Drop a reference on interface _b; when the count reaches zero, wake
-+ * anyone sleeping on its waiting_to_free queue.
-+ */
-+#define blkif_put(_b) \
-+ do { \
-+ if (atomic_dec_and_test(&(_b)->refcnt)) \
-+ wake_up(&(_b)->waiting_to_free);\
-+ } while (0)
-+
-+/* Create a vbd. */
-+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
-+ unsigned minor, int readonly, int cdrom);
-+void vbd_free(struct vbd *vbd);
-+
-+unsigned long long vbd_size(struct vbd *vbd);
-+unsigned int vbd_info(struct vbd *vbd);
-+unsigned long vbd_secsize(struct vbd *vbd);
-+
-+/*
-+ * A request expressed against the backing device. vbd_translate() fills in
-+ * dev and bdev after validating sector_number and nr_sects.
-+ */
-+struct phys_req {
-+ unsigned short dev;
-+ unsigned short nr_sects; /* total number of sectors in the request */
-+ struct block_device *bdev;
-+ blkif_sector_t sector_number; /* first sector of the request */
-+};
-+
-+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
-+
-+int blkif_interface_init(void);
-+
-+int blkif_xenbus_init(void);
-+
-+irqreturn_t blkif_be_int(int irq, void *dev_id);
-+int blkif_schedule(void *arg);
-+
-+int blkback_barrier(struct xenbus_transaction xbt,
-+ struct backend_info *be, int state);
-+
-+struct xenbus_device *blkback_xenbus(struct backend_info *be);
-+
-+#endif /* __BLKIF__BACKEND__COMMON_H__ */
-diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c
-new file mode 100644
-index 0000000..e397a41
---- /dev/null
-+++ b/drivers/xen/blkback/interface.c
-@@ -0,0 +1,186 @@
-+/******************************************************************************
-+ * arch/xen/drivers/blkif/backend/interface.c
-+ *
-+ * Block-device interface management.
-+ *
-+ * Copyright (c) 2004, Keir Fraser
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#include "common.h"
-+#include <xen/events.h>
-+#include <xen/grant_table.h>
-+#include <linux/kthread.h>
-+
-+static struct kmem_cache *blkif_cachep;
-+
-+/*
-+ * Allocate a blkif_t from the interface cache and initialise it for @domid.
-+ *
-+ * Returns the new interface with its reference count set to 1, or
-+ * ERR_PTR(-ENOMEM) when the cache allocation fails.
-+ */
-+blkif_t *blkif_alloc(domid_t domid)
-+{
-+	blkif_t *iface = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
-+
-+	if (iface == NULL)
-+		return ERR_PTR(-ENOMEM);
-+
-+	/* Start from an all-zero object, then set the non-zero fields. */
-+	memset(iface, 0, sizeof(*iface));
-+
-+	iface->domid = domid;
-+	iface->st_print = jiffies;
-+	atomic_set(&iface->refcnt, 1);
-+	spin_lock_init(&iface->blk_ring_lock);
-+	init_waitqueue_head(&iface->wq);
-+	init_waitqueue_head(&iface->waiting_to_free);
-+
-+	return iface;
-+}
-+
-+/*
-+ * Map the frontend's grant @shared_page at the address of the interface's
-+ * blk_ring_area and remember the grant reference and handle.
-+ *
-+ * Returns 0 on success or the non-zero status reported by the map
-+ * operation. A failing hypercall itself is treated as a BUG().
-+ */
-+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
-+{
-+	struct gnttab_map_grant_ref map_op;
-+	unsigned long ring_addr = (unsigned long)blkif->blk_ring_area->addr;
-+
-+	gnttab_set_map_op(&map_op, ring_addr, GNTMAP_host_map,
-+			  shared_page, blkif->domid);
-+
-+	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map_op, 1) != 0)
-+		BUG();
-+
-+	if (map_op.status != 0) {
-+		DPRINTK(" Grant table operation failure !\n");
-+		return map_op.status;
-+	}
-+
-+	blkif->shmem_handle = map_op.handle;
-+	blkif->shmem_ref = shared_page;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Undo map_frontend_page(): unmap the grant identified by shmem_handle from
-+ * the address of blk_ring_area. A failing hypercall is treated as a BUG().
-+ */
-+static void unmap_frontend_page(blkif_t *blkif)
-+{
-+	struct gnttab_unmap_grant_ref unmap_op;
-+	unsigned long ring_addr = (unsigned long)blkif->blk_ring_area->addr;
-+
-+	gnttab_set_unmap_op(&unmap_op, ring_addr, GNTMAP_host_map,
-+			    blkif->shmem_handle);
-+
-+	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_op, 1) != 0)
-+		BUG();
-+}
-+
-+/*
-+ * Connect the interface to the frontend: allocate a one-page VM area, map
-+ * the shared ring page into it, initialise the back ring for the negotiated
-+ * protocol and bind the event channel to blkif_be_int().
-+ *
-+ * Returns 0 on success (or when an irq is already bound), -ENOMEM when the
-+ * VM area cannot be allocated, or the error from mapping / binding.
-+ */
-+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
-+{
-+	int rc;
-+
-+	/* Already connected through? */
-+	if (blkif->irq)
-+		return 0;
-+
-+	blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE);
-+	if (blkif->blk_ring_area == NULL)
-+		return -ENOMEM;
-+
-+	rc = map_frontend_page(blkif, shared_page);
-+	if (rc) {
-+		free_vm_area(blkif->blk_ring_area);
-+		return rc;
-+	}
-+
-+	/* The shared page layout depends on the frontend's ABI. */
-+	switch (blkif->blk_protocol) {
-+	case BLKIF_PROTOCOL_NATIVE: {
-+		struct blkif_sring *native_ring;
-+
-+		native_ring = (struct blkif_sring *)blkif->blk_ring_area->addr;
-+		BACK_RING_INIT(&blkif->blk_rings.native, native_ring, PAGE_SIZE);
-+		break;
-+	}
-+	case BLKIF_PROTOCOL_X86_32: {
-+		struct blkif_x86_32_sring *ring32;
-+
-+		ring32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr;
-+		BACK_RING_INIT(&blkif->blk_rings.x86_32, ring32, PAGE_SIZE);
-+		break;
-+	}
-+	case BLKIF_PROTOCOL_X86_64: {
-+		struct blkif_x86_64_sring *ring64;
-+
-+		ring64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr;
-+		BACK_RING_INIT(&blkif->blk_rings.x86_64, ring64, PAGE_SIZE);
-+		break;
-+	}
-+	default:
-+		BUG();
-+	}
-+
-+	rc = bind_interdomain_evtchn_to_irqhandler(
-+		blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif);
-+	if (rc < 0) {
-+		/* Roll back the mapping made above. */
-+		unmap_frontend_page(blkif);
-+		free_vm_area(blkif->blk_ring_area);
-+		blkif->blk_rings.common.sring = NULL;
-+		return rc;
-+	}
-+
-+	/* A non-negative return value is the bound irq number. */
-+	blkif->irq = rc;
-+	return 0;
-+}
-+
-+/*
-+ * Tear down the connection to the frontend: stop the xenblkd thread, wait
-+ * for all other references to be dropped, then release the irq and the
-+ * mapped ring page. The interface itself is not freed here.
-+ */
-+void blkif_disconnect(blkif_t *blkif)
-+{
-+ if (blkif->xenblkd) {
-+ kthread_stop(blkif->xenblkd);
-+ blkif->xenblkd = NULL;
-+ }
-+
-+ /*
-+ * Drop one reference, sleep until the count reaches zero, then take
-+ * the reference back so the count is 1 again on return.
-+ */
-+ atomic_dec(&blkif->refcnt);
-+ wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
-+ atomic_inc(&blkif->refcnt);
-+
-+ if (blkif->irq) {
-+ unbind_from_irqhandler(blkif->irq, blkif);
-+ blkif->irq = 0;
-+ }
-+
-+ /* A non-NULL sring means the ring page is still mapped. */
-+ if (blkif->blk_rings.common.sring) {
-+ unmap_frontend_page(blkif);
-+ free_vm_area(blkif->blk_ring_area);
-+ blkif->blk_rings.common.sring = NULL;
-+ }
-+}
-+
-+/*
-+ * Release an interface back to the cache. The caller must hold the only
-+ * remaining reference; anything else is a BUG().
-+ */
-+void blkif_free(blkif_t *blkif)
-+{
-+	int was_last_ref = atomic_dec_and_test(&blkif->refcnt);
-+
-+	if (!was_last_ref)
-+		BUG();
-+
-+	kmem_cache_free(blkif_cachep, blkif);
-+}
-+
-+/*
-+ * Create the slab cache that blkif_alloc() draws blkif_t objects from.
-+ *
-+ * Returns 0 on success or -ENOMEM when the cache cannot be created.
-+ */
-+int __init blkif_interface_init(void)
-+{
-+	blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
-+					 0, 0, NULL);
-+
-+	return blkif_cachep ? 0 : -ENOMEM;
-+}
-diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c
-new file mode 100644
-index 0000000..943ec23
---- /dev/null
-+++ b/drivers/xen/blkback/vbd.c
-@@ -0,0 +1,161 @@
-+/******************************************************************************
-+ * blkback/vbd.c
-+ *
-+ * Routines for managing virtual block devices (VBDs).
-+ *
-+ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#include "common.h"
-+
-+/*
-+ * Size of the device behind vbd _v: the partition's nr_sects when the block
-+ * device has a bd_part, otherwise the capacity of its bd_disk.
-+ */
-+#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
-+ (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
-+
-+/* Return the current size of @vbd's backing device, as given by vbd_sz(). */
-+unsigned long long vbd_size(struct vbd *vbd)
-+{
-+	unsigned long long nr_sectors = vbd_sz(vbd);
-+
-+	return nr_sectors;
-+}
-+
-+/* Return @vbd's type bits, with VDISK_READONLY added for a read-only vbd. */
-+unsigned int vbd_info(struct vbd *vbd)
-+{
-+	unsigned int info = vbd->type;
-+
-+	if (vbd->readonly)
-+		info |= VDISK_READONLY;
-+
-+	return info;
-+}
-+
-+/* Return the logical block size of @vbd's backing block device. */
-+unsigned long vbd_secsize(struct vbd *vbd)
-+{
-+	unsigned long block_size = bdev_logical_block_size(vbd->bdev);
-+
-+	return block_size;
-+}
-+
-+/*
-+ * Set up the vbd embedded in @blkif and open its backing device.
-+ *
-+ * @blkif: interface that owns the vbd
-+ * @handle: identifier the frontend uses for this vbd
-+ * @major: major number of the backing device
-+ * @minor: minor number of the backing device
-+ * @readonly: non-zero to open the device for reading only
-+ * @cdrom: non-zero to force the VDISK_CDROM type bit
-+ *
-+ * Returns 0 on success, or -ENOENT when the device cannot be opened or has
-+ * no gendisk attached.
-+ */
-+int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
-+	       unsigned minor, int readonly, int cdrom)
-+{
-+	struct vbd *vbd;
-+	struct block_device *bdev;
-+
-+	vbd = &blkif->vbd;
-+	vbd->handle = handle;
-+	vbd->readonly = readonly;
-+	vbd->type = 0;
-+
-+	vbd->pdevice = MKDEV(major, minor);
-+
-+	bdev = open_by_devnum(vbd->pdevice,
-+			      vbd->readonly ? FMODE_READ : FMODE_WRITE);
-+
-+	if (IS_ERR(bdev)) {
-+		DPRINTK("vbd_creat: device %08x could not be opened.\n",
-+			vbd->pdevice);
-+		return -ENOENT;
-+	}
-+
-+	vbd->bdev = bdev;
-+
-+	if (vbd->bdev->bd_disk == NULL) {
-+		DPRINTK("vbd_creat: device %08x doesn't exist.\n",
-+			vbd->pdevice);
-+		vbd_free(vbd);
-+		return -ENOENT;
-+	}
-+
-+	/*
-+	 * Only query the size once bd_disk is known to be non-NULL:
-+	 * vbd_sz() dereferences bd_disk when the device has no bd_part.
-+	 */
-+	vbd->size = vbd_size(vbd);
-+
-+	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
-+		vbd->type |= VDISK_CDROM;
-+	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
-+		vbd->type |= VDISK_REMOVABLE;
-+
-+	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
-+		handle, blkif->domid);
-+	return 0;
-+}
-+
-+/*
-+ * Release @vbd's backing device, if one is open, using the same mode it was
-+ * opened with, and clear the bdev pointer.
-+ */
-+void vbd_free(struct vbd *vbd)
-+{
-+	struct block_device *backing = vbd->bdev;
-+
-+	if (backing != NULL) {
-+		if (vbd->readonly)
-+			blkdev_put(backing, FMODE_READ);
-+		else
-+			blkdev_put(backing, FMODE_WRITE);
-+	}
-+
-+	vbd->bdev = NULL;
-+}
-+
-+/*
-+ * Validate a request against the interface's vbd and fill in the physical
-+ * device it maps to.
-+ *
-+ * @req: request to check; on success req->dev and req->bdev are set
-+ * @blkif: interface whose vbd the request targets
-+ * @operation: READ or a write-type operation
-+ *
-+ * Returns 0 on success, or -EACCES when a non-read is sent to a read-only
-+ * vbd or the sector range does not fit inside the device.
-+ */
-+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
-+{
-+	struct vbd *vbd = &blkif->vbd;
-+	blkif_sector_t end;
-+	int rc = -EACCES;
-+
-+	if ((operation != READ) && vbd->readonly)
-+		goto out;
-+
-+	/*
-+	 * sector_number comes from the request, so the sum may wrap around;
-+	 * a wrapped end would otherwise pass the size check below.
-+	 */
-+	end = req->sector_number + req->nr_sects;
-+	if (unlikely(end < req->sector_number))
-+		goto out;
-+
-+	if (unlikely(end > vbd_sz(vbd)))
-+		goto out;
-+
-+	req->dev = vbd->pdevice;
-+	req->bdev = vbd->bdev;
-+	rc = 0;
-+
-+ out:
-+	return rc;
-+}
-+
-+/*
-+ * Record the backing device's current size in the vbd and publish it to
-+ * xenstore ("sectors"), rewriting "state" in the same transaction.
-+ *
-+ * The transaction is retried while ending it returns -EAGAIN. Failures are
-+ * logged with printk() and otherwise ignored.
-+ */
-+void vbd_resize(blkif_t *blkif)
-+{
-+	struct vbd *vbd = &blkif->vbd;
-+	struct xenbus_transaction xbt;
-+	int err;
-+	struct xenbus_device *dev = blkback_xenbus(blkif->be);
-+	unsigned long long new_size = vbd_size(vbd);
-+
-+	printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size);
-+	vbd->size = new_size;
-+again:
-+	err = xenbus_transaction_start(&xbt);
-+	if (err) {
-+		printk(KERN_WARNING "Error starting transaction");
-+		return;
-+	}
-+	err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu",
-+			    vbd_size(vbd));
-+	if (err) {
-+		printk(KERN_WARNING "Error writing new size");
-+		goto abort;
-+	}
-+	/*
-+	 * Write the current state; we will use this to synchronize
-+	 * the front-end. If the current state is "connected" the
-+	 * front-end will get the new size information online.
-+	 */
-+	err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
-+	if (err) {
-+		printk(KERN_WARNING "Error writing the state");
-+		goto abort;
-+	}
-+
-+	err = xenbus_transaction_end(xbt, 0);
-+	if (err == -EAGAIN)
-+		goto again;
-+	if (err)
-+		printk(KERN_WARNING "Error ending transaction");
-+	/*
-+	 * The transaction has been ended above, whatever the result; do not
-+	 * fall through and end it a second time.
-+	 */
-+	return;
-+abort:
-+	xenbus_transaction_end(xbt, 1);
-+}
-diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c
-new file mode 100644
-index 0000000..a0534fc
---- /dev/null
-+++ b/drivers/xen/blkback/xenbus.c
-@@ -0,0 +1,553 @@
-+/* Xenbus code for blkif backend
-+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
-+ Copyright (C) 2005 XenSource Ltd
-+
-+ This program is free software; you can redistribute it and/or modify
-+ it under the terms of the GNU General Public License as published by
-+ the Free Software Foundation; either version 2 of the License, or
-+ (at your option) any later version.
-+
-+ This program is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ GNU General Public License for more details.
-+
-+ You should have received a copy of the GNU General Public License
-+ along with this program; if not, write to the Free Software
-+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+*/
-+
-+#include <stdarg.h>
-+#include <linux/module.h>
-+#include <linux/kthread.h>
-+#include "common.h"
-+
-+/*
-+ * Replace the DPRINTK from common.h with a variant that prefixes messages
-+ * with "blkback/xenbus", the function name and line, and appends ".\n".
-+ */
-+#undef DPRINTK
-+#define DPRINTK(fmt, args...) \
-+ pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \
-+ __FUNCTION__, __LINE__, ##args)
-+
-+/* Per-device backend state, stored as the xenbus device's drvdata. */
-+struct backend_info
-+{
-+ struct xenbus_device *dev;
-+ blkif_t *blkif;
-+ /* Watch on the backend's "physical-device" node. */
-+ struct xenbus_watch backend_watch;
-+ /* Device numbers read from "physical-device"; both zero until set. */
-+ unsigned major;
-+ unsigned minor;
-+ /* String read from the "mode" node by xenbus_read(). */
-+ char *mode;
-+};
-+
-+static void connect(struct backend_info *);
-+static int connect_ring(struct backend_info *);
-+static void backend_changed(struct xenbus_watch *, const char **,
-+ unsigned int);
-+
-+/* Accessor: return the xenbus device that backend @be belongs to. */
-+struct xenbus_device *blkback_xenbus(struct backend_info *be)
-+{
-+	struct xenbus_device *xbdev = be->dev;
-+
-+	return xbdev;
-+}
-+
-+/*
-+ * Build the name "blkback.<domid>.<dev>" into @buf, where <dev> is the
-+ * backend's "dev" xenstore value with everything up to and including a
-+ * "/dev/" component stripped.
-+ *
-+ * @buf must hold at least TASK_COMM_LEN bytes. Returns 0 on success or the
-+ * error from reading the "dev" node.
-+ */
-+static int blkback_name(blkif_t *blkif, char *buf)
-+{
-+	struct xenbus_device *xbdev = blkif->be->dev;
-+	char *path = xenbus_read(XBT_NIL, xbdev->nodename, "dev", NULL);
-+	char *leaf;
-+
-+	if (IS_ERR(path))
-+		return PTR_ERR(path);
-+
-+	leaf = strstr(path, "/dev/");
-+	if (leaf == NULL)
-+		leaf = path;
-+	else
-+		leaf += strlen("/dev/");
-+
-+	snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, leaf);
-+	kfree(path);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Move the interface to the connected state once both the irq and the
-+ * backing device are available, flush and invalidate the device's page
-+ * cache, and start the xenblkd thread.
-+ */
-+static void update_blkif_status(blkif_t *blkif)
-+{
-+	struct backend_info *be = blkif->be;
-+	char name[TASK_COMM_LEN];
-+	int err;
-+
-+	/* Not ready to connect, or already connected? */
-+	if (!blkif->irq || !blkif->vbd.bdev ||
-+	    be->dev->state == XenbusStateConnected)
-+		return;
-+
-+	/* Attempt to connect: exit if we fail to. */
-+	connect(be);
-+	if (be->dev->state != XenbusStateConnected)
-+		return;
-+
-+	err = blkback_name(blkif, name);
-+	if (err != 0) {
-+		xenbus_dev_error(be->dev, err, "get blkback dev name");
-+		return;
-+	}
-+
-+	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
-+	if (err != 0) {
-+		xenbus_dev_error(be->dev, err, "block flush");
-+		return;
-+	}
-+	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
-+
-+	blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
-+	if (!IS_ERR(blkif->xenblkd))
-+		return;
-+
-+	/* Thread creation failed: report it and leave no ERR_PTR behind. */
-+	err = PTR_ERR(blkif->xenblkd);
-+	blkif->xenblkd = NULL;
-+	xenbus_dev_error(be->dev, err, "start xenblkd");
-+}
-+
-+
-+/****************************************************************
-+ * sysfs interface for VBD I/O requests
-+ */
-+
-+/*
-+ * Define a read-only (S_IRUGO) device attribute called `name`. Its show
-+ * routine looks up the backend_info from the device's drvdata as `be` and
-+ * prints `args` into the buffer with `format`, so `args` may refer to `be`.
-+ */
-+#define VBD_SHOW(name, format, args...) \
-+ static ssize_t show_##name(struct device *_dev, \
-+ struct device_attribute *attr, \
-+ char *buf) \
-+ { \
-+ struct xenbus_device *dev = to_xenbus_device(_dev); \
-+ struct backend_info *be = dev_get_drvdata(&dev->dev); \
-+ \
-+ return sprintf(buf, format, ##args); \
-+ } \
-+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
-+
-+/* One attribute per st_* counter kept in the blkif_t. */
-+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
-+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
-+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
-+VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
-+VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
-+VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
-+
-+/* NULL-terminated list of the counter attributes above. */
-+static struct attribute *vbdstat_attrs[] = {
-+ &dev_attr_oo_req.attr,
-+ &dev_attr_rd_req.attr,
-+ &dev_attr_wr_req.attr,
-+ &dev_attr_br_req.attr,
-+ &dev_attr_rd_sect.attr,
-+ &dev_attr_wr_sect.attr,
-+ NULL
-+};
-+
-+/* The counters are exposed together as an attribute group named "statistics". */
-+static struct attribute_group vbdstat_group = {
-+ .name = "statistics",
-+ .attrs = vbdstat_attrs,
-+};
-+
-+/* Attributes created directly on the device by xenvbd_sysfs_addif(). */
-+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
-+VBD_SHOW(mode, "%s\n", be->mode);
-+
-+/*
-+ * Create the sysfs entries for a backend device: the "physical_device" and
-+ * "mode" files and the "statistics" attribute group.
-+ *
-+ * Returns 0 on success. On failure, every entry created so far is removed
-+ * again and the error from the failing call is returned.
-+ */
-+int xenvbd_sysfs_addif(struct xenbus_device *dev)
-+{
-+	int error;
-+
-+	error = device_create_file(&dev->dev, &dev_attr_physical_device);
-+	if (error)
-+		goto fail1;
-+
-+	error = device_create_file(&dev->dev, &dev_attr_mode);
-+	if (error)
-+		goto fail2;
-+
-+	error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
-+	if (error)
-+		goto fail3;
-+
-+	return 0;
-+
-+	/*
-+	 * Each label undoes only the steps that succeeded before the
-+	 * failing one; the entry that failed to be created is not removed.
-+	 */
-+fail3:	device_remove_file(&dev->dev, &dev_attr_mode);
-+fail2:	device_remove_file(&dev->dev, &dev_attr_physical_device);
-+fail1:
-+	return error;
-+}
-+
-+/*
-+ * Remove the sysfs entries created by xenvbd_sysfs_addif(), in the reverse
-+ * order of their creation.
-+ */
-+void xenvbd_sysfs_delif(struct xenbus_device *dev)
-+{
-+	struct device *device = &dev->dev;
-+
-+	sysfs_remove_group(&device->kobj, &vbdstat_group);
-+	device_remove_file(device, &dev_attr_mode);
-+	device_remove_file(device, &dev_attr_physical_device);
-+}
-+
-+/*
-+ * Tear down everything hanging off a backend device: sysfs entries, the
-+ * xenstore watch, the block interface with its vbd, and finally the
-+ * backend_info itself. Also used by blkback_probe() to unwind a partially
-+ * set-up device. Always returns 0.
-+ */
-+static int blkback_remove(struct xenbus_device *dev)
-+{
-+	struct backend_info *be = dev_get_drvdata(&dev->dev);
-+
-+	DPRINTK("");
-+
-+	/* The sysfs entries only exist once a physical device was set. */
-+	if (be->major || be->minor)
-+		xenvbd_sysfs_delif(dev);
-+
-+	if (be->backend_watch.node) {
-+		unregister_xenbus_watch(&be->backend_watch);
-+		kfree(be->backend_watch.node);
-+		be->backend_watch.node = NULL;
-+	}
-+
-+	if (be->blkif) {
-+		blkif_disconnect(be->blkif);
-+		vbd_free(&be->blkif->vbd);
-+		blkif_free(be->blkif);
-+		be->blkif = NULL;
-+	}
-+
-+	/*
-+	 * be->mode holds the string returned by xenbus_read() in
-+	 * backend_changed(); release it with the structure. It is NULL
-+	 * (and kfree() a no-op) when no mode was ever read.
-+	 */
-+	kfree(be->mode);
-+	be->mode = NULL;
-+
-+	kfree(be);
-+	dev_set_drvdata(&dev->dev, NULL);
-+	return 0;
-+}
-+
-+/*
-+ * Write @state to the backend's "feature-barrier" node inside transaction
-+ * @xbt.
-+ *
-+ * Returns 0 on success; on failure the error is reported through
-+ * xenbus_dev_fatal() and returned.
-+ */
-+int blkback_barrier(struct xenbus_transaction xbt,
-+		    struct backend_info *be, int state)
-+{
-+	struct xenbus_device *xbdev = be->dev;
-+	int rc = xenbus_printf(xbt, xbdev->nodename, "feature-barrier",
-+			       "%d", state);
-+
-+	if (rc != 0)
-+		xenbus_dev_fatal(xbdev, rc, "writing feature-barrier");
-+
-+	return rc;
-+}
-+
-+/**
-+ * Entry point when a new backend device appears. Allocates the
-+ * backend_info and its block interface, registers a watch on the
-+ * "physical-device" node (written later by the hotplug scripts) and
-+ * switches the device to InitWait.
-+ *
-+ * Returns 0 on success. On failure everything set up so far is released
-+ * through blkback_remove() and the error is returned.
-+ */
-+static int blkback_probe(struct xenbus_device *dev,
-+			 const struct xenbus_device_id *id)
-+{
-+	struct backend_info *be;
-+	int err;
-+
-+	be = kzalloc(sizeof(struct backend_info), GFP_KERNEL);
-+	if (be == NULL) {
-+		xenbus_dev_fatal(dev, -ENOMEM,
-+				 "allocating backend structure");
-+		return -ENOMEM;
-+	}
-+
-+	/* Publish the structure first so blkback_remove() can find it. */
-+	be->dev = dev;
-+	dev_set_drvdata(&dev->dev, be);
-+
-+	be->blkif = blkif_alloc(dev->otherend_id);
-+	if (IS_ERR(be->blkif)) {
-+		err = PTR_ERR(be->blkif);
-+		be->blkif = NULL;
-+		xenbus_dev_fatal(dev, err, "creating block interface");
-+		goto fail;
-+	}
-+
-+	/* setup back pointer */
-+	be->blkif->be = be;
-+
-+	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
-+				   "%s/%s", dev->nodename, "physical-device");
-+	if (err != 0)
-+		goto fail;
-+
-+	err = xenbus_switch_state(dev, XenbusStateInitWait);
-+	if (err != 0)
-+		goto fail;
-+
-+	return 0;
-+
-+fail:
-+	DPRINTK("failed");
-+	blkback_remove(dev);
-+	return err;
-+}
-+
-+
-+/**
-+ * Callback received when the hotplug scripts have placed the physical-device
-+ * node. Read it and the mode node, and create a vbd. If the frontend is
-+ * ready, connect.
-+ *
-+ * Once a physical device has been configured, later invocations return
-+ * without touching any state; an attempt to change the device is only
-+ * logged.
-+ */
-+static void backend_changed(struct xenbus_watch *watch,
-+			    const char **vec, unsigned int len)
-+{
-+	int err;
-+	unsigned major;
-+	unsigned minor;
-+	struct backend_info *be
-+		= container_of(watch, struct backend_info, backend_watch);
-+	struct xenbus_device *dev = be->dev;
-+	int cdrom = 0;
-+	char *device_type;
-+	char *p;
-+	long handle;
-+
-+	DPRINTK("");
-+
-+	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
-+			   &major, &minor);
-+	if (XENBUS_EXIST_ERR(err)) {
-+		/* Since this watch will fire once immediately after it is
-+		   registered, we expect this. Ignore it, and wait for the
-+		   hotplug scripts. */
-+		return;
-+	}
-+	if (err != 2) {
-+		xenbus_dev_fatal(dev, err, "reading physical-device");
-+		return;
-+	}
-+
-+	if (be->major || be->minor) {
-+		/*
-+		 * Already configured. Return before re-reading "mode":
-+		 * each xenbus_read() result overwrote be->mode and leaked
-+		 * the previous string.
-+		 */
-+		if ((be->major != major) || (be->minor != minor))
-+			printk(KERN_WARNING
-+			       "blkback: changing physical device (from %x:%x to "
-+			       "%x:%x) not supported.\n", be->major, be->minor,
-+			       major, minor);
-+		return;
-+	}
-+
-+	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
-+	if (IS_ERR(be->mode)) {
-+		err = PTR_ERR(be->mode);
-+		be->mode = NULL;
-+		xenbus_dev_fatal(dev, err, "reading mode");
-+		return;
-+	}
-+
-+	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
-+	if (!IS_ERR(device_type)) {
-+		cdrom = strcmp(device_type, "cdrom") == 0;
-+		kfree(device_type);
-+	}
-+
-+	/*
-+	 * Front end dir is a number, which is used as the handle. Fall back
-+	 * to the whole path if it unexpectedly contains no '/'.
-+	 */
-+	p = strrchr(dev->otherend, '/');
-+	handle = simple_strtoul(p ? p + 1 : dev->otherend, NULL, 0);
-+
-+	be->major = major;
-+	be->minor = minor;
-+
-+	err = vbd_create(be->blkif, handle, major, minor,
-+			 (NULL == strchr(be->mode, 'w')), cdrom);
-+	if (err) {
-+		be->major = be->minor = 0;
-+		/* A retry reads the mode again; do not leak this one. */
-+		kfree(be->mode);
-+		be->mode = NULL;
-+		xenbus_dev_fatal(dev, err, "creating vbd structure");
-+		return;
-+	}
-+
-+	err = xenvbd_sysfs_addif(dev);
-+	if (err) {
-+		vbd_free(&be->blkif->vbd);
-+		be->major = be->minor = 0;
-+		kfree(be->mode);
-+		be->mode = NULL;
-+		xenbus_dev_fatal(dev, err, "creating sysfs entries");
-+		return;
-+	}
-+
-+	/* We're potentially connected now */
-+	update_blkif_status(be->blkif);
-+}
-+
-+
-+/**
-+ * Callback received when the frontend's state changes.
-+ *
-+ * Reacts to the new @frontend_state by moving the backend's own xenbus
-+ * state along: reconnect preparation, ring connection, disconnect, and
-+ * device unregistration.
-+ */
-+static void frontend_changed(struct xenbus_device *dev,
-+ enum xenbus_state frontend_state)
-+{
-+ struct backend_info *be = dev_get_drvdata(&dev->dev);
-+ int err;
-+
-+ DPRINTK("%s", xenbus_strstate(frontend_state));
-+
-+ switch (frontend_state) {
-+ case XenbusStateInitialising:
-+ /* Only acted on when the backend had previously been closed. */
-+ if (dev->state == XenbusStateClosed) {
-+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
-+ __FUNCTION__, dev->nodename);
-+ xenbus_switch_state(dev, XenbusStateInitWait);
-+ }
-+ break;
-+
-+ case XenbusStateInitialised:
-+ case XenbusStateConnected:
-+ /* Ensure we connect even when two watches fire in
-+ close succession and we miss the intermediate value
-+ of frontend_state. */
-+ if (dev->state == XenbusStateConnected)
-+ break;
-+
-+ err = connect_ring(be);
-+ if (err)
-+ break;
-+ update_blkif_status(be->blkif);
-+ break;
-+
-+ case XenbusStateClosing:
-+ blkif_disconnect(be->blkif);
-+ xenbus_switch_state(dev, XenbusStateClosing);
-+ break;
-+
-+ case XenbusStateClosed:
-+ xenbus_switch_state(dev, XenbusStateClosed);
-+ if (xenbus_dev_is_online(dev))
-+ break;
-+ /* fall through if not online */
-+ case XenbusStateUnknown:
-+ device_unregister(&dev->dev);
-+ break;
-+
-+ default:
-+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
-+ frontend_state);
-+ break;
-+ }
-+}
-+
-+
-+/* ** Connection ** */
-+
-+
-+/**
-+ * Write the physical details regarding the block device to the store, and
-+ * switch to Connected state.
-+ *
-+ * "feature-barrier", "sectors", "info" and "sector-size" are written in a
-+ * single transaction, which is retried while ending it returns -EAGAIN.
-+ * A failing write aborts the transaction and returns without switching
-+ * state.
-+ */
-+static void connect(struct backend_info *be)
-+{
-+	struct xenbus_transaction xbt;
-+	int err;
-+	struct xenbus_device *dev = be->dev;
-+
-+	DPRINTK("%s", dev->otherend);
-+
-+	/* Supply the information about the device the frontend needs */
-+again:
-+	err = xenbus_transaction_start(&xbt);
-+	if (err) {
-+		xenbus_dev_fatal(dev, err, "starting transaction");
-+		return;
-+	}
-+
-+	err = blkback_barrier(xbt, be, 1);
-+	if (err)
-+		goto abort;
-+
-+	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
-+			    vbd_size(&be->blkif->vbd));
-+	if (err) {
-+		xenbus_dev_fatal(dev, err, "writing %s/sectors",
-+				 dev->nodename);
-+		goto abort;
-+	}
-+
-+	/* FIXME: use a typename instead */
-+	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
-+			    vbd_info(&be->blkif->vbd));
-+	if (err) {
-+		xenbus_dev_fatal(dev, err, "writing %s/info",
-+				 dev->nodename);
-+		goto abort;
-+	}
-+	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
-+			    vbd_secsize(&be->blkif->vbd));
-+	if (err) {
-+		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
-+				 dev->nodename);
-+		goto abort;
-+	}
-+
-+	err = xenbus_transaction_end(xbt, 0);
-+	if (err == -EAGAIN)
-+		goto again;
-+	if (err)
-+		xenbus_dev_fatal(dev, err, "ending transaction");
-+
-+	err = xenbus_switch_state(dev, XenbusStateConnected);
-+	if (err)
-+		/* The format needs a %s to consume the nodename argument. */
-+		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
-+				 dev->nodename);
-+
-+	return;
-+ abort:
-+	xenbus_transaction_end(xbt, 1);
-+}
-+
-+
-+/*
-+ * Read the frontend's ring reference, event channel and (optional) ABI
-+ * protocol from xenstore, then map the shared ring via blkif_map().
-+ *
-+ * A missing "protocol" key selects the native ABI.  An unrecognised
-+ * protocol string is fatal.
-+ *
-+ * Returns 0 on success or a negative error code; every failure is also
-+ * reported through xenbus_dev_fatal().
-+ */
-+static int connect_ring(struct backend_info *be)
-+{
-+	struct xenbus_device *dev = be->dev;
-+	unsigned long ring_ref;
-+	unsigned int evtchn;
-+	char protocol[64] = "";
-+	int err;
-+
-+	DPRINTK("%s", dev->otherend);
-+
-+	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref,
-+			    "event-channel", "%u", &evtchn, NULL);
-+	if (err) {
-+		xenbus_dev_fatal(dev, err,
-+				 "reading %s/ring-ref and event-channel",
-+				 dev->otherend);
-+		return err;
-+	}
-+
-+	be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
-+	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
-+			    "%63s", protocol, NULL);
-+	if (err)
-+		strcpy(protocol, "unspecified, assuming native");
-+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
-+		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
-+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
-+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
-+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
-+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
-+	else {
-+		/* err is 0 on this branch, so report a real error code
-+		 * instead of passing 0 and returning a bare -1. */
-+		err = -EINVAL;
-+		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
-+		return err;
-+	}
-+	/* ring_ref and evtchn are unsigned: print them as such. */
-+	printk(KERN_INFO
-+	       "blkback: ring-ref %lu, event-channel %u, protocol %d (%s)\n",
-+	       ring_ref, evtchn, be->blkif->blk_protocol, protocol);
-+
-+	/* Map the shared frame, irq etc. */
-+	err = blkif_map(be->blkif, ring_ref, evtchn);
-+	if (err) {
-+		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-+				 ring_ref, evtchn);
-+		return err;
-+	}
-+
-+	return 0;
-+}
-+
-+
-+/* ** Driver Registration ** */
-+
-+
-+/* Device types this backend binds to; the empty string ends the table. */
-+static const struct xenbus_device_id blkback_ids[] = {
-+ { "vbd" },
-+ { "" }
-+};
-+
-+
-+/* xenbus driver descriptor wiring the probe/remove/state-change callbacks. */
-+static struct xenbus_driver blkback = {
-+ .name = "vbd",
-+ .owner = THIS_MODULE,
-+ .ids = blkback_ids,
-+ .probe = blkback_probe,
-+ .remove = blkback_remove,
-+ .otherend_changed = frontend_changed
-+};
-+
-+
-+/* Register the blkback driver as a xenbus backend; returns the
-+ * result of xenbus_register_backend() unchanged. */
-+int blkif_xenbus_init(void)
-+{
-+ return xenbus_register_backend(&blkback);
-+}
-diff --git a/drivers/xen/blktap/Makefile b/drivers/xen/blktap/Makefile
-new file mode 100644
-index 0000000..822b4e4
---- /dev/null
-+++ b/drivers/xen/blktap/Makefile
-@@ -0,0 +1,3 @@
-+obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o
-+
-+blktap-objs := control.o ring.o device.o request.o sysfs.o
-diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h
-new file mode 100644
-index 0000000..fe63fc9
---- /dev/null
-+++ b/drivers/xen/blktap/blktap.h
-@@ -0,0 +1,209 @@
-+#ifndef _BLKTAP_H_
-+#define _BLKTAP_H_
-+
-+#include <linux/mm.h>
-+#include <linux/fs.h>
-+#include <linux/cdev.h>
-+#include <linux/init.h>
-+#include <linux/scatterlist.h>
-+#include <xen/blkif.h>
-+
-+extern int blktap_debug_level;
-+extern int blktap_ring_major;
-+extern int blktap_device_major;
-+
-+/*
-+ * Conditional printk: emits when blktap_debug_level exceeds @level and
-+ * either @force is non-zero or printk_ratelimit() allows it.  The message
-+ * is prefixed with the calling function's name.  @level and @force are
-+ * parenthesized so expression arguments expand safely.
-+ */
-+#define BTPRINTK(level, tag, force, _f, _a...)				\
-+	do {								\
-+		if (blktap_debug_level > (level) &&			\
-+		    ((force) || printk_ratelimit()))			\
-+			printk(tag "%s: " _f, __func__, ##_a);		\
-+	} while (0)
-+
-+#define BTDBG(_f, _a...) BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a)
-+#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a)
-+#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a)
-+#define BTERR(_f, _a...) BTPRINTK(0, KERN_ERR, 0, _f, ##_a)
-+
-+#define MAX_BLKTAP_DEVICE 1024
-+
-+#define BLKTAP_DEVICE 4
-+#define BLKTAP_DEVICE_CLOSED 5
-+#define BLKTAP_SHUTDOWN_REQUESTED 8
-+
-+/* blktap IOCTLs: */
-+#define BLKTAP2_IOCTL_KICK_FE 1
-+#define BLKTAP2_IOCTL_ALLOC_TAP 200
-+#define BLKTAP2_IOCTL_FREE_TAP 201
-+#define BLKTAP2_IOCTL_CREATE_DEVICE 202
-+#define BLKTAP2_IOCTL_REMOVE_DEVICE 207
-+
-+#define BLKTAP2_MAX_MESSAGE_LEN 256
-+
-+#define BLKTAP2_RING_MESSAGE_CLOSE 3
-+
-+#define BLKTAP_REQUEST_FREE 0
-+#define BLKTAP_REQUEST_PENDING 1
-+
-+/*
-+ * The maximum number of requests that can be outstanding at any time
-+ * is determined by
-+ *
-+ * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST]
-+ *
-+ * where mmap_alloc < MAX_DYNAMIC_MEM.
-+ *
-+ * TODO:
-+ * mmap_alloc is initialised to 2 and should be adjustable on the fly via
-+ * sysfs.
-+ */
-+#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
-+#define MAX_DYNAMIC_MEM BLK_RING_SIZE
-+#define MAX_PENDING_REQS BLK_RING_SIZE
-+#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-+/*
-+ * Address of segment @_seg of request @_req inside a mapping that begins
-+ * at @_start, with BLKIF_MAX_SEGMENTS_PER_REQUEST pages reserved per
-+ * request.  All arguments are parenthesized so expression arguments
-+ * (e.g. "base + off") expand with the intended precedence.
-+ */
-+#define MMAP_VADDR(_start, _req, _seg)					\
-+	((_start) +							\
-+	 ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +	\
-+	 ((_seg) * PAGE_SIZE))
-+
-+/* A pair of grant handles, one named for the kernel and one for user. */
-+struct grant_handle_pair {
-+ grant_handle_t kernel;
-+ grant_handle_t user;
-+};
-+#define INVALID_GRANT_HANDLE 0xFFFF
-+
-+/* Result of BLKTAP2_IOCTL_ALLOC_TAP, copied to user space: the ring and
-+ * device major numbers plus the minor assigned to the new tap. */
-+struct blktap_handle {
-+ unsigned int ring;
-+ unsigned int device;
-+ unsigned int minor;
-+};
-+
-+/* Parameters for blktap_device_create(); checked by
-+ * blktap_device_validate_params() before use. */
-+struct blktap_params {
-+ /* must be NUL-terminated within the buffer */
-+ char name[BLKTAP2_MAX_MESSAGE_LEN];
-+ /* passed to set_capacity(); must be non-zero */
-+ unsigned long long capacity;
-+ /* a power of two between 512 and 4096 */
-+ unsigned long sector_size;
-+};
-+
-+/* Block-device side of a tap. */
-+struct blktap_device {
-+ /* handed to blk_init_queue() as the request queue lock */
-+ spinlock_t lock;
-+ /* NULL while no disk exists */
-+ struct gendisk *gd;
-+};
-+
-+/* Ring (character device) side of a tap. */
-+struct blktap_ring {
-+ struct task_struct *task;
-+
-+ struct vm_area_struct *vma;
-+ struct blkif_front_ring ring;
-+ unsigned long ring_vstart;
-+ unsigned long user_vstart;
-+
-+ /* NOTE(review): presumably n_pending counts the non-NULL entries of
-+ * pending[] -- confirm against ring.c */
-+ int n_pending;
-+ struct blktap_request *pending[MAX_PENDING_REQS];
-+
-+ /* also waited on by blktap_device_destroy_sync() */
-+ wait_queue_head_t poll_wait;
-+
-+ dev_t devno;
-+ struct device *dev;
-+};
-+
-+/* Per-tap counters. */
-+struct blktap_statistics {
-+ unsigned long st_print;
-+ int st_rd_req;
-+ int st_wr_req;
-+ /* bumped when a request could not be issued for lack of resources */
-+ int st_oo_req;
-+ int st_rd_sect;
-+ int st_wr_sect;
-+ s64 st_rd_cnt;
-+ s64 st_rd_sum_usecs;
-+ s64 st_rd_max_usecs;
-+ s64 st_wr_cnt;
-+ s64 st_wr_sum_usecs;
-+ s64 st_wr_max_usecs;
-+};
-+
-+/* One in-flight I/O request owned by a tap. */
-+struct blktap_request {
-+ struct blktap *tap;
-+ /* the block-layer request being served */
-+ struct request *rq;
-+ int usr_idx;
-+
-+ /* BLKIF_OP_READ or BLKIF_OP_WRITE */
-+ int operation;
-+ struct timeval time;
-+
-+ /* filled by blk_rq_map_sg() */
-+ struct scatterlist sg_table[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-+ /* pool pages taken by blktap_request_get_pages() */
-+ struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-+ /* number of valid entries in pages[] */
-+ int nr_pages;
-+};
-+
-+/* Iterate @_sg over the first nr_pages entries of @_req's sg_table,
-+ * with @_i as the running index. */
-+#define blktap_for_each_sg(_sg, _req, _i) \
-+ for (_sg = (_req)->sg_table, _i = 0; \
-+ _i < (_req)->nr_pages; \
-+ (_sg)++, (_i)++)
-+
-+/* One tap instance: a ring plus an optional block device. */
-+struct blktap {
-+ /* index of this tap in blktaps[] */
-+ int minor;
-+ /* bit flags: BLKTAP_DEVICE, BLKTAP_DEVICE_CLOSED, ... */
-+ unsigned long dev_inuse;
-+
-+ struct blktap_ring ring;
-+ struct blktap_device device;
-+ /* page pool; a kobject reference is held while the tap exists */
-+ struct blktap_page_pool *pool;
-+
-+ wait_queue_head_t remove_wait;
-+ struct work_struct remove_work;
-+ char name[BLKTAP2_MAX_MESSAGE_LEN];
-+
-+ struct blktap_statistics stats;
-+};
-+
-+/* A shared, reference-counted (via kobj) pool of pages for requests. */
-+struct blktap_page_pool {
-+ /* mempool the request pages are drawn from */
-+ struct mempool_s *bufs;
-+ /* serializes multi-page grabs in blktap_request_get_pages() */
-+ spinlock_t lock;
-+ struct kobject kobj;
-+ /* woken once a full segment set is free again */
-+ wait_queue_head_t wait;
-+};
-+
-+extern struct mutex blktap_lock;
-+extern struct blktap **blktaps;
-+extern int blktap_max_minor;
-+
-+int blktap_control_destroy_tap(struct blktap *);
-+size_t blktap_control_debug(struct blktap *, char *, size_t);
-+
-+int blktap_ring_init(void);
-+void blktap_ring_exit(void);
-+size_t blktap_ring_debug(struct blktap *, char *, size_t);
-+int blktap_ring_create(struct blktap *);
-+int blktap_ring_destroy(struct blktap *);
-+struct blktap_request *blktap_ring_make_request(struct blktap *);
-+void blktap_ring_free_request(struct blktap *,struct blktap_request *);
-+void blktap_ring_submit_request(struct blktap *, struct blktap_request *);
-+int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *, int);
-+int blktap_ring_map_request(struct blktap *, struct blktap_request *);
-+void blktap_ring_unmap_request(struct blktap *, struct blktap_request *);
-+void blktap_ring_set_message(struct blktap *, int);
-+void blktap_ring_kick_user(struct blktap *);
-+
-+int blktap_sysfs_init(void);
-+void blktap_sysfs_exit(void);
-+int blktap_sysfs_create(struct blktap *);
-+void blktap_sysfs_destroy(struct blktap *);
-+
-+int blktap_device_init(void);
-+void blktap_device_exit(void);
-+size_t blktap_device_debug(struct blktap *, char *, size_t);
-+int blktap_device_create(struct blktap *, struct blktap_params *);
-+int blktap_device_destroy(struct blktap *);
-+void blktap_device_destroy_sync(struct blktap *);
-+void blktap_device_run_queue(struct blktap *);
-+void blktap_device_end_request(struct blktap *, struct blktap_request *, int);
-+
-+int blktap_page_pool_init(struct kobject *);
-+void blktap_page_pool_exit(void);
-+struct blktap_page_pool *blktap_page_pool_get(const char *);
-+
-+size_t blktap_request_debug(struct blktap *, char *, size_t);
-+struct blktap_request *blktap_request_alloc(struct blktap *);
-+int blktap_request_get_pages(struct blktap *, struct blktap_request *, int);
-+void blktap_request_free(struct blktap *, struct blktap_request *);
-+void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int);
-+
-+
-+#endif
-diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c
-new file mode 100644
-index 0000000..f339bba
---- /dev/null
-+++ b/drivers/xen/blktap/control.c
-@@ -0,0 +1,315 @@
-+#include <linux/module.h>
-+#include <linux/sched.h>
-+#include <linux/miscdevice.h>
-+#include <linux/device.h>
-+#include <asm/uaccess.h>
-+
-+#include "blktap.h"
-+
-+DEFINE_MUTEX(blktap_lock);
-+
-+struct blktap **blktaps;
-+int blktap_max_minor;
-+static struct blktap_page_pool *default_pool;
-+
-+/*
-+ * Allocate a zeroed blktap and bind it to the lowest free minor,
-+ * doubling the blktaps[] table (capped at MAX_BLKTAP_DEVICE) when it is
-+ * full.  Takes a module reference on success.
-+ *
-+ * Returns the new tap, or NULL when memory is exhausted or all
-+ * MAX_BLKTAP_DEVICE minors are in use.
-+ */
-+static struct blktap *
-+blktap_control_get_minor(void)
-+{
-+	int minor;
-+	struct blktap *tap;
-+
-+	tap = kzalloc(sizeof(*tap), GFP_KERNEL);
-+	if (unlikely(!tap))
-+		return NULL;
-+
-+	mutex_lock(&blktap_lock);
-+
-+	for (minor = 0; minor < blktap_max_minor; minor++)
-+		if (!blktaps[minor])
-+			break;
-+
-+	if (minor == MAX_BLKTAP_DEVICE)
-+		goto fail;
-+
-+	if (minor == blktap_max_minor) {
-+		/* Table is full: grow it and zero the new tail. */
-+		void *p;
-+		int n;
-+
-+		n = min(2 * blktap_max_minor, MAX_BLKTAP_DEVICE);
-+		p = krealloc(blktaps, n * sizeof(blktaps[0]), GFP_KERNEL);
-+		if (!p)
-+			goto fail;
-+
-+		blktaps = p;
-+		blktap_max_minor = n;
-+
-+		memset(&blktaps[minor], 0, (n - minor) * sizeof(blktaps[0]));
-+	}
-+
-+	tap->minor = minor;
-+	blktaps[minor] = tap;
-+
-+	__module_get(THIS_MODULE);
-+out:
-+	mutex_unlock(&blktap_lock);
-+	return tap;
-+
-+fail:
-+	/* blktap_lock is still held; "out" releases it exactly once.
-+	 * (Unlocking here as well would unlock the mutex twice.) */
-+	kfree(tap);
-+	tap = NULL;
-+	goto out;
-+}
-+
-+/*
-+ * Release @tap's slot in blktaps[], free the tap and drop the module
-+ * reference taken by blktap_control_get_minor().
-+ * NOTE(review): the table is written without taking blktap_lock here.
-+ */
-+static void
-+blktap_control_put_minor(struct blktap* tap)
-+{
-+ blktaps[tap->minor] = NULL;
-+ kfree(tap);
-+
-+ module_put(THIS_MODULE);
-+}
-+
-+/*
-+ * Create a new tap: reserve a minor, attach it to the current default
-+ * page pool (taking a pool reference), then create its ring and sysfs
-+ * entries.
-+ *
-+ * Returns the tap, or NULL on failure with everything undone, including
-+ * the pool reference.
-+ */
-+static struct blktap*
-+blktap_control_create_tap(void)
-+{
-+	struct blktap *tap;
-+	int err;
-+
-+	tap = blktap_control_get_minor();
-+	if (!tap)
-+		return NULL;
-+
-+	kobject_get(&default_pool->kobj);
-+	tap->pool = default_pool;
-+
-+	err = blktap_ring_create(tap);
-+	if (err)
-+		goto fail_tap;
-+
-+	err = blktap_sysfs_create(tap);
-+	if (err)
-+		goto fail_ring;
-+
-+	return tap;
-+
-+fail_ring:
-+	blktap_ring_destroy(tap);
-+fail_tap:
-+	/* Drop the pool reference taken above; it was leaked before. */
-+	kobject_put(&tap->pool->kobj);
-+	blktap_control_put_minor(tap);
-+
-+	return NULL;
-+}
-+
-+/*
-+ * Tear down @tap: destroy the ring, drop the page-pool reference,
-+ * remove the sysfs entries and release the minor (which frees @tap).
-+ *
-+ * Returns 0 on success.  If blktap_ring_destroy() fails, its error is
-+ * returned and the tap is left intact.
-+ */
-+int
-+blktap_control_destroy_tap(struct blktap *tap)
-+{
-+ int err;
-+
-+ err = blktap_ring_destroy(tap);
-+ if (err)
-+ return err;
-+
-+ kobject_put(&tap->pool->kobj);
-+
-+ blktap_sysfs_destroy(tap);
-+
-+ blktap_control_put_minor(tap);
-+
-+ return 0;
-+}
-+
-+/*
-+ * ioctl handler for the blktap control device.
-+ *
-+ * BLKTAP2_IOCTL_ALLOC_TAP: create a tap and copy a struct blktap_handle
-+ *	(ring major, device major, minor) to the user pointer in @arg.
-+ * BLKTAP2_IOCTL_FREE_TAP: destroy the tap whose minor is @arg.
-+ *
-+ * Returns 0 on success, -ENOMEM/-EFAULT/-EINVAL/-ENODEV on failure, or
-+ * -ENOIOCTLCMD for unknown commands.
-+ */
-+static int
-+blktap_control_ioctl(struct inode *inode, struct file *filp,
-+		     unsigned int cmd, unsigned long arg)
-+{
-+	struct blktap *tap;
-+
-+	switch (cmd) {
-+	case BLKTAP2_IOCTL_ALLOC_TAP: {
-+		struct blktap_handle h;
-+		void __user *ptr = (void __user*)arg;
-+
-+		tap = blktap_control_create_tap();
-+		if (!tap)
-+			return -ENOMEM;
-+
-+		h.ring   = blktap_ring_major;
-+		h.device = blktap_device_major;
-+		h.minor  = tap->minor;
-+
-+		if (copy_to_user(ptr, &h, sizeof(h))) {
-+			blktap_control_destroy_tap(tap);
-+			return -EFAULT;
-+		}
-+
-+		return 0;
-+	}
-+
-+	case BLKTAP2_IOCTL_FREE_TAP: {
-+		/*
-+		 * Validate the raw argument against the current table
-+		 * size.  The old "minor > MAX_BLKTAP_DEVICE" test let
-+		 * negative values and anything at or above
-+		 * blktap_max_minor index past the end of blktaps[].
-+		 * The lookup is done under blktap_lock because
-+		 * blktap_control_get_minor() may reallocate the table.
-+		 */
-+		mutex_lock(&blktap_lock);
-+		if (arg >= (unsigned long)blktap_max_minor) {
-+			mutex_unlock(&blktap_lock);
-+			return -EINVAL;
-+		}
-+		tap = blktaps[arg];
-+		mutex_unlock(&blktap_lock);
-+
-+		if (!tap)
-+			return -ENODEV;
-+
-+		return blktap_control_destroy_tap(tap);
-+	}
-+	}
-+
-+	return -ENOIOCTLCMD;
-+}
-+
-+/* The control device only implements ioctl. */
-+static struct file_operations blktap_control_file_operations = {
-+ .owner = THIS_MODULE,
-+ .ioctl = blktap_control_ioctl,
-+};
-+
-+/* Misc device "blktap-control" with a dynamically assigned minor. */
-+static struct miscdevice blktap_control = {
-+ .minor = MISC_DYNAMIC_MINOR,
-+ .name = "blktap-control",
-+ .fops = &blktap_control_file_operations,
-+};
-+
-+static struct device *control_device;
-+
-+/* sysfs show: write the default pool's kobject name to @buf (no
-+ * trailing newline) and return the number of characters written. */
-+static ssize_t
-+blktap_control_show_default_pool(struct device *device,
-+ struct device_attribute *attr,
-+ char *buf)
-+{
-+ return sprintf(buf, "%s", kobject_name(&default_pool->kobj));
-+}
-+
-+/*
-+ * sysfs store: look up the pool named by @buf and make it the default,
-+ * dropping the reference held on the previous default.
-+ *
-+ * Returns @size on success or the error encoded in the pointer returned
-+ * by blktap_page_pool_get().
-+ * NOTE(review): @buf is passed through as-is; a trailing newline from
-+ * the writer would become part of the looked-up name -- confirm how
-+ * blktap_page_pool_get() treats it.
-+ */
-+static ssize_t
-+blktap_control_store_default_pool(struct device *device,
-+ struct device_attribute *attr,
-+ const char *buf, size_t size)
-+{
-+ struct blktap_page_pool *pool, *tmp = default_pool;
-+
-+ pool = blktap_page_pool_get(buf);
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+
-+ /* Swap in the new default before releasing the old one. */
-+ default_pool = pool;
-+ kobject_put(&tmp->kobj);
-+
-+ return size;
-+}
-+
-+static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
-+ blktap_control_show_default_pool,
-+ blktap_control_store_default_pool);
-+
-+/*
-+ * Format a one-line summary of @tap (ring device number, name, flags)
-+ * into @buf, which holds @size bytes.
-+ *
-+ * Returns the number of characters actually stored.  scnprintf() is used
-+ * rather than snprintf() so the result never exceeds @size when the
-+ * output is truncated.
-+ */
-+size_t
-+blktap_control_debug(struct blktap *tap, char *buf, size_t size)
-+{
-+	char *s = buf, *end = buf + size;
-+
-+	s += scnprintf(s, end - s,
-+		       "tap %u:%u name:'%s' flags:%#08lx\n",
-+		       MAJOR(tap->ring.devno), MINOR(tap->ring.devno),
-+		       tap->name, tap->dev_inuse);
-+
-+	return s - buf;
-+}
-+
-+/*
-+ * Set up the control side: register the misc device, allocate the
-+ * initial minor table, initialise page pools, acquire the "default"
-+ * pool and expose it through the default_pool sysfs attribute.
-+ *
-+ * Returns 0 or a negative error code.  Nothing is unwound here on
-+ * failure; the caller (blktap_init) runs blktap_exit(), which releases
-+ * whatever was set up.
-+ */
-+static int __init
-+blktap_control_init(void)
-+{
-+	struct blktap_page_pool *pool;
-+	int err;
-+
-+	err = misc_register(&blktap_control);
-+	if (err)
-+		return err;
-+
-+	control_device = blktap_control.this_device;
-+
-+	blktap_max_minor = min(64, MAX_BLKTAP_DEVICE);
-+	blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL);
-+	if (!blktaps) {
-+		BTERR("failed to allocate blktap minor map\n");
-+		return -ENOMEM;
-+	}
-+
-+	err = blktap_page_pool_init(&control_device->kobj);
-+	if (err)
-+		return err;
-+
-+	/*
-+	 * blktap_control_store_default_pool() treats this call as
-+	 * returning an ERR_PTR on failure, so handle that form as well as
-+	 * NULL, and only publish default_pool once it is known to be
-+	 * valid: blktap_control_exit() dereferences any non-NULL value.
-+	 */
-+	pool = blktap_page_pool_get("default");
-+	if (!pool)
-+		return -ENOMEM;
-+	if (IS_ERR(pool))
-+		return PTR_ERR(pool);
-+	default_pool = pool;
-+
-+	err = device_create_file(control_device, &dev_attr_default_pool);
-+	if (err)
-+		return err;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Undo blktap_control_init(): release the default pool, shut down the
-+ * page pools, free the minor table and deregister the misc device.
-+ * Safe to call after a partial init; each resource is released only
-+ * if it was set up.
-+ */
-+static void
-+blktap_control_exit(void)
-+{
-+	struct blktap_page_pool *pool = default_pool;
-+
-+	if (pool) {
-+		kobject_put(&pool->kobj);
-+		default_pool = NULL;
-+	}
-+
-+	blktap_page_pool_exit();
-+
-+	/* kfree(NULL) is a no-op, so no guard is needed. */
-+	kfree(blktaps);
-+	blktaps = NULL;
-+
-+	if (!control_device)
-+		return;
-+
-+	misc_deregister(&blktap_control);
-+	control_device = NULL;
-+}
-+
-+/* Module exit, also used by blktap_init() to unwind a failed init:
-+ * shuts down the control, ring, sysfs and device parts in that order. */
-+static void
-+blktap_exit(void)
-+{
-+ blktap_control_exit();
-+ blktap_ring_exit();
-+ blktap_sysfs_exit();
-+ blktap_device_exit();
-+}
-+
-+/*
-+ * Module init: bring up the device, ring, sysfs and control parts in
-+ * that order, stopping at the first failure.  On failure everything is
-+ * unwound through blktap_exit() and the failing step's error returned.
-+ */
-+static int __init
-+blktap_init(void)
-+{
-+	int rc;
-+
-+	rc = blktap_device_init();
-+	if (!rc)
-+		rc = blktap_ring_init();
-+	if (!rc)
-+		rc = blktap_sysfs_init();
-+	if (!rc)
-+		rc = blktap_control_init();
-+	if (!rc)
-+		return 0;
-+
-+	blktap_exit();
-+	return rc;
-+}
-+
-+module_init(blktap_init);
-+module_exit(blktap_exit);
-+MODULE_LICENSE("Dual BSD/GPL");
-diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c
-new file mode 100644
-index 0000000..fce2769
---- /dev/null
-+++ b/drivers/xen/blktap/device.c
-@@ -0,0 +1,564 @@
-+#include <linux/fs.h>
-+#include <linux/blkdev.h>
-+#include <linux/cdrom.h>
-+#include <linux/hdreg.h>
-+#include <scsi/scsi.h>
-+#include <scsi/scsi_ioctl.h>
-+
-+#include "blktap.h"
-+
-+int blktap_device_major;
-+
-+#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device)
-+
-+/*
-+ * Block device open: succeeds while the disk is still attached to a tap
-+ * device, fails with -ENXIO once private_data has been cleared.
-+ */
-+static int
-+blktap_device_open(struct block_device *bdev, fmode_t mode)
-+{
-+	struct blktap_device *tapdev = bdev->bd_disk->private_data;
-+
-+	/* NB. we might have bounced a bd trylock by tapdisk. when
-+	 * failing for reasons not !tapdev, make sure to kick tapdisk
-+	 * out of destroy wait state again. */
-+
-+	return tapdev ? 0 : -ENXIO;
-+}
-+
-+/*
-+ * Block device release.  When no openers remain on the whole-disk
-+ * block_device, flag the tap BLKTAP_DEVICE_CLOSED and kick the ring
-+ * user.
-+ *
-+ * Always returns 0.
-+ */
-+static int
-+blktap_device_release(struct gendisk *disk, fmode_t mode)
-+{
-+	struct blktap_device *tapdev = disk->private_data;
-+	struct block_device *bdev = bdget_disk(disk, 0);
-+	struct blktap *tap = dev_to_blktap(tapdev);
-+	int openers;
-+
-+	/* Sample bd_openers while our reference is still held; the old
-+	 * code read it after bdput(). */
-+	openers = bdev->bd_openers;
-+	bdput(bdev);
-+
-+	if (!openers) {
-+		set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse);
-+		blktap_ring_kick_user(tap);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Report a synthetic geometry (255 heads, 63 sectors per track) with a
-+ * cylinder count derived from the disk capacity.  Always returns 0.
-+ */
-+static int
-+blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
-+{
-+ /* We don't have real geometry info, but let's at least return
-+ values consistent with the size of the device */
-+ sector_t nsect = get_capacity(bd->bd_disk);
-+ sector_t cylinders = nsect;
-+
-+ hg->heads = 0xff;
-+ hg->sectors = 0x3f;
-+ /* sector_div() divides 'cylinders' in place. */
-+ sector_div(cylinders, hg->heads * hg->sectors);
-+ hg->cylinders = cylinders;
-+ /* If the stored cylinder count cannot cover the disk, report 0xffff. */
-+ if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
-+ hg->cylinders = 0xffff;
-+ return 0;
-+}
-+
-+/*
-+ * Block device ioctl.  Two commands are emulated with all-zero answers:
-+ *
-+ * CDROMMULTISESSION:    zero the user's struct cdrom_multisession.
-+ * SCSI_IOCTL_GET_IDLUN: store 0 in dev_id and host_unique_id.
-+ *
-+ * Returns 0 on success, -EFAULT if the user buffer cannot be written,
-+ * and -EINVAL for every other command.
-+ */
-+static int
-+blktap_device_ioctl(struct block_device *bd, fmode_t mode,
-+		    unsigned command, unsigned long argument)
-+{
-+	switch (command) {
-+	case CDROMMULTISESSION:
-+		BTDBG("FIXME: support multisession CDs later\n");
-+		/* clear_user() returns the number of bytes it could not
-+		 * zero; it replaces the byte-by-byte put_user() loop. */
-+		if (clear_user((void __user *)argument,
-+			       sizeof(struct cdrom_multisession)))
-+			return -EFAULT;
-+		return 0;
-+
-+	case SCSI_IOCTL_GET_IDLUN: {
-+		struct scsi_idlun __user *idlun =
-+			(struct scsi_idlun __user *)argument;
-+
-+		if (!access_ok(VERIFY_WRITE, idlun, sizeof(*idlun)))
-+			return -EFAULT;
-+
-+		/* return 0 for now; the stores can still fault after
-+		 * access_ok(), so check them. */
-+		if (__put_user(0, &idlun->dev_id) ||
-+		    __put_user(0, &idlun->host_unique_id))
-+			return -EFAULT;
-+		return 0;
-+	}
-+
-+	default:
-+		/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
-+		  command);*/
-+		return -EINVAL; /* same return as native Linux */
-+	}
-+}
-+
-+/* Block device operations for tap disks. */
-+static struct block_device_operations blktap_device_file_operations = {
-+ .owner = THIS_MODULE,
-+ .open = blktap_device_open,
-+ .release = blktap_device_release,
-+ .ioctl = blktap_device_ioctl,
-+ .getgeo = blktap_device_getgeo
-+};
-+
-+/* NB. __blktap holding the queue lock; blktap where unlocked */
-+
-+/* Thin wrapper: returns whatever blk_peek_request() yields for @q. */
-+static inline struct request*
-+__blktap_next_queued_rq(struct request_queue *q)
-+{
-+ return blk_peek_request(q);
-+}
-+
-+/* Thin wrapper around blk_start_request() for @rq. */
-+static inline void
-+__blktap_dequeue_rq(struct request *rq)
-+{
-+ blk_start_request(rq);
-+}
-+
-+/* NB. err == 0 indicates success, failures < 0 */
-+
-+/* Start a still-queued request and immediately complete all of its
-+ * bytes with status @err. */
-+static inline void
-+__blktap_end_queued_rq(struct request *rq, int err)
-+{
-+ blk_start_request(rq);
-+ __blk_end_request(rq, err, blk_rq_bytes(rq));
-+}
-+
-+/* Complete all bytes of @rq with status @err; per the file's naming
-+ * note, the __ variants expect the queue lock to be held. */
-+static inline void
-+__blktap_end_rq(struct request *rq, int err)
-+{
-+ __blk_end_request(rq, err, blk_rq_bytes(rq));
-+}
-+
-+/* Unlocked variant: takes the queue lock around __blktap_end_rq(). */
-+static inline void
-+blktap_end_rq(struct request *rq, int err)
-+{
-+ spin_lock_irq(rq->q->queue_lock);
-+ __blktap_end_rq(rq, err);
-+ spin_unlock_irq(rq->q->queue_lock);
-+}
-+
-+/*
-+ * Finish a tap request: unmap it from the ring, release the request
-+ * structure, then complete the underlying block-layer request with
-+ * @error.
-+ */
-+void
-+blktap_device_end_request(struct blktap *tap,
-+			  struct blktap_request *request,
-+			  int error)
-+{
-+	/* Grab the block request first: @request is released below. */
-+	struct request *blk_rq = request->rq;
-+	struct gendisk *disk = tap->device.gd;
-+
-+	blktap_ring_unmap_request(tap, request);
-+	blktap_ring_free_request(tap, request);
-+
-+	dev_dbg(disk_to_dev(disk),
-+		"end_request: op=%d error=%d bytes=%d\n",
-+		rq_data_dir(blk_rq), error, blk_rq_bytes(blk_rq));
-+
-+	blktap_end_rq(blk_rq, error);
-+}
-+
-+/*
-+ * Turn block-layer request @rq into a tap request and submit it on the
-+ * ring.
-+ *
-+ * Returns 0 once submitted.  Returns -EBUSY (and bumps st_oo_req) when
-+ * no ring slot, request structure or pool pages are available.  Any
-+ * other failure is logged (rate-limited) and its error code returned.
-+ */
-+int
-+blktap_device_make_request(struct blktap *tap, struct request *rq)
-+{
-+ struct blktap_device *tapdev = &tap->device;
-+ struct blktap_request *request;
-+ int write, nsegs;
-+ int err;
-+
-+ request = blktap_ring_make_request(tap);
-+ if (IS_ERR(request)) {
-+ err = PTR_ERR(request);
-+ /* Nothing to free on the exit paths below. */
-+ request = NULL;
-+
-+ if (err == -ENOSPC || err == -ENOMEM)
-+ goto stop;
-+
-+ goto fail;
-+ }
-+
-+ write = rq_data_dir(rq) == WRITE;
-+ nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table);
-+
-+ dev_dbg(disk_to_dev(tapdev->gd),
-+ "make_request: op=%c bytes=%d nsegs=%d\n",
-+ write ? 'w' : 'r', blk_rq_bytes(rq), nsegs);
-+
-+ request->rq = rq;
-+ request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
-+
-+ /* Any page-allocation failure is treated as "out of resources". */
-+ err = blktap_request_get_pages(tap, request, nsegs);
-+ if (err)
-+ goto stop;
-+
-+ err = blktap_ring_map_request(tap, request);
-+ if (err)
-+ goto fail;
-+
-+ blktap_ring_submit_request(tap, request);
-+
-+ return 0;
-+
-+stop:
-+ tap->stats.st_oo_req++;
-+ err = -EBUSY;
-+
-+_out:
-+ if (request)
-+ blktap_ring_free_request(tap, request);
-+
-+ return err;
-+fail:
-+ if (printk_ratelimit())
-+ dev_warn(disk_to_dev(tapdev->gd),
-+ "make request: %d, failing\n", err);
-+ goto _out;
-+}
-+
-+/*
-+ * called from tapdisk context
-+ */
-+/*
-+ * Drain the tap's block request queue.  Non-fs requests are failed with
-+ * -EOPNOTSUPP; fs requests are handed to blktap_device_make_request().
-+ * The queue is stopped and the loop left when that returns -EBUSY.
-+ */
-+void
-+blktap_device_run_queue(struct blktap *tap)
-+{
-+ struct blktap_device *tapdev = &tap->device;
-+ struct request_queue *q;
-+ struct request *rq;
-+ int err;
-+
-+ if (!tapdev->gd)
-+ return;
-+
-+ q = tapdev->gd->queue;
-+
-+ spin_lock_irq(&tapdev->lock);
-+ queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-+
-+ do {
-+ rq = __blktap_next_queued_rq(q);
-+ if (!rq)
-+ break;
-+
-+ if (!blk_fs_request(rq)) {
-+ __blktap_end_queued_rq(rq, -EOPNOTSUPP);
-+ continue;
-+ }
-+
-+ /* The request stays on the queue while the lock is dropped. */
-+ spin_unlock_irq(&tapdev->lock);
-+
-+ err = blktap_device_make_request(tap, rq);
-+
-+ spin_lock_irq(&tapdev->lock);
-+
-+ /* Out of resources: leave rq queued and stop until restarted. */
-+ if (err == -EBUSY) {
-+ blk_stop_queue(q);
-+ break;
-+ }
-+
-+ __blktap_dequeue_rq(rq);
-+
-+ if (unlikely(err))
-+ __blktap_end_rq(rq, err);
-+ } while (1);
-+
-+ spin_unlock_irq(&tapdev->lock);
-+}
-+
-+/*
-+ * Request function registered with blk_init_queue().  It does no I/O
-+ * itself; it only kicks the ring user.  Note the parameter named 'rq'
-+ * is the request queue, not a request.
-+ */
-+static void
-+blktap_device_do_request(struct request_queue *rq)
-+{
-+ struct blktap_device *tapdev = rq->queuedata;
-+ struct blktap *tap = dev_to_blktap(tapdev);
-+
-+ blktap_ring_kick_user(tap);
-+}
-+
-+/*
-+ * Apply @params and the fixed tap limits to the disk and its request
-+ * queue, under the device lock.
-+ */
-+static void
-+blktap_device_configure(struct blktap *tap,
-+			struct blktap_params *params)
-+{
-+	struct blktap_device *tapdev = &tap->device;
-+	struct request_queue *q = tapdev->gd->queue;
-+
-+	spin_lock_irq(&tapdev->lock);
-+
-+	set_capacity(tapdev->gd, params->capacity);
-+
-+	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-+	blk_queue_logical_block_size(q, params->sector_size);
-+	blk_queue_max_sectors(q, 512);
-+
-+	/* Each segment in a request is up to an aligned page in size. */
-+	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
-+	blk_queue_max_segment_size(q, PAGE_SIZE);
-+
-+	/* Ensure a merged request will fit in a single I/O ring slot. */
-+	blk_queue_max_phys_segments(q, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-+	blk_queue_max_hw_segments(q, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-+
-+	/* Make sure buffer addresses are sector-aligned. */
-+	blk_queue_dma_alignment(q, 511);
-+
-+	/* We are reordering, but cacheless. */
-+	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
-+
-+	spin_unlock_irq(&tapdev->lock);
-+}
-+
-+/*
-+ * Check @params before creating a device:
-+ *  - sector_size must be a power of two between 512 and 4096,
-+ *  - capacity must be non-zero and capacity << sector_order must not
-+ *    overflow an unsigned long long,
-+ *  - name must be NUL-terminated within the smaller of the two name
-+ *    buffers.
-+ *
-+ * Returns 0 if valid.  Otherwise logs the offending values, forcibly
-+ * terminates params->name, and returns -EINVAL.
-+ */
-+static int
-+blktap_device_validate_params(struct blktap *tap,
-+			      struct blktap_params *params)
-+{
-+	struct device *dev = tap->ring.dev;
-+	/* Computed up front: the fail path needs it no matter which
-+	 * check jumps there (it was read uninitialized before). */
-+	const size_t name_sz = min(sizeof(params->name), sizeof(tap->name));
-+	int sector_order;
-+
-+	sector_order = ffs(params->sector_size) - 1;
-+
-+	if (sector_order <  9 ||
-+	    sector_order > 12 ||
-+	    params->sector_size != 1U<<sector_order)
-+		goto fail;
-+
-+	if (!params->capacity ||
-+	    (params->capacity > ULLONG_MAX >> sector_order))
-+		goto fail;
-+
-+	if (strnlen(params->name, name_sz) >= name_sz)
-+		goto fail;
-+
-+	return 0;
-+
-+fail:
-+	/* Terminate the name so it is safe to print with %s. */
-+	params->name[name_sz-1] = 0;
-+	dev_err(dev, "capacity: %llu, sector-size: %lu, name: %s\n",
-+		params->capacity, params->sector_size, params->name);
-+	return -EINVAL;
-+}
-+
-+/*
-+ * Try to remove the tap's disk.
-+ *
-+ * Returns 0 when there is no disk or it was removed, and -EBUSY when
-+ * bd_mutex could not be taken without blocking or the device still has
-+ * openers.
-+ */
-+int
-+blktap_device_destroy(struct blktap *tap)
-+{
-+ struct blktap_device *tapdev = &tap->device;
-+ struct block_device *bdev;
-+ struct gendisk *gd;
-+ int err;
-+
-+ gd = tapdev->gd;
-+ if (!gd)
-+ return 0;
-+
-+ /* NOTE(review): the result of bdget_disk() is not NULL-checked. */
-+ bdev = bdget_disk(gd, 0);
-+
-+ err = !mutex_trylock(&bdev->bd_mutex);
-+ if (err) {
-+ /* NB. avoid a deadlock. the last opener syncs the
-+ * bdev holding bd_mutex. */
-+ err = -EBUSY;
-+ goto out_nolock;
-+ }
-+
-+ if (bdev->bd_openers) {
-+ err = -EBUSY;
-+ goto out;
-+ }
-+
-+ del_gendisk(gd);
-+ /* Later opens now fail with -ENXIO in blktap_device_open(). */
-+ gd->private_data = NULL;
-+
-+ blk_cleanup_queue(gd->queue);
-+
-+ put_disk(gd);
-+ tapdev->gd = NULL;
-+
-+ clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
-+ err = 0;
-+out:
-+ mutex_unlock(&bdev->bd_mutex);
-+out_nolock:
-+ bdput(bdev);
-+
-+ return err;
-+}
-+
-+/*
-+ * Restart the queue flag-wise and fail every request still queued on
-+ * the tap's disk with -EIO.
-+ */
-+static void
-+blktap_device_fail_queue(struct blktap *tap)
-+{
-+	struct blktap_device *tapdev = &tap->device;
-+	struct request_queue *queue = tapdev->gd->queue;
-+	struct request *pending;
-+
-+	spin_lock_irq(&tapdev->lock);
-+	queue_flag_clear(QUEUE_FLAG_STOPPED, queue);
-+
-+	while ((pending = __blktap_next_queued_rq(queue)) != NULL)
-+		__blktap_end_queued_rq(pending, -EIO);
-+
-+	spin_unlock_irq(&tapdev->lock);
-+}
-+
-+/*
-+ * Attempt to destroy the device.  If that is not possible yet, fail all
-+ * queued requests so the device can drain.  Returns the result of
-+ * blktap_device_destroy().
-+ */
-+static int
-+blktap_device_try_destroy(struct blktap *tap)
-+{
-+	const int rc = blktap_device_destroy(tap);
-+
-+	if (!rc)
-+		return 0;
-+
-+	blktap_device_fail_queue(tap);
-+	return rc;
-+}
-+
-+/* Sleep on the ring's poll_wait queue until blktap_device_try_destroy()
-+ * succeeds; the attempt is re-evaluated on every wake-up. */
-+void
-+blktap_device_destroy_sync(struct blktap *tap)
-+{
-+ wait_event(tap->ring.poll_wait,
-+ !blktap_device_try_destroy(tap));
-+}
-+
-+/*
-+ * Create and register the block device for @tap using @params.
-+ *
-+ * The disk is named td<letters> after the tap's minor (tda..tdz,
-+ * tdaa.., tdaaa..).
-+ *
-+ * Returns 0 on success, -EEXIST if the tap already has a device,
-+ * -EINVAL for bad parameters, or -ENOMEM when the disk or its queue
-+ * cannot be allocated.
-+ */
-+int
-+blktap_device_create(struct blktap *tap, struct blktap_params *params)
-+{
-+	int minor, err;
-+	struct gendisk *gd;
-+	struct request_queue *rq;
-+	struct blktap_device *tapdev;
-+
-+	gd     = NULL;
-+	rq     = NULL;
-+	tapdev = &tap->device;
-+	minor  = tap->minor;
-+
-+	if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
-+		return -EEXIST;
-+
-+	if (blktap_device_validate_params(tap, params))
-+		return -EINVAL;
-+
-+	gd = alloc_disk(1);
-+	if (!gd) {
-+		err = -ENOMEM;
-+		goto fail;
-+	}
-+
-+	if (minor < 26) {
-+		sprintf(gd->disk_name, "td%c", 'a' + minor % 26);
-+	} else if (minor < (26 + 1) * 26) {
-+		sprintf(gd->disk_name, "td%c%c",
-+			'a' + minor / 26 - 1,'a' + minor % 26);
-+	} else {
-+		const unsigned int m1 = (minor / 26 - 1) / 26 - 1;
-+		const unsigned int m2 = (minor / 26 - 1) % 26;
-+		const unsigned int m3 =  minor % 26;
-+		sprintf(gd->disk_name, "td%c%c%c",
-+			'a' + m1, 'a' + m2, 'a' + m3);
-+	}
-+
-+	gd->major = blktap_device_major;
-+	gd->first_minor = minor;
-+	gd->fops = &blktap_device_file_operations;
-+	gd->private_data = tapdev;
-+
-+	spin_lock_init(&tapdev->lock);
-+	rq = blk_init_queue(blktap_device_do_request, &tapdev->lock);
-+	if (!rq) {
-+		err = -ENOMEM;
-+		goto fail;
-+	}
-+	elevator_init(rq, "noop");
-+
-+	gd->queue     = rq;
-+	rq->queuedata = tapdev;
-+	tapdev->gd    = gd;
-+
-+	blktap_device_configure(tap, params);
-+	add_disk(gd);
-+
-+	/* tap->name keeps its final byte untouched as the terminator. */
-+	if (params->name[0])
-+		strncpy(tap->name, params->name, sizeof(tap->name)-1);
-+
-+	set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
-+
-+	dev_info(disk_to_dev(gd), "sector-size: %u capacity: %llu\n",
-+		 queue_logical_block_size(rq),
-+		 (unsigned long long)get_capacity(gd));
-+
-+	return 0;
-+
-+fail:
-+	if (rq)
-+		blk_cleanup_queue(rq);
-+	/* The disk was never passed to add_disk() on any path reaching
-+	 * here, so drop the allocation reference with put_disk() instead
-+	 * of calling del_gendisk() on an unregistered disk. */
-+	if (gd)
-+		put_disk(gd);
-+
-+	return err;
-+}
-+
-+/*
-+ * Format disk, queue and block-device state of @tap into @buf, which
-+ * holds @size bytes.
-+ *
-+ * Returns the number of characters stored, or 0 if the tap has no disk.
-+ * scnprintf() keeps the running pointer within the buffer; with
-+ * snprintf() a truncated line would push it past the end and make the
-+ * next "end - s" wrap.
-+ */
-+size_t
-+blktap_device_debug(struct blktap *tap, char *buf, size_t size)
-+{
-+	struct gendisk *disk = tap->device.gd;
-+	struct request_queue *q;
-+	struct block_device *bdev;
-+	char *s = buf, *end = buf + size;
-+
-+	if (!disk)
-+		return 0;
-+
-+	q = disk->queue;
-+
-+	s += scnprintf(s, end - s,
-+		       "disk capacity:%llu sector size:%u\n",
-+		       (unsigned long long)get_capacity(disk),
-+		       queue_logical_block_size(q));
-+
-+	s += scnprintf(s, end - s,
-+		       "queue flags:%#lx plugged:%d stopped:%d empty:%d\n",
-+		       q->queue_flags,
-+		       blk_queue_plugged(q), blk_queue_stopped(q),
-+		       elv_queue_empty(q));
-+
-+	bdev = bdget_disk(disk, 0);
-+	if (bdev) {
-+		s += scnprintf(s, end - s,
-+			       "bdev openers:%d closed:%d\n",
-+			       bdev->bd_openers,
-+			       test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse));
-+		bdput(bdev);
-+	}
-+
-+	return s - buf;
-+}
-+
-+/*
-+ * Register the "tapdev" block major (dynamically allocated) and record
-+ * it in blktap_device_major.
-+ *
-+ * Returns 0 on success or the negative error from register_blkdev().
-+ */
-+int __init
-+blktap_device_init(void)
-+{
-+	int major;
-+
-+	/* Dynamically allocate a major for this device */
-+	major = register_blkdev(0, "tapdev");
-+	if (major < 0) {
-+		BTERR("Couldn't register blktap device\n");
-+		/* Propagate the real cause rather than a fixed -ENOMEM. */
-+		return major;
-+	}
-+
-+	blktap_device_major = major;
-+	BTINFO("blktap device major %d\n", major);
-+
-+	return 0;
-+}
-+
-+/* Unregister the "tapdev" block major if one was recorded. */
-+void
-+blktap_device_exit(void)
-+{
-+ if (blktap_device_major)
-+ unregister_blkdev(blktap_device_major, "tapdev");
-+}
-diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c
-new file mode 100644
-index 0000000..9bef48c
---- /dev/null
-+++ b/drivers/xen/blktap/request.c
-@@ -0,0 +1,418 @@
-+#include <linux/mempool.h>
-+#include <linux/spinlock.h>
-+#include <linux/mutex.h>
-+#include <linux/sched.h>
-+#include <linux/device.h>
-+
-+#include "blktap.h"
-+
-+/* max pages per shared pool. just to prevent accidental dos. */
-+#define POOL_MAX_PAGES (256*BLKIF_MAX_SEGMENTS_PER_REQUEST)
-+
-+/* default page pool size. when considering to shrink a shared pool,
-+ * note that paused tapdisks may grab a whole lot of pages for a long
-+ * time. */
-+#define POOL_DEFAULT_PAGES (2 * MMAP_PAGES)
-+
-+/* max number of pages allocatable per request. */
-+#define POOL_MAX_REQUEST_PAGES BLKIF_MAX_SEGMENTS_PER_REQUEST
-+
-+/* min request structs per pool. These grow dynamically. */
-+#define POOL_MIN_REQS BLK_RING_SIZE
-+
-+static struct kset *pool_set;
-+
-+#define kobj_to_pool(_kobj) \
-+ container_of(_kobj, struct blktap_page_pool, kobj)
-+
-+static struct kmem_cache *request_cache;
-+static mempool_t *request_pool;
-+
-+/*
-+ * Wake waiters on @pool once at least POOL_MAX_REQUEST_PAGES pages are
-+ * free in its mempool.
-+ */
-+static void
-+__page_pool_wake(struct blktap_page_pool *pool)
-+{
-+	/*
-+	  NB. slightly wasteful to always wait for a full segment
-+	  set. but this ensures the next disk makes
-+	  progress. presently, the repeated request struct
-+	  alloc/release cycles would otherwise keep everyone spinning.
-+	*/
-+
-+	if (pool->bufs->curr_nr < POOL_MAX_REQUEST_PAGES)
-+		return;
-+
-+	wake_up(&pool->wait);
-+}
-+
-+/*
-+ * Take @nr_pages pages from the tap's pool into request->pages[].
-+ *
-+ * The request must hold no pages yet and @nr_pages must not exceed
-+ * POOL_MAX_REQUEST_PAGES (both enforced with BUG_ON).
-+ *
-+ * Returns 0 with request->nr_pages == @nr_pages, or -ENOMEM when the
-+ * pool currently has fewer than @nr_pages free; no pages are taken in
-+ * that case.
-+ */
-+int
-+blktap_request_get_pages(struct blktap *tap,
-+ struct blktap_request *request, int nr_pages)
-+{
-+ struct blktap_page_pool *pool = tap->pool;
-+ mempool_t *mem = pool->bufs;
-+ struct page *page;
-+
-+ BUG_ON(request->nr_pages != 0);
-+ BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES);
-+
-+ /* Cheap unlocked check first; repeated under the lock below. */
-+ if (mem->curr_nr < nr_pages)
-+ return -ENOMEM;
-+
-+ /* NB. avoid thundering herds of tapdisks colliding. */
-+ spin_lock(&pool->lock);
-+
-+ if (mem->curr_nr < nr_pages) {
-+ spin_unlock(&pool->lock);
-+ return -ENOMEM;
-+ }
-+
-+ while (request->nr_pages < nr_pages) {
-+ page = mempool_alloc(mem, GFP_NOWAIT);
-+ BUG_ON(!page);
-+ request->pages[request->nr_pages++] = page;
-+ }
-+
-+ spin_unlock(&pool->lock);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Return every page held by @request to the tap's pool, last page
-+ * first, leaving request->nr_pages at 0.
-+ */
-+static void
-+blktap_request_put_pages(struct blktap *tap,
-+			 struct blktap_request *request)
-+{
-+	struct blktap_page_pool *pool = tap->pool;
-+
-+	while (request->nr_pages != 0) {
-+		request->nr_pages--;
-+		mempool_free(request->pages[request->nr_pages], pool->bufs);
-+	}
-+}
-+
-+/*
-+ * Format the tap's page-pool name, configured size (min_nr) and free
-+ * page count (curr_nr) into @buf, which holds @size bytes.
-+ *
-+ * Returns the number of characters stored.  Output goes through the
-+ * running pointer 's' (the old code formatted into 'buf' while
-+ * accounting via 's'), and scnprintf() keeps the result within @size.
-+ */
-+size_t
-+blktap_request_debug(struct blktap *tap, char *buf, size_t size)
-+{
-+	struct blktap_page_pool *pool = tap->pool;
-+	mempool_t *mem = pool->bufs;
-+	char *s = buf, *end = buf + size;
-+
-+	s += scnprintf(s, end - s,
-+		       "pool:%s pages:%d free:%d\n",
-+		       kobject_name(&pool->kobj),
-+		       mem->min_nr, mem->curr_nr);
-+
-+	return s - buf;
-+}
-+
-+/*
-+ * Get a request structure from request_pool without sleeping and bind
-+ * it to @tap.  Returns NULL if none is available.
-+ */
-+struct blktap_request*
-+blktap_request_alloc(struct blktap *tap)
-+{
-+	struct blktap_request *req = mempool_alloc(request_pool, GFP_NOWAIT);
-+
-+	if (!req)
-+		return NULL;
-+
-+	req->tap = tap;
-+	return req;
-+}
-+
-+/*
-+ * Release @request: return its pages to the tap's pool, give the
-+ * structure back to request_pool, then wake pool waiters if enough
-+ * pages are free.
-+ */
-+void
-+blktap_request_free(struct blktap *tap,
-+ struct blktap_request *request)
-+{
-+ blktap_request_put_pages(tap, request);
-+
-+ mempool_free(request, request_pool);
-+
-+ __page_pool_wake(tap->pool);
-+}
-+
-+void
-+blktap_request_bounce(struct blktap *tap,
-+ struct blktap_request *request,
-+ int seg, int write)
-+{
-+ struct scatterlist *sg = &request->sg_table[seg];
-+ void *s, *p;
-+
-+ BUG_ON(seg >= request->nr_pages);
-+
-+ s = sg_virt(sg);
-+ p = page_address(request->pages[seg]) + sg->offset;
-+
-+ if (write)
-+ memcpy(p, s, sg->length);
-+ else
-+ memcpy(s, p, sg->length);
-+}
-+
-+static void
-+blktap_request_ctor(void *obj)
-+{
-+ struct blktap_request *request = obj;
-+
-+ memset(request, 0, sizeof(*request));
-+ sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table));
-+}
-+
-+static int
-+blktap_page_pool_resize(struct blktap_page_pool *pool, int target)
-+{
-+ mempool_t *bufs = pool->bufs;
-+ int err;
-+
-+ /* NB. mempool asserts min_nr >= 1 */
-+ target = max(1, target);
-+
-+ err = mempool_resize(bufs, target, GFP_KERNEL);
-+ if (err)
-+ return err;
-+
-+ __page_pool_wake(pool);
-+
-+ return 0;
-+}
-+
-+struct pool_attribute {
-+ struct attribute attr;
-+
-+ ssize_t (*show)(struct blktap_page_pool *pool,
-+ char *buf);
-+
-+ ssize_t (*store)(struct blktap_page_pool *pool,
-+ const char *buf, size_t count);
-+};
-+
-+#define kattr_to_pool_attr(_kattr) \
-+ container_of(_kattr, struct pool_attribute, attr)
-+
-+static ssize_t
-+blktap_page_pool_show_size(struct blktap_page_pool *pool,
-+ char *buf)
-+{
-+ mempool_t *mem = pool->bufs;
-+ return sprintf(buf, "%d", mem->min_nr);
-+}
-+
-+static ssize_t
-+blktap_page_pool_store_size(struct blktap_page_pool *pool,
-+ const char *buf, size_t size)
-+{
-+ int target;
-+
-+ /*
-+ * NB. target fixup to avoid undesired results. less than a
-+ * full segment set can wedge the disk. much more than a
-+ * couple times the physical queue depth is rarely useful.
-+ */
-+
-+ target = simple_strtoul(buf, NULL, 0);
-+ target = max(POOL_MAX_REQUEST_PAGES, target);
-+ target = min(target, POOL_MAX_PAGES);
-+
-+ return blktap_page_pool_resize(pool, target) ? : size;
-+}
-+
-+static struct pool_attribute blktap_page_pool_attr_size =
-+ __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
-+ blktap_page_pool_show_size,
-+ blktap_page_pool_store_size);
-+
-+static ssize_t
-+blktap_page_pool_show_free(struct blktap_page_pool *pool,
-+ char *buf)
-+{
-+ mempool_t *mem = pool->bufs;
-+ return sprintf(buf, "%d", mem->curr_nr);
-+}
-+
-+static struct pool_attribute blktap_page_pool_attr_free =
-+ __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
-+ blktap_page_pool_show_free,
-+ NULL);
-+
-+static struct attribute *blktap_page_pool_attrs[] = {
-+ &blktap_page_pool_attr_size.attr,
-+ &blktap_page_pool_attr_free.attr,
-+ NULL,
-+};
-+
-+static inline struct kobject*
-+__blktap_kset_find_obj(struct kset *kset, const char *name)
-+{
-+ struct kobject *k;
-+ struct kobject *ret = NULL;
-+
-+ spin_lock(&kset->list_lock);
-+ list_for_each_entry(k, &kset->list, entry) {
-+ if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
-+ ret = kobject_get(k);
-+ break;
-+ }
-+ }
-+ spin_unlock(&kset->list_lock);
-+ return ret;
-+}
-+
-+static ssize_t
-+blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
-+ char *buf)
-+{
-+ struct blktap_page_pool *pool = kobj_to_pool(kobj);
-+ struct pool_attribute *attr = kattr_to_pool_attr(kattr);
-+
-+ if (attr->show)
-+ return attr->show(pool, buf);
-+
-+ return -EIO;
-+}
-+
-+/* sysfs ->store hook for pool attributes: dispatch to the attribute's store. */
-+static ssize_t
-+blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
-+	const char *buf, size_t size)
-+{
-+	struct blktap_page_pool *pool = kobj_to_pool(kobj);
-+	struct pool_attribute *attr = kattr_to_pool_attr(kattr);
-+
-+	/* Check ->store, the hook being called; "free" has ->show but a NULL
-+	 * ->store, so testing ->show here would call through NULL. */
-+	return attr->store ? attr->store(pool, buf, size) : -EIO;
-+}
-+
-+static struct sysfs_ops blktap_page_pool_sysfs_ops = {
-+ .show = blktap_page_pool_show_attr,
-+ .store = blktap_page_pool_store_attr,
-+};
-+
-+static void
-+blktap_page_pool_release(struct kobject *kobj)
-+{
-+ struct blktap_page_pool *pool = kobj_to_pool(kobj);
-+ mempool_destroy(pool->bufs);
-+ kfree(pool);
-+}
-+
-+struct kobj_type blktap_page_pool_ktype = {
-+ .release = blktap_page_pool_release,
-+ .sysfs_ops = &blktap_page_pool_sysfs_ops,
-+ .default_attrs = blktap_page_pool_attrs,
-+};
-+
-+static void*
-+__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
-+{
-+ struct page *page;
-+
-+ if (!(gfp_mask & __GFP_WAIT))
-+ return NULL;
-+
-+ page = alloc_page(gfp_mask);
-+ if (page)
-+ SetPageReserved(page);
-+
-+ return page;
-+}
-+
-+static void
-+__mempool_page_free(void *element, void *pool_data)
-+{
-+ struct page *page = element;
-+
-+ ClearPageReserved(page);
-+ put_page(page);
-+}
-+
-+/* Allocate a pool of nr_pages and add it to pool_set as "name"; NULL on error. */
-+static struct kobject*
-+blktap_page_pool_create(const char *name, int nr_pages)
-+{
-+	struct blktap_page_pool *pool;
-+
-+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
-+	if (!pool)
-+		goto fail;
-+
-+	spin_lock_init(&pool->lock);
-+	init_waitqueue_head(&pool->wait);
-+
-+	pool->bufs = mempool_create(nr_pages,
-+		__mempool_page_alloc, __mempool_page_free,
-+		pool);
-+	if (!pool->bufs)
-+		goto fail_pool;
-+
-+	kobject_init(&pool->kobj, &blktap_page_pool_ktype);
-+	pool->kobj.kset = pool_set;
-+	if (kobject_add(&pool->kobj, &pool_set->kobj, "%s", name))
-+		goto fail_kobj;
-+
-+	return &pool->kobj;
-+
-+fail_kobj:
-+	/* once initialized, drop the ref: the ktype ->release frees bufs and pool */
-+	kobject_put(&pool->kobj);
-+	return NULL;
-+fail_pool:
-+	kfree(pool);
-+fail:
-+	return NULL;
-+}
-+
-+struct blktap_page_pool*
-+blktap_page_pool_get(const char *name)
-+{
-+ struct kobject *kobj;
-+
-+ kobj = __blktap_kset_find_obj(pool_set, name);
-+ if (!kobj)
-+ kobj = blktap_page_pool_create(name,
-+ POOL_DEFAULT_PAGES);
-+ if (!kobj)
-+ return ERR_PTR(-ENOMEM);
-+
-+ return kobj_to_pool(kobj);
-+}
-+
-+int __init
-+blktap_page_pool_init(struct kobject *parent)
-+{
-+ request_cache =
-+ kmem_cache_create("blktap-request",
-+ sizeof(struct blktap_request), 0,
-+ 0, blktap_request_ctor);
-+ if (!request_cache)
-+ return -ENOMEM;
-+
-+ request_pool =
-+ mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
-+ if (!request_pool)
-+ return -ENOMEM;
-+
-+ pool_set = kset_create_and_add("pools", NULL, parent);
-+ if (!pool_set)
-+ return -ENOMEM;
-+
-+ return 0;
-+}
-+
-+void
-+blktap_page_pool_exit(void)
-+{
-+ if (pool_set) {
-+ BUG_ON(!list_empty(&pool_set->list));
-+ kset_unregister(pool_set);
-+ pool_set = NULL;
-+ }
-+
-+ if (request_pool) {
-+ mempool_destroy(request_pool);
-+ request_pool = NULL;
-+ }
-+
-+ if (request_cache) {
-+ kmem_cache_destroy(request_cache);
-+ request_cache = NULL;
-+ }
-+}
-diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c
-new file mode 100644
-index 0000000..6b86be5
---- /dev/null
-+++ b/drivers/xen/blktap/ring.c
-@@ -0,0 +1,550 @@
-+
-+#include <linux/device.h>
-+#include <linux/signal.h>
-+#include <linux/sched.h>
-+#include <linux/poll.h>
-+#include <linux/blkdev.h>
-+
-+#include "blktap.h"
-+
-+int blktap_ring_major;
-+static struct cdev blktap_ring_cdev;
-+
-+ /*
-+ * BLKTAP - immediately before the mmap area,
-+ * we have a bunch of pages reserved for shared memory rings.
-+ */
-+#define RING_PAGES 1
-+
-+/* Pass the pending request named by rsp->id to end_request; warn if invalid. */
-+static void
-+blktap_ring_read_response(struct blktap *tap,
-+	const struct blkif_response *rsp)
-+{
-+	struct blktap_ring *ring = &tap->ring;
-+	struct blktap_request *request = NULL;
-+	int usr_idx, err;
-+
-+	usr_idx = rsp->id;
-+	if (usr_idx < 0 || usr_idx >= MAX_PENDING_REQS) {
-+		err = -ERANGE;
-+		goto invalid;
-+	}
-+
-+	request = ring->pending[usr_idx];
-+
-+	if (!request) {
-+		err = -ESRCH;
-+		goto invalid;
-+	}
-+
-+	if (rsp->operation != request->operation) {
-+		err = -EINVAL;
-+		goto invalid;
-+	}
-+
-+	dev_dbg(ring->dev,
-+		"request %d [%p] response: %d\n",
-+		request->usr_idx, request, rsp->status);
-+
-+	err = rsp->status == BLKIF_RSP_OKAY ? 0 : -EIO;
-+end_request:
-+	blktap_device_end_request(tap, request, err);
-+	return;
-+
-+invalid:
-+	/* request is still NULL for -ERANGE/-ESRCH: print -1 instead of its op */
-+	dev_warn(ring->dev,
-+		"invalid response, idx:%d status:%d op:%d/%d: err %d\n",
-+		usr_idx, rsp->status,
-+		rsp->operation, request ? request->operation : -1,
-+		err);
-+	if (request)
-+		goto end_request;
-+}
-+
-+static void
-+blktap_read_ring(struct blktap *tap)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+ struct blkif_response rsp;
-+ RING_IDX rc, rp;
-+
-+ down_read(&current->mm->mmap_sem);
-+ if (!ring->vma) {
-+ up_read(&current->mm->mmap_sem);
-+ return;
-+ }
-+
-+ /* for each outstanding message on the ring */
-+ rp = ring->ring.sring->rsp_prod;
-+ rmb();
-+
-+ for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
-+ memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp));
-+ blktap_ring_read_response(tap, &rsp);
-+ }
-+
-+ ring->ring.rsp_cons = rc;
-+
-+ up_read(&current->mm->mmap_sem);
-+}
-+
-+static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+ return VM_FAULT_SIGBUS;
-+}
-+
-+static void
-+blktap_ring_fail_pending(struct blktap *tap)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+ struct blktap_request *request;
-+ int usr_idx;
-+
-+ for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
-+ request = ring->pending[usr_idx];
-+ if (!request)
-+ continue;
-+
-+ blktap_device_end_request(tap, request, -EIO);
-+ }
-+}
-+
-+static void
-+blktap_ring_vm_close(struct vm_area_struct *vma)
-+{
-+ struct blktap *tap = vma->vm_private_data;
-+ struct blktap_ring *ring = &tap->ring;
-+ struct page *page = virt_to_page(ring->ring.sring);
-+
-+ blktap_ring_fail_pending(tap);
-+
-+ zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
-+ ClearPageReserved(page);
-+ __free_page(page);
-+
-+ ring->vma = NULL;
-+
-+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
-+ blktap_control_destroy_tap(tap);
-+}
-+
-+static struct vm_operations_struct blktap_ring_vm_operations = {
-+ .close = blktap_ring_vm_close,
-+ .fault = blktap_ring_fault,
-+};
-+
-+int
-+blktap_ring_map_segment(struct blktap *tap,
-+ struct blktap_request *request,
-+ int seg)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+ unsigned long uaddr;
-+
-+ uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
-+ return vm_insert_page(ring->vma, uaddr, request->pages[seg]);
-+}
-+
-+int
-+blktap_ring_map_request(struct blktap *tap,
-+ struct blktap_request *request)
-+{
-+ int seg, err = 0;
-+ int write;
-+
-+ write = request->operation == BLKIF_OP_WRITE;
-+
-+ for (seg = 0; seg < request->nr_pages; seg++) {
-+ if (write)
-+ blktap_request_bounce(tap, request, seg, write);
-+
-+ err = blktap_ring_map_segment(tap, request, seg);
-+ if (err)
-+ break;
-+ }
-+
-+ if (err)
-+ blktap_ring_unmap_request(tap, request);
-+
-+ return err;
-+}
-+
-+void
-+blktap_ring_unmap_request(struct blktap *tap,
-+ struct blktap_request *request)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+ unsigned long uaddr;
-+ unsigned size;
-+ int seg, read;
-+
-+ uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0);
-+ size = request->nr_pages << PAGE_SHIFT;
-+ read = request->operation == BLKIF_OP_READ;
-+
-+ if (read)
-+ for (seg = 0; seg < request->nr_pages; seg++)
-+ blktap_request_bounce(tap, request, seg, !read);
-+
-+ zap_page_range(ring->vma, uaddr, size, NULL);
-+}
-+
-+void
-+blktap_ring_free_request(struct blktap *tap,
-+ struct blktap_request *request)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+
-+ ring->pending[request->usr_idx] = NULL;
-+ ring->n_pending--;
-+
-+ blktap_request_free(tap, request);
-+}
-+
-+struct blktap_request*
-+blktap_ring_make_request(struct blktap *tap)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+ struct blktap_request *request;
-+ int usr_idx;
-+
-+ if (RING_FULL(&ring->ring))
-+ return ERR_PTR(-ENOSPC);
-+
-+ request = blktap_request_alloc(tap);
-+ if (!request)
-+ return ERR_PTR(-ENOMEM);
-+
-+ for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++)
-+ if (!ring->pending[usr_idx])
-+ break;
-+
-+ BUG_ON(usr_idx >= BLK_RING_SIZE);
-+
-+ request->tap = tap;
-+ request->usr_idx = usr_idx;
-+
-+ ring->pending[usr_idx] = request;
-+ ring->n_pending++;
-+
-+ return request;
-+}
-+
-+void
-+blktap_ring_submit_request(struct blktap *tap,
-+ struct blktap_request *request)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+ struct blkif_request *breq;
-+ struct scatterlist *sg;
-+ int i, nsecs = 0;
-+
-+ dev_dbg(ring->dev,
-+ "request %d [%p] submit\n", request->usr_idx, request);
-+
-+ breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
-+
-+ breq->id = request->usr_idx;
-+ breq->sector_number = blk_rq_pos(request->rq);
-+ breq->handle = 0;
-+ breq->operation = request->operation;
-+ breq->nr_segments = request->nr_pages;
-+
-+ blktap_for_each_sg(sg, request, i) {
-+ struct blkif_request_segment *seg = &breq->seg[i];
-+ int first, count;
-+
-+ count = sg->length >> 9;
-+ first = sg->offset >> 9;
-+
-+ seg->first_sect = first;
-+ seg->last_sect = first + count - 1;
-+
-+ nsecs += count;
-+ }
-+
-+ ring->ring.req_prod_pvt++;
-+
-+ do_gettimeofday(&request->time);
-+
-+
-+ if (request->operation == BLKIF_OP_WRITE) {
-+ tap->stats.st_wr_sect += nsecs;
-+ tap->stats.st_wr_req++;
-+ }
-+
-+ if (request->operation == BLKIF_OP_READ) {
-+ tap->stats.st_rd_sect += nsecs;
-+ tap->stats.st_rd_req++;
-+ }
-+}
-+
-+static int
-+blktap_ring_open(struct inode *inode, struct file *filp)
-+{
-+ struct blktap *tap = NULL;
-+ int minor;
-+
-+ minor = iminor(inode);
-+
-+ if (minor < blktap_max_minor)
-+ tap = blktaps[minor];
-+
-+ if (!tap)
-+ return -ENXIO;
-+
-+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
-+ return -ENXIO;
-+
-+ if (tap->ring.task)
-+ return -EBUSY;
-+
-+ filp->private_data = tap;
-+ tap->ring.task = current;
-+
-+ return 0;
-+}
-+
-+static int
-+blktap_ring_release(struct inode *inode, struct file *filp)
-+{
-+ struct blktap *tap = filp->private_data;
-+
-+ blktap_device_destroy_sync(tap);
-+
-+ tap->ring.task = NULL;
-+
-+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
-+ blktap_control_destroy_tap(tap);
-+
-+ return 0;
-+}
-+
-+static int
-+blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
-+{
-+ struct blktap *tap = filp->private_data;
-+ struct blktap_ring *ring = &tap->ring;
-+ struct blkif_sring *sring;
-+ struct page *page = NULL;
-+ int err;
-+
-+ if (ring->vma)
-+ return -EBUSY;
-+
-+ page = alloc_page(GFP_KERNEL|__GFP_ZERO);
-+ if (!page)
-+ return -ENOMEM;
-+
-+ SetPageReserved(page);
-+
-+ err = vm_insert_page(vma, vma->vm_start, page);
-+ if (err)
-+ goto fail;
-+
-+ sring = page_address(page);
-+ SHARED_RING_INIT(sring);
-+ FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);
-+
-+ ring->ring_vstart = vma->vm_start;
-+ ring->user_vstart = ring->ring_vstart + PAGE_SIZE;
-+
-+ vma->vm_private_data = tap;
-+
-+ vma->vm_flags |= VM_DONTCOPY;
-+ vma->vm_flags |= VM_RESERVED;
-+
-+ vma->vm_ops = &blktap_ring_vm_operations;
-+
-+ ring->vma = vma;
-+ return 0;
-+
-+fail:
-+ if (page) {
-+ zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
-+ ClearPageReserved(page);
-+ __free_page(page);
-+ }
-+
-+ return err;
-+}
-+
-+static int
-+blktap_ring_ioctl(struct inode *inode, struct file *filp,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ struct blktap *tap = filp->private_data;
-+ struct blktap_ring *ring = &tap->ring;
-+
-+ BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);
-+
-+ if (!ring->vma || ring->vma->vm_mm != current->mm)
-+ return -EACCES;
-+
-+ switch(cmd) {
-+ case BLKTAP2_IOCTL_KICK_FE:
-+
-+ blktap_read_ring(tap);
-+ return 0;
-+
-+ case BLKTAP2_IOCTL_CREATE_DEVICE: {
-+ struct blktap_params params;
-+ void __user *ptr = (void *)arg;
-+
-+ if (!arg)
-+ return -EINVAL;
-+
-+ if (copy_from_user(&params, ptr, sizeof(params)))
-+ return -EFAULT;
-+
-+ return blktap_device_create(tap, &params);
-+ }
-+
-+ case BLKTAP2_IOCTL_REMOVE_DEVICE:
-+
-+ return blktap_device_destroy(tap);
-+ }
-+
-+ return -ENOIOCTLCMD;
-+}
-+
-+static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
-+{
-+ struct blktap *tap = filp->private_data;
-+ struct blktap_ring *ring = &tap->ring;
-+ int work;
-+
-+ poll_wait(filp, &tap->pool->wait, wait);
-+ poll_wait(filp, &ring->poll_wait, wait);
-+
-+ down_read(&current->mm->mmap_sem);
-+ if (ring->vma && tap->device.gd)
-+ blktap_device_run_queue(tap);
-+ up_read(&current->mm->mmap_sem);
-+
-+ work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod;
-+ RING_PUSH_REQUESTS(&ring->ring);
-+
-+ if (work ||
-+ ring->ring.sring->private.tapif_user.msg ||
-+ test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse))
-+ return POLLIN | POLLRDNORM;
-+
-+ return 0;
-+}
-+
-+static struct file_operations blktap_ring_file_operations = {
-+ .owner = THIS_MODULE,
-+ .open = blktap_ring_open,
-+ .release = blktap_ring_release,
-+ .ioctl = blktap_ring_ioctl,
-+ .mmap = blktap_ring_mmap,
-+ .poll = blktap_ring_poll,
-+};
-+
-+void
-+blktap_ring_kick_user(struct blktap *tap)
-+{
-+ wake_up(&tap->ring.poll_wait);
-+}
-+
-+int
-+blktap_ring_destroy(struct blktap *tap)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+
-+ if (ring->task || ring->vma)
-+ return -EBUSY;
-+
-+ return 0;
-+}
-+
-+int
-+blktap_ring_create(struct blktap *tap)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+
-+ init_waitqueue_head(&ring->poll_wait);
-+ ring->devno = MKDEV(blktap_ring_major, tap->minor);
-+
-+ return 0;
-+}
-+
-+/* Dump the ring's pending requests into buf; returns the bytes stored. */
-+size_t
-+blktap_ring_debug(struct blktap *tap, char *buf, size_t size)
-+{
-+	struct blktap_ring *ring = &tap->ring;
-+	char *s = buf, *end = buf + size;
-+	int usr_idx;
-+
-+	if (!size)
-+		return 0;
-+	/* scnprintf() counts only stored bytes, so s can never pass end */
-+	s += scnprintf(s, end - s, "begin pending:%d\n", ring->n_pending);
-+
-+	for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
-+		struct blktap_request *request = ring->pending[usr_idx];
-+		int write;
-+
-+		if (!request)
-+			continue;
-+
-+		write = request->operation == BLKIF_OP_WRITE;
-+		/* tv_usec is in microseconds: six digits, not nine */
-+		s += scnprintf(s, end - s,
-+			"%02d: usr_idx:%02d "
-+			"op:%c nr_pages:%02d time:%lu.%06lu\n",
-+			usr_idx, request->usr_idx,
-+			write ? 'W' : 'R', request->nr_pages,
-+			request->time.tv_sec, request->time.tv_usec);
-+	}
-+
-+	s += scnprintf(s, end - s, "end pending\n");
-+
-+	return s - buf;
-+}
-+
-+
-+int __init
-+blktap_ring_init(void)
-+{
-+ dev_t dev = 0;
-+ int err;
-+
-+ cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations);
-+ blktap_ring_cdev.owner = THIS_MODULE;
-+
-+ err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2");
-+ if (err < 0) {
-+ BTERR("error registering ring devices: %d\n", err);
-+ return err;
-+ }
-+
-+ err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE);
-+ if (err) {
-+ BTERR("error adding ring device: %d\n", err);
-+ unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE);
-+ return err;
-+ }
-+
-+ blktap_ring_major = MAJOR(dev);
-+ BTINFO("blktap ring major: %d\n", blktap_ring_major);
-+
-+ return 0;
-+}
-+
-+void
-+blktap_ring_exit(void)
-+{
-+ if (!blktap_ring_major)
-+ return;
-+
-+ cdev_del(&blktap_ring_cdev);
-+ unregister_chrdev_region(MKDEV(blktap_ring_major, 0),
-+ MAX_BLKTAP_DEVICE);
-+
-+ blktap_ring_major = 0;
-+}
-diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c
-new file mode 100644
-index 0000000..3c424af
---- /dev/null
-+++ b/drivers/xen/blktap/sysfs.c
-@@ -0,0 +1,288 @@
-+#include <linux/types.h>
-+#include <linux/device.h>
-+#include <linux/module.h>
-+#include <linux/sched.h>
-+#include <linux/genhd.h>
-+#include <linux/blkdev.h>
-+
-+#include "blktap.h"
-+
-+int blktap_debug_level = 1;
-+
-+static struct class *class;
-+
-+static ssize_t
-+blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const char *buf, size_t size)
-+{
-+ struct blktap *tap;
-+
-+ tap = dev_get_drvdata(dev);
-+ if (!tap)
-+ return 0;
-+
-+ if (size >= BLKTAP2_MAX_MESSAGE_LEN)
-+ return -ENAMETOOLONG;
-+
-+ if (strnlen(buf, size) != size)
-+ return -EINVAL;
-+
-+ strcpy(tap->name, buf);
-+
-+ return size;
-+}
-+
-+static ssize_t
-+blktap_sysfs_get_name(struct device *dev, struct device_attribute *attr, char *buf)
-+{
-+ struct blktap *tap;
-+ ssize_t size;
-+
-+ tap = dev_get_drvdata(dev);
-+ if (!tap)
-+ return 0;
-+
-+ if (tap->name[0])
-+ size = sprintf(buf, "%s\n", tap->name);
-+ else
-+ size = sprintf(buf, "%d\n", tap->minor);
-+
-+ return size;
-+}
-+static DEVICE_ATTR(name, S_IRUGO|S_IWUSR,
-+ blktap_sysfs_get_name, blktap_sysfs_set_name);
-+
-+static void
-+blktap_sysfs_remove_work(struct work_struct *work)
-+{
-+ struct blktap *tap
-+ = container_of(work, struct blktap, remove_work);
-+ blktap_control_destroy_tap(tap);
-+}
-+
-+static ssize_t
-+blktap_sysfs_remove_device(struct device *dev,
-+ struct device_attribute *attr,
-+ const char *buf, size_t size)
-+{
-+ struct blktap *tap;
-+ int err;
-+
-+ tap = dev_get_drvdata(dev);
-+ if (!tap)
-+ return size;
-+
-+ if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
-+ goto wait;
-+
-+ if (tap->ring.vma) {
-+ struct blkif_sring *sring = tap->ring.ring.sring;
-+ sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE;
-+ blktap_ring_kick_user(tap);
-+ } else {
-+ INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work);
-+ schedule_work(&tap->remove_work);
-+ }
-+wait:
-+ err = wait_event_interruptible(tap->remove_wait,
-+ !dev_get_drvdata(dev));
-+ if (err)
-+ return err;
-+
-+ return size;
-+}
-+static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device);
-+
-+static ssize_t
-+blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf)
-+{
-+ struct blktap *tap;
-+ char *s = buf, *end = buf + PAGE_SIZE;
-+
-+ tap = dev_get_drvdata(dev);
-+ if (!tap)
-+ return 0;
-+
-+ s += blktap_control_debug(tap, s, end - s);
-+
-+ s += blktap_request_debug(tap, s, end - s);
-+
-+ s += blktap_device_debug(tap, s, end - s);
-+
-+ s += blktap_ring_debug(tap, s, end - s);
-+
-+ return s - buf;
-+}
-+static DEVICE_ATTR(debug, S_IRUGO, blktap_sysfs_debug_device, NULL);
-+
-+static ssize_t
-+blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char *buf)
-+{
-+ struct blktap *tap;
-+ ssize_t rv = 0;
-+
-+ tap = dev_get_drvdata(dev);
-+ if (!tap)
-+ return 0;
-+
-+ if (tap->ring.task)
-+ rv = sprintf(buf, "%d\n", tap->ring.task->pid);
-+
-+ return rv;
-+}
-+static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL);
-+
-+static ssize_t
-+blktap_sysfs_show_pool(struct device *dev,
-+ struct device_attribute *attr,
-+ char *buf)
-+{
-+ struct blktap *tap = dev_get_drvdata(dev);
-+ return sprintf(buf, "%s", kobject_name(&tap->pool->kobj));
-+}
-+
-+static ssize_t
-+blktap_sysfs_store_pool(struct device *dev,
-+ struct device_attribute *attr,
-+ const char *buf, size_t size)
-+{
-+ struct blktap *tap = dev_get_drvdata(dev);
-+ struct blktap_page_pool *pool, *tmp = tap->pool;
-+
-+ if (tap->device.gd)
-+ return -EBUSY;
-+
-+ pool = blktap_page_pool_get(buf);
-+ if (IS_ERR(pool))
-+ return PTR_ERR(pool);
-+
-+ tap->pool = pool;
-+ kobject_put(&tmp->kobj);
-+
-+ return size;
-+}
-+DEVICE_ATTR(pool, S_IRUSR|S_IWUSR,
-+ blktap_sysfs_show_pool, blktap_sysfs_store_pool);
-+
-+/* Create the "blktap<minor>" class device and its sysfs attribute files. */
-+int
-+blktap_sysfs_create(struct blktap *tap)
-+{
-+	struct blktap_ring *ring = &tap->ring;
-+	struct device *dev;
-+	int err;
-+
-+	init_waitqueue_head(&tap->remove_wait);
-+
-+	dev = device_create(class, NULL, ring->devno,
-+		tap, "blktap%d", tap->minor);
-+	if (IS_ERR(dev))
-+		return PTR_ERR(dev);	/* an error value, not a device to unregister */
-+	err = device_create_file(dev, &dev_attr_name);
-+	if (!err)
-+		err = device_create_file(dev, &dev_attr_remove);
-+	if (!err)
-+		err = device_create_file(dev, &dev_attr_debug);
-+	if (!err)
-+		err = device_create_file(dev, &dev_attr_task);
-+	if (!err)
-+		err = device_create_file(dev, &dev_attr_pool);
-+	if (!err)
-+		ring->dev = dev;
-+	else
-+		device_unregister(dev);
-+
-+	return err;
-+}
-+
-+void
-+blktap_sysfs_destroy(struct blktap *tap)
-+{
-+ struct blktap_ring *ring = &tap->ring;
-+ struct device *dev;
-+
-+ dev = ring->dev;
-+
-+ if (!dev)
-+ return;
-+
-+ dev_set_drvdata(dev, NULL);
-+ wake_up(&tap->remove_wait);
-+
-+ device_unregister(dev);
-+ ring->dev = NULL;
-+}
-+
-+static ssize_t
-+blktap_sysfs_show_verbosity(struct class *class, char *buf)
-+{
-+ return sprintf(buf, "%d\n", blktap_debug_level);
-+}
-+
-+static ssize_t
-+blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size)
-+{
-+ int level;
-+
-+ if (sscanf(buf, "%d", &level) == 1) {
-+ blktap_debug_level = level;
-+ return size;
-+ }
-+
-+ return -EINVAL;
-+}
-+static CLASS_ATTR(verbosity, S_IRUGO|S_IWUSR,
-+ blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity);
-+
-+static ssize_t
-+blktap_sysfs_show_devices(struct class *class, char *buf)
-+{
-+ int i, ret;
-+ struct blktap *tap;
-+
-+ mutex_lock(&blktap_lock);
-+
-+ ret = 0;
-+ for (i = 0; i < blktap_max_minor; i++) {
-+ tap = blktaps[i];
-+ if (!tap)
-+ continue;
-+
-+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
-+ continue;
-+
-+ ret += sprintf(buf + ret, "%d %s\n", tap->minor, tap->name);
-+ }
-+
-+ mutex_unlock(&blktap_lock);
-+
-+ return ret;
-+}
-+static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL);
-+
-+void
-+blktap_sysfs_exit(void)
-+{
-+ if (class)
-+ class_destroy(class);
-+}
-+
-+int __init
-+blktap_sysfs_init(void)
-+{
-+ struct class *cls;
-+ int err = 0;
-+
-+ cls = class_create(THIS_MODULE, "blktap2");
-+ if (IS_ERR(cls))
-+ err = PTR_ERR(cls);
-+ if (!err)
-+ err = class_create_file(cls, &class_attr_verbosity);
-+ if (!err)
-+ err = class_create_file(cls, &class_attr_devices);
-+ if (!err)
-+ class = cls;
-+ else
-+ class_destroy(cls);
-+
-+ return err;
-+}
-diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
-index bdfd584..6625ffe 100644
---- a/drivers/xen/cpu_hotplug.c
-+++ b/drivers/xen/cpu_hotplug.c
-@@ -1,5 +1,6 @@
- #include <linux/notifier.h>
-
-+#include <xen/xen.h>
- #include <xen/xenbus.h>
-
- #include <asm/xen/hypervisor.h>
-diff --git a/drivers/xen/events.c b/drivers/xen/events.c
-index 1417015..ac7b42f 100644
---- a/drivers/xen/events.c
-+++ b/drivers/xen/events.c
-@@ -16,7 +16,7 @@
- * (typically dom0).
- * 2. VIRQs, typically used for timers. These are per-cpu events.
- * 3. IPIs.
-- * 4. Hardware interrupts. Not supported at present.
-+ * 4. PIRQs - Hardware interrupts.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-@@ -27,18 +27,32 @@
- #include <linux/module.h>
- #include <linux/string.h>
- #include <linux/bootmem.h>
-+#include <linux/irqnr.h>
-+#include <linux/pci_regs.h>
-+#include <linux/pci.h>
-+#include <linux/msi.h>
-
-+#include <asm/desc.h>
- #include <asm/ptrace.h>
- #include <asm/irq.h>
- #include <asm/idle.h>
-+#include <asm/io_apic.h>
- #include <asm/sync_bitops.h>
- #include <asm/xen/hypercall.h>
- #include <asm/xen/hypervisor.h>
-+#include <asm/xen/pci.h>
-
-+#include <xen/xen.h>
-+#include <xen/hvm.h>
- #include <xen/xen-ops.h>
- #include <xen/events.h>
- #include <xen/interface/xen.h>
- #include <xen/interface/event_channel.h>
-+#include <xen/interface/hvm/hvm_op.h>
-+#include <xen/interface/hvm/params.h>
-+#include <xen/page.h>
-+
-+#include "../pci/msi.h"
-
- /*
- * This lock protects updates to the following mapping and reference-count
-@@ -67,7 +81,7 @@ enum xen_irq_type {
- * event channel - irq->event channel mapping
- * cpu - cpu this event channel is bound to
- * index - type-specific information:
-- * PIRQ - vector, with MSB being "needs EIO"
-+ * PIRQ - with MSB being "needs EIO"
- * VIRQ - virq number
- * IPI - IPI vector
- * EVTCHN -
-@@ -83,20 +97,30 @@ struct irq_info
- enum ipi_vector ipi;
- struct {
- unsigned short gsi;
-- unsigned short vector;
-+ unsigned char vector;
-+ unsigned char flags;
-+ uint16_t domid;
- } pirq;
- } u;
- };
-+#define PIRQ_SHAREABLE (1 << 1)
-
--static struct irq_info irq_info[NR_IRQS];
-+/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
-+static bool pirq_eoi_does_unmask;
-+static unsigned long *pirq_needs_eoi_bits;
-
--static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
-- [0 ... NR_EVENT_CHANNELS-1] = -1
--};
-+static struct irq_info *irq_info;
-+
-+static int *evtchn_to_irq;
- struct cpu_evtchn_s {
- unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG];
- };
--static struct cpu_evtchn_s *cpu_evtchn_mask_p;
-+
-+static __initdata struct cpu_evtchn_s init_evtchn_mask = {
-+ .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
-+};
-+static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
-+
- static inline unsigned long *cpu_evtchn_mask(int cpu)
- {
- return cpu_evtchn_mask_p[cpu].bits;
-@@ -107,6 +131,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
-
- static struct irq_chip xen_dynamic_chip;
- static struct irq_chip xen_percpu_chip;
-+static struct irq_chip xen_pirq_chip;
-
- /* Constructor for packed IRQ information. */
- static struct irq_info mk_unbound_info(void)
-@@ -136,7 +161,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn,
- unsigned short gsi, unsigned short vector)
- {
- return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
-- .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } };
-+ .cpu = 0, .u.pirq =
-+ { .gsi = gsi, .vector = vector, .domid = DOMID_SELF } };
- }
-
- /*
-@@ -219,6 +245,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn)
- return ret;
- }
-
-+static bool pirq_needs_eoi(unsigned irq)
-+{
-+ struct irq_info *info = info_for_irq(irq);
-+
-+ BUG_ON(info->type != IRQT_PIRQ);
-+
-+ return test_bit(info->u.pirq.gsi, pirq_needs_eoi_bits);
-+}
-+
- static inline unsigned long active_evtchns(unsigned int cpu,
- struct shared_info *sh,
- unsigned int idx)
-@@ -237,17 +272,17 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
- cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
- #endif
-
-- __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
-- __set_bit(chn, cpu_evtchn_mask(cpu));
-+ clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
-+ set_bit(chn, cpu_evtchn_mask(cpu));
-
- irq_info[irq].cpu = cpu;
- }
-
- static void init_evtchn_cpu_bindings(void)
- {
-+ int i;
- #ifdef CONFIG_SMP
- struct irq_desc *desc;
-- int i;
-
- /* By default all event channels notify CPU#0. */
- for_each_irq_desc(i, desc) {
-@@ -255,7 +290,9 @@ static void init_evtchn_cpu_bindings(void)
- }
- #endif
-
-- memset(cpu_evtchn_mask(0), ~0, sizeof(struct cpu_evtchn_s));
-+ for_each_possible_cpu(i)
-+ memset(cpu_evtchn_mask(i),
-+ (i == 0) ? ~0 : 0, sizeof(struct cpu_evtchn_s));
- }
-
- static inline void clear_evtchn(int port)
-@@ -300,6 +337,14 @@ static void mask_evtchn(int port)
- sync_set_bit(port, &s->evtchn_mask[0]);
- }
-
-+static void mask_irq(unsigned int irq)
-+{
-+ int evtchn = evtchn_from_irq(irq);
-+
-+ if (VALID_EVTCHN(evtchn))
-+ mask_evtchn(evtchn);
-+}
-+
- static void unmask_evtchn(int port)
- {
- struct shared_info *s = HYPERVISOR_shared_info;
-@@ -330,26 +375,370 @@ static void unmask_evtchn(int port)
- put_cpu();
- }
-
-+static void unmask_irq(unsigned int irq)
-+{
-+ int evtchn = evtchn_from_irq(irq);
-+
-+ if (VALID_EVTCHN(evtchn))
-+ unmask_evtchn(evtchn);
-+}
-+
-+static int get_nr_hw_irqs(void)
-+{
-+ int ret = 1;
-+
-+#ifdef CONFIG_X86_IO_APIC
-+ ret = get_nr_irqs_gsi();
-+#endif
-+
-+ return ret;
-+}
-+
- static int find_unbound_irq(void)
- {
- int irq;
- struct irq_desc *desc;
-+ int start = get_nr_hw_irqs();
-
-- for (irq = 0; irq < nr_irqs; irq++)
-+ if (start == nr_irqs)
-+ goto no_irqs;
-+
-+ /* nr_irqs is a magic value. Must not use it.*/
-+ for (irq = nr_irqs-1; irq > start; irq--) {
-+ desc = irq_to_desc(irq);
-+ /* only 0->15 have init'd desc; handle irq > 16 */
-+ if (desc == NULL)
-+ break;
-+ if (desc->chip == &no_irq_chip)
-+ break;
-+ if (desc->chip != &xen_dynamic_chip)
-+ continue;
- if (irq_info[irq].type == IRQT_UNBOUND)
- break;
-+ }
-
-- if (irq == nr_irqs)
-- panic("No available IRQ to bind to: increase nr_irqs!\n");
-+ if (irq == start)
-+ goto no_irqs;
-
-- desc = irq_to_desc_alloc_node(irq, 0);
-+ desc = irq_to_desc_alloc_node(irq, -1);
- if (WARN_ON(desc == NULL))
- return -1;
-
-- dynamic_irq_init(irq);
-+ dynamic_irq_init_keep_chip_data(irq);
-+
-+ return irq;
-+
-+no_irqs:
-+ panic("No available IRQ to bind to: increase nr_irqs!\n");
-+}
-+
-+static bool identity_mapped_irq(unsigned irq)
-+{
-+ /* identity map all the hardware irqs */
-+ return irq < get_nr_hw_irqs();
-+}
-+
-+static void pirq_eoi(unsigned int irq)
-+{
-+ struct irq_info *info = info_for_irq(irq);
-+ struct physdev_eoi eoi = { .irq = info->u.pirq.gsi };
-+ bool need_eoi;
-+
-+ need_eoi = pirq_needs_eoi(irq);
-+
-+ if (!need_eoi || !pirq_eoi_does_unmask)
-+ unmask_evtchn(info->evtchn);
-+
-+ if (need_eoi) {
-+ int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
-+ WARN_ON(rc);
-+ }
-+}
-+
-+static void pirq_query_unmask(int irq)
-+{
-+ struct physdev_irq_status_query irq_status;
-+ struct irq_info *info = info_for_irq(irq);
-+
-+ if (pirq_eoi_does_unmask)
-+ return;
-+
-+ BUG_ON(info->type != IRQT_PIRQ);
-+
-+ irq_status.irq = info->u.pirq.gsi;
-+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
-+ irq_status.flags = 0;
-+
-+ clear_bit(info->u.pirq.gsi, pirq_needs_eoi_bits);
-+ if (irq_status.flags & XENIRQSTAT_needs_eoi)
-+ set_bit(info->u.pirq.gsi, pirq_needs_eoi_bits);
-+}
-+
-+static bool probing_irq(int irq)
-+{
-+ struct irq_desc *desc = irq_to_desc(irq);
-+
-+ return desc && desc->action == NULL;
-+}
-+
-+static unsigned int startup_pirq(unsigned int irq)
-+{
-+ struct evtchn_bind_pirq bind_pirq;
-+ struct irq_info *info = info_for_irq(irq);
-+ int evtchn = evtchn_from_irq(irq);
-+ int rc;
-+
-+ BUG_ON(info->type != IRQT_PIRQ);
-+
-+ if (VALID_EVTCHN(evtchn))
-+ goto out;
-+
-+ bind_pirq.pirq = info->u.pirq.gsi;
-+ /* NB. We are happy to share unless we are probing. */
-+ bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
-+ BIND_PIRQ__WILL_SHARE : 0;
-+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
-+ if (rc != 0) {
-+ if (!probing_irq(irq))
-+ printk(KERN_INFO "Failed to obtain physical IRQ %d" \
-+ " (GSI:%d)\n", irq, info->u.pirq.gsi);
-+ return 0;
-+ }
-+ evtchn = bind_pirq.port;
-+
-+ pirq_query_unmask(irq);
-+
-+ evtchn_to_irq[evtchn] = irq;
-+ bind_evtchn_to_cpu(evtchn, 0);
-+ info->evtchn = evtchn;
-+
-+ out:
-+ pirq_eoi(irq);
-+
-+ return 0;
-+}
-+
-+static void shutdown_pirq(unsigned int irq)
-+{
-+ struct evtchn_close close;
-+ struct irq_info *info = info_for_irq(irq);
-+ int evtchn = evtchn_from_irq(irq);
-+
-+ BUG_ON(info->type != IRQT_PIRQ);
-+
-+ if (!VALID_EVTCHN(evtchn))
-+ return;
-+
-+ mask_evtchn(evtchn);
-+
-+ close.port = evtchn;
-+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
-+ BUG();
-+
-+ bind_evtchn_to_cpu(evtchn, 0);
-+ evtchn_to_irq[evtchn] = -1;
-+ info->evtchn = 0;
-+}
-+
-+static void ack_pirq(unsigned int irq)
-+{
-+ move_masked_irq(irq);
-+
-+ pirq_eoi(irq);
-+}
-+
-+static void end_pirq(unsigned int irq)
-+{
-+ int evtchn = evtchn_from_irq(irq);
-+ struct irq_desc *desc = irq_to_desc(irq);
-+
-+ if (WARN_ON(!desc))
-+ return;
-+
-+ if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
-+ (IRQ_DISABLED|IRQ_PENDING)) {
-+ shutdown_pirq(irq);
-+ } else if (VALID_EVTCHN(evtchn)) {
-+ pirq_eoi(irq);
-+ }
-+}
-+
-+static int find_irq_by_gsi(unsigned gsi)
-+{
-+ int irq;
-+
-+ for (irq = 0; irq < nr_irqs; irq++) {
-+ struct irq_info *info = info_for_irq(irq);
-+
-+ if (info == NULL || info->type != IRQT_PIRQ)
-+ continue;
-+
-+ if (gsi_from_irq(irq) == gsi)
-+ return irq;
-+ }
-+
-+ return -1;
-+}
-+
-+/*
-+ * Allocate a physical irq, along with a vector. We don't assign an
-+ * event channel until the irq actually started up. Return an
-+ * existing irq if we've already got one for the gsi.
-+ */
-+int xen_allocate_pirq(unsigned gsi, int shareable, char *name)
-+{
-+ int irq;
-+ struct physdev_irq irq_op;
-+
-+ spin_lock(&irq_mapping_update_lock);
-+
-+ irq = find_irq_by_gsi(gsi);
-+ if (irq != -1) {
-+ printk(KERN_INFO "xen_allocate_pirq: returning irq %d for gsi %u\n",
-+ irq, gsi);
-+ goto out; /* XXX need refcount? */
-+ }
-+
-+ /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
-+ * we are using the !xen_initial_domain() to drop in the function.*/
-+ if (identity_mapped_irq(gsi) || !xen_initial_domain()) {
-+ irq = gsi;
-+ irq_to_desc_alloc_node(irq, 0);
-+ dynamic_irq_init(irq);
-+ } else
-+ irq = find_unbound_irq();
-+
-+ set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
-+ handle_fasteoi_irq, name);
-+
-+ irq_op.irq = gsi;
-+ irq_op.vector = 0;
-+
-+ /* Only the privileged domain can do this. For non-priv, the pcifront
-+ * driver provides a PCI bus that does the call to do exactly
-+ * this in the priv domain. */
-+ if (xen_initial_domain() &&
-+ HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
-+ dynamic_irq_cleanup(irq);
-+ irq = -ENOSPC;
-+ goto out;
-+ }
-+
-+ irq_info[irq] = mk_pirq_info(0, gsi, irq_op.vector);
-+ irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0;
-+
-+out:
-+ spin_unlock(&irq_mapping_update_lock);
-+
-+ return irq;
-+}
-+
-+#ifdef CONFIG_PCI_MSI
-+int xen_destroy_irq(int irq)
-+{
-+ struct irq_desc *desc;
-+ struct physdev_unmap_pirq unmap_irq;
-+ struct irq_info *info = info_for_irq(irq);
-+ int rc = -ENOENT;
-+
-+ spin_lock(&irq_mapping_update_lock);
-+
-+ desc = irq_to_desc(irq);
-+ if (!desc)
-+ goto out;
-+
-+ if (xen_initial_domain()) {
-+ unmap_irq.pirq = info->u.pirq.gsi;
-+ unmap_irq.domid = info->u.pirq.domid;
-+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
-+ if (rc) {
-+ printk(KERN_WARNING "unmap irq failed %d\n", rc);
-+ goto out;
-+ }
-+ }
-+ irq_info[irq] = mk_unbound_info();
-+
-+ dynamic_irq_cleanup(irq);
-+
-+out:
-+ spin_unlock(&irq_mapping_update_lock);
-+ return rc;
-+}
-+
-+#ifdef CONFIG_PCI_XEN
-+int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
-+{
-+ int irq = 0;
-+ struct physdev_map_pirq map_irq;
-+ int rc;
-+ domid_t domid;
-+ int pos;
-+ u32 table_offset, bir;
-+
-+ domid = rc = xen_find_device_domain_owner(dev);
-+ if (rc < 0)
-+ domid = DOMID_SELF;
-+
-+ memset(&map_irq, 0, sizeof(map_irq));
-+ map_irq.domid = domid;
-+ map_irq.type = MAP_PIRQ_TYPE_MSI;
-+ map_irq.index = -1;
-+ map_irq.pirq = -1;
-+ map_irq.bus = dev->bus->number;
-+ map_irq.devfn = dev->devfn;
-+
-+ if (type == PCI_CAP_ID_MSIX) {
-+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-+
-+ pci_read_config_dword(dev, msix_table_offset_reg(pos),
-+ &table_offset);
-+ bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
-+
-+ map_irq.table_base = pci_resource_start(dev, bir);
-+ map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
-+ }
-+
-+ spin_lock(&irq_mapping_update_lock);
-+
-+ irq = find_unbound_irq();
-+
-+ if (irq == -1)
-+ goto out;
-+
-+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
-+ if (rc) {
-+ printk(KERN_WARNING "xen map irq failed %d\n", rc);
-+
-+ dynamic_irq_cleanup(irq);
-+
-+ irq = -1;
-+ goto out;
-+ }
-+ irq_info[irq] = mk_pirq_info(0, map_irq.pirq, map_irq.index);
-+ if (domid)
-+ irq_info[irq].u.pirq.domid = domid;
-+
-+ set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
-+ handle_fasteoi_irq,
-+ (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
-
-+out:
-+ spin_unlock(&irq_mapping_update_lock);
- return irq;
- }
-+#endif
-+#endif
-+
-+int xen_vector_from_irq(unsigned irq)
-+{
-+ return vector_from_irq(irq);
-+}
-+
-+int xen_gsi_from_irq(unsigned irq)
-+{
-+ return gsi_from_irq(irq);
-+}
-+EXPORT_SYMBOL_GPL(xen_gsi_from_irq);
-
- int bind_evtchn_to_irq(unsigned int evtchn)
- {
-@@ -363,7 +752,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
- irq = find_unbound_irq();
-
- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-- handle_edge_irq, "event");
-+ handle_fasteoi_irq, "event");
-
- evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_evtchn_info(evtchn);
-@@ -410,8 +799,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
- return irq;
- }
-
-+static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
-+ unsigned int remote_port)
-+{
-+ struct evtchn_bind_interdomain bind_interdomain;
-+ int err;
-+
-+ bind_interdomain.remote_dom = remote_domain;
-+ bind_interdomain.remote_port = remote_port;
-+
-+ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
-+ &bind_interdomain);
-
--static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
-+ return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
-+}
-+
-+
-+int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
- {
- struct evtchn_bind_virq bind_virq;
- int evtchn, irq;
-@@ -421,6 +825,11 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
- irq = per_cpu(virq_to_irq, cpu)[virq];
-
- if (irq == -1) {
-+ irq = find_unbound_irq();
-+
-+ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
-+ handle_percpu_irq, "virq");
-+
- bind_virq.virq = virq;
- bind_virq.vcpu = cpu;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-@@ -428,11 +837,6 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
- BUG();
- evtchn = bind_virq.port;
-
-- irq = find_unbound_irq();
--
-- set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
-- handle_percpu_irq, "virq");
--
- evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_virq_info(evtchn, virq);
-
-@@ -505,6 +909,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
- }
- EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
-
-+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
-+ unsigned int remote_port,
-+ irq_handler_t handler,
-+ unsigned long irqflags,
-+ const char *devname,
-+ void *dev_id)
-+{
-+ int irq, retval;
-+
-+ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
-+ if (irq < 0)
-+ return irq;
-+
-+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
-+ if (retval != 0) {
-+ unbind_from_irq(irq);
-+ return retval;
-+ }
-+
-+ return irq;
-+}
-+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
-+
- int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
- irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
-@@ -564,41 +991,75 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
- {
- struct shared_info *sh = HYPERVISOR_shared_info;
- int cpu = smp_processor_id();
-+ unsigned long *cpu_evtchn = cpu_evtchn_mask(cpu);
- int i;
- unsigned long flags;
- static DEFINE_SPINLOCK(debug_lock);
-+ struct vcpu_info *v;
-
- spin_lock_irqsave(&debug_lock, flags);
-
-- printk("vcpu %d\n ", cpu);
-+ printk("\nvcpu %d\n ", cpu);
-
- for_each_online_cpu(i) {
-- struct vcpu_info *v = per_cpu(xen_vcpu, i);
-- printk("%d: masked=%d pending=%d event_sel %08lx\n ", i,
-- (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask,
-- v->evtchn_upcall_pending,
-- v->evtchn_pending_sel);
-- }
-- printk("pending:\n ");
-- for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
-- printk("%08lx%s", sh->evtchn_pending[i],
-- i % 8 == 0 ? "\n " : " ");
-- printk("\nmasks:\n ");
-- for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
-- printk("%08lx%s", sh->evtchn_mask[i],
-- i % 8 == 0 ? "\n " : " ");
--
-- printk("\nunmasked:\n ");
-- for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
-- printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
-- i % 8 == 0 ? "\n " : " ");
-+ int pending;
-+ v = per_cpu(xen_vcpu, i);
-+ pending = (get_irq_regs() && i == cpu)
-+ ? xen_irqs_disabled(get_irq_regs())
-+ : v->evtchn_upcall_mask;
-+ printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i,
-+ pending, v->evtchn_upcall_pending,
-+ (int)(sizeof(v->evtchn_pending_sel)*2),
-+ v->evtchn_pending_sel);
-+ }
-+ v = per_cpu(xen_vcpu, cpu);
-+
-+ printk("\npending:\n ");
-+ for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
-+ printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2,
-+ sh->evtchn_pending[i],
-+ i % 8 == 0 ? "\n " : " ");
-+ printk("\nglobal mask:\n ");
-+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
-+ printk("%0*lx%s",
-+ (int)(sizeof(sh->evtchn_mask[0])*2),
-+ sh->evtchn_mask[i],
-+ i % 8 == 0 ? "\n " : " ");
-+
-+ printk("\nglobally unmasked:\n ");
-+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
-+ printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
-+ sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
-+ i % 8 == 0 ? "\n " : " ");
-+
-+ printk("\nlocal cpu%d mask:\n ", cpu);
-+ for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--)
-+ printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
-+ cpu_evtchn[i],
-+ i % 8 == 0 ? "\n " : " ");
-+
-+ printk("\nlocally unmasked:\n ");
-+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
-+ unsigned long pending = sh->evtchn_pending[i]
-+ & ~sh->evtchn_mask[i]
-+ & cpu_evtchn[i];
-+ printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
-+ pending, i % 8 == 0 ? "\n " : " ");
-+ }
-
- printk("\npending list:\n");
-- for(i = 0; i < NR_EVENT_CHANNELS; i++) {
-+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (sync_test_bit(i, sh->evtchn_pending)) {
-- printk(" %d: event %d -> irq %d\n",
-+ int word_idx = i / BITS_PER_LONG;
-+ printk(" %d: event %d -> irq %d%s%s%s\n",
- cpu_from_evtchn(i), i,
-- evtchn_to_irq[i]);
-+ evtchn_to_irq[i],
-+ sync_test_bit(word_idx, &v->evtchn_pending_sel)
-+ ? "" : " l2-clear",
-+ !sync_test_bit(i, sh->evtchn_mask)
-+ ? "" : " globally-masked",
-+ sync_test_bit(i, cpu_evtchn)
-+ ? "" : " locally-masked");
- }
- }
-
-@@ -618,17 +1079,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
- * a bitset of words which contain pending event bits. The second
- * level is a bitset of pending events themselves.
- */
--void xen_evtchn_do_upcall(struct pt_regs *regs)
-+static void __xen_evtchn_do_upcall(struct pt_regs *regs)
- {
- int cpu = get_cpu();
-- struct pt_regs *old_regs = set_irq_regs(regs);
- struct shared_info *s = HYPERVISOR_shared_info;
- struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
- unsigned count;
-
-- exit_idle();
-- irq_enter();
--
- do {
- unsigned long pending_words;
-
-@@ -651,9 +1108,16 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
- int bit_idx = __ffs(pending_bits);
- int port = (word_idx * BITS_PER_LONG) + bit_idx;
- int irq = evtchn_to_irq[port];
-+ struct irq_desc *desc;
-
-- if (irq != -1)
-- handle_irq(irq, regs);
-+ mask_evtchn(port);
-+ clear_evtchn(port);
-+
-+ if (irq != -1) {
-+ desc = irq_to_desc(irq);
-+ if (desc)
-+ generic_handle_irq_desc(irq, desc);
-+ }
- }
- }
-
-@@ -661,14 +1125,32 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
-
- count = __get_cpu_var(xed_nesting_count);
- __get_cpu_var(xed_nesting_count) = 0;
-- } while(count != 1);
-+ } while (count != 1 || vcpu_info->evtchn_upcall_pending);
-
- out:
-+
-+ put_cpu();
-+}
-+
-+void xen_evtchn_do_upcall(struct pt_regs *regs)
-+{
-+ struct pt_regs *old_regs = set_irq_regs(regs);
-+
-+ exit_idle();
-+ irq_enter();
-+
-+ __xen_evtchn_do_upcall(regs);
-+
- irq_exit();
- set_irq_regs(old_regs);
-+}
-
-- put_cpu();
-+void xen_hvm_evtchn_do_upcall(void)
-+{
-+ struct pt_regs *regs = get_irq_regs();
-+ __xen_evtchn_do_upcall(regs);
- }
-+EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
-
- /* Rebind a new event channel to an existing irq. */
- void rebind_evtchn_irq(int evtchn, int irq)
-@@ -705,7 +1187,10 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
- struct evtchn_bind_vcpu bind_vcpu;
- int evtchn = evtchn_from_irq(irq);
-
-- if (!VALID_EVTCHN(evtchn))
-+ /* events delivered via platform PCI interrupts are always
-+ * routed to vcpu 0 */
-+ if (!VALID_EVTCHN(evtchn) ||
-+ (xen_hvm_domain() && !xen_have_vector_callback))
- return -1;
-
- /* Send future instances of this interrupt to other vcpu. */
-@@ -746,33 +1231,18 @@ int resend_irq_on_evtchn(unsigned int irq)
- return 1;
- }
-
--static void enable_dynirq(unsigned int irq)
--{
-- int evtchn = evtchn_from_irq(irq);
--
-- if (VALID_EVTCHN(evtchn))
-- unmask_evtchn(evtchn);
--}
--
--static void disable_dynirq(unsigned int irq)
--{
-- int evtchn = evtchn_from_irq(irq);
--
-- if (VALID_EVTCHN(evtchn))
-- mask_evtchn(evtchn);
--}
--
- static void ack_dynirq(unsigned int irq)
- {
- int evtchn = evtchn_from_irq(irq);
-+ struct irq_desc *desc = irq_to_desc(irq);
-
-- move_native_irq(irq);
-+ move_masked_irq(irq);
-
-- if (VALID_EVTCHN(evtchn))
-- clear_evtchn(evtchn);
-+ if (VALID_EVTCHN(evtchn) && !(desc->status & IRQ_DISABLED))
-+ unmask_evtchn(evtchn);
- }
-
--static int retrigger_dynirq(unsigned int irq)
-+static int retrigger_irq(unsigned int irq)
- {
- int evtchn = evtchn_from_irq(irq);
- struct shared_info *sh = HYPERVISOR_shared_info;
-@@ -814,9 +1284,6 @@ static void restore_cpu_virqs(unsigned int cpu)
- evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_virq_info(evtchn, virq);
- bind_evtchn_to_cpu(evtchn, cpu);
--
-- /* Ready for use. */
-- unmask_evtchn(evtchn);
- }
- }
-
-@@ -842,10 +1309,6 @@ static void restore_cpu_ipis(unsigned int cpu)
- evtchn_to_irq[evtchn] = irq;
- irq_info[irq] = mk_ipi_info(evtchn, ipi);
- bind_evtchn_to_cpu(evtchn, cpu);
--
-- /* Ready for use. */
-- unmask_evtchn(evtchn);
--
- }
- }
-
-@@ -857,7 +1320,7 @@ void xen_clear_irq_pending(int irq)
- if (VALID_EVTCHN(evtchn))
- clear_evtchn(evtchn);
- }
--
-+EXPORT_SYMBOL(xen_clear_irq_pending);
- void xen_set_irq_pending(int irq)
- {
- int evtchn = evtchn_from_irq(irq);
-@@ -877,9 +1340,9 @@ bool xen_test_irq_pending(int irq)
- return ret;
- }
-
--/* Poll waiting for an irq to become pending. In the usual case, the
-+/* Poll waiting for an irq to become pending with timeout. In the usual case, the
- irq will be disabled so it won't deliver an interrupt. */
--void xen_poll_irq(int irq)
-+void xen_poll_irq_timeout(int irq, u64 timeout)
- {
- evtchn_port_t evtchn = evtchn_from_irq(irq);
-
-@@ -887,17 +1350,38 @@ void xen_poll_irq(int irq)
- struct sched_poll poll;
-
- poll.nr_ports = 1;
-- poll.timeout = 0;
-+ poll.timeout = timeout;
- set_xen_guest_handle(poll.ports, &evtchn);
-
- if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
- BUG();
- }
- }
-+EXPORT_SYMBOL(xen_poll_irq_timeout);
-+/* Poll waiting for an irq to become pending. In the usual case, the
-+ irq will be disabled so it won't deliver an interrupt. */
-+void xen_poll_irq(int irq)
-+{
-+ xen_poll_irq_timeout(irq, 0 /* no timeout */);
-+}
-+
-+/* Check whether the IRQ line is shared with other guests. */
-+int xen_ignore_irq(int irq)
-+{
-+ struct irq_info *info = info_for_irq(irq);
-+ struct physdev_irq_status_query irq_status = { .irq =
-+ info->u.pirq.gsi };
-+
-+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
-+ return 0;
-+ return !(irq_status.flags & XENIRQSTAT_shared);
-+}
-+EXPORT_SYMBOL_GPL(xen_ignore_irq);
-
- void xen_irq_resume(void)
- {
- unsigned int cpu, irq, evtchn;
-+ struct irq_desc *desc;
-
- init_evtchn_cpu_bindings();
-
-@@ -916,37 +1400,134 @@ void xen_irq_resume(void)
- restore_cpu_virqs(cpu);
- restore_cpu_ipis(cpu);
- }
-+
-+ /*
-+ * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
-+ * are not handled by the IRQ core.
-+ */
-+ for_each_irq_desc(irq, desc) {
-+ if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
-+ continue;
-+ if (desc->status & IRQ_DISABLED)
-+ continue;
-+
-+ evtchn = evtchn_from_irq(irq);
-+ if (evtchn == -1)
-+ continue;
-+
-+ unmask_evtchn(evtchn);
-+ }
-+
-+ if (pirq_eoi_does_unmask) {
-+ struct physdev_pirq_eoi_gmfn eoi_gmfn;
-+
-+ eoi_gmfn.gmfn = virt_to_mfn(pirq_needs_eoi_bits);
-+ if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) != 0) {
-+ /* Could recover by reverting to old method...? */
-+ BUG();
-+ }
-+ }
- }
-
- static struct irq_chip xen_dynamic_chip __read_mostly = {
- .name = "xen-dyn",
-
-- .disable = disable_dynirq,
-- .mask = disable_dynirq,
-- .unmask = enable_dynirq,
-+ .disable = mask_irq,
-+ .mask = mask_irq,
-+ .unmask = unmask_irq,
-
-- .ack = ack_dynirq,
-+ .eoi = ack_dynirq,
- .set_affinity = set_affinity_irq,
-- .retrigger = retrigger_dynirq,
-+ .retrigger = retrigger_irq,
- };
-
- static struct irq_chip xen_percpu_chip __read_mostly = {
- .name = "xen-percpu",
-
-- .disable = disable_dynirq,
-- .mask = disable_dynirq,
-- .unmask = enable_dynirq,
-+ .disable = mask_irq,
-+ .mask = mask_irq,
-+ .unmask = unmask_irq,
-
- .ack = ack_dynirq,
- };
-
-+static struct irq_chip xen_pirq_chip __read_mostly = {
-+ .name = "xen-pirq",
-+
-+ .startup = startup_pirq,
-+ .shutdown = shutdown_pirq,
-+
-+ .enable = pirq_eoi,
-+ .unmask = unmask_irq,
-+
-+ .disable = mask_irq,
-+ .mask = mask_irq,
-+
-+ .eoi = ack_pirq,
-+ .end = end_pirq,
-+
-+ .set_affinity = set_affinity_irq,
-+
-+ .retrigger = retrigger_irq,
-+};
-+
-+int xen_set_callback_via(uint64_t via)
-+{
-+ struct xen_hvm_param a;
-+ a.domid = DOMID_SELF;
-+ a.index = HVM_PARAM_CALLBACK_IRQ;
-+ a.value = via;
-+ return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
-+}
-+EXPORT_SYMBOL_GPL(xen_set_callback_via);
-+
-+#ifdef CONFIG_XEN_PVHVM
-+/* Vector callbacks are better than PCI interrupts to receive event
-+ * channel notifications because we can receive vector callbacks on any
-+ * vcpu and we don't need PCI support or APIC interactions. */
-+void xen_callback_vector(void)
-+{
-+ int rc;
-+ uint64_t callback_via;
-+ if (xen_have_vector_callback) {
-+ callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK);
-+ rc = xen_set_callback_via(callback_via);
-+ if (rc) {
-+ printk(KERN_ERR "Request for Xen HVM callback vector"
-+ " failed.\n");
-+ xen_have_vector_callback = 0;
-+ return;
-+ }
-+ printk(KERN_INFO "Xen HVM callback vector for event delivery is "
-+ "enabled\n");
-+ alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector);
-+ }
-+}
-+#else
-+void xen_callback_vector(void) {}
-+#endif
-+
- void __init xen_init_IRQ(void)
- {
- int i;
-+ struct physdev_pirq_eoi_gmfn eoi_gmfn;
-+ int nr_pirqs = NR_IRQS;
-
- cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s),
- GFP_KERNEL);
-- BUG_ON(cpu_evtchn_mask_p == NULL);
-+ irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL);
-+
-+ evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
-+ GFP_KERNEL);
-+ for(i = 0; i < NR_EVENT_CHANNELS; i++)
-+ evtchn_to_irq[i] = -1;
-+
-+ i = get_order(sizeof(unsigned long) * BITS_TO_LONGS(nr_pirqs));
-+ pirq_needs_eoi_bits = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, i);
-+
-+ eoi_gmfn.gmfn = virt_to_mfn(pirq_needs_eoi_bits);
-+ if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0)
-+ pirq_eoi_does_unmask = true;
-
- init_evtchn_cpu_bindings();
-
-@@ -954,5 +1535,11 @@ void __init xen_init_IRQ(void)
- for (i = 0; i < NR_EVENT_CHANNELS; i++)
- mask_evtchn(i);
-
-- irq_ctx_init(smp_processor_id());
-+ if (xen_hvm_domain()) {
-+ xen_callback_vector();
-+ native_init_IRQ();
-+ } else {
-+ irq_ctx_init(smp_processor_id());
-+ xen_setup_pirqs();
-+ }
- }
-diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
-index 79bedba..b82666a 100644
---- a/drivers/xen/evtchn.c
-+++ b/drivers/xen/evtchn.c
-@@ -48,6 +48,8 @@
- #include <linux/gfp.h>
- #include <linux/mutex.h>
- #include <linux/cpu.h>
-+
-+#include <xen/xen.h>
- #include <xen/events.h>
- #include <xen/evtchn.h>
- #include <asm/xen/hypervisor.h>
-@@ -68,10 +70,36 @@ struct per_user_data {
- const char *name;
- };
-
--/* Who's bound to each port? */
--static struct per_user_data *port_user[NR_EVENT_CHANNELS];
-+/*
-+ * Who's bound to each port? This is logically an array of struct
-+ * per_user_data *, but we encode the current enabled-state in bit 0.
-+ */
-+static unsigned long *port_user;
- static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
-
-+static inline struct per_user_data *get_port_user(unsigned port)
-+{
-+ return (struct per_user_data *)(port_user[port] & ~1);
-+}
-+
-+static inline void set_port_user(unsigned port, struct per_user_data *u)
-+{
-+ port_user[port] = (unsigned long)u;
-+}
-+
-+static inline bool get_port_enabled(unsigned port)
-+{
-+ return port_user[port] & 1;
-+}
-+
-+static inline void set_port_enabled(unsigned port, bool enabled)
-+{
-+ if (enabled)
-+ port_user[port] |= 1;
-+ else
-+ port_user[port] &= ~1;
-+}
-+
- irqreturn_t evtchn_interrupt(int irq, void *data)
- {
- unsigned int port = (unsigned long)data;
-@@ -79,9 +107,14 @@ irqreturn_t evtchn_interrupt(int irq, void *data)
-
- spin_lock(&port_user_lock);
-
-- u = port_user[port];
-+ u = get_port_user(port);
-+
-+ WARN(!get_port_enabled(port),
-+ "Interrupt for port %d, but apparently not enabled; per-user %p\n",
-+ port, u);
-
- disable_irq_nosync(irq);
-+ set_port_enabled(port, false);
-
- if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
- u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
-@@ -91,9 +124,8 @@ irqreturn_t evtchn_interrupt(int irq, void *data)
- kill_fasync(&u->evtchn_async_queue,
- SIGIO, POLL_IN);
- }
-- } else {
-+ } else
- u->ring_overflow = 1;
-- }
-
- spin_unlock(&port_user_lock);
-
-@@ -197,9 +229,18 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
- goto out;
-
- spin_lock_irq(&port_user_lock);
-- for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
-- if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
-- enable_irq(irq_from_evtchn(kbuf[i]));
-+
-+ for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
-+ unsigned port = kbuf[i];
-+
-+ if (port < NR_EVENT_CHANNELS &&
-+ get_port_user(port) == u &&
-+ !get_port_enabled(port)) {
-+ set_port_enabled(port, true);
-+ enable_irq(irq_from_evtchn(port));
-+ }
-+ }
-+
- spin_unlock_irq(&port_user_lock);
-
- rc = count;
-@@ -221,8 +262,9 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
- * interrupt handler yet, and our caller has already
- * serialized bind operations.)
- */
-- BUG_ON(port_user[port] != NULL);
-- port_user[port] = u;
-+ BUG_ON(get_port_user(port) != NULL);
-+ set_port_user(port, u);
-+ set_port_enabled(port, true); /* start enabled */
-
- rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
- u->name, (void *)(unsigned long)port);
-@@ -238,10 +280,7 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port)
-
- unbind_from_irqhandler(irq, (void *)(unsigned long)port);
-
-- /* make sure we unbind the irq handler before clearing the port */
-- barrier();
--
-- port_user[port] = NULL;
-+ set_port_user(port, NULL);
- }
-
- static long evtchn_ioctl(struct file *file,
-@@ -332,15 +371,17 @@ static long evtchn_ioctl(struct file *file,
- spin_lock_irq(&port_user_lock);
-
- rc = -ENOTCONN;
-- if (port_user[unbind.port] != u) {
-+ if (get_port_user(unbind.port) != u) {
- spin_unlock_irq(&port_user_lock);
- break;
- }
-
-- evtchn_unbind_from_user(u, unbind.port);
-+ disable_irq(irq_from_evtchn(unbind.port));
-
- spin_unlock_irq(&port_user_lock);
-
-+ evtchn_unbind_from_user(u, unbind.port);
-+
- rc = 0;
- break;
- }
-@@ -354,7 +395,7 @@ static long evtchn_ioctl(struct file *file,
-
- if (notify.port >= NR_EVENT_CHANNELS) {
- rc = -EINVAL;
-- } else if (port_user[notify.port] != u) {
-+ } else if (get_port_user(notify.port) != u) {
- rc = -ENOTCONN;
- } else {
- notify_remote_via_evtchn(notify.port);
-@@ -443,14 +484,21 @@ static int evtchn_release(struct inode *inode, struct file *filp)
- free_page((unsigned long)u->ring);
-
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-- if (port_user[i] != u)
-+ if (get_port_user(i) != u)
- continue;
-
-- evtchn_unbind_from_user(port_user[i], i);
-+ disable_irq(irq_from_evtchn(i));
- }
-
- spin_unlock_irq(&port_user_lock);
-
-+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-+ if (get_port_user(i) != u)
-+ continue;
-+
-+ evtchn_unbind_from_user(get_port_user(i), i);
-+ }
-+
- kfree(u->name);
- kfree(u);
-
-@@ -470,7 +518,7 @@ static const struct file_operations evtchn_fops = {
-
- static struct miscdevice evtchn_miscdev = {
- .minor = MISC_DYNAMIC_MINOR,
-- .name = "evtchn",
-+ .name = "xen/evtchn",
- .fops = &evtchn_fops,
- };
- static int __init evtchn_init(void)
-@@ -480,8 +528,11 @@ static int __init evtchn_init(void)
- if (!xen_domain())
- return -ENODEV;
-
-+ port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
-+ if (port_user == NULL)
-+ return -ENOMEM;
-+
- spin_lock_init(&port_user_lock);
-- memset(port_user, 0, sizeof(port_user));
-
- /* Create '/dev/misc/evtchn'. */
- err = misc_register(&evtchn_miscdev);
-@@ -497,6 +548,9 @@ static int __init evtchn_init(void)
-
- static void __exit evtchn_cleanup(void)
- {
-+ kfree(port_user);
-+ port_user = NULL;
-+
- misc_deregister(&evtchn_miscdev);
- }
-
-diff --git a/drivers/xen/features.c b/drivers/xen/features.c
-index 99eda16..9e2b64f 100644
---- a/drivers/xen/features.c
-+++ b/drivers/xen/features.c
-@@ -18,7 +18,7 @@
- u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
- EXPORT_SYMBOL_GPL(xen_features);
-
--void xen_setup_features(void)
-+void __init xen_setup_features(void)
- {
- struct xen_feature_info fi;
- int i, j;
-diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
-new file mode 100644
-index 0000000..a33e443
---- /dev/null
-+++ b/drivers/xen/gntdev.c
-@@ -0,0 +1,645 @@
-+/******************************************************************************
-+ * gntdev.c
-+ *
-+ * Device for accessing (in user-space) pages that have been granted by other
-+ * domains.
-+ *
-+ * Copyright (c) 2006-2007, D G Murray.
-+ * (c) 2009 Gerd Hoffmann <kraxel@redhat.com>
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/kernel.h>
-+#include <linux/init.h>
-+#include <linux/miscdevice.h>
-+#include <linux/fs.h>
-+#include <linux/mm.h>
-+#include <linux/mman.h>
-+#include <linux/mmu_notifier.h>
-+#include <linux/types.h>
-+#include <linux/uaccess.h>
-+#include <linux/sched.h>
-+#include <linux/spinlock.h>
-+
-+#include <xen/xen.h>
-+#include <xen/grant_table.h>
-+#include <xen/gntdev.h>
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/hypercall.h>
-+#include <asm/xen/page.h>
-+
-+MODULE_LICENSE("GPL");
-+MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
-+ "Gerd Hoffmann <kraxel@redhat.com>");
-+MODULE_DESCRIPTION("User-space granted page access driver");
-+
-+static int debug = 0;
-+module_param(debug, int, 0644);
-+static int limit = 1024;
-+module_param(limit, int, 0644);
-+
-+struct gntdev_priv {
-+ struct list_head maps;
-+ uint32_t used;
-+ uint32_t limit;
-+ spinlock_t lock;
-+ struct mm_struct *mm;
-+ struct mmu_notifier mn;
-+};
-+
-+struct grant_map {
-+ struct list_head next;
-+ struct gntdev_priv *priv;
-+ struct vm_area_struct *vma;
-+ int index;
-+ int count;
-+ int flags;
-+ int is_mapped;
-+ struct ioctl_gntdev_grant_ref *grants;
-+ struct gnttab_map_grant_ref *map_ops;
-+ struct gnttab_unmap_grant_ref *unmap_ops;
-+};
-+
-+/* ------------------------------------------------------------------ */
-+
-+static void gntdev_print_maps(struct gntdev_priv *priv,
-+ char *text, int text_index)
-+{
-+ struct grant_map *map;
-+
-+ printk("%s: maps list (priv %p, usage %d/%d)\n",
-+ __FUNCTION__, priv, priv->used, priv->limit);
-+ list_for_each_entry(map, &priv->maps, next)
-+ printk(" index %2d, count %2d %s\n",
-+ map->index, map->count,
-+ map->index == text_index && text ? text : "");
-+}
-+
-+static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
-+{
-+ struct grant_map *add;
-+
-+ add = kzalloc(sizeof(struct grant_map), GFP_KERNEL);
-+ if (NULL == add)
-+ return NULL;
-+
-+ add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL);
-+ add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL);
-+ add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
-+ if (NULL == add->grants ||
-+ NULL == add->map_ops ||
-+ NULL == add->unmap_ops)
-+ goto err;
-+
-+ add->index = 0;
-+ add->count = count;
-+ add->priv = priv;
-+
-+ if (add->count + priv->used > priv->limit)
-+ goto err;
-+
-+ return add;
-+
-+err:
-+ kfree(add->grants);
-+ kfree(add->map_ops);
-+ kfree(add->unmap_ops);
-+ kfree(add);
-+ return NULL;
-+}
-+
-+/*
-+ * Link @add into priv->maps and pick its index: the first gap large enough
-+ * to hold add->count pages, or directly after the last entry if no gap fits.
-+ * priv->used is charged with add->count.
-+ */
-+static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
-+{
-+	struct list_head *before = &priv->maps;	/* default: append at the tail */
-+	struct grant_map *pos;
-+
-+	list_for_each_entry(pos, &priv->maps, next) {
-+		if (add->index + add->count < pos->index) {
-+			/* fits in front of pos */
-+			before = &pos->next;
-+			break;
-+		}
-+		add->index = pos->index + pos->count;
-+	}
-+	list_add_tail(&add->next, before);
-+
-+	priv->used += add->count;
-+	if (debug)
-+		gntdev_print_maps(priv, "[new]", add->index);
-+}
-+
-+/* Return the map of @priv with exactly this @index and @count, or NULL. */
-+static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, int index,
-+					       int count)
-+{
-+	struct grant_map *cur;
-+
-+	list_for_each_entry(cur, &priv->maps, next) {
-+		if (cur->index == index && cur->count == count)
-+			return cur;
-+	}
-+	return NULL;
-+}
-+
-+/*
-+ * Return the map of @priv whose VMA covers @vaddr (vm_start inclusive,
-+ * vm_end exclusive), or NULL. Maps without a VMA are ignored.
-+ */
-+static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv,
-+					       unsigned long vaddr)
-+{
-+	struct grant_map *cur;
-+
-+	list_for_each_entry(cur, &priv->maps, next) {
-+		struct vm_area_struct *area = cur->vma;
-+
-+		if (area && vaddr >= area->vm_start && vaddr < area->vm_end)
-+			return cur;
-+	}
-+	return NULL;
-+}
-+
-+/*
-+ * Unlink @map from its owner's list and give its pages back to the quota.
-+ * Returns -EBUSY, leaving the map untouched, while a VMA is attached or
-+ * any unmap op still carries a non-zero handle; 0 on success. The map
-+ * itself is not freed.
-+ */
-+static int gntdev_del_map(struct grant_map *map)
-+{
-+	int nr = 0;
-+
-+	if (map->vma)
-+		return -EBUSY;
-+
-+	while (nr < map->count) {
-+		if (map->unmap_ops[nr].handle)
-+			return -EBUSY;
-+		nr++;
-+	}
-+
-+	list_del(&map->next);
-+	map->priv->used -= map->count;
-+	return 0;
-+}
-+
-+/* Free @map and its three per-grant arrays; a NULL @map is accepted. */
-+static void gntdev_free_map(struct grant_map *map)
-+{
-+	if (map) {
-+		kfree(map->unmap_ops);
-+		kfree(map->map_ops);
-+		kfree(map->grants);
-+		kfree(map);
-+	}
-+}
-+
-+/* ------------------------------------------------------------------ */
-+
-+/*
-+ * apply_to_page_range() callback used by gntdev_mmap(): for the PTE backing
-+ * @addr, fill in the matching map and unmap operations of the grant_map
-+ * passed as @data. Always returns 0.
-+ */
-+static int find_grant_ptes(pte_t *pte, pgtable_t token, unsigned long addr, void *data)
-+{
-+ struct grant_map *map = data;
-+ /* page number of addr inside the VMA, used as index into the op arrays */
-+ unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
-+ u64 pte_maddr;
-+
-+ BUG_ON(pgnr >= map->count);
-+ /* machine frame of the page-table page (token), as a byte address ... */
-+ pte_maddr = (u64)pfn_to_mfn(page_to_pfn(token)) << PAGE_SHIFT;
-+ /* ... plus the offset of this PTE within that page */
-+ pte_maddr += (unsigned long)pte & ~PAGE_MASK;
-+ gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, map->flags,
-+ map->grants[pgnr].ref,
-+ map->grants[pgnr].domid);
-+ /* handle is filled in later by map_grant_pages() */
-+ gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, map->flags,
-+ 0 /* handle */);
-+ return 0;
-+}
-+
-+/*
-+ * Issue GNTTABOP_map_grant_ref for all pages of @map and copy every
-+ * returned handle into the matching unmap op.
-+ *
-+ * Returns the hypercall's error if the call itself failed, -EINVAL if any
-+ * single op reported a non-zero status, 0 otherwise.
-+ */
-+static int map_grant_pages(struct grant_map *map)
-+{
-+	int rc, nr;
-+
-+	if (debug)
-+		printk("%s: map %d+%d\n", __FUNCTION__, map->index, map->count);
-+
-+	rc = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
-+				       map->map_ops, map->count);
-+	if (WARN_ON(rc))
-+		return rc;
-+
-+	for (nr = 0; nr < map->count; nr++) {
-+		struct gnttab_map_grant_ref *op = &map->map_ops[nr];
-+
-+		if (op->status)
-+			rc = -EINVAL;
-+		map->unmap_ops[nr].handle = op->handle;
-+	}
-+	return rc;
-+}
-+
-+/*
-+ * Issue GNTTABOP_unmap_grant_ref for @pages pages of @map starting at page
-+ * @offset, then zero the handle of every op in that range.
-+ *
-+ * Returns the hypercall's error if the call itself failed, -EINVAL if any
-+ * single op reported a non-zero status, 0 otherwise.
-+ */
-+static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
-+{
-+	struct gnttab_unmap_grant_ref *ops = map->unmap_ops + offset;
-+	int rc, nr;
-+
-+	if (debug)
-+		printk("%s: map %d+%d [%d+%d]\n", __FUNCTION__,
-+		       map->index, map->count, offset, pages);
-+
-+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, ops, pages);
-+	if (WARN_ON(rc))
-+		return rc;
-+
-+	for (nr = 0; nr < pages; nr++) {
-+		if (ops[nr].status)
-+			rc = -EINVAL;
-+		ops[nr].handle = 0;
-+	}
-+	return rc;
-+}
-+
-+/* ------------------------------------------------------------------ */
-+
-+/* ->close hook: sever the link between @vma and its grant_map. */
-+static void gntdev_vma_close(struct vm_area_struct *vma)
-+{
-+	struct grant_map *gmap = vma->vm_private_data;
-+
-+	if (debug)
-+		printk("%s\n", __FUNCTION__);
-+
-+	vma->vm_private_data = NULL;
-+	gmap->vma = NULL;
-+	gmap->is_mapped = 0;
-+}
-+
-+/*
-+ * ->fault hook. gntdev_mmap() installs every page of the mapping up front,
-+ * so a fault on a gntdev VMA is unexpected and is refused.
-+ *
-+ * A fault handler reports its verdict through the return value. vmf->flags
-+ * is an input describing the fault, so storing an error there while
-+ * returning 0 would claim success without supplying a page.
-+ */
-+static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+	if (debug)
-+		printk("%s: vaddr %p, pgoff %ld (shouldn't happen)\n",
-+		       __FUNCTION__, vmf->virtual_address, vmf->pgoff);
-+	return VM_FAULT_SIGBUS;
-+}
-+
-+/* VMA callbacks installed on every mapping by gntdev_mmap(). */
-+static struct vm_operations_struct gntdev_vmops = {
-+ .close = gntdev_vma_close,
-+ .fault = gntdev_vma_fault,
-+};
-+
-+/* ------------------------------------------------------------------ */
-+
-+/*
-+ * MMU notifier hook: the range [@start, @end) of @mm is being invalidated.
-+ * For every mapped grant_map whose VMA overlaps the range, unmap the grant
-+ * pages that fall inside the overlap.
-+ */
-+static void mn_invl_range_start(struct mmu_notifier *mn,
-+				struct mm_struct *mm,
-+				unsigned long start, unsigned long end)
-+{
-+	struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
-+	struct grant_map *map;
-+	unsigned long mstart, mend;
-+	int err;
-+
-+	spin_lock(&priv->lock);
-+	list_for_each_entry(map, &priv->maps, next) {
-+		struct vm_area_struct *area = map->vma;
-+
-+		if (!area || !map->is_mapped)
-+			continue;
-+		/* no overlap with the invalidated range */
-+		if (area->vm_start >= end || area->vm_end <= start)
-+			continue;
-+
-+		/* clamp the range to this VMA */
-+		mstart = max(start, area->vm_start);
-+		mend = min(end, area->vm_end);
-+		if (debug)
-+			printk("%s: map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
-+			       __FUNCTION__, map->index, map->count,
-+			       area->vm_start, area->vm_end,
-+			       start, end, mstart, mend);
-+		err = unmap_grant_pages(map,
-+					(mstart - area->vm_start) >> PAGE_SHIFT,
-+					(mend - mstart) >> PAGE_SHIFT);
-+		WARN_ON(err);
-+	}
-+	spin_unlock(&priv->lock);
-+}
-+
-+/* MMU notifier hook for one page: handled as the range [address, address + PAGE_SIZE). */
-+static void mn_invl_page(struct mmu_notifier *mn,
-+ struct mm_struct *mm,
-+ unsigned long address)
-+{
-+ mn_invl_range_start(mn, mm, address, address + PAGE_SIZE);
-+}
-+
-+/*
-+ * MMU notifier ->release hook: unmap all grant pages of every map of this
-+ * file that still has a VMA attached.
-+ */
-+static void mn_release(struct mmu_notifier *mn,
-+		       struct mm_struct *mm)
-+{
-+	struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
-+	struct grant_map *map;
-+	int err;
-+
-+	spin_lock(&priv->lock);
-+	list_for_each_entry(map, &priv->maps, next) {
-+		if (map->vma) {
-+			if (debug)
-+				printk("%s: map %d+%d (%lx %lx)\n",
-+				       __FUNCTION__, map->index, map->count,
-+				       map->vma->vm_start, map->vma->vm_end);
-+			err = unmap_grant_pages(map, 0, map->count);
-+			WARN_ON(err);
-+		}
-+	}
-+	spin_unlock(&priv->lock);
-+}
-+
-+/*
-+ * MMU notifier callbacks; gntdev_open() registers them on the opener's mm.
-+ * NOTE(review): not declared static although only this file is seen using it.
-+ */
-+struct mmu_notifier_ops gntdev_mmu_ops = {
-+ .release = mn_release,
-+ .invalidate_page = mn_invl_page,
-+ .invalidate_range_start = mn_invl_range_start,
-+};
-+
-+/* ------------------------------------------------------------------ */
-+
-+/*
-+ * open() handler: allocate the per-file state, tie it to the opener's mm
-+ * through an MMU notifier and store it in flip->private_data.
-+ *
-+ * Returns 0 on success, -ENOMEM if the state cannot be allocated or the
-+ * task has no mm, or the error reported by mmu_notifier_register().
-+ */
-+static int gntdev_open(struct inode *inode, struct file *flip)
-+{
-+	struct gntdev_priv *priv;
-+	int err;
-+
-+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-+	if (!priv)
-+		return -ENOMEM;
-+
-+	INIT_LIST_HEAD(&priv->maps);
-+	spin_lock_init(&priv->lock);
-+	priv->limit = limit;
-+
-+	priv->mm = get_task_mm(current);
-+	if (!priv->mm) {
-+		kfree(priv);
-+		return -ENOMEM;
-+	}
-+	priv->mn.ops = &gntdev_mmu_ops;
-+	err = mmu_notifier_register(&priv->mn, priv->mm);
-+	/* drop the reference taken by get_task_mm() whatever the outcome */
-+	mmput(priv->mm);
-+	if (err) {
-+		/* without the notifier, invalidations would never reach us */
-+		kfree(priv);
-+		return err;
-+	}
-+
-+	flip->private_data = priv;
-+	if (debug)
-+		printk("%s: priv %p\n", __FUNCTION__, priv);
-+
-+	return 0;
-+}
-+
-+/*
-+ * release() handler: dispose of every remaining map, unregister the MMU
-+ * notifier and free the per-file state. Always returns 0.
-+ */
-+static int gntdev_release(struct inode *inode, struct file *flip)
-+{
-+	struct gntdev_priv *priv = flip->private_data;
-+	struct grant_map *map;
-+	int err;
-+
-+	if (debug)
-+		printk("%s: priv %p\n", __FUNCTION__, priv);
-+
-+	spin_lock(&priv->lock);
-+	while (!list_empty(&priv->maps)) {
-+		map = list_entry(priv->maps.next, struct grant_map, next);
-+		err = gntdev_del_map(map);
-+		/*
-+		 * A busy map is unexpected at this point. Warn, but still
-+		 * take it off the list by hand: leaving it linked would make
-+		 * this loop pick the same, already freed, entry again.
-+		 * NOTE(review): presumably no VMA can still point at the map
-+		 * here because a live mapping pins the file - confirm.
-+		 */
-+		if (WARN_ON(err))
-+			list_del(&map->next);
-+		/* every map is freed, including those unlinked without error */
-+		gntdev_free_map(map);
-+	}
-+	spin_unlock(&priv->lock);
-+
-+	mmu_notifier_unregister(&priv->mn, priv->mm);
-+	kfree(priv);
-+	return 0;
-+}
-+
-+/*
-+ * IOCTL_GNTDEV_MAP_GRANT_REF: register op.count grants read from @u and
-+ * report, in op.index, the offset userspace must pass to mmap().
-+ *
-+ * Returns 0 on success, -EFAULT if @u cannot be read or written, -EINVAL
-+ * for a zero or over-limit count, and -ENOMEM if gntdev_alloc_map() fails
-+ * (out of memory or quota exhausted).
-+ */
-+static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
-+				       struct ioctl_gntdev_map_grant_ref __user *u)
-+{
-+	struct ioctl_gntdev_map_grant_ref op;
-+	struct grant_map *map;
-+	int err;
-+
-+	if (copy_from_user(&op, u, sizeof(op)) != 0)
-+		return -EFAULT;
-+	if (debug)
-+		printk("%s: priv %p, add %d\n", __FUNCTION__, priv,
-+		       op.count);
-+	if (unlikely(op.count <= 0))
-+		return -EINVAL;
-+	if (unlikely(op.count > priv->limit))
-+		return -EINVAL;
-+
-+	map = gntdev_alloc_map(priv, op.count);
-+	if (!map)
-+		return -ENOMEM;
-+	if (copy_from_user(map->grants, &u->refs,
-+			   sizeof(map->grants[0]) * op.count) != 0) {
-+		/* a bad user pointer is a fault, not memory exhaustion */
-+		gntdev_free_map(map);
-+		return -EFAULT;
-+	}
-+
-+	spin_lock(&priv->lock);
-+	gntdev_add_map(priv, map);
-+	op.index = map->index << PAGE_SHIFT;
-+	spin_unlock(&priv->lock);
-+
-+	if (copy_to_user(u, &op, sizeof(op)) != 0) {
-+		spin_lock(&priv->lock);
-+		err = gntdev_del_map(map);
-+		spin_unlock(&priv->lock);
-+		/*
-+		 * The lock was dropped after publishing the map, so it may
-+		 * have been mmap'ed meanwhile; only free it if it really
-+		 * came off the list.
-+		 */
-+		if (!err)
-+			gntdev_free_map(map);
-+		return -EFAULT;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * IOCTL_GNTDEV_UNMAP_GRANT_REF: drop the map identified by op.index and
-+ * op.count. Returns -EFAULT if @u cannot be read, -EINVAL if no such map
-+ * exists, otherwise the result of gntdev_del_map().
-+ */
-+static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
-+					 struct ioctl_gntdev_unmap_grant_ref __user *u)
-+{
-+	struct ioctl_gntdev_unmap_grant_ref op;
-+	struct grant_map *victim;
-+	int rc;
-+
-+	if (copy_from_user(&op, u, sizeof(op)))
-+		return -EFAULT;
-+	if (debug)
-+		printk("%s: priv %p, del %d+%d\n", __FUNCTION__, priv,
-+		       (int)op.index, (int)op.count);
-+
-+	spin_lock(&priv->lock);
-+	victim = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
-+	rc = victim ? gntdev_del_map(victim) : -EINVAL;
-+	spin_unlock(&priv->lock);
-+
-+	/* free outside the lock, and only once the map is off the list */
-+	if (rc == 0)
-+		gntdev_free_map(victim);
-+	return rc;
-+}
-+
-+/*
-+ * IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: translate the start address of a
-+ * mapping back into the device offset and page count it was created with.
-+ * Returns -EINVAL unless op.vaddr is exactly the vm_start of one of this
-+ * file's mappings, -EFAULT if @u cannot be read or written.
-+ */
-+static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
-+					      struct ioctl_gntdev_get_offset_for_vaddr __user *u)
-+{
-+	struct ioctl_gntdev_get_offset_for_vaddr op;
-+	struct grant_map *hit;
-+	long rc = -EINVAL;
-+
-+	if (copy_from_user(&op, u, sizeof(op)))
-+		return -EFAULT;
-+	if (debug)
-+		printk("%s: priv %p, offset for vaddr %lx\n", __FUNCTION__, priv,
-+		       (unsigned long)op.vaddr);
-+
-+	spin_lock(&priv->lock);
-+	hit = gntdev_find_map_vaddr(priv, op.vaddr);
-+	if (hit && hit->vma->vm_start == op.vaddr) {
-+		op.offset = hit->index << PAGE_SHIFT;
-+		op.count = hit->count;
-+		rc = 0;
-+	}
-+	spin_unlock(&priv->lock);
-+
-+	if (rc)
-+		return rc;
-+	return copy_to_user(u, &op, sizeof(op)) ? -EFAULT : 0;
-+}
-+
-+/*
-+ * IOCTL_GNTDEV_SET_MAX_GRANTS: set this file's grant ceiling to op.count.
-+ * Values above the module-wide limit are rejected with -EINVAL.
-+ */
-+static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
-+					struct ioctl_gntdev_set_max_grants __user *u)
-+{
-+	struct ioctl_gntdev_set_max_grants req;
-+
-+	if (copy_from_user(&req, u, sizeof(req)))
-+		return -EFAULT;
-+	if (debug)
-+		printk("%s: priv %p, limit %d\n", __FUNCTION__, priv, req.count);
-+	if (req.count > limit)
-+		return -EINVAL;
-+
-+	spin_lock(&priv->lock);
-+	priv->limit = req.count;
-+	spin_unlock(&priv->lock);
-+	return 0;
-+}
-+
-+/*
-+ * unlocked_ioctl entry point: dispatch @cmd to its handler, passing @arg
-+ * as a user pointer. Unknown commands yield -ENOIOCTLCMD.
-+ */
-+static long gntdev_ioctl(struct file *flip,
-+			 unsigned int cmd, unsigned long arg)
-+{
-+	struct gntdev_priv *priv = flip->private_data;
-+	void __user *ptr = (void __user *)arg;
-+	long rc;
-+
-+	switch (cmd) {
-+	case IOCTL_GNTDEV_MAP_GRANT_REF:
-+		rc = gntdev_ioctl_map_grant_ref(priv, ptr);
-+		break;
-+	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
-+		rc = gntdev_ioctl_unmap_grant_ref(priv, ptr);
-+		break;
-+	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
-+		rc = gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
-+		break;
-+	case IOCTL_GNTDEV_SET_MAX_GRANTS:
-+		rc = gntdev_ioctl_set_max_grants(priv, ptr);
-+		break;
-+	default:
-+		if (debug)
-+			printk("%s: priv %p, unknown cmd %x\n",
-+			       __FUNCTION__, priv, cmd);
-+		rc = -ENOIOCTLCMD;
-+		break;
-+	}
-+
-+	return rc;
-+}
-+
-+/*
-+ * mmap() handler: attach @vma to the grant_map registered under page
-+ * offset vma->vm_pgoff with exactly as many pages as the VMA spans, then
-+ * map the grants into it.
-+ *
-+ * Returns 0 on success. -EINVAL is returned for a writable private
-+ * mapping, when no matching map exists, when the map already has a VMA, or
-+ * when the VMA belongs to a different mm than the one that opened the
-+ * device; errors from apply_to_page_range() and map_grant_pages() are
-+ * passed through.
-+ */
-+static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
-+{
-+	struct gntdev_priv *priv = flip->private_data;
-+	int index = vma->vm_pgoff;
-+	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-+	struct grant_map *map;
-+	int err = -EINVAL;
-+
-+	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
-+		return -EINVAL;
-+
-+	if (debug)
-+		printk("%s: map %d+%d at %lx (pgoff %lx)\n", __FUNCTION__,
-+		       index, count, vma->vm_start, vma->vm_pgoff);
-+
-+	spin_lock(&priv->lock);
-+	map = gntdev_find_map_index(priv, index, count);
-+	if (!map)
-+		goto unlock_out;
-+	if (map->vma)
-+		goto unlock_out;
-+	if (priv->mm != vma->vm_mm) {
-+		printk("%s: Huh? Other mm?\n", __FUNCTION__);
-+		goto unlock_out;
-+	}
-+
-+	vma->vm_ops = &gntdev_vmops;
-+
-+	vma->vm_flags |= VM_RESERVED;
-+	vma->vm_flags |= VM_DONTCOPY;
-+	vma->vm_flags |= VM_DONTEXPAND;
-+
-+	vma->vm_private_data = map;
-+	map->vma = vma;
-+
-+	map->flags = GNTMAP_host_map | GNTMAP_application_map | GNTMAP_contains_pte;
-+	if (!(vma->vm_flags & VM_WRITE))
-+		map->flags |= GNTMAP_readonly;
-+
-+	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
-+				  vma->vm_end - vma->vm_start,
-+				  find_grant_ptes, map);
-+	if (err) {
-+		/* report before jumping; after the goto it could never run */
-+		if (debug)
-+			printk("%s: find_grant_ptes() failure.\n", __FUNCTION__);
-+		goto detach_out;
-+	}
-+
-+	err = map_grant_pages(map);
-+	if (err) {
-+		if (debug)
-+			printk("%s: map_grant_pages() failure.\n", __FUNCTION__);
-+		goto detach_out;
-+	}
-+	map->is_mapped = 1;
-+	goto unlock_out;
-+
-+detach_out:
-+	/*
-+	 * Undo the association made above so the map does not keep a pointer
-+	 * to a VMA that never became live.
-+	 * NOTE(review): assumes the mm core discards a VMA whose ->mmap
-+	 * failed without calling ->close - confirm against mmap_region().
-+	 */
-+	map->vma = NULL;
-+	vma->vm_private_data = NULL;
-+
-+unlock_out:
-+	spin_unlock(&priv->lock);
-+	return err;
-+}
-+
-+/* File operations of the gntdev character device. */
-+static const struct file_operations gntdev_fops = {
-+ .owner = THIS_MODULE,
-+ .open = gntdev_open,
-+ .release = gntdev_release,
-+ .mmap = gntdev_mmap,
-+ .unlocked_ioctl = gntdev_ioctl
-+};
-+
-+/* Misc device named "xen/gntdev" with a dynamically assigned minor. */
-+static struct miscdevice gntdev_miscdev = {
-+ .minor = MISC_DYNAMIC_MINOR,
-+ .name = "xen/gntdev",
-+ .fops = &gntdev_fops,
-+};
-+
-+/* ------------------------------------------------------------------ */
-+
-+/*
-+ * Module init: register the misc device. Fails with -ENODEV when not
-+ * running as a Xen domain, or with the error from misc_register().
-+ */
-+static int __init gntdev_init(void)
-+{
-+	int rc;
-+
-+	if (!xen_domain())
-+		return -ENODEV;
-+
-+	rc = misc_register(&gntdev_miscdev);
-+	if (rc)
-+		printk(KERN_ERR "Could not register gntdev device\n");
-+	return rc;
-+}
-+
-+/* Module exit: remove the misc device registered by gntdev_init(). */
-+static void __exit gntdev_exit(void)
-+{
-+ misc_deregister(&gntdev_miscdev);
-+}
-+
-+module_init(gntdev_init);
-+module_exit(gntdev_exit);
-+
-+/* ------------------------------------------------------------------ */
-diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
-index 7d8f531..09bb742 100644
---- a/drivers/xen/grant-table.c
-+++ b/drivers/xen/grant-table.c
-@@ -36,10 +36,13 @@
- #include <linux/mm.h>
- #include <linux/vmalloc.h>
- #include <linux/uaccess.h>
-+#include <linux/io.h>
-
-+#include <xen/xen.h>
- #include <xen/interface/xen.h>
- #include <xen/page.h>
- #include <xen/grant_table.h>
-+#include <xen/interface/memory.h>
- #include <asm/xen/hypercall.h>
-
- #include <asm/pgtable.h>
-@@ -57,6 +60,8 @@ static unsigned int boot_max_nr_grant_frames;
- static int gnttab_free_count;
- static grant_ref_t gnttab_free_head;
- static DEFINE_SPINLOCK(gnttab_list_lock);
-+unsigned long xen_hvm_resume_frames;
-+EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
-
- static struct grant_entry *shared;
-
-@@ -431,7 +436,7 @@ static unsigned int __max_nr_grant_frames(void)
- return query.max_nr_frames;
- }
-
--static inline unsigned int max_nr_grant_frames(void)
-+unsigned int gnttab_max_grant_frames(void)
- {
- unsigned int xen_max = __max_nr_grant_frames();
-
-@@ -439,6 +444,7 @@ static inline unsigned int max_nr_grant_frames(void)
- return boot_max_nr_grant_frames;
- return xen_max;
- }
-+EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
-
- static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
- {
-@@ -447,6 +453,30 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
- unsigned int nr_gframes = end_idx + 1;
- int rc;
-
-+ if (xen_hvm_domain()) {
-+ struct xen_add_to_physmap xatp;
-+ unsigned int i = end_idx;
-+ rc = 0;
-+ /*
-+ * Loop backwards, so that the first hypercall has the largest
-+ * index, ensuring that the table will grow only once.
-+ */
-+ do {
-+ xatp.domid = DOMID_SELF;
-+ xatp.idx = i;
-+ xatp.space = XENMAPSPACE_grant_table;
-+ xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
-+ rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
-+ if (rc != 0) {
-+ printk(KERN_WARNING
-+ "grant table add_to_physmap failed, err=%d\n", rc);
-+ break;
-+ }
-+ } while (i-- > start_idx);
-+
-+ return rc;
-+ }
-+
- frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
- if (!frames)
- return -ENOMEM;
-@@ -463,7 +493,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
-
- BUG_ON(rc || setup.status);
-
-- rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
-+ rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(),
- &shared);
- BUG_ON(rc);
-
-@@ -472,11 +502,127 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
- return 0;
- }
-
-+/*
-+ * Foreign-page destructor that gnttab_copy_grant_page() installs on the
-+ * page it has replaced: clear the foreign marking, reset the page's count
-+ * and mapcount, then drop a reference. Only order-0 pages are supported.
-+ */
-+static void gnttab_page_free(struct page *page, unsigned int order)
-+{
-+ BUG_ON(order);
-+ ClearPageForeign(page);
-+ gnttab_reset_grant_page(page);
-+ put_page(page);
-+}
-+
-+/*
-+ * Must not be called with IRQs off. This should only be used on the
-+ * slow path.
-+ *
-+ * Copy a foreign granted page to local memory.
-+ *
-+ * On success *pagep is replaced by a freshly allocated local page holding
-+ * a copy of the data, and 0 is returned. Returns -ENOENT if the page's
-+ * refcount is already zero, -ENOMEM if no page can be allocated, and
-+ * -EBUSY if the page is currently mapped; *pagep is unchanged on failure.
-+ */
-+int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
-+{
-+ struct gnttab_unmap_and_replace unmap;
-+ struct mmu_update mmu;
-+ struct page *page;
-+ struct page *new_page;
-+ void *new_addr;
-+ void *addr;
-+ unsigned long pfn;
-+ unsigned long mfn;
-+ unsigned long new_mfn;
-+ int err;
-+
-+ page = *pagep;
-+ /* hold a reference for the duration; dropped at "out" */
-+ if (!get_page_unless_zero(page))
-+ return -ENOENT;
-+
-+ err = -ENOMEM;
-+ new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
-+ if (!new_page)
-+ goto out;
-+
-+ new_addr = page_address(new_page);
-+ addr = page_address(page);
-+ memcpy(new_addr, addr, PAGE_SIZE);
-+
-+ pfn = page_to_pfn(page);
-+ mfn = pfn_to_mfn(pfn);
-+ new_mfn = virt_to_mfn(new_addr);
-+
-+ /* Make seq visible before checking page_mapped. */
-+ smp_mb();
-+
-+ /* Has the page been DMA-mapped? */
-+ if (unlikely(page_mapped(page))) {
-+ put_page(new_page);
-+ err = -EBUSY;
-+ goto out;
-+ }
-+
-+ /* point the old pfn at the machine frame that holds the copy */
-+ if (!xen_feature(XENFEAT_auto_translated_physmap))
-+ set_phys_to_machine(pfn, new_mfn);
-+
-+ unmap.host_addr = (unsigned long)addr;
-+ unmap.new_addr = (unsigned long)new_addr;
-+ unmap.handle = ref;
-+
-+ /* drop the grant mapping at addr and put the copy in its place */
-+ err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
-+ &unmap, 1);
-+ BUG_ON(err);
-+ BUG_ON(unmap.status);
-+
-+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-+ /* new_page's own pfn no longer has a machine frame behind it */
-+ set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
-+
-+ /* machine-to-physical entry: new_mfn now belongs to pfn */
-+ mmu.ptr = PFN_PHYS(new_mfn) | MMU_MACHPHYS_UPDATE;
-+ mmu.val = pfn;
-+ err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
-+ BUG_ON(err);
-+ }
-+
-+ /* the replacement inherits mapping, destructor and reserved state */
-+ new_page->mapping = page->mapping;
-+ SetPageForeign(new_page, _PageForeignDestructor(page));
-+ if (PageReserved(page))
-+ SetPageReserved(new_page);
-+ *pagep = new_page;
-+
-+ /* the old page is cleaned up by gnttab_page_free() from now on */
-+ SetPageForeign(page, gnttab_page_free);
-+ ClearPageReserved(page);
-+ page->mapping = NULL;
-+
-+out:
-+ put_page(page);
-+ return err;
-+}
-+EXPORT_SYMBOL_GPL(gnttab_copy_grant_page);
-+
-+/* Reinitialise the reference count and the mapcount of @page. */
-+void gnttab_reset_grant_page(struct page *page)
-+{
-+ init_page_count(page);
-+ reset_page_mapcount(page);
-+}
-+EXPORT_SYMBOL_GPL(gnttab_reset_grant_page);
-+
-+/*
-+ * (Re)establish the grant-table mapping for the current number of frames.
-+ *
-+ * Returns -ENOSYS if the maximum frame count is now smaller than what is
-+ * in use. PV domains return the result of gnttab_map() directly. Other
-+ * domains ioremap the shared area at xen_hvm_resume_frames on first use
-+ * (-ENOMEM on failure) and then call gnttab_map().
-+ * NOTE(review): on the non-PV path the result of gnttab_map() is discarded
-+ * and the ioremap warning has no trailing newline.
-+ */
- int gnttab_resume(void)
- {
-- if (max_nr_grant_frames() < nr_grant_frames)
-+ unsigned int max_nr_gframes;
-+
-+ max_nr_gframes = gnttab_max_grant_frames();
-+ if (max_nr_gframes < nr_grant_frames)
- return -ENOSYS;
-- return gnttab_map(0, nr_grant_frames - 1);
-+
-+ if (xen_pv_domain())
-+ return gnttab_map(0, nr_grant_frames - 1);
-+
-+ if (!shared) {
-+ shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes);
-+ if (shared == NULL) {
-+ printk(KERN_WARNING
-+ "Failed to ioremap gnttab share frames!");
-+ return -ENOMEM;
-+ }
-+ }
-+
-+ gnttab_map(0, nr_grant_frames - 1);
-+
-+ return 0;
- }
-
- int gnttab_suspend(void)
-@@ -493,7 +639,7 @@ static int gnttab_expand(unsigned int req_entries)
- cur = nr_grant_frames;
- extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
- GREFS_PER_GRANT_FRAME);
-- if (cur + extra > max_nr_grant_frames())
-+ if (cur + extra > gnttab_max_grant_frames())
- return -ENOSPC;
-
- rc = gnttab_map(cur, cur + extra - 1);
-@@ -503,15 +649,12 @@ static int gnttab_expand(unsigned int req_entries)
- return rc;
- }
-
--static int __devinit gnttab_init(void)
-+int gnttab_init(void)
- {
- int i;
- unsigned int max_nr_glist_frames, nr_glist_frames;
- unsigned int nr_init_grefs;
-
-- if (!xen_domain())
-- return -ENODEV;
--
- nr_grant_frames = 1;
- boot_max_nr_grant_frames = __max_nr_grant_frames();
-
-@@ -554,5 +697,18 @@ static int __devinit gnttab_init(void)
- kfree(gnttab_list);
- return -ENOMEM;
- }
-+EXPORT_SYMBOL_GPL(gnttab_init);
-+
-+/*
-+ * core_initcall wrapper around gnttab_init(). HVM domains return 0 here
-+ * without initialising, since their grant-table setup is delayed; any
-+ * other non-PV environment gets -ENODEV.
-+ */
-+static int __devinit __gnttab_init(void)
-+{
-+	if (xen_hvm_domain())
-+		return 0;
-+
-+	return xen_pv_domain() ? gnttab_init() : -ENODEV;
-+}
-
--core_initcall(gnttab_init);
-+core_initcall(__gnttab_init);
-diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
-index 5d42d55..0b50906 100644
---- a/drivers/xen/manage.c
-+++ b/drivers/xen/manage.c
-@@ -8,6 +8,7 @@
- #include <linux/stop_machine.h>
- #include <linux/freezer.h>
-
-+#include <xen/xen.h>
- #include <xen/xenbus.h>
- #include <xen/grant_table.h>
- #include <xen/events.h>
-@@ -32,10 +33,30 @@ enum shutdown_state {
- static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
-
- #ifdef CONFIG_PM_SLEEP
--static int xen_suspend(void *data)
-+/*
-+ * stop_machine() callback used by do_suspend() for HVM domains. @data
-+ * points at an int that receives the result of the SCHEDOP_shutdown
-+ * hypercall ("cancelled"). Must run with interrupts disabled. Always
-+ * returns 0.
-+ */
-+static int xen_hvm_suspend(void *data)
- {
-+ struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
- int *cancelled = data;
-+
-+ BUG_ON(!irqs_disabled());
-+
-+ *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
-+
-+ /* everything below runs once the hypercall has returned */
-+ xen_hvm_post_suspend(*cancelled);
-+ gnttab_resume();
-+
-+ /* IRQs and timers are only re-established if the suspend was not cancelled */
-+ if (!*cancelled) {
-+ xen_irq_resume();
-+ xen_timer_resume();
-+ }
-+
-+ return 0;
-+}
-+
-+static int xen_suspend(void *data)
-+{
- int err;
-+ int *cancelled = data;
-
- BUG_ON(!irqs_disabled());
-
-@@ -111,7 +132,10 @@ static void do_suspend(void)
- goto out_resume;
- }
-
-- err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
-+ if (xen_hvm_domain())
-+ err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
-+ else
-+ err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
-
- dpm_resume_noirq(PMSG_RESUME);
-
-@@ -260,7 +284,19 @@ static int shutdown_event(struct notifier_block *notifier,
- return NOTIFY_DONE;
- }
-
--static int __init setup_shutdown_event(void)
-+/*
-+ * subsys_initcall wrapper around xen_setup_shutdown_event(). HVM domains
-+ * return 0 here without doing the setup, which is delayed for them; any
-+ * other non-PV environment gets -ENODEV.
-+ */
-+static int __init __setup_shutdown_event(void)
-+{
-+	if (xen_hvm_domain())
-+		return 0;
-+
-+	return xen_pv_domain() ? xen_setup_shutdown_event() : -ENODEV;
-+}
-+
-+int xen_setup_shutdown_event(void)
- {
- static struct notifier_block xenstore_notifier = {
- .notifier_call = shutdown_event
-@@ -269,5 +305,6 @@ static int __init setup_shutdown_event(void)
-
- return 0;
- }
-+EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
-
--subsys_initcall(setup_shutdown_event);
-+subsys_initcall(__setup_shutdown_event);
-diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
-new file mode 100644
-index 0000000..da566a5
---- /dev/null
-+++ b/drivers/xen/mce.c
-@@ -0,0 +1,216 @@
-+/******************************************************************************
-+ * mce.c
-+ * Add Machine Check event Logging support in DOM0
-+ *
-+ * Driver for receiving and logging machine check event
-+ *
-+ * Copyright (c) 2008, 2009 Intel Corporation
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/types.h>
-+#include <linux/kernel.h>
-+#include <xen/interface/xen.h>
-+#include <asm/xen/hypervisor.h>
-+#include <xen/events.h>
-+#include <xen/interface/vcpu.h>
-+#include <asm/xen/hypercall.h>
-+#include <asm/mce.h>
-+#include <xen/xen.h>
-+
-+/* Buffer the hypervisor fills with one fetched machine-check record. */
-+static mc_info_t *g_mi;
-+/* Per-CPU info array with ncpus entries, filled in bind_virq_for_mce(). */
-+static mcinfo_logical_cpu_t *g_physinfo;
-+/* CPU count reported by the XEN_MC_physcpuinfo query. */
-+static uint32_t ncpus;
-+
-+/*
-+ * Convert the machine-check record @mi into struct mce entries and hand
-+ * each bank entry to mce_log().
-+ *
-+ * The global section of the record supplies the status and the APIC id;
-+ * the APIC id selects the matching entry of g_physinfo, which supplies
-+ * socket, CPU number, vendor and MCG capabilities.
-+ *
-+ * Returns 0 on success, or -ENODEV (after a warning) when the record has
-+ * no global section or its APIC id matches no known CPU. The caller stops
-+ * processing, without acknowledging the record, on a non-zero return.
-+ */
-+static int convert_log(struct mc_info *mi)
-+{
-+	struct mcinfo_common *mic = NULL;
-+	struct mcinfo_global *mc_global;
-+	struct mcinfo_bank *mc_bank;
-+	struct mce m;
-+	uint32_t i;
-+
-+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
-+	/* without a global section there is nothing to dereference below */
-+	if (WARN_ON(!mic))
-+		return -ENODEV;
-+
-+	mce_setup(&m);
-+	mc_global = (struct mcinfo_global *)mic;
-+	m.mcgstatus = mc_global->mc_gstatus;
-+	m.apicid = mc_global->mc_apicid;
-+	for (i = 0; i < ncpus; i++) {
-+		if (g_physinfo[i].mc_apicid == m.apicid)
-+			break;
-+	}
-+	/* i == ncpus means no match; indexing g_physinfo would overrun it */
-+	if (WARN_ON(i == ncpus))
-+		return -ENODEV;
-+
-+	m.socketid = g_physinfo[i].mc_chipid;
-+	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
-+	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
-+	m.mcgcap = g_physinfo[i].mc_msrvalues[0].value;
-+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
-+	do {
-+		if (mic == NULL || mic->size == 0)
-+			break;
-+		if (mic->type == MC_TYPE_BANK) {
-+			mc_bank = (struct mcinfo_bank *)mic;
-+			m.misc = mc_bank->mc_misc;
-+			m.status = mc_bank->mc_status;
-+			m.addr = mc_bank->mc_addr;
-+			m.tsc = mc_bank->mc_tsc;
-+			m.bank = mc_bank->mc_bank;
-+			m.finished = 1;
-+			/*log this record*/
-+			mce_log(&m);
-+		}
-+		mic = x86_mcinfo_next(mic);
-+	} while (1);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Fetch, log and acknowledge pending machine-check records of one urgency
-+ * class (@urgency is XEN_MC_URGENT or XEN_MC_NONURGENT) until the fetch
-+ * fails or reports no more data.
-+ *
-+ * Each record is fetched into g_mi, converted with convert_log() and then
-+ * acknowledged with XEN_MC_ACK so that the entry is released.
-+ *
-+ * Returns 0 once the class is drained (or a fetch failed), and the error
-+ * from convert_log() if a record could not be converted.
-+ */
-+static int mce_drain_class(xen_mc_t *mc_op, uint32_t urgency)
-+{
-+	int result;
-+
-+	for (;;) {
-+		mc_op->u.mc_fetch.flags = urgency;
-+		result = HYPERVISOR_mca(mc_op);
-+		if (result ||
-+		    (mc_op->u.mc_fetch.flags & XEN_MC_NODATA) ||
-+		    (mc_op->u.mc_fetch.flags & XEN_MC_FETCHFAILED))
-+			return 0;
-+
-+		result = convert_log(g_mi);
-+		if (result)
-+			return result;
-+
-+		/* release the entry that has just been logged */
-+		mc_op->u.mc_fetch.flags = urgency | XEN_MC_ACK;
-+		result = HYPERVISOR_mca(mc_op);
-+	}
-+}
-+
-+/*
-+ * pv_ops domain MCE virq handler: log physical machine-check error info.
-+ * Urgent records are drained first, then non-urgent ones; a conversion
-+ * failure ends processing for this interrupt.
-+ */
-+static irqreturn_t mce_dom_interrupt(int irq, void *dev_id)
-+{
-+	xen_mc_t mc_op;
-+
-+	mc_op.cmd = XEN_MC_fetch;
-+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
-+	set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
-+
-+	if (mce_drain_class(&mc_op, XEN_MC_URGENT) == 0)
-+		mce_drain_class(&mc_op, XEN_MC_NONURGENT);
-+
-+	return IRQ_HANDLED;
-+}
-+
-+/*
-+ * Allocate the fetch buffer, query the hypervisor for the physical CPU
-+ * count and per-CPU info, then bind mce_dom_interrupt() to VIRQ_MCA.
-+ *
-+ * Returns 0 on success. On any failure both global buffers are freed and
-+ * reset to NULL, and the error is returned: -ENOMEM for allocation
-+ * failures, otherwise whatever the hypercall or the bind reported.
-+ */
-+static int bind_virq_for_mce(void)
-+{
-+	int ret;
-+	xen_mc_t mc_op;
-+
-+	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
-+	if (!g_mi)
-+		return -ENOMEM;
-+
-+	/*
-+	 * Fetch physical CPU Numbers. g_physinfo has not been allocated yet;
-+	 * presumably this first call only reports the count - confirm
-+	 * against the XEN_MC_physcpuinfo interface.
-+	 */
-+	mc_op.cmd = XEN_MC_physcpuinfo;
-+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
-+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
-+	ret = HYPERVISOR_mca(&mc_op);
-+	if (ret) {
-+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU numbers\n");
-+		goto err_free;
-+	}
-+
-+	/* Fetch each CPU Physical Info for later reference*/
-+	ncpus = mc_op.u.mc_physcpuinfo.ncpus;
-+	/* kcalloc() refuses an ncpus * size product that would overflow */
-+	g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu),
-+			     GFP_KERNEL);
-+	if (!g_physinfo) {
-+		ret = -ENOMEM;
-+		goto err_free;
-+	}
-+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
-+	ret = HYPERVISOR_mca(&mc_op);
-+	if (ret) {
-+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs info\n");
-+		goto err_free;
-+	}
-+
-+	ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
-+				      mce_dom_interrupt, 0, "mce", NULL);
-+	if (ret < 0) {
-+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
-+		goto err_free;
-+	}
-+
-+	return 0;
-+
-+err_free:
-+	/* kfree(NULL) is fine; clear the globals so nothing dangles */
-+	kfree(g_physinfo);
-+	g_physinfo = NULL;
-+	kfree(g_mi);
-+	g_mi = NULL;
-+	return ret;
-+}
-+
-+/*
-+ * Module init. Only the initial domain (DOM0) is responsible for MCE
-+ * logging; every other domain succeeds without doing anything.
-+ */
-+static int __init mcelog_init(void)
-+{
-+	if (!xen_initial_domain())
-+		return 0;
-+
-+	return bind_virq_for_mce();
-+}
-+
-+
-+/*
-+ * Module exit: free the buffers allocated by bind_virq_for_mce().
-+ * NOTE(review): nothing here unbinds the VIRQ_MCA handler, which reads
-+ * both buffers - confirm this cannot run after unload.
-+ */
-+static void __exit mcelog_cleanup(void)
-+{
-+ kfree(g_mi);
-+ kfree(g_physinfo);
-+}
-+module_init(mcelog_init);
-+module_exit(mcelog_cleanup);
-+
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
-new file mode 100644
-index 0000000..e346e81
---- /dev/null
-+++ b/drivers/xen/netback/Makefile
-@@ -0,0 +1,3 @@
-+obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
-+
-+xen-netback-y := netback.o xenbus.o interface.o
-diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
-new file mode 100644
-index 0000000..feacf5f
---- /dev/null
-+++ b/drivers/xen/netback/common.h
-@@ -0,0 +1,329 @@
-+/******************************************************************************
-+ * arch/xen/drivers/netif/backend/common.h
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#ifndef __NETIF__BACKEND__COMMON_H__
-+#define __NETIF__BACKEND__COMMON_H__
-+
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/interrupt.h>
-+#include <linux/slab.h>
-+#include <linux/ip.h>
-+#include <linux/in.h>
-+#include <linux/netdevice.h>
-+#include <linux/etherdevice.h>
-+#include <linux/wait.h>
-+#include <linux/sched.h>
-+
-+#include <xen/interface/io/netif.h>
-+#include <asm/io.h>
-+#include <asm/pgalloc.h>
-+#include <xen/interface/grant_table.h>
-+#include <xen/grant_table.h>
-+#include <xen/xenbus.h>
-+
-+/* Debug message through pr_debug(), prefixed with the emitting file and line. */
-+#define DPRINTK(_f, _a...) \
-+ pr_debug("(file=%s, line=%d) " _f, \
-+ __FILE__ , __LINE__ , ## _a )
-+/* KERN_INFO message tagged "xen_net: ". */
-+#define IPRINTK(fmt, args...) \
-+ printk(KERN_INFO "xen_net: " fmt, ##args)
-+/* KERN_WARNING message tagged "xen_net: ". */
-+#define WPRINTK(fmt, args...) \
-+ printk(KERN_WARNING "xen_net: " fmt, ##args)
-+
-+/* State of one netback interface. */
-+struct xen_netif {
-+ /* Unique identifier for this interface. */
-+ domid_t domid;
-+ int group;
-+ unsigned int handle;
-+
-+ /* presumably the frontend's MAC address (6 bytes) - TODO confirm */
-+ u8 fe_dev_addr[6];
-+
-+ /* Physical parameters of the comms window. */
-+ grant_handle_t tx_shmem_handle;
-+ grant_ref_t tx_shmem_ref;
-+ grant_handle_t rx_shmem_handle;
-+ grant_ref_t rx_shmem_ref;
-+ unsigned int irq;
-+
-+ /* The shared rings and indexes. */
-+ struct xen_netif_tx_back_ring tx;
-+ struct xen_netif_rx_back_ring rx;
-+ struct vm_struct *tx_comms_area;
-+ struct vm_struct *rx_comms_area;
-+
-+ /* Flags that must not be set in dev->features */
-+ int features_disabled;
-+
-+ /* Frontend feature information. */
-+ u8 can_sg:1;
-+ u8 gso:1;
-+ u8 gso_prefix:1;
-+ u8 csum:1;
-+ u8 smart_poll:1;
-+
-+ /* Internal feature information. */
-+ u8 can_queue:1; /* can queue packets for receiver? */
-+
-+ /* Allow netif_be_start_xmit() to peek ahead in the rx request
-+ * ring. This is a prediction of what rx_req_cons will be once
-+ * all queued skbs are put on the ring. */
-+ RING_IDX rx_req_cons_peek;
-+
-+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
-+ unsigned long credit_bytes;
-+ unsigned long credit_usec;
-+ unsigned long remaining_credit;
-+ struct timer_list credit_timeout;
-+
-+ /* Enforce draining of the transmit queue. */
-+ struct timer_list tx_queue_timeout;
-+
-+ /* Statistics */
-+ int nr_copied_skbs;
-+
-+ /* Miscellaneous private stuff. */
-+ struct list_head list; /* scheduling list */
-+ atomic_t refcnt;
-+ struct net_device *dev;
-+ struct net_device_stats stats;
-+
-+ /* private carrier flag, driven by the netback_carrier_*() macros */
-+ unsigned int carrier;
-+
-+ wait_queue_head_t waiting_to_free;
-+};
-+
-+/*
-+ * Implement our own carrier flag: the network stack's version causes delays
-+ * when the carrier is re-enabled (in particular, dev_activate() may not
-+ * immediately be called, which can cause packet loss; also the etherbridge
-+ * can be rather lazy in activating its port).
-+ */
-+#define netback_carrier_on(netif) ((netif)->carrier = 1)
-+#define netback_carrier_off(netif) ((netif)->carrier = 0)
-+#define netback_carrier_ok(netif) ((netif)->carrier)
-+
-+enum {
-+ NETBK_DONT_COPY_SKB,
-+ NETBK_DELAYED_COPY_SKB,
-+ NETBK_ALWAYS_COPY_SKB,
-+};
-+
-+extern int netbk_copy_skb_mode;
-+
-+/* Function pointers into netback accelerator plugin modules */
-+struct netback_accel_hooks {
-+ struct module *owner;
-+ int (*probe)(struct xenbus_device *dev);
-+ int (*remove)(struct xenbus_device *dev);
-+};
-+
-+/* Structure to track the state of a netback accelerator plugin */
-+struct netback_accelerator {
-+ struct list_head link;
-+ int id;
-+ char *eth_name;
-+ atomic_t use_count;
-+ struct netback_accel_hooks *hooks;
-+};
-+
-+struct backend_info {
-+ struct xenbus_device *dev;
-+ struct xen_netif *netif;
-+ enum xenbus_state frontend_state;
-+ struct xenbus_watch hotplug_status_watch;
-+ int have_hotplug_status_watch:1;
-+
-+ /* State relating to the netback accelerator */
-+ void *netback_accel_priv;
-+ /* The accelerator that this backend is currently using */
-+ struct netback_accelerator *accelerator;
-+};
-+
-+#define NETBACK_ACCEL_VERSION 0x00010001
-+
-+/*
-+ * Connect an accelerator plugin module to netback. Returns zero on
-+ * success, < 0 on error, > 0 (with highest version number supported)
-+ * if version mismatch.
-+ */
-+extern int netback_connect_accelerator(unsigned version,
-+ int id, const char *eth_name,
-+ struct netback_accel_hooks *hooks);
-+/* Disconnect a previously connected accelerator plugin module */
-+extern void netback_disconnect_accelerator(int id, const char *eth_name);
-+
-+
-+extern
-+void netback_probe_accelerators(struct backend_info *be,
-+ struct xenbus_device *dev);
-+extern
-+void netback_remove_accelerators(struct backend_info *be,
-+ struct xenbus_device *dev);
-+extern
-+void netif_accel_init(void);
-+
-+
-+#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
-+#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
-+
-+void netif_disconnect(struct xen_netif *netif);
-+
-+void netif_set_features(struct xen_netif *netif);
-+struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
-+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
-+ unsigned long rx_ring_ref, unsigned int evtchn);
-+
-+static inline void netif_get(struct xen_netif *netif)
-+{
-+ atomic_inc(&netif->refcnt);
-+}
-+
-+static inline void netif_put(struct xen_netif *netif)
-+{
-+ if (atomic_dec_and_test(&netif->refcnt))
-+ wake_up(&netif->waiting_to_free);
-+}
-+
-+int netif_xenbus_init(void);
-+
-+#define netif_schedulable(netif) \
-+ (netif_running((netif)->dev) && netback_carrier_ok(netif))
-+
-+void netif_schedule_work(struct xen_netif *netif);
-+void netif_deschedule_work(struct xen_netif *netif);
-+
-+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
-+struct net_device_stats *netif_be_get_stats(struct net_device *dev);
-+irqreturn_t netif_be_int(int irq, void *dev_id);
-+
-+static inline int netbk_can_queue(struct net_device *dev)
-+{
-+ struct xen_netif *netif = netdev_priv(dev);
-+ return netif->can_queue;
-+}
-+
-+static inline int netbk_can_sg(struct net_device *dev)
-+{
-+ struct xen_netif *netif = netdev_priv(dev);
-+ return netif->can_sg;
-+}
-+
-+struct pending_tx_info {
-+ struct xen_netif_tx_request req;
-+ struct xen_netif *netif;
-+};
-+typedef unsigned int pending_ring_idx_t;
-+
-+struct netbk_rx_meta {
-+ int id;
-+ int size;
-+ int gso_size;
-+};
-+
-+struct netbk_tx_pending_inuse {
-+ struct list_head list;
-+ unsigned long alloc_time;
-+};
-+
-+#define MAX_PENDING_REQS 256
-+
-+#define MAX_BUFFER_OFFSET PAGE_SIZE
-+
-+/* extra field used in struct page */
-+union page_ext {
-+ struct {
-+#if BITS_PER_LONG < 64
-+#define IDX_WIDTH 8
-+#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
-+ unsigned int group:GROUP_WIDTH;
-+ unsigned int idx:IDX_WIDTH;
-+#else
-+ unsigned int group, idx;
-+#endif
-+ } e;
-+ void *mapping;
-+};
-+
-+struct xen_netbk {
-+ union {
-+ struct {
-+ struct tasklet_struct net_tx_tasklet;
-+ struct tasklet_struct net_rx_tasklet;
-+ } tasklet;
-+
-+ struct {
-+ wait_queue_head_t netbk_action_wq;
-+ struct task_struct *task;
-+ } kthread;
-+ };
-+
-+ struct sk_buff_head rx_queue;
-+ struct sk_buff_head tx_queue;
-+
-+ struct timer_list net_timer;
-+ struct timer_list netbk_tx_pending_timer;
-+
-+ struct page **mmap_pages;
-+
-+ pending_ring_idx_t pending_prod;
-+ pending_ring_idx_t pending_cons;
-+ pending_ring_idx_t dealloc_prod;
-+ pending_ring_idx_t dealloc_cons;
-+
-+ struct list_head pending_inuse_head;
-+ struct list_head net_schedule_list;
-+
-+ /* Protect the net_schedule_list in netif. */
-+ spinlock_t net_schedule_list_lock;
-+
-+ atomic_t netfront_count;
-+
-+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-+ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
-+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
-+ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
-+
-+ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-+ u16 pending_ring[MAX_PENDING_REQS];
-+ u16 dealloc_ring[MAX_PENDING_REQS];
-+
-+ /*
-+ * Each head or fragment can be up to 4096 bytes. Given
-+ * MAX_BUFFER_OFFSET of 4096 the worst case is that each
-+ * head/fragment uses 2 copy operation.
-+ */
-+ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
-+ unsigned char rx_notify[NR_IRQS];
-+ u16 notify_list[NET_RX_RING_SIZE];
-+ struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
-+};
-+
-+extern struct xen_netbk *xen_netbk;
-+extern int xen_netbk_group_nr;
-+
-+#endif /* __NETIF__BACKEND__COMMON_H__ */
-diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
-new file mode 100644
-index 0000000..2e8508a
---- /dev/null
-+++ b/drivers/xen/netback/interface.c
-@@ -0,0 +1,475 @@
-+/******************************************************************************
-+ * arch/xen/drivers/netif/backend/interface.c
-+ *
-+ * Network-device interface management.
-+ *
-+ * Copyright (c) 2004-2005, Keir Fraser
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#include "common.h"
-+#include <linux/ethtool.h>
-+#include <linux/rtnetlink.h>
-+
-+#include <xen/events.h>
-+#include <asm/xen/hypercall.h>
-+
-+/*
-+ * Module parameter 'queue_length':
-+ *
-+ * Enables queuing in the network stack when a client has run out of receive
-+ * descriptors. Although this feature can improve receive bandwidth by avoiding
-+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
-+ * unbounded time. This is bad if those packets hold onto foreign resources.
-+ * For example, consider a packet that holds onto resources belonging to the
-+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
-+ * vif1.1 which is not activated in the guest): in this situation the guest
-+ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
-+ * run a timer (tx_queue_timeout) to drain the queue when the interface is
-+ * blocked.
-+ */
-+static unsigned long netbk_queue_length = 32;
-+module_param_named(queue_length, netbk_queue_length, ulong, 0644);
-+
-+static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
-+ struct xen_netif *netif)
-+{
-+ int i;
-+ int min_netfront_count;
-+ int min_group = 0;
-+ min_netfront_count = atomic_read(&netbk[0].netfront_count);
-+ for (i = 0; i < group_nr; i++) {
-+ int netfront_count = atomic_read(&netbk[i].netfront_count);
-+ if (netfront_count < min_netfront_count) {
-+ min_group = i;
-+ min_netfront_count = netfront_count;
-+ }
-+ }
-+
-+ netif->group = min_group;
-+ atomic_inc(&netbk[netif->group].netfront_count);
-+}
-+
-+static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
-+{
-+ atomic_dec(&netbk[netif->group].netfront_count);
-+}
-+
-+static void __netif_up(struct xen_netif *netif)
-+{
-+ netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
-+ enable_irq(netif->irq);
-+ netif_schedule_work(netif);
-+}
-+
-+static void __netif_down(struct xen_netif *netif)
-+{
-+ disable_irq(netif->irq);
-+ netif_deschedule_work(netif);
-+ netbk_remove_netif(xen_netbk, netif);
-+}
-+
-+static int net_open(struct net_device *dev)
-+{
-+ struct xen_netif *netif = netdev_priv(dev);
-+ if (netback_carrier_ok(netif)) {
-+ __netif_up(netif);
-+ netif_start_queue(dev);
-+ }
-+ return 0;
-+}
-+
-+static int net_close(struct net_device *dev)
-+{
-+ struct xen_netif *netif = netdev_priv(dev);
-+ if (netback_carrier_ok(netif))
-+ __netif_down(netif);
-+ netif_stop_queue(dev);
-+ return 0;
-+}
-+
-+static int netbk_change_mtu(struct net_device *dev, int mtu)
-+{
-+ int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
-+
-+ if (mtu > max)
-+ return -EINVAL;
-+ dev->mtu = mtu;
-+ return 0;
-+}
-+
-+void netif_set_features(struct xen_netif *netif)
-+{
-+ struct net_device *dev = netif->dev;
-+ int features = dev->features;
-+
-+ if (netif->can_sg)
-+ features |= NETIF_F_SG;
-+ if (netif->gso || netif->gso_prefix)
-+ features |= NETIF_F_TSO;
-+ if (netif->csum)
-+ features |= NETIF_F_IP_CSUM;
-+
-+ features &= ~(netif->features_disabled);
-+
-+ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
-+ dev->mtu = ETH_DATA_LEN;
-+
-+ dev->features = features;
-+}
-+
-+static int netbk_set_tx_csum(struct net_device *dev, u32 data)
-+{
-+ struct xen_netif *netif = netdev_priv(dev);
-+ if (data) {
-+ if (!netif->csum)
-+ return -ENOSYS;
-+ netif->features_disabled &= ~NETIF_F_IP_CSUM;
-+ } else {
-+ netif->features_disabled |= NETIF_F_IP_CSUM;
-+ }
-+
-+ netif_set_features(netif);
-+ return 0;
-+}
-+
-+static int netbk_set_sg(struct net_device *dev, u32 data)
-+{
-+ struct xen_netif *netif = netdev_priv(dev);
-+ if (data) {
-+ if (!netif->can_sg)
-+ return -ENOSYS;
-+ netif->features_disabled &= ~NETIF_F_SG;
-+ } else {
-+ netif->features_disabled |= NETIF_F_SG;
-+ }
-+
-+ netif_set_features(netif);
-+ return 0;
-+}
-+
-+static int netbk_set_tso(struct net_device *dev, u32 data)
-+{
-+ struct xen_netif *netif = netdev_priv(dev);
-+ if (data) {
-+ if (!netif->gso && !netif->gso_prefix)
-+ return -ENOSYS;
-+ netif->features_disabled &= ~NETIF_F_TSO;
-+ } else {
-+ netif->features_disabled |= NETIF_F_TSO;
-+ }
-+
-+ netif_set_features(netif);
-+ return 0;
-+}
-+
-+static void netbk_get_drvinfo(struct net_device *dev,
-+ struct ethtool_drvinfo *info)
-+{
-+ strcpy(info->driver, "netbk");
-+ strcpy(info->bus_info, dev_name(dev->dev.parent));
-+}
-+
-+static const struct netif_stat {
-+ char name[ETH_GSTRING_LEN];
-+ u16 offset;
-+} netbk_stats[] = {
-+ { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
-+};
-+
-+static int netbk_get_sset_count(struct net_device *dev, int string_set)
-+{
-+ switch (string_set) {
-+ case ETH_SS_STATS:
-+ return ARRAY_SIZE(netbk_stats);
-+ default:
-+ return -EINVAL;
-+ }
-+}
-+
-+static void netbk_get_ethtool_stats(struct net_device *dev,
-+ struct ethtool_stats *stats, u64 * data)
-+{
-+ void *netif = netdev_priv(dev);
-+ int i;
-+
-+ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
-+ data[i] = *(int *)(netif + netbk_stats[i].offset);
-+}
-+
-+static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
-+{
-+ int i;
-+
-+ switch (stringset) {
-+ case ETH_SS_STATS:
-+ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
-+ memcpy(data + i * ETH_GSTRING_LEN,
-+ netbk_stats[i].name, ETH_GSTRING_LEN);
-+ break;
-+ }
-+}
-+
-+static struct ethtool_ops network_ethtool_ops =
-+{
-+ .get_drvinfo = netbk_get_drvinfo,
-+
-+ .get_tx_csum = ethtool_op_get_tx_csum,
-+ .set_tx_csum = netbk_set_tx_csum,
-+ .get_sg = ethtool_op_get_sg,
-+ .set_sg = netbk_set_sg,
-+ .get_tso = ethtool_op_get_tso,
-+ .set_tso = netbk_set_tso,
-+ .get_link = ethtool_op_get_link,
-+
-+ .get_sset_count = netbk_get_sset_count,
-+ .get_ethtool_stats = netbk_get_ethtool_stats,
-+ .get_strings = netbk_get_strings,
-+};
-+
-+static struct net_device_ops netback_ops =
-+{
-+ .ndo_start_xmit = netif_be_start_xmit,
-+ .ndo_get_stats = netif_be_get_stats,
-+ .ndo_open = net_open,
-+ .ndo_stop = net_close,
-+ .ndo_change_mtu = netbk_change_mtu,
-+};
-+
-+struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
-+{
-+ int err = 0;
-+ struct net_device *dev;
-+ struct xen_netif *netif;
-+ char name[IFNAMSIZ] = {};
-+
-+ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
-+ dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
-+ if (dev == NULL) {
-+ DPRINTK("Could not create netif: out of memory\n");
-+ return ERR_PTR(-ENOMEM);
-+ }
-+
-+ SET_NETDEV_DEV(dev, parent);
-+
-+ netif = netdev_priv(dev);
-+ memset(netif, 0, sizeof(*netif));
-+ netif->domid = domid;
-+ netif->group = -1;
-+ netif->handle = handle;
-+ netif->can_sg = 1;
-+ netif->csum = 1;
-+ atomic_set(&netif->refcnt, 1);
-+ init_waitqueue_head(&netif->waiting_to_free);
-+ netif->dev = dev;
-+ INIT_LIST_HEAD(&netif->list);
-+
-+ netback_carrier_off(netif);
-+
-+ netif->credit_bytes = netif->remaining_credit = ~0UL;
-+ netif->credit_usec = 0UL;
-+ init_timer(&netif->credit_timeout);
-+ /* Initialize 'expires' now: it's used to track the credit window. */
-+ netif->credit_timeout.expires = jiffies;
-+
-+ init_timer(&netif->tx_queue_timeout);
-+
-+ dev->netdev_ops = &netback_ops;
-+ netif_set_features(netif);
-+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
-+
-+ dev->tx_queue_len = netbk_queue_length;
-+
-+ /*
-+ * Initialise a dummy MAC address. We choose the numerically
-+ * largest non-broadcast address to prevent the address getting
-+ * stolen by an Ethernet bridge for STP purposes.
-+ * (FE:FF:FF:FF:FF:FF)
-+ */
-+ memset(dev->dev_addr, 0xFF, ETH_ALEN);
-+ dev->dev_addr[0] &= ~0x01;
-+
-+ rtnl_lock();
-+ err = register_netdevice(dev);
-+ rtnl_unlock();
-+ if (err) {
-+ DPRINTK("Could not register new net device %s: err=%d\n",
-+ dev->name, err);
-+ free_netdev(dev);
-+ return ERR_PTR(err);
-+ }
-+
-+ DPRINTK("Successfully created netif\n");
-+ return netif;
-+}
-+
-+static int map_frontend_pages(
-+ struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
-+{
-+ struct gnttab_map_grant_ref op;
-+
-+ gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
-+ GNTMAP_host_map, tx_ring_ref, netif->domid);
-+
-+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-+ BUG();
-+
-+ if (op.status) {
-+ DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
-+ return op.status;
-+ }
-+
-+ netif->tx_shmem_ref = tx_ring_ref;
-+ netif->tx_shmem_handle = op.handle;
-+
-+ gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
-+ GNTMAP_host_map, rx_ring_ref, netif->domid);
-+
-+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-+ BUG();
-+
-+ if (op.status) {
-+ struct gnttab_unmap_grant_ref unop;
-+
-+ gnttab_set_unmap_op(&unop,
-+ (unsigned long)netif->tx_comms_area->addr,
-+ GNTMAP_host_map, netif->tx_shmem_handle);
-+ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
-+ DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
-+ return op.status;
-+ }
-+
-+ netif->rx_shmem_ref = rx_ring_ref;
-+ netif->rx_shmem_handle = op.handle;
-+
-+ return 0;
-+}
-+
-+static void unmap_frontend_pages(struct xen_netif *netif)
-+{
-+ struct gnttab_unmap_grant_ref op;
-+
-+ gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
-+ GNTMAP_host_map, netif->tx_shmem_handle);
-+
-+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-+ BUG();
-+
-+ gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
-+ GNTMAP_host_map, netif->rx_shmem_handle);
-+
-+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-+ BUG();
-+}
-+
-+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
-+ unsigned long rx_ring_ref, unsigned int evtchn)
-+{
-+ int err = -ENOMEM;
-+ struct xen_netif_tx_sring *txs;
-+ struct xen_netif_rx_sring *rxs;
-+
-+ /* Already connected through? */
-+ if (netif->irq)
-+ return 0;
-+
-+ netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
-+ if (netif->tx_comms_area == NULL)
-+ return -ENOMEM;
-+ netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
-+ if (netif->rx_comms_area == NULL)
-+ goto err_rx;
-+
-+ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
-+ if (err)
-+ goto err_map;
-+
-+ err = bind_interdomain_evtchn_to_irqhandler(
-+ netif->domid, evtchn, netif_be_int, 0,
-+ netif->dev->name, netif);
-+ if (err < 0)
-+ goto err_hypervisor;
-+ netif->irq = err;
-+ disable_irq(netif->irq);
-+
-+ txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
-+ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
-+
-+ rxs = (struct xen_netif_rx_sring *)
-+ ((char *)netif->rx_comms_area->addr);
-+ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
-+
-+ netif->rx_req_cons_peek = 0;
-+
-+ netif_get(netif);
-+
-+ rtnl_lock();
-+ netback_carrier_on(netif);
-+ if (netif_running(netif->dev))
-+ __netif_up(netif);
-+ rtnl_unlock();
-+
-+ return 0;
-+err_hypervisor:
-+ unmap_frontend_pages(netif);
-+err_map:
-+ free_vm_area(netif->rx_comms_area);
-+err_rx:
-+ free_vm_area(netif->tx_comms_area);
-+ return err;
-+}
-+
-+void netif_disconnect(struct xen_netif *netif)
-+{
-+ if (netback_carrier_ok(netif)) {
-+ rtnl_lock();
-+ netback_carrier_off(netif);
-+ netif_carrier_off(netif->dev); /* discard queued packets */
-+ if (netif_running(netif->dev))
-+ __netif_down(netif);
-+ rtnl_unlock();
-+ netif_put(netif);
-+ }
-+
-+ atomic_dec(&netif->refcnt);
-+ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
-+
-+ del_timer_sync(&netif->credit_timeout);
-+ del_timer_sync(&netif->tx_queue_timeout);
-+
-+ if (netif->irq)
-+ unbind_from_irqhandler(netif->irq, netif);
-+
-+ unregister_netdev(netif->dev);
-+
-+ if (netif->tx.sring) {
-+ unmap_frontend_pages(netif);
-+ free_vm_area(netif->tx_comms_area);
-+ free_vm_area(netif->rx_comms_area);
-+ }
-+
-+ free_netdev(netif->dev);
-+}
-diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
-new file mode 100644
-index 0000000..c448675
---- /dev/null
-+++ b/drivers/xen/netback/netback.c
-@@ -0,0 +1,1902 @@
-+/******************************************************************************
-+ * drivers/xen/netback/netback.c
-+ *
-+ * Back-end of the driver for virtual network devices. This portion of the
-+ * driver exports a 'unified' network-device interface that can be accessed
-+ * by any operating system that implements a compatible front end. A
-+ * reference front-end implementation can be found in:
-+ * drivers/xen/netfront/netfront.c
-+ *
-+ * Copyright (c) 2002-2005, K A Fraser
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#include "common.h"
-+
-+#include <linux/tcp.h>
-+#include <linux/udp.h>
-+#include <linux/kthread.h>
-+
-+#include <xen/balloon.h>
-+#include <xen/events.h>
-+#include <xen/interface/memory.h>
-+
-+#include <asm/xen/hypercall.h>
-+#include <asm/xen/page.h>
-+
-+/*define NETBE_DEBUG_INTERRUPT*/
-+
-+struct xen_netbk *xen_netbk;
-+int xen_netbk_group_nr;
-+
-+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
-+static void make_tx_response(struct xen_netif *netif,
-+ struct xen_netif_tx_request *txp,
-+ s8 st);
-+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
-+ u16 id,
-+ s8 st,
-+ u16 offset,
-+ u16 size,
-+ u16 flags);
-+
-+static void net_tx_action(unsigned long data);
-+
-+static void net_rx_action(unsigned long data);
-+
-+static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
-+ unsigned int idx)
-+{
-+ return page_to_pfn(netbk->mmap_pages[idx]);
-+}
-+
-+static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
-+ unsigned int idx)
-+{
-+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
-+}
-+
-+/* extra field used in struct page */
-+static inline void netif_set_page_ext(struct page *pg, unsigned int group,
-+ unsigned int idx)
-+{
-+ union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
-+
-+ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
-+ pg->mapping = ext.mapping;
-+}
-+
-+static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx)
-+{
-+ union page_ext ext = { .mapping = pg->mapping };
-+ struct xen_netbk *netbk;
-+ unsigned int group, idx;
-+
-+ if (!PageForeign(pg))
-+ return 0;
-+
-+ group = ext.e.group - 1;
-+
-+ if (group < 0 || group >= xen_netbk_group_nr)
-+ return 0;
-+
-+ netbk = &xen_netbk[group];
-+
-+ if (netbk->mmap_pages == NULL)
-+ return 0;
-+
-+ idx = ext.e.idx;
-+
-+ if ((idx < 0) || (idx >= MAX_PENDING_REQS))
-+ return 0;
-+
-+ if (netbk->mmap_pages[idx] != pg)
-+ return 0;
-+
-+ *_group = group;
-+ *_idx = idx;
-+
-+ return 1;
-+}
-+
-+/*
-+ * This is the amount of packet we copy rather than map, so that the
-+ * guest can't fiddle with the contents of the headers while we do
-+ * packet processing on them (netfilter, routing, etc). 72 is enough
-+ * to cover TCP+IP headers including options.
-+ */
-+#define PKT_PROT_LEN 72
-+
-+static inline pending_ring_idx_t pending_index(unsigned i)
-+{
-+ return i & (MAX_PENDING_REQS-1);
-+}
-+
-+static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
-+{
-+ return MAX_PENDING_REQS -
-+ netbk->pending_prod + netbk->pending_cons;
-+}
-+
-+/* Setting this allows the safe use of this driver without netloop. */
-+static int MODPARM_copy_skb = 1;
-+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
-+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
-+
-+int netbk_copy_skb_mode;
-+
-+static int MODPARM_netback_kthread;
-+module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
-+MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
-+
-+/*
-+ * Netback bottom half handler.
-+ * dir indicates the data direction.
-+ * rx: 1, tx: 0.
-+ */
-+static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
-+{
-+ if (MODPARM_netback_kthread)
-+ wake_up(&netbk->kthread.netbk_action_wq);
-+ else if (dir)
-+ tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
-+ else
-+ tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
-+}
-+
-+static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
-+{
-+ smp_mb();
-+ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
-+ !list_empty(&netbk->net_schedule_list))
-+ xen_netbk_bh_handler(netbk, 0);
-+}
-+
-+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
-+{
-+ struct skb_shared_info *ninfo;
-+ struct sk_buff *nskb;
-+ unsigned long offset;
-+ int ret;
-+ int len;
-+ int headlen;
-+
-+ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
-+
-+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
-+ if (unlikely(!nskb))
-+ goto err;
-+
-+ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
-+ headlen = skb_end_pointer(nskb) - nskb->data;
-+ if (headlen > skb_headlen(skb))
-+ headlen = skb_headlen(skb);
-+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
-+ BUG_ON(ret);
-+
-+ ninfo = skb_shinfo(nskb);
-+ ninfo->gso_size = skb_shinfo(skb)->gso_size;
-+ ninfo->gso_type = skb_shinfo(skb)->gso_type;
-+
-+ offset = headlen;
-+ len = skb->len - headlen;
-+
-+ nskb->len = skb->len;
-+ nskb->data_len = len;
-+ nskb->truesize += len;
-+
-+ while (len) {
-+ struct page *page;
-+ int copy;
-+ int zero;
-+
-+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
-+ dump_stack();
-+ goto err_free;
-+ }
-+
-+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
-+ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
-+
-+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
-+ if (unlikely(!page))
-+ goto err_free;
-+
-+ ret = skb_copy_bits(skb, offset, page_address(page), copy);
-+ BUG_ON(ret);
-+
-+ ninfo->frags[ninfo->nr_frags].page = page;
-+ ninfo->frags[ninfo->nr_frags].page_offset = 0;
-+ ninfo->frags[ninfo->nr_frags].size = copy;
-+ ninfo->nr_frags++;
-+
-+ offset += copy;
-+ len -= copy;
-+ }
-+
-+#ifdef NET_SKBUFF_DATA_USES_OFFSET
-+ offset = 0;
-+#else
-+ offset = nskb->data - skb->data;
-+#endif
-+
-+ nskb->transport_header = skb->transport_header + offset;
-+ nskb->network_header = skb->network_header + offset;
-+ nskb->mac_header = skb->mac_header + offset;
-+
-+ return nskb;
-+
-+ err_free:
-+ kfree_skb(nskb);
-+ err:
-+ return NULL;
-+}
-+
-+static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
-+{
-+ if (netif->can_sg || netif->gso || netif->gso_prefix)
-+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
-+ return 1; /* all in one */
-+}
-+
-+static inline int netbk_queue_full(struct xen_netif *netif)
-+{
-+ RING_IDX peek = netif->rx_req_cons_peek;
-+ RING_IDX needed = netbk_max_required_rx_slots(netif);
-+
-+ return ((netif->rx.sring->req_prod - peek) < needed) ||
-+ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
-+}
-+
-+static void tx_queue_callback(unsigned long data)
-+{
-+ struct xen_netif *netif = (struct xen_netif *)data;
-+ if (netif_schedulable(netif))
-+ netif_wake_queue(netif->dev);
-+}
-+
-+/* Figure out how many ring slots we're going to need to send @skb to
-+ the guest. */
-+static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
-+{
-+ unsigned count;
-+ unsigned copy_off;
-+ unsigned i;
-+
-+ copy_off = 0;
-+ count = 1;
-+
-+ BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
-+
-+ copy_off = skb_headlen(skb);
-+
-+ if (skb_shinfo(skb)->gso_size)
-+ count++;
-+
-+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-+ unsigned long size = skb_shinfo(skb)->frags[i].size;
-+ unsigned long bytes;
-+ while (size > 0) {
-+ BUG_ON(copy_off > MAX_BUFFER_OFFSET);
-+
-+ /* These checks are the same as in netbk_gop_frag_copy */
-+ if (copy_off == MAX_BUFFER_OFFSET
-+ || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
-+ count++;
-+ copy_off = 0;
-+ }
-+
-+ bytes = size;
-+ if (copy_off + bytes > MAX_BUFFER_OFFSET)
-+ bytes = MAX_BUFFER_OFFSET - copy_off;
-+
-+ copy_off += bytes;
-+ size -= bytes;
-+ }
-+ }
-+ return count;
-+}
-+
-+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
-+{
-+ struct xen_netif *netif = netdev_priv(dev);
-+ struct xen_netbk *netbk;
-+
-+ BUG_ON(skb->dev != dev);
-+
-+ if (netif->group == -1)
-+ goto drop;
-+
-+ netbk = &xen_netbk[netif->group];
-+
-+ /* Drop the packet if the target domain has no receive buffers. */
-+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
-+ goto drop;
-+
-+ /*
-+ * XXX For now we also copy skbuffs whose head crosses a page
-+ * boundary, because netbk_gop_skb can't handle them.
-+ */
-+ if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
-+ struct sk_buff *nskb = netbk_copy_skb(skb);
-+ if ( unlikely(nskb == NULL) )
-+ goto drop;
-+ /* Copy only the header fields we use in this driver. */
-+ nskb->dev = skb->dev;
-+ nskb->ip_summed = skb->ip_summed;
-+ dev_kfree_skb(skb);
-+ skb = nskb;
-+ }
-+
-+ /* Reserve ring slots for the worst-case number of
-+ * fragments. */
-+ netif->rx_req_cons_peek += count_skb_slots(skb, netif);
-+ netif_get(netif);
-+
-+ if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
-+ netif->rx.sring->req_event = netif->rx_req_cons_peek +
-+ netbk_max_required_rx_slots(netif);
-+ mb(); /* request notification /then/ check & stop the queue */
-+ if (netbk_queue_full(netif)) {
-+ netif_stop_queue(dev);
-+ /*
-+ * Schedule 500ms timeout to restart the queue, thus
-+ * ensuring that an inactive queue will be drained.
-+ * Packets will be immediately be dropped until more
-+ * receive buffers become available (see
-+ * netbk_queue_full() check above).
-+ */
-+ netif->tx_queue_timeout.data = (unsigned long)netif;
-+ netif->tx_queue_timeout.function = tx_queue_callback;
-+ mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
-+ }
-+ }
-+ skb_queue_tail(&netbk->rx_queue, skb);
-+
-+ xen_netbk_bh_handler(netbk, 1);
-+
-+ return 0;
-+
-+ drop:
-+ netif->stats.tx_dropped++;
-+ dev_kfree_skb(skb);
-+ return 0;
-+}
-+
-+struct netrx_pending_operations {
-+ unsigned copy_prod, copy_cons;
-+ unsigned meta_prod, meta_cons;
-+ struct gnttab_copy *copy;
-+ struct netbk_rx_meta *meta;
-+ int copy_off;
-+ grant_ref_t copy_gref;
-+};
-+
-+/*
-+ * Set up the grant copy operations for one fragment: @size bytes that
-+ * start @offset bytes into @page and do not cross a page boundary.
-+ *
-+ * One gnttab_copy entry is appended to npo->copy for every chunk placed
-+ * in a receive buffer; a further ring request and meta slot are consumed
-+ * whenever the current buffer cannot take the data. @head is non-zero
-+ * for the first call of a packet, whose data must land in the first
-+ * buffer.
-+ *
-+ * NOTE(review): the previous comment also mentioned setting up an unmap
-+ * request for flipping interfaces; nothing in this function does that.
-+ */
-+
-+static void netbk_gop_frag_copy(struct xen_netif *netif,
-+ struct netrx_pending_operations *npo,
-+ struct page *page, unsigned long size,
-+ unsigned long offset, int head)
-+{
-+ struct gnttab_copy *copy_gop;
-+ struct netbk_rx_meta *meta;
-+ /*
-+ * These variables are used iff netif_get_page_ext returns true,
-+ * in which case they are guaranteed to be initialized.
-+ */
-+ unsigned int uninitialized_var(group), uninitialized_var(idx);
-+ int foreign = netif_get_page_ext(page, &group, &idx);
-+ unsigned long bytes;
-+
-+ /* Data must not cross a page boundary. */
-+ BUG_ON(size + offset > PAGE_SIZE);
-+
-+ /* Continue filling the most recently produced meta slot. */
-+ meta = npo->meta + npo->meta_prod - 1;
-+
-+ while (size > 0) {
-+ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
-+
-+ /*
-+ * Move to a new receive buffer if:
-+ *
-+ * simple case: we have completely filled the current buffer.
-+ *
-+ * complex case: the current frag would overflow
-+ * the current buffer but only if:
-+ * (i) this frag would fit completely in the next buffer
-+ * and (ii) there is already some data in the current buffer
-+ * and (iii) this is not the head buffer.
-+ *
-+ * Where:
-+ * - (i) stops us splitting a frag into two copies
-+ * unless the frag is too large for a single buffer.
-+ * - (ii) stops us from leaving a buffer pointlessly empty.
-+ * - (iii) stops us leaving the first buffer
-+ * empty. Strictly speaking this is already covered
-+ * by (ii) but is explicitly checked because
-+ * netfront relies on the first buffer being
-+ * non-empty and can crash otherwise.
-+ *
-+ * This means we will effectively linearise small
-+ * frags but do not needlessly split large buffers
-+ * into multiple copies, and tend to give large frags
-+ * their own buffers as before.
-+ */
-+ if (npo->copy_off == MAX_BUFFER_OFFSET
-+ || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
-+ struct xen_netif_rx_request *req;
-+
-+ BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
-+ /* Overflowed this request, go to the next one */
-+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+ meta = npo->meta + npo->meta_prod++;
-+ meta->gso_size = 0;
-+ meta->size = 0;
-+ meta->id = req->id;
-+ npo->copy_off = 0;
-+ npo->copy_gref = req->gref;
-+ }
-+
-+ /* Clamp this chunk to the space left in the current buffer. */
-+ bytes = size;
-+ if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
-+ bytes = MAX_BUFFER_OFFSET - npo->copy_off;
-+
-+ copy_gop = npo->copy + npo->copy_prod++;
-+ copy_gop->flags = GNTCOPY_dest_gref;
-+ if (foreign) {
-+ /* Source page belongs to another domain: copy grant to grant. */
-+ struct xen_netbk *netbk = &xen_netbk[group];
-+ struct pending_tx_info *src_pend;
-+
-+ src_pend = &netbk->pending_tx_info[idx];
-+
-+ copy_gop->source.domid = src_pend->netif->domid;
-+ copy_gop->source.u.ref = src_pend->req.gref;
-+ copy_gop->flags |= GNTCOPY_source_gref;
-+ } else {
-+ /* Local page: the source is named by its machine frame. */
-+ copy_gop->source.domid = DOMID_SELF;
-+ copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
-+ }
-+ copy_gop->source.offset = offset;
-+ copy_gop->dest.domid = netif->domid;
-+
-+ copy_gop->dest.offset = npo->copy_off;
-+ copy_gop->dest.u.ref = npo->copy_gref;
-+ copy_gop->len = bytes;
-+
-+ npo->copy_off += bytes;
-+ meta->size += bytes;
-+
-+ offset += bytes;
-+ size -= bytes;
-+ head = 0; /* Must be something in this buffer now */
-+ }
-+}
-+
-+/*
-+ * Prepare an SKB to be transmitted to the frontend. This is
-+ * responsible for allocating grant operations, meta structures, etc.
-+ *
-+ * Returns the number of meta structures consumed. The number of
-+ * ring slots used is always equal to the number of meta slots used
-+ * plus the number of GSO descriptors used. Currently, we use either
-+ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
-+ * frontend-side LRO).
-+ */
-+static int netbk_gop_skb(struct sk_buff *skb,
-+ struct netrx_pending_operations *npo)
-+{
-+ struct xen_netif *netif = netdev_priv(skb->dev);
-+ int nr_frags = skb_shinfo(skb)->nr_frags;
-+ int i;
-+ struct xen_netif_rx_request *req;
-+ struct netbk_rx_meta *meta;
-+ int old_meta_prod;
-+
-+ /* Remember the starting point so the slots used can be counted. */
-+ old_meta_prod = npo->meta_prod;
-+
-+ /* Set up a GSO prefix descriptor, if necessary */
-+ if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
-+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+ meta = npo->meta + npo->meta_prod++;
-+ meta->gso_size = skb_shinfo(skb)->gso_size;
-+ meta->size = 0;
-+ meta->id = req->id;
-+ }
-+
-+ /* First data slot: receives the linear (head) part of the skb. */
-+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-+ meta = npo->meta + npo->meta_prod++;
-+
-+ /* Without a prefix slot the GSO size travels on the first data slot. */
-+ if (!netif->gso_prefix)
-+ meta->gso_size = skb_shinfo(skb)->gso_size;
-+ else
-+ meta->gso_size = 0;
-+
-+ meta->size = 0;
-+ meta->id = req->id;
-+ npo->copy_off = 0;
-+ npo->copy_gref = req->gref;
-+
-+ netbk_gop_frag_copy(netif,
-+ npo, virt_to_page(skb->data),
-+ skb_headlen(skb),
-+ offset_in_page(skb->data), 1);
-+
-+ /* Leave a gap for the GSO descriptor. */
-+ if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
-+ netif->rx.req_cons++;
-+
-+ /* Queue copies for every paged fragment after the head. */
-+ for (i = 0; i < nr_frags; i++) {
-+ netbk_gop_frag_copy(netif, npo,
-+ skb_shinfo(skb)->frags[i].page,
-+ skb_shinfo(skb)->frags[i].size,
-+ skb_shinfo(skb)->frags[i].page_offset,
-+ 0);
-+ }
-+
-+ return npo->meta_prod - old_meta_prod;
-+}
-+
-+/*
-+ * Counterpart of netbk_gop_skb(): called once the batched grant copies
-+ * have been issued. Steps over the next @nr_meta_slots copy operations
-+ * in @npo (advancing npo->copy_cons) and returns NETIF_RSP_ERROR if any
-+ * of them failed, NETIF_RSP_OKAY otherwise.
-+ *
-+ * NOTE(review): exactly one copy operation is consumed per meta slot
-+ * here, whereas netbk_gop_frag_copy() can queue several copies for a
-+ * single slot -- confirm the two cursors stay in step.
-+ */
-+static int netbk_check_gop(int nr_meta_slots, domid_t domid,
-+			   struct netrx_pending_operations *npo)
-+{
-+	int result = NETIF_RSP_OKAY;
-+	int slot = 0;
-+
-+	while (slot < nr_meta_slots) {
-+		struct gnttab_copy *op = npo->copy + npo->copy_cons;
-+
-+		npo->copy_cons++;
-+		slot++;
-+
-+		if (op->status != GNTST_okay) {
-+			DPRINTK("Bad status %d from copy to DOM%d.\n",
-+				op->status, domid);
-+			result = NETIF_RSP_ERROR;
-+		}
-+	}
-+
-+	return result;
-+}
-+
-+/*
-+ * Emit one rx response for each of the @nr_meta_slots entries in @meta.
-+ * Every response except the last carries NETRXF_more_data; all of them
-+ * use offset 0 and the supplied @status.
-+ */
-+static void netbk_add_frag_responses(struct xen_netif *netif, int status,
-+				     struct netbk_rx_meta *meta,
-+				     int nr_meta_slots)
-+{
-+	const int last = nr_meta_slots - 1;
-+	int slot;
-+
-+	for (slot = 0; slot < nr_meta_slots; slot++) {
-+		unsigned long offset = 0;
-+		int flags = (slot == last) ? 0 : NETRXF_more_data;
-+
-+		make_rx_response(netif, meta[slot].id, status, offset,
-+				 meta[slot].size, flags);
-+	}
-+}
-+
-+/*
-+ * Overlay placed on skb->cb by net_rx_action() to remember how many
-+ * meta slots netbk_gop_skb() consumed for that skb.
-+ */
-+struct skb_cb_overlay {
-+ int meta_slots_used;
-+};
-+
-+/*
-+ * Guest-receive worker for one netback group; @data is the
-+ * struct xen_netbk pointer.
-+ *
-+ * 1. Drain netbk->rx_queue, building grant-copy operations and meta
-+ *    slots for each skb with netbk_gop_skb().
-+ * 2. Issue all of the copies in a single GNTTABOP_copy hypercall.
-+ * 3. For each skb: check the copy results, write its rx responses
-+ *    (including any GSO prefix or extra-info slot), push them and
-+ *    free the skb.
-+ * 4. Notify each frontend that asked for an event, then reschedule
-+ *    if more packets are waiting.
-+ */
-+static void net_rx_action(unsigned long data)
-+{
-+ struct xen_netif *netif = NULL;
-+ struct xen_netbk *netbk = (struct xen_netbk *)data;
-+ s8 status;
-+ u16 irq, flags;
-+ struct xen_netif_rx_response *resp;
-+ struct sk_buff_head rxq;
-+ struct sk_buff *skb;
-+ int notify_nr = 0;
-+ int ret;
-+ int nr_frags;
-+ int count;
-+ unsigned long offset;
-+ struct skb_cb_overlay *sco;
-+
-+ /* Cursors start at zero; the arrays live in the netbk structure. */
-+ struct netrx_pending_operations npo = {
-+ .copy = netbk->grant_copy_op,
-+ .meta = netbk->meta,
-+ };
-+
-+ skb_queue_head_init(&rxq);
-+
-+ count = 0;
-+
-+ /* Step 1: build copy operations for as many skbs as fit in a batch. */
-+ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
-+ netif = netdev_priv(skb->dev);
-+ nr_frags = skb_shinfo(skb)->nr_frags;
-+
-+ sco = (struct skb_cb_overlay *)skb->cb;
-+ sco->meta_slots_used = netbk_gop_skb(skb, &npo);
-+
-+ count += nr_frags + 1;
-+
-+ __skb_queue_tail(&rxq, skb);
-+
-+ /* Filled the batch queue? */
-+ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
-+ break;
-+ }
-+
-+ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
-+
-+ /* Nothing was queued: no hypercall and no responses needed. */
-+ if (!npo.copy_prod)
-+ return;
-+
-+ /* Step 2: perform every queued copy in one hypercall. */
-+ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
-+ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
-+ npo.copy_prod);
-+ BUG_ON(ret != 0);
-+
-+ /* Step 3: turn the results into ring responses, one skb at a time. */
-+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
-+ sco = (struct skb_cb_overlay *)skb->cb;
-+
-+ netif = netdev_priv(skb->dev);
-+
-+ /* A GSO prefix slot, when present, comes before the data slots. */
-+ if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
-+ resp = RING_GET_RESPONSE(&netif->rx,
-+ netif->rx.rsp_prod_pvt++);
-+
-+ resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
-+
-+ resp->offset = netbk->meta[npo.meta_cons].gso_size;
-+ resp->id = netbk->meta[npo.meta_cons].id;
-+ resp->status = sco->meta_slots_used;
-+
-+ npo.meta_cons++;
-+ sco->meta_slots_used--;
-+ }
-+
-+
-+ netif->stats.tx_bytes += skb->len;
-+ netif->stats.tx_packets++;
-+
-+ status = netbk_check_gop(sco->meta_slots_used,
-+ netif->domid, &npo);
-+
-+ if (sco->meta_slots_used == 1)
-+ flags = 0;
-+ else
-+ flags = NETRXF_more_data;
-+
-+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
-+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
-+ else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-+ /* remote but checksummed. */
-+ flags |= NETRXF_data_validated;
-+
-+ /* Response for the first data slot of this skb. */
-+ offset = 0;
-+ resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
-+ status, offset,
-+ netbk->meta[npo.meta_cons].size,
-+ flags);
-+
-+ /* Without prefix mode, GSO details go in an extra-info slot. */
-+ if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
-+ struct xen_netif_extra_info *gso =
-+ (struct xen_netif_extra_info *)
-+ RING_GET_RESPONSE(&netif->rx,
-+ netif->rx.rsp_prod_pvt++);
-+
-+ resp->flags |= NETRXF_extra_info;
-+
-+ gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
-+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
-+ gso->u.gso.pad = 0;
-+ gso->u.gso.features = 0;
-+
-+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
-+ gso->flags = 0;
-+ }
-+
-+ /* Responses for the remaining data slots, if any. */
-+ if (sco->meta_slots_used > 1) {
-+ netbk_add_frag_responses(netif, status,
-+ netbk->meta + npo.meta_cons + 1,
-+ sco->meta_slots_used - 1);
-+ }
-+
-+ /* Record each irq at most once per batch for later notification. */
-+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
-+ irq = netif->irq;
-+ if (ret && !netbk->rx_notify[irq] &&
-+ (netif->smart_poll != 1)) {
-+ netbk->rx_notify[irq] = 1;
-+ netbk->notify_list[notify_nr++] = irq;
-+ }
-+
-+ if (netif_queue_stopped(netif->dev) &&
-+ netif_schedulable(netif) &&
-+ !netbk_queue_full(netif))
-+ netif_wake_queue(netif->dev);
-+
-+ /*
-+ * netfront_smartpoll_active indicates whether
-+ * netfront timer is active.
-+ */
-+ if ((netif->smart_poll == 1) &&
-+ !(netif->rx.sring->private.netif.smartpoll_active)) {
-+ notify_remote_via_irq(irq);
-+ netif->rx.sring->private.netif.smartpoll_active = 1;
-+ }
-+
-+ netif_put(netif);
-+ npo.meta_cons += sco->meta_slots_used;
-+ dev_kfree_skb(skb);
-+ }
-+
-+ /* Step 4: send the deferred notifications. */
-+ while (notify_nr != 0) {
-+ irq = netbk->notify_list[--notify_nr];
-+ netbk->rx_notify[irq] = 0;
-+ notify_remote_via_irq(irq);
-+ }
-+
-+ /* More work to do? */
-+ if (!skb_queue_empty(&netbk->rx_queue) &&
-+ !timer_pending(&netbk->net_timer))
-+ xen_netbk_bh_handler(netbk, 1);
-+}
-+
-+/*
-+ * Timer callback: @data carries the struct xen_netbk pointer, which is
-+ * handed to xen_netbk_bh_handler() with a second argument of 1.
-+ */
-+static void net_alarm(unsigned long data)
-+{
-+	xen_netbk_bh_handler((struct xen_netbk *)data, 1);
-+}
-+
-+/*
-+ * Timer callback: @data carries the struct xen_netbk pointer, which is
-+ * handed to xen_netbk_bh_handler() with a second argument of 0.
-+ */
-+static void netbk_tx_pending_timeout(unsigned long data)
-+{
-+	xen_netbk_bh_handler((struct xen_netbk *)data, 0);
-+}
-+
-+/* Return the statistics block kept in the device's private xen_netif. */
-+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
-+{
-+	struct xen_netif *priv = netdev_priv(dev);
-+
-+	return &priv->stats;
-+}
-+
-+/* Return 1 if @netif is linked on a schedule list, 0 otherwise. */
-+static int __on_net_schedule_list(struct xen_netif *netif)
-+{
-+	if (list_empty(&netif->list))
-+		return 0;
-+
-+	return 1;
-+}
-+
-+/*
-+ * Unlink @netif from the schedule list and drop the reference that was
-+ * taken when it was added. Does nothing if it is not on the list.
-+ * Must be called with net_schedule_list_lock held.
-+ */
-+static void remove_from_net_schedule_list(struct xen_netif *netif)
-+{
-+	if (unlikely(!__on_net_schedule_list(netif)))
-+		return;
-+
-+	list_del_init(&netif->list);
-+	netif_put(netif);
-+}
-+
-+/*
-+ * Take the first interface off @netbk's schedule list.
-+ *
-+ * Returns NULL when the list is empty. Otherwise the caller receives
-+ * the interface with an extra reference: one is taken here before the
-+ * list's own reference is dropped by remove_from_net_schedule_list().
-+ */
-+static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
-+{
-+	struct xen_netif *picked = NULL;
-+
-+	spin_lock_irq(&netbk->net_schedule_list_lock);
-+
-+	if (!list_empty(&netbk->net_schedule_list)) {
-+		picked = list_first_entry(&netbk->net_schedule_list,
-+					  struct xen_netif, list);
-+		if (picked) {
-+			netif_get(picked);
-+			remove_from_net_schedule_list(picked);
-+		}
-+	}
-+
-+	spin_unlock_irq(&netbk->net_schedule_list_lock);
-+	return picked;
-+}
-+
-+/*
-+ * Append @netif to its group's schedule list, taking a reference for
-+ * the list, unless it is already queued or is not schedulable.
-+ */
-+static void add_to_net_schedule_list_tail(struct xen_netif *netif)
-+{
-+	struct xen_netbk *netbk = &xen_netbk[netif->group];
-+	unsigned long irqflags;
-+
-+	/* Unlocked check first, to skip the lock when already queued. */
-+	if (__on_net_schedule_list(netif))
-+		return;
-+
-+	spin_lock_irqsave(&netbk->net_schedule_list_lock, irqflags);
-+
-+	/* Check again now that the lock is held. */
-+	if (!__on_net_schedule_list(netif) &&
-+	    likely(netif_schedulable(netif))) {
-+		list_add_tail(&netif->list, &netbk->net_schedule_list);
-+		netif_get(netif);
-+	}
-+
-+	spin_unlock_irqrestore(&netbk->net_schedule_list_lock, irqflags);
-+}
-+
-+/*
-+ * If the tx ring of @netif still has unconsumed requests, queue the
-+ * interface on its group's schedule list and poke the tx action.
-+ */
-+void netif_schedule_work(struct xen_netif *netif)
-+{
-+	struct xen_netbk *netbk = &xen_netbk[netif->group];
-+	int pending;
-+
-+	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, pending);
-+	if (!pending)
-+		return;
-+
-+	add_to_net_schedule_list_tail(netif);
-+	maybe_schedule_tx_action(netbk);
-+}
-+
-+/* Remove @netif from its group's schedule list, under the list lock. */
-+void netif_deschedule_work(struct xen_netif *netif)
-+{
-+	struct xen_netbk *group = &xen_netbk[netif->group];
-+
-+	spin_lock_irq(&group->net_schedule_list_lock);
-+	remove_from_net_schedule_list(netif);
-+	spin_unlock_irq(&group->net_schedule_list_lock);
-+}
-+
-+
-+/*
-+ * Replenish the transmit credit of @netif by one credit_bytes chunk,
-+ * capped at a burst limit derived from the next request on the ring.
-+ */
-+static void tx_add_credit(struct xen_netif *netif)
-+{
-+	unsigned long burst_cap, credit_cap;
-+
-+	/*
-+	 * The burst limit is the size of the next request, at most 128kB
-+	 * (a jumbo packet) and never below credit_bytes; otherwise the
-+	 * interface can seize up due to insufficient credit.
-+	 */
-+	burst_cap = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
-+	if (burst_cap > 131072UL)
-+		burst_cap = 131072UL;
-+	if (burst_cap < netif->credit_bytes)
-+		burst_cap = netif->credit_bytes;
-+
-+	/* Add the new chunk, saturating instead of wrapping to zero. */
-+	credit_cap = netif->remaining_credit + netif->credit_bytes;
-+	if (credit_cap < netif->remaining_credit)
-+		credit_cap = ULONG_MAX;
-+
-+	if (credit_cap < burst_cap)
-+		netif->remaining_credit = credit_cap;
-+	else
-+		netif->remaining_credit = burst_cap;
-+}
-+
-+/*
-+ * Credit timer callback: @data is the struct xen_netif pointer. Tops
-+ * up its credit and reschedules it for transmit processing.
-+ */
-+static void tx_credit_callback(unsigned long data)
-+{
-+	struct xen_netif *vif = (struct xen_netif *)data;
-+
-+	tx_add_credit(vif);
-+	netif_schedule_work(vif);
-+}
-+
-+/*
-+ * Call gnttab_copy_grant_page() for the pending request @pending_idx,
-+ * passing its grant handle and its mmap_pages slot, and return the
-+ * result unchanged.
-+ */
-+static inline int copy_pending_req(struct xen_netbk *netbk,
-+				   pending_ring_idx_t pending_idx)
-+{
-+	return gnttab_copy_grant_page(netbk->grant_tx_handle[pending_idx],
-+				      netbk->mmap_pages + pending_idx);
-+}
-+
-+/*
-+ * Release grants for transmit requests that have finished.
-+ *
-+ * Gathers the indices queued on the dealloc ring by netif_idx_release()
-+ * and, in delayed-copy mode, entries that have been in use for at least
-+ * HZ/2 and could be copied. Still-mapped pages are unmapped in a single
-+ * hypercall; each gathered entry then gets an OKAY tx response and its
-+ * index goes back on the pending ring.
-+ */
-+static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
-+{
-+ struct netbk_tx_pending_inuse *inuse, *n;
-+ struct gnttab_unmap_grant_ref *gop;
-+ u16 pending_idx;
-+ pending_ring_idx_t dc, dp;
-+ struct xen_netif *netif;
-+ int ret;
-+ LIST_HEAD(list);
-+
-+ dc = netbk->dealloc_cons;
-+ gop = netbk->tx_unmap_ops;
-+
-+ /*
-+ * Free up any grants we have finished using
-+ */
-+ do {
-+ dp = netbk->dealloc_prod;
-+
-+ /* Ensure we see all indices enqueued by netif_idx_release(). */
-+ smp_rmb();
-+
-+ while (dc != dp) {
-+ unsigned long pfn;
-+ struct netbk_tx_pending_inuse *pending_inuse =
-+ netbk->pending_inuse;
-+
-+ pending_idx = netbk->dealloc_ring[pending_index(dc++)];
-+ list_move_tail(&pending_inuse[pending_idx].list, &list);
-+
-+ pfn = idx_to_pfn(netbk, pending_idx);
-+ /* Already unmapped? */
-+ if (!phys_to_machine_mapping_valid(pfn))
-+ continue;
-+
-+ gnttab_set_unmap_op(gop,
-+ idx_to_kaddr(netbk, pending_idx),
-+ GNTMAP_host_map,
-+ netbk->grant_tx_handle[pending_idx]);
-+ gop++;
-+ }
-+
-+ if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
-+ list_empty(&netbk->pending_inuse_head))
-+ break;
-+
-+ /* Copy any entries that have been pending for too long. */
-+ list_for_each_entry_safe(inuse, n,
-+ &netbk->pending_inuse_head, list) {
-+ struct pending_tx_info *pending_tx_info;
-+ pending_tx_info = netbk->pending_tx_info;
-+
-+ /* This entry is not old enough yet: stop scanning. */
-+ if (time_after(inuse->alloc_time + HZ / 2, jiffies))
-+ break;
-+
-+ /* Index is the entry's position in the pending_inuse array. */
-+ pending_idx = inuse - netbk->pending_inuse;
-+
-+ pending_tx_info[pending_idx].netif->nr_copied_skbs++;
-+
-+ switch (copy_pending_req(netbk, pending_idx)) {
-+ case 0:
-+ /* Copied: complete this entry in the response pass. */
-+ list_move_tail(&inuse->list, &list);
-+ continue;
-+ case -EBUSY:
-+ list_del_init(&inuse->list);
-+ continue;
-+ case -ENOENT:
-+ continue;
-+ }
-+
-+ /* Any other result ends the scan. */
-+ break;
-+ }
-+ } while (dp != netbk->dealloc_prod);
-+
-+ netbk->dealloc_cons = dc;
-+
-+ /* Unmap everything gathered above in one hypercall. */
-+ ret = HYPERVISOR_grant_table_op(
-+ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
-+ gop - netbk->tx_unmap_ops);
-+ BUG_ON(ret);
-+
-+ /* Respond to the frontend and recycle every finished index. */
-+ list_for_each_entry_safe(inuse, n, &list, list) {
-+ struct pending_tx_info *pending_tx_info;
-+ pending_ring_idx_t index;
-+
-+ pending_tx_info = netbk->pending_tx_info;
-+ pending_idx = inuse - netbk->pending_inuse;
-+
-+ netif = pending_tx_info[pending_idx].netif;
-+
-+ make_tx_response(netif, &pending_tx_info[pending_idx].req,
-+ NETIF_RSP_OKAY);
-+
-+ /* Ready for next use. */
-+ gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
-+
-+ index = pending_index(netbk->pending_prod++);
-+ netbk->pending_ring[index] = pending_idx;
-+
-+ netif_put(netif);
-+
-+ list_del_init(&inuse->list);
-+ }
-+}
-+
-+/*
-+ * Fail a transmit packet: send NETIF_RSP_ERROR for @txp and for every
-+ * ring request from the current consumer index up to @end, advance the
-+ * consumer index past them, reschedule the interface and drop one
-+ * reference on it.
-+ */
-+static void netbk_tx_err(struct xen_netif *netif,
-+			 struct xen_netif_tx_request *txp, RING_IDX end)
-+{
-+	RING_IDX next = netif->tx.req_cons;
-+
-+	for (;;) {
-+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		if (next >= end)
-+			break;
-+		txp = RING_GET_REQUEST(&netif->tx, next++);
-+	}
-+
-+	netif->tx.req_cons = next;
-+	netif_schedule_work(netif);
-+	netif_put(netif);
-+}
-+
-+/*
-+ * Count the ring requests that follow @first and belong to the same
-+ * packet.
-+ *
-+ * Each follow-up request is copied from the ring into successive
-+ * entries of @txp and its size is subtracted from first->size, so on
-+ * success first->size is what remains for the first request. Returns
-+ * the number of follow-up requests, 0 when @first does not carry
-+ * NETTXF_more_data, or a negated count when the chain is malformed or
-+ * exceeds @work_to_do or MAX_SKB_FRAGS.
-+ */
-+static int netbk_count_requests(struct xen_netif *netif,
-+ struct xen_netif_tx_request *first,
-+ struct xen_netif_tx_request *txp, int work_to_do)
-+{
-+ RING_IDX cons = netif->tx.req_cons;
-+ int frags = 0;
-+
-+ if (!(first->flags & NETTXF_more_data))
-+ return 0;
-+
-+ do {
-+ if (frags >= work_to_do) {
-+ DPRINTK("Need more frags\n");
-+ return -frags;
-+ }
-+
-+ if (unlikely(frags >= MAX_SKB_FRAGS)) {
-+ DPRINTK("Too many frags\n");
-+ return -frags;
-+ }
-+
-+ /* Work on a private copy of the ring request. */
-+ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
-+ sizeof(*txp));
-+ /* A fragment may not be larger than what is left of the packet. */
-+ if (txp->size > first->size) {
-+ DPRINTK("Frags galore\n");
-+ return -frags;
-+ }
-+
-+ first->size -= txp->size;
-+ frags++;
-+
-+ /* Each fragment must lie within a single page. */
-+ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
-+ DPRINTK("txp->offset: %x, size: %u\n",
-+ txp->offset, txp->size);
-+ return -frags;
-+ }
-+ /* Test the flag of the entry just filled, then step to the next. */
-+ } while ((txp++)->flags & NETTXF_more_data);
-+
-+ return frags;
-+}
-+
-+/*
-+ * Queue a grant-map operation for every fragment request in @txp.
-+ *
-+ * For each fragment slot of @skb (skipping slot 0 when it already holds
-+ * the header's pending index) a pending index is taken from the pending
-+ * ring, a read-only host mapping is set up in @mop, the request and
-+ * interface are recorded in pending_tx_info (taking a reference on
-+ * @netif), and the index is stashed in the fragment's page pointer.
-+ * Returns the next unused map operation.
-+ */
-+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
-+						       struct xen_netif *netif,
-+						       struct sk_buff *skb,
-+						       struct xen_netif_tx_request *txp,
-+						       struct gnttab_map_grant_ref *mop)
-+{
-+	struct skb_shared_info *shinfo = skb_shinfo(skb);
-+	unsigned long pending_idx = *((u16 *)skb->data);
-+	int first = 0;
-+	int i;
-+
-+	/* Skip first skb fragment if it is on same page as header fragment. */
-+	if ((unsigned long)shinfo->frags[0].page == pending_idx)
-+		first = 1;
-+
-+	for (i = first; i < shinfo->nr_frags; i++) {
-+		struct pending_tx_info *info;
-+		pending_ring_idx_t slot;
-+
-+		slot = pending_index(netbk->pending_cons++);
-+		pending_idx = netbk->pending_ring[slot];
-+		info = &netbk->pending_tx_info[pending_idx];
-+
-+		gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
-+				  GNTMAP_host_map | GNTMAP_readonly,
-+				  txp->gref, netif->domid);
-+		mop++;
-+
-+		memcpy(&info->req, txp, sizeof(*txp));
-+		netif_get(netif);
-+		info->netif = netif;
-+		shinfo->frags[i].page = (void *)pending_idx;
-+
-+		txp++;
-+	}
-+
-+	return mop;
-+}
-+
-+/*
-+ * Check the grant-map results for one queued skb (header, then each
-+ * fragment) and advance *@mopp past the operations that belong to it.
-+ *
-+ * Successful mappings are entered in the physical-to-machine table and
-+ * their grant handles remembered. A failed mapping is answered with
-+ * NETIF_RSP_ERROR and its pending index recycled immediately. As soon
-+ * as any part fails, every part that did map -- earlier or later -- is
-+ * handed to netif_idx_release() so that it is torn down again.
-+ *
-+ * Returns 0 when everything mapped, otherwise the first non-zero map
-+ * status encountered.
-+ */
-+static int netbk_tx_check_mop(struct xen_netbk *netbk,
-+			      struct sk_buff *skb,
-+			      struct gnttab_map_grant_ref **mopp)
-+{
-+	struct gnttab_map_grant_ref *mop = *mopp;
-+	int pending_idx = *((u16 *)skb->data);
-+	struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
-+	struct xen_netif *netif = pending_tx_info[pending_idx].netif;
-+	struct xen_netif_tx_request *txp;
-+	struct skb_shared_info *shinfo = skb_shinfo(skb);
-+	int nr_frags = shinfo->nr_frags;
-+	int i, err, start;
-+
-+	/* Check status of header. */
-+	err = mop->status;
-+	if (unlikely(err)) {
-+		pending_ring_idx_t index;
-+		index = pending_index(netbk->pending_prod++);
-+		txp = &pending_tx_info[pending_idx].req;
-+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		netbk->pending_ring[index] = pending_idx;
-+		netif_put(netif);
-+	} else {
-+		set_phys_to_machine(
-+			__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
-+			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
-+		netbk->grant_tx_handle[pending_idx] = mop->handle;
-+	}
-+
-+	/* Skip first skb fragment if it is on same page as header fragment. */
-+	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
-+
-+	for (i = start; i < nr_frags; i++) {
-+		int j, newerr;
-+		pending_ring_idx_t index;
-+
-+		pending_idx = (unsigned long)shinfo->frags[i].page;
-+
-+		/* Check error status: if okay then remember grant handle. */
-+		newerr = (++mop)->status;
-+		if (likely(!newerr)) {
-+			unsigned long addr;
-+			addr = idx_to_kaddr(netbk, pending_idx);
-+			set_phys_to_machine(
-+				__pa(addr)>>PAGE_SHIFT,
-+				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
-+			netbk->grant_tx_handle[pending_idx] = mop->handle;
-+			/* Had a previous error? Invalidate this fragment. */
-+			if (unlikely(err))
-+				netif_idx_release(netbk, pending_idx);
-+			continue;
-+		}
-+
-+		/* Error on this fragment: respond to client with an error. */
-+		txp = &netbk->pending_tx_info[pending_idx].req;
-+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
-+		index = pending_index(netbk->pending_prod++);
-+		netbk->pending_ring[index] = pending_idx;
-+		netif_put(netif);
-+
-+		/* Not the first error? Preceding frags already invalidated. */
-+		if (err)
-+			continue;
-+
-+		/* First error: invalidate header and preceding fragments. */
-+		pending_idx = *((u16 *)skb->data);
-+		netif_idx_release(netbk, pending_idx);
-+		for (j = start; j < i; j++) {
-+			/*
-+			 * Index by j, not i: frags[i] is the fragment that
-+			 * just failed and has already been recycled above;
-+			 * the ones to release are the earlier fragments
-+			 * that mapped successfully.
-+			 */
-+			pending_idx = (unsigned long)shinfo->frags[j].page;
-+			netif_idx_release(netbk, pending_idx);
-+		}
-+
-+		/* Remember the error: invalidate all subsequent fragments. */
-+		err = newerr;
-+	}
-+
-+	*mopp = mop + 1;
-+	return err;
-+}
-+
-+/*
-+ * Turn the pending indices stashed in @skb's fragment slots into real
-+ * fragments: each index is put on the in-use list with the current
-+ * time, its slot is pointed at the mapped page with the size and offset
-+ * from the saved request, and the skb length fields grow to match.
-+ */
-+static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
-+{
-+	struct skb_shared_info *shinfo = skb_shinfo(skb);
-+	skb_frag_t *frag = shinfo->frags;
-+	skb_frag_t *end = frag + shinfo->nr_frags;
-+
-+	for (; frag < end; frag++) {
-+		unsigned long pending_idx = (unsigned long)frag->page;
-+		struct netbk_tx_pending_inuse *inuse;
-+		struct xen_netif_tx_request *txp;
-+
-+		inuse = &netbk->pending_inuse[pending_idx];
-+		inuse->alloc_time = jiffies;
-+		list_add_tail(&inuse->list, &netbk->pending_inuse_head);
-+
-+		txp = &netbk->pending_tx_info[pending_idx].req;
-+		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
-+		frag->size = txp->size;
-+		frag->page_offset = txp->offset;
-+
-+		skb->len += txp->size;
-+		skb->data_len += txp->size;
-+		skb->truesize += txp->size;
-+	}
-+}
-+
-+/*
-+ * Consume the chain of extra-info slots that follows a tx request.
-+ *
-+ * Each slot is copied off the ring and stored in @extras[type - 1];
-+ * netif->tx.req_cons is advanced past every slot that was read.
-+ * Returns the remaining @work_to_do on success, -EBADR if the ring runs
-+ * out before the chain ends, or -EINVAL for an out-of-range extra type.
-+ */
-+int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras,
-+ int work_to_do)
-+{
-+ struct xen_netif_extra_info extra;
-+ RING_IDX cons = netif->tx.req_cons;
-+
-+ do {
-+ if (unlikely(work_to_do-- <= 0)) {
-+ DPRINTK("Missing extra info\n");
-+ return -EBADR;
-+ }
-+
-+ /* Work on a private copy of the ring slot. */
-+ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
-+ sizeof(extra));
-+ if (unlikely(!extra.type ||
-+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
-+ /* The bad slot is still consumed before bailing out. */
-+ netif->tx.req_cons = ++cons;
-+ DPRINTK("Invalid extra type: %d\n", extra.type);
-+ return -EINVAL;
-+ }
-+
-+ memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
-+ netif->tx.req_cons = ++cons;
-+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
-+
-+ return work_to_do;
-+}
-+
-+/*
-+ * Apply the GSO extra-info @gso to @skb.
-+ *
-+ * Returns -EINVAL for a zero GSO size or any type other than TCPv4,
-+ * otherwise fills in the shared-info GSO fields and returns 0.
-+ */
-+static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso)
-+{
-+	struct skb_shared_info *shinfo;
-+
-+	if (gso->u.gso.size == 0) {
-+		DPRINTK("GSO size must not be zero.\n");
-+		return -EINVAL;
-+	}
-+
-+	/* Currently only TCPv4 S.O. is supported. */
-+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
-+		DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
-+		return -EINVAL;
-+	}
-+
-+	shinfo = skb_shinfo(skb);
-+	shinfo->gso_size = gso->u.gso.size;
-+
-+	/* SKB_GSO_DODGY: header must be checked, and gso_segs computed. */
-+	shinfo->gso_type = SKB_GSO_TCPV4 | SKB_GSO_DODGY;
-+	shinfo->gso_segs = 0;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Fill in csum_start/csum_offset for a CHECKSUM_PARTIAL IPv4 packet
-+ * received from the frontend.
-+ *
-+ * Returns 0 on success and -EPROTO when the packet is not IPv4, is not
-+ * TCP or UDP, or is too short for the headers that must be inspected.
-+ */
-+static int skb_checksum_setup(struct sk_buff *skb)
-+{
-+	struct iphdr *iph;
-+	unsigned char *th;
-+	int err = -EPROTO;
-+
-+	if (skb->protocol != htons(ETH_P_IP))
-+		goto out;
-+
-+	/*
-+	 * The packet contents come from the guest: make sure the fixed
-+	 * part of the IP header is in the linear area before reading
-+	 * ihl and protocol from it.
-+	 */
-+	if (skb_headlen(skb) < sizeof(*iph))
-+		goto out;
-+
-+	iph = (void *)skb->data;
-+	th = skb->data + 4 * iph->ihl;
-+	if (th >= skb_tail_pointer(skb))
-+		goto out;
-+
-+	skb->csum_start = th - skb->head;
-+	switch (iph->protocol) {
-+	case IPPROTO_TCP:
-+		skb->csum_offset = offsetof(struct tcphdr, check);
-+		break;
-+	case IPPROTO_UDP:
-+		skb->csum_offset = offsetof(struct udphdr, check);
-+		break;
-+	default:
-+		if (net_ratelimit())
-+			printk(KERN_ERR "Attempting to checksum a non-"
-+			       "TCP/UDP packet, dropping a protocol"
-+			       " %d packet\n", iph->protocol);
-+		goto out;
-+	}
-+
-+	/* The 16-bit checksum field itself must lie inside the data. */
-+	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
-+		goto out;
-+
-+	err = 0;
-+
-+out:
-+	return err;
-+}
-+
-+/*
-+ * Decide whether a request of @size bytes must wait for more credit.
-+ *
-+ * Returns true when the credit timer is already pending, or when the
-+ * request still exceeds the credit left after any due top-up (in which
-+ * case the credit timer is armed). Returns false when the request may
-+ * go ahead.
-+ */
-+static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
-+{
-+	unsigned long now = jiffies;
-+	unsigned long period = msecs_to_jiffies(netif->credit_usec / 1000);
-+	unsigned long next_credit = netif->credit_timeout.expires + period;
-+
-+	/* Timer could already be pending in rare cases. */
-+	if (timer_pending(&netif->credit_timeout))
-+		return true;
-+
-+	/* Passed the point where we can replenish credit? */
-+	if (time_after_eq(now, next_credit)) {
-+		netif->credit_timeout.expires = now;
-+		tx_add_credit(netif);
-+	}
-+
-+	if (size <= netif->remaining_credit)
-+		return false;
-+
-+	/* Still too big to send right now: set a callback. */
-+	netif->credit_timeout.data = (unsigned long)netif;
-+	netif->credit_timeout.function = tx_credit_callback;
-+	mod_timer(&netif->credit_timeout, next_credit);
-+
-+	return true;
-+}
-+
-+/*
-+ * Build the grant-map operations for the next batch of guest transmits.
-+ *
-+ * Repeatedly takes an interface off the schedule list, validates the
-+ * next packet on its tx ring (credit, extra-info slots, fragment chain,
-+ * size and page-crossing checks), allocates an skb for it, and queues
-+ * one map operation for the header plus one per fragment. Prepared
-+ * skbs are placed on netbk->tx_queue for net_tx_submit().
-+ *
-+ * Returns the number of map operations written to netbk->tx_map_ops.
-+ */
-+static unsigned net_tx_build_mops(struct xen_netbk *netbk)
-+{
-+ struct gnttab_map_grant_ref *mop;
-+ struct sk_buff *skb;
-+ int ret;
-+
-+ mop = netbk->tx_map_ops;
-+ /* Keep going while a maximally fragmented packet would still fit. */
-+ while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+ !list_empty(&netbk->net_schedule_list)) {
-+ struct xen_netif *netif;
-+ struct xen_netif_tx_request txreq;
-+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
-+ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
-+ u16 pending_idx;
-+ RING_IDX idx;
-+ int work_to_do;
-+ unsigned int data_len;
-+ pending_ring_idx_t index;
-+
-+ /* Get a netif from the list with work to do. */
-+ netif = poll_net_schedule_list(netbk);
-+ if (!netif)
-+ continue;
-+
-+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
-+ if (!work_to_do) {
-+ netif_put(netif);
-+ continue;
-+ }
-+
-+ idx = netif->tx.req_cons;
-+ rmb(); /* Ensure that we see the request before we copy it. */
-+ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
-+
-+ /* Credit-based scheduling. */
-+ if (txreq.size > netif->remaining_credit &&
-+ tx_credit_exceeded(netif, txreq.size)) {
-+ netif_put(netif);
-+ continue;
-+ }
-+
-+ netif->remaining_credit -= txreq.size;
-+
-+ work_to_do--;
-+ netif->tx.req_cons = ++idx;
-+
-+ memset(extras, 0, sizeof(extras));
-+ if (txreq.flags & NETTXF_extra_info) {
-+ work_to_do = netbk_get_extras(netif, extras,
-+ work_to_do);
-+ idx = netif->tx.req_cons;
-+ if (unlikely(work_to_do < 0)) {
-+ netbk_tx_err(netif, &txreq, idx);
-+ continue;
-+ }
-+ }
-+
-+ /* ret is the number of fragment requests, negated on error. */
-+ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
-+ if (unlikely(ret < 0)) {
-+ netbk_tx_err(netif, &txreq, idx - ret);
-+ continue;
-+ }
-+ idx += ret;
-+
-+ if (unlikely(txreq.size < ETH_HLEN)) {
-+ DPRINTK("Bad packet size: %d\n", txreq.size);
-+ netbk_tx_err(netif, &txreq, idx);
-+ continue;
-+ }
-+
-+ /* No crossing a page as the payload mustn't fragment. */
-+ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
-+ DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
-+ txreq.offset, txreq.size,
-+ (txreq.offset &~PAGE_MASK) + txreq.size);
-+ netbk_tx_err(netif, &txreq, idx);
-+ continue;
-+ }
-+
-+ /* Peek at the next free pending index; it is claimed further down. */
-+ index = pending_index(netbk->pending_cons);
-+ pending_idx = netbk->pending_ring[index];
-+
-+ /*
-+ * Copy only PKT_PROT_LEN bytes into the linear area when the
-+ * rest can become a fragment; otherwise copy the whole request.
-+ */
-+ data_len = (txreq.size > PKT_PROT_LEN &&
-+ ret < MAX_SKB_FRAGS) ?
-+ PKT_PROT_LEN : txreq.size;
-+
-+ skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
-+ GFP_ATOMIC | __GFP_NOWARN);
-+ if (unlikely(skb == NULL)) {
-+ DPRINTK("Can't allocate a skb in start_xmit.\n");
-+ netbk_tx_err(netif, &txreq, idx);
-+ break;
-+ }
-+
-+ /* Packets passed to netif_rx() must have some headroom. */
-+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-+
-+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
-+ struct xen_netif_extra_info *gso;
-+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
-+
-+ if (netbk_set_skb_gso(skb, gso)) {
-+ kfree_skb(skb);
-+ netbk_tx_err(netif, &txreq, idx);
-+ continue;
-+ }
-+ }
-+
-+ /* Map operation for the page holding the packet header. */
-+ gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
-+ GNTMAP_host_map | GNTMAP_readonly,
-+ txreq.gref, netif->domid);
-+ mop++;
-+
-+ memcpy(&netbk->pending_tx_info[pending_idx].req,
-+ &txreq, sizeof(txreq));
-+ netbk->pending_tx_info[pending_idx].netif = netif;
-+ /* Stash the index in the skb data; net_tx_submit() reads it back. */
-+ *((u16 *)skb->data) = pending_idx;
-+
-+ __skb_put(skb, data_len);
-+
-+ skb_shinfo(skb)->nr_frags = ret;
-+ if (data_len < txreq.size) {
-+ /* The rest of the first request becomes fragment 0. */
-+ skb_shinfo(skb)->nr_frags++;
-+ skb_shinfo(skb)->frags[0].page =
-+ (void *)(unsigned long)pending_idx;
-+ } else {
-+ /* Discriminate from any valid pending_idx value. */
-+ skb_shinfo(skb)->frags[0].page = (void *)~0UL;
-+ }
-+
-+ __skb_queue_tail(&netbk->tx_queue, skb);
-+
-+ netbk->pending_cons++;
-+
-+ mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
-+
-+ netif->tx.req_cons = idx;
-+ netif_schedule_work(netif);
-+
-+ /* Stop once the map operation array has been filled. */
-+ if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
-+ break;
-+ }
-+
-+ return mop - netbk->tx_map_ops;
-+}
-+
-+/*
-+ * Finish the skbs queued by net_tx_build_mops() once their grant-map
-+ * operations have been issued.
-+ *
-+ * For each skb on netbk->tx_queue: check the map results, copy the
-+ * header bytes out of the mapped page, attach the fragments, set up
-+ * checksum state, and pass the packet to netif_rx_ni(). skbs that fail
-+ * any step are freed instead.
-+ */
-+static void net_tx_submit(struct xen_netbk *netbk)
-+{
-+ struct gnttab_map_grant_ref *mop;
-+ struct sk_buff *skb;
-+
-+ mop = netbk->tx_map_ops;
-+ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
-+ struct xen_netif_tx_request *txp;
-+ struct xen_netif *netif;
-+ u16 pending_idx;
-+ unsigned data_len;
-+
-+ /* Read back the index stashed by net_tx_build_mops(). */
-+ pending_idx = *((u16 *)skb->data);
-+ netif = netbk->pending_tx_info[pending_idx].netif;
-+ txp = &netbk->pending_tx_info[pending_idx].req;
-+
-+ /* Check the remap error code. */
-+ if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
-+ DPRINTK("netback grant failed.\n");
-+ /* Fragment slots hold indices, not pages: hide them from kfree_skb(). */
-+ skb_shinfo(skb)->nr_frags = 0;
-+ kfree_skb(skb);
-+ continue;
-+ }
-+
-+ /* Copy the linear part out of the mapped page. */
-+ data_len = skb->len;
-+ memcpy(skb->data,
-+ (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
-+ data_len);
-+ if (data_len < txp->size) {
-+ /* Append the packet payload as a fragment. */
-+ txp->offset += data_len;
-+ txp->size -= data_len;
-+ } else {
-+ /* Schedule a response immediately. */
-+ netif_idx_release(netbk, pending_idx);
-+ }
-+
-+ if (txp->flags & NETTXF_csum_blank)
-+ skb->ip_summed = CHECKSUM_PARTIAL;
-+ else if (txp->flags & NETTXF_data_validated)
-+ skb->ip_summed = CHECKSUM_UNNECESSARY;
-+
-+ netbk_fill_frags(netbk, skb);
-+
-+ /*
-+ * If the initial fragment was < PKT_PROT_LEN then
-+ * pull through some bytes from the other fragments to
-+ * increase the linear region to PKT_PROT_LEN bytes.
-+ */
-+ if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
-+ int target = min_t(int, skb->len, PKT_PROT_LEN);
-+ __pskb_pull_tail(skb, target - skb_headlen(skb));
-+ }
-+
-+ skb->dev = netif->dev;
-+ skb->protocol = eth_type_trans(skb, skb->dev);
-+
-+ netif->stats.rx_bytes += skb->len;
-+ netif->stats.rx_packets++;
-+
-+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
-+ if (skb_checksum_setup(skb)) {
-+ DPRINTK("Can't setup checksum in net_tx_action\n");
-+ kfree_skb(skb);
-+ continue;
-+ }
-+ }
-+
-+ /* In always-copy mode the skb must not keep referencing mapped pages. */
-+ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
-+ unlikely(skb_linearize(skb))) {
-+ DPRINTK("Can't linearize skb in net_tx_action.\n");
-+ kfree_skb(skb);
-+ continue;
-+ }
-+
-+ netif_rx_ni(skb);
-+ netif->dev->last_rx = jiffies;
-+ }
-+}
-+
-+/*
-+ * Called after netfront has transmitted; @data is the struct xen_netbk
-+ * pointer.
-+ *
-+ * Releases finished grants, builds and issues the map operations for
-+ * the next batch of packets, submits them, and in delayed-copy mode
-+ * re-arms the pending timer against the oldest in-use entry.
-+ */
-+static void net_tx_action(unsigned long data)
-+{
-+	struct xen_netbk *netbk = (struct xen_netbk *)data;
-+	struct netbk_tx_pending_inuse *oldest;
-+	unsigned nr_mops;
-+
-+	net_tx_action_dealloc(netbk);
-+
-+	nr_mops = net_tx_build_mops(netbk);
-+	if (nr_mops != 0) {
-+		int ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
-+						    netbk->tx_map_ops,
-+						    nr_mops);
-+		BUG_ON(ret);
-+
-+		net_tx_submit(netbk);
-+	}
-+
-+	if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB)
-+		return;
-+	if (list_empty(&netbk->pending_inuse_head))
-+		return;
-+
-+	oldest = list_entry(netbk->pending_inuse_head.next,
-+			    struct netbk_tx_pending_inuse, list);
-+	mod_timer(&netbk->netbk_tx_pending_timer, oldest->alloc_time + HZ);
-+}
-+
-+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
-+{
-+ static DEFINE_SPINLOCK(_lock);
-+ unsigned long flags;
-+ pending_ring_idx_t index;
-+
-+ spin_lock_irqsave(&_lock, flags);
-+ index = pending_index(netbk->dealloc_prod);
-+ netbk->dealloc_ring[index] = pending_idx;
-+ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
-+ smp_wmb();
-+ netbk->dealloc_prod++;
-+ spin_unlock_irqrestore(&_lock, flags);
-+
-+ xen_netbk_bh_handler(netbk, 0);
-+}
-+
-+static void netif_page_release(struct page *page, unsigned int order)
-+{
-+ unsigned int group, idx;
-+ int foreign = netif_get_page_ext(page, &group, &idx);
-+
-+ BUG_ON(!foreign);
-+ BUG_ON(order);
-+
-+ netif_idx_release(&xen_netbk[group], idx);
-+}
-+
-+irqreturn_t netif_be_int(int irq, void *dev_id)
-+{
-+ struct xen_netif *netif = dev_id;
-+ struct xen_netbk *netbk;
-+
-+ if (netif->group == -1)
-+ return IRQ_NONE;
-+
-+ netbk = &xen_netbk[netif->group];
-+
-+ add_to_net_schedule_list_tail(netif);
-+ maybe_schedule_tx_action(netbk);
-+
-+ if (netif_schedulable(netif) && !netbk_queue_full(netif))
-+ netif_wake_queue(netif->dev);
-+
-+ return IRQ_HANDLED;
-+}
-+
-+static void make_tx_response(struct xen_netif *netif,
-+ struct xen_netif_tx_request *txp,
-+ s8 st)
-+{
-+ RING_IDX i = netif->tx.rsp_prod_pvt;
-+ struct xen_netif_tx_response *resp;
-+ int notify;
-+
-+ resp = RING_GET_RESPONSE(&netif->tx, i);
-+ resp->id = txp->id;
-+ resp->status = st;
-+
-+ if (txp->flags & NETTXF_extra_info)
-+ RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
-+
-+ netif->tx.rsp_prod_pvt = ++i;
-+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
-+
-+ /*
-+ * netfront_smartpoll_active indicates whether netfront timer
-+ * is active.
-+ */
-+ if ((netif->smart_poll == 1)) {
-+ if (!(netif->rx.sring->private.netif.smartpoll_active)) {
-+ notify_remote_via_irq(netif->irq);
-+ netif->rx.sring->private.netif.smartpoll_active = 1;
-+ }
-+ } else if (notify)
-+ notify_remote_via_irq(netif->irq);
-+}
-+
-+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
-+ u16 id,
-+ s8 st,
-+ u16 offset,
-+ u16 size,
-+ u16 flags)
-+{
-+ RING_IDX i = netif->rx.rsp_prod_pvt;
-+ struct xen_netif_rx_response *resp;
-+
-+ resp = RING_GET_RESPONSE(&netif->rx, i);
-+ resp->offset = offset;
-+ resp->flags = flags;
-+ resp->id = id;
-+ resp->status = (s16)size;
-+ if (st < 0)
-+ resp->status = (s16)st;
-+
-+ netif->rx.rsp_prod_pvt = ++i;
-+
-+ return resp;
-+}
-+
-+#ifdef NETBE_DEBUG_INTERRUPT
-+static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
-+{
-+ struct list_head *ent;
-+ struct xen_netif *netif;
-+ int i = 0;
-+ int group = 0;
-+
-+ printk(KERN_ALERT "netif_schedule_list:\n");
-+
-+ for (group = 0; group < xen_netbk_group_nr; group++) {
-+ struct xen_netbk *netbk = &xen_netbk[group];
-+ spin_lock_irq(&netbk->net_schedule_list_lock);
-+ printk(KERN_ALERT "xen_netback group number: %d\n", group);
-+ list_for_each(ent, &netbk->net_schedule_list) {
-+ netif = list_entry(ent, struct xen_netif, list);
-+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
-+ "rx_resp_prod=%08x\n",
-+ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
-+ printk(KERN_ALERT
-+ " tx_req_cons=%08x, tx_resp_prod=%08x)\n",
-+ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
-+ printk(KERN_ALERT
-+ " shared(rx_req_prod=%08x "
-+ "rx_resp_prod=%08x\n",
-+ netif->rx.sring->req_prod,
-+ netif->rx.sring->rsp_prod);
-+ printk(KERN_ALERT
-+ " rx_event=%08x, tx_req_prod=%08x\n",
-+ netif->rx.sring->rsp_event,
-+ netif->tx.sring->req_prod);
-+ printk(KERN_ALERT
-+ " tx_resp_prod=%08x, tx_event=%08x)\n",
-+ netif->tx.sring->rsp_prod,
-+ netif->tx.sring->rsp_event);
-+ i++;
-+ }
-+ spin_unlock_irq(&netbk->net_schedule_list_lock);
-+ }
-+
-+ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
-+
-+ return IRQ_HANDLED;
-+}
-+#endif
-+
-+static inline int rx_work_todo(struct xen_netbk *netbk)
-+{
-+ return !skb_queue_empty(&netbk->rx_queue);
-+}
-+
-+static inline int tx_work_todo(struct xen_netbk *netbk)
-+{
-+ if (netbk->dealloc_cons != netbk->dealloc_prod)
-+ return 1;
-+
-+ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-+ !list_empty(&netbk->net_schedule_list))
-+ return 1;
-+
-+ return 0;
-+}
-+
-+static int netbk_action_thread(void *data)
-+{
-+ struct xen_netbk *netbk = (struct xen_netbk *)data;
-+ while (!kthread_should_stop()) {
-+ wait_event_interruptible(netbk->kthread.netbk_action_wq,
-+ rx_work_todo(netbk)
-+ || tx_work_todo(netbk)
-+ || kthread_should_stop());
-+ cond_resched();
-+
-+ if (kthread_should_stop())
-+ break;
-+
-+ if (rx_work_todo(netbk))
-+ net_rx_action((unsigned long)netbk);
-+
-+ if (tx_work_todo(netbk))
-+ net_tx_action((unsigned long)netbk);
-+ }
-+
-+ return 0;
-+}
-+
-+static int __init netback_init(void)
-+{
-+ int i;
-+ struct page *page;
-+ int rc = 0;
-+ int group;
-+
-+ if (!xen_pv_domain())
-+ return -ENODEV;
-+
-+ xen_netbk_group_nr = num_online_cpus();
-+ xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
-+ if (!xen_netbk) {
-+ printk(KERN_ALERT "%s: out of memory\n", __func__);
-+ return -ENOMEM;
-+ }
-+ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
-+
-+ /* We can increase reservation by this much in net_rx_action(). */
-+// balloon_update_driver_allowance(NET_RX_RING_SIZE);
-+
-+ for (group = 0; group < xen_netbk_group_nr; group++) {
-+ struct xen_netbk *netbk = &xen_netbk[group];
-+ skb_queue_head_init(&netbk->rx_queue);
-+ skb_queue_head_init(&netbk->tx_queue);
-+
-+ init_timer(&netbk->net_timer);
-+ netbk->net_timer.data = (unsigned long)netbk;
-+ netbk->net_timer.function = net_alarm;
-+
-+ init_timer(&netbk->netbk_tx_pending_timer);
-+ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
-+ netbk->netbk_tx_pending_timer.function =
-+ netbk_tx_pending_timeout;
-+
-+ netbk->mmap_pages =
-+ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
-+ if (!netbk->mmap_pages) {
-+ printk(KERN_ALERT "%s: out of memory\n", __func__);
-+ del_timer(&netbk->netbk_tx_pending_timer);
-+ del_timer(&netbk->net_timer);
-+ rc = -ENOMEM;
-+ goto failed_init;
-+ }
-+
-+ for (i = 0; i < MAX_PENDING_REQS; i++) {
-+ page = netbk->mmap_pages[i];
-+ SetPageForeign(page, netif_page_release);
-+ netif_set_page_ext(page, group, i);
-+ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
-+ }
-+
-+ netbk->pending_cons = 0;
-+ netbk->pending_prod = MAX_PENDING_REQS;
-+ for (i = 0; i < MAX_PENDING_REQS; i++)
-+ netbk->pending_ring[i] = i;
-+
-+ if (MODPARM_netback_kthread) {
-+ init_waitqueue_head(&netbk->kthread.netbk_action_wq);
-+ netbk->kthread.task =
-+ kthread_create(netbk_action_thread,
-+ (void *)netbk,
-+ "netback/%u", group);
-+
-+ if (!IS_ERR(netbk->kthread.task)) {
-+ kthread_bind(netbk->kthread.task, group);
-+ } else {
-+ printk(KERN_ALERT
-+ "kthread_run() fails at netback\n");
-+ free_empty_pages_and_pagevec(netbk->mmap_pages,
-+ MAX_PENDING_REQS);
-+ del_timer(&netbk->netbk_tx_pending_timer);
-+ del_timer(&netbk->net_timer);
-+ rc = PTR_ERR(netbk->kthread.task);
-+ goto failed_init;
-+ }
-+ } else {
-+ tasklet_init(&netbk->tasklet.net_tx_tasklet,
-+ net_tx_action,
-+ (unsigned long)netbk);
-+ tasklet_init(&netbk->tasklet.net_rx_tasklet,
-+ net_rx_action,
-+ (unsigned long)netbk);
-+ }
-+
-+ INIT_LIST_HEAD(&netbk->pending_inuse_head);
-+ INIT_LIST_HEAD(&netbk->net_schedule_list);
-+
-+ spin_lock_init(&netbk->net_schedule_list_lock);
-+
-+ atomic_set(&netbk->netfront_count, 0);
-+
-+ if (MODPARM_netback_kthread)
-+ wake_up_process(netbk->kthread.task);
-+ }
-+
-+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
-+ if (MODPARM_copy_skb) {
-+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
-+ NULL, 0))
-+ netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
-+ else
-+ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
-+ }
-+
-+ //netif_accel_init();
-+
-+ rc = netif_xenbus_init();
-+ if (rc)
-+ goto failed_init;
-+
-+#ifdef NETBE_DEBUG_INTERRUPT
-+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
-+ 0,
-+ netif_be_dbg,
-+ IRQF_SHARED,
-+ "net-be-dbg",
-+ &netif_be_dbg);
-+#endif
-+
-+ return 0;
-+
-+failed_init:
-+ for (i = 0; i < group; i++) {
-+ struct xen_netbk *netbk = &xen_netbk[i];
-+ free_empty_pages_and_pagevec(netbk->mmap_pages,
-+ MAX_PENDING_REQS);
-+ del_timer(&netbk->netbk_tx_pending_timer);
-+ del_timer(&netbk->net_timer);
-+ if (MODPARM_netback_kthread)
-+ kthread_stop(netbk->kthread.task);
-+ }
-+ vfree(xen_netbk);
-+ return rc;
-+
-+}
-+
-+module_init(netback_init);
-+
-+MODULE_LICENSE("Dual BSD/GPL");
-diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
-new file mode 100644
-index 0000000..1930f64
---- /dev/null
-+++ b/drivers/xen/netback/xenbus.c
-@@ -0,0 +1,518 @@
-+/* Xenbus code for netif backend
-+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
-+ Copyright (C) 2005 XenSource Ltd
-+
-+ This program is free software; you can redistribute it and/or modify
-+ it under the terms of the GNU General Public License as published by
-+ the Free Software Foundation; either version 2 of the License, or
-+ (at your option) any later version.
-+
-+ This program is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ GNU General Public License for more details.
-+
-+ You should have received a copy of the GNU General Public License
-+ along with this program; if not, write to the Free Software
-+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+*/
-+
-+#include <stdarg.h>
-+#include <linux/module.h>
-+#include <xen/xenbus.h>
-+#include "common.h"
-+
-+#if 0
-+#undef DPRINTK
-+#define DPRINTK(fmt, args...) \
-+ printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
-+#endif
-+
-+
-+static int connect_rings(struct backend_info *);
-+static void connect(struct backend_info *);
-+static void backend_create_netif(struct backend_info *be);
-+static void unregister_hotplug_status_watch(struct backend_info *be);
-+
-+static int netback_remove(struct xenbus_device *dev)
-+{
-+ struct backend_info *be = dev_get_drvdata(&dev->dev);
-+
-+ //netback_remove_accelerators(be, dev);
-+
-+ unregister_hotplug_status_watch(be);
-+ if (be->netif) {
-+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-+ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
-+ netif_disconnect(be->netif);
-+ be->netif = NULL;
-+ }
-+ kfree(be);
-+ dev_set_drvdata(&dev->dev, NULL);
-+ return 0;
-+}
-+
-+
-+/**
-+ * Entry point to this code when a new device is created. Allocate the basic
-+ * structures and switch to InitWait.
-+ */
-+static int netback_probe(struct xenbus_device *dev,
-+ const struct xenbus_device_id *id)
-+{
-+ const char *message;
-+ struct xenbus_transaction xbt;
-+ int err;
-+ int sg;
-+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
-+ GFP_KERNEL);
-+ if (!be) {
-+ xenbus_dev_fatal(dev, -ENOMEM,
-+ "allocating backend structure");
-+ return -ENOMEM;
-+ }
-+
-+ be->dev = dev;
-+ dev_set_drvdata(&dev->dev, be);
-+
-+ sg = 1;
-+ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
-+ sg = 0;
-+
-+ do {
-+ err = xenbus_transaction_start(&xbt);
-+ if (err) {
-+ xenbus_dev_fatal(dev, err, "starting transaction");
-+ goto fail;
-+ }
-+
-+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
-+ if (err) {
-+ message = "writing feature-sg";
-+ goto abort_transaction;
-+ }
-+
-+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
-+ "%d", sg);
-+ if (err) {
-+ message = "writing feature-gso-tcpv4";
-+ goto abort_transaction;
-+ }
-+
-+ /* We support rx-copy path. */
-+ err = xenbus_printf(xbt, dev->nodename,
-+ "feature-rx-copy", "%d", 1);
-+ if (err) {
-+ message = "writing feature-rx-copy";
-+ goto abort_transaction;
-+ }
-+
-+ /*
-+ * We don't support rx-flip path (except old guests who don't
-+ * grok this feature flag).
-+ */
-+ err = xenbus_printf(xbt, dev->nodename,
-+ "feature-rx-flip", "%d", 0);
-+ if (err) {
-+ message = "writing feature-rx-flip";
-+ goto abort_transaction;
-+ }
-+
-+ /* We support data smart poll mechanism */
-+ err = xenbus_printf(xbt, dev->nodename,
-+ "feature-smart-poll", "%d", 1);
-+ if (err) {
-+ message = "writing feature-smart-poll";
-+ goto abort_transaction;
-+ }
-+
-+ err = xenbus_transaction_end(xbt, 0);
-+ } while (err == -EAGAIN);
-+
-+ if (err) {
-+ xenbus_dev_fatal(dev, err, "completing transaction");
-+ goto fail;
-+ }
-+
-+ //netback_probe_accelerators(be, dev);
-+
-+ err = xenbus_switch_state(dev, XenbusStateInitWait);
-+ if (err)
-+ goto fail;
-+
-+ /* This kicks hotplug scripts, so do it immediately. */
-+ backend_create_netif(be);
-+
-+ return 0;
-+
-+abort_transaction:
-+ xenbus_transaction_end(xbt, 1);
-+ xenbus_dev_fatal(dev, err, "%s", message);
-+fail:
-+ DPRINTK("failed");
-+ netback_remove(dev);
-+ return err;
-+}
-+
-+
-+/**
-+ * Handle the creation of the hotplug script environment. We add the script
-+ * and vif variables to the environment, for the benefit of the vif-* hotplug
-+ * scripts.
-+ */
-+static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
-+{
-+ struct backend_info *be = dev_get_drvdata(&xdev->dev);
-+ char *val;
-+
-+ DPRINTK("netback_uevent");
-+
-+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
-+ if (IS_ERR(val)) {
-+ int err = PTR_ERR(val);
-+ xenbus_dev_fatal(xdev, err, "reading script");
-+ return err;
-+ }
-+ else {
-+ if (add_uevent_var(env, "script=%s", val)) {
-+ kfree(val);
-+ return -ENOMEM;
-+ }
-+ kfree(val);
-+ }
-+
-+ if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name))
-+ return -ENOMEM;
-+
-+ return 0;
-+}
-+
-+
-+static void backend_create_netif(struct backend_info *be)
-+{
-+ int err;
-+ long handle;
-+ struct xenbus_device *dev = be->dev;
-+
-+ if (be->netif != NULL)
-+ return;
-+
-+ err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
-+ if (err != 1) {
-+ xenbus_dev_fatal(dev, err, "reading handle");
-+ return;
-+ }
-+
-+ be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
-+ if (IS_ERR(be->netif)) {
-+ err = PTR_ERR(be->netif);
-+ be->netif = NULL;
-+ xenbus_dev_fatal(dev, err, "creating interface");
-+ return;
-+ }
-+
-+ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
-+}
-+
-+
-+static void disconnect_backend(struct xenbus_device *dev)
-+{
-+ struct backend_info *be = dev_get_drvdata(&dev->dev);
-+
-+ if (be->netif) {
-+ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
-+ netif_disconnect(be->netif);
-+ be->netif = NULL;
-+ }
-+}
-+
-+/**
-+ * Callback received when the frontend's state changes.
-+ */
-+static void frontend_changed(struct xenbus_device *dev,
-+ enum xenbus_state frontend_state)
-+{
-+ struct backend_info *be = dev_get_drvdata(&dev->dev);
-+
-+ DPRINTK("%s", xenbus_strstate(frontend_state));
-+
-+ be->frontend_state = frontend_state;
-+
-+ switch (frontend_state) {
-+ case XenbusStateInitialising:
-+ if (dev->state == XenbusStateClosed) {
-+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
-+ __FUNCTION__, dev->nodename);
-+ xenbus_switch_state(dev, XenbusStateInitWait);
-+ }
-+ break;
-+
-+ case XenbusStateInitialised:
-+ break;
-+
-+ case XenbusStateConnected:
-+ if (dev->state == XenbusStateConnected)
-+ break;
-+ backend_create_netif(be);
-+ if (be->netif)
-+ connect(be);
-+ break;
-+
-+ case XenbusStateClosing:
-+ if (be->netif)
-+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-+ disconnect_backend(dev);
-+ xenbus_switch_state(dev, XenbusStateClosing);
-+ break;
-+
-+ case XenbusStateClosed:
-+ xenbus_switch_state(dev, XenbusStateClosed);
-+ if (xenbus_dev_is_online(dev))
-+ break;
-+ /* fall through if not online */
-+ case XenbusStateUnknown:
-+ device_unregister(&dev->dev);
-+ break;
-+
-+ default:
-+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
-+ frontend_state);
-+ break;
-+ }
-+}
-+
-+
-+static void xen_net_read_rate(struct xenbus_device *dev,
-+ unsigned long *bytes, unsigned long *usec)
-+{
-+ char *s, *e;
-+ unsigned long b, u;
-+ char *ratestr;
-+
-+ /* Default to unlimited bandwidth. */
-+ *bytes = ~0UL;
-+ *usec = 0;
-+
-+ ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
-+ if (IS_ERR(ratestr))
-+ return;
-+
-+ s = ratestr;
-+ b = simple_strtoul(s, &e, 10);
-+ if ((s == e) || (*e != ','))
-+ goto fail;
-+
-+ s = e + 1;
-+ u = simple_strtoul(s, &e, 10);
-+ if ((s == e) || (*e != '\0'))
-+ goto fail;
-+
-+ *bytes = b;
-+ *usec = u;
-+
-+ kfree(ratestr);
-+ return;
-+
-+ fail:
-+ WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
-+ kfree(ratestr);
-+}
-+
-+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-+{
-+ char *s, *e, *macstr;
-+ int i;
-+
-+ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
-+ if (IS_ERR(macstr))
-+ return PTR_ERR(macstr);
-+
-+ for (i = 0; i < ETH_ALEN; i++) {
-+ mac[i] = simple_strtoul(s, &e, 16);
-+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
-+ kfree(macstr);
-+ return -ENOENT;
-+ }
-+ s = e+1;
-+ }
-+
-+ kfree(macstr);
-+ return 0;
-+}
-+
-+static void unregister_hotplug_status_watch(struct backend_info *be)
-+{
-+ if (be->have_hotplug_status_watch) {
-+ unregister_xenbus_watch(&be->hotplug_status_watch);
-+ kfree(be->hotplug_status_watch.node);
-+ }
-+ be->have_hotplug_status_watch = 0;
-+}
-+
-+static void hotplug_status_changed(struct xenbus_watch *watch,
-+ const char **vec,
-+ unsigned int vec_size)
-+{
-+ struct backend_info *be = container_of(watch,
-+ struct backend_info,
-+ hotplug_status_watch);
-+ char *str;
-+ unsigned int len;
-+
-+ str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
-+ if (IS_ERR(str))
-+ return;
-+ if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
-+ xenbus_switch_state(be->dev, XenbusStateConnected);
-+ /* Not interested in this watch anymore. */
-+ unregister_hotplug_status_watch(be);
-+ }
-+ kfree(str);
-+}
-+
-+static void connect(struct backend_info *be)
-+{
-+ int err;
-+ struct xenbus_device *dev = be->dev;
-+
-+ err = connect_rings(be);
-+ if (err)
-+ return;
-+
-+ err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
-+ if (err) {
-+ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
-+ return;
-+ }
-+
-+ xen_net_read_rate(dev, &be->netif->credit_bytes,
-+ &be->netif->credit_usec);
-+ be->netif->remaining_credit = be->netif->credit_bytes;
-+
-+ unregister_hotplug_status_watch(be);
-+ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
-+ hotplug_status_changed,
-+ "%s/%s", dev->nodename, "hotplug-status");
-+ if (err) {
-+ /* Switch now, since we can't do a watch. */
-+ xenbus_switch_state(dev, XenbusStateConnected);
-+ } else {
-+ be->have_hotplug_status_watch = 1;
-+ }
-+
-+ netif_wake_queue(be->netif->dev);
-+}
-+
-+
-+static int connect_rings(struct backend_info *be)
-+{
-+ struct xen_netif *netif = be->netif;
-+ struct xenbus_device *dev = be->dev;
-+ unsigned long tx_ring_ref, rx_ring_ref;
-+ unsigned int evtchn, rx_copy;
-+ int err;
-+ int val;
-+
-+ DPRINTK("");
-+
-+ err = xenbus_gather(XBT_NIL, dev->otherend,
-+ "tx-ring-ref", "%lu", &tx_ring_ref,
-+ "rx-ring-ref", "%lu", &rx_ring_ref,
-+ "event-channel", "%u", &evtchn, NULL);
-+ if (err) {
-+ xenbus_dev_fatal(dev, err,
-+ "reading %s/ring-ref and event-channel",
-+ dev->otherend);
-+ return err;
-+ }
-+
-+ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
-+ &rx_copy);
-+ if (err == -ENOENT) {
-+ err = 0;
-+ rx_copy = 0;
-+ }
-+ if (err < 0) {
-+ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
-+ dev->otherend);
-+ return err;
-+ }
-+ if (!rx_copy)
-+ return -EOPNOTSUPP;
-+
-+ if (netif->dev->tx_queue_len != 0) {
-+ if (xenbus_scanf(XBT_NIL, dev->otherend,
-+ "feature-rx-notify", "%d", &val) < 0)
-+ val = 0;
-+ if (val)
-+ netif->can_queue = 1;
-+ else
-+ /* Must be non-zero for pfifo_fast to work. */
-+ netif->dev->tx_queue_len = 1;
-+ }
-+
-+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
-+ "%d", &val) < 0)
-+ val = 0;
-+ netif->can_sg = !!val;
-+
-+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
-+ "%d", &val) < 0)
-+ val = 0;
-+ netif->gso = !!val;
-+
-+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
-+ "%d", &val) < 0)
-+ val = 0;
-+ netif->gso_prefix = !!val;
-+
-+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
-+ "%d", &val) < 0)
-+ val = 0;
-+ netif->csum = !val;
-+
-+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-smart-poll",
-+ "%d", &val) < 0)
-+ val = 0;
-+ netif->smart_poll = !!val;
-+
-+ /* Set dev->features */
-+ netif_set_features(netif);
-+
-+ /* Map the shared frame, irq etc. */
-+ err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
-+ if (err) {
-+ xenbus_dev_fatal(dev, err,
-+ "mapping shared-frames %lu/%lu port %u",
-+ tx_ring_ref, rx_ring_ref, evtchn);
-+ return err;
-+ }
-+ return 0;
-+}
-+
-+
-+/* ** Driver Registration ** */
-+
-+
-+static const struct xenbus_device_id netback_ids[] = {
-+ { "vif" },
-+ { "" }
-+};
-+
-+
-+static struct xenbus_driver netback = {
-+ .name = "vif",
-+ .owner = THIS_MODULE,
-+ .ids = netback_ids,
-+ .probe = netback_probe,
-+ .remove = netback_remove,
-+ .uevent = netback_uevent,
-+ .otherend_changed = frontend_changed,
-+};
-+
-+
-+int netif_xenbus_init(void)
-+{
-+ printk(KERN_CRIT "registering netback\n");
-+ return xenbus_register_backend(&netback);
-+}
-diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
-new file mode 100644
-index 0000000..ae693e7
---- /dev/null
-+++ b/drivers/xen/pci.c
-@@ -0,0 +1,124 @@
-+/*
-+ * Copyright (c) 2009, Intel Corporation.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms and conditions of the GNU General Public License,
-+ * version 2, as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-+ * more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along with
-+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-+ * Place - Suite 330, Boston, MA 02111-1307 USA.
-+ *
-+ * Author: Weidong Han <weidong.han@intel.com>
-+ */
-+
-+#include <linux/pci.h>
-+
-+#include <xen/interface/xen.h>
-+#include <xen/interface/physdev.h>
-+
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/hypercall.h>
-+
-+#include "../pci/pci.h"
-+
-+
-+#ifdef CONFIG_PCI_IOV
-+#define HANDLE_PCI_IOV 1
-+#else
-+#define HANDLE_PCI_IOV 0
-+#endif
-+
-+static int xen_add_device(struct device *dev)
-+{
-+ int r;
-+ struct pci_dev *pci_dev = to_pci_dev(dev);
-+
-+ if (HANDLE_PCI_IOV && pci_dev->is_virtfn) {
-+ struct physdev_manage_pci_ext manage_pci_ext = {
-+ .bus = pci_dev->bus->number,
-+ .devfn = pci_dev->devfn,
-+ .is_virtfn = 1,
-+#ifdef CONFIG_PCI_IOV
-+ .physfn.bus = pci_dev->physfn->bus->number,
-+ .physfn.devfn = pci_dev->physfn->devfn,
-+#endif
-+ };
-+
-+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
-+ &manage_pci_ext);
-+ } else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
-+ struct physdev_manage_pci_ext manage_pci_ext = {
-+ .bus = pci_dev->bus->number,
-+ .devfn = pci_dev->devfn,
-+ .is_extfn = 1,
-+ };
-+
-+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
-+ &manage_pci_ext);
-+ } else {
-+ struct physdev_manage_pci manage_pci = {
-+ .bus = pci_dev->bus->number,
-+ .devfn = pci_dev->devfn,
-+ };
-+
-+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add,
-+ &manage_pci);
-+ }
-+
-+ return r;
-+}
-+
-+static int xen_remove_device(struct device *dev)
-+{
-+ int r;
-+ struct pci_dev *pci_dev = to_pci_dev(dev);
-+ struct physdev_manage_pci manage_pci;
-+
-+ manage_pci.bus = pci_dev->bus->number;
-+ manage_pci.devfn = pci_dev->devfn;
-+
-+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
-+ &manage_pci);
-+
-+ return r;
-+}
-+
-+static int xen_pci_notifier(struct notifier_block *nb,
-+ unsigned long action, void *data)
-+{
-+ struct device *dev = data;
-+ int r = 0;
-+
-+ switch (action) {
-+ case BUS_NOTIFY_ADD_DEVICE:
-+ r = xen_add_device(dev);
-+ break;
-+ case BUS_NOTIFY_DEL_DEVICE:
-+ r = xen_remove_device(dev);
-+ break;
-+ default:
-+ break;
-+ }
-+
-+ return r;
-+}
-+
-+struct notifier_block device_nb = {
-+ .notifier_call = xen_pci_notifier,
-+};
-+
-+static int __init register_xen_pci_notifier(void)
-+{
-+ if (!xen_pv_domain())
-+ return 0;
-+
-+ return bus_register_notifier(&pci_bus_type, &device_nb);
-+}
-+
-+arch_initcall(register_xen_pci_notifier);
-diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
-new file mode 100644
-index 0000000..38bc123
---- /dev/null
-+++ b/drivers/xen/pciback/Makefile
-@@ -0,0 +1,17 @@
-+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
-+
-+xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
-+xen-pciback-y += conf_space.o conf_space_header.o \
-+ conf_space_capability.o \
-+ conf_space_capability_vpd.o \
-+ conf_space_capability_pm.o \
-+ conf_space_quirks.o
-+xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
-+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
-+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
-+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
-+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
-+
-+ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
-+EXTRA_CFLAGS += -DDEBUG
-+endif
-diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
-new file mode 100644
-index 0000000..370c18e
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space.c
-@@ -0,0 +1,435 @@
-+/*
-+ * PCI Backend - Functions for creating a virtual configuration space for
-+ * exported PCI Devices.
-+ * It's dangerous to allow PCI Driver Domains to change their
-+ * device's resources (memory, i/o ports, interrupts). We need to
-+ * restrict changes to certain PCI Configuration registers:
-+ * BARs, INTERRUPT_PIN, most registers in the header...
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/pci.h>
-+#include "pciback.h"
-+#include "conf_space.h"
-+#include "conf_space_quirks.h"
-+
-+static int permissive;
-+module_param(permissive, bool, 0644);
-+
-+#define DEFINE_PCI_CONFIG(op, size, type) \
-+int pciback_##op##_config_##size \
-+(struct pci_dev *dev, int offset, type value, void *data) \
-+{ \
-+ return pci_##op##_config_##size(dev, offset, value); \
-+}
-+
-+DEFINE_PCI_CONFIG(read, byte, u8 *)
-+DEFINE_PCI_CONFIG(read, word, u16 *)
-+DEFINE_PCI_CONFIG(read, dword, u32 *)
-+
-+DEFINE_PCI_CONFIG(write, byte, u8)
-+DEFINE_PCI_CONFIG(write, word, u16)
-+DEFINE_PCI_CONFIG(write, dword, u32)
-+
-+static int conf_space_read(struct pci_dev *dev,
-+ const struct config_field_entry *entry,
-+ int offset, u32 *value)
-+{
-+ int ret = 0;
-+ const struct config_field *field = entry->field;
-+
-+ *value = 0;
-+
-+ switch (field->size) {
-+ case 1:
-+ if (field->u.b.read)
-+ ret = field->u.b.read(dev, offset, (u8 *) value,
-+ entry->data);
-+ break;
-+ case 2:
-+ if (field->u.w.read)
-+ ret = field->u.w.read(dev, offset, (u16 *) value,
-+ entry->data);
-+ break;
-+ case 4:
-+ if (field->u.dw.read)
-+ ret = field->u.dw.read(dev, offset, value, entry->data);
-+ break;
-+ }
-+ return ret;
-+}
-+
-+static int conf_space_write(struct pci_dev *dev,
-+ const struct config_field_entry *entry,
-+ int offset, u32 value)
-+{
-+ int ret = 0;
-+ const struct config_field *field = entry->field;
-+
-+ switch (field->size) {
-+ case 1:
-+ if (field->u.b.write)
-+ ret = field->u.b.write(dev, offset, (u8) value,
-+ entry->data);
-+ break;
-+ case 2:
-+ if (field->u.w.write)
-+ ret = field->u.w.write(dev, offset, (u16) value,
-+ entry->data);
-+ break;
-+ case 4:
-+ if (field->u.dw.write)
-+ ret = field->u.dw.write(dev, offset, value,
-+ entry->data);
-+ break;
-+ }
-+ return ret;
-+}
-+
-+static inline u32 get_mask(int size)
-+{
-+ if (size == 1)
-+ return 0xff;
-+ else if (size == 2)
-+ return 0xffff;
-+ else
-+ return 0xffffffff;
-+}
-+
-+static inline int valid_request(int offset, int size)
-+{
-+ /* Validate request (no un-aligned requests) */
-+ if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
-+ return 1;
-+ return 0;
-+}
-+
-+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
-+ int offset)
-+{
-+ if (offset >= 0) {
-+ new_val_mask <<= (offset * 8);
-+ new_val <<= (offset * 8);
-+ } else {
-+ new_val_mask >>= (offset * -8);
-+ new_val >>= (offset * -8);
-+ }
-+ val = (val & ~new_val_mask) | (new_val & new_val_mask);
-+
-+ return val;
-+}
-+
-+static int pcibios_err_to_errno(int err)
-+{
-+ switch (err) {
-+ case PCIBIOS_SUCCESSFUL:
-+ return XEN_PCI_ERR_success;
-+ case PCIBIOS_DEVICE_NOT_FOUND:
-+ return XEN_PCI_ERR_dev_not_found;
-+ case PCIBIOS_BAD_REGISTER_NUMBER:
-+ return XEN_PCI_ERR_invalid_offset;
-+ case PCIBIOS_FUNC_NOT_SUPPORTED:
-+ return XEN_PCI_ERR_not_implemented;
-+ case PCIBIOS_SET_FAILED:
-+ return XEN_PCI_ERR_access_denied;
-+ }
-+ return err;
-+}
-+
-+int pciback_config_read(struct pci_dev *dev, int offset, int size,
-+ u32 *ret_val)
-+{
-+ int err = 0;
-+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+ const struct config_field_entry *cfg_entry;
-+ const struct config_field *field;
-+ int req_start, req_end, field_start, field_end;
-+ /* if read fails for any reason, return 0
-+ * (as if device didn't respond) */
-+ u32 value = 0, tmp_val;
-+
-+ if (unlikely(verbose_request))
-+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
-+ pci_name(dev), size, offset);
-+
-+ if (!valid_request(offset, size)) {
-+ err = XEN_PCI_ERR_invalid_offset;
-+ goto out;
-+ }
-+
-+ /* Get the real value first, then modify as appropriate */
-+ switch (size) {
-+ case 1:
-+ err = pci_read_config_byte(dev, offset, (u8 *) &value);
-+ break;
-+ case 2:
-+ err = pci_read_config_word(dev, offset, (u16 *) &value);
-+ break;
-+ case 4:
-+ err = pci_read_config_dword(dev, offset, &value);
-+ break;
-+ }
-+
-+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-+ field = cfg_entry->field;
-+
-+ req_start = offset;
-+ req_end = offset + size;
-+ field_start = OFFSET(cfg_entry);
-+ field_end = OFFSET(cfg_entry) + field->size;
-+
-+ if ((req_start >= field_start && req_start < field_end)
-+ || (req_end > field_start && req_end <= field_end)) {
-+ err = conf_space_read(dev, cfg_entry, field_start,
-+ &tmp_val);
-+ if (err)
-+ goto out;
-+
-+ value = merge_value(value, tmp_val,
-+ get_mask(field->size),
-+ field_start - req_start);
-+ }
-+ }
-+
-+out:
-+ if (unlikely(verbose_request))
-+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
-+ pci_name(dev), size, offset, value);
-+
-+ *ret_val = value;
-+ return pcibios_err_to_errno(err);
-+}
-+
-+/*
-+ * Handle a write of @size bytes of @value at @offset in the virtual
-+ * configuration space of @dev.
-+ *
-+ * Every registered config field that overlaps the request is updated with a
-+ * read-modify-write through its own handler.  If no field overlaps, the
-+ * write only reaches the device when permissive mode is on (per device or
-+ * via the global flag); otherwise it is dropped with a one-time warning.
-+ *
-+ * Returns XEN_PCI_ERR_invalid_offset when valid_request() rejects the
-+ * request, otherwise the last status converted by pcibios_err_to_errno().
-+ */
-+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
-+{
-+	int err = 0, handled = 0;
-+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+	const struct config_field_entry *cfg_entry;
-+	const struct config_field *field;
-+	u32 tmp_val;
-+	int req_start, req_end, field_start, field_end;
-+
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG
-+		       "pciback: %s: write request %d bytes at 0x%x = %x\n",
-+		       pci_name(dev), size, offset, value);
-+
-+	if (!valid_request(offset, size))
-+		return XEN_PCI_ERR_invalid_offset;
-+
-+	/* The requested byte range is the same for every field. */
-+	req_start = offset;
-+	req_end = offset + size;
-+
-+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-+		field = cfg_entry->field;
-+
-+		field_start = OFFSET(cfg_entry);
-+		field_end = OFFSET(cfg_entry) + field->size;
-+
-+		/*
-+		 * Two half-open ranges [req_start, req_end) and
-+		 * [field_start, field_end) intersect iff each one starts
-+		 * before the other ends.  Testing only the request's end
-+		 * points would miss a request that completely encloses a
-+		 * narrower field (e.g. a dword write spanning a byte-wide
-+		 * field) and let it bypass that field's handler.
-+		 */
-+		if (req_end > field_start && field_end > req_start) {
-+			tmp_val = 0;
-+
-+			/* Fetch the field's current (virtual) contents... */
-+			err = pciback_config_read(dev, field_start,
-+						  field->size, &tmp_val);
-+			if (err)
-+				break;
-+
-+			/* ...splice in the bytes covered by the request... */
-+			tmp_val = merge_value(tmp_val, value, get_mask(size),
-+					      req_start - field_start);
-+
-+			/* ...and hand the result to the field's handler. */
-+			err = conf_space_write(dev, cfg_entry, field_start,
-+					       tmp_val);
-+
-+			/* handled is set true here, but not every byte
-+			 * may have been written! Properly detecting if
-+			 * every byte is handled is unnecessary as the
-+			 * flag is used to detect devices that need
-+			 * special helpers to work correctly.
-+			 */
-+			handled = 1;
-+		}
-+	}
-+
-+	if (!handled && !err) {
-+		/* By default, anything not specifically handled above is
-+		 * read-only. The permissive flag changes this behavior so
-+		 * that anything not specifically handled above is writable.
-+		 * This means that some fields may still be read-only because
-+		 * they have entries in the config_field list that intercept
-+		 * the write and do nothing. */
-+		if (dev_data->permissive || permissive) {
-+			switch (size) {
-+			case 1:
-+				err = pci_write_config_byte(dev, offset,
-+							    (u8) value);
-+				break;
-+			case 2:
-+				err = pci_write_config_word(dev, offset,
-+							    (u16) value);
-+				break;
-+			case 4:
-+				err = pci_write_config_dword(dev, offset,
-+							     (u32) value);
-+				break;
-+			}
-+		} else if (!dev_data->warned_on_write) {
-+			/* Warn only once per device to avoid log flooding. */
-+			dev_data->warned_on_write = 1;
-+			dev_warn(&dev->dev, "Driver tried to write to a "
-+				 "read-only configuration space field at offset"
-+				 " 0x%x, size %d. This may be harmless, but if "
-+				 "you have problems with your device:\n"
-+				 "1) see permissive attribute in sysfs\n"
-+				 "2) report problems to the xen-devel "
-+				 "mailing list along with details of your "
-+				 "device obtained from lspci.\n", offset, size);
-+		}
-+	}
-+
-+	return pcibios_err_to_errno(err);
-+}
-+
-+/*
-+ * Remove and free every config field entry of @dev whose field supplies a
-+ * clean() hook.  Entries without a clean() hook stay on the list.
-+ * Does nothing beyond the debug message when @dev has no driver data.
-+ */
-+void pciback_config_free_dyn_fields(struct pci_dev *dev)
-+{
-+	struct pciback_dev_data *drvdata = pci_get_drvdata(dev);
-+	struct config_field_entry *entry, *next;
-+
-+	dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
-+		"configuration space fields\n");
-+	if (drvdata == NULL)
-+		return;
-+
-+	/* _safe variant: entries are unlinked and freed while iterating. */
-+	list_for_each_entry_safe(entry, next, &drvdata->config_fields, list) {
-+		const struct config_field *fld = entry->field;
-+
-+		if (fld->clean == NULL)
-+			continue;
-+
-+		/* clean() takes a non-const pointer, hence the cast. */
-+		fld->clean((struct config_field *)fld);
-+		kfree(entry->data);
-+		list_del(&entry->list);
-+		kfree(entry);
-+	}
-+}
-+
-+/*
-+ * Invoke the reset() hook, where one exists, of every config field entry
-+ * registered for @dev.  Does nothing beyond the debug message when @dev has
-+ * no driver data.
-+ */
-+void pciback_config_reset_dev(struct pci_dev *dev)
-+{
-+	struct pciback_dev_data *drvdata = pci_get_drvdata(dev);
-+	const struct config_field_entry *entry;
-+
-+	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
-+	if (drvdata == NULL)
-+		return;
-+
-+	list_for_each_entry(entry, &drvdata->config_fields, list) {
-+		const struct config_field *fld = entry->field;
-+
-+		if (fld->reset != NULL)
-+			fld->reset(dev, OFFSET(entry), entry->data);
-+	}
-+}
-+
-+/*
-+ * Tear down the whole virtual configuration space of @dev: each entry is
-+ * unlinked, its field's release() hook (if any) is called with the entry's
-+ * private data, and the entry itself is freed.  Does nothing beyond the
-+ * debug message when @dev has no driver data.
-+ */
-+void pciback_config_free_dev(struct pci_dev *dev)
-+{
-+	struct pciback_dev_data *drvdata = pci_get_drvdata(dev);
-+	struct config_field_entry *entry, *next;
-+
-+	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
-+	if (drvdata == NULL)
-+		return;
-+
-+	list_for_each_entry_safe(entry, next, &drvdata->config_fields, list) {
-+		const struct config_field *fld;
-+
-+		list_del(&entry->list);
-+
-+		fld = entry->field;
-+		if (fld->release != NULL)
-+			fld->release(dev, OFFSET(entry), entry->data);
-+
-+		kfree(entry);
-+	}
-+}
-+
-+/*
-+ * Register @field for @dev at @base_offset + field->offset.
-+ *
-+ * Runs the field's init() hook (if any) and keeps its result as the entry's
-+ * private data before appending the entry to the device's field list.
-+ *
-+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, the errno
-+ * encoded by init() when it returns an ERR_PTR, or the non-zero result of
-+ * pciback_field_is_dup() when the offset is already registered.
-+ */
-+int pciback_config_add_field_offset(struct pci_dev *dev,
-+				    const struct config_field *field,
-+				    unsigned int base_offset)
-+{
-+	struct pciback_dev_data *drvdata = pci_get_drvdata(dev);
-+	struct config_field_entry *entry;
-+	void *priv;
-+	int err;
-+
-+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-+	if (entry == NULL)
-+		return -ENOMEM;
-+
-+	entry->data = NULL;
-+	entry->field = field;
-+	entry->base_offset = base_offset;
-+
-+	/* silently ignore duplicate fields */
-+	err = pciback_field_is_dup(dev, OFFSET(entry));
-+	if (err)
-+		goto free_entry;
-+
-+	if (field->init != NULL) {
-+		priv = field->init(dev, OFFSET(entry));
-+		if (IS_ERR(priv)) {
-+			err = PTR_ERR(priv);
-+			goto free_entry;
-+		}
-+		entry->data = priv;
-+	}
-+
-+	dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
-+		OFFSET(entry));
-+	list_add_tail(&entry->list, &drvdata->config_fields);
-+	return 0;
-+
-+free_entry:
-+	kfree(entry);
-+	return err;
-+}
-+
-+/*
-+ * Build the virtual configuration space of @dev so that certain registers
-+ * (such as the base address registers, BARs) are tracked here and the client
-+ * cannot manipulate them directly.
-+ *
-+ * Adds the header fields, then the capability fields, then initializes the
-+ * quirk bookkeeping; the first failing step's error code is returned.
-+ */
-+int pciback_config_init_dev(struct pci_dev *dev)
-+{
-+	struct pciback_dev_data *drvdata = pci_get_drvdata(dev);
-+	int rc;
-+
-+	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
-+
-+	INIT_LIST_HEAD(&drvdata->config_fields);
-+
-+	rc = pciback_config_header_add_fields(dev);
-+	if (rc)
-+		return rc;
-+
-+	rc = pciback_config_capability_add_fields(dev);
-+	if (rc)
-+		return rc;
-+
-+	return pciback_config_quirks_init(dev);
-+}
-+/*
-+ * Global setup for the config space overlay code: delegates to
-+ * pciback_config_capability_init() and returns its result.
-+ */
-+int pciback_config_init(void)
-+{
-+ return pciback_config_capability_init();
-+}
-diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
-new file mode 100644
-index 0000000..50ebef2
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space.h
-@@ -0,0 +1,126 @@
-+/*
-+ * PCI Backend - Common data structures for overriding the configuration space
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+
-+#ifndef __XEN_PCIBACK_CONF_SPACE_H__
-+#define __XEN_PCIBACK_CONF_SPACE_H__
-+
-+#include <linux/list.h>
-+#include <linux/err.h>
-+
-+/*
-+ * Per-field lifecycle hooks.
-+ *
-+ * conf_field_init can return an errno in a ptr with ERR_PTR(); any other
-+ * return value is stored as the field entry's private data and is what the
-+ * reset/free hooks later receive as 'data'.
-+ */
-+typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
-+typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
-+typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
-+/* Field accessors: one write/read signature per access width. */
-+typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
-+ void *data);
-+typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
-+ void *data);
-+typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
-+ void *data);
-+typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
-+ void *data);
-+typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
-+ void *data);
-+typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
-+ void *data);
-+
-+/* These are the fields within the configuration space which we
-+ * are interested in intercepting reads/writes to and changing their
-+ * values.
-+ *
-+ * Arrays of these descriptors are terminated by an entry with size == 0.
-+ */
-+struct config_field {
-+ unsigned int offset; /* register offset, relative to the entry's base_offset */
-+ unsigned int size; /* width in bytes; the tables in this driver use 1, 2 or 4 */
-+ unsigned int mask; /* NOTE(review): not referenced in the code visible here */
-+ conf_field_init init; /* optional; its result becomes the entry's private data */
-+ conf_field_reset reset; /* optional; called on virtual config space reset */
-+ conf_field_free release; /* optional; called when the entry is freed */
-+ void (*clean) (struct config_field *field); /* optional; marks a dynamically allocated field */
-+ union { /* accessor pair; presumably selected by 'size' - confirm in conf_space.c */
-+ struct {
-+ conf_dword_write write;
-+ conf_dword_read read;
-+ } dw;
-+ struct {
-+ conf_word_write write;
-+ conf_word_read read;
-+ } w;
-+ struct {
-+ conf_byte_write write;
-+ conf_byte_read read;
-+ } b;
-+ } u;
-+ struct list_head list; /* NOTE(review): usage not visible in this part of the driver */
-+};
-+/* One instance of a config_field attached to a particular device. */
-+struct config_field_entry {
-+ struct list_head list; /* link in the device's config_fields list */
-+ const struct config_field *field; /* descriptor this entry instantiates */
-+ unsigned int base_offset; /* added to field->offset, e.g. a capability's position */
-+ void *data; /* private data returned by field->init(), or NULL */
-+};
-+/* Absolute config space offset of an entry: base_offset + field->offset. */
-+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
-+
-+/* Add fields to a device - the add_fields helpers below expect to get a
-+ * pointer to the first entry in an array (of which the ending is marked by
-+ * size==0)
-+ */
-+int pciback_config_add_field_offset(struct pci_dev *dev,
-+ const struct config_field *field,
-+ unsigned int offset);
-+/* Register a single field at its own offset (base offset 0). */
-+static inline int pciback_config_add_field(struct pci_dev *dev,
-+ const struct config_field *field)
-+{
-+ return pciback_config_add_field_offset(dev, field, 0);
-+}
-+
-+/*
-+ * Register every field of the array starting at @field, stopping at the
-+ * terminating entry (size == 0) or at the first registration that returns
-+ * non-zero.  Returns 0 or that first non-zero result.
-+ */
-+static inline int pciback_config_add_fields(struct pci_dev *dev,
-+					    const struct config_field *field)
-+{
-+	const struct config_field *cur;
-+	int rc;
-+
-+	for (cur = field; cur->size != 0; cur++) {
-+		rc = pciback_config_add_field(dev, cur);
-+		if (rc)
-+			return rc;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Same as pciback_config_add_fields(), but every field is registered
-+ * relative to the base @offset.  Returns 0 or the first non-zero result.
-+ */
-+static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
-+					const struct config_field *field,
-+					unsigned int offset)
-+{
-+	const struct config_field *cur;
-+	int rc;
-+
-+	for (cur = field; cur->size != 0; cur++) {
-+		rc = pciback_config_add_field_offset(dev, cur, offset);
-+		if (rc)
-+			return rc;
-+	}
-+	return 0;
-+}
-+
-+/* Read/Write the real configuration space */
-+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
-+ void *data);
-+int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value,
-+ void *data);
-+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
-+ void *data);
-+int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
-+ void *data);
-+int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
-+ void *data);
-+int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
-+ void *data);
-+
-+int pciback_config_capability_init(void);
-+
-+int pciback_config_header_add_fields(struct pci_dev *dev);
-+int pciback_config_capability_add_fields(struct pci_dev *dev);
-+
-+#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
-diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
-new file mode 100644
-index 0000000..0ea84d6
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability.c
-@@ -0,0 +1,66 @@
-+/*
-+ * PCI Backend - Handles the virtual fields found on the capability lists
-+ * in the configuration space.
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/pci.h>
-+#include "pciback.h"
-+#include "conf_space.h"
-+#include "conf_space_capability.h"
-+
-+static LIST_HEAD(capabilities);
-+/*
-+ * Field overlaid on the first two bytes of every handled capability: reads
-+ * use pciback_read_config_word and no write handler is installed.
-+ */
-+static const struct config_field caplist_header[] = {
-+ {
-+ .offset = PCI_CAP_LIST_ID,
-+ .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
-+ .u.w.read = pciback_read_config_word,
-+ .u.w.write = NULL,
-+ },
-+ {}
-+};
-+/* Append @cap to the file-local list of capabilities that get overlays. */
-+static inline void register_capability(struct pciback_config_capability *cap)
-+{
-+ list_add_tail(&cap->cap_list, &capabilities);
-+}
-+
-+/*
-+ * For each registered capability that @dev actually implements, register
-+ * the generic capability header field and then the capability's own fields,
-+ * both relative to the capability's position in config space.
-+ *
-+ * Returns 0, or the first non-zero result from registering a field.
-+ */
-+int pciback_config_capability_add_fields(struct pci_dev *dev)
-+{
-+	struct pciback_config_capability *cap;
-+
-+	list_for_each_entry(cap, &capabilities, cap_list) {
-+		int pos = pci_find_capability(dev, cap->capability);
-+		int rc;
-+
-+		/* 0 means the device does not have this capability. */
-+		if (!pos)
-+			continue;
-+
-+		dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
-+			cap->capability, pos);
-+
-+		rc = pciback_config_add_fields_offset(dev, caplist_header,
-+						      pos);
-+		if (rc)
-+			return rc;
-+
-+		rc = pciback_config_add_fields_offset(dev, cap->fields, pos);
-+		if (rc)
-+			return rc;
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Register the built-in capability overlays (VPD first, then power
-+ * management).  Always returns 0.
-+ */
-+int pciback_config_capability_init(void)
-+{
-+	static struct pciback_config_capability *const builtin[] = {
-+		&pciback_config_capability_vpd,
-+		&pciback_config_capability_pm,
-+	};
-+	unsigned int idx;
-+
-+	for (idx = 0; idx < ARRAY_SIZE(builtin); idx++)
-+		register_capability(builtin[idx]);
-+
-+	return 0;
-+}
-diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
-new file mode 100644
-index 0000000..8da3ac4
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability.h
-@@ -0,0 +1,26 @@
-+/*
-+ * PCI Backend - Data structures for special overlays for structures on
-+ * the capability list.
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+
-+#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
-+#define __PCIBACK_CONFIG_CAPABILITY_H__
-+
-+#include <linux/pci.h>
-+#include <linux/list.h>
-+/* Describes the field overlay applied to one PCI capability. */
-+struct pciback_config_capability {
-+ struct list_head cap_list; /* link in the list of registered capabilities */
-+
-+ int capability; /* capability ID looked up with pci_find_capability() */
-+
-+ /* If the device has the capability found above, add these fields */
-+ const struct config_field *fields; /* array terminated by a size == 0 entry */
-+};
-+
-+extern struct pciback_config_capability pciback_config_capability_vpd;
-+extern struct pciback_config_capability pciback_config_capability_pm;
-+
-+#endif
-diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
-new file mode 100644
-index 0000000..b15131e
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability_msi.c
-@@ -0,0 +1,110 @@
-+/*
-+ * PCI Backend -- Configuration overlay for MSI capability
-+ */
-+#include <linux/pci.h>
-+#include <linux/slab.h>
-+#include "conf_space.h"
-+#include "conf_space_capability.h"
-+#include <xen/interface/io/pciif.h>
-+#include <xen/events.h>
-+#include "pciback.h"
-+
-+/*
-+ * Enable MSI on @dev on behalf of the frontend behind @pdev.
-+ *
-+ * On success op->value receives xen_gsi_from_irq(dev->irq), interrupt
-+ * acking is switched off in the device's driver data, and 0 is returned.
-+ * On failure op->value is zeroed and XEN_PCI_ERR_op_failed is returned.
-+ */
-+int pciback_enable_msi(struct pciback_device *pdev,
-+		       struct pci_dev *dev, struct xen_pci_op *op)
-+{
-+	int otherend = pdev->xdev->otherend_id;
-+	struct pciback_dev_data *drvdata;
-+	int rc;
-+
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
-+
-+	rc = pci_enable_msi(dev);
-+	if (rc != 0) {
-+		printk(KERN_ERR "error enable msi for guest %x status %x\n",
-+		       otherend, rc);
-+		op->value = 0;
-+		return XEN_PCI_ERR_op_failed;
-+	}
-+
-+	/* Hand the guest the translated value, not the local domain's
-+	 * IRQ number. */
-+	op->value = xen_gsi_from_irq(dev->irq);
-+
-+	drvdata = pci_get_drvdata(dev);
-+	if (drvdata != NULL)
-+		drvdata->ack_intr = 0;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Disable MSI on @dev.  op->value receives xen_gsi_from_irq() of the IRQ
-+ * the device has after MSI is disabled, and interrupt acking is switched
-+ * back on in the device's driver data.  Always returns 0.
-+ */
-+int pciback_disable_msi(struct pciback_device *pdev,
-+			struct pci_dev *dev, struct xen_pci_op *op)
-+{
-+	struct pciback_dev_data *drvdata;
-+
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
-+
-+	pci_disable_msi(dev);
-+	op->value = xen_gsi_from_irq(dev->irq);
-+
-+	drvdata = pci_get_drvdata(dev);
-+	if (drvdata != NULL)
-+		drvdata->ack_intr = 1;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Enable MSI-X on @dev using the op->value entries supplied in
-+ * op->msix_entries.
-+ *
-+ * Returns -EINVAL when more than SH_INFO_MAX_VEC entries are requested,
-+ * -ENOMEM when the temporary table cannot be allocated, otherwise the
-+ * result of pci_enable_msix(), which is also stored in op->value.
-+ *
-+ * The translated vectors are written back to op->msix_entries only when
-+ * pci_enable_msix() reports success (0).  On failure the table holds no
-+ * assigned vectors, so translating and publishing it would hand
-+ * meaningless values to the guest; the guest's entries are left untouched.
-+ */
-+int pciback_enable_msix(struct pciback_device *pdev,
-+			struct pci_dev *dev, struct xen_pci_op *op)
-+{
-+	struct pciback_dev_data *dev_data;
-+	int i, result;
-+	struct msix_entry *entries;
-+
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
-+	if (op->value > SH_INFO_MAX_VEC)
-+		return -EINVAL;
-+
-+	entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
-+	if (entries == NULL)
-+		return -ENOMEM;
-+
-+	for (i = 0; i < op->value; i++) {
-+		entries[i].entry = op->msix_entries[i].entry;
-+		entries[i].vector = op->msix_entries[i].vector;
-+	}
-+
-+	result = pci_enable_msix(dev, entries, op->value);
-+
-+	if (result == 0) {
-+		for (i = 0; i < op->value; i++) {
-+			op->msix_entries[i].entry = entries[i].entry;
-+			op->msix_entries[i].vector =
-+				xen_gsi_from_irq(entries[i].vector);
-+		}
-+	}
-+
-+	kfree(entries);
-+
-+	op->value = result;
-+	dev_data = pci_get_drvdata(dev);
-+	if (dev_data)
-+		dev_data->ack_intr = 0;
-+
-+	return result;
-+}
-+
-+/*
-+ * Disable MSI-X on @dev.  op->value receives xen_gsi_from_irq() of the IRQ
-+ * the device has afterwards, and interrupt acking is switched back on in
-+ * the device's driver data.  Always returns 0.
-+ */
-+int pciback_disable_msix(struct pciback_device *pdev,
-+			 struct pci_dev *dev, struct xen_pci_op *op)
-+{
-+	struct pciback_dev_data *drvdata;
-+
-+	if (unlikely(verbose_request))
-+		printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev));
-+
-+	pci_disable_msix(dev);
-+	op->value = xen_gsi_from_irq(dev->irq);
-+
-+	drvdata = pci_get_drvdata(dev);
-+	if (drvdata != NULL)
-+		drvdata->ack_intr = 1;
-+
-+	return 0;
-+}
-+
-diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
-new file mode 100644
-index 0000000..0442616
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability_pm.c
-@@ -0,0 +1,113 @@
-+/*
-+ * PCI Backend - Configuration space overlay for power management
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+
-+#include <linux/pci.h>
-+#include "conf_space.h"
-+#include "conf_space_capability.h"
-+
-+/*
-+ * Read the power management capabilities word with the PME support bits
-+ * (PCI_PM_CAP_PME_MASK) cleared.  *value is only written when the
-+ * underlying config read succeeds; the read's status is returned.
-+ */
-+static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
-+			void *data)
-+{
-+	u16 raw;
-+	int err = pci_read_config_word(dev, offset, &raw);
-+
-+	if (!err)
-+		*value = raw & ~PCI_PM_CAP_PME_MASK;
-+
-+	return err;
-+}
-+
-+/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
-+ * Can't allow driver domain to enable PMEs - they're shared */
-+#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
-+
-+/*
-+ * Write handler for the power management control/status register.
-+ *
-+ * Only the PM_OK_BITS of @new_value are merged into the register; all other
-+ * bits keep their current hardware value.  The requested power state is not
-+ * written directly but applied through pci_set_power_state().
-+ *
-+ * Returns 0 on success, the status of a failing config access, or
-+ * PCIBIOS_SET_FAILED when the power state change is refused.
-+ */
-+static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
-+			 void *data)
-+{
-+	int err;
-+	u16 old_value;
-+	pci_power_t new_state;
-+
-+	err = pci_read_config_word(dev, offset, &old_value);
-+	if (err)
-+		goto out;
-+
-+	/* Extract the requested state before masking new_value below. */
-+	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
-+
-+	new_value &= PM_OK_BITS;
-+	if ((old_value & PM_OK_BITS) != new_value) {
-+		/* Touch the register only when a permitted bit changes. */
-+		new_value = (old_value & ~PM_OK_BITS) | new_value;
-+		err = pci_write_config_word(dev, offset, new_value);
-+		if (err)
-+			goto out;
-+	}
-+
-+	/* Let pci core handle the power management change */
-+	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
-+	err = pci_set_power_state(dev, new_state);
-+	if (err) {
-+		err = PCIBIOS_SET_FAILED;
-+		goto out;
-+	}
-+
-+ out:
-+	return err;
-+}
-+
-+/*
-+ * Ensure PMEs are disabled: clear PCI_PM_CTRL_PME_ENABLE in the PM control
-+ * register if it is currently set.
-+ *
-+ * The config access status is returned wrapped in ERR_PTR(), so success
-+ * yields NULL (no private data).
-+ * NOTE(review): the status comes straight from pci_{read,write}_config_word;
-+ * confirm that a failure status is one IS_ERR() recognizes in the caller.
-+ */
-+static void *pm_ctrl_init(struct pci_dev *dev, int offset)
-+{
-+	u16 ctrl;
-+	int err = pci_read_config_word(dev, offset, &ctrl);
-+
-+	if (!err && (ctrl & PCI_PM_CTRL_PME_ENABLE)) {
-+		ctrl &= ~PCI_PM_CTRL_PME_ENABLE;
-+		err = pci_write_config_word(dev, offset, ctrl);
-+	}
-+
-+	return ERR_PTR(err);
-+}
-+/*
-+ * Field overlay for the power management capability; offsets are relative
-+ * to the capability's position.  Entries without a write handler install
-+ * none for that register.
-+ */
-+static const struct config_field caplist_pm[] = {
-+ {
-+ .offset = PCI_PM_PMC,
-+ .size = 2,
-+ .u.w.read = pm_caps_read, /* masks out the PME support bits */
-+ },
-+ {
-+ .offset = PCI_PM_CTRL,
-+ .size = 2,
-+ .init = pm_ctrl_init, /* clears PME enable at registration time */
-+ .u.w.read = pciback_read_config_word,
-+ .u.w.write = pm_ctrl_write, /* filters writable bits, sets power state */
-+ },
-+ {
-+ .offset = PCI_PM_PPB_EXTENSIONS,
-+ .size = 1,
-+ .u.b.read = pciback_read_config_byte,
-+ },
-+ {
-+ .offset = PCI_PM_DATA_REGISTER,
-+ .size = 1,
-+ .u.b.read = pciback_read_config_byte,
-+ },
-+ {}
-+};
-+/* Ties the PM capability ID to the overlay table above. */
-+struct pciback_config_capability pciback_config_capability_pm = {
-+ .capability = PCI_CAP_ID_PM,
-+ .fields = caplist_pm,
-+};
-diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
-new file mode 100644
-index 0000000..e7b4d66
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_capability_vpd.c
-@@ -0,0 +1,40 @@
-+/*
-+ * PCI Backend - Configuration space overlay for Vital Product Data
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+
-+#include <linux/pci.h>
-+#include "conf_space.h"
-+#include "conf_space_capability.h"
-+
-+/*
-+ * Write handler for the VPD address register.  A value with the
-+ * PCI_VPD_ADDR_F flag set is rejected with PCIBIOS_SET_FAILED so that the
-+ * vital product data cannot be written; any other value is passed through
-+ * to the device.
-+ */
-+static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
-+			     void *data)
-+{
-+	if (!(value & PCI_VPD_ADDR_F))
-+		return pci_write_config_word(dev, offset, value);
-+
-+	/* Disallow writes to the vital product data */
-+	return PCIBIOS_SET_FAILED;
-+}
-+/*
-+ * Field overlay for the Vital Product Data capability; offsets are relative
-+ * to the capability's position.
-+ */
-+static const struct config_field caplist_vpd[] = {
-+ {
-+ .offset = PCI_VPD_ADDR,
-+ .size = 2,
-+ .u.w.read = pciback_read_config_word,
-+ .u.w.write = vpd_address_write, /* rejects values with PCI_VPD_ADDR_F set */
-+ },
-+ {
-+ .offset = PCI_VPD_DATA,
-+ .size = 4,
-+ .u.dw.read = pciback_read_config_dword,
-+ .u.dw.write = NULL, /* no write handler for the data register */
-+ },
-+ {}
-+};
-+/* Ties the VPD capability ID to the overlay table above. */
-+struct pciback_config_capability pciback_config_capability_vpd = {
-+ .capability = PCI_CAP_ID_VPD,
-+ .fields = caplist_vpd,
-+};
-diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
-new file mode 100644
-index 0000000..cb450f4
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_header.c
-@@ -0,0 +1,385 @@
-+/*
-+ * PCI Backend - Handles the virtual fields in the configuration space headers.
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/pci.h>
-+#include "pciback.h"
-+#include "conf_space.h"
-+/* Per-BAR private data backing the virtual BAR/ROM registers. */
-+struct pci_bar_info {
-+ u32 val; /* value returned by bar_read() while 'which' is 0 */
-+ u32 len_val; /* value returned by bar_read() while 'which' is 1 */
-+ int which; /* set to 1 by a sizing write, reset to 0 by any other write */
-+};
-+/* Helpers testing bits of a value written to PCI_COMMAND. */
-+#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
-+#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
-+
-+/*
-+ * Read handler for PCI_COMMAND.  Starts from the value read through
-+ * pciback_read_config_word(); while the device is enabled
-+ * (enable_cnt != 0), the IO and MEMORY decode bits are additionally
-+ * reported as set for every resource type the device owns below
-+ * PCI_ROM_RESOURCE.  Returns the status of the underlying read.
-+ */
-+static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
-+{
-+	int ret = pciback_read_config_word(dev, offset, value, data);
-+	int bar;
-+
-+	if (atomic_read(&dev->enable_cnt) == 0)
-+		return ret;
-+
-+	for (bar = 0; bar < PCI_ROM_RESOURCE; bar++) {
-+		unsigned long flags = dev->resource[bar].flags;
-+
-+		if (flags & IORESOURCE_IO)
-+			*value |= PCI_COMMAND_IO;
-+		if (flags & IORESOURCE_MEM)
-+			*value |= PCI_COMMAND_MEMORY;
-+	}
-+
-+	return ret;
-+}
-+/*
-+ * Write handler for PCI_COMMAND.  Translates changes of the decode enable,
-+ * bus master and memory-write-invalidate bits into the corresponding PCI
-+ * core calls, then writes the (possibly adjusted) value to the device.
-+ * Returns the error from pci_enable_device() if enabling fails, otherwise
-+ * the status of the final config write.
-+ */
-+static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
-+{
-+ struct pciback_dev_data *dev_data;
-+ int err;
-+
-+ dev_data = pci_get_drvdata(dev);
-+ if (!pci_is_enabled(dev) && is_enable_cmd(value)) { /* IO/MEM decoding requested on a disabled device */
-+ if (unlikely(verbose_request))
-+ printk(KERN_DEBUG "pciback: %s: enable\n",
-+ pci_name(dev));
-+ err = pci_enable_device(dev);
-+ if (err)
-+ return err;
-+ if (dev_data)
-+ dev_data->enable_intx = 1;
-+ } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) { /* both decode bits cleared on an enabled device */
-+ if (unlikely(verbose_request))
-+ printk(KERN_DEBUG "pciback: %s: disable\n",
-+ pci_name(dev));
-+ pci_disable_device(dev);
-+ if (dev_data)
-+ dev_data->enable_intx = 0;
-+ }
-+
-+ if (!dev->is_busmaster && is_master_cmd(value)) { /* only the off -> on transition is acted upon */
-+ if (unlikely(verbose_request))
-+ printk(KERN_DEBUG "pciback: %s: set bus master\n",
-+ pci_name(dev));
-+ pci_set_master(dev);
-+ }
-+
-+ if (value & PCI_COMMAND_INVALIDATE) {
-+ if (unlikely(verbose_request))
-+ printk(KERN_DEBUG
-+ "pciback: %s: enable memory-write-invalidate\n",
-+ pci_name(dev));
-+ err = pci_set_mwi(dev);
-+ if (err) {
-+ printk(KERN_WARNING
-+ "pciback: %s: cannot enable "
-+ "memory-write-invalidate (%d)\n",
-+ pci_name(dev), err);
-+ value &= ~PCI_COMMAND_INVALIDATE; /* MWI could not be enabled: do not write the bit */
-+ }
-+ }
-+
-+ return pci_write_config_word(dev, offset, value);
-+}
-+
-+/*
-+ * Write handler for the expansion ROM address register.
-+ *
-+ * Writing ~PCI_ROM_ADDRESS_ENABLE switches the virtual register to report
-+ * its length value on the next read.  Any other write switches back to the
-+ * address value; the hardware register is only touched to restore the saved
-+ * address when it currently differs from it and the guest writes exactly
-+ * that saved value.
-+ *
-+ * Returns 0, or XEN_PCI_ERR_op_failed when the private data is missing.
-+ */
-+static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
-+{
-+	struct pci_bar_info *info = data;
-+	u32 hw_val;
-+
-+	if (unlikely(!info)) {
-+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
-+		       pci_name(dev));
-+		return XEN_PCI_ERR_op_failed;
-+	}
-+
-+	/* A write to obtain the length must happen as a 32-bit write.
-+	 * This does not (yet) support writing individual bytes
-+	 */
-+	if (value == ~PCI_ROM_ADDRESS_ENABLE) {
-+		info->which = 1;
-+		return 0;
-+	}
-+
-+	pci_read_config_dword(dev, offset, &hw_val);
-+	if (hw_val != info->val && value == info->val) {
-+		/* Allow restoration of bar value. */
-+		pci_write_config_dword(dev, offset, info->val);
-+	}
-+	info->which = 0;
-+
-+	/* Do we need to support enabling/disabling the rom address here? */
-+
-+	return 0;
-+}
-+
-+/*
-+ * Write handler for a base address register.
-+ *
-+ * For the BARs, only writes of ~0 or of the correct resource information
-+ * have an effect (needed for when the driver probes the resource usage):
-+ * ~0 switches the virtual register to report its length value, anything
-+ * else switches back to the address value, and the hardware register is
-+ * only rewritten to restore the saved address.
-+ *
-+ * Returns 0, or XEN_PCI_ERR_op_failed when the private data is missing.
-+ */
-+static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
-+{
-+	struct pci_bar_info *info = data;
-+	u32 hw_val;
-+
-+	if (unlikely(!info)) {
-+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
-+		       pci_name(dev));
-+		return XEN_PCI_ERR_op_failed;
-+	}
-+
-+	/* A write to obtain the length must happen as a 32-bit write.
-+	 * This does not (yet) support writing individual bytes
-+	 */
-+	if (value == ~0) {
-+		info->which = 1;
-+		return 0;
-+	}
-+
-+	pci_read_config_dword(dev, offset, &hw_val);
-+	if (hw_val != info->val && value == info->val) {
-+		/* Allow restoration of bar value. */
-+		pci_write_config_dword(dev, offset, info->val);
-+	}
-+	info->which = 0;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Read handler shared by the BAR and ROM fields: reports the saved length
-+ * value after a sizing write ('which' set), otherwise the saved address
-+ * value.  Returns 0, or XEN_PCI_ERR_op_failed when the private data is
-+ * missing.
-+ */
-+static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
-+{
-+	struct pci_bar_info *info = data;
-+
-+	if (unlikely(!info)) {
-+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
-+		       pci_name(dev));
-+		return XEN_PCI_ERR_op_failed;
-+	}
-+
-+	if (info->which)
-+		*value = info->len_val;
-+	else
-+		*value = info->val;
-+
-+	return 0;
-+}
-+/*
-+ * Fill @bar_info->val and ->len_val for the BAR/ROM register at @offset
-+ * from the device's resource table.  @len_mask is accepted but not used in
-+ * this function.
-+ */
-+static inline void read_dev_bar(struct pci_dev *dev,
-+ struct pci_bar_info *bar_info, int offset,
-+ u32 len_mask)
-+{
-+ int pos;
-+ struct resource *res = dev->resource;
-+
-+ if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
-+ pos = PCI_ROM_RESOURCE;
-+ else {
-+ pos = (offset - PCI_BASE_ADDRESS_0) / 4; /* BAR index: the registers are 4 bytes apart */
-+ if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
-+ PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
-+ (PCI_BASE_ADDRESS_SPACE_MEMORY |
-+ PCI_BASE_ADDRESS_MEM_TYPE_64))) { /* previous BAR is 64-bit memory: this register is its upper half */
-+ bar_info->val = res[pos - 1].start >> 32;
-+ bar_info->len_val = res[pos - 1].end >> 32;
-+ return;
-+ }
-+ }
-+
-+ bar_info->val = res[pos].start |
-+ (res[pos].flags & PCI_REGION_FLAG_MASK);
-+ bar_info->len_val = res[pos].end - res[pos].start + 1; /* resource size in bytes */
-+}
-+
-+/*
-+ * init() hook for BAR fields: allocates the per-BAR private data and fills
-+ * it from the device's resources.  Returns the new pci_bar_info, or
-+ * ERR_PTR(-ENOMEM) when the allocation fails.
-+ */
-+static void *bar_init(struct pci_dev *dev, int offset)
-+{
-+	struct pci_bar_info *info;
-+
-+	info = kmalloc(sizeof(*info), GFP_KERNEL);
-+	if (info == NULL)
-+		return ERR_PTR(-ENOMEM);
-+
-+	read_dev_bar(dev, info, offset, ~0);
-+	info->which = 0;
-+
-+	return info;
-+}
-+
-+/*
-+ * init() hook for the expansion ROM field: allocates the private data and
-+ * fills it from the device's resources.  Returns the new pci_bar_info, or
-+ * ERR_PTR(-ENOMEM) when the allocation fails.
-+ */
-+static void *rom_init(struct pci_dev *dev, int offset)
-+{
-+	struct pci_bar_info *info;
-+
-+	info = kmalloc(sizeof(*info), GFP_KERNEL);
-+	if (info == NULL)
-+		return ERR_PTR(-ENOMEM);
-+
-+	read_dev_bar(dev, info, offset, ~PCI_ROM_ADDRESS_ENABLE);
-+	info->which = 0;
-+
-+	return info;
-+}
-+/* reset() hook: make the virtual BAR report its address value again. */
-+static void bar_reset(struct pci_dev *dev, int offset, void *data)
-+{
-+ struct pci_bar_info *bar = data;
-+
-+ bar->which = 0;
-+}
-+/* release() hook: free the pci_bar_info allocated by bar_init()/rom_init(). */
-+static void bar_release(struct pci_dev *dev, int offset, void *data)
-+{
-+ kfree(data);
-+}
-+/* Report the vendor ID cached in struct pci_dev; no config access is made. */
-+static int pciback_read_vendor(struct pci_dev *dev, int offset,
-+ u16 *value, void *data)
-+{
-+ *value = dev->vendor;
-+
-+ return 0;
-+}
-+/* Report the device ID cached in struct pci_dev; no config access is made. */
-+static int pciback_read_device(struct pci_dev *dev, int offset,
-+ u16 *value, void *data)
-+{
-+ *value = dev->device;
-+
-+ return 0;
-+}
-+/* Report dev->irq, truncated to 8 bits, as the interrupt line. */
-+static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
-+ void *data)
-+{
-+ *value = (u8) dev->irq;
-+
-+ return 0;
-+}
-+
-+/*
-+ * Write handler for PCI_BIST.  The write is forwarded to the device only
-+ * when it is exactly PCI_BIST_START or when it leaves every bit other than
-+ * PCI_BIST_START unchanged; other writes are dropped and report the status
-+ * of the preceding read.
-+ */
-+static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
-+{
-+	u8 hw_value;
-+	int err = pci_read_config_byte(dev, offset, &hw_value);
-+
-+	if (err)
-+		return err;
-+
-+	if (value == PCI_BIST_START ||
-+	    (hw_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START))
-+		err = pci_write_config_byte(dev, offset, value);
-+
-+	return err;
-+}
-+/*
-+ * Header fields overlaid for every header type.  Entries that name no
-+ * write handler install none for that register.
-+ */
-+static const struct config_field header_common[] = {
-+ {
-+ .offset = PCI_VENDOR_ID,
-+ .size = 2,
-+ .u.w.read = pciback_read_vendor,
-+ },
-+ {
-+ .offset = PCI_DEVICE_ID,
-+ .size = 2,
-+ .u.w.read = pciback_read_device,
-+ },
-+ {
-+ .offset = PCI_COMMAND,
-+ .size = 2,
-+ .u.w.read = command_read,
-+ .u.w.write = command_write,
-+ },
-+ {
-+ .offset = PCI_INTERRUPT_LINE,
-+ .size = 1,
-+ .u.b.read = interrupt_read, /* reports dev->irq instead of the register */
-+ },
-+ {
-+ .offset = PCI_INTERRUPT_PIN,
-+ .size = 1,
-+ .u.b.read = pciback_read_config_byte,
-+ },
-+ {
-+ /* Any side effects of letting driver domain control cache line? */
-+ .offset = PCI_CACHE_LINE_SIZE,
-+ .size = 1,
-+ .u.b.read = pciback_read_config_byte,
-+ .u.b.write = pciback_write_config_byte,
-+ },
-+ {
-+ .offset = PCI_LATENCY_TIMER,
-+ .size = 1,
-+ .u.b.read = pciback_read_config_byte,
-+ },
-+ {
-+ .offset = PCI_BIST,
-+ .size = 1,
-+ .u.b.read = pciback_read_config_byte,
-+ .u.b.write = bist_write, /* filters which BIST writes reach the device */
-+ },
-+ {}
-+};
-+/* Initializer for a virtual 32-bit base address register at reg_offset. */
-+#define CFG_FIELD_BAR(reg_offset) \
-+ { \
-+ .offset = reg_offset, \
-+ .size = 4, \
-+ .init = bar_init, \
-+ .reset = bar_reset, \
-+ .release = bar_release, \
-+ .u.dw.read = bar_read, \
-+ .u.dw.write = bar_write, \
-+ }
-+/* Same for the expansion ROM register: only init and write differ. */
-+#define CFG_FIELD_ROM(reg_offset) \
-+ { \
-+ .offset = reg_offset, \
-+ .size = 4, \
-+ .init = rom_init, \
-+ .reset = bar_reset, \
-+ .release = bar_release, \
-+ .u.dw.read = bar_read, \
-+ .u.dw.write = rom_write, \
-+ }
-+/* Extra fields for PCI_HEADER_TYPE_NORMAL: six BARs plus the ROM. */
-+static const struct config_field header_0[] = {
-+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
-+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
-+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
-+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
-+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
-+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
-+ CFG_FIELD_ROM(PCI_ROM_ADDRESS),
-+ {}
-+};
-+/* Extra fields for PCI_HEADER_TYPE_BRIDGE: two BARs plus the ROM. */
-+static const struct config_field header_1[] = {
-+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
-+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
-+ CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
-+ {}
-+};
-+
-+/*
-+ * Register the header overlay for @dev: the common fields first, then the
-+ * BAR/ROM fields matching the device's header type.
-+ *
-+ * Returns 0 on success, the first non-zero result from registering a
-+ * field, or -EINVAL for a header type other than normal or bridge.
-+ */
-+int pciback_config_header_add_fields(struct pci_dev *dev)
-+{
-+	int rc = pciback_config_add_fields(dev, header_common);
-+
-+	if (rc)
-+		return rc;
-+
-+	switch (dev->hdr_type) {
-+	case PCI_HEADER_TYPE_NORMAL:
-+		return pciback_config_add_fields(dev, header_0);
-+
-+	case PCI_HEADER_TYPE_BRIDGE:
-+		return pciback_config_add_fields(dev, header_1);
-+
-+	default:
-+		break;
-+	}
-+
-+	printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
-+	       pci_name(dev), dev->hdr_type);
-+	return -EINVAL;
-+}
-diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
-new file mode 100644
-index 0000000..45c31fb
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_quirks.c
-@@ -0,0 +1,140 @@
-+/*
-+ * PCI Backend - Handle special overlays for broken devices.
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/pci.h>
-+#include "pciback.h"
-+#include "conf_space.h"
-+#include "conf_space_quirks.h"
-+
-+LIST_HEAD(pciback_quirks);
-+
-+/*
-+ * Return @id when it matches @dev, NULL otherwise.  Vendor, device,
-+ * subvendor and subdevice match when equal or when the ID holds PCI_ANY_ID;
-+ * the class matches when it agrees with dev->class on all class_mask bits.
-+ */
-+static inline const struct pci_device_id *
-+match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
-+{
-+	if (id->vendor != PCI_ANY_ID && id->vendor != dev->vendor)
-+		return NULL;
-+	if (id->device != PCI_ANY_ID && id->device != dev->device)
-+		return NULL;
-+	if (id->subvendor != PCI_ANY_ID &&
-+	    id->subvendor != dev->subsystem_vendor)
-+		return NULL;
-+	if (id->subdevice != PCI_ANY_ID &&
-+	    id->subdevice != dev->subsystem_device)
-+		return NULL;
-+	if ((id->class ^ dev->class) & id->class_mask)
-+		return NULL;
-+
-+	return id;
-+}
-+
-+/*
-+ * Look up the first registered quirk whose device ID matches @dev.
-+ * Returns it, or NULL (after a debug message) when none matches.
-+ */
-+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
-+{
-+	struct pciback_config_quirk *quirk;
-+
-+	list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
-+		if (match_one_device(&quirk->devid, dev) != NULL)
-+			return quirk;
-+	}
-+
-+	printk(KERN_DEBUG
-+	       "quirk didn't match any device pciback knows about\n");
-+	return NULL;
-+}
-+/* Append @quirk to the global pciback_quirks list. */
-+static inline void register_quirk(struct pciback_config_quirk *quirk)
-+{
-+ list_add_tail(&quirk->quirks_list, &pciback_quirks);
-+}
-+
-+/*
-+ * Return 1 when a config field is already registered for @dev at absolute
-+ * offset @reg, 0 otherwise.
-+ */
-+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
-+{
-+	struct pciback_dev_data *drvdata = pci_get_drvdata(dev);
-+	struct config_field_entry *entry;
-+
-+	list_for_each_entry(entry, &drvdata->config_fields, list) {
-+		if (OFFSET(entry) == reg)
-+			return 1;
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Register a quirk field for @dev.  The field's accessors are set to the
-+ * plain pass-through config read/write helpers matching field->size.
-+ *
-+ * Returns 0 on success, -EINVAL when field->size is not 1, 2 or 4, or the
-+ * negative errno reported while registering the field (e.g. -ENOMEM).
-+ */
-+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
-+				    *field)
-+{
-+	int err = 0;
-+
-+	switch (field->size) {
-+	case 1:
-+		field->u.b.read = pciback_read_config_byte;
-+		field->u.b.write = pciback_write_config_byte;
-+		break;
-+	case 2:
-+		field->u.w.read = pciback_read_config_word;
-+		field->u.w.write = pciback_write_config_word;
-+		break;
-+	case 4:
-+		field->u.dw.read = pciback_read_config_dword;
-+		field->u.dw.write = pciback_write_config_dword;
-+		break;
-+	default:
-+		err = -EINVAL;
-+		goto out;
-+	}
-+
-+	/*
-+	 * Do not drop registration failures on the floor.  A positive result
-+	 * only flags an offset that is already registered, which is still
-+	 * reported as success as before.
-+	 */
-+	err = pciback_config_add_field(dev, field);
-+	if (err > 0)
-+		err = 0;
-+
-+out:
-+	return err;
-+}
-+
-+/*
-+ * Allocate a quirk record keyed on the vendor/device/subsystem IDs of @dev
-+ * (class matching disabled) and append it to the global quirk list.
-+ * Returns 0, or -ENOMEM when the allocation fails.
-+ */
-+int pciback_config_quirks_init(struct pci_dev *dev)
-+{
-+	struct pciback_config_quirk *entry;
-+	struct pci_device_id *id;
-+
-+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
-+	if (entry == NULL)
-+		return -ENOMEM;
-+
-+	id = &entry->devid;
-+	id->vendor = dev->vendor;
-+	id->device = dev->device;
-+	id->subvendor = dev->subsystem_vendor;
-+	id->subdevice = dev->subsystem_device;
-+	id->class = 0;
-+	id->class_mask = 0;
-+	id->driver_data = 0UL;
-+
-+	entry->pdev = dev;
-+
-+	register_quirk(entry);
-+	return 0;
-+}
-+/* Free a config_field descriptor with kfree(). */
-+void pciback_config_field_free(struct config_field *field)
-+{
-+ kfree(field);
-+}
-+
-+/*
-+ * Unlink and free the quirk record matching @dev.
-+ * Returns 0, or -ENXIO when no record matches.
-+ */
-+int pciback_config_quirk_release(struct pci_dev *dev)
-+{
-+	struct pciback_config_quirk *entry = pciback_find_quirk(dev);
-+
-+	if (entry == NULL)
-+		return -ENXIO;
-+
-+	list_del(&entry->quirks_list);
-+	kfree(entry);
-+
-+	return 0;
-+}
-diff --git a/drivers/xen/pciback/conf_space_quirks.h b/drivers/xen/pciback/conf_space_quirks.h
-new file mode 100644
-index 0000000..acd0e1a
---- /dev/null
-+++ b/drivers/xen/pciback/conf_space_quirks.h
-@@ -0,0 +1,35 @@
-+/*
-+ * PCI Backend - Data structures for special overlays for broken devices.
-+ *
-+ * Ryan Wilson <hap9@epoch.ncsc.mil>
-+ * Chris Bookholt <hap10@epoch.ncsc.mil>
-+ */
-+
-+#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
-+#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
-+
-+#include <linux/pci.h>
-+#include <linux/list.h>
-+
-+/* One quirk record; records are chained together through quirks_list. */
-+struct pciback_config_quirk {
-+ struct list_head quirks_list;
-+ /* IDs copied from pdev when the quirk is created */
-+ struct pci_device_id devid;
-+ /* device the quirk was created for */
-+ struct pci_dev *pdev;
-+};
-+
-+/* Look up the quirk record for dev (the release path treats NULL as "none"). */
-+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
-+
-+/*
-+ * Install default read/write handlers for field->size (1, 2 or 4) and add
-+ * the field to dev; -EINVAL for any other size.
-+ */
-+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
-+ *field);
-+
-+/* NOTE(review): presumably removes the field at offset reg - confirm. */
-+int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
-+
-+/* Allocate and register a quirk record for dev; 0 or -ENOMEM. */
-+int pciback_config_quirks_init(struct pci_dev *dev);
-+
-+/* kfree() a config_field. */
-+void pciback_config_field_free(struct config_field *field);
-+
-+/* Unlink and free the quirk record for dev; 0 or -ENXIO if none exists. */
-+int pciback_config_quirk_release(struct pci_dev *dev);
-+
-+/* 1 if dev already has a config field whose offset equals reg, else 0. */
-+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
-+
-+#endif
-diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
-new file mode 100644
-index 0000000..7f04f11
---- /dev/null
-+++ b/drivers/xen/pciback/controller.c
-@@ -0,0 +1,442 @@
-+/*
-+ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
-+ * Alex Williamson <alex.williamson@hp.com>
-+ *
-+ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
-+ * controllers. Devices under the same PCI controller are exposed on the
-+ * same virtual domain:bus. Within a bus, device slots are virtualized
-+ * to compact the bus.
-+ *
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ */
-+
-+#include <linux/acpi.h>
-+#include <linux/list.h>
-+#include <linux/pci.h>
-+#include <linux/spinlock.h>
-+#include "pciback.h"
-+
-+#define PCI_MAX_BUSSES 255
-+#define PCI_MAX_SLOTS 32
-+
-+/* One exported device on a virtual bus. */
-+struct controller_dev_entry {
-+ struct list_head list; /* link in controller_list_entry.dev_list */
-+ struct pci_dev *dev; /* the exported device */
-+ unsigned int devfn; /* virtual devfn assigned when the device was added */
-+};
-+
-+/* One virtual domain:bus, created per PCI controller. */
-+struct controller_list_entry {
-+ struct list_head list; /* link in controller_dev_data.list */
-+ struct pci_controller *controller; /* controller this bus stands for */
-+ unsigned int domain; /* virtual domain number */
-+ unsigned int bus; /* virtual bus number */
-+ unsigned int next_devfn; /* devfn handed to the next device added */
-+ struct list_head dev_list; /* controller_dev_entry items on this bus */
-+};
-+
-+/* Per-pciback_device state, stored in pdev->pci_dev_data. */
-+struct controller_dev_data {
-+ struct list_head list; /* controller_list_entry items */
-+ unsigned int next_domain; /* virtual domain for the next new controller */
-+ unsigned int next_bus; /* virtual bus for the next new controller */
-+ spinlock_t lock; /* taken around accesses to the lists above */
-+};
-+
-+/* Context handed to write_xenbus_resource() through acpi_walk_resources(). */
-+struct walk_info {
-+ struct pciback_device *pdev;
-+ int resource_count; /* resources written so far for this root */
-+ int root_num; /* index used in the "root-%d-resource-%d" key */
-+};
-+
-+/*
-+ * Translate a virtual domain:bus:devfn into the exported pci_dev.
-+ *
-+ * Returns the matching device, or NULL when nothing is exported at that
-+ * virtual address.
-+ */
-+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
-+				    unsigned int domain, unsigned int bus,
-+				    unsigned int devfn)
-+{
-+	struct controller_dev_data *data = pdev->pci_dev_data;
-+	struct controller_list_entry *ctrl;
-+	struct controller_dev_entry *ent;
-+	struct pci_dev *match = NULL;
-+	unsigned long irqflags;
-+
-+	spin_lock_irqsave(&data->lock, irqflags);
-+
-+	list_for_each_entry(ctrl, &data->list, list) {
-+		/* only look inside the virtual bus that was asked for */
-+		if (ctrl->domain == domain && ctrl->bus == bus) {
-+			list_for_each_entry(ent, &ctrl->dev_list, list) {
-+				if (ent->devfn == devfn) {
-+					match = ent->dev;
-+					goto unlock;
-+				}
-+			}
-+		}
-+	}
-+
-+unlock:
-+	spin_unlock_irqrestore(&data->lock, irqflags);
-+
-+	return match;
-+}
-+
-+/*
-+ * Export @dev on the virtual bus that belongs to its PCI controller.
-+ *
-+ * The first device seen for a controller allocates a new virtual domain:bus;
-+ * every device then takes the next free slot (function 0) on that bus.
-+ * @devid and @publish_cb are not used yet (see the TODO at the end).
-+ *
-+ * Returns 0 on success, -ENOMEM on allocation failure, or -ENOSPC when the
-+ * virtual bus has no free slot left.
-+ */
-+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
-+			int devid, publish_pci_dev_cb publish_cb)
-+{
-+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
-+	struct controller_dev_entry *dev_entry;
-+	struct controller_list_entry *cntrl_entry;
-+	struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
-+	unsigned long flags;
-+	int ret = 0, found = 0;
-+
-+	spin_lock_irqsave(&dev_data->lock, flags);
-+
-+	/* Look to see if we already have a domain:bus for this controller */
-+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
-+		if (cntrl_entry->controller == dev_controller) {
-+			found = 1;
-+			break;
-+		}
-+	}
-+
-+	if (!found) {
-+		cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
-+		if (!cntrl_entry) {
-+			ret = -ENOMEM;
-+			goto out;
-+		}
-+
-+		cntrl_entry->controller = dev_controller;
-+		cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
-+
-+		cntrl_entry->domain = dev_data->next_domain;
-+		cntrl_entry->bus = dev_data->next_bus++;
-+		if (dev_data->next_bus > PCI_MAX_BUSSES) {
-+			dev_data->next_domain++;
-+			dev_data->next_bus = 0;
-+		}
-+
-+		INIT_LIST_HEAD(&cntrl_entry->dev_list);
-+
-+		list_add_tail(&cntrl_entry->list, &dev_data->list);
-+	}
-+
-+	/*
-+	 * next_devfn grows by one whole slot per device and is never masked,
-+	 * so test the raw slot index.  PCI_SLOT() masks to 0..31 and could
-+	 * therefore never exceed PCI_MAX_SLOTS, which let the virtual devfn
-+	 * wrap instead of reporting a full bus.
-+	 */
-+	if ((cntrl_entry->next_devfn >> 3) >= PCI_MAX_SLOTS) {
-+		/*
-+		 * While it seems unlikely, this can actually happen if
-+		 * a controller has P2P bridges under it.
-+		 */
-+		xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
-+				 "is full, no room to export %04x:%02x:%02x.%x",
-+				 cntrl_entry->domain, cntrl_entry->bus,
-+				 pci_domain_nr(dev->bus), dev->bus->number,
-+				 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
-+		ret = -ENOSPC;
-+		goto out;
-+	}
-+
-+	dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
-+	if (!dev_entry) {
-+		/* do not leave behind a virtual bus with no devices on it */
-+		if (list_empty(&cntrl_entry->dev_list)) {
-+			list_del(&cntrl_entry->list);
-+			kfree(cntrl_entry);
-+		}
-+		ret = -ENOMEM;
-+		goto out;
-+	}
-+
-+	dev_entry->dev = dev;
-+	dev_entry->devfn = cntrl_entry->next_devfn;
-+
-+	list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
-+
-+	cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
-+
-+out:
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
-+
-+	/* TODO: Publish virtual domain:bus:slot.func here. */
-+
-+	return ret;
-+}
-+
-+/*
-+ * Remove @dev from its virtual bus and hand it back to pcistub.
-+ *
-+ * The virtual bus itself is freed once its last device is gone.  Does
-+ * nothing when @dev is not exported through @pdev.
-+ */
-+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
-+{
-+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
-+	struct controller_list_entry *cntrl_entry;
-+	struct controller_dev_entry *dev_entry = NULL;
-+	struct pci_dev *found_dev = NULL;
-+	unsigned long flags;
-+
-+	spin_lock_irqsave(&dev_data->lock, flags);
-+
-+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
-+		if (cntrl_entry->controller != PCI_CONTROLLER(dev))
-+			continue;
-+
-+		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
-+			if (dev_entry->dev == dev) {
-+				found_dev = dev_entry->dev;
-+				/*
-+				 * Leave both loops at once: cntrl_entry and
-+				 * dev_entry must still point at the matching
-+				 * entries when they are unlinked below.
-+				 */
-+				goto found;
-+			}
-+		}
-+	}
-+
-+	/* device is not exported through this pdev */
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
-+	return;
-+
-+found:
-+	list_del(&dev_entry->list);
-+	kfree(dev_entry);
-+
-+	if (list_empty(&cntrl_entry->dev_list)) {
-+		list_del(&cntrl_entry->list);
-+		kfree(cntrl_entry);
-+	}
-+
-+	spin_unlock_irqrestore(&dev_data->lock, flags);
-+	pcistub_put_pci_dev(found_dev);
-+}
-+
-+/*
-+ * Allocate the controller bookkeeping for @pdev and attach it as
-+ * pdev->pci_dev_data.  Virtual numbering starts at domain 0, bus 0.
-+ *
-+ * Returns 0 on success or -ENOMEM.
-+ */
-+int pciback_init_devices(struct pciback_device *pdev)
-+{
-+	struct controller_dev_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
-+
-+	if (data == NULL)
-+		return -ENOMEM;
-+
-+	spin_lock_init(&data->lock);
-+	INIT_LIST_HEAD(&data->list);
-+
-+	/* Starting domain:bus numbers */
-+	data->next_domain = 0;
-+	data->next_bus = 0;
-+
-+	pdev->pci_dev_data = data;
-+
-+	return 0;
-+}
-+
-+/*
-+ * acpi_walk_resources() callback: publish one translated address range of a
-+ * root bridge to xenbus.
-+ *
-+ * @res:  resource being visited; its address64 resource_source is cleared
-+ *        in place before the resource is serialized
-+ * @data: struct walk_info describing the target node and running count
-+ *
-+ * Only producer memory/IO ranges with a non-zero length and a non-zero
-+ * translation are written.  The resource is stored as a hex dump of the
-+ * whole struct acpi_resource under "root-<root_num>-resource-<count>".
-+ * Always returns AE_OK; failures only leave resource_count unchanged.
-+ */
-+static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
-+{
-+	struct walk_info *info = data;
-+	struct acpi_resource_address64 addr;
-+	acpi_status status;
-+	int len, err;
-+	size_t i;
-+	char str[32];
-+	unsigned char *ptr;
-+	char *buf;
-+
-+	status = acpi_resource_to_address64(res, &addr);
-+
-+	/* Do we care about this range?  Let's check. */
-+	if (!ACPI_SUCCESS(status) ||
-+	    !(addr.resource_type == ACPI_MEMORY_RANGE ||
-+	      addr.resource_type == ACPI_IO_RANGE) ||
-+	    !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
-+		return AE_OK;
-+
-+	/*
-+	 * Furthermore, we really only care to tell the guest about
-+	 * address ranges that require address translation of some sort.
-+	 */
-+	if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
-+	      addr.info.mem.translation) &&
-+	    !(addr.resource_type == ACPI_IO_RANGE &&
-+	      addr.info.io.translation))
-+		return AE_OK;
-+
-+	/* Store the resource in xenbus for the guest */
-+	len = snprintf(str, sizeof(str), "root-%d-resource-%d",
-+		       info->root_num, info->resource_count);
-+	if (unlikely(len >= (sizeof(str) - 1)))
-+		return AE_OK;
-+
-+	/* two hex digits per byte plus the terminating NUL */
-+	buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
-+	if (!buf)
-+		return AE_OK;
-+
-+	/* Clean out resource_source */
-+	res->data.address64.resource_source.index = 0xFF;
-+	res->data.address64.resource_source.string_length = 0;
-+	res->data.address64.resource_source.string_ptr = NULL;
-+
-+	ptr = (unsigned char *)res;
-+
-+	/*
-+	 * Turn the acpi_resource into an ASCII byte stream.  Each byte is
-+	 * formatted straight into its final position instead of being
-+	 * appended with strncat(), which rescanned the buffer every time.
-+	 */
-+	for (i = 0; i < sizeof(*res); i++)
-+		snprintf(buf + (i * 2), 3, "%02x", ptr[i]);
-+
-+	err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
-+			    str, "%s", buf);
-+
-+	if (!err)
-+		info->resource_count++;
-+
-+	kfree(buf);
-+
-+	return AE_OK;
-+}
-+
-+/*
-+ * Publish every virtual domain:bus through publish_root_cb and, for each
-+ * one, write its translated ACPI _CRS resources to xenbus under the
-+ * matching "root-%d" index, followed by a "root-%d-resources" count.
-+ * Finishes by writing the "root-resource-magic" key.
-+ *
-+ * Returns the value left in err (see the review notes below).
-+ *
-+ * NOTE(review): the lock is taken with plain spin_lock() here while the
-+ * other functions in this file use spin_lock_irqsave(), and it is held
-+ * across the xenbus_*() and acpi_walk_resources() calls - confirm none of
-+ * them can sleep.
-+ */
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+ publish_pci_root_cb publish_root_cb)
-+{
-+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
-+ struct controller_list_entry *cntrl_entry;
-+ int i, root_num, len, err = 0;
-+ unsigned int domain, bus;
-+ char str[64];
-+ struct walk_info info;
-+
-+ spin_lock(&dev_data->lock);
-+
-+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
-+ /* First publish all the domain:bus info */
-+ err = publish_root_cb(pdev, cntrl_entry->domain,
-+ cntrl_entry->bus);
-+ if (err)
-+ goto out;
-+
-+ /*
-+ * Now figure out which root-%d this belongs to
-+ * so we can associate resources with it.
-+ */
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+ "root_num", "%d", &root_num);
-+
-+ /*
-+ * NOTE(review): on this and the following "goto out" paths err
-+ * still holds xenbus_scanf()'s return value, which is returned
-+ * to the caller unchanged.
-+ */
-+ if (err != 1)
-+ goto out;
-+
-+ for (i = 0; i < root_num; i++) {
-+ len = snprintf(str, sizeof(str), "root-%d", i);
-+ if (unlikely(len >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+ str, "%x:%x", &domain, &bus);
-+ if (err != 2)
-+ goto out;
-+
-+ /* Is this the one we just published? */
-+ if (domain == cntrl_entry->domain &&
-+ bus == cntrl_entry->bus)
-+ break;
-+ }
-+
-+ /* no root-%d entry matched the bus we just published */
-+ if (i == root_num)
-+ goto out;
-+
-+ info.pdev = pdev;
-+ info.resource_count = 0;
-+ info.root_num = i;
-+
-+ /* Let ACPI do the heavy lifting on decoding resources */
-+ acpi_walk_resources(cntrl_entry->controller->acpi_handle,
-+ METHOD_NAME__CRS, write_xenbus_resource,
-+ &info);
-+
-+ /* No resources.  OK.  On to the next one */
-+ if (!info.resource_count)
-+ continue;
-+
-+ /* Store the number of resources we wrote for this root-%d */
-+ len = snprintf(str, sizeof(str), "root-%d-resources", i);
-+ if (unlikely(len >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+
-+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-+ "%d", info.resource_count);
-+ if (err)
-+ goto out;
-+ }
-+
-+ /* Finally, write some magic to synchronize with the guest. */
-+ len = snprintf(str, sizeof(str), "root-resource-magic");
-+ if (unlikely(len >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+
-+ /*
-+ * The magic is the size of the hex buffer write_xenbus_resource() uses.
-+ * NOTE(review): "%lx" is given a size_t expression - confirm the two
-+ * types match on every supported architecture.
-+ */
-+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-+ "%lx", (sizeof(struct acpi_resource) * 2) + 1);
-+
-+out:
-+ spin_unlock(&dev_data->lock);
-+
-+ return err;
-+}
-+
-+/*
-+ * Tear down all controller bookkeeping of @pdev: every exported device is
-+ * returned to pcistub, every virtual bus is freed, and pdev->pci_dev_data
-+ * is cleared.  No lock is taken here.
-+ */
-+void pciback_release_devices(struct pciback_device *pdev)
-+{
-+	struct controller_dev_data *data = pdev->pci_dev_data;
-+	struct controller_list_entry *ctrl, *ctrl_next;
-+	struct controller_dev_entry *ent, *ent_next;
-+
-+	list_for_each_entry_safe(ctrl, ctrl_next, &data->list, list) {
-+		/* empty the bus before freeing it */
-+		list_for_each_entry_safe(ent, ent_next, &ctrl->dev_list,
-+					 list) {
-+			list_del(&ent->list);
-+			pcistub_put_pci_dev(ent->dev);
-+			kfree(ent);
-+		}
-+
-+		list_del(&ctrl->list);
-+		kfree(ctrl);
-+	}
-+
-+	kfree(data);
-+	pdev->pci_dev_data = NULL;
-+}
-+
-+/*
-+ * Find the virtual domain:bus:devfn under which @pcidev is exported.
-+ *
-+ * Devices are matched by physical bus number, devfn and domain.  On a
-+ * match the virtual address is stored through @domain, @bus and @devfn and
-+ * 1 is returned; otherwise the outputs are left untouched and 0 is returned.
-+ */
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
-+			     struct pciback_device *pdev,
-+			     unsigned int *domain, unsigned int *bus,
-+			     unsigned int *devfn)
-+{
-+	struct controller_dev_data *data = pdev->pci_dev_data;
-+	struct controller_list_entry *ctrl;
-+	struct controller_dev_entry *ent;
-+	unsigned long irqflags;
-+	int hit = 0;
-+
-+	spin_lock_irqsave(&data->lock, irqflags);
-+
-+	list_for_each_entry(ctrl, &data->list, list) {
-+		list_for_each_entry(ent, &ctrl->dev_list, list) {
-+			struct pci_dev *cand = ent->dev;
-+
-+			if (cand->bus->number != pcidev->bus->number)
-+				continue;
-+			if (cand->devfn != pcidev->devfn)
-+				continue;
-+			if (pci_domain_nr(cand->bus) !=
-+			    pci_domain_nr(pcidev->bus))
-+				continue;
-+
-+			hit = 1;
-+			*domain = ctrl->domain;
-+			*bus = ctrl->bus;
-+			*devfn = ent->devfn;
-+			goto unlock;
-+		}
-+	}
-+
-+unlock:
-+	spin_unlock_irqrestore(&data->lock, irqflags);
-+	return hit;
-+}
-+
-+
-diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
-new file mode 100644
-index 0000000..5386bebf
---- /dev/null
-+++ b/drivers/xen/pciback/passthrough.c
-@@ -0,0 +1,178 @@
-+/*
-+ * PCI Backend - Provides restricted access to the real PCI bus topology
-+ * to the frontend
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+
-+#include <linux/list.h>
-+#include <linux/pci.h>
-+#include <linux/spinlock.h>
-+#include "pciback.h"
-+
-+/* Per-pciback_device state, stored in pdev->pci_dev_data. */
-+struct passthrough_dev_data {
-+ /* Access to dev_list must be protected by lock */
-+ struct list_head dev_list; /* pci_dev_entry items, one per exported device */
-+ spinlock_t lock;
-+};
-+
-+/*
-+ * Look up an exported device by its real domain:bus:devfn.
-+ *
-+ * Returns the device, or NULL if it is not exported through @pdev.
-+ */
-+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
-+				    unsigned int domain, unsigned int bus,
-+				    unsigned int devfn)
-+{
-+	struct passthrough_dev_data *data = pdev->pci_dev_data;
-+	struct pci_dev_entry *ent;
-+	struct pci_dev *match = NULL;
-+	unsigned long irqflags;
-+
-+	spin_lock_irqsave(&data->lock, irqflags);
-+
-+	list_for_each_entry(ent, &data->dev_list, list) {
-+		struct pci_dev *cand = ent->dev;
-+
-+		if (domain != (unsigned int)pci_domain_nr(cand->bus))
-+			continue;
-+		if (bus != (unsigned int)cand->bus->number)
-+			continue;
-+		if (devfn != cand->devfn)
-+			continue;
-+
-+		match = cand;
-+		break;
-+	}
-+
-+	spin_unlock_irqrestore(&data->lock, irqflags);
-+
-+	return match;
-+}
-+
-+/*
-+ * Record @dev as exported and publish it under its real domain:bus:devfn
-+ * through @publish_cb.
-+ *
-+ * Returns -ENOMEM if the list entry cannot be allocated, otherwise whatever
-+ * @publish_cb returns.  The entry stays on the list even when publishing
-+ * fails.
-+ */
-+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
-+			int devid, publish_pci_dev_cb publish_cb)
-+{
-+	struct passthrough_dev_data *data = pdev->pci_dev_data;
-+	struct pci_dev_entry *ent;
-+	unsigned long irqflags;
-+
-+	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
-+	if (ent == NULL)
-+		return -ENOMEM;
-+	ent->dev = dev;
-+
-+	spin_lock_irqsave(&data->lock, irqflags);
-+	list_add_tail(&ent->list, &data->dev_list);
-+	spin_unlock_irqrestore(&data->lock, irqflags);
-+
-+	/* Publish this device. */
-+	return publish_cb(pdev,
-+			  (unsigned int)pci_domain_nr(dev->bus),
-+			  (unsigned int)dev->bus->number,
-+			  dev->devfn, devid);
-+}
-+
-+/*
-+ * Drop every list entry that refers to @dev and, if at least one was found,
-+ * return the device to pcistub once the lock has been released.
-+ */
-+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
-+{
-+	struct passthrough_dev_data *data = pdev->pci_dev_data;
-+	struct pci_dev_entry *ent, *next;
-+	struct pci_dev *released = NULL;
-+	unsigned long irqflags;
-+
-+	spin_lock_irqsave(&data->lock, irqflags);
-+
-+	/* the whole list is scanned; the walk does not stop at the first hit */
-+	list_for_each_entry_safe(ent, next, &data->dev_list, list) {
-+		if (ent->dev != dev)
-+			continue;
-+
-+		released = ent->dev;
-+		list_del(&ent->list);
-+		kfree(ent);
-+	}
-+
-+	spin_unlock_irqrestore(&data->lock, irqflags);
-+
-+	if (released != NULL)
-+		pcistub_put_pci_dev(released);
-+}
-+
-+/*
-+ * Allocate the passthrough bookkeeping (lock plus empty device list) and
-+ * attach it to @pdev.  Returns 0 on success or -ENOMEM.
-+ */
-+int pciback_init_devices(struct pciback_device *pdev)
-+{
-+	struct passthrough_dev_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
-+
-+	if (data == NULL)
-+		return -ENOMEM;
-+
-+	spin_lock_init(&data->lock);
-+	INIT_LIST_HEAD(&data->dev_list);
-+
-+	pdev->pci_dev_data = data;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Call @publish_root_cb for the domain:bus of every exported device that
-+ * has no exported parent bridge.
-+ *
-+ * Returns 0, or the first non-zero value returned by @publish_root_cb
-+ * (publishing stops at that point).
-+ */
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+			      publish_pci_root_cb publish_root_cb)
-+{
-+	struct passthrough_dev_data *data = pdev->pci_dev_data;
-+	struct pci_dev_entry *ent, *other;
-+	int rc = 0;
-+
-+	spin_lock(&data->lock);
-+
-+	list_for_each_entry(ent, &data->dev_list, list) {
-+		struct pci_dev *bridge = ent->dev->bus->self;
-+		int exported = 0;
-+
-+		/* Only publish this device as a root if none of its
-+		 * parent bridges are exported
-+		 */
-+		while (bridge != NULL && !exported) {
-+			list_for_each_entry(other, &data->dev_list, list) {
-+				if (other->dev == bridge) {
-+					exported = 1;
-+					break;
-+				}
-+			}
-+			bridge = bridge->bus->self;
-+		}
-+
-+		if (exported)
-+			continue;
-+
-+		rc = publish_root_cb(pdev,
-+				(unsigned int)pci_domain_nr(ent->dev->bus),
-+				(unsigned int)ent->dev->bus->number);
-+		if (rc)
-+			break;
-+	}
-+
-+	spin_unlock(&data->lock);
-+
-+	return rc;
-+}
-+
-+/*
-+ * Return every exported device to pcistub, free the list entries and the
-+ * bookkeeping structure, and clear pdev->pci_dev_data.  No lock is taken.
-+ */
-+void pciback_release_devices(struct pciback_device *pdev)
-+{
-+	struct passthrough_dev_data *data = pdev->pci_dev_data;
-+	struct pci_dev_entry *ent, *next;
-+
-+	list_for_each_entry_safe(ent, next, &data->dev_list, list) {
-+		list_del(&ent->list);
-+		pcistub_put_pci_dev(ent->dev);
-+		kfree(ent);
-+	}
-+
-+	kfree(data);
-+	pdev->pci_dev_data = NULL;
-+}
-+
-+/*
-+ * In passthrough mode the frontend sees the real topology, so the address
-+ * reported is simply @pcidev's own domain:bus:devfn.  @pdev is not used.
-+ * Always returns 1.
-+ */
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
-+			     struct pciback_device *pdev,
-+			     unsigned int *domain, unsigned int *bus,
-+			     unsigned int *devfn)
-+
-+{
-+	struct pci_bus *real_bus = pcidev->bus;
-+
-+	*domain = pci_domain_nr(real_bus);
-+	*bus = real_bus->number;
-+	*devfn = pcidev->devfn;
-+
-+	return 1;
-+}
-diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
-new file mode 100644
-index 0000000..88c7ca1
---- /dev/null
-+++ b/drivers/xen/pciback/pci_stub.c
-@@ -0,0 +1,1370 @@
-+/*
-+ * PCI Stub Driver - Grabs devices in backend to be exported later
-+ *
-+ * Ryan Wilson <hap9@epoch.ncsc.mil>
-+ * Chris Bookholt <hap10@epoch.ncsc.mil>
-+ */
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/rwsem.h>
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+#include <linux/kref.h>
-+#include <linux/pci.h>
-+#include <linux/wait.h>
-+#include <linux/sched.h>
-+#include <asm/atomic.h>
-+#include <xen/events.h>
-+#include <asm/xen/pci.h>
-+#include <asm/xen/hypervisor.h>
-+#include "pciback.h"
-+#include "conf_space.h"
-+#include "conf_space_quirks.h"
-+
-+#define DRV_NAME "pciback"
-+
-+static char *pci_devs_to_hide;
-+wait_queue_head_t aer_wait_queue;
-+/* Semaphore that synchronizes AER handling with pciback remove/reconfigure
-+ * operations: a pciback device must not be removed in the middle of an AER
-+ * operation.
-+ */
-+static DECLARE_RWSEM(pcistub_sem);
-+module_param_named(hide, pci_devs_to_hide, charp, 0444);
-+
-+/* One domain:bus:devfn that pcistub should match (see pcistub_match_one). */
-+struct pcistub_device_id {
-+ struct list_head slot_list; /* link in pcistub_device_ids */
-+ int domain;
-+ unsigned char bus;
-+ unsigned int devfn;
-+};
-+static LIST_HEAD(pcistub_device_ids);
-+static DEFINE_SPINLOCK(device_ids_lock);
-+
-+/* A PCI device seized by pcistub; reference-counted through kref. */
-+struct pcistub_device {
-+ struct kref kref; /* released via pcistub_device_release() */
-+ struct list_head dev_list; /* link in pcistub_devices or seized_devices */
-+ spinlock_t lock; /* taken around updates of pdev */
-+
-+ struct pci_dev *dev; /* reference obtained with pci_dev_get() */
-+ struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */
-+};
-+
-+/* Access to pcistub_devices & seized_devices lists and the initialize_devices
-+ * flag must be locked with pcistub_devices_lock
-+ */
-+static DEFINE_SPINLOCK(pcistub_devices_lock);
-+static LIST_HEAD(pcistub_devices);
-+
-+/* wait for device_initcall before initializing our devices
-+ * (see pcistub_init_devices_late)
-+ */
-+static int initialize_devices;
-+static LIST_HEAD(seized_devices);
-+
-+/*
-+ * Allocate a pcistub_device wrapper for @dev, taking a reference on the
-+ * pci_dev.  Returns NULL if the allocation or pci_dev_get() fails.
-+ */
-+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
-+{
-+	struct pcistub_device *stub;
-+
-+	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
-+
-+	stub = kzalloc(sizeof(*stub), GFP_ATOMIC);
-+	if (stub == NULL)
-+		return NULL;
-+
-+	stub->dev = pci_dev_get(dev);
-+	if (stub->dev == NULL)
-+		goto free_stub;
-+
-+	kref_init(&stub->kref);
-+	spin_lock_init(&stub->lock);
-+
-+	return stub;
-+
-+free_stub:
-+	kfree(stub);
-+	return NULL;
-+}
-+
-+/*
-+ * kref release callback - invoked through pcistub_device_put() when the last
-+ * reference goes away; never call it directly.
-+ *
-+ * Unregisters the domain owner, resets the device, frees its config state
-+ * and driver data, drops the pci_dev reference and frees the wrapper.
-+ */
-+static void pcistub_device_release(struct kref *kref)
-+{
-+	struct pcistub_device *stub =
-+		container_of(kref, struct pcistub_device, kref);
-+	struct pci_dev *dev = stub->dev;
-+
-+	dev_dbg(&dev->dev, "pcistub_device_release\n");
-+
-+	xen_unregister_device_domain_owner(dev);
-+
-+	/* Clean-up the device */
-+	pciback_reset_device(dev);
-+	pciback_config_free_dyn_fields(dev);
-+	pciback_config_free_dev(dev);
-+	kfree(pci_get_drvdata(dev));
-+	pci_set_drvdata(dev, NULL);
-+
-+	pci_dev_put(dev);
-+
-+	kfree(stub);
-+}
-+
-+/* Take an additional reference on psdev. */
-+static inline void pcistub_device_get(struct pcistub_device *psdev)
-+{
-+ kref_get(&psdev->kref);
-+}
-+
-+/*
-+ * Drop a reference on psdev; pcistub_device_release() is the kref release
-+ * callback.
-+ */
-+static inline void pcistub_device_put(struct pcistub_device *psdev)
-+{
-+ kref_put(&psdev->kref, pcistub_device_release);
-+}
-+
-+/*
-+ * Find the seized device at domain:bus:slot.func.
-+ *
-+ * On success a reference is taken on the returned pcistub_device; the
-+ * caller is expected to drop it with pcistub_device_put().  Returns NULL
-+ * when no such device is on the pcistub_devices list.
-+ */
-+static struct pcistub_device *pcistub_device_find(int domain, int bus,
-+						  int slot, int func)
-+{
-+	struct pcistub_device *cur;
-+	struct pcistub_device *match = NULL;
-+	unsigned long irqflags;
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, irqflags);
-+
-+	list_for_each_entry(cur, &pcistub_devices, dev_list) {
-+		if (cur->dev != NULL
-+		    && domain == pci_domain_nr(cur->dev->bus)
-+		    && bus == cur->dev->bus->number
-+		    && PCI_DEVFN(slot, func) == cur->dev->devfn) {
-+			pcistub_device_get(cur);
-+			match = cur;
-+			break;
-+		}
-+	}
-+
-+	spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
-+	return match;
-+}
-+
-+/*
-+ * Claim @psdev for @pdev.
-+ *
-+ * If the device is not yet in use, it is bound to @pdev, the reference
-+ * taken here is kept, and the pci_dev is returned.  If it is already in
-+ * use, the reference is dropped again and NULL is returned.
-+ */
-+static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
-+						  struct pcistub_device *psdev)
-+{
-+	struct pci_dev *claimed = NULL;
-+	unsigned long irqflags;
-+
-+	pcistub_device_get(psdev);
-+
-+	spin_lock_irqsave(&psdev->lock, irqflags);
-+	if (psdev->pdev == NULL) {
-+		claimed = psdev->dev;
-+		psdev->pdev = pdev;
-+	}
-+	spin_unlock_irqrestore(&psdev->lock, irqflags);
-+
-+	if (claimed == NULL)
-+		pcistub_device_put(psdev);
-+
-+	return claimed;
-+}
-+
-+/*
-+ * Claim the seized device at domain:bus:slot.func for @pdev.
-+ *
-+ * Returns the pci_dev, or NULL when the device is not seized or is already
-+ * in use.
-+ */
-+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
-+					    int domain, int bus,
-+					    int slot, int func)
-+{
-+	struct pcistub_device *cur;
-+	struct pci_dev *claimed = NULL;
-+	unsigned long irqflags;
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, irqflags);
-+
-+	list_for_each_entry(cur, &pcistub_devices, dev_list) {
-+		if (cur->dev == NULL)
-+			continue;
-+		if (domain != pci_domain_nr(cur->dev->bus))
-+			continue;
-+		if (bus != cur->dev->bus->number)
-+			continue;
-+		if (PCI_DEVFN(slot, func) != cur->dev->devfn)
-+			continue;
-+
-+		/* only the first matching entry is tried */
-+		claimed = pcistub_device_get_pci_dev(pdev, cur);
-+		break;
-+	}
-+
-+	spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
-+	return claimed;
-+}
-+
-+/*
-+ * Claim the seized device @dev for @pdev.
-+ *
-+ * Returns @dev's pci_dev on success, or NULL when the device is not seized
-+ * or is already in use.
-+ */
-+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
-+				    struct pci_dev *dev)
-+{
-+	struct pcistub_device *cur;
-+	struct pci_dev *claimed = NULL;
-+	unsigned long irqflags;
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, irqflags);
-+
-+	list_for_each_entry(cur, &pcistub_devices, dev_list) {
-+		if (cur->dev != dev)
-+			continue;
-+
-+		claimed = pcistub_device_get_pci_dev(pdev, cur);
-+		break;
-+	}
-+
-+	spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
-+	return claimed;
-+}
-+
-+/*
-+ * Give @dev back to pcistub after a pciback_device is done with it.
-+ *
-+ * The device is reset, its dynamic config fields are freed and its config
-+ * state is reset so it is ready for the next domain; the owner link is then
-+ * cleared and a reference on the pcistub_device is dropped.  Does nothing
-+ * if @dev is not on the pcistub_devices list.
-+ */
-+void pcistub_put_pci_dev(struct pci_dev *dev)
-+{
-+	struct pcistub_device *psdev, *found_psdev = NULL;
-+	unsigned long flags;
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
-+
-+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
-+		if (psdev->dev == dev) {
-+			found_psdev = psdev;
-+			break;
-+		}
-+	}
-+
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+
-+	/* not one of ours: nothing to clean up and nothing to dereference */
-+	if (!found_psdev)
-+		return;
-+
-+	/*hold this lock for avoiding breaking link between
-+	 * pcistub and pciback when AER is in processing
-+	 */
-+	down_write(&pcistub_sem);
-+	/* Cleanup our device
-+	 * (so it's ready for the next domain)
-+	 */
-+	pciback_reset_device(found_psdev->dev);
-+	pciback_config_free_dyn_fields(found_psdev->dev);
-+	pciback_config_reset_dev(found_psdev->dev);
-+
-+	spin_lock_irqsave(&found_psdev->lock, flags);
-+	found_psdev->pdev = NULL;
-+	spin_unlock_irqrestore(&found_psdev->lock, flags);
-+
-+	pcistub_device_put(found_psdev);
-+	up_write(&pcistub_sem);
-+}
-+
-+/*
-+ * Return 1 if @dev, or any bridge above it, sits at the domain:bus:devfn
-+ * described by @pdev_id; 0 otherwise.
-+ */
-+static int __devinit pcistub_match_one(struct pci_dev *dev,
-+				       struct pcistub_device_id *pdev_id)
-+{
-+	struct pci_dev *parent;
-+
-+	/* Match the specified device by domain, bus, slot, func and also if
-+	 * any of the device's parent bridges match.
-+	 */
-+	while (dev != NULL) {
-+		if (pci_domain_nr(dev->bus) == pdev_id->domain
-+		    && dev->bus->number == pdev_id->bus
-+		    && dev->devfn == pdev_id->devfn)
-+			return 1;
-+
-+		parent = dev->bus->self;
-+
-+		/* Sometimes topmost bridge links to itself. */
-+		if (parent == dev)
-+			break;
-+
-+		dev = parent;
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Return 1 if @dev (or one of its parent bridges) matches any entry on the
-+ * pcistub_device_ids list, 0 otherwise.
-+ */
-+static int __devinit pcistub_match(struct pci_dev *dev)
-+{
-+	struct pcistub_device_id *id;
-+	unsigned long irqflags;
-+	int matched = 0;
-+
-+	spin_lock_irqsave(&device_ids_lock, irqflags);
-+
-+	list_for_each_entry(id, &pcistub_device_ids, slot_list) {
-+		if (!pcistub_match_one(dev, id))
-+			continue;
-+
-+		matched = 1;
-+		break;
-+	}
-+
-+	spin_unlock_irqrestore(&device_ids_lock, irqflags);
-+
-+	return matched;
-+}
-+
-+/*
-+ * Prepare a seized device for export: allocate its pciback_dev_data (with
-+ * room for the IRQ name), set up its config-space state, enable it once and
-+ * then reset it.
-+ *
-+ * Returns 0 on success.  On failure the driver data is cleared and freed
-+ * and the error is returned.
-+ */
-+static int __devinit pcistub_init_device(struct pci_dev *dev)
-+{
-+ struct pciback_dev_data *dev_data;
-+ int err = 0;
-+
-+ dev_dbg(&dev->dev, "initializing...\n");
-+
-+ /* The PCI backend is not intended to be a module (or to work with
-+ * removable PCI devices (yet). If it were, pciback_config_free()
-+ * would need to be called somewhere to free the memory allocated
-+ * here and then to call kfree(pci_get_drvdata(psdev->dev)).
-+ */
-+ /* extra bytes hold the "pciback[<pci name>]" string written below */
-+ dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]")
-+ + strlen(pci_name(dev)) + 1, GFP_ATOMIC);
-+ if (!dev_data) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+ pci_set_drvdata(dev, dev_data);
-+
-+ /*
-+ * Setup name for fake IRQ handler. It will only be enabled
-+ * once the device is turned on by the guest.
-+ */
-+ sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
-+
-+ dev_dbg(&dev->dev, "initializing config\n");
-+
-+ /*
-+ * NOTE(review): aer_wait_queue is a file-level object, yet it is
-+ * re-initialized here for every device - confirm this cannot race
-+ * with a waiter on the queue.
-+ */
-+ init_waitqueue_head(&aer_wait_queue);
-+ err = pciback_config_init_dev(dev);
-+ if (err)
-+ goto out;
-+
-+ /* HACK: Force device (& ACPI) to determine what IRQ it's on - we
-+ * must do this here because pcibios_enable_device may specify
-+ * the pci device's true irq (and possibly its other resources)
-+ * if they differ from what's in the configuration space.
-+ * This makes the assumption that the device's resources won't
-+ * change after this point (otherwise this code may break!)
-+ */
-+ dev_dbg(&dev->dev, "enabling device\n");
-+ err = pci_enable_device(dev);
-+ if (err)
-+ goto config_release;
-+
-+ /* Now disable the device (this also ensures some private device
-+ * data is setup before we export)
-+ */
-+ dev_dbg(&dev->dev, "reset device\n");
-+ pciback_reset_device(dev);
-+
-+ return 0;
-+
-+config_release:
-+ pciback_config_free_dev(dev);
-+
-+out:
-+ pci_set_drvdata(dev, NULL);
-+ kfree(dev_data);
-+ return err;
-+}
-+
-+/*
-+ * Because some initialization still happens on
-+ * devices during fs_initcall, we need to defer
-+ * full initialization of our devices until
-+ * device_initcall.
-+ *
-+ * Every device parked on seized_devices is initialized and moved to
-+ * pcistub_devices; devices that fail to initialize are dropped.  After
-+ * this runs, initialize_devices is set so later seizures initialize
-+ * immediately.  Always returns 0.
-+ */
-+static int __init pcistub_init_devices_late(void)
-+{
-+	struct pcistub_device *psdev;
-+	unsigned long flags;
-+	int err = 0;
-+
-+	pr_debug("pciback: pcistub_init_devices_late\n");
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, flags);
-+
-+	while (!list_empty(&seized_devices)) {
-+		psdev = container_of(seized_devices.next,
-+				     struct pcistub_device, dev_list);
-+		list_del(&psdev->dev_list);
-+
-+		/* initialization must not run with the lock held */
-+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+
-+		err = pcistub_init_device(psdev->dev);
-+		if (err) {
-+			dev_err(&psdev->dev->dev,
-+				"error %d initializing device\n", err);
-+			/*
-+			 * Drop the reference taken with pci_dev_get() in
-+			 * pcistub_device_alloc(); a bare kfree() leaked it.
-+			 */
-+			pci_dev_put(psdev->dev);
-+			kfree(psdev);
-+			psdev = NULL;
-+		}
-+
-+		spin_lock_irqsave(&pcistub_devices_lock, flags);
-+
-+		if (psdev)
-+			list_add_tail(&psdev->dev_list, &pcistub_devices);
-+	}
-+
-+	initialize_devices = 1;
-+
-+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Take ownership of @dev.
-+ *
-+ * Before pcistub_init_devices_late() has run, the device is only parked on
-+ * seized_devices.  Afterwards it is initialized right away and, on success,
-+ * added to pcistub_devices.
-+ *
-+ * Returns 0 on success, -ENOMEM if the wrapper cannot be allocated, or the
-+ * error from pcistub_init_device() (the wrapper is released in that case).
-+ */
-+static int __devinit pcistub_seize(struct pci_dev *dev)
-+{
-+	struct pcistub_device *stub;
-+	unsigned long irqflags;
-+	int rc = 0;
-+
-+	stub = pcistub_device_alloc(dev);
-+	if (stub == NULL)
-+		return -ENOMEM;
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, irqflags);
-+
-+	if (!initialize_devices) {
-+		dev_dbg(&dev->dev, "deferring initialization\n");
-+		list_add(&stub->dev_list, &seized_devices);
-+	} else {
-+		spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
-+
-+		/* don't want irqs disabled when calling pcistub_init_device */
-+		rc = pcistub_init_device(stub->dev);
-+
-+		spin_lock_irqsave(&pcistub_devices_lock, irqflags);
-+
-+		if (rc == 0)
-+			list_add(&stub->dev_list, &pcistub_devices);
-+	}
-+
-+	spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
-+
-+	if (rc != 0)
-+		pcistub_device_put(stub);
-+
-+	return rc;
-+}
-+
-+/*
-+ * PCI probe callback: seize @dev if it matches one of the configured
-+ * device IDs and has a normal or bridge header type.
-+ *
-+ * Returns the result of pcistub_seize(), or -ENODEV when the device is not
-+ * matched or has an unsupported header type.
-+ */
-+static int __devinit pcistub_probe(struct pci_dev *dev,
-+				   const struct pci_device_id *id)
-+{
-+	dev_dbg(&dev->dev, "probing...\n");
-+
-+	/* Didn't find the device */
-+	if (!pcistub_match(dev))
-+		return -ENODEV;
-+
-+	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
-+	    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
-+		dev_err(&dev->dev, "can't export pci devices that "
-+			"don't have a normal (0) or bridge (1) "
-+			"header type!\n");
-+		return -ENODEV;
-+	}
-+
-+	dev_info(&dev->dev, "seizing device\n");
-+
-+	return pcistub_seize(dev);
-+}
-+
-+/*
-+ * PCI remove callback: release @dev's quirk record and, if the device is on
-+ * the pcistub_devices list, detach it from any pciback_device still using
-+ * it (with loud warnings), unlink it and drop the list's reference.
-+ */
-+static void pcistub_remove(struct pci_dev *dev)
-+{
-+	struct pcistub_device *cur, *stub = NULL;
-+	unsigned long irqflags;
-+
-+	dev_dbg(&dev->dev, "removing\n");
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, irqflags);
-+
-+	pciback_config_quirk_release(dev);
-+
-+	list_for_each_entry(cur, &pcistub_devices, dev_list) {
-+		if (cur->dev == dev) {
-+			stub = cur;
-+			break;
-+		}
-+	}
-+
-+	spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
-+
-+	if (stub == NULL)
-+		return;
-+
-+	dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
-+		stub->pdev);
-+
-+	if (stub->pdev) {
-+		printk(KERN_WARNING "pciback: ****** removing device "
-+		       "%s while still in-use! ******\n",
-+		       pci_name(stub->dev));
-+		printk(KERN_WARNING "pciback: ****** driver domain may "
-+		       "still access this device's i/o resources!\n");
-+		printk(KERN_WARNING "pciback: ****** shutdown driver "
-+		       "domain before binding device\n");
-+		printk(KERN_WARNING "pciback: ****** to other drivers "
-+		       "or domains\n");
-+
-+		pciback_release_pci_dev(stub->pdev, stub->dev);
-+	}
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, irqflags);
-+	list_del(&stub->dev_list);
-+	spin_unlock_irqrestore(&pcistub_devices_lock, irqflags);
-+
-+	/* the final put for releasing from the list */
-+	pcistub_device_put(stub);
-+}
-+
-+/*
-+ * ID table with a single wildcard entry (PCI_ANY_ID for vendor, device,
-+ * subvendor and subdevice) followed by the zero terminator.
-+ */
-+static const struct pci_device_id pcistub_ids[] = {
-+ {
-+ .vendor = PCI_ANY_ID,
-+ .device = PCI_ANY_ID,
-+ .subvendor = PCI_ANY_ID,
-+ .subdevice = PCI_ANY_ID,
-+ },
-+ {0,},
-+};
-+
-+#define PCI_NODENAME_MAX 40
-+/*
-+ * Flag AER failure for the domain that owns @psdev by writing
-+ * "aerState" = "aerfail" under that domain's pci backend node in xenstore.
-+ * The transaction is retried while xenbus_transaction_end() returns -EAGAIN.
-+ * Does nothing except log an error when @psdev is NULL.
-+ */
-+static void kill_domain_by_device(struct pcistub_device *psdev)
-+{
-+	struct xenbus_transaction xbt;
-+	int err;
-+	char nodename[PCI_NODENAME_MAX];
-+
-+	if (!psdev) {
-+		/*
-+		 * There is no device to attach the message to, and nothing
-+		 * below can work without one, so log and bail out instead
-+		 * of dereferencing the NULL pointer.
-+		 */
-+		printk(KERN_ERR "pciback: device is NULL when do AER "
-+		       "recovery/kill_domain\n");
-+		return;
-+	}
-+
-+	/* snprintf() always NUL-terminates within PCI_NODENAME_MAX */
-+	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
-+		 psdev->pdev->xdev->otherend_id);
-+
-+again:
-+	err = xenbus_transaction_start(&xbt);
-+	if (err) {
-+		dev_err(&psdev->dev->dev,
-+			"error %d when start xenbus transaction\n", err);
-+		return;
-+	}
-+	/*PV AER handlers will set this flag*/
-+	xenbus_printf(xbt, nodename, "aerState" , "aerfail");
-+	err = xenbus_transaction_end(xbt, 0);
-+	if (err) {
-+		if (err == -EAGAIN)
-+			goto again;
-+		dev_err(&psdev->dev->dev,
-+			"error %d when end xenbus transaction\n", err);
-+		return;
-+	}
-+}
-+
-+/*
-+ * common_process - run one AER recovery step against the frontend.
-+ * @psdev: stub device; psdev->pdev->sh_info is dereferenced without a check
-+ * @state: stored in aer_op->err before the request is sent
-+ * @aer_cmd: stored in aer_op->cmd
-+ * @result: value returned when the frontend does not answer in time
-+ *
-+ * Every AER recovery step (error_detected, mmio_enabled, etc.) needs the
-+ * frontend and the backend to cooperate. In pciback those steps all do the
-+ * same job: send a service request and wait for the frontend's response.
-+ *
-+ * Returns PCI_ERS_RESULT_NONE if pciback_get_pcifront_dev() yields 0,
-+ * @result if the wait returns 0 with _XEN_PCIB_active still set, and
-+ * otherwise whatever is found in aer_op->err after the wait.
-+ *
-+ * NOTE(review): the not-responding path returns without clearing
-+ * _PCIB_op_pending, and stores PCI_ERS_RESULT_NONE in aer_op->err while
-+ * returning @result - confirm both are intended.
-+*/
-+static pci_ers_result_t common_process(struct pcistub_device *psdev,
-+ pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
-+{
-+ pci_ers_result_t res = result;
-+ struct xen_pcie_aer_op *aer_op;
-+ int ret;
-+
-+ /* Fill in the AER request in the page shared with the PV AER driver */
-+ aer_op = &(psdev->pdev->sh_info->aer_op);
-+ aer_op->cmd = aer_cmd ;
-+ /* Useful for the error_detected callback */
-+ aer_op->err = state;
-+ /* Domain/bus/devfn of the device as looked up for the pcifront side */
-+ ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev,
-+ &aer_op->domain, &aer_op->bus, &aer_op->devfn);
-+ if (!ret) {
-+ dev_err(&psdev->dev->dev,
-+ "pciback: failed to get pcifront device\n");
-+ return PCI_ERS_RESULT_NONE;
-+ }
-+ wmb();
-+
-+ dev_dbg(&psdev->dev->dev,
-+ "pciback: aer_op %x dom %x bus %x devfn %x\n",
-+ aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
-+ /* Local flag marking that an AER request is outstanding; the pciback
-+ * callback uses this flag to judge whether it needs to check for the
-+ * AER service ack signal from pcifront.
-+ */
-+ set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
-+
-+ /* It is possible that a pcifront conf_read_write ops request invokes
-+ * the callback, which causes a spurious execution of wake_up.
-+ * Yet it is harmless and better than a spinlock here.
-+ */
-+ set_bit(_XEN_PCIB_active,
-+ (unsigned long *)&psdev->pdev->sh_info->flags);
-+ wmb();
-+ notify_remote_via_irq(psdev->pdev->evtchn_irq);
-+
-+ ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
-+ (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
-+
-+ if (!ret) {
-+ if (test_bit(_XEN_PCIB_active,
-+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
-+ dev_err(&psdev->dev->dev,
-+ "pcifront aer process not responding!\n");
-+ clear_bit(_XEN_PCIB_active,
-+ (unsigned long *)&psdev->pdev->sh_info->flags);
-+ aer_op->err = PCI_ERS_RESULT_NONE;
-+ return res;
-+ }
-+ }
-+ clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
-+
-+ if (test_bit(_XEN_PCIF_active,
-+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
-+ dev_dbg(&psdev->dev->dev,
-+ "schedule pci_conf service in pciback \n");
-+ test_and_schedule_op(psdev->pdev);
-+ }
-+
-+ res = (pci_ers_result_t)aer_op->err;
-+ return res;
-+}
-+
-+/*
-+ * pciback_slot_reset: send the slot_reset request to pcifront, in case the
-+ * guest's device driver can provide this service, and then wait for the
-+ * pcifront ack.
-+ * @dev: pointer to the PCI device
-+ *
-+ * The return value is used by the aer_core do_recovery policy. It is
-+ * PCI_ERS_RESULT_RECOVERED unless common_process() is reached, in which case
-+ * it is whatever common_process() returns.
-+ */
-+static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
-+{
-+	struct pcistub_device *psdev;
-+	pci_ers_result_t result;
-+
-+	result = PCI_ERS_RESULT_RECOVERED;
-+	dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n",
-+		dev->bus->number, dev->devfn);
-+
-+	down_write(&pcistub_sem);
-+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-+				    dev->bus->number,
-+				    PCI_SLOT(dev->devfn),
-+				    PCI_FUNC(dev->devfn));
-+
-+	if (!psdev || !psdev->pdev) {
-+		dev_err(&dev->dev,
-+			"pciback device is not found/assigned\n");
-+		goto end;
-+	}
-+
-+	if (!psdev->pdev->sh_info) {
-+		dev_err(&dev->dev, "pciback device is not connected or owned"
-+			" by HVM, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto end;
-+	}
-+
-+	if (!test_bit(_XEN_PCIB_AERHANDLER,
-+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-+		dev_err(&dev->dev,
-+			"guest with no AER driver should have been killed\n");
-+		goto end;
-+	}
-+	result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
-+
-+	if (result == PCI_ERS_RESULT_NONE ||
-+		result == PCI_ERS_RESULT_DISCONNECT) {
-+		dev_dbg(&dev->dev,
-+			"No AER slot_reset service or disconnected!\n");
-+		kill_domain_by_device(psdev);
-+	}
-+end:
-+	/*
-+	 * Drop the lookup reference on every path, including the case where
-+	 * the stub device was found but is not assigned to a backend.
-+	 */
-+	if (psdev)
-+		pcistub_device_put(psdev);
-+	up_write(&pcistub_sem);
-+	return result;
-+}
-+
-+
-+/*
-+ * pciback_mmio_enabled: send the mmio_enabled request to pcifront, in case
-+ * the guest's device driver can provide this service, and then wait for the
-+ * pcifront ack.
-+ * @dev: pointer to the PCI device
-+ *
-+ * The return value is used by the aer_core do_recovery policy. It is
-+ * PCI_ERS_RESULT_RECOVERED unless common_process() is reached, in which case
-+ * it is whatever common_process() returns.
-+ */
-+static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
-+{
-+	struct pcistub_device *psdev;
-+	pci_ers_result_t result;
-+
-+	result = PCI_ERS_RESULT_RECOVERED;
-+	dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n",
-+		dev->bus->number, dev->devfn);
-+
-+	down_write(&pcistub_sem);
-+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-+				    dev->bus->number,
-+				    PCI_SLOT(dev->devfn),
-+				    PCI_FUNC(dev->devfn));
-+
-+	if (!psdev || !psdev->pdev) {
-+		dev_err(&dev->dev,
-+			"pciback device is not found/assigned\n");
-+		goto end;
-+	}
-+
-+	if (!psdev->pdev->sh_info) {
-+		dev_err(&dev->dev, "pciback device is not connected or owned"
-+			" by HVM, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto end;
-+	}
-+
-+	if (!test_bit(_XEN_PCIB_AERHANDLER,
-+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-+		dev_err(&dev->dev,
-+			"guest with no AER driver should have been killed\n");
-+		goto end;
-+	}
-+	result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
-+
-+	if (result == PCI_ERS_RESULT_NONE ||
-+		result == PCI_ERS_RESULT_DISCONNECT) {
-+		dev_dbg(&dev->dev,
-+			"No AER mmio_enabled service or disconnected!\n");
-+		kill_domain_by_device(psdev);
-+	}
-+end:
-+	/*
-+	 * Drop the lookup reference on every path, including the case where
-+	 * the stub device was found but is not assigned to a backend.
-+	 */
-+	if (psdev)
-+		pcistub_device_put(psdev);
-+	up_write(&pcistub_sem);
-+	return result;
-+}
-+
-+/*
-+ * pciback_error_detected: send the error_detected request to pcifront, in
-+ * case the guest's device driver can provide this service, and then wait for
-+ * the pcifront ack.
-+ * @dev: pointer to the PCI device
-+ * @error: the current PCI connection state
-+ *
-+ * The return value is used by the aer_core do_recovery policy. It is
-+ * PCI_ERS_RESULT_CAN_RECOVER unless common_process() is reached, in which
-+ * case it is whatever common_process() returns.
-+ */
-+static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
-+	pci_channel_state_t error)
-+{
-+	struct pcistub_device *psdev;
-+	pci_ers_result_t result;
-+
-+	result = PCI_ERS_RESULT_CAN_RECOVER;
-+	dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n",
-+		dev->bus->number, dev->devfn);
-+
-+	down_write(&pcistub_sem);
-+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-+				    dev->bus->number,
-+				    PCI_SLOT(dev->devfn),
-+				    PCI_FUNC(dev->devfn));
-+
-+	if (!psdev || !psdev->pdev) {
-+		dev_err(&dev->dev,
-+			"pciback device is not found/assigned\n");
-+		goto end;
-+	}
-+
-+	if (!psdev->pdev->sh_info) {
-+		dev_err(&dev->dev, "pciback device is not connected or owned"
-+			" by HVM, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto end;
-+	}
-+
-+	/* Guest owns the device yet no AER handler is registered: kill guest */
-+	if (!test_bit(_XEN_PCIB_AERHANDLER,
-+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-+		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto end;
-+	}
-+	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
-+
-+	if (result == PCI_ERS_RESULT_NONE ||
-+		result == PCI_ERS_RESULT_DISCONNECT) {
-+		dev_dbg(&dev->dev,
-+			"No AER error_detected service or disconnected!\n");
-+		kill_domain_by_device(psdev);
-+	}
-+end:
-+	/*
-+	 * Drop the lookup reference on every path, including the case where
-+	 * the stub device was found but is not assigned to a backend.
-+	 */
-+	if (psdev)
-+		pcistub_device_put(psdev);
-+	up_write(&pcistub_sem);
-+	return result;
-+}
-+
-+/*
-+ * pciback_error_resume: send the error_resume request to pcifront, in case
-+ * the guest's device driver can provide this service, and then wait for the
-+ * pcifront ack.
-+ * @dev: pointer to the PCI device
-+ *
-+ * Nothing is returned; the result of common_process() is discarded.
-+ */
-+static void pciback_error_resume(struct pci_dev *dev)
-+{
-+	struct pcistub_device *psdev;
-+
-+	dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n",
-+		dev->bus->number, dev->devfn);
-+
-+	down_write(&pcistub_sem);
-+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
-+				    dev->bus->number,
-+				    PCI_SLOT(dev->devfn),
-+				    PCI_FUNC(dev->devfn));
-+
-+	if (!psdev || !psdev->pdev) {
-+		dev_err(&dev->dev,
-+			"pciback device is not found/assigned\n");
-+		goto end;
-+	}
-+
-+	if (!psdev->pdev->sh_info) {
-+		dev_err(&dev->dev, "pciback device is not connected or owned"
-+			" by HVM, kill it\n");
-+		kill_domain_by_device(psdev);
-+		goto end;
-+	}
-+
-+	if (!test_bit(_XEN_PCIB_AERHANDLER,
-+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
-+		dev_err(&dev->dev,
-+			"guest with no AER driver should have been killed\n");
-+		kill_domain_by_device(psdev);
-+		goto end;
-+	}
-+	common_process(psdev, 1, XEN_PCI_OP_aer_resume,
-+		       PCI_ERS_RESULT_RECOVERED);
-+end:
-+	/*
-+	 * Drop the lookup reference on every path, including the case where
-+	 * the stub device was found but is not assigned to a backend.
-+	 */
-+	if (psdev)
-+		pcistub_device_put(psdev);
-+	up_write(&pcistub_sem);
-+}
-+
-+/* pciback AER handling: callback table referenced by pciback_pci_driver */
-+static struct pci_error_handlers pciback_error_handler = {
-+ .error_detected = pciback_error_detected,
-+ .mmio_enabled = pciback_mmio_enabled,
-+ .slot_reset = pciback_slot_reset,
-+ .resume = pciback_error_resume,
-+};
-+
-+/*
-+ * PCI driver definition for pciback: wildcard ID table, stub probe/remove
-+ * callbacks and the AER error handlers.
-+ *
-+ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
-+ * for a normal device. I don't want it to be loaded automatically.
-+ */
-+
-+static struct pci_driver pciback_pci_driver = {
-+ .name = DRV_NAME,
-+ .id_table = pcistub_ids,
-+ .probe = pcistub_probe,
-+ .remove = pcistub_remove,
-+ .err_handler = &pciback_error_handler,
-+};
-+
-+/*
-+ * Parse "domain:bus:slot.func" (all hex) from @buf; if that form does not
-+ * match, fall back to "bus:slot.func" with *domain forced to 0.
-+ * Returns 0 on success, -EINVAL otherwise.
-+ */
-+static inline int str_to_slot(const char *buf, int *domain, int *bus,
-+			      int *slot, int *func)
-+{
-+	int matched = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
-+
-+	if (matched == 4)
-+		return 0;
-+	if (matched < 0)
-+		return -EINVAL;
-+
-+	/* second attempt: the short form without a domain */
-+	*domain = 0;
-+	matched = sscanf(buf, " %x:%x.%x", bus, slot, func);
-+
-+	return (matched == 3) ? 0 : -EINVAL;
-+}
-+
-+/*
-+ * Parse a quirk specification "dddd:bb:ss.f-rrrrrrrr:s:mmmmmmmm" (all hex)
-+ * from @buf. Returns 0 only when all seven fields were converted, -EINVAL
-+ * otherwise.
-+ */
-+static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
-+			       *slot, int *func, int *reg, int *size, int *mask)
-+{
-+	int fields = sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x",
-+			    domain, bus, slot, func, reg, size, mask);
-+
-+	return (fields == 7) ? 0 : -EINVAL;
-+}
-+
-+/*
-+ * Allocate a pcistub_device_id for domain/bus/slot/func and append it to
-+ * pcistub_device_ids under device_ids_lock.
-+ * Returns 0 on success or -ENOMEM if the allocation fails.
-+ */
-+static int pcistub_device_id_add(int domain, int bus, int slot, int func)
-+{
-+	struct pcistub_device_id *entry;
-+	unsigned long irq_flags;
-+
-+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-+	if (entry == NULL)
-+		return -ENOMEM;
-+
-+	entry->domain = domain;
-+	entry->bus = bus;
-+	entry->devfn = PCI_DEVFN(slot, func);
-+
-+	pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
-+		 domain, bus, slot, func);
-+
-+	spin_lock_irqsave(&device_ids_lock, irq_flags);
-+	list_add_tail(&entry->slot_list, &pcistub_device_ids);
-+	spin_unlock_irqrestore(&device_ids_lock, irq_flags);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Remove and free every pcistub_device_ids entry matching
-+ * domain/bus/slot/func, under device_ids_lock.
-+ * Returns 0 if at least one entry was removed, -ENOENT otherwise.
-+ */
-+static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
-+{
-+	struct pcistub_device_id *entry, *next;
-+	int devfn = PCI_DEVFN(slot, func);
-+	int rc = -ENOENT;
-+	unsigned long irq_flags;
-+
-+	spin_lock_irqsave(&device_ids_lock, irq_flags);
-+	list_for_each_entry_safe(entry, next, &pcistub_device_ids,
-+				 slot_list) {
-+		if (entry->domain != domain || entry->bus != bus ||
-+		    entry->devfn != devfn)
-+			continue;
-+
-+		/* Keep scanning after a hit: the same slot could be in
-+		 * the list more than once.
-+		 */
-+		list_del(&entry->slot_list);
-+		kfree(entry);
-+
-+		rc = 0;
-+
-+		pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
-+			 "seize list\n", domain, bus, slot, func);
-+	}
-+	spin_unlock_irqrestore(&device_ids_lock, irq_flags);
-+
-+	return rc;
-+}
-+
-+/*
-+ * Add a user-defined config-space quirk field (@reg, @size, @mask) to the
-+ * stub device at domain/bus/slot/func.
-+ *
-+ * Returns 0 on success, -ENODEV if no such stub device (or it has no
-+ * pci_dev), -ENOMEM if the field cannot be allocated, or the error from
-+ * pciback_config_quirks_add_field().
-+ */
-+static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
-+			   int size, int mask)
-+{
-+	int err = 0;
-+	struct pcistub_device *psdev;
-+	struct pci_dev *dev;
-+	struct config_field *field;
-+
-+	psdev = pcistub_device_find(domain, bus, slot, func);
-+	if (!psdev || !psdev->dev) {
-+		err = -ENODEV;
-+		goto out;
-+	}
-+	dev = psdev->dev;
-+
-+	field = kzalloc(sizeof(*field), GFP_ATOMIC);
-+	if (!field) {
-+		err = -ENOMEM;
-+		goto out;
-+	}
-+
-+	field->offset = reg;
-+	field->size = size;
-+	field->mask = mask;
-+	field->init = NULL;
-+	field->reset = NULL;
-+	field->release = NULL;
-+	field->clean = pciback_config_field_free;
-+
-+	err = pciback_config_quirks_add_field(dev, field);
-+	if (err)
-+		kfree(field);
-+out:
-+	/* Release the lookup reference, as the other sysfs handlers do */
-+	if (psdev)
-+		pcistub_device_put(psdev);
-+	return err;
-+}
-+
-+/*
-+ * sysfs store handler for "new_slot": parse a slot specification from @buf
-+ * and add it to the list of device IDs.
-+ * Returns @count on success or a negative errno.
-+ */
-+static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
-+				size_t count)
-+{
-+	int domain, bus, slot, func;
-+	int rc;
-+
-+	rc = str_to_slot(buf, &domain, &bus, &slot, &func);
-+	if (rc == 0)
-+		rc = pcistub_device_id_add(domain, bus, slot, func);
-+
-+	if (!rc)
-+		rc = count;
-+	return rc;
-+}
-+
-+DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
-+
-+/*
-+ * sysfs store handler for "remove_slot": parse a slot specification from
-+ * @buf and remove matching entries from the list of device IDs.
-+ * Returns @count on success or a negative errno.
-+ */
-+static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
-+				   size_t count)
-+{
-+	int domain, bus, slot, func;
-+	int rc;
-+
-+	rc = str_to_slot(buf, &domain, &bus, &slot, &func);
-+	if (rc == 0)
-+		rc = pcistub_device_id_remove(domain, bus, slot, func);
-+
-+	if (!rc)
-+		rc = count;
-+	return rc;
-+}
-+
-+DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
-+
-+/*
-+ * sysfs show handler for "slots": print one "dddd:bb:ss.f" line per entry
-+ * of pcistub_device_ids into @buf, stopping once PAGE_SIZE is reached.
-+ * Returns the number of bytes written.
-+ */
-+static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
-+{
-+	struct pcistub_device_id *entry;
-+	size_t len = 0;
-+	unsigned long irq_flags;
-+
-+	spin_lock_irqsave(&device_ids_lock, irq_flags);
-+	list_for_each_entry(entry, &pcistub_device_ids, slot_list) {
-+		if (len >= PAGE_SIZE)
-+			break;
-+
-+		len += scnprintf(buf + len, PAGE_SIZE - len,
-+				 "%04x:%02x:%02x.%01x\n",
-+				 entry->domain, entry->bus,
-+				 PCI_SLOT(entry->devfn),
-+				 PCI_FUNC(entry->devfn));
-+	}
-+	spin_unlock_irqrestore(&device_ids_lock, irq_flags);
-+
-+	return len;
-+}
-+
-+DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
-+
-+/*
-+ * sysfs show handler for "irq_handlers": for each stub device that has a
-+ * pci_dev and driver data, print its name, isr_on and ack_intr state and
-+ * the handled counter. Stops once PAGE_SIZE is reached.
-+ * Returns the number of bytes written.
-+ */
-+static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
-+{
-+	struct pcistub_device *stub;
-+	struct pciback_dev_data *data;
-+	size_t len = 0;
-+	unsigned long irq_flags;
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, irq_flags);
-+	list_for_each_entry(stub, &pcistub_devices, dev_list) {
-+		if (len >= PAGE_SIZE)
-+			break;
-+
-+		/* skip stubs without a device or without driver data */
-+		data = stub->dev ? pci_get_drvdata(stub->dev) : NULL;
-+		if (data == NULL)
-+			continue;
-+
-+		len += scnprintf(buf + len, PAGE_SIZE - len,
-+				 "%s:%s:%sing:%ld\n",
-+				 pci_name(stub->dev),
-+				 data->isr_on ? "on" : "off",
-+				 data->ack_intr ? "ack" : "not ack",
-+				 data->handled);
-+	}
-+	spin_unlock_irqrestore(&pcistub_devices_lock, irq_flags);
-+	return len;
-+}
-+
-+DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
-+
-+/*
-+ * sysfs store handler for "irq_handler_state": toggle isr_on for the stub
-+ * device named in @buf; when the result is "on", ack_intr is set as well.
-+ *
-+ * Returns @count on success, -EINVAL for an unparsable slot, or -ENOENT
-+ * when no matching stub device with driver data exists.
-+ */
-+static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
-+					  const char *buf,
-+					  size_t count)
-+{
-+	struct pcistub_device *psdev = NULL;
-+	struct pciback_dev_data *dev_data;
-+	int domain, bus, slot, func;
-+	int err;
-+
-+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
-+	if (err)
-+		goto out;
-+
-+	psdev = pcistub_device_find(domain, bus, slot, func);
-+	if (!psdev || !psdev->dev) {
-+		/* report the miss instead of claiming the write succeeded */
-+		err = -ENOENT;
-+		goto out;
-+	}
-+
-+	dev_data = pci_get_drvdata(psdev->dev);
-+	if (!dev_data) {
-+		err = -ENOENT;
-+		goto out;
-+	}
-+
-+	dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
-+		dev_data->irq_name, dev_data->isr_on,
-+		!dev_data->isr_on);
-+
-+	dev_data->isr_on = !(dev_data->isr_on);
-+	if (dev_data->isr_on)
-+		dev_data->ack_intr = 1;
-+out:
-+	/* Release the lookup reference, as the other sysfs handlers do */
-+	if (psdev)
-+		pcistub_device_put(psdev);
-+	if (!err)
-+		err = count;
-+	return err;
-+}
-+DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
-+
-+/*
-+ * sysfs store handler for "quirks": parse a quirk specification from @buf
-+ * and register the field via pcistub_reg_add().
-+ * Returns @count on success or a negative errno.
-+ */
-+static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
-+				 size_t count)
-+{
-+	int domain, bus, slot, func, reg, size, mask;
-+	int rc;
-+
-+	rc = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
-+			  &mask);
-+	if (rc == 0)
-+		rc = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
-+
-+	if (!rc)
-+		rc = count;
-+	return rc;
-+}
-+
-+/*
-+ * sysfs show handler for "quirks": for every entry of pciback_quirks print
-+ * the device location and IDs, followed by one line per config field
-+ * attached to that device. Output stops once PAGE_SIZE is reached.
-+ * Returns the number of bytes written.
-+ */
-+static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
-+{
-+	size_t count = 0;
-+	unsigned long flags;
-+	struct pciback_config_quirk *quirk;
-+	struct pciback_dev_data *dev_data;
-+	const struct config_field *field;
-+	const struct config_field_entry *cfg_entry;
-+
-+	spin_lock_irqsave(&device_ids_lock, flags);
-+	list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
-+		if (count >= PAGE_SIZE)
-+			goto out;
-+
-+		count += scnprintf(buf + count, PAGE_SIZE - count,
-+				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
-+				   quirk->pdev->bus->number,
-+				   PCI_SLOT(quirk->pdev->devfn),
-+				   PCI_FUNC(quirk->pdev->devfn),
-+				   quirk->devid.vendor, quirk->devid.device,
-+				   quirk->devid.subvendor,
-+				   quirk->devid.subdevice);
-+
-+		dev_data = pci_get_drvdata(quirk->pdev);
-+		/* No driver data means no field list to walk */
-+		if (!dev_data)
-+			continue;
-+
-+		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
-+			field = cfg_entry->field;
-+			if (count >= PAGE_SIZE)
-+				goto out;
-+
-+			count += scnprintf(buf + count, PAGE_SIZE - count,
-+					   "\t\t%08x:%01x:%08x\n",
-+					   cfg_entry->base_offset +
-+					   field->offset, field->size,
-+					   field->mask);
-+		}
-+	}
-+
-+out:
-+	spin_unlock_irqrestore(&device_ids_lock, flags);
-+
-+	return count;
-+}
-+
-+DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
-+
-+/*
-+ * sysfs store handler for "permissive": set the permissive flag on the stub
-+ * device named in @buf and warn the first time it is enabled.
-+ * Returns @count on success, -ENODEV if the stub device or its pci_dev is
-+ * missing, -ENXIO if it has no driver data, or the str_to_slot() error.
-+ */
-+static ssize_t permissive_add(struct device_driver *drv, const char *buf,
-+			      size_t count)
-+{
-+	int domain, bus, slot, func;
-+	int rc;
-+	struct pcistub_device *stub;
-+	struct pciback_dev_data *data;
-+
-+	rc = str_to_slot(buf, &domain, &bus, &slot, &func);
-+	if (rc)
-+		return rc;
-+
-+	stub = pcistub_device_find(domain, bus, slot, func);
-+	if (stub == NULL)
-+		return -ENODEV;
-+
-+	if (stub->dev == NULL) {
-+		rc = -ENODEV;
-+		goto put;
-+	}
-+
-+	data = pci_get_drvdata(stub->dev);
-+	if (data == NULL) {
-+		/* the driver data for a device should never be null here */
-+		rc = -ENXIO;
-+	} else if (!data->permissive) {
-+		data->permissive = 1;
-+		/* Let user know that what they're doing could be unsafe */
-+		dev_warn(&stub->dev->dev, "enabling permissive mode "
-+			 "configuration space accesses!\n");
-+		dev_warn(&stub->dev->dev,
-+			 "permissive mode is potentially unsafe!\n");
-+	}
-+put:
-+	pcistub_device_put(stub);
-+	if (!rc)
-+		rc = count;
-+	return rc;
-+}
-+
-+/*
-+ * sysfs show handler for "permissive": print the name of every stub device
-+ * whose driver data has the permissive flag set, one per line, stopping
-+ * once PAGE_SIZE is reached. Returns the number of bytes written.
-+ */
-+static ssize_t permissive_show(struct device_driver *drv, char *buf)
-+{
-+	struct pcistub_device *stub;
-+	struct pciback_dev_data *data;
-+	size_t len = 0;
-+	unsigned long irq_flags;
-+
-+	spin_lock_irqsave(&pcistub_devices_lock, irq_flags);
-+	list_for_each_entry(stub, &pcistub_devices, dev_list) {
-+		if (len >= PAGE_SIZE)
-+			break;
-+		if (stub->dev == NULL)
-+			continue;
-+
-+		data = pci_get_drvdata(stub->dev);
-+		if (data != NULL && data->permissive)
-+			len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
-+					 pci_name(stub->dev));
-+	}
-+	spin_unlock_irqrestore(&pcistub_devices_lock, irq_flags);
-+	return len;
-+}
-+
-+DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
-+
-+/*
-+ * Remove the driver's sysfs attribute files and unregister the PCI driver.
-+ */
-+static void pcistub_exit(void)
-+{
-+	struct device_driver *core = &pciback_pci_driver.driver;
-+
-+	driver_remove_file(core, &driver_attr_new_slot);
-+	driver_remove_file(core, &driver_attr_remove_slot);
-+	driver_remove_file(core, &driver_attr_slots);
-+	driver_remove_file(core, &driver_attr_quirks);
-+	driver_remove_file(core, &driver_attr_permissive);
-+	driver_remove_file(core, &driver_attr_irq_handlers);
-+	driver_remove_file(core, &driver_attr_irq_handler_state);
-+
-+	pci_unregister_driver(&pciback_pci_driver);
-+}
-+
-+/*
-+ * Parse pci_devs_to_hide, a sequence of "(domain:bus:slot.func)" or
-+ * "(bus:slot.func)" items, into the device ID list, then register the PCI
-+ * driver and create its sysfs attribute files.
-+ *
-+ * Returns 0 on success, -EINVAL on a malformed item, or the first error
-+ * from adding an ID, registering the driver or creating an attribute. If
-+ * an attribute cannot be created, pcistub_exit() undoes the registration.
-+ */
-+static int __init pcistub_init(void)
-+{
-+	struct device_driver *core = &pciback_pci_driver.driver;
-+	int domain, bus, slot, func;
-+	int offset = 0;
-+	int consumed;
-+	int rc = 0;
-+
-+	if (pci_devs_to_hide && *pci_devs_to_hide) {
-+		do {
-+			const char *item = pci_devs_to_hide + offset;
-+
-+			consumed = 0;
-+
-+			if (sscanf(item, " (%x:%x:%x.%x) %n",
-+				   &domain, &bus, &slot, &func,
-+				   &consumed) != 4) {
-+				/* retry as the short form without a domain */
-+				domain = 0;
-+				if (sscanf(item, " (%x:%x.%x) %n",
-+					   &bus, &slot, &func,
-+					   &consumed) != 3) {
-+					printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
-+					       item);
-+					return -EINVAL;
-+				}
-+			}
-+
-+			rc = pcistub_device_id_add(domain, bus, slot, func);
-+			if (rc)
-+				return rc;
-+
-+			/* if consumed<=0, we've reached the end of the string */
-+			offset += consumed;
-+		} while (consumed > 0 && pci_devs_to_hide[offset]);
-+	}
-+
-+	/* If we're the first PCI Device Driver to register, we're the
-+	 * first one to get offered PCI devices as they become
-+	 * available (and thus we can be the first to grab them)
-+	 */
-+	rc = pci_register_driver(&pciback_pci_driver);
-+	if (rc < 0)
-+		return rc;
-+
-+	/* create the attribute files in order, stopping at the first error */
-+	rc = driver_create_file(core, &driver_attr_new_slot);
-+	if (!rc)
-+		rc = driver_create_file(core, &driver_attr_remove_slot);
-+	if (!rc)
-+		rc = driver_create_file(core, &driver_attr_slots);
-+	if (!rc)
-+		rc = driver_create_file(core, &driver_attr_quirks);
-+	if (!rc)
-+		rc = driver_create_file(core, &driver_attr_permissive);
-+	if (!rc)
-+		rc = driver_create_file(core, &driver_attr_irq_handlers);
-+	if (!rc)
-+		rc = driver_create_file(core, &driver_attr_irq_handler_state);
-+
-+	if (rc)
-+		pcistub_exit();
-+
-+	return rc;
-+}
-+
-+#ifndef MODULE
-+/*
-+ * Built-in case only (MODULE not defined): run pcistub_init() as an
-+ * fs_initcall.
-+ *
-+ * fs_initcall happens before device_initcall
-+ * so pciback *should* get called first (b/c we
-+ * want to suck up any device before other drivers
-+ * get a chance by being the first pci device
-+ * driver to register)
-+ */
-+fs_initcall(pcistub_init);
-+#endif
-+
-+/*
-+ * Module entry point. Refuses to load outside the initial Xen domain, sets
-+ * up the config-space layer, runs pcistub_init() here when built as a
-+ * module, performs late stub device initialisation and registers with
-+ * xenbus. A xenbus registration failure is undone with pcistub_exit().
-+ */
-+static int __init pciback_init(void)
-+{
-+	int rc;
-+
-+	if (!xen_initial_domain())
-+		return -ENODEV;
-+
-+	rc = pciback_config_init();
-+	if (rc)
-+		return rc;
-+
-+#ifdef MODULE
-+	rc = pcistub_init();
-+	if (rc < 0)
-+		return rc;
-+#endif
-+
-+	pcistub_init_devices_late();
-+
-+	rc = pciback_xenbus_register();
-+	if (!rc)
-+		return 0;
-+
-+	pcistub_exit();
-+	return rc;
-+}
-+/*
-+ * Module exit: unregister from xenbus, then tear down the stub driver
-+ * (sysfs files and PCI driver registration) via pcistub_exit().
-+ */
-+static void __exit pciback_cleanup(void)
-+{
-+ pciback_xenbus_unregister();
-+ pcistub_exit();
-+}
-+
-+module_init(pciback_init);
-+module_exit(pciback_cleanup);
-+
-+MODULE_LICENSE("Dual BSD/GPL");
-diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
-new file mode 100644
-index 0000000..fc31052
---- /dev/null
-+++ b/drivers/xen/pciback/pciback.h
-@@ -0,0 +1,142 @@
-+/*
-+ * PCI Backend Common Data Structures & Function Declarations
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+#ifndef __XEN_PCIBACK_H__
-+#define __XEN_PCIBACK_H__
-+
-+#include <linux/pci.h>
-+#include <linux/interrupt.h>
-+#include <xen/xenbus.h>
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+#include <linux/workqueue.h>
-+#include <asm/atomic.h>
-+#include <xen/interface/io/pciif.h>
-+/* List node that pairs a list_head with a pointer to a struct pci_dev. */
-+struct pci_dev_entry {
-+ struct list_head list;
-+ struct pci_dev *dev;
-+};
-+/*
-+ * Bits of struct pciback_device.flags. The names with a leading underscore
-+ * are bit numbers (as passed to set_bit()/test_bit()); the names without it
-+ * are the corresponding masks.
-+ */
-+#define _PDEVF_op_active (0)
-+#define PDEVF_op_active (1<<(_PDEVF_op_active))
-+#define _PCIB_op_pending (1)
-+#define PCIB_op_pending (1<<(_PCIB_op_pending))
-+/* Per-backend state for one pciback instance. */
-+struct pciback_device {
-+ void *pci_dev_data;
-+ spinlock_t dev_lock;
-+
-+ struct xenbus_device *xdev; /* xdev->otherend_id names the guest domain */
-+
-+ struct xenbus_watch be_watch;
-+ u8 be_watching;
-+
-+ int evtchn_irq; /* passed to notify_remote_via_irq() to signal the frontend */
-+
-+ struct xen_pci_sharedinfo *sh_info; /* holds the op, aer_op and flags exchanged with pcifront */
-+
-+ unsigned long flags; /* _PDEVF_op_active / _PCIB_op_pending bits */
-+
-+ struct work_struct op_work; /* queued on pciback_wq; handler is pciback_do_op() */
-+};
-+/* Per-PCI-device driver data, retrieved with pci_get_drvdata(). */
-+struct pciback_dev_data {
-+ struct list_head config_fields; /* list of config_field_entry items */
-+ unsigned int permissive : 1; /* set through the "permissive" sysfs file */
-+ unsigned int warned_on_write : 1;
-+ unsigned int enable_intx : 1;
-+ unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */
-+ unsigned int ack_intr : 1; /* .. and ACK-ing */
-+ unsigned long handled; /* counter reported by the "irq_handlers" sysfs file */
-+ unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
-+ char irq_name[0]; /* pciback[000:04:00.0] */
-+};
-+
-+/* Used by XenBus and pciback_ops.c */
-+extern wait_queue_head_t aer_wait_queue;
-+extern struct workqueue_struct *pciback_wq;
-+/* Used by pcistub.c and conf_space_quirks.c */
-+extern struct list_head pciback_quirks;
-+
-+/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
-+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
-+ int domain, int bus,
-+ int slot, int func);
-+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
-+ struct pci_dev *dev);
-+void pcistub_put_pci_dev(struct pci_dev *dev);
-+
-+/* Ensure a device is turned off or reset */
-+void pciback_reset_device(struct pci_dev *pdev);
-+
-+/* Access a virtual configuration space for a PCI device */
-+int pciback_config_init(void);
-+int pciback_config_init_dev(struct pci_dev *dev);
-+void pciback_config_free_dyn_fields(struct pci_dev *dev);
-+void pciback_config_reset_dev(struct pci_dev *dev);
-+void pciback_config_free_dev(struct pci_dev *dev);
-+int pciback_config_read(struct pci_dev *dev, int offset, int size,
-+ u32 *ret_val);
-+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
-+
-+/* Handle requests for specific devices from the frontend */
-+typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
-+ unsigned int domain, unsigned int bus,
-+ unsigned int devfn, unsigned int devid);
-+typedef int (*publish_pci_root_cb) (struct pciback_device *pdev,
-+ unsigned int domain, unsigned int bus);
-+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
-+ int devid, publish_pci_dev_cb publish_cb);
-+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
-+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
-+ unsigned int domain, unsigned int bus,
-+ unsigned int devfn);
-+
-+/**
-+* Added for domain0 PCIe AER handling. Get the guest domain/bus/devfn in
-+* pciback before sending an AER request to pcifront, so that the guest can
-+* identify the device and cooperate with pciback to finish the AER recovery
-+* job if the device driver has the capability.
-+*/
-+
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
-+ struct pciback_device *pdev,
-+ unsigned int *domain, unsigned int *bus,
-+ unsigned int *devfn);
-+int pciback_init_devices(struct pciback_device *pdev);
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+ publish_pci_root_cb cb);
-+void pciback_release_devices(struct pciback_device *pdev);
-+
-+/* Handles events from front-end */
-+irqreturn_t pciback_handle_event(int irq, void *dev_id);
-+void pciback_do_op(struct work_struct *data);
-+
-+int pciback_xenbus_register(void);
-+void pciback_xenbus_unregister(void);
-+
-+#ifdef CONFIG_PCI_MSI
-+int pciback_enable_msi(struct pciback_device *pdev,
-+ struct pci_dev *dev, struct xen_pci_op *op);
-+
-+int pciback_disable_msi(struct pciback_device *pdev,
-+ struct pci_dev *dev, struct xen_pci_op *op);
-+
-+
-+int pciback_enable_msix(struct pciback_device *pdev,
-+ struct pci_dev *dev, struct xen_pci_op *op);
-+
-+int pciback_disable_msix(struct pciback_device *pdev,
-+ struct pci_dev *dev, struct xen_pci_op *op);
-+#endif
-+extern int verbose_request;
-+
-+void test_and_schedule_op(struct pciback_device *pdev);
-+
-+/*
-+ * Handles shared IRQs that can go to both the device domain and the
-+ * control domain. These prototypes sit inside the include guard like the
-+ * rest of the header.
-+ * NOTE(review): pciback_ops.c defines pciback_control_isr() with the same
-+ * signature as pciback_irq_handler() - confirm which name is intended.
-+ */
-+void pciback_irq_handler(struct pci_dev *dev, int reset);
-+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id);
-+#endif /* __XEN_PCIBACK_H__ */
-diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
-new file mode 100644
-index 0000000..5543881
---- /dev/null
-+++ b/drivers/xen/pciback/pciback_ops.c
-@@ -0,0 +1,242 @@
-+/*
-+ * PCI Backend Operations - respond to PCI requests from Frontend
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+#include <linux/module.h>
-+#include <linux/wait.h>
-+#include <linux/bitops.h>
-+#include <xen/events.h>
-+#include <linux/sched.h>
-+#include "pciback.h"
-+/* Integer module parameter, exposed with permissions 0644. */
-+int verbose_request;
-+module_param(verbose_request, int, 0644);
-+
-+/* Ensure a device has the fake IRQ handler "turned on/off" and is
-+ * ready to be exported. This MUST be run after pciback_reset_device
-+ * which does the actual PCI device enable/disable.
-+ *
-+ * @dev: device whose driver data carries the enable_intx/isr_on state
-+ * @reset: when non-zero, enable_intx and ack_intr are cleared first, so
-+ * the handler is removed if it is currently installed
-+ *
-+ * Does nothing for devices without driver data or with a header type
-+ * other than PCI_HEADER_TYPE_NORMAL.
-+ */
-+void pciback_control_isr(struct pci_dev *dev, int reset)
-+{
-+ struct pciback_dev_data *dev_data;
-+ int rc;
-+ int enable = 0;
-+
-+ dev_data = pci_get_drvdata(dev);
-+ if (!dev_data)
-+ return;
-+
-+ /* We don't deal with bridges */
-+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
-+ return;
-+
-+ if (reset) {
-+ dev_data->enable_intx = 0;
-+ dev_data->ack_intr = 0;
-+ }
-+ enable = dev_data->enable_intx;
-+
-+ /* Asked to disable, but ISR isn't running */
-+ if (!enable && !dev_data->isr_on)
-+ return;
-+
-+ /* Squirrel away the IRQs in the dev_data. We need this
-+ * b/c when device transitions to MSI, the dev->irq is
-+ * overwritten with the MSI vector.
-+ */
-+ if (enable)
-+ dev_data->irq = dev->irq;
-+
-+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
-+ dev_data->irq_name,
-+ dev_data->irq,
-+ pci_is_enabled(dev) ? "on" : "off",
-+ dev->msi_enabled ? "MSI" : "",
-+ dev->msix_enabled ? "MSI/X" : "",
-+ dev_data->isr_on ? "enable" : "disable",
-+ enable ? "enable" : "disable");
-+
-+ if (enable) {
-+ rc = request_irq(dev_data->irq,
-+ pciback_guest_interrupt, IRQF_SHARED,
-+ dev_data->irq_name, dev);
-+ if (rc) {
-+ dev_err(&dev->dev, "%s: failed to install fake IRQ " \
-+ "handler for IRQ %d! (rc:%d)\n", dev_data->irq_name,
-+ dev_data->irq, rc);
-+ goto out; /* isr_on and ack_intr keep their previous values */
-+ }
-+ }
-+ else {
-+ free_irq(dev_data->irq, dev);
-+ dev_data->irq = 0;
-+ }
-+ dev_data->isr_on = enable;
-+ dev_data->ack_intr = enable;
-+out:
-+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
-+ dev_data->irq_name,
-+ dev_data->irq,
-+ pci_is_enabled(dev) ? "on" : "off",
-+ dev->msi_enabled ? "MSI" : "",
-+ dev->msix_enabled ? "MSI/X" : "",
-+ enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
-+ (dev_data->isr_on ? "failed to disable" : "disabled"));
-+}
-+
-+/* Make sure a device is "turned off" and ready to be exported.
-+ * (Also see pciback_config_reset to ensure virtual configuration space is
-+ * ready to be re-exported)
-+ *
-+ * The fake IRQ handler is reset first. Normal devices then get MSI/MSI-X
-+ * disabled, are disabled and have PCI_COMMAND written as 0; any other
-+ * header type only has PCI_COMMAND_INVALIDATE cleared if it was set.
-+ */
-+void pciback_reset_device(struct pci_dev *dev)
-+{
-+	u16 command;
-+
-+	pciback_control_isr(dev, 1 /* reset device */);
-+
-+	/* Bridges are not disabled */
-+	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
-+		pci_read_config_word(dev, PCI_COMMAND, &command);
-+		if (!(command & PCI_COMMAND_INVALIDATE))
-+			return;
-+
-+		command &= ~PCI_COMMAND_INVALIDATE;
-+		pci_write_config_word(dev, PCI_COMMAND, command);
-+
-+		dev->is_busmaster = 0;
-+		return;
-+	}
-+
-+#ifdef CONFIG_PCI_MSI
-+	/* The guest could have been abruptly killed without
-+	 * disabling MSI/MSI-X interrupts.*/
-+	if (dev->msix_enabled)
-+		pci_disable_msix(dev);
-+	if (dev->msi_enabled)
-+		pci_disable_msi(dev);
-+#endif
-+	pci_disable_device(dev);
-+
-+	pci_write_config_word(dev, PCI_COMMAND, 0);
-+
-+	dev->is_busmaster = 0;
-+}
-+/*
-+* Now the same evtchn is used for both pcifront conf_read_write requests
-+* as well as the PCIe AER frontend ack. We use a new work_queue to schedule
-+* the pciback conf_read_write service, to avoid conflict with the aer_core
-+* do_recovery job, which also uses the system default work_queue.
-+*
-+* test_and_schedule_op: queue pdev->op_work on pciback_wq if the frontend
-+* has a request pending and none is being processed, and wake up
-+* aer_wait_queue if an AER ack from the frontend is being waited for.
-+*/
-+void test_and_schedule_op(struct pciback_device *pdev)
-+{
-+ /* Check that frontend is requesting an operation and that we are not
-+ * already processing a request */
-+ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
-+ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
-+ queue_work(pciback_wq, &pdev->op_work);
-+ }
-+ /* _XEN_PCIB_active should have been cleared by pcifront. Also make
-+ sure pciback is waiting for the ack by checking _PCIB_op_pending */
-+ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
-+ && test_bit(_PCIB_op_pending, &pdev->flags)) {
-+ wake_up(&aer_wait_queue);
-+ }
-+}
-+
-+/* Performing the configuration space reads/writes must not be done in atomic
-+ * context because some of the pci_* functions can sleep (mostly due to ACPI
-+ * use of semaphores). This function is intended to be called from a work
-+ * queue in process context taking a struct pciback_device as a parameter */
-+
-+void pciback_do_op(struct work_struct *data)
-+{
-+ struct pciback_device *pdev =
-+ container_of(data, struct pciback_device, op_work);
-+ struct pci_dev *dev;
-+ struct pciback_dev_data *dev_data = NULL;
-+ struct xen_pci_op *op = &pdev->sh_info->op;
-+ int test_intx = 0;
-+
-+ dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
-+
-+ if (dev == NULL)
-+ op->err = XEN_PCI_ERR_dev_not_found;
-+ else {
-+ dev_data = pci_get_drvdata(dev);
-+ if (dev_data)
-+ test_intx = dev_data->enable_intx;
-+ switch (op->cmd) {
-+ case XEN_PCI_OP_conf_read:
-+ op->err = pciback_config_read(dev,
-+ op->offset, op->size, &op->value);
-+ break;
-+ case XEN_PCI_OP_conf_write:
-+ op->err = pciback_config_write(dev,
-+ op->offset, op->size, op->value);
-+ break;
-+#ifdef CONFIG_PCI_MSI
-+ case XEN_PCI_OP_enable_msi:
-+ op->err = pciback_enable_msi(pdev, dev, op);
-+ break;
-+ case XEN_PCI_OP_disable_msi:
-+ op->err = pciback_disable_msi(pdev, dev, op);
-+ break;
-+ case XEN_PCI_OP_enable_msix:
-+ op->err = pciback_enable_msix(pdev, dev, op);
-+ break;
-+ case XEN_PCI_OP_disable_msix:
-+ op->err = pciback_disable_msix(pdev, dev, op);
-+ break;
-+#endif
-+ default:
-+ op->err = XEN_PCI_ERR_not_implemented;
-+ break;
-+ }
-+ }
-+ if (!op->err && dev && dev_data) {
-+ /* Transition detected */
-+ if ((dev_data->enable_intx != test_intx))
-+ pciback_control_isr(dev, 0 /* no reset */);
-+ }
-+ /* Tell the driver domain that we're done. */
-+ wmb();
-+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
-+ notify_remote_via_irq(pdev->evtchn_irq);
-+
-+ /* Mark that we're done. */
-+ smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
-+ clear_bit(_PDEVF_op_active, &pdev->flags);
-+ smp_mb__after_clear_bit(); /* /before/ final check for work */
-+
-+ /* Check to see if the driver domain tried to start another request in
-+ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
-+ */
-+ test_and_schedule_op(pdev);
-+}
-+
-+irqreturn_t pciback_handle_event(int irq, void *dev_id)
-+{
-+ struct pciback_device *pdev = dev_id;
-+
-+ test_and_schedule_op(pdev);
-+
-+ return IRQ_HANDLED;
-+}
-+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
-+{
-+ struct pci_dev *dev = (struct pci_dev *)dev_id;
-+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
-+
-+ if (dev_data->isr_on && dev_data->ack_intr) {
-+ dev_data->handled++;
-+ if ((dev_data->handled % 1000) == 0) {
-+ if (xen_ignore_irq(irq)) {
-+ printk(KERN_INFO "%s IRQ line is not shared "
-+ "with other domains. Turning ISR off\n",
-+ dev_data->irq_name);
-+ dev_data->ack_intr = 0;
-+ }
-+ }
-+ return IRQ_HANDLED;
-+ }
-+ return IRQ_NONE;
-+}
-diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
-new file mode 100644
-index 0000000..efb922d
---- /dev/null
-+++ b/drivers/xen/pciback/slot.c
-@@ -0,0 +1,191 @@
-+/*
-+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
-+ * to the frontend
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> (vpci.c)
-+ * Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c
-+ */
-+
-+#include <linux/list.h>
-+#include <linux/slab.h>
-+#include <linux/pci.h>
-+#include <linux/spinlock.h>
-+#include "pciback.h"
-+
-+/* There are at most 32 slots in a pci bus. */
-+#define PCI_SLOT_MAX 32
-+
-+#define PCI_BUS_NBR 2
-+
-+struct slot_dev_data {
-+ /* slots[bus][slot] is the device exported at that virtual bus/slot,
-+ * or NULL when the slot is free. Access to slots must be protected
-+ * by lock. */
-+ struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
-+ spinlock_t lock;
-+};
-+
-+/*
-+ * Return the device exported at the given virtual address, or NULL.
-+ *
-+ * Only domain 0 and function 0 are ever populated in this layout, and the
-+ * bus/slot pair must fall inside the slots[][] table.
-+ */
-+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
-+ unsigned int domain, unsigned int bus,
-+ unsigned int devfn)
-+{
-+ struct slot_dev_data *table = pdev->pci_dev_data;
-+ unsigned int slot = PCI_SLOT(devfn);
-+ struct pci_dev *found;
-+ unsigned long irqflags;
-+
-+ /* Reject anything that cannot index the table. */
-+ if (domain != 0 || PCI_FUNC(devfn) != 0 ||
-+ slot >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
-+ return NULL;
-+
-+ spin_lock_irqsave(&table->lock, irqflags);
-+ found = table->slots[bus][slot];
-+ spin_unlock_irqrestore(&table->lock, irqflags);
-+
-+ return found;
-+}
-+
-+/*
-+ * Export dev to the frontend on the first free virtual (bus, slot) pair.
-+ *
-+ * Bridges are refused with -EFAULT; -ENOMEM is returned when every slot on
-+ * every virtual bus is taken. On success the device is announced through
-+ * publish_cb and that callback's result is returned.
-+ */
-+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
-+ int devid, publish_pci_dev_cb publish_cb)
-+{
-+ int err = 0, slot, bus;
-+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-+ unsigned long flags;
-+
-+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
-+ err = -EFAULT;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Can't export bridges on the virtual PCI bus");
-+ goto out;
-+ }
-+
-+ spin_lock_irqsave(&slot_dev->lock, flags);
-+
-+ /* Assign to a new slot on the virtual PCI bus */
-+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+ if (slot_dev->slots[bus][slot] == NULL) {
-+ printk(KERN_INFO
-+ "pciback: slot: %s: assign to virtual "
-+ "slot %d, bus %d\n",
-+ pci_name(dev), slot, bus);
-+ slot_dev->slots[bus][slot] = dev;
-+ goto unlock;
-+ }
-+ }
-+
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "No more space on root virtual PCI bus");
-+
-+unlock:
-+ spin_unlock_irqrestore(&slot_dev->lock, flags);
-+
-+ /* Publish this device under the bus/slot it was actually assigned.
-+ * pciback_get_pci_dev() looks devices up in slots[bus][slot], so a
-+ * device placed on a bus other than 0 must not be announced as
-+ * bus 0. bus and slot still hold the chosen values here because
-+ * the search leaves the loops through the goto above. */
-+ if (!err)
-+ err = publish_cb(pdev, 0, bus, PCI_DEVFN(slot, 0), devid);
-+
-+out:
-+ return err;
-+}
-+
-+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
-+{
-+ int slot, bus;
-+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-+ struct pci_dev *found_dev = NULL;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&slot_dev->lock, flags);
-+
-+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+ if (slot_dev->slots[bus][slot] == dev) {
-+ slot_dev->slots[bus][slot] = NULL;
-+ found_dev = dev;
-+ goto out;
-+ }
-+ }
-+
-+out:
-+ spin_unlock_irqrestore(&slot_dev->lock, flags);
-+
-+ if (found_dev)
-+ pcistub_put_pci_dev(found_dev);
-+}
-+
-+int pciback_init_devices(struct pciback_device *pdev)
-+{
-+ int slot, bus;
-+ struct slot_dev_data *slot_dev;
-+
-+ slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
-+ if (!slot_dev)
-+ return -ENOMEM;
-+
-+ spin_lock_init(&slot_dev->lock);
-+
-+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
-+ slot_dev->slots[bus][slot] = NULL;
-+
-+ pdev->pci_dev_data = slot_dev;
-+
-+ return 0;
-+}
-+
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+ publish_pci_root_cb publish_cb)
-+{
-+ /* The Virtual PCI bus has only one root */
-+ return publish_cb(pdev, 0, 0);
-+}
-+
-+void pciback_release_devices(struct pciback_device *pdev)
-+{
-+ int slot, bus;
-+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-+ struct pci_dev *dev;
-+
-+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+ dev = slot_dev->slots[bus][slot];
-+ if (dev != NULL)
-+ pcistub_put_pci_dev(dev);
-+ }
-+
-+ kfree(slot_dev);
-+ pdev->pci_dev_data = NULL;
-+}
-+
-+/*
-+ * Translate a physical device into the virtual address the frontend sees.
-+ *
-+ * Searches the slot table for an entry with the same PCI domain, bus
-+ * number and devfn as pcidev. On a match, *domain, *bus and *devfn are
-+ * filled with the virtual address and 1 is returned; otherwise 0 is
-+ * returned and the output parameters are left untouched.
-+ */
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
-+ struct pciback_device *pdev,
-+ unsigned int *domain, unsigned int *bus,
-+ unsigned int *devfn)
-+{
-+ int slot, busnr;
-+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
-+ struct pci_dev *dev;
-+ int found = 0;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&slot_dev->lock, flags);
-+
-+ /* Advance the loop counter busnr, not the output pointer bus:
-+ * incrementing bus never terminates the loop and corrupts the
-+ * caller's pointer. */
-+ for (busnr = 0; busnr < PCI_BUS_NBR; busnr++)
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+ dev = slot_dev->slots[busnr][slot];
-+ if (dev && dev->bus->number == pcidev->bus->number
-+ && dev->devfn == pcidev->devfn
-+ && pci_domain_nr(dev->bus) ==
-+ pci_domain_nr(pcidev->bus)) {
-+ found = 1;
-+ *domain = 0;
-+ *bus = busnr;
-+ *devfn = PCI_DEVFN(slot, 0);
-+ goto out;
-+ }
-+ }
-+out:
-+ spin_unlock_irqrestore(&slot_dev->lock, flags);
-+ return found;
-+}
-diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
-new file mode 100644
-index 0000000..2857ab8
---- /dev/null
-+++ b/drivers/xen/pciback/vpci.c
-@@ -0,0 +1,244 @@
-+/*
-+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
-+ * to the frontend
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+
-+#include <linux/list.h>
-+#include <linux/slab.h>
-+#include <linux/pci.h>
-+#include <linux/spinlock.h>
-+#include "pciback.h"
-+
-+#define PCI_SLOT_MAX 32
-+
-+struct vpci_dev_data {
-+ /* Access to dev_list must be protected by lock */
-+ struct list_head dev_list[PCI_SLOT_MAX];
-+ spinlock_t lock;
-+};
-+
-+static inline struct list_head *list_first(struct list_head *head)
-+{
-+ return head->next;
-+}
-+
-+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
-+ unsigned int domain, unsigned int bus,
-+ unsigned int devfn)
-+{
-+ struct pci_dev_entry *entry;
-+ struct pci_dev *dev = NULL;
-+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-+ unsigned long flags;
-+
-+ if (domain != 0 || bus != 0)
-+ return NULL;
-+
-+ if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
-+ spin_lock_irqsave(&vpci_dev->lock, flags);
-+
-+ list_for_each_entry(entry,
-+ &vpci_dev->dev_list[PCI_SLOT(devfn)],
-+ list) {
-+ if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
-+ dev = entry->dev;
-+ break;
-+ }
-+ }
-+
-+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
-+ }
-+ return dev;
-+}
-+
-+/*
-+ * Return 1 when l and r sit in the same PCI domain, on the same bus and in
-+ * the same slot (i.e. they are functions of one device), 0 otherwise.
-+ */
-+static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
-+{
-+ if (pci_domain_nr(l->bus) != pci_domain_nr(r->bus))
-+ return 0;
-+ if (l->bus != r->bus)
-+ return 0;
-+
-+ return PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn);
-+}
-+
-+/*
-+ * Export dev to the frontend on the virtual PCI bus and publish it.
-+ *
-+ * Functions of a device that already has a function exported are placed
-+ * in the same virtual slot; otherwise the first empty slot is used. The
-+ * physical function number is kept as the virtual function number.
-+ * Bridges are refused with -EFAULT; -ENOMEM is returned when the list
-+ * entry cannot be allocated or no slot is free. On success the result of
-+ * publish_cb is returned.
-+ */
-+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
-+ int devid, publish_pci_dev_cb publish_cb)
-+{
-+ int err = 0, slot, func = -1;
-+ struct pci_dev_entry *t, *dev_entry;
-+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-+ unsigned long flags;
-+
-+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
-+ err = -EFAULT;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Can't export bridges on the virtual PCI bus");
-+ goto out;
-+ }
-+
-+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
-+ if (!dev_entry) {
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error adding entry to virtual PCI bus");
-+ goto out;
-+ }
-+
-+ dev_entry->dev = dev;
-+
-+ spin_lock_irqsave(&vpci_dev->lock, flags);
-+
-+ /* Keep multi-function devices together on the virtual PCI bus */
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+ if (!list_empty(&vpci_dev->dev_list[slot])) {
-+ t = list_entry(list_first(&vpci_dev->dev_list[slot]),
-+ struct pci_dev_entry, list);
-+
-+ if (match_slot(dev, t->dev)) {
-+ pr_info("pciback: vpci: %s: "
-+ "assign to virtual slot %d func %d\n",
-+ pci_name(dev), slot,
-+ PCI_FUNC(dev->devfn));
-+ list_add_tail(&dev_entry->list,
-+ &vpci_dev->dev_list[slot]);
-+ func = PCI_FUNC(dev->devfn);
-+ goto unlock;
-+ }
-+ }
-+ }
-+
-+ /* Assign to a new slot on the virtual PCI bus */
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+ if (list_empty(&vpci_dev->dev_list[slot])) {
-+ printk(KERN_INFO
-+ "pciback: vpci: %s: assign to virtual slot %d\n",
-+ pci_name(dev), slot);
-+ list_add_tail(&dev_entry->list,
-+ &vpci_dev->dev_list[slot]);
-+ func = PCI_FUNC(dev->devfn);
-+ goto unlock;
-+ }
-+ }
-+
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "No more space on root virtual PCI bus");
-+
-+unlock:
-+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
-+
-+ /* Publish this device. If no slot was found the entry was never
-+ * linked into a list, so it must be freed here or it is leaked. */
-+ if (!err)
-+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
-+ else
-+ kfree(dev_entry);
-+
-+out:
-+ return err;
-+}
-+
-+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
-+{
-+ int slot;
-+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-+ struct pci_dev *found_dev = NULL;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&vpci_dev->lock, flags);
-+
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+ struct pci_dev_entry *e, *tmp;
-+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
-+ list) {
-+ if (e->dev == dev) {
-+ list_del(&e->list);
-+ found_dev = e->dev;
-+ kfree(e);
-+ goto out;
-+ }
-+ }
-+ }
-+
-+out:
-+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
-+
-+ if (found_dev)
-+ pcistub_put_pci_dev(found_dev);
-+}
-+
-+int pciback_init_devices(struct pciback_device *pdev)
-+{
-+ int slot;
-+ struct vpci_dev_data *vpci_dev;
-+
-+ vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
-+ if (!vpci_dev)
-+ return -ENOMEM;
-+
-+ spin_lock_init(&vpci_dev->lock);
-+
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
-+ INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
-+
-+ pdev->pci_dev_data = vpci_dev;
-+
-+ return 0;
-+}
-+
-+int pciback_publish_pci_roots(struct pciback_device *pdev,
-+ publish_pci_root_cb publish_cb)
-+{
-+ /* The Virtual PCI bus has only one root */
-+ return publish_cb(pdev, 0, 0);
-+}
-+
-+void pciback_release_devices(struct pciback_device *pdev)
-+{
-+ int slot;
-+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-+
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+ struct pci_dev_entry *e, *tmp;
-+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
-+ list) {
-+ list_del(&e->list);
-+ pcistub_put_pci_dev(e->dev);
-+ kfree(e);
-+ }
-+ }
-+
-+ kfree(vpci_dev);
-+ pdev->pci_dev_data = NULL;
-+}
-+
-+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
-+ struct pciback_device *pdev,
-+ unsigned int *domain, unsigned int *bus,
-+ unsigned int *devfn)
-+{
-+ struct pci_dev_entry *entry;
-+ struct pci_dev *dev = NULL;
-+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-+ unsigned long flags;
-+ int found = 0, slot;
-+
-+ spin_lock_irqsave(&vpci_dev->lock, flags);
-+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-+ list_for_each_entry(entry,
-+ &vpci_dev->dev_list[slot],
-+ list) {
-+ dev = entry->dev;
-+ if (dev && dev->bus->number == pcidev->bus->number
-+ && pci_domain_nr(dev->bus) ==
-+ pci_domain_nr(pcidev->bus)
-+ && dev->devfn == pcidev->devfn) {
-+ found = 1;
-+ *domain = 0;
-+ *bus = 0;
-+ *devfn = PCI_DEVFN(slot,
-+ PCI_FUNC(pcidev->devfn));
-+ }
-+ }
-+ }
-+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
-+ return found;
-+}
-diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
-new file mode 100644
-index 0000000..f0d5426
---- /dev/null
-+++ b/drivers/xen/pciback/xenbus.c
-@@ -0,0 +1,730 @@
-+/*
-+ * PCI Backend Xenbus Setup - handles setup with frontend and xend
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/list.h>
-+#include <linux/vmalloc.h>
-+#include <linux/workqueue.h>
-+#include <xen/xenbus.h>
-+#include <xen/events.h>
-+#include <asm/xen/pci.h>
-+#include <linux/workqueue.h>
-+#include "pciback.h"
-+
-+#define INVALID_EVTCHN_IRQ (-1)
-+struct workqueue_struct *pciback_wq;
-+
-+/*
-+ * Allocate and initialise the pciback_device that backs xdev.
-+ *
-+ * The new structure is stored as the xenbus device's drvdata, starts with
-+ * no shared page, no event-channel IRQ and no backend watch, and has its
-+ * virtual device table set up by pciback_init_devices(). Returns the
-+ * structure, or NULL if either allocation fails.
-+ */
-+static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
-+{
-+ struct pciback_device *pdev;
-+
-+ pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
-+ if (pdev == NULL)
-+ goto out;
-+ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
-+
-+ pdev->xdev = xdev;
-+ dev_set_drvdata(&xdev->dev, pdev);
-+
-+ spin_lock_init(&pdev->dev_lock);
-+
-+ pdev->sh_info = NULL;
-+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
-+ pdev->be_watching = 0;
-+
-+ INIT_WORK(&pdev->op_work, pciback_do_op);
-+
-+ if (pciback_init_devices(pdev)) {
-+ /* Do not leave drvdata pointing at the freed structure. */
-+ dev_set_drvdata(&xdev->dev, NULL);
-+ kfree(pdev);
-+ pdev = NULL;
-+ }
-+out:
-+ return pdev;
-+}
-+
-+static void pciback_disconnect(struct pciback_device *pdev)
-+{
-+ spin_lock(&pdev->dev_lock);
-+
-+ /* Ensure the guest can't trigger our handler before removing devices */
-+ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
-+ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
-+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
-+ }
-+ spin_unlock(&pdev->dev_lock);
-+
-+ /* If the driver domain started an op, make sure we complete it
-+ * before releasing the shared memory */
-+
-+ /* Note, the workqueue does not use spinlocks at all.*/
-+ flush_workqueue(pciback_wq);
-+
-+ spin_lock(&pdev->dev_lock);
-+ if (pdev->sh_info != NULL) {
-+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
-+ pdev->sh_info = NULL;
-+ }
-+ spin_unlock(&pdev->dev_lock);
-+
-+}
-+
-+static void free_pdev(struct pciback_device *pdev)
-+{
-+ spin_lock(&pdev->dev_lock);
-+ if (pdev->be_watching) {
-+ unregister_xenbus_watch(&pdev->be_watch);
-+ pdev->be_watching = 0;
-+ }
-+ spin_unlock(&pdev->dev_lock);
-+
-+ pciback_disconnect(pdev);
-+
-+ pciback_release_devices(pdev);
-+
-+ dev_set_drvdata(&pdev->xdev->dev, NULL);
-+ pdev->xdev = NULL;
-+
-+ kfree(pdev);
-+}
-+
-+static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
-+ int remote_evtchn)
-+{
-+ int err = 0;
-+ void *vaddr;
-+
-+ dev_dbg(&pdev->xdev->dev,
-+ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
-+ gnt_ref, remote_evtchn);
-+
-+ err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
-+ if (err < 0) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error mapping other domain page in ours.");
-+ goto out;
-+ }
-+
-+ spin_lock(&pdev->dev_lock);
-+ pdev->sh_info = vaddr;
-+ spin_unlock(&pdev->dev_lock);
-+
-+ err = bind_interdomain_evtchn_to_irqhandler(
-+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
-+ 0, "pciback", pdev);
-+ if (err < 0) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error binding event channel to IRQ");
-+ goto out;
-+ }
-+
-+ spin_lock(&pdev->dev_lock);
-+ pdev->evtchn_irq = err;
-+ spin_unlock(&pdev->dev_lock);
-+ err = 0;
-+
-+ dev_dbg(&pdev->xdev->dev, "Attached!\n");
-+out:
-+ return err;
-+}
-+
-+/*
-+ * Connect to the frontend once both ends have reached Initialised.
-+ *
-+ * Reads the grant reference, event channel and magic string published by
-+ * the frontend, maps the shared page and binds the event channel via
-+ * pciback_do_attach(), then switches this backend to Connected.
-+ *
-+ * Returns 0 on success or when either end is not yet in the Initialised
-+ * state (nothing to do), and a negative errno on failure.
-+ */
-+static int pciback_attach(struct pciback_device *pdev)
-+{
-+ int err = 0;
-+ int gnt_ref, remote_evtchn;
-+ char *magic = NULL;
-+
-+
-+ /* Make sure we only do this setup once */
-+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-+ XenbusStateInitialised)
-+ goto out;
-+
-+ /* Wait for frontend to state that it has published the configuration */
-+ if (xenbus_read_driver_state(pdev->xdev->otherend) !=
-+ XenbusStateInitialised)
-+ goto out;
-+
-+ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
-+
-+ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
-+ "pci-op-ref", "%u", &gnt_ref,
-+ "event-channel", "%u", &remote_evtchn,
-+ "magic", NULL, &magic, NULL);
-+ if (err) {
-+ /* If configuration didn't get read correctly, wait longer */
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading configuration from frontend");
-+ goto out;
-+ }
-+
-+ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
-+ xenbus_dev_fatal(pdev->xdev, -EFAULT,
-+ "version mismatch (%s/%s) with pcifront - "
-+ "halting pciback",
-+ magic, XEN_PCI_MAGIC);
-+ /* err is still 0 from the successful gather above; report
-+ * the mismatch to the caller instead of returning success. */
-+ err = -EFAULT;
-+ goto out;
-+ }
-+
-+ err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
-+ if (err)
-+ goto out;
-+
-+ dev_dbg(&pdev->xdev->dev, "Connecting...\n");
-+
-+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
-+ if (err)
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error switching to connected state!");
-+
-+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
-+out:
-+
-+ /* magic is either NULL or the string handed back by xenbus_gather */
-+ kfree(magic);
-+
-+ return err;
-+}
-+
-+static int pciback_publish_pci_dev(struct pciback_device *pdev,
-+ unsigned int domain, unsigned int bus,
-+ unsigned int devfn, unsigned int devid)
-+{
-+ int err;
-+ int len;
-+ char str[64];
-+
-+ len = snprintf(str, sizeof(str), "vdev-%d", devid);
-+ if (unlikely(len >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+
-+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-+ "%04x:%02x:%02x.%02x", domain, bus,
-+ PCI_SLOT(devfn), PCI_FUNC(devfn));
-+
-+out:
-+ return err;
-+}
-+
-+static int pciback_export_device(struct pciback_device *pdev,
-+ int domain, int bus, int slot, int func,
-+ int devid)
-+{
-+ struct pci_dev *dev;
-+ int err = 0;
-+
-+ dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
-+ domain, bus, slot, func);
-+
-+ dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
-+ if (!dev) {
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Couldn't locate PCI device "
-+ "(%04x:%02x:%02x.%01x)! "
-+ "perhaps already in-use?",
-+ domain, bus, slot, func);
-+ goto out;
-+ }
-+
-+ err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
-+ if (err)
-+ goto out;
-+
-+ dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
-+ if (xen_register_device_domain_owner(dev,
-+ pdev->xdev->otherend_id) != 0) {
-+ dev_err(&dev->dev, "device has been assigned to another " \
-+ "domain! Over-writting the ownership, but beware.\n");
-+ xen_unregister_device_domain_owner(dev);
-+ xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
-+ }
-+
-+ /* TODO: It'd be nice to export a bridge and have all of its children
-+ * get exported with it. This may be best done in xend (which will
-+ * have to calculate resource usage anyway) but we probably want to
-+ * put something in here to ensure that if a bridge gets given to a
-+ * driver domain, that all devices under that bridge are not given
-+ * to other driver domains (as he who controls the bridge can disable
-+ * it and stop the other devices from working).
-+ */
-+out:
-+ return err;
-+}
-+
-+static int pciback_remove_device(struct pciback_device *pdev,
-+ int domain, int bus, int slot, int func)
-+{
-+ int err = 0;
-+ struct pci_dev *dev;
-+
-+ dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
-+ domain, bus, slot, func);
-+
-+ dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
-+ if (!dev) {
-+ err = -EINVAL;
-+ dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
-+ "(%04x:%02x:%02x.%01x)! not owned by this domain\n",
-+ domain, bus, slot, func);
-+ goto out;
-+ }
-+
-+ dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
-+ xen_unregister_device_domain_owner(dev);
-+
-+ pciback_release_pci_dev(pdev, dev);
-+
-+out:
-+ return err;
-+}
-+
-+static int pciback_publish_pci_root(struct pciback_device *pdev,
-+ unsigned int domain, unsigned int bus)
-+{
-+ unsigned int d, b;
-+ int i, root_num, len, err;
-+ char str[64];
-+
-+ dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+ "root_num", "%d", &root_num);
-+ if (err == 0 || err == -ENOENT)
-+ root_num = 0;
-+ else if (err < 0)
-+ goto out;
-+
-+ /* Verify that we haven't already published this pci root */
-+ for (i = 0; i < root_num; i++) {
-+ len = snprintf(str, sizeof(str), "root-%d", i);
-+ if (unlikely(len >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+ str, "%x:%x", &d, &b);
-+ if (err < 0)
-+ goto out;
-+ if (err != 2) {
-+ err = -EINVAL;
-+ goto out;
-+ }
-+
-+ if (d == domain && b == bus) {
-+ err = 0;
-+ goto out;
-+ }
-+ }
-+
-+ len = snprintf(str, sizeof(str), "root-%d", root_num);
-+ if (unlikely(len >= (sizeof(str) - 1))) {
-+ err = -ENOMEM;
-+ goto out;
-+ }
-+
-+ dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
-+ root_num, domain, bus);
-+
-+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
-+ "%04x:%02x", domain, bus);
-+ if (err)
-+ goto out;
-+
-+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
-+ "root_num", "%d", (root_num + 1));
-+
-+out:
-+ return err;
-+}
-+
-+static int pciback_reconfigure(struct pciback_device *pdev)
-+{
-+ int err = 0;
-+ int num_devs;
-+ int domain, bus, slot, func;
-+ int substate;
-+ int i, len;
-+ char state_str[64];
-+ char dev_str[64];
-+
-+
-+ dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
-+
-+ /* Make sure we only reconfigure once */
-+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-+ XenbusStateReconfiguring)
-+ goto out;
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
-+ &num_devs);
-+ if (err != 1) {
-+ if (err >= 0)
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading number of devices");
-+ goto out;
-+ }
-+
-+ for (i = 0; i < num_devs; i++) {
-+ len = snprintf(state_str, sizeof(state_str), "state-%d", i);
-+ if (unlikely(len >= (sizeof(state_str) - 1))) {
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "String overflow while reading "
-+ "configuration");
-+ goto out;
-+ }
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
-+ "%d", &substate);
-+ if (err != 1)
-+ substate = XenbusStateUnknown;
-+
-+ switch (substate) {
-+ case XenbusStateInitialising:
-+ dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
-+
-+ len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
-+ if (unlikely(len >= (sizeof(dev_str) - 1))) {
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "String overflow while "
-+ "reading configuration");
-+ goto out;
-+ }
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+ dev_str, "%x:%x:%x.%x",
-+ &domain, &bus, &slot, &func);
-+ if (err < 0) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading device "
-+ "configuration");
-+ goto out;
-+ }
-+ if (err != 4) {
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error parsing pci device "
-+ "configuration");
-+ goto out;
-+ }
-+
-+ err = pciback_export_device(pdev, domain, bus, slot,
-+ func, i);
-+ if (err)
-+ goto out;
-+
-+ /* Publish pci roots. */
-+ err = pciback_publish_pci_roots(pdev,
-+ pciback_publish_pci_root);
-+ if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error while publish PCI root"
-+ "buses for frontend");
-+ goto out;
-+ }
-+
-+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
-+ state_str, "%d",
-+ XenbusStateInitialised);
-+ if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error switching substate of "
-+ "dev-%d\n", i);
-+ goto out;
-+ }
-+ break;
-+
-+ case XenbusStateClosing:
-+ dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
-+
-+ len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
-+ if (unlikely(len >= (sizeof(dev_str) - 1))) {
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "String overflow while "
-+ "reading configuration");
-+ goto out;
-+ }
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
-+ dev_str, "%x:%x:%x.%x",
-+ &domain, &bus, &slot, &func);
-+ if (err < 0) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading device "
-+ "configuration");
-+ goto out;
-+ }
-+ if (err != 4) {
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error parsing pci device "
-+ "configuration");
-+ goto out;
-+ }
-+
-+ err = pciback_remove_device(pdev, domain, bus, slot,
-+ func);
-+ if (err)
-+ goto out;
-+
-+ /* TODO: If at some point we implement support for pci
-+ * root hot-remove on pcifront side, we'll need to
-+ * remove unnecessary xenstore nodes of pci roots here.
-+ */
-+
-+ break;
-+
-+ default:
-+ break;
-+ }
-+ }
-+
-+ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
-+ if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error switching to reconfigured state!");
-+ goto out;
-+ }
-+
-+out:
-+ return 0;
-+}
-+
-+static void pciback_frontend_changed(struct xenbus_device *xdev,
-+ enum xenbus_state fe_state)
-+{
-+ struct pciback_device *pdev = dev_get_drvdata(&xdev->dev);
-+
-+ dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
-+
-+ switch (fe_state) {
-+ case XenbusStateInitialised:
-+ pciback_attach(pdev);
-+ break;
-+
-+ case XenbusStateReconfiguring:
-+ pciback_reconfigure(pdev);
-+ break;
-+
-+ case XenbusStateConnected:
-+ /* pcifront switched its state from reconfiguring to connected.
-+ * Then switch to connected state.
-+ */
-+ xenbus_switch_state(xdev, XenbusStateConnected);
-+ break;
-+
-+ case XenbusStateClosing:
-+ pciback_disconnect(pdev);
-+ xenbus_switch_state(xdev, XenbusStateClosing);
-+ break;
-+
-+ case XenbusStateClosed:
-+ pciback_disconnect(pdev);
-+ xenbus_switch_state(xdev, XenbusStateClosed);
-+ if (xenbus_dev_is_online(xdev))
-+ break;
-+ /* fall through if not online */
-+ case XenbusStateUnknown:
-+ dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
-+ device_unregister(&xdev->dev);
-+ break;
-+
-+ default:
-+ break;
-+ }
-+}
-+
-+static int pciback_setup_backend(struct pciback_device *pdev)
-+{
-+ /* Get configuration from xend (if available now) */
-+ int domain, bus, slot, func;
-+ int err = 0;
-+ int i, num_devs;
-+ char dev_str[64];
-+ char state_str[64];
-+
-+ /* It's possible we could get the call to setup twice, so make sure
-+ * we're not already connected.
-+ */
-+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-+ XenbusStateInitWait)
-+ goto out;
-+
-+ dev_dbg(&pdev->xdev->dev, "getting be setup\n");
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
-+ &num_devs);
-+ if (err != 1) {
-+ if (err >= 0)
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading number of devices");
-+ goto out;
-+ }
-+
-+ for (i = 0; i < num_devs; i++) {
-+ int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
-+ if (unlikely(l >= (sizeof(dev_str) - 1))) {
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "String overflow while reading "
-+ "configuration");
-+ goto out;
-+ }
-+
-+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
-+ "%x:%x:%x.%x", &domain, &bus, &slot, &func);
-+ if (err < 0) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error reading device configuration");
-+ goto out;
-+ }
-+ if (err != 4) {
-+ err = -EINVAL;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error parsing pci device "
-+ "configuration");
-+ goto out;
-+ }
-+
-+ err = pciback_export_device(pdev, domain, bus, slot, func, i);
-+ if (err)
-+ goto out;
-+
-+ /* Switch substate of this device. */
-+ l = snprintf(state_str, sizeof(state_str), "state-%d", i);
-+ if (unlikely(l >= (sizeof(state_str) - 1))) {
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "String overflow while reading "
-+ "configuration");
-+ goto out;
-+ }
-+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
-+ "%d", XenbusStateInitialised);
-+ if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err, "Error switching "
-+ "substate of dev-%d\n", i);
-+ goto out;
-+ }
-+ }
-+
-+ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
-+ if (err) {
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error while publish PCI root buses "
-+ "for frontend");
-+ goto out;
-+ }
-+
-+ err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
-+ if (err)
-+ xenbus_dev_fatal(pdev->xdev, err,
-+ "Error switching to initialised state!");
-+
-+out:
-+ if (!err)
-+ /* see if pcifront is already configured (if not, we'll wait) */
-+ pciback_attach(pdev);
-+
-+ return err;
-+}
-+
-+/*
-+ * Watch callback for the backend's own xenstore node: run the backend
-+ * setup while this backend is still in the InitWait state, and do nothing
-+ * in any other state. vec and len are unused.
-+ */
-+static void pciback_be_watch(struct xenbus_watch *watch,
-+ const char **vec, unsigned int len)
-+{
-+ struct pciback_device *pdev;
-+
-+ pdev = container_of(watch, struct pciback_device, be_watch);
-+
-+ if (xenbus_read_driver_state(pdev->xdev->nodename) ==
-+ XenbusStateInitWait)
-+ pciback_setup_backend(pdev);
-+}
-+
-+/*
-+ * xenbus probe callback: create the pciback_device for dev, move the
-+ * backend to InitWait and start watching its xenstore node for the
-+ * configuration.
-+ *
-+ * Returns 0 on success or a negative errno. On failure everything
-+ * allocated here is released again, since the remove callback is the only
-+ * other place that frees the pciback_device.
-+ */
-+static int pciback_xenbus_probe(struct xenbus_device *dev,
-+ const struct xenbus_device_id *id)
-+{
-+ int err = 0;
-+ struct pciback_device *pdev = alloc_pdev(dev);
-+
-+ if (pdev == NULL) {
-+ err = -ENOMEM;
-+ xenbus_dev_fatal(dev, err,
-+ "Error allocating pciback_device struct");
-+ goto out;
-+ }
-+
-+ /* wait for xend to configure us */
-+ err = xenbus_switch_state(dev, XenbusStateInitWait);
-+ if (err)
-+ goto fail;
-+
-+ /* watch the backend node for backend configuration information */
-+ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
-+ pciback_be_watch);
-+ if (err)
-+ goto fail;
-+
-+ spin_lock(&pdev->dev_lock);
-+ pdev->be_watching = 1;
-+ spin_unlock(&pdev->dev_lock);
-+
-+ /* We need to force a call to our callback here in case
-+ * xend already configured us!
-+ */
-+ pciback_be_watch(&pdev->be_watch, NULL, 0);
-+
-+ return 0;
-+
-+fail:
-+ /* be_watching is still 0 here, so free_pdev() skips the watch and
-+ * only tears down what alloc_pdev() set up. */
-+ free_pdev(pdev);
-+out:
-+ return err;
-+}
-+
-+/*
-+ * xenbus remove callback: free the pciback_device stored as drvdata, if
-+ * there is one. Always returns 0.
-+ */
-+static int pciback_xenbus_remove(struct xenbus_device *dev)
-+{
-+ struct pciback_device *pdev = dev_get_drvdata(&dev->dev);
-+
-+ if (!pdev)
-+ return 0;
-+
-+ free_pdev(pdev);
-+ return 0;
-+}
-+
-+static const struct xenbus_device_id xenpci_ids[] = {
-+ {"pci"},
-+ {""},
-+};
-+
-+static struct xenbus_driver xenbus_pciback_driver = {
-+ .name = "pciback",
-+ .owner = THIS_MODULE,
-+ .ids = xenpci_ids,
-+ .probe = pciback_xenbus_probe,
-+ .remove = pciback_xenbus_remove,
-+ .otherend_changed = pciback_frontend_changed,
-+};
-+
-+int __init pciback_xenbus_register(void)
-+{
-+ pciback_wq = create_workqueue("pciback_workqueue");
-+ if (!pciback_wq) {
-+ printk(KERN_ERR "%s: create"
-+ "pciback_workqueue failed\n",__FUNCTION__);
-+ return -EFAULT;
-+ }
-+ return xenbus_register_backend(&xenbus_pciback_driver);
-+}
-+
-+void __exit pciback_xenbus_unregister(void)
-+{
-+ destroy_workqueue(pciback_wq);
-+ xenbus_unregister_driver(&xenbus_pciback_driver);
-+}
-diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
-new file mode 100644
-index 0000000..6d1a770
---- /dev/null
-+++ b/drivers/xen/pcpu.c
-@@ -0,0 +1,452 @@
-+/*
-+ * pcpu.c - management physical cpu in dom0 environment
-+ */
-+#include <linux/interrupt.h>
-+#include <linux/spinlock.h>
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/hypercall.h>
-+#include <linux/cpu.h>
-+#include <xen/xenbus.h>
-+#include <xen/pcpu.h>
-+#include <xen/events.h>
-+#include <xen/acpi.h>
-+
-+static struct sysdev_class xen_pcpu_sysdev_class = {
-+ .name = "xen_pcpu",
-+};
-+
-+static DEFINE_MUTEX(xen_pcpu_lock);
-+static RAW_NOTIFIER_HEAD(xen_pcpu_chain);
-+
-+/* No need for irq disable since hotplug notify is in workqueue context */
-+#define get_pcpu_lock() mutex_lock(&xen_pcpu_lock);
-+#define put_pcpu_lock() mutex_unlock(&xen_pcpu_lock);
-+
-+struct xen_pcpus {
-+ struct list_head list;
-+ int present;
-+};
-+static struct xen_pcpus xen_pcpus;
-+
-+int register_xen_pcpu_notifier(struct notifier_block *nb)
-+{
-+ int ret;
-+
-+ /* All refer to the chain notifier is protected by the pcpu_lock */
-+ get_pcpu_lock();
-+ ret = raw_notifier_chain_register(&xen_pcpu_chain, nb);
-+ put_pcpu_lock();
-+ return ret;
-+}
-+EXPORT_SYMBOL_GPL(register_xen_pcpu_notifier);
-+
-+void unregister_xen_pcpu_notifier(struct notifier_block *nb)
-+{
-+ get_pcpu_lock();
-+ raw_notifier_chain_unregister(&xen_pcpu_chain, nb);
-+ put_pcpu_lock();
-+}
-+EXPORT_SYMBOL_GPL(unregister_xen_pcpu_notifier);
-+
-+static int xen_pcpu_down(uint32_t xen_id)
-+{
-+ int ret;
-+ xen_platform_op_t op = {
-+ .cmd = XENPF_cpu_offline,
-+ .interface_version = XENPF_INTERFACE_VERSION,
-+ .u.cpu_ol.cpuid = xen_id,
-+ };
-+
-+ ret = HYPERVISOR_dom0_op(&op);
-+ return ret;
-+}
-+
-+static int xen_pcpu_up(uint32_t xen_id)
-+{
-+ int ret;
-+ xen_platform_op_t op = {
-+ .cmd = XENPF_cpu_online,
-+ .interface_version = XENPF_INTERFACE_VERSION,
-+ .u.cpu_ol.cpuid = xen_id,
-+ };
-+
-+ ret = HYPERVISOR_dom0_op(&op);
-+ return ret;
-+}
-+
-+static ssize_t show_online(struct sys_device *dev,
-+ struct sysdev_attribute *attr,
-+ char *buf)
-+{
-+ struct pcpu *cpu = container_of(dev, struct pcpu, sysdev);
-+
-+ return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE));
-+}
-+
-+static ssize_t __ref store_online(struct sys_device *dev,
-+ struct sysdev_attribute *attr,
-+ const char *buf, size_t count)
-+{
-+ struct pcpu *cpu = container_of(dev, struct pcpu, sysdev);
-+ ssize_t ret;
-+
-+ switch (buf[0]) {
-+ case '0':
-+ ret = xen_pcpu_down(cpu->xen_id);
-+ break;
-+ case '1':
-+ ret = xen_pcpu_up(cpu->xen_id);
-+ break;
-+ default:
-+ ret = -EINVAL;
-+ }
-+
-+ if (ret >= 0)
-+ ret = count;
-+ return ret;
-+}
-+
-+static SYSDEV_ATTR(online, 0644, show_online, store_online);
-+
-+static ssize_t show_apicid(struct sys_device *dev,
-+ struct sysdev_attribute *attr,
-+ char *buf)
-+{
-+ struct pcpu *cpu = container_of(dev, struct pcpu, sysdev);
-+
-+ return sprintf(buf, "%u\n", cpu->apic_id);
-+}
-+
-+static ssize_t show_acpiid(struct sys_device *dev,
-+ struct sysdev_attribute *attr,
-+ char *buf)
-+{
-+ struct pcpu *cpu = container_of(dev, struct pcpu, sysdev);
-+
-+ return sprintf(buf, "%u\n", cpu->acpi_id);
-+}
-+static SYSDEV_ATTR(apic_id, 0444, show_apicid, NULL);
-+static SYSDEV_ATTR(acpi_id, 0444, show_acpiid, NULL);
-+
-+static int xen_pcpu_free(struct pcpu *pcpu)
-+{
-+ if (!pcpu)
-+ return 0;
-+
-+ sysdev_remove_file(&pcpu->sysdev, &attr_online);
-+ sysdev_unregister(&pcpu->sysdev);
-+ list_del(&pcpu->pcpu_list);
-+ kfree(pcpu);
-+
-+ return 0;
-+}
-+
-+static inline int same_pcpu(struct xenpf_pcpuinfo *info,
-+ struct pcpu *pcpu)
-+{
-+ return (pcpu->apic_id == info->apic_id) &&
-+ (pcpu->xen_id == info->xen_cpuid);
-+}
-+
-+/*
-+ * Return 1 if online status changed
-+ */
-+static int xen_pcpu_online_check(struct xenpf_pcpuinfo *info,
-+ struct pcpu *pcpu)
-+{
-+ int result = 0;
-+
-+ if (info->xen_cpuid != pcpu->xen_id)
-+ return 0;
-+
-+ if (xen_pcpu_online(info->flags) && !xen_pcpu_online(pcpu->flags)) {
-+ /* the pcpu is onlined */
-+ pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
-+ kobject_uevent(&pcpu->sysdev.kobj, KOBJ_ONLINE);
-+ raw_notifier_call_chain(&xen_pcpu_chain,
-+ XEN_PCPU_ONLINE, (void *)(long)pcpu->xen_id);
-+ result = 1;
-+ } else if (!xen_pcpu_online(info->flags) &&
-+ xen_pcpu_online(pcpu->flags)) {
-+ /* The pcpu is offlined now */
-+ pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
-+ kobject_uevent(&pcpu->sysdev.kobj, KOBJ_OFFLINE);
-+ raw_notifier_call_chain(&xen_pcpu_chain,
-+ XEN_PCPU_OFFLINE, (void *)(long)pcpu->xen_id);
-+ result = 1;
-+ }
-+
-+ return result;
-+}
-+
-+static int pcpu_sysdev_init(struct pcpu *cpu)
-+{
-+ int error;
-+
-+ error = sysdev_register(&cpu->sysdev);
-+ if (error) {
-+ printk(KERN_WARNING "xen_pcpu_add: Failed to register pcpu\n");
-+ kfree(cpu);
-+ return -1;
-+ }
-+ sysdev_create_file(&cpu->sysdev, &attr_online);
-+ sysdev_create_file(&cpu->sysdev, &attr_apic_id);
-+ sysdev_create_file(&cpu->sysdev, &attr_acpi_id);
-+ return 0;
-+}
-+
-+static struct pcpu *get_pcpu(int xen_id)
-+{
-+ struct pcpu *pcpu = NULL;
-+
-+ list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list) {
-+ if (pcpu->xen_id == xen_id)
-+ return pcpu;
-+ }
-+ return NULL;
-+}
-+
-+static struct pcpu *init_pcpu(struct xenpf_pcpuinfo *info)
-+{
-+ struct pcpu *pcpu;
-+
-+ if (info->flags & XEN_PCPU_FLAGS_INVALID)
-+ return NULL;
-+
-+ /* The PCPU is just added */
-+ pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
-+ if (!pcpu)
-+ return NULL;
-+
-+ INIT_LIST_HEAD(&pcpu->pcpu_list);
-+ pcpu->xen_id = info->xen_cpuid;
-+ pcpu->apic_id = info->apic_id;
-+ pcpu->acpi_id = info->acpi_id;
-+ pcpu->flags = info->flags;
-+
-+ pcpu->sysdev.cls = &xen_pcpu_sysdev_class;
-+ pcpu->sysdev.id = info->xen_cpuid;
-+
-+ if (pcpu_sysdev_init(pcpu)) {
-+ kfree(pcpu);
-+ return NULL;
-+ }
-+
-+ list_add_tail(&pcpu->pcpu_list, &xen_pcpus.list);
-+ raw_notifier_call_chain(&xen_pcpu_chain,
-+ XEN_PCPU_ADD,
-+ (void *)(long)pcpu->xen_id);
-+ return pcpu;
-+}
-+
-+#define PCPU_NO_CHANGE 0
-+#define PCPU_ADDED 1
-+#define PCPU_ONLINE_OFFLINE 2
-+#define PCPU_REMOVED 3
-+/*
-+ * Caller should hold the pcpu lock
-+ * < 0: Something wrong
-+ * 0: No changes
-+ * > 0: State changed
-+ */
-+static struct pcpu *_sync_pcpu(int cpu_num, int *max_id, int *result)
-+{
-+ struct pcpu *pcpu = NULL;
-+ struct xenpf_pcpuinfo *info;
-+ xen_platform_op_t op = {
-+ .cmd = XENPF_get_cpuinfo,
-+ .interface_version = XENPF_INTERFACE_VERSION,
-+ };
-+ int ret;
-+
-+ *result = -1;
-+
-+ info = &op.u.pcpu_info;
-+ info->xen_cpuid = cpu_num;
-+
-+ ret = HYPERVISOR_dom0_op(&op);
-+ if (ret)
-+ return NULL;
-+
-+ if (max_id)
-+ *max_id = op.u.pcpu_info.max_present;
-+
-+ pcpu = get_pcpu(cpu_num);
-+
-+ if (info->flags & XEN_PCPU_FLAGS_INVALID) {
-+ /* The pcpu has been removed */
-+ *result = PCPU_NO_CHANGE;
-+ if (pcpu) {
-+ raw_notifier_call_chain(&xen_pcpu_chain,
-+ XEN_PCPU_REMOVE,
-+ (void *)(long)pcpu->xen_id);
-+ xen_pcpu_free(pcpu);
-+ *result = PCPU_REMOVED;
-+ }
-+ return NULL;
-+ }
-+
-+
-+ if (!pcpu) {
-+ *result = PCPU_ADDED;
-+ pcpu = init_pcpu(info);
-+ if (pcpu == NULL) {
-+ printk(KERN_WARNING "Failed to init pcpu %x\n",
-+ info->xen_cpuid);
-+ *result = -1;
-+ }
-+ } else {
-+ *result = PCPU_NO_CHANGE;
-+ /*
-+ * Old PCPU is replaced with a new pcpu, this means
-+ * several virq is missed, will it happen?
-+ */
-+ if (!same_pcpu(info, pcpu)) {
-+ printk(KERN_WARNING "Pcpu %x changed!\n",
-+ pcpu->xen_id);
-+ pcpu->apic_id = info->apic_id;
-+ pcpu->acpi_id = info->acpi_id;
-+ }
-+ if (xen_pcpu_online_check(info, pcpu))
-+ *result = PCPU_ONLINE_OFFLINE;
-+ }
-+ return pcpu;
-+}
-+
-+int xen_pcpu_index(uint32_t id, int is_acpiid)
-+{
-+ int cpu_num = 0, max_id = 0, ret;
-+ xen_platform_op_t op = {
-+ .cmd = XENPF_get_cpuinfo,
-+ .interface_version = XENPF_INTERFACE_VERSION,
-+ };
-+ struct xenpf_pcpuinfo *info = &op.u.pcpu_info;
-+
-+ info->xen_cpuid = 0;
-+ ret = HYPERVISOR_dom0_op(&op);
-+ if (ret)
-+ return -1;
-+ max_id = op.u.pcpu_info.max_present;
-+
-+ while ((cpu_num <= max_id)) {
-+ info->xen_cpuid = cpu_num;
-+ ret = HYPERVISOR_dom0_op(&op);
-+ if (ret)
-+ continue;
-+
-+ if (op.u.pcpu_info.max_present > max_id)
-+ max_id = op.u.pcpu_info.max_present;
-+ if (id == (is_acpiid ? info->acpi_id : info->apic_id))
-+ return cpu_num;
-+ cpu_num++;
-+ }
-+
-+ return -1;
-+}
-+EXPORT_SYMBOL(xen_pcpu_index);
-+
-+/*
-+ * Sync dom0's pcpu information with xen hypervisor's
-+ */
-+static int xen_sync_pcpus(void)
-+{
-+ /*
-+ * Boot cpu always have cpu_id 0 in xen
-+ */
-+ int cpu_num = 0, max_id = 0, result = 0, present = 0;
-+ struct list_head *elem, *tmp;
-+ struct pcpu *pcpu;
-+
-+ get_pcpu_lock();
-+
-+ while ((result >= 0) && (cpu_num <= max_id)) {
-+ pcpu = _sync_pcpu(cpu_num, &max_id, &result);
-+
-+ printk(KERN_DEBUG "sync cpu %x get result %x max_id %x\n",
-+ cpu_num, result, max_id);
-+
-+ switch (result) {
-+ case PCPU_NO_CHANGE:
-+ if (pcpu)
-+ present++;
-+ break;
-+ case PCPU_ADDED:
-+ case PCPU_ONLINE_OFFLINE:
-+ present++;
-+ case PCPU_REMOVED:
-+ break;
-+ default:
-+ printk(KERN_WARNING "Failed to sync pcpu %x\n",
-+ cpu_num);
-+ break;
-+
-+ }
-+ cpu_num++;
-+ }
-+
-+ if (result < 0) {
-+ list_for_each_safe(elem, tmp, &xen_pcpus.list) {
-+ pcpu = list_entry(elem, struct pcpu, pcpu_list);
-+ xen_pcpu_free(pcpu);
-+ }
-+ present = 0;
-+ }
-+
-+ xen_pcpus.present = present;
-+
-+ put_pcpu_lock();
-+
-+ return 0;
-+}
-+
-+static void xen_pcpu_dpc(struct work_struct *work)
-+{
-+ if (xen_sync_pcpus() < 0)
-+ printk(KERN_WARNING
-+ "xen_pcpu_dpc: Failed to sync pcpu information\n");
-+}
-+static DECLARE_WORK(xen_pcpu_work, xen_pcpu_dpc);
-+
-+int xen_pcpu_hotplug(int type, uint32_t apic_id)
-+{
-+ schedule_work(&xen_pcpu_work);
-+
-+ return 0;
-+}
-+EXPORT_SYMBOL(xen_pcpu_hotplug);
-+
-+static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
-+{
-+ schedule_work(&xen_pcpu_work);
-+ return IRQ_HANDLED;
-+}
-+
-+static int __init xen_pcpu_init(void)
-+{
-+ int err;
-+
-+ if (!xen_initial_domain())
-+ return 0;
-+
-+ err = sysdev_class_register(&xen_pcpu_sysdev_class);
-+ if (err) {
-+ printk(KERN_WARNING
-+ "xen_pcpu_init: register xen_pcpu sysdev Failed!\n");
-+ return err;
-+ }
-+
-+ INIT_LIST_HEAD(&xen_pcpus.list);
-+ xen_pcpus.present = 0;
-+
-+ xen_sync_pcpus();
-+ if (xen_pcpus.present > 0)
-+ err = bind_virq_to_irqhandler(VIRQ_PCPU_STATE,
-+ 0, xen_pcpu_interrupt, 0, "pcpu", NULL);
-+ if (err < 0)
-+ printk(KERN_WARNING "xen_pcpu_init: "
-+ "Failed to bind pcpu_state virq\n"
-+ "You will lost latest information! \n");
-+ return err;
-+}
-+
-+arch_initcall(xen_pcpu_init);
-diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
-new file mode 100644
-index 0000000..c01b5dd
---- /dev/null
-+++ b/drivers/xen/platform-pci.c
-@@ -0,0 +1,207 @@
-+/******************************************************************************
-+ * platform-pci.c
-+ *
-+ * Xen platform PCI device driver
-+ * Copyright (c) 2005, Intel Corporation.
-+ * Copyright (c) 2007, XenSource Inc.
-+ * Copyright (c) 2010, Citrix
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms and conditions of the GNU General Public License,
-+ * version 2, as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-+ * more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along with
-+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-+ * Place - Suite 330, Boston, MA 02111-1307 USA.
-+ *
-+ */
-+
-+
-+#include <linux/interrupt.h>
-+#include <linux/io.h>
-+#include <linux/module.h>
-+#include <linux/pci.h>
-+
-+#include <xen/platform_pci.h>
-+#include <xen/grant_table.h>
-+#include <xen/xenbus.h>
-+#include <xen/events.h>
-+#include <xen/hvm.h>
-+#include <xen/xen-ops.h>
-+
-+#define DRV_NAME "xen-platform-pci"
-+
-+MODULE_AUTHOR("ssmith@xensource.com and stefano.stabellini@eu.citrix.com");
-+MODULE_DESCRIPTION("Xen platform PCI device");
-+MODULE_LICENSE("GPL");
-+
-+static unsigned long platform_mmio;
-+static unsigned long platform_mmio_alloc;
-+static unsigned long platform_mmiolen;
-+static uint64_t callback_via;
-+
-+unsigned long alloc_xen_mmio(unsigned long len)
-+{
-+ unsigned long addr;
-+
-+ addr = platform_mmio + platform_mmio_alloc;
-+ platform_mmio_alloc += len;
-+ BUG_ON(platform_mmio_alloc > platform_mmiolen);
-+
-+ return addr;
-+}
-+
-+static uint64_t get_callback_via(struct pci_dev *pdev)
-+{
-+ u8 pin;
-+ int irq;
-+
-+ irq = pdev->irq;
-+ if (irq < 16)
-+ return irq; /* ISA IRQ */
-+
-+ pin = pdev->pin;
-+
-+ /* We don't know the GSI. Specify the PCI INTx line instead. */
-+ return ((uint64_t)0x01 << 56) | /* PCI INTx identifier */
-+ ((uint64_t)pci_domain_nr(pdev->bus) << 32) |
-+ ((uint64_t)pdev->bus->number << 16) |
-+ ((uint64_t)(pdev->devfn & 0xff) << 8) |
-+ ((uint64_t)(pin - 1) & 3);
-+}
-+
-+static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)
-+{
-+ xen_hvm_evtchn_do_upcall();
-+ return IRQ_HANDLED;
-+}
-+
-+static int xen_allocate_irq(struct pci_dev *pdev)
-+{
-+ return request_irq(pdev->irq, do_hvm_evtchn_intr,
-+ IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
-+ "xen-platform-pci", pdev);
-+}
-+
-+static int platform_pci_resume(struct pci_dev *pdev)
-+{
-+ int err;
-+ if (xen_have_vector_callback)
-+ return 0;
-+ err = xen_set_callback_via(callback_via);
-+ if (err) {
-+ dev_err(&pdev->dev, "platform_pci_resume failure!\n");
-+ return err;
-+ }
-+ return 0;
-+}
-+
-+static int __devinit platform_pci_init(struct pci_dev *pdev,
-+ const struct pci_device_id *ent)
-+{
-+ int i, ret;
-+ long ioaddr, iolen;
-+ long mmio_addr, mmio_len;
-+ unsigned int max_nr_gframes;
-+
-+ i = pci_enable_device(pdev);
-+ if (i)
-+ return i;
-+
-+ ioaddr = pci_resource_start(pdev, 0);
-+ iolen = pci_resource_len(pdev, 0);
-+
-+ mmio_addr = pci_resource_start(pdev, 1);
-+ mmio_len = pci_resource_len(pdev, 1);
-+
-+ if (mmio_addr == 0 || ioaddr == 0) {
-+ dev_err(&pdev->dev, "no resources found\n");
-+ ret = -ENOENT;
-+ goto pci_out;
-+ }
-+
-+ if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) {
-+ dev_err(&pdev->dev, "MEM I/O resource 0x%lx @ 0x%lx busy\n",
-+ mmio_addr, mmio_len);
-+ ret = -EBUSY;
-+ goto pci_out;
-+ }
-+
-+ if (request_region(ioaddr, iolen, DRV_NAME) == NULL) {
-+ dev_err(&pdev->dev, "I/O resource 0x%lx @ 0x%lx busy\n",
-+ iolen, ioaddr);
-+ ret = -EBUSY;
-+ goto mem_out;
-+ }
-+
-+ platform_mmio = mmio_addr;
-+ platform_mmiolen = mmio_len;
-+
-+ if (!xen_have_vector_callback) {
-+ ret = xen_allocate_irq(pdev);
-+ if (ret) {
-+ dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret);
-+ goto out;
-+ }
-+ callback_via = get_callback_via(pdev);
-+ ret = xen_set_callback_via(callback_via);
-+ if (ret) {
-+ dev_warn(&pdev->dev, "Unable to set the evtchn callback "
-+ "err=%d\n", ret);
-+ goto out;
-+ }
-+ }
-+
-+ max_nr_gframes = gnttab_max_grant_frames();
-+ xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
-+ ret = gnttab_init();
-+ if (ret)
-+ goto out;
-+ xenbus_probe(NULL);
-+ ret = xen_setup_shutdown_event();
-+ if (ret)
-+ goto out;
-+ return 0;
-+
-+out:
-+ release_region(ioaddr, iolen);
-+mem_out:
-+ release_mem_region(mmio_addr, mmio_len);
-+pci_out:
-+ pci_disable_device(pdev);
-+ return ret;
-+}
-+
-+static struct pci_device_id platform_pci_tbl[] __devinitdata = {
-+ {PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM,
-+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-+ {0,}
-+};
-+
-+MODULE_DEVICE_TABLE(pci, platform_pci_tbl);
-+
-+static struct pci_driver platform_driver = {
-+ .name = DRV_NAME,
-+ .probe = platform_pci_init,
-+ .id_table = platform_pci_tbl,
-+#ifdef CONFIG_PM
-+ .resume_early = platform_pci_resume,
-+#endif
-+};
-+
-+static int __init platform_pci_module_init(void)
-+{
-+ /* no unplug has been done, IGNORE hasn't been specified: just
-+ * return now */
-+ if (!xen_platform_pci_unplug)
-+ return -ENODEV;
-+
-+ return pci_register_driver(&platform_driver);
-+}
-+
-+module_init(platform_pci_module_init);
-diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
-index 88a60e0..ae5cb05 100644
---- a/drivers/xen/sys-hypervisor.c
-+++ b/drivers/xen/sys-hypervisor.c
-@@ -14,6 +14,7 @@
- #include <asm/xen/hypervisor.h>
- #include <asm/xen/hypercall.h>
-
-+#include <xen/xen.h>
- #include <xen/xenbus.h>
- #include <xen/interface/xen.h>
- #include <xen/interface/version.h>
-diff --git a/drivers/xen/xen_acpi_memhotplug.c b/drivers/xen/xen_acpi_memhotplug.c
-new file mode 100644
-index 0000000..0c4af99
---- /dev/null
-+++ b/drivers/xen/xen_acpi_memhotplug.c
-@@ -0,0 +1,209 @@
-+/*
-+ * xen_acpi_memhotplug.c - interface to notify Xen on memory device hotadd
-+ *
-+ * Copyright (C) 2008, Intel corporation
-+ *
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or (at
-+ * your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along
-+ * with this program; if not, write to the Free Software Foundation, Inc.,
-+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
-+ *
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/types.h>
-+#include <linux/memory_hotplug.h>
-+#include <acpi/acpi_drivers.h>
-+#include <xen/interface/platform.h>
-+#include <linux/interrupt.h>
-+#include <linux/spinlock.h>
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/hypercall.h>
-+#include <xen/acpi.h>
-+
-+struct xen_hotmem_entry {
-+ struct list_head hotmem_list;
-+ uint64_t start;
-+ uint64_t end;
-+ uint32_t flags;
-+ uint32_t pxm;
-+};
-+
-+struct xen_hotmem_list {
-+ struct list_head list;
-+ int entry_nr;
-+} xen_hotmem;
-+
-+DEFINE_SPINLOCK(xen_hotmem_lock);
-+
-+static int xen_hyper_addmem(struct xen_hotmem_entry *entry)
-+{
-+ int ret;
-+
-+ xen_platform_op_t op = {
-+ .cmd = XENPF_mem_hotadd,
-+ .interface_version = XENPF_INTERFACE_VERSION,
-+ };
-+ op.u.mem_add.spfn = entry->start >> PAGE_SHIFT;
-+ op.u.mem_add.epfn = entry->end >> PAGE_SHIFT;
-+ op.u.mem_add.flags = entry->flags;
-+ op.u.mem_add.pxm = entry->pxm;
-+
-+ ret = HYPERVISOR_dom0_op(&op);
-+ return ret;
-+}
-+
-+static int add_hotmem_entry(int pxm, uint64_t start,
-+ uint64_t length, uint32_t flags)
-+{
-+ struct xen_hotmem_entry *entry;
-+
-+ if (pxm < 0 || !length)
-+ return -EINVAL;
-+
-+ entry = kzalloc(sizeof(struct xen_hotmem_entry), GFP_ATOMIC);
-+ if (!entry)
-+ return -ENOMEM;
-+
-+ INIT_LIST_HEAD(&entry->hotmem_list);
-+ entry->start = start;
-+ entry->end = start + length;
-+ entry->flags = flags;
-+ entry->pxm = pxm;
-+
-+ spin_lock(&xen_hotmem_lock);
-+
-+ list_add_tail(&entry->hotmem_list, &xen_hotmem.list);
-+ xen_hotmem.entry_nr++;
-+
-+ spin_unlock(&xen_hotmem_lock);
-+
-+ return 0;
-+}
-+
-+static int free_hotmem_entry(struct xen_hotmem_entry *entry)
-+{
-+ list_del(&entry->hotmem_list);
-+ kfree(entry);
-+
-+ return 0;
-+}
-+
-+static void xen_hotadd_mem_dpc(struct work_struct *work)
-+{
-+ struct list_head *elem, *tmp;
-+ struct xen_hotmem_entry *entry;
-+ unsigned long flags;
-+ int ret;
-+
-+ spin_lock_irqsave(&xen_hotmem_lock, flags);
-+ list_for_each_safe(elem, tmp, &xen_hotmem.list) {
-+ entry = list_entry(elem, struct xen_hotmem_entry, hotmem_list);
-+ ret = xen_hyper_addmem(entry);
-+ if (ret)
-+ printk(KERN_WARNING "xen addmem failed with %x\n", ret);
-+ free_hotmem_entry(entry);
-+ xen_hotmem.entry_nr--;
-+ }
-+ spin_unlock_irqrestore(&xen_hotmem_lock, flags);
-+}
-+
-+static DECLARE_WORK(xen_hotadd_mem_work, xen_hotadd_mem_dpc);
-+
-+static int xen_acpi_get_pxm(acpi_handle h)
-+{
-+ unsigned long long pxm;
-+ acpi_status status;
-+ acpi_handle handle;
-+ acpi_handle phandle = h;
-+
-+ do {
-+ handle = phandle;
-+ status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
-+ if (ACPI_SUCCESS(status))
-+ return pxm;
-+ status = acpi_get_parent(handle, &phandle);
-+ } while (ACPI_SUCCESS(status));
-+
-+ return -1;
-+}
-+
-+int xen_hotadd_memory(struct acpi_memory_device *mem_device)
-+{
-+ int pxm, result;
-+ int num_enabled = 0;
-+ struct acpi_memory_info *info;
-+
-+ if (!mem_device)
-+ return -EINVAL;
-+
-+ pxm = xen_acpi_get_pxm(mem_device->device->handle);
-+
-+ if (pxm < 0)
-+ return -EINVAL;
-+
-+ /*
-+ * Always return success to ACPI driver, and notify hypervisor later
-+ * because hypervisor will utilize the memory in memory hotadd hypercall
-+ */
-+ list_for_each_entry(info, &mem_device->res_list, list) {
-+ if (info->enabled) { /* just sanity check...*/
-+ num_enabled++;
-+ continue;
-+ }
-+ /*
-+ * If the memory block size is zero, please ignore it.
-+ * Don't try to do the following memory hotplug flowchart.
-+ */
-+ if (!info->length)
-+ continue;
-+
-+ result = add_hotmem_entry(pxm, info->start_addr,
-+ info->length, 0);
-+ if (result)
-+ continue;
-+ info->enabled = 1;
-+ num_enabled++;
-+ }
-+
-+ if (!num_enabled)
-+ return -EINVAL;
-+
-+ schedule_work(&xen_hotadd_mem_work);
-+
-+ return 0;
-+}
-+EXPORT_SYMBOL(xen_hotadd_memory);
-+
-+static int xen_hotadd_mem_init(void)
-+{
-+ if (!xen_initial_domain())
-+ return -ENODEV;
-+
-+ INIT_LIST_HEAD(&xen_hotmem.list);
-+ xen_hotmem.entry_nr = 0;
-+
-+ return 0;
-+}
-+
-+static void xen_hotadd_mem_exit(void)
-+{
-+ flush_scheduled_work();
-+}
-+
-+module_init(xen_hotadd_mem_init);
-+module_exit(xen_hotadd_mem_exit);
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile
-index 5571f5b..8dca685 100644
---- a/drivers/xen/xenbus/Makefile
-+++ b/drivers/xen/xenbus/Makefile
-@@ -5,3 +5,8 @@ xenbus-objs += xenbus_client.o
- xenbus-objs += xenbus_comms.o
- xenbus-objs += xenbus_xs.o
- xenbus-objs += xenbus_probe.o
-+
-+xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
-+xenbus-objs += $(xenbus-be-objs-y)
-+
-+obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o
-diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
-index 92a1ef8..89f2e42 100644
---- a/drivers/xen/xenbus/xenbus_client.c
-+++ b/drivers/xen/xenbus/xenbus_client.c
-@@ -49,6 +49,8 @@ const char *xenbus_strstate(enum xenbus_state state)
- [ XenbusStateConnected ] = "Connected",
- [ XenbusStateClosing ] = "Closing",
- [ XenbusStateClosed ] = "Closed",
-+ [ XenbusStateReconfiguring ] = "Reconfiguring",
-+ [ XenbusStateReconfigured ] = "Reconfigured",
- };
- return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
- }
-@@ -132,17 +134,12 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev,
- }
- EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
-
-+static void xenbus_switch_fatal(struct xenbus_device *, int, int,
-+ const char *, ...);
-
--/**
-- * xenbus_switch_state
-- * @dev: xenbus device
-- * @state: new state
-- *
-- * Advertise in the store a change of the given driver to the given new_state.
-- * Return 0 on success, or -errno on error. On error, the device will switch
-- * to XenbusStateClosing, and the error will be saved in the store.
-- */
--int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
-+static int
-+__xenbus_switch_state(struct xenbus_device *dev,
-+ enum xenbus_state state, int depth)
- {
- /* We check whether the state is currently set to the given value, and
- if not, then the state is set. We don't want to unconditionally
-@@ -151,35 +148,65 @@ int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
- to it, as the device will be tearing down, and we don't want to
- resurrect that directory.
-
-- Note that, because of this cached value of our state, this function
-- will not work inside a Xenstore transaction (something it was
-- trying to in the past) because dev->state would not get reset if
-- the transaction was aborted.
--
-+ Note that, because of this cached value of our state, this
-+ function will not take a caller's Xenstore transaction
-+ (something it was trying to in the past) because dev->state
-+ would not get reset if the transaction was aborted.
- */
-
-+ struct xenbus_transaction xbt;
- int current_state;
-- int err;
-+ int err, abort;
-
- if (state == dev->state)
- return 0;
-
-- err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
-- &current_state);
-- if (err != 1)
-+again:
-+ abort = 1;
-+
-+ err = xenbus_transaction_start(&xbt);
-+ if (err) {
-+ xenbus_switch_fatal(dev, depth, err, "starting transaction");
- return 0;
-+ }
-+
-+ err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
-+ if (err != 1)
-+ goto abort;
-
-- err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
-+ err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
- if (err) {
-- if (state != XenbusStateClosing) /* Avoid looping */
-- xenbus_dev_fatal(dev, err, "writing new state");
-- return err;
-+ xenbus_switch_fatal(dev, depth, err, "writing new state");
-+ goto abort;
- }
-
-- dev->state = state;
-+ abort = 0;
-+abort:
-+ err = xenbus_transaction_end(xbt, abort);
-+ if (err) {
-+ if (err == -EAGAIN && !abort)
-+ goto again;
-+ xenbus_switch_fatal(dev, depth, err, "ending transaction");
-+ } else
-+ dev->state = state;
-
- return 0;
- }
-+
-+/**
-+ * xenbus_switch_state
-+ * @dev: xenbus device
-+ * @state: new state
-+ *
-+ * Advertise in the store a change of the given driver to the given new_state.
-+ * Return 0 on success, or -errno on error. On error, the device will switch
-+ * to XenbusStateClosing, and the error will be saved in the store.
-+ */
-+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
-+{
-+ return __xenbus_switch_state(dev, state, 0);
-+}
-+
- EXPORT_SYMBOL_GPL(xenbus_switch_state);
-
- int xenbus_frontend_closed(struct xenbus_device *dev)
-@@ -283,6 +310,23 @@ void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
- EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
-
- /**
-+ * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
-+ * avoiding recursion within xenbus_switch_state.
-+ */
-+static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
-+ const char *fmt, ...)
-+{
-+ va_list ap;
-+
-+ va_start(ap, fmt);
-+ xenbus_va_dev_error(dev, err, fmt, ap);
-+ va_end(ap);
-+
-+ if (!depth)
-+ __xenbus_switch_state(dev, XenbusStateClosing, 1);
-+}
-+
-+/**
- * xenbus_grant_ring
- * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
-index 649fcdf..3a83ba2 100644
---- a/drivers/xen/xenbus/xenbus_probe.c
-+++ b/drivers/xen/xenbus/xenbus_probe.c
-@@ -49,31 +49,29 @@
- #include <asm/page.h>
- #include <asm/pgtable.h>
- #include <asm/xen/hypervisor.h>
-+
-+#include <xen/xen.h>
- #include <xen/xenbus.h>
- #include <xen/events.h>
- #include <xen/page.h>
-
-+#include <xen/platform_pci.h>
-+#include <xen/hvm.h>
-+
- #include "xenbus_comms.h"
- #include "xenbus_probe.h"
-
-
- int xen_store_evtchn;
--EXPORT_SYMBOL(xen_store_evtchn);
-+EXPORT_SYMBOL_GPL(xen_store_evtchn);
-
- struct xenstore_domain_interface *xen_store_interface;
-+EXPORT_SYMBOL_GPL(xen_store_interface);
-+
- static unsigned long xen_store_mfn;
-
- static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
-
--static void wait_for_devices(struct xenbus_driver *xendrv);
--
--static int xenbus_probe_frontend(const char *type, const char *name);
--
--static void xenbus_dev_shutdown(struct device *_dev);
--
--static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
--static int xenbus_dev_resume(struct device *dev);
--
- /* If something in array of ids matches this device, return it. */
- static const struct xenbus_device_id *
- match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
-@@ -94,34 +92,7 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv)
-
- return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
- }
--
--static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env)
--{
-- struct xenbus_device *dev = to_xenbus_device(_dev);
--
-- if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype))
-- return -ENOMEM;
--
-- return 0;
--}
--
--/* device/<type>/<id> => <type>-<id> */
--static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
--{
-- nodename = strchr(nodename, '/');
-- if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) {
-- printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
-- return -EINVAL;
-- }
--
-- strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE);
-- if (!strchr(bus_id, '/')) {
-- printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
-- return -EINVAL;
-- }
-- *strchr(bus_id, '/') = '-';
-- return 0;
--}
-+EXPORT_SYMBOL_GPL(xenbus_match);
-
-
- static void free_otherend_details(struct xenbus_device *dev)
-@@ -141,7 +112,28 @@ static void free_otherend_watch(struct xenbus_device *dev)
- }
-
-
--int read_otherend_details(struct xenbus_device *xendev,
-+static int talk_to_otherend(struct xenbus_device *dev)
-+{
-+ struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
-+
-+ free_otherend_watch(dev);
-+ free_otherend_details(dev);
-+
-+ return drv->read_otherend_details(dev);
-+}
-+
-+
-+
-+static int watch_otherend(struct xenbus_device *dev)
-+{
-+ struct xen_bus_type *bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
-+
-+ return xenbus_watch_pathfmt(dev, &dev->otherend_watch, bus->otherend_changed,
-+ "%s/%s", dev->otherend, "state");
-+}
-+
-+
-+int xenbus_read_otherend_details(struct xenbus_device *xendev,
- char *id_node, char *path_node)
- {
- int err = xenbus_gather(XBT_NIL, xendev->nodename,
-@@ -166,39 +158,11 @@ int read_otherend_details(struct xenbus_device *xendev,
-
- return 0;
- }
-+EXPORT_SYMBOL_GPL(xenbus_read_otherend_details);
-
--
--static int read_backend_details(struct xenbus_device *xendev)
--{
-- return read_otherend_details(xendev, "backend-id", "backend");
--}
--
--static struct device_attribute xenbus_dev_attrs[] = {
-- __ATTR_NULL
--};
--
--/* Bus type for frontend drivers. */
--static struct xen_bus_type xenbus_frontend = {
-- .root = "device",
-- .levels = 2, /* device/type/<id> */
-- .get_bus_id = frontend_bus_id,
-- .probe = xenbus_probe_frontend,
-- .bus = {
-- .name = "xen",
-- .match = xenbus_match,
-- .uevent = xenbus_uevent,
-- .probe = xenbus_dev_probe,
-- .remove = xenbus_dev_remove,
-- .shutdown = xenbus_dev_shutdown,
-- .dev_attrs = xenbus_dev_attrs,
--
-- .suspend = xenbus_dev_suspend,
-- .resume = xenbus_dev_resume,
-- },
--};
--
--static void otherend_changed(struct xenbus_watch *watch,
-- const char **vec, unsigned int len)
-+void xenbus_otherend_changed(struct xenbus_watch *watch,
-+ const char **vec, unsigned int len,
-+ int ignore_on_shutdown)
- {
- struct xenbus_device *dev =
- container_of(watch, struct xenbus_device, otherend_watch);
-@@ -226,11 +190,7 @@ static void otherend_changed(struct xenbus_watch *watch,
- * work that can fail e.g., when the rootfs is gone.
- */
- if (system_state > SYSTEM_RUNNING) {
-- struct xen_bus_type *bus = bus;
-- bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
-- /* If we're frontend, drive the state machine to Closed. */
-- /* This should cause the backend to release our resources. */
-- if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
-+ if (ignore_on_shutdown && (state == XenbusStateClosing))
- xenbus_frontend_closed(dev);
- return;
- }
-@@ -238,25 +198,7 @@ static void otherend_changed(struct xenbus_watch *watch,
- if (drv->otherend_changed)
- drv->otherend_changed(dev, state);
- }
--
--
--static int talk_to_otherend(struct xenbus_device *dev)
--{
-- struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
--
-- free_otherend_watch(dev);
-- free_otherend_details(dev);
--
-- return drv->read_otherend_details(dev);
--}
--
--
--static int watch_otherend(struct xenbus_device *dev)
--{
-- return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
-- "%s/%s", dev->otherend, "state");
--}
--
-+EXPORT_SYMBOL_GPL(xenbus_otherend_changed);
-
- int xenbus_dev_probe(struct device *_dev)
- {
-@@ -300,8 +242,9 @@ int xenbus_dev_probe(struct device *_dev)
- fail:
- xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
- xenbus_switch_state(dev, XenbusStateClosed);
-- return -ENODEV;
-+ return err;
- }
-+EXPORT_SYMBOL_GPL(xenbus_dev_probe);
-
- int xenbus_dev_remove(struct device *_dev)
- {
-@@ -319,8 +262,9 @@ int xenbus_dev_remove(struct device *_dev)
- xenbus_switch_state(dev, XenbusStateClosed);
- return 0;
- }
-+EXPORT_SYMBOL_GPL(xenbus_dev_remove);
-
--static void xenbus_dev_shutdown(struct device *_dev)
-+void xenbus_dev_shutdown(struct device *_dev)
- {
- struct xenbus_device *dev = to_xenbus_device(_dev);
- unsigned long timeout = 5*HZ;
-@@ -341,6 +285,7 @@ static void xenbus_dev_shutdown(struct device *_dev)
- out:
- put_device(&dev->dev);
- }
-+EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
-
- int xenbus_register_driver_common(struct xenbus_driver *drv,
- struct xen_bus_type *bus,
-@@ -354,25 +299,7 @@ int xenbus_register_driver_common(struct xenbus_driver *drv,
-
- return driver_register(&drv->driver);
- }
--
--int __xenbus_register_frontend(struct xenbus_driver *drv,
-- struct module *owner, const char *mod_name)
--{
-- int ret;
--
-- drv->read_otherend_details = read_backend_details;
--
-- ret = xenbus_register_driver_common(drv, &xenbus_frontend,
-- owner, mod_name);
-- if (ret)
-- return ret;
--
-- /* If this driver is loaded as a module wait for devices to attach. */
-- wait_for_devices(drv);
--
-- return 0;
--}
--EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
-+EXPORT_SYMBOL_GPL(xenbus_register_driver_common);
-
- void xenbus_unregister_driver(struct xenbus_driver *drv)
- {
-@@ -543,24 +470,7 @@ fail:
- kfree(xendev);
- return err;
- }
--
--/* device/<typename>/<name> */
--static int xenbus_probe_frontend(const char *type, const char *name)
--{
-- char *nodename;
-- int err;
--
-- nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
-- xenbus_frontend.root, type, name);
-- if (!nodename)
-- return -ENOMEM;
--
-- DPRINTK("%s", nodename);
--
-- err = xenbus_probe_node(&xenbus_frontend, type, nodename);
-- kfree(nodename);
-- return err;
--}
-+EXPORT_SYMBOL_GPL(xenbus_probe_node);
-
- static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
- {
-@@ -574,10 +484,11 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
- return PTR_ERR(dir);
-
- for (i = 0; i < dir_n; i++) {
-- err = bus->probe(type, dir[i]);
-+ err = bus->probe(bus, type, dir[i]);
- if (err)
- break;
- }
-+
- kfree(dir);
- return err;
- }
-@@ -597,9 +508,11 @@ int xenbus_probe_devices(struct xen_bus_type *bus)
- if (err)
- break;
- }
-+
- kfree(dir);
- return err;
- }
-+EXPORT_SYMBOL_GPL(xenbus_probe_devices);
-
- static unsigned int char_count(const char *str, char c)
- {
-@@ -662,32 +575,17 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
- }
- EXPORT_SYMBOL_GPL(xenbus_dev_changed);
-
--static void frontend_changed(struct xenbus_watch *watch,
-- const char **vec, unsigned int len)
--{
-- DPRINTK("");
--
-- xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
--}
--
--/* We watch for devices appearing and vanishing. */
--static struct xenbus_watch fe_watch = {
-- .node = "device",
-- .callback = frontend_changed,
--};
--
--static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
-+int xenbus_dev_suspend(struct device *dev, pm_message_t state)
- {
- int err = 0;
- struct xenbus_driver *drv;
-- struct xenbus_device *xdev;
-+ struct xenbus_device *xdev = container_of(dev, struct xenbus_device, dev);
-
-- DPRINTK("");
-+ DPRINTK("%s", xdev->nodename);
-
- if (dev->driver == NULL)
- return 0;
- drv = to_xenbus_driver(dev->driver);
-- xdev = container_of(dev, struct xenbus_device, dev);
- if (drv->suspend)
- err = drv->suspend(xdev, state);
- if (err)
-@@ -695,21 +593,19 @@ static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
- "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
- return 0;
- }
-+EXPORT_SYMBOL_GPL(xenbus_dev_suspend);
-
--static int xenbus_dev_resume(struct device *dev)
-+int xenbus_dev_resume(struct device *dev)
- {
- int err;
- struct xenbus_driver *drv;
-- struct xenbus_device *xdev;
-+ struct xenbus_device *xdev = container_of(dev, struct xenbus_device, dev);
-
-- DPRINTK("");
-+ DPRINTK("%s", xdev->nodename);
-
- if (dev->driver == NULL)
- return 0;
--
- drv = to_xenbus_driver(dev->driver);
-- xdev = container_of(dev, struct xenbus_device, dev);
--
- err = talk_to_otherend(xdev);
- if (err) {
- printk(KERN_WARNING
-@@ -740,6 +636,7 @@ static int xenbus_dev_resume(struct device *dev)
-
- return 0;
- }
-+EXPORT_SYMBOL_GPL(xenbus_dev_resume);
-
- /* A flag to determine if xenstored is 'ready' (i.e. has started) */
- int xenstored_ready = 0;
-@@ -749,10 +646,7 @@ int register_xenstore_notifier(struct notifier_block *nb)
- {
- int ret = 0;
-
-- if (xenstored_ready > 0)
-- ret = nb->notifier_call(nb, 0, NULL);
-- else
-- blocking_notifier_chain_register(&xenstore_chain, nb);
-+ blocking_notifier_chain_register(&xenstore_chain, nb);
-
- return ret;
- }
-@@ -768,57 +662,93 @@ void xenbus_probe(struct work_struct *unused)
- {
- BUG_ON((xenstored_ready <= 0));
-
-- /* Enumerate devices in xenstore and watch for changes. */
-- xenbus_probe_devices(&xenbus_frontend);
-- register_xenbus_watch(&fe_watch);
-- xenbus_backend_probe_and_watch();
--
- /* Notify others that xenstore is up */
- blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
- }
-+EXPORT_SYMBOL_GPL(xenbus_probe);
-+
-+static int __init xenbus_probe_initcall(void)
-+{
-+ if (!xen_domain())
-+ return -ENODEV;
-+
-+ if (xen_initial_domain() || xen_hvm_domain())
-+ return 0;
-+
-+ xenbus_probe(NULL);
-+ return 0;
-+}
-+
-+device_initcall(xenbus_probe_initcall);
-
--static int __init xenbus_probe_init(void)
-+static int __init xenbus_init(void)
- {
- int err = 0;
-+ unsigned long page = 0;
-
- DPRINTK("");
-
- err = -ENODEV;
- if (!xen_domain())
-- goto out_error;
--
-- /* Register ourselves with the kernel bus subsystem */
-- err = bus_register(&xenbus_frontend.bus);
-- if (err)
-- goto out_error;
--
-- err = xenbus_backend_bus_register();
-- if (err)
-- goto out_unreg_front;
-+ return err;
-
- /*
- * Domain0 doesn't have a store_evtchn or store_mfn yet.
- */
- if (xen_initial_domain()) {
-- /* dom0 not yet supported */
-+ struct evtchn_alloc_unbound alloc_unbound;
-+
-+ /* Allocate Xenstore page */
-+ page = get_zeroed_page(GFP_KERNEL);
-+ if (!page)
-+ goto out_error;
-+
-+ xen_store_mfn = xen_start_info->store_mfn =
-+ pfn_to_mfn(virt_to_phys((void *)page) >>
-+ PAGE_SHIFT);
-+
-+ /* Next allocate a local port which xenstored can bind to */
-+ alloc_unbound.dom = DOMID_SELF;
-+ alloc_unbound.remote_dom = 0;
-+
-+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
-+ &alloc_unbound);
-+ if (err == -ENOSYS)
-+ goto out_error;
-+
-+ BUG_ON(err);
-+ xen_store_evtchn = xen_start_info->store_evtchn =
-+ alloc_unbound.port;
-+
-+ xen_store_interface = mfn_to_virt(xen_store_mfn);
- } else {
- xenstored_ready = 1;
-- xen_store_evtchn = xen_start_info->store_evtchn;
-- xen_store_mfn = xen_start_info->store_mfn;
-+ if (xen_hvm_domain()) {
-+ uint64_t v = 0;
-+ err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
-+ if (err)
-+ goto out_error;
-+ xen_store_evtchn = (int)v;
-+ err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
-+ if (err)
-+ goto out_error;
-+ xen_store_mfn = (unsigned long)v;
-+ xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
-+ } else {
-+ xen_store_evtchn = xen_start_info->store_evtchn;
-+ xen_store_mfn = xen_start_info->store_mfn;
-+ xen_store_interface = mfn_to_virt(xen_store_mfn);
-+ }
- }
-- xen_store_interface = mfn_to_virt(xen_store_mfn);
-
- /* Initialize the interface to xenstore. */
- err = xs_init();
- if (err) {
- printk(KERN_WARNING
- "XENBUS: Error initializing xenstore comms: %i\n", err);
-- goto out_unreg_back;
-+ goto out_error;
- }
-
-- if (!xen_initial_domain())
-- xenbus_probe(NULL);
--
- #ifdef CONFIG_XEN_COMPAT_XENFS
- /*
- * Create xenfs mountpoint in /proc for compatibility with
-@@ -829,128 +759,13 @@ static int __init xenbus_probe_init(void)
-
- return 0;
-
-- out_unreg_back:
-- xenbus_backend_bus_unregister();
--
-- out_unreg_front:
-- bus_unregister(&xenbus_frontend.bus);
--
- out_error:
-+ if (page != 0)
-+ free_page(page);
-+
- return err;
- }
-
--postcore_initcall(xenbus_probe_init);
-+postcore_initcall(xenbus_init);
-
- MODULE_LICENSE("GPL");
--
--static int is_device_connecting(struct device *dev, void *data)
--{
-- struct xenbus_device *xendev = to_xenbus_device(dev);
-- struct device_driver *drv = data;
-- struct xenbus_driver *xendrv;
--
-- /*
-- * A device with no driver will never connect. We care only about
-- * devices which should currently be in the process of connecting.
-- */
-- if (!dev->driver)
-- return 0;
--
-- /* Is this search limited to a particular driver? */
-- if (drv && (dev->driver != drv))
-- return 0;
--
-- xendrv = to_xenbus_driver(dev->driver);
-- return (xendev->state < XenbusStateConnected ||
-- (xendev->state == XenbusStateConnected &&
-- xendrv->is_ready && !xendrv->is_ready(xendev)));
--}
--
--static int exists_connecting_device(struct device_driver *drv)
--{
-- return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
-- is_device_connecting);
--}
--
--static int print_device_status(struct device *dev, void *data)
--{
-- struct xenbus_device *xendev = to_xenbus_device(dev);
-- struct device_driver *drv = data;
--
-- /* Is this operation limited to a particular driver? */
-- if (drv && (dev->driver != drv))
-- return 0;
--
-- if (!dev->driver) {
-- /* Information only: is this too noisy? */
-- printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
-- xendev->nodename);
-- } else if (xendev->state < XenbusStateConnected) {
-- enum xenbus_state rstate = XenbusStateUnknown;
-- if (xendev->otherend)
-- rstate = xenbus_read_driver_state(xendev->otherend);
-- printk(KERN_WARNING "XENBUS: Timeout connecting "
-- "to device: %s (local state %d, remote state %d)\n",
-- xendev->nodename, xendev->state, rstate);
-- }
--
-- return 0;
--}
--
--/* We only wait for device setup after most initcalls have run. */
--static int ready_to_wait_for_devices;
--
--/*
-- * On a 5-minute timeout, wait for all devices currently configured. We need
-- * to do this to guarantee that the filesystems and / or network devices
-- * needed for boot are available, before we can allow the boot to proceed.
-- *
-- * This needs to be on a late_initcall, to happen after the frontend device
-- * drivers have been initialised, but before the root fs is mounted.
-- *
-- * A possible improvement here would be to have the tools add a per-device
-- * flag to the store entry, indicating whether it is needed at boot time.
-- * This would allow people who knew what they were doing to accelerate their
-- * boot slightly, but of course needs tools or manual intervention to set up
-- * those flags correctly.
-- */
--static void wait_for_devices(struct xenbus_driver *xendrv)
--{
-- unsigned long start = jiffies;
-- struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
-- unsigned int seconds_waited = 0;
--
-- if (!ready_to_wait_for_devices || !xen_domain())
-- return;
--
-- while (exists_connecting_device(drv)) {
-- if (time_after(jiffies, start + (seconds_waited+5)*HZ)) {
-- if (!seconds_waited)
-- printk(KERN_WARNING "XENBUS: Waiting for "
-- "devices to initialise: ");
-- seconds_waited += 5;
-- printk("%us...", 300 - seconds_waited);
-- if (seconds_waited == 300)
-- break;
-- }
--
-- schedule_timeout_interruptible(HZ/10);
-- }
--
-- if (seconds_waited)
-- printk("\n");
--
-- bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
-- print_device_status);
--}
--
--#ifndef MODULE
--static int __init boot_wait_for_devices(void)
--{
-- ready_to_wait_for_devices = 1;
-- wait_for_devices(NULL);
-- return 0;
--}
--
--late_initcall(boot_wait_for_devices);
--#endif
-diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
-index 6c5e318..0e5fc4c 100644
---- a/drivers/xen/xenbus/xenbus_probe.h
-+++ b/drivers/xen/xenbus/xenbus_probe.h
-@@ -36,26 +36,13 @@
-
- #define XEN_BUS_ID_SIZE 20
-
--#ifdef CONFIG_XEN_BACKEND
--extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
--extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
--extern void xenbus_backend_probe_and_watch(void);
--extern int xenbus_backend_bus_register(void);
--extern void xenbus_backend_bus_unregister(void);
--#else
--static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
--static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
--static inline void xenbus_backend_probe_and_watch(void) {}
--static inline int xenbus_backend_bus_register(void) { return 0; }
--static inline void xenbus_backend_bus_unregister(void) {}
--#endif
--
- struct xen_bus_type
- {
- char *root;
- unsigned int levels;
- int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
-- int (*probe)(const char *type, const char *dir);
-+ int (*probe)(struct xen_bus_type *bus, const char *type, const char *dir);
-+ void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, unsigned int len);
- struct bus_type bus;
- };
-
-@@ -73,4 +60,16 @@ extern int xenbus_probe_devices(struct xen_bus_type *bus);
-
- extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
-
-+extern void xenbus_dev_shutdown(struct device *_dev);
-+
-+extern int xenbus_dev_suspend(struct device *dev, pm_message_t state);
-+extern int xenbus_dev_resume(struct device *dev);
-+
-+extern void xenbus_otherend_changed(struct xenbus_watch *watch,
-+ const char **vec, unsigned int len,
-+ int ignore_on_shutdown);
-+
-+extern int xenbus_read_otherend_details(struct xenbus_device *xendev,
-+ char *id_node, char *path_node);
-+
- #endif
-diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
-new file mode 100644
-index 0000000..9b9dd36
---- /dev/null
-+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
-@@ -0,0 +1,293 @@
-+/******************************************************************************
-+ * Talks to Xen Store to figure out what devices we have (backend half).
-+ *
-+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
-+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
-+ * Copyright (C) 2005, 2006 XenSource Ltd
-+ * Copyright (C) 2007 Solarflare Communications, Inc.
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#define DPRINTK(fmt, args...) \
-+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
-+ __func__, __LINE__, ##args)
-+
-+#include <linux/kernel.h>
-+#include <linux/err.h>
-+#include <linux/string.h>
-+#include <linux/ctype.h>
-+#include <linux/fcntl.h>
-+#include <linux/mm.h>
-+#include <linux/notifier.h>
-+
-+#include <asm/page.h>
-+#include <asm/pgtable.h>
-+#include <asm/xen/hypervisor.h>
-+#include <asm/hypervisor.h>
-+#include <xen/xenbus.h>
-+#include <xen/features.h>
-+
-+#include "xenbus_comms.h"
-+#include "xenbus_probe.h"
-+
-+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
-+static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
-+{
-+ int domid, err;
-+ const char *devid, *type, *frontend;
-+ unsigned int typelen;
-+
-+ type = strchr(nodename, '/');
-+ if (!type)
-+ return -EINVAL;
-+ type++;
-+ typelen = strcspn(type, "/");
-+ if (!typelen || type[typelen] != '/')
-+ return -EINVAL;
-+
-+ devid = strrchr(nodename, '/') + 1;
-+
-+ err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
-+ "frontend", NULL, &frontend,
-+ NULL);
-+ if (err)
-+ return err;
-+ if (strlen(frontend) == 0)
-+ err = -ERANGE;
-+ if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
-+ err = -ENOENT;
-+ kfree(frontend);
-+
-+ if (err)
-+ return err;
-+
-+ if (snprintf(bus_id, XEN_BUS_ID_SIZE,
-+ "%.*s-%i-%s", typelen, type, domid, devid) >= XEN_BUS_ID_SIZE)
-+ return -ENOSPC;
-+ return 0;
-+}
-+
-+static int xenbus_uevent_backend(struct device *dev,
-+ struct kobj_uevent_env *env)
-+{
-+ struct xenbus_device *xdev;
-+ struct xenbus_driver *drv;
-+ struct xen_bus_type *bus;
-+
-+ DPRINTK("");
-+
-+ if (dev == NULL)
-+ return -ENODEV;
-+
-+ xdev = to_xenbus_device(dev);
-+ bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
-+ if (xdev == NULL)
-+ return -ENODEV;
-+
-+ /* stuff we want to pass to /sbin/hotplug */
-+ if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype))
-+ return -ENOMEM;
-+
-+ if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename))
-+ return -ENOMEM;
-+
-+ if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root))
-+ return -ENOMEM;
-+
-+ if (dev->driver) {
-+ drv = to_xenbus_driver(dev->driver);
-+ if (drv && drv->uevent)
-+ return drv->uevent(xdev, env);
-+ }
-+
-+ return 0;
-+}
-+
-+/* backend/<typename>/<frontend-uuid>/<name> */
-+static int xenbus_probe_backend_unit(struct xen_bus_type *bus,
-+ const char *dir,
-+ const char *type,
-+ const char *name)
-+{
-+ char *nodename;
-+ int err;
-+
-+ nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
-+ if (!nodename)
-+ return -ENOMEM;
-+
-+ DPRINTK("%s\n", nodename);
-+
-+ err = xenbus_probe_node(bus, type, nodename);
-+ kfree(nodename);
-+ return err;
-+}
-+
-+/* backend/<typename>/<frontend-domid> */
-+static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, const char *domid)
-+{
-+ char *nodename;
-+ int err = 0;
-+ char **dir;
-+ unsigned int i, dir_n = 0;
-+
-+ DPRINTK("");
-+
-+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid);
-+ if (!nodename)
-+ return -ENOMEM;
-+
-+ dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
-+ if (IS_ERR(dir)) {
-+ kfree(nodename);
-+ return PTR_ERR(dir);
-+ }
-+
-+ for (i = 0; i < dir_n; i++) {
-+ err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]);
-+ if (err)
-+ break;
-+ }
-+ kfree(dir);
-+ kfree(nodename);
-+ return err;
-+}
-+
-+static void frontend_changed(struct xenbus_watch *watch,
-+ const char **vec, unsigned int len)
-+{
-+ xenbus_otherend_changed(watch, vec, len, 0);
-+}
-+
-+static struct device_attribute xenbus_backend_dev_attrs[] = {
-+ __ATTR_NULL
-+};
-+
-+static struct xen_bus_type xenbus_backend = {
-+ .root = "backend",
-+ .levels = 3, /* backend/type/<frontend>/<id> */
-+ .get_bus_id = backend_bus_id,
-+ .probe = xenbus_probe_backend,
-+ .otherend_changed = frontend_changed,
-+ .bus = {
-+ .name = "xen-backend",
-+ .match = xenbus_match,
-+ .uevent = xenbus_uevent_backend,
-+ .probe = xenbus_dev_probe,
-+ .remove = xenbus_dev_remove,
-+ .shutdown = xenbus_dev_shutdown,
-+ .dev_attrs = xenbus_backend_dev_attrs,
-+ },
-+};
-+
-+static void backend_changed(struct xenbus_watch *watch,
-+ const char **vec, unsigned int len)
-+{
-+ DPRINTK("");
-+
-+ xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
-+}
-+
-+static struct xenbus_watch be_watch = {
-+ .node = "backend",
-+ .callback = backend_changed,
-+};
-+
-+static int read_frontend_details(struct xenbus_device *xendev)
-+{
-+ return xenbus_read_otherend_details(xendev, "frontend-id", "frontend");
-+}
-+
-+//void xenbus_backend_suspend(int (*fn)(struct device *, void *))
-+//{
-+// DPRINTK("");
-+// bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
-+//}
-+
-+//void xenbus_backend_resume(int (*fn)(struct device *, void *))
-+//{
-+// DPRINTK("");
-+// bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
-+//}
-+
-+//int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *))
-+//{
-+// return bus_for_each_dev(&xenbus_backend.bus, NULL, arg, fn);
-+//}
-+//EXPORT_SYMBOL_GPL(xenbus_for_each_backend);
-+
-+int xenbus_dev_is_online(struct xenbus_device *dev)
-+{
-+ int rc, val;
-+
-+ rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
-+ if (rc != 1)
-+ val = 0; /* no online node present */
-+
-+ return val;
-+}
-+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
-+
-+int __xenbus_register_backend(struct xenbus_driver *drv,
-+ struct module *owner, const char *mod_name)
-+{
-+ drv->read_otherend_details = read_frontend_details;
-+
-+ return xenbus_register_driver_common(drv, &xenbus_backend,
-+ owner, mod_name);
-+}
-+EXPORT_SYMBOL_GPL(__xenbus_register_backend);
-+
-+static int backend_probe_and_watch(struct notifier_block *notifier,
-+ unsigned long event,
-+ void *data)
-+{
-+ /* Enumerate devices in xenstore and watch for changes. */
-+ xenbus_probe_devices(&xenbus_backend);
-+ register_xenbus_watch(&be_watch);
-+
-+ return NOTIFY_DONE;
-+}
-+
-+static int __init xenbus_probe_backend_init(void)
-+{
-+ static struct notifier_block xenstore_notifier = {
-+ .notifier_call = backend_probe_and_watch
-+ };
-+ int err;
-+
-+ DPRINTK("");
-+
-+ /* Register ourselves with the kernel bus subsystem */
-+ err = bus_register(&xenbus_backend.bus);
-+ if (err)
-+ return err;
-+
-+ register_xenstore_notifier(&xenstore_notifier);
-+
-+ return 0;
-+}
-+subsys_initcall(xenbus_probe_backend_init);
-diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
-new file mode 100644
-index 0000000..5413248
---- /dev/null
-+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
-@@ -0,0 +1,292 @@
-+#define DPRINTK(fmt, args...) \
-+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
-+ __func__, __LINE__, ##args)
-+
-+#include <linux/kernel.h>
-+#include <linux/err.h>
-+#include <linux/string.h>
-+#include <linux/ctype.h>
-+#include <linux/fcntl.h>
-+#include <linux/mm.h>
-+#include <linux/proc_fs.h>
-+#include <linux/notifier.h>
-+#include <linux/kthread.h>
-+#include <linux/mutex.h>
-+#include <linux/io.h>
-+
-+#include <asm/page.h>
-+#include <asm/pgtable.h>
-+#include <asm/xen/hypervisor.h>
-+#include <xen/xenbus.h>
-+#include <xen/events.h>
-+#include <xen/page.h>
-+#include <xen/xen.h>
-+#include <xen/platform_pci.h>
-+
-+#include "xenbus_comms.h"
-+#include "xenbus_probe.h"
-+
-+/* device/<type>/<id> => <type>-<id> */
-+static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
-+{
-+ nodename = strchr(nodename, '/');
-+ if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) {
-+ printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
-+ return -EINVAL;
-+ }
-+
-+ strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE);
-+ if (!strchr(bus_id, '/')) {
-+ printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
-+ return -EINVAL;
-+ }
-+ *strchr(bus_id, '/') = '-';
-+ return 0;
-+}
-+
-+/* device/<typename>/<name> */
-+static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type, const char *name)
-+{
-+ char *nodename;
-+ int err;
-+
-+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name);
-+ if (!nodename)
-+ return -ENOMEM;
-+
-+ DPRINTK("%s", nodename);
-+
-+ err = xenbus_probe_node(bus, type, nodename);
-+ kfree(nodename);
-+ return err;
-+}
-+
-+static int xenbus_uevent_frontend(struct device *_dev, struct kobj_uevent_env *env)
-+{
-+ struct xenbus_device *dev = to_xenbus_device(_dev);
-+
-+ if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype))
-+ return -ENOMEM;
-+
-+ return 0;
-+}
-+
-+
-+static void backend_changed(struct xenbus_watch *watch,
-+ const char **vec, unsigned int len)
-+{
-+ xenbus_otherend_changed(watch, vec, len, 1);
-+}
-+
-+static struct device_attribute xenbus_frontend_dev_attrs[] = {
-+ __ATTR_NULL
-+};
-+
-+
-+static struct xen_bus_type xenbus_frontend = {
-+ .root = "device",
-+ .levels = 2, /* device/type/<id> */
-+ .get_bus_id = frontend_bus_id,
-+ .probe = xenbus_probe_frontend,
-+ .otherend_changed = backend_changed,
-+ .bus = {
-+ .name = "xen",
-+ .match = xenbus_match,
-+ .uevent = xenbus_uevent_frontend,
-+ .probe = xenbus_dev_probe,
-+ .remove = xenbus_dev_remove,
-+ .shutdown = xenbus_dev_shutdown,
-+ .dev_attrs= xenbus_frontend_dev_attrs,
-+
-+ .suspend = xenbus_dev_suspend,
-+ .resume = xenbus_dev_resume,
-+ },
-+};
-+
-+static void frontend_changed(struct xenbus_watch *watch,
-+ const char **vec, unsigned int len)
-+{
-+ DPRINTK("");
-+
-+ xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
-+}
-+
-+
-+/* We watch for devices appearing and vanishing. */
-+static struct xenbus_watch fe_watch = {
-+ .node = "device",
-+ .callback = frontend_changed,
-+};
-+
-+static int read_backend_details(struct xenbus_device *xendev)
-+{
-+ return xenbus_read_otherend_details(xendev, "backend-id", "backend");
-+}
-+
-+static int is_device_connecting(struct device *dev, void *data)
-+{
-+ struct xenbus_device *xendev = to_xenbus_device(dev);
-+ struct device_driver *drv = data;
-+ struct xenbus_driver *xendrv;
-+
-+ /*
-+ * A device with no driver will never connect. We care only about
-+ * devices which should currently be in the process of connecting.
-+ */
-+ if (!dev->driver)
-+ return 0;
-+
-+ /* Is this search limited to a particular driver? */
-+ if (drv && (dev->driver != drv))
-+ return 0;
-+
-+ xendrv = to_xenbus_driver(dev->driver);
-+ return (xendev->state < XenbusStateConnected ||
-+ (xendev->state == XenbusStateConnected &&
-+ xendrv->is_ready && !xendrv->is_ready(xendev)));
-+}
-+
-+static int exists_connecting_device(struct device_driver *drv)
-+{
-+ return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
-+ is_device_connecting);
-+}
-+
-+static int print_device_status(struct device *dev, void *data)
-+{
-+ struct xenbus_device *xendev = to_xenbus_device(dev);
-+ struct device_driver *drv = data;
-+
-+ /* Is this operation limited to a particular driver? */
-+ if (drv && (dev->driver != drv))
-+ return 0;
-+
-+ if (!dev->driver) {
-+ /* Information only: is this too noisy? */
-+ printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
-+ xendev->nodename);
-+ } else if (xendev->state < XenbusStateConnected) {
-+ enum xenbus_state rstate = XenbusStateUnknown;
-+ if (xendev->otherend)
-+ rstate = xenbus_read_driver_state(xendev->otherend);
-+ printk(KERN_WARNING "XENBUS: Timeout connecting "
-+ "to device: %s (local state %d, remote state %d)\n",
-+ xendev->nodename, xendev->state, rstate);
-+ }
-+
-+ return 0;
-+}
-+
-+/* We only wait for device setup after most initcalls have run. */
-+static int ready_to_wait_for_devices;
-+
-+/*
-+ * On a 5-minute timeout, wait for all devices currently configured. We need
-+ * to do this to guarantee that the filesystems and / or network devices
-+ * needed for boot are available, before we can allow the boot to proceed.
-+ *
-+ * This needs to be on a late_initcall, to happen after the frontend device
-+ * drivers have been initialised, but before the root fs is mounted.
-+ *
-+ * A possible improvement here would be to have the tools add a per-device
-+ * flag to the store entry, indicating whether it is needed at boot time.
-+ * This would allow people who knew what they were doing to accelerate their
-+ * boot slightly, but of course needs tools or manual intervention to set up
-+ * those flags correctly.
-+ */
-+static void wait_for_devices(struct xenbus_driver *xendrv)
-+{
-+ unsigned long start = jiffies;
-+ struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
-+ unsigned int seconds_waited = 0;
-+
-+ if (!ready_to_wait_for_devices || !xen_domain())
-+ return;
-+
-+ while (exists_connecting_device(drv)) {
-+ if (time_after(jiffies, start + (seconds_waited+5)*HZ)) {
-+ if (!seconds_waited)
-+ printk(KERN_WARNING "XENBUS: Waiting for "
-+ "devices to initialise: ");
-+ seconds_waited += 5;
-+ printk("%us...", 300 - seconds_waited);
-+ if (seconds_waited == 300)
-+ break;
-+ }
-+
-+ schedule_timeout_interruptible(HZ/10);
-+ }
-+
-+ if (seconds_waited)
-+ printk("\n");
-+
-+ bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
-+ print_device_status);
-+}
-+
-+int __xenbus_register_frontend(struct xenbus_driver *drv,
-+ struct module *owner, const char *mod_name)
-+{
-+ int ret;
-+
-+ drv->read_otherend_details = read_backend_details;
-+
-+ ret = xenbus_register_driver_common(drv, &xenbus_frontend,
-+ owner, mod_name);
-+ if (ret)
-+ return ret;
-+
-+ /* If this driver is loaded as a module wait for devices to attach. */
-+ wait_for_devices(drv);
-+
-+ return 0;
-+}
-+EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
-+
-+static int frontend_probe_and_watch(struct notifier_block *notifier,
-+ unsigned long event,
-+ void *data)
-+{
-+ /* Enumerate devices in xenstore and watch for changes. */
-+ xenbus_probe_devices(&xenbus_frontend);
-+ register_xenbus_watch(&fe_watch);
-+
-+ return NOTIFY_DONE;
-+}
-+
-+
-+static int __init xenbus_probe_frontend_init(void)
-+{
-+ static struct notifier_block xenstore_notifier = {
-+ .notifier_call = frontend_probe_and_watch
-+ };
-+ int err;
-+
-+ DPRINTK("");
-+
-+ /* Register ourselves with the kernel bus subsystem */
-+ err = bus_register(&xenbus_frontend.bus);
-+ if (err)
-+ return err;
-+
-+ register_xenstore_notifier(&xenstore_notifier);
-+
-+ return 0;
-+}
-+subsys_initcall(xenbus_probe_frontend_init);
-+
-+#ifndef MODULE
-+static int __init boot_wait_for_devices(void)
-+{
-+ if (xen_hvm_domain() && !xen_platform_pci_unplug)
-+ return -ENODEV;
-+
-+ ready_to_wait_for_devices = 1;
-+ wait_for_devices(NULL);
-+ return 0;
-+}
-+
-+late_initcall(boot_wait_for_devices);
-+#endif
-+
-+MODULE_LICENSE("GPL");
-diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
-index 7b547f5..5534690 100644
---- a/drivers/xen/xenbus/xenbus_xs.c
-+++ b/drivers/xen/xenbus/xenbus_xs.c
-@@ -76,6 +76,14 @@ struct xs_handle {
- /*
- * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
- * response_mutex is never taken simultaneously with the other three.
-+ *
-+ * transaction_mutex must be held before incrementing
-+ * transaction_count. The mutex is held when a suspend is in
-+ * progress to prevent new transactions starting.
-+ *
-+ * When decrementing transaction_count to zero the wait queue
-+ * should be woken up, the suspend code waits for count to
-+ * reach zero.
- */
-
- /* One request at a time. */
-@@ -85,7 +93,9 @@ struct xs_handle {
- struct mutex response_mutex;
-
- /* Protect transactions against save/restore. */
-- struct rw_semaphore transaction_mutex;
-+ struct mutex transaction_mutex;
-+ atomic_t transaction_count;
-+ wait_queue_head_t transaction_wq;
-
- /* Protect watch (de)register against save/restore. */
- struct rw_semaphore watch_mutex;
-@@ -157,6 +167,31 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
- return body;
- }
-
-+static void transaction_start(void)
-+{
-+ mutex_lock(&xs_state.transaction_mutex);
-+ atomic_inc(&xs_state.transaction_count);
-+ mutex_unlock(&xs_state.transaction_mutex);
-+}
-+
-+static void transaction_end(void)
-+{
-+ if (atomic_dec_and_test(&xs_state.transaction_count))
-+ wake_up(&xs_state.transaction_wq);
-+}
-+
-+static void transaction_suspend(void)
-+{
-+ mutex_lock(&xs_state.transaction_mutex);
-+ wait_event(xs_state.transaction_wq,
-+ atomic_read(&xs_state.transaction_count) == 0);
-+}
-+
-+static void transaction_resume(void)
-+{
-+ mutex_unlock(&xs_state.transaction_mutex);
-+}
-+
- void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
- {
- void *ret;
-@@ -164,7 +199,7 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
- int err;
-
- if (req_msg.type == XS_TRANSACTION_START)
-- down_read(&xs_state.transaction_mutex);
-+ transaction_start();
-
- mutex_lock(&xs_state.request_mutex);
-
-@@ -180,7 +215,7 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
- if ((msg->type == XS_TRANSACTION_END) ||
- ((req_msg.type == XS_TRANSACTION_START) &&
- (msg->type == XS_ERROR)))
-- up_read(&xs_state.transaction_mutex);
-+ transaction_end();
-
- return ret;
- }
-@@ -432,11 +467,11 @@ int xenbus_transaction_start(struct xenbus_transaction *t)
- {
- char *id_str;
-
-- down_read(&xs_state.transaction_mutex);
-+ transaction_start();
-
- id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
- if (IS_ERR(id_str)) {
-- up_read(&xs_state.transaction_mutex);
-+ transaction_end();
- return PTR_ERR(id_str);
- }
-
-@@ -461,7 +496,7 @@ int xenbus_transaction_end(struct xenbus_transaction t, int abort)
-
- err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
-
-- up_read(&xs_state.transaction_mutex);
-+ transaction_end();
-
- return err;
- }
-@@ -662,7 +697,7 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
-
- void xs_suspend(void)
- {
-- down_write(&xs_state.transaction_mutex);
-+ transaction_suspend();
- down_write(&xs_state.watch_mutex);
- mutex_lock(&xs_state.request_mutex);
- mutex_lock(&xs_state.response_mutex);
-@@ -677,7 +712,7 @@ void xs_resume(void)
-
- mutex_unlock(&xs_state.response_mutex);
- mutex_unlock(&xs_state.request_mutex);
-- up_write(&xs_state.transaction_mutex);
-+ transaction_resume();
-
- /* No need for watches_lock: the watch_mutex is sufficient. */
- list_for_each_entry(watch, &watches, list) {
-@@ -693,7 +728,7 @@ void xs_suspend_cancel(void)
- mutex_unlock(&xs_state.response_mutex);
- mutex_unlock(&xs_state.request_mutex);
- up_write(&xs_state.watch_mutex);
-- up_write(&xs_state.transaction_mutex);
-+ mutex_unlock(&xs_state.transaction_mutex);
- }
-
- static int xenwatch_thread(void *unused)
-@@ -843,8 +878,10 @@ int xs_init(void)
-
- mutex_init(&xs_state.request_mutex);
- mutex_init(&xs_state.response_mutex);
-- init_rwsem(&xs_state.transaction_mutex);
-+ mutex_init(&xs_state.transaction_mutex);
- init_rwsem(&xs_state.watch_mutex);
-+ atomic_set(&xs_state.transaction_count, 0);
-+ init_waitqueue_head(&xs_state.transaction_wq);
-
- /* Initialize the shared memory rings to talk to xenstored */
- err = xb_init_comms();
-diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
-index 25275c3..4fde944 100644
---- a/drivers/xen/xenfs/Makefile
-+++ b/drivers/xen/xenfs/Makefile
-@@ -1,3 +1,4 @@
- obj-$(CONFIG_XENFS) += xenfs.o
-
--xenfs-objs = super.o xenbus.o
-\ No newline at end of file
-+xenfs-y = super.o xenbus.o privcmd.o
-+xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
-diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
-new file mode 100644
-index 0000000..f80be7f
---- /dev/null
-+++ b/drivers/xen/xenfs/privcmd.c
-@@ -0,0 +1,404 @@
-+/******************************************************************************
-+ * privcmd.c
-+ *
-+ * Interface to privileged domain-0 commands.
-+ *
-+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/string.h>
-+#include <linux/errno.h>
-+#include <linux/mm.h>
-+#include <linux/mman.h>
-+#include <linux/uaccess.h>
-+#include <linux/swap.h>
-+#include <linux/smp_lock.h>
-+#include <linux/highmem.h>
-+#include <linux/pagemap.h>
-+#include <linux/seq_file.h>
-+
-+#include <asm/pgalloc.h>
-+#include <asm/pgtable.h>
-+#include <asm/tlb.h>
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/hypercall.h>
-+
-+#include <xen/xen.h>
-+#include <xen/privcmd.h>
-+#include <xen/interface/xen.h>
-+#include <xen/features.h>
-+#include <xen/page.h>
-+#include <xen/xen-ops.h>
-+
-+#ifndef HAVE_ARCH_PRIVCMD_MMAP
-+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-+#endif
-+
-+static long privcmd_ioctl_hypercall(void __user *udata)
-+{
-+ struct privcmd_hypercall hypercall;
-+ long ret;
-+
-+ if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
-+ return -EFAULT;
-+
-+ ret = privcmd_call(hypercall.op,
-+ hypercall.arg[0], hypercall.arg[1],
-+ hypercall.arg[2], hypercall.arg[3],
-+ hypercall.arg[4]);
-+
-+ return ret;
-+}
-+
-+static void free_page_list(struct list_head *pages)
-+{
-+ struct page *p, *n;
-+
-+ list_for_each_entry_safe(p, n, pages, lru)
-+ __free_page(p);
-+
-+ INIT_LIST_HEAD(pages);
-+}
-+
-+/*
-+ * Given an array of items in userspace, return a list of pages
-+ * containing the data. If copying fails, either because of memory
-+ * allocation failure or a problem reading user memory, return an
-+ * error code; its up to the caller to dispose of any partial list.
-+ */
-+static int gather_array(struct list_head *pagelist,
-+ unsigned nelem, size_t size,
-+ void __user *data)
-+{
-+ unsigned pageidx;
-+ void *pagedata;
-+ int ret;
-+
-+ if (size > PAGE_SIZE)
-+ return 0;
-+
-+ pageidx = PAGE_SIZE;
-+ pagedata = NULL; /* quiet, gcc */
-+ while (nelem--) {
-+ if (pageidx > PAGE_SIZE-size) {
-+ struct page *page = alloc_page(GFP_KERNEL);
-+
-+ ret = -ENOMEM;
-+ if (page == NULL)
-+ goto fail;
-+
-+ pagedata = page_address(page);
-+
-+ list_add_tail(&page->lru, pagelist);
-+ pageidx = 0;
-+ }
-+
-+ ret = -EFAULT;
-+ if (copy_from_user(pagedata + pageidx, data, size))
-+ goto fail;
-+
-+ data += size;
-+ pageidx += size;
-+ }
-+
-+ ret = 0;
-+
-+fail:
-+ return ret;
-+}
-+
-+/*
-+ * Call function "fn" on each element of the array fragmented
-+ * over a list of pages.
-+ */
-+static int traverse_pages(unsigned nelem, size_t size,
-+ struct list_head *pos,
-+ int (*fn)(void *data, void *state),
-+ void *state)
-+{
-+ void *pagedata;
-+ unsigned pageidx;
-+ int ret = 0;
-+
-+ BUG_ON(size > PAGE_SIZE);
-+
-+ pageidx = PAGE_SIZE;
-+ pagedata = NULL; /* hush, gcc */
-+
-+ while (nelem--) {
-+ if (pageidx > PAGE_SIZE-size) {
-+ struct page *page;
-+ pos = pos->next;
-+ page = list_entry(pos, struct page, lru);
-+ pagedata = page_address(page);
-+ pageidx = 0;
-+ }
-+
-+ ret = (*fn)(pagedata + pageidx, state);
-+ if (ret)
-+ break;
-+ pageidx += size;
-+ }
-+
-+ return ret;
-+}
-+
-+struct mmap_mfn_state {
-+ unsigned long va;
-+ struct vm_area_struct *vma;
-+ domid_t domain;
-+};
-+
-+static int mmap_mfn_range(void *data, void *state)
-+{
-+ struct privcmd_mmap_entry *msg = data;
-+ struct mmap_mfn_state *st = state;
-+ struct vm_area_struct *vma = st->vma;
-+ int rc;
-+
-+ /* Do not allow range to wrap the address space. */
-+ if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
-+ ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
-+ return -EINVAL;
-+
-+ /* Range chunks must be contiguous in va space. */
-+ if ((msg->va != st->va) ||
-+ ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
-+ return -EINVAL;
-+
-+ rc = xen_remap_domain_mfn_range(vma,
-+ msg->va & PAGE_MASK,
-+ msg->mfn, msg->npages,
-+ vma->vm_page_prot,
-+ st->domain);
-+ if (rc < 0)
-+ return rc;
-+
-+ st->va += msg->npages << PAGE_SHIFT;
-+
-+ return 0;
-+}
-+
-+static long privcmd_ioctl_mmap(void __user *udata)
-+{
-+ struct privcmd_mmap mmapcmd;
-+ struct mm_struct *mm = current->mm;
-+ struct vm_area_struct *vma;
-+ int rc;
-+ LIST_HEAD(pagelist);
-+ struct mmap_mfn_state state;
-+
-+ if (!xen_initial_domain())
-+ return -EPERM;
-+
-+ if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
-+ return -EFAULT;
-+
-+ rc = gather_array(&pagelist,
-+ mmapcmd.num, sizeof(struct privcmd_mmap_entry),
-+ mmapcmd.entry);
-+
-+ if (rc || list_empty(&pagelist))
-+ goto out;
-+
-+ down_write(&mm->mmap_sem);
-+
-+ {
-+ struct page *page = list_first_entry(&pagelist,
-+ struct page, lru);
-+ struct privcmd_mmap_entry *msg = page_address(page);
-+
-+ vma = find_vma(mm, msg->va);
-+ rc = -EINVAL;
-+
-+ if (!vma || (msg->va != vma->vm_start) ||
-+ !privcmd_enforce_singleshot_mapping(vma))
-+ goto out_up;
-+ }
-+
-+ state.va = vma->vm_start;
-+ state.vma = vma;
-+ state.domain = mmapcmd.dom;
-+
-+ rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
-+ &pagelist,
-+ mmap_mfn_range, &state);
-+
-+
-+out_up:
-+ up_write(&mm->mmap_sem);
-+
-+out:
-+ free_page_list(&pagelist);
-+
-+ return rc;
-+}
-+
-+struct mmap_batch_state {
-+ domid_t domain;
-+ unsigned long va;
-+ struct vm_area_struct *vma;
-+ int err;
-+
-+ xen_pfn_t __user *user;
-+};
-+
-+static int mmap_batch_fn(void *data, void *state)
-+{
-+ xen_pfn_t *mfnp = data;
-+ struct mmap_batch_state *st = state;
-+
-+ if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-+ st->vma->vm_page_prot, st->domain) < 0) {
-+ *mfnp |= 0xf0000000U;
-+ st->err++;
-+ }
-+ st->va += PAGE_SIZE;
-+
-+ return 0;
-+}
-+
-+static int mmap_return_errors(void *data, void *state)
-+{
-+ xen_pfn_t *mfnp = data;
-+ struct mmap_batch_state *st = state;
-+
-+ put_user(*mfnp, st->user++);
-+
-+ return 0;
-+}
-+
-+static struct vm_operations_struct privcmd_vm_ops;
-+
-+static long privcmd_ioctl_mmap_batch(void __user *udata)
-+{
-+ int ret;
-+ struct privcmd_mmapbatch m;
-+ struct mm_struct *mm = current->mm;
-+ struct vm_area_struct *vma;
-+ unsigned long nr_pages;
-+ LIST_HEAD(pagelist);
-+ struct mmap_batch_state state;
-+
-+ if (!xen_initial_domain())
-+ return -EPERM;
-+
-+ if (copy_from_user(&m, udata, sizeof(m)))
-+ return -EFAULT;
-+
-+ nr_pages = m.num;
-+ if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
-+ return -EINVAL;
-+
-+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
-+ m.arr);
-+
-+ if (ret || list_empty(&pagelist))
-+ goto out;
-+
-+ down_write(&mm->mmap_sem);
-+
-+ vma = find_vma(mm, m.addr);
-+ ret = -EINVAL;
-+ if (!vma ||
-+ vma->vm_ops != &privcmd_vm_ops ||
-+ (m.addr != vma->vm_start) ||
-+ ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
-+ !privcmd_enforce_singleshot_mapping(vma)) {
-+ up_write(&mm->mmap_sem);
-+ goto out;
-+ }
-+
-+ state.domain = m.dom;
-+ state.vma = vma;
-+ state.va = m.addr;
-+ state.err = 0;
-+
-+ ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-+ &pagelist, mmap_batch_fn, &state);
-+
-+ up_write(&mm->mmap_sem);
-+
-+ if (state.err > 0) {
-+ ret = 0;
-+
-+ state.user = m.arr;
-+ traverse_pages(m.num, sizeof(xen_pfn_t),
-+ &pagelist,
-+ mmap_return_errors, &state);
-+ }
-+
-+out:
-+ free_page_list(&pagelist);
-+
-+ return ret;
-+}
-+
-+static long privcmd_ioctl(struct file *file,
-+ unsigned int cmd, unsigned long data)
-+{
-+ int ret = -ENOSYS;
-+ void __user *udata = (void __user *) data;
-+
-+ switch (cmd) {
-+ case IOCTL_PRIVCMD_HYPERCALL:
-+ ret = privcmd_ioctl_hypercall(udata);
-+ break;
-+
-+ case IOCTL_PRIVCMD_MMAP:
-+ ret = privcmd_ioctl_mmap(udata);
-+ break;
-+
-+ case IOCTL_PRIVCMD_MMAPBATCH:
-+ ret = privcmd_ioctl_mmap_batch(udata);
-+ break;
-+
-+ default:
-+ ret = -EINVAL;
-+ break;
-+ }
-+
-+ return ret;
-+}
-+
-+#ifndef HAVE_ARCH_PRIVCMD_MMAP
-+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+ printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
-+ vma, vma->vm_start, vma->vm_end,
-+ vmf->pgoff, vmf->virtual_address);
-+
-+ return VM_FAULT_SIGBUS;
-+}
-+
-+static struct vm_operations_struct privcmd_vm_ops = {
-+ .fault = privcmd_fault
-+};
-+
-+static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
-+{
-+ /* Unsupported for auto-translate guests. */
-+ if (xen_feature(XENFEAT_auto_translated_physmap))
-+ return -ENOSYS;
-+
-+ /* DONTCOPY is essential for Xen as copy_page_range is broken. */
-+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
-+ vma->vm_ops = &privcmd_vm_ops;
-+ vma->vm_private_data = NULL;
-+
-+ return 0;
-+}
-+
-+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
-+{
-+ return (xchg(&vma->vm_private_data, (void *)1) == NULL);
-+}
-+#endif
-+
-+const struct file_operations privcmd_file_ops = {
-+ .unlocked_ioctl = privcmd_ioctl,
-+ .mmap = privcmd_mmap,
-+};
-diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
-index 6559e0c..984891e 100644
---- a/drivers/xen/xenfs/super.c
-+++ b/drivers/xen/xenfs/super.c
-@@ -12,6 +12,10 @@
- #include <linux/module.h>
- #include <linux/fs.h>
- #include <linux/magic.h>
-+#include <linux/mm.h>
-+#include <linux/backing-dev.h>
-+
-+#include <xen/xen.h>
-
- #include "xenfs.h"
-
-@@ -20,6 +24,62 @@
- MODULE_DESCRIPTION("Xen filesystem");
- MODULE_LICENSE("GPL");
-
-+static int xenfs_set_page_dirty(struct page *page)
-+{
-+ return !TestSetPageDirty(page);
-+}
-+
-+static const struct address_space_operations xenfs_aops = {
-+ .set_page_dirty = xenfs_set_page_dirty,
-+};
-+
-+static struct backing_dev_info xenfs_backing_dev_info = {
-+ .ra_pages = 0, /* No readahead */
-+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
-+};
-+
-+static struct inode *xenfs_make_inode(struct super_block *sb, int mode)
-+{
-+ struct inode *ret = new_inode(sb);
-+
-+ if (ret) {
-+ ret->i_mode = mode;
-+ ret->i_mapping->a_ops = &xenfs_aops;
-+ ret->i_mapping->backing_dev_info = &xenfs_backing_dev_info;
-+ ret->i_uid = ret->i_gid = 0;
-+ ret->i_blocks = 0;
-+ ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
-+ }
-+ return ret;
-+}
-+
-+static struct dentry *xenfs_create_file(struct super_block *sb,
-+ struct dentry *parent,
-+ const char *name,
-+ const struct file_operations *fops,
-+ void *data,
-+ int mode)
-+{
-+ struct dentry *dentry;
-+ struct inode *inode;
-+
-+ dentry = d_alloc_name(parent, name);
-+ if (!dentry)
-+ return NULL;
-+
-+ inode = xenfs_make_inode(sb, S_IFREG | mode);
-+ if (!inode) {
-+ dput(dentry);
-+ return NULL;
-+ }
-+
-+ inode->i_fop = fops;
-+ inode->i_private = data;
-+
-+ d_add(dentry, inode);
-+ return dentry;
-+}
-+
- static ssize_t capabilities_read(struct file *file, char __user *buf,
- size_t size, loff_t *off)
- {
-@@ -41,10 +101,23 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
- [1] = {},
- { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
- { "capabilities", &capabilities_file_ops, S_IRUGO },
-+ { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
- {""},
- };
-+ int rc;
-+
-+ rc = simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files);
-+ if (rc < 0)
-+ return rc;
-+
-+ if (xen_initial_domain()) {
-+ xenfs_create_file(sb, sb->s_root, "xsd_kva",
-+ &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR);
-+ xenfs_create_file(sb, sb->s_root, "xsd_port",
-+ &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR);
-+ }
-
-- return simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files);
-+ return rc;
- }
-
- static int xenfs_get_sb(struct file_system_type *fs_type,
-@@ -63,16 +136,30 @@ static struct file_system_type xenfs_type = {
-
- static int __init xenfs_init(void)
- {
-- if (xen_pv_domain())
-- return register_filesystem(&xenfs_type);
-+ int err;
-+ if (!xen_domain()) {
-+ printk(KERN_INFO "xenfs: not registering filesystem on non-xen platform\n");
-+ return 0;
-+ }
-+
-+ err = register_filesystem(&xenfs_type);
-+ if (err) {
-+ printk(KERN_ERR "xenfs: Unable to register filesystem!\n");
-+ goto out;
-+ }
-+
-+ err = bdi_init(&xenfs_backing_dev_info);
-+ if (err)
-+ unregister_filesystem(&xenfs_type);
-+
-+ out:
-
-- printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n");
-- return 0;
-+ return err;
- }
-
- static void __exit xenfs_exit(void)
- {
-- if (xen_pv_domain())
-+ if (xen_domain())
- unregister_filesystem(&xenfs_type);
- }
-
-diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenfs/xenbus.c
-index 6c4269b..0ddef43 100644
---- a/drivers/xen/xenfs/xenbus.c
-+++ b/drivers/xen/xenfs/xenbus.c
-@@ -121,8 +121,12 @@ static ssize_t xenbus_file_read(struct file *filp,
- int ret;
-
- mutex_lock(&u->reply_mutex);
-+again:
- while (list_empty(&u->read_buffers)) {
- mutex_unlock(&u->reply_mutex);
-+ if (filp->f_flags & O_NONBLOCK)
-+ return -EAGAIN;
-+
- ret = wait_event_interruptible(u->read_waitq,
- !list_empty(&u->read_buffers));
- if (ret)
-@@ -140,7 +144,7 @@ static ssize_t xenbus_file_read(struct file *filp,
- i += sz - ret;
- rb->cons += sz - ret;
-
-- if (ret != sz) {
-+ if (ret != 0) {
- if (i == 0)
- i = -EFAULT;
- goto out;
-@@ -156,6 +160,8 @@ static ssize_t xenbus_file_read(struct file *filp,
- struct read_buffer, list);
- }
- }
-+ if (i == 0)
-+ goto again;
-
- out:
- mutex_unlock(&u->reply_mutex);
-@@ -403,6 +409,7 @@ static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
-
- mutex_lock(&u->reply_mutex);
- rc = queue_reply(&u->read_buffers, &reply, sizeof(reply));
-+ wake_up(&u->read_waitq);
- mutex_unlock(&u->reply_mutex);
- }
-
-@@ -451,7 +458,7 @@ static ssize_t xenbus_file_write(struct file *filp,
-
- ret = copy_from_user(u->u.buffer + u->len, ubuf, len);
-
-- if (ret == len) {
-+ if (ret != 0) {
- rc = -EFAULT;
- goto out;
- }
-@@ -484,21 +491,6 @@ static ssize_t xenbus_file_write(struct file *filp,
- msg_type = u->u.msg.type;
-
- switch (msg_type) {
-- case XS_TRANSACTION_START:
-- case XS_TRANSACTION_END:
-- case XS_DIRECTORY:
-- case XS_READ:
-- case XS_GET_PERMS:
-- case XS_RELEASE:
-- case XS_GET_DOMAIN_PATH:
-- case XS_WRITE:
-- case XS_MKDIR:
-- case XS_RM:
-- case XS_SET_PERMS:
-- /* Send out a transaction */
-- ret = xenbus_write_transaction(msg_type, u);
-- break;
--
- case XS_WATCH:
- case XS_UNWATCH:
- /* (Un)Ask for some path to be watched for changes */
-@@ -506,7 +498,8 @@ static ssize_t xenbus_file_write(struct file *filp,
- break;
-
- default:
-- ret = -EINVAL;
-+ /* Send out a transaction */
-+ ret = xenbus_write_transaction(msg_type, u);
- break;
- }
- if (ret != 0)
-diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
-index 51f08b2..b68aa62 100644
---- a/drivers/xen/xenfs/xenfs.h
-+++ b/drivers/xen/xenfs/xenfs.h
-@@ -2,5 +2,8 @@
- #define _XENFS_XENBUS_H
-
- extern const struct file_operations xenbus_file_ops;
-+extern const struct file_operations privcmd_file_ops;
-+extern const struct file_operations xsd_kva_file_ops;
-+extern const struct file_operations xsd_port_file_ops;
-
- #endif /* _XENFS_XENBUS_H */
-diff --git a/drivers/xen/xenfs/xenstored.c b/drivers/xen/xenfs/xenstored.c
-new file mode 100644
-index 0000000..af10804
---- /dev/null
-+++ b/drivers/xen/xenfs/xenstored.c
-@@ -0,0 +1,67 @@
-+#include <linux/types.h>
-+#include <linux/mm.h>
-+#include <linux/fs.h>
-+
-+#include <xen/page.h>
-+
-+#include "xenfs.h"
-+#include "../xenbus/xenbus_comms.h"
-+
-+static ssize_t xsd_read(struct file *file, char __user *buf,
-+ size_t size, loff_t *off)
-+{
-+ const char *str = (const char *)file->private_data;
-+ return simple_read_from_buffer(buf, size, off, str, strlen(str));
-+}
-+
-+static int xsd_release(struct inode *inode, struct file *file)
-+{
-+ kfree(file->private_data);
-+ return 0;
-+}
-+
-+static int xsd_kva_open(struct inode *inode, struct file *file)
-+{
-+ file->private_data = (void *)kasprintf(GFP_KERNEL, "0x%p",
-+ xen_store_interface);
-+ if (!file->private_data)
-+ return -ENOMEM;
-+ return 0;
-+}
-+
-+static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma)
-+{
-+ size_t size = vma->vm_end - vma->vm_start;
-+
-+ if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
-+ return -EINVAL;
-+
-+ if (remap_pfn_range(vma, vma->vm_start,
-+ virt_to_pfn(xen_store_interface),
-+ size, vma->vm_page_prot))
-+ return -EAGAIN;
-+
-+ return 0;
-+}
-+
-+const struct file_operations xsd_kva_file_ops = {
-+ .open = xsd_kva_open,
-+ .mmap = xsd_kva_mmap,
-+ .read = xsd_read,
-+ .release = xsd_release,
-+};
-+
-+static int xsd_port_open(struct inode *inode, struct file *file)
-+{
-+ file->private_data = (void *)kasprintf(GFP_KERNEL, "%d",
-+ xen_store_evtchn);
-+ if (!file->private_data)
-+ return -ENOMEM;
-+ return 0;
-+}
-+
-+const struct file_operations xsd_port_file_ops = {
-+ .open = xsd_port_open,
-+ .read = xsd_read,
-+ .release = xsd_release,
-+};
-diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
-index f4906f6..e7233e8 100644
---- a/include/acpi/acpi_drivers.h
-+++ b/include/acpi/acpi_drivers.h
-@@ -154,4 +154,25 @@ static inline void unregister_hotplug_dock_device(acpi_handle handle)
- }
- #endif
-
-+/*--------------------------------------------------------------------------
-+ Memory
-+ -------------------------------------------------------------------------- */
-+#if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
-+ defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)
-+struct acpi_memory_info {
-+ struct list_head list;
-+ u64 start_addr; /* Memory Range start physical addr */
-+ u64 length; /* Memory Range length */
-+ unsigned short caching; /* memory cache attribute */
-+ unsigned short write_protect; /* memory read/write attribute */
-+ unsigned int enabled:1;
-+};
-+
-+struct acpi_memory_device {
-+ struct acpi_device *device;
-+ unsigned int state; /* State of the memory device */
-+ struct list_head res_list;
-+};
-+#endif
-+
- #endif /*__ACPI_DRIVERS_H__*/
-diff --git a/include/acpi/processor.h b/include/acpi/processor.h
-index e7bdaaf..6aa3111 100644
---- a/include/acpi/processor.h
-+++ b/include/acpi/processor.h
-@@ -239,6 +239,25 @@ struct acpi_processor_errata {
- } piix4;
- };
-
-+extern int acpi_processor_errata(struct acpi_processor *pr);
-+#ifdef CONFIG_ACPI_PROCFS
-+extern int acpi_processor_add_fs(struct acpi_device *device);
-+extern int acpi_processor_remove_fs(struct acpi_device *device);
-+#else
-+static inline int acpi_processor_add_fs(struct acpi_device *device)
-+{
-+ return 0;
-+}
-+
-+static inline int acpi_processor_remove_fs(struct acpi_device *device)
-+{
-+ return 0;
-+}
-+#endif
-+extern int acpi_processor_set_pdc(struct acpi_processor *pr);
-+extern int acpi_processor_remove(struct acpi_device *device, int type);
-+extern void acpi_processor_notify(struct acpi_device *device, u32 event);
-+
- extern int acpi_processor_preregister_performance(struct
- acpi_processor_performance
- *performance);
-@@ -296,6 +315,8 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
- void acpi_processor_ppc_init(void);
- void acpi_processor_ppc_exit(void);
- int acpi_processor_ppc_has_changed(struct acpi_processor *pr);
-+int acpi_processor_get_performance_info(struct acpi_processor *pr);
-+int acpi_processor_get_psd(struct acpi_processor *pr);
- #else
- static inline void acpi_processor_ppc_init(void)
- {
-@@ -332,6 +353,7 @@ int acpi_processor_power_init(struct acpi_processor *pr,
- int acpi_processor_cst_has_changed(struct acpi_processor *pr);
- int acpi_processor_power_exit(struct acpi_processor *pr,
- struct acpi_device *device);
-+int acpi_processor_get_power_info(struct acpi_processor *pr);
- int acpi_processor_suspend(struct acpi_device * device, pm_message_t state);
- int acpi_processor_resume(struct acpi_device * device);
- extern struct cpuidle_driver acpi_idle_driver;
-diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h
-index 26373cf..9fb4270 100644
---- a/include/asm-generic/pci.h
-+++ b/include/asm-generic/pci.h
-@@ -43,6 +43,8 @@ pcibios_select_root(struct pci_dev *pdev, struct resource *res)
- return root;
- }
-
-+#ifndef HAVE_ARCH_PCIBIOS_SCAN_ALL_FNS
-+#endif
- #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
- static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
- {
-diff --git a/include/drm/drmP.h b/include/drm/drmP.h
-index 7ad3faa..cf9ddce 100644
---- a/include/drm/drmP.h
-+++ b/include/drm/drmP.h
-@@ -1388,7 +1388,7 @@ extern int drm_vma_info(struct seq_file *m, void *data);
- #endif
-
- /* Scatter Gather Support (drm_scatter.h) */
--extern void drm_sg_cleanup(struct drm_sg_mem * entry);
-+extern void drm_sg_cleanup(struct drm_device *dev, struct drm_sg_mem * entry);
- extern int drm_sg_alloc_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file_priv);
- extern int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request);
-diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
-index dd97fb8..b10ec49 100644
---- a/include/linux/bootmem.h
-+++ b/include/linux/bootmem.h
-@@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat,
- unsigned long addr,
- unsigned long size);
- extern void free_bootmem(unsigned long addr, unsigned long size);
-+extern void free_bootmem_late(unsigned long addr, unsigned long size);
-
- /*
- * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
-diff --git a/include/linux/dmar.h b/include/linux/dmar.h
-index 4a2b162..5de4c9e 100644
---- a/include/linux/dmar.h
-+++ b/include/linux/dmar.h
-@@ -208,16 +208,9 @@ struct dmar_atsr_unit {
- u8 include_all:1; /* include all ports */
- };
-
--/* Intel DMAR initialization functions */
- extern int intel_iommu_init(void);
--#else
--static inline int intel_iommu_init(void)
--{
--#ifdef CONFIG_INTR_REMAP
-- return dmar_dev_scope_init();
--#else
-- return -ENODEV;
--#endif
--}
--#endif /* !CONFIG_DMAR */
-+#else /* !CONFIG_DMAR: */
-+static inline int intel_iommu_init(void) { return -ENODEV; }
-+#endif /* CONFIG_DMAR */
-+
- #endif /* __DMAR_H__ */
-diff --git a/include/linux/fb.h b/include/linux/fb.h
-index 862e7d4..74d67ca 100644
---- a/include/linux/fb.h
-+++ b/include/linux/fb.h
-@@ -763,6 +763,7 @@ struct fb_tile_ops {
- * takes over; acceleration engine should be in a quiescent state */
-
- /* hints */
-+#define FBINFO_VIRTFB 0x0004 /* FB is System RAM, not device. */
- #define FBINFO_PARTIAL_PAN_OK 0x0040 /* otw use pan only for double-buffering */
- #define FBINFO_READS_FAST 0x0080 /* soft-copy faster than rendering */
-
-diff --git a/include/linux/if_link.h b/include/linux/if_link.h
-index 176c518..d681cc9 100644
---- a/include/linux/if_link.h
-+++ b/include/linux/if_link.h
-@@ -81,6 +81,8 @@ enum
- #define IFLA_LINKINFO IFLA_LINKINFO
- IFLA_NET_NS_PID,
- IFLA_IFALIAS,
-+ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */
-+ IFLA_VFINFO_LIST,
- __IFLA_MAX
- };
-
-@@ -190,4 +192,47 @@ struct ifla_vlan_qos_mapping
- __u32 to;
- };
-
-+/* SR-IOV virtual function managment section */
-+
-+enum {
-+ IFLA_VF_INFO_UNSPEC,
-+ IFLA_VF_INFO,
-+ __IFLA_VF_INFO_MAX,
-+};
-+
-+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
-+
-+enum {
-+ IFLA_VF_UNSPEC,
-+ IFLA_VF_MAC, /* Hardware queue specific attributes */
-+ IFLA_VF_VLAN,
-+ IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */
-+ __IFLA_VF_MAX,
-+};
-+
-+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
-+
-+struct ifla_vf_mac {
-+ __u32 vf;
-+ __u8 mac[32]; /* MAX_ADDR_LEN */
-+};
-+
-+struct ifla_vf_vlan {
-+ __u32 vf;
-+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
-+ __u32 qos;
-+};
-+
-+struct ifla_vf_tx_rate {
-+ __u32 vf;
-+ __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
-+};
-+
-+struct ifla_vf_info {
-+ __u32 vf;
-+ __u8 mac[32];
-+ __u32 vlan;
-+ __u32 qos;
-+ __u32 tx_rate;
-+};
- #endif /* _LINUX_IF_LINK_H */
-diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 11e5be6..4c98621 100644
---- a/include/linux/mm.h
-+++ b/include/linux/mm.h
-@@ -109,6 +109,12 @@ extern unsigned int kobjsize(const void *objp);
- #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
- #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */
- #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
-+#ifdef CONFIG_XEN
-+#define VM_FOREIGN 0x20000000 /* Has pages belonging to another VM */
-+struct vm_foreign_map {
-+ struct page **map;
-+};
-+#endif
-
- #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
- #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
-@@ -199,6 +205,11 @@ struct vm_operations_struct {
- */
- int (*access)(struct vm_area_struct *vma, unsigned long addr,
- void *buf, int len, int write);
-+
-+ /* Area-specific function for clearing the PTE at @ptep. Returns the
-+ * original value of @ptep. */
-+ pte_t (*zap_pte)(struct vm_area_struct *vma,
-+ unsigned long addr, pte_t *ptep, int is_fullmm);
- #ifdef CONFIG_NUMA
- /*
- * set_policy() op must add a reference to any non-NULL @new mempolicy
-diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
-index ec12f8c..3f4991c 100644
---- a/include/linux/netdevice.h
-+++ b/include/linux/netdevice.h
-@@ -28,6 +28,7 @@
- #include <linux/if.h>
- #include <linux/if_ether.h>
- #include <linux/if_packet.h>
-+#include <linux/if_link.h>
-
- #ifdef __KERNEL__
- #include <linux/timer.h>
-@@ -577,6 +578,13 @@ struct netdev_queue {
- * this function is called when a VLAN id is unregistered.
- *
- * void (*ndo_poll_controller)(struct net_device *dev);
-+ *
-+ * SR-IOV management functions.
-+ * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
-+ * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
-+ * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
-+ * int (*ndo_get_vf_config)(struct net_device *dev,
-+ * int vf, struct ifla_vf_info *ivf);
- */
- #define HAVE_NET_DEVICE_OPS
- struct net_device_ops {
-@@ -626,6 +634,15 @@ struct net_device_ops {
- #define HAVE_NETDEV_POLL
- void (*ndo_poll_controller)(struct net_device *dev);
- #endif
-+ int (*ndo_set_vf_mac)(struct net_device *dev,
-+ int queue, u8 *mac);
-+ int (*ndo_set_vf_vlan)(struct net_device *dev,
-+ int queue, u16 vlan, u8 qos);
-+ int (*ndo_set_vf_tx_rate)(struct net_device *dev,
-+ int vf, int rate);
-+ int (*ndo_get_vf_config)(struct net_device *dev,
-+ int vf,
-+ struct ifla_vf_info *ivf);
- #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
- int (*ndo_fcoe_enable)(struct net_device *dev);
- int (*ndo_fcoe_disable)(struct net_device *dev);
-diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
-index 6b202b1..b03950e 100644
---- a/include/linux/page-flags.h
-+++ b/include/linux/page-flags.h
-@@ -105,6 +105,9 @@ enum pageflags {
- #ifdef CONFIG_ARCH_USES_PG_UNCACHED
- PG_uncached, /* Page has been mapped as uncached */
- #endif
-+#ifdef CONFIG_XEN
-+ PG_foreign,
-+#endif
- #ifdef CONFIG_MEMORY_FAILURE
- PG_hwpoison, /* hardware poisoned page. Don't touch */
- #endif
-@@ -275,6 +278,23 @@ PAGEFLAG(Uncached, uncached)
- PAGEFLAG_FALSE(Uncached)
- #endif
-
-+#ifdef CONFIG_XEN
-+TESTPAGEFLAG(Foreign, foreign)
-+__SETPAGEFLAG(Foreign, foreign)
-+CLEARPAGEFLAG(Foreign, foreign)
-+#define SetPageForeign(_page, dtor) do { \
-+ __SetPageForeign(_page); \
-+ BUG_ON((dtor) == (void (*)(struct page *, unsigned int))0); \
-+ (_page)->index = (long)(dtor); \
-+} while (0)
-+#define _PageForeignDestructor(_page) \
-+ ((void (*)(struct page *, unsigned int))(_page)->index)
-+#define PageForeignDestructor(_page, order) \
-+ _PageForeignDestructor(_page)(_page, order)
-+#else
-+PAGEFLAG_FALSE(Foreign)
-+#endif
-+
- #ifdef CONFIG_MEMORY_FAILURE
- PAGEFLAG(HWPoison, hwpoison)
- TESTSETFLAG(HWPoison, hwpoison)
-diff --git a/include/linux/pci.h b/include/linux/pci.h
-index e07d194..ca28e46 100644
---- a/include/linux/pci.h
-+++ b/include/linux/pci.h
-@@ -609,6 +609,9 @@ extern void pci_remove_bus_device(struct pci_dev *dev);
- extern void pci_stop_bus_device(struct pci_dev *dev);
- void pci_setup_cardbus(struct pci_bus *bus);
- extern void pci_sort_breadthfirst(void);
-+#define dev_is_pci(d) ((d)->bus == &pci_bus_type)
-+#define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_physfn : false))
-+#define dev_num_vf(d) ((dev_is_pci(d) ? pci_num_vf(to_pci_dev(d)) : 0))
-
- /* Generic PCI functions exported to card drivers */
-
-@@ -1124,6 +1127,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
- unsigned int devfn)
- { return NULL; }
-
-+#define dev_is_pci(d) (false)
-+#define dev_is_pf(d) (false)
-+#define dev_num_vf(d) (0)
- #endif /* CONFIG_PCI */
-
- /* Include architecture-dependent settings and functions */
-@@ -1279,6 +1285,7 @@ void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
- extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
- extern void pci_disable_sriov(struct pci_dev *dev);
- extern irqreturn_t pci_sriov_migration(struct pci_dev *dev);
-+extern int pci_num_vf(struct pci_dev *dev);
- #else
- static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
- {
-@@ -1291,6 +1298,10 @@ static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev)
- {
- return IRQ_NONE;
- }
-+static inline int pci_num_vf(struct pci_dev *dev)
-+{
-+ return 0;
-+}
- #endif
-
- #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
-diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
-index fe2f4ee..b72b9e6 100644
---- a/include/linux/pci_ids.h
-+++ b/include/linux/pci_ids.h
-@@ -2717,3 +2717,6 @@
- #define PCI_DEVICE_ID_RME_DIGI32 0x9896
- #define PCI_DEVICE_ID_RME_DIGI32_PRO 0x9897
- #define PCI_DEVICE_ID_RME_DIGI32_8 0x9898
-+
-+#define PCI_VENDOR_ID_XEN 0x5853
-+#define PCI_DEVICE_ID_XEN_PLATFORM 0x0001
-diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
-index 73b1f1c..113585a 100644
---- a/include/linux/swiotlb.h
-+++ b/include/linux/swiotlb.h
-@@ -7,6 +7,8 @@ struct device;
- struct dma_attrs;
- struct scatterlist;
-
-+extern int swiotlb_force;
-+
- /*
- * Maximum allowable number of contiguous slabs to map,
- * must be a power of 2. What is the appropriate value ?
-@@ -20,9 +22,46 @@ struct scatterlist;
- */
- #define IO_TLB_SHIFT 11
-
--extern void
--swiotlb_init(void);
--
-+/* swiotlb-core.c */
-+extern void swiotlb_init(int verbose);
-+#ifdef CONFIG_SWIOTLB
-+extern void __init swiotlb_free(void);
-+#else
-+static inline void swiotlb_free(void) { }
-+#endif
-+extern void swiotlb_print_info(void);
-+
-+/* swiotlb-core.c: Internal book-keeping functions.
-+ * Must be linked against the library to take advantage of them.*/
-+#ifdef CONFIG_SWIOTLB
-+/*
-+ * Enumeration for sync targets
-+ */
-+enum dma_sync_target {
-+ SYNC_FOR_CPU = 0,
-+ SYNC_FOR_DEVICE = 1,
-+};
-+extern char *io_tlb_start;
-+extern char *io_tlb_end;
-+extern unsigned long io_tlb_nslabs;
-+extern void *io_tlb_overflow_buffer;
-+extern unsigned long io_tlb_overflow;
-+extern int is_swiotlb_buffer(phys_addr_t paddr);
-+extern void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-+ enum dma_data_direction dir);
-+extern void *do_map_single(struct device *hwdev, phys_addr_t phys,
-+ unsigned long start_dma_addr, size_t size, int dir);
-+
-+extern void do_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
-+ int dir);
-+
-+extern void do_sync_single(struct device *hwdev, char *dma_addr, size_t size,
-+ int dir, int target);
-+extern void swiotlb_full(struct device *dev, size_t size, int dir, int do_panic);
-+extern void __init swiotlb_init_early(size_t default_size, int verbose);
-+#endif
-+
-+/* swiotlb.c: dma_ops functions. */
- extern void
- *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags);
-@@ -88,4 +127,74 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
- extern int
- swiotlb_dma_supported(struct device *hwdev, u64 mask);
-
-+/* swiotlb-xen.c: dma_ops functions. */
-+extern void xen_swiotlb_init(int verbose);
-+extern void
-+*xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-+ dma_addr_t *dma_handle, gfp_t flags);
-+
-+extern void
-+xen_swiotlb_free_coherent(struct device *hwdev, size_t size,
-+ void *vaddr, dma_addr_t dma_handle);
-+
-+extern dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
-+ unsigned long offset, size_t size,
-+ enum dma_data_direction dir,
-+ struct dma_attrs *attrs);
-+extern void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-+ size_t size, enum dma_data_direction dir,
-+ struct dma_attrs *attrs);
-+
-+extern int
-+xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-+ int direction);
-+
-+extern void
-+xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-+ int direction);
-+
-+extern int
-+xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-+ int nelems, enum dma_data_direction dir,
-+ struct dma_attrs *attrs);
-+
-+extern void
-+xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-+ int nelems, enum dma_data_direction dir,
-+ struct dma_attrs *attrs);
-+
-+extern void
-+xen_swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-+ size_t size, enum dma_data_direction dir);
-+
-+extern void
-+xen_swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-+ int nelems, enum dma_data_direction dir);
-+
-+extern void
-+xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
-+ size_t size, enum dma_data_direction dir);
-+
-+extern void
-+xen_swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
-+ int nelems, enum dma_data_direction dir);
-+
-+extern void
-+xen_swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-+ unsigned long offset, size_t size,
-+ enum dma_data_direction dir);
-+
-+extern void
-+xen_swiotlb_sync_single_range_for_device(struct device *hwdev,
-+ dma_addr_t dev_addr,
-+ unsigned long offset, size_t size,
-+ enum dma_data_direction dir);
-+
-+extern int
-+xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
-+
-+extern int
-+xen_swiotlb_dma_supported(struct device *hwdev, u64 mask);
-+
-+
- #endif /* __LINUX_SWIOTLB_H */
-diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
-index 3c123c3..1a2ba21 100644
---- a/include/linux/vmalloc.h
-+++ b/include/linux/vmalloc.h
-@@ -7,6 +7,8 @@
-
- struct vm_area_struct; /* vma defining user mapping in mm_types.h */
-
-+extern bool vmap_lazy_unmap;
-+
- /* bits in flags of vmalloc's vm_struct below */
- #define VM_IOREMAP 0x00000001 /* ioremap() and friends */
- #define VM_ALLOC 0x00000002 /* vmalloc() */
-diff --git a/include/xen/Kbuild b/include/xen/Kbuild
-index 4e65c16..84ad8f0 100644
---- a/include/xen/Kbuild
-+++ b/include/xen/Kbuild
-@@ -1 +1,2 @@
- header-y += evtchn.h
-+header-y += privcmd.h
-diff --git a/include/xen/acpi.h b/include/xen/acpi.h
-new file mode 100644
-index 0000000..279142d
---- /dev/null
-+++ b/include/xen/acpi.h
-@@ -0,0 +1,106 @@
-+#ifndef _XEN_ACPI_H
-+#define _XEN_ACPI_H
-+
-+#include <linux/types.h>
-+#include <acpi/acpi_drivers.h>
-+#include <acpi/processor.h>
-+#include <xen/xen.h>
-+
-+#ifdef CONFIG_XEN_S3
-+#include <asm/xen/hypervisor.h>
-+
-+static inline bool xen_pv_acpi(void)
-+{
-+ return xen_pv_domain();
-+}
-+#else
-+static inline bool xen_pv_acpi(void)
-+{
-+ return false;
-+}
-+#endif
-+
-+int acpi_notify_hypervisor_state(u8 sleep_state,
-+ u32 pm1a_cnt, u32 pm1b_cnd);
-+
-+/*
-+ * Following are interfaces for xen acpi processor control
-+ */
-+
-+/* Events notified to xen */
-+#define PROCESSOR_PM_INIT 1
-+#define PROCESSOR_PM_CHANGE 2
-+#define PROCESSOR_HOTPLUG 3
-+
-+/* Objects for the PM events */
-+#define PM_TYPE_IDLE 0
-+#define PM_TYPE_PERF 1
-+#define PM_TYPE_THR 2
-+#define PM_TYPE_MAX 3
-+
-+#define XEN_MAX_ACPI_ID 255
-+
-+/* Processor hotplug events */
-+#define HOTPLUG_TYPE_ADD 0
-+#define HOTPLUG_TYPE_REMOVE 1
-+
-+int xen_acpi_processor_init(void);
-+void xen_acpi_processor_exit(void);
-+
-+int xen_acpi_processor_power_init(struct acpi_processor *pr,
-+ struct acpi_device *device);
-+int xen_acpi_processor_cst_has_changed(struct acpi_processor *pr);
-+
-+void xen_arch_acpi_processor_init_pdc(struct acpi_processor *pr);
-+
-+#ifdef CONFIG_CPU_FREQ
-+int xen_acpi_processor_ppc_has_changed(struct acpi_processor *pr);
-+int xen_acpi_processor_get_performance(struct acpi_processor *pr);
-+#else
-+static inline int xen_acpi_processor_ppc_has_changed(struct acpi_processor *pr)
-+{
-+ return acpi_processor_ppc_has_changed(pr);
-+}
-+static inline int xen_acpi_processor_get_performance(struct acpi_processor *pr)
-+{
-+ printk(KERN_WARNING
-+ "Warning: xen_acpi_processor_get_performance not supported\n"
-+ "Consider compiling CPUfreq support into your kernel.\n");
-+ return 0;
-+}
-+#endif
-+
-+#if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
-+ defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)
-+int xen_hotadd_memory(struct acpi_memory_device *mem_device);
-+#endif
-+
-+#if defined(CONFIG_ACPI_PROCESSOR_XEN) || \
-+defined(CONFIG_ACPI_PROCESSOR_XEN_MODULE)
-+
-+struct processor_cntl_xen_ops {
-+ /* Transfer processor PM events to xen */
-+int (*pm_ops[PM_TYPE_MAX])(struct acpi_processor *pr, int event);
-+ /* Notify physical processor status to xen */
-+ int (*hotplug)(struct acpi_processor *pr, int type);
-+};
-+
-+extern int processor_cntl_xen_notify(struct acpi_processor *pr,
-+ int event, int type);
-+extern int processor_cntl_xen_power_cache(int cpu, int cx,
-+ struct acpi_power_register *reg);
-+#else
-+
-+static inline int processor_cntl_xen_notify(struct acpi_processor *pr,
-+ int event, int type)
-+{
-+ return 0;
-+}
-+static inline int processor_cntl_xen_power_cache(int cpu, int cx,
-+ struct acpi_power_register *reg)
-+{
-+ return 0;
-+}
-+#endif /* CONFIG_ACPI_PROCESSOR_XEN */
-+
-+#endif /* _XEN_ACPI_H */
-diff --git a/include/xen/balloon.h b/include/xen/balloon.h
-new file mode 100644
-index 0000000..e751514
---- /dev/null
-+++ b/include/xen/balloon.h
-@@ -0,0 +1,8 @@
-+#ifndef _XEN_BALLOON_H
-+#define _XEN_BALLOON_H
-+
-+/* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */
-+struct page **alloc_empty_pages_and_pagevec(int nr_pages);
-+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
-+
-+#endif
-diff --git a/include/xen/blkif.h b/include/xen/blkif.h
-new file mode 100644
-index 0000000..7172081
---- /dev/null
-+++ b/include/xen/blkif.h
-@@ -0,0 +1,123 @@
-+/*
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this software and associated documentation files (the "Software"), to
-+ * deal in the Software without restriction, including without limitation the
-+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+ * sell copies of the Software, and to permit persons to whom the Software is
-+ * furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+ * DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#ifndef __XEN_BLKIF_H__
-+#define __XEN_BLKIF_H__
-+
-+#include <xen/interface/xen.h>
-+#include <xen/interface/io/ring.h>
-+#include <xen/interface/io/blkif.h>
-+#include <xen/interface/io/protocols.h>
-+
-+/* Not a real protocol. Used to generate ring structs which contain
-+ * the elements common to all protocols only. This way we get a
-+ * compiler-checkable way to use common struct elements, so we can
-+ * avoid using switch(protocol) in a number of places. */
-+struct blkif_common_request {
-+ char dummy;
-+};
-+struct blkif_common_response {
-+ char dummy;
-+};
-+
-+/* i386 protocol version */
-+#pragma pack(push, 4)
-+struct blkif_x86_32_request {
-+ uint8_t operation; /* BLKIF_OP_??? */
-+ uint8_t nr_segments; /* number of segments */
-+ blkif_vdev_t handle; /* only for read/write requests */
-+ uint64_t id; /* private guest value, echoed in resp */
-+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
-+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-+};
-+struct blkif_x86_32_response {
-+ uint64_t id; /* copied from request */
-+ uint8_t operation; /* copied from request */
-+ int16_t status; /* BLKIF_RSP_??? */
-+};
-+typedef struct blkif_x86_32_request blkif_x86_32_request_t;
-+typedef struct blkif_x86_32_response blkif_x86_32_response_t;
-+#pragma pack(pop)
-+
-+/* x86_64 protocol version */
-+struct blkif_x86_64_request {
-+ uint8_t operation; /* BLKIF_OP_??? */
-+ uint8_t nr_segments; /* number of segments */
-+ blkif_vdev_t handle; /* only for read/write requests */
-+ uint64_t __attribute__((__aligned__(8))) id;
-+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
-+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-+};
-+struct blkif_x86_64_response {
-+ uint64_t __attribute__((__aligned__(8))) id;
-+ uint8_t operation; /* copied from request */
-+ int16_t status; /* BLKIF_RSP_??? */
-+};
-+typedef struct blkif_x86_64_request blkif_x86_64_request_t;
-+typedef struct blkif_x86_64_response blkif_x86_64_response_t;
-+
-+DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response);
-+DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response);
-+DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response);
-+
-+union blkif_back_rings {
-+ struct blkif_back_ring native;
-+ struct blkif_common_back_ring common;
-+ struct blkif_x86_32_back_ring x86_32;
-+ struct blkif_x86_64_back_ring x86_64;
-+};
-+
-+enum blkif_protocol {
-+ BLKIF_PROTOCOL_NATIVE = 1,
-+ BLKIF_PROTOCOL_X86_32 = 2,
-+ BLKIF_PROTOCOL_X86_64 = 3,
-+};
-+
-+static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src)
-+{
-+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
-+ dst->operation = src->operation;
-+ dst->nr_segments = src->nr_segments;
-+ dst->handle = src->handle;
-+ dst->id = src->id;
-+ dst->sector_number = src->sector_number;
-+ barrier();
-+ if (n > dst->nr_segments)
-+ n = dst->nr_segments;
-+ for (i = 0; i < n; i++)
-+ dst->seg[i] = src->seg[i];
-+}
-+
-+static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src)
-+{
-+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
-+ dst->operation = src->operation;
-+ dst->nr_segments = src->nr_segments;
-+ dst->handle = src->handle;
-+ dst->id = src->id;
-+ dst->sector_number = src->sector_number;
-+ barrier();
-+ if (n > dst->nr_segments)
-+ n = dst->nr_segments;
-+ for (i = 0; i < n; i++)
-+ dst->seg[i] = src->seg[i];
-+}
-+
-+#endif /* __XEN_BLKIF_H__ */
-diff --git a/include/xen/events.h b/include/xen/events.h
-index e68d59a..7e17e2a 100644
---- a/include/xen/events.h
-+++ b/include/xen/events.h
-@@ -12,6 +12,8 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
- irq_handler_t handler,
- unsigned long irqflags, const char *devname,
- void *dev_id);
-+int bind_virq_to_irq(unsigned int virq, unsigned int cpu);
-+
- int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
- irq_handler_t handler,
- unsigned long irqflags, const char *devname,
-@@ -22,6 +24,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
- unsigned long irqflags,
- const char *devname,
- void *dev_id);
-+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
-+ unsigned int remote_port,
-+ irq_handler_t handler,
-+ unsigned long irqflags,
-+ const char *devname,
-+ void *dev_id);
-
- /*
- * Common unbind function for all event sources. Takes IRQ to unbind from.
-@@ -53,7 +61,42 @@ bool xen_test_irq_pending(int irq);
- irq will be disabled so it won't deliver an interrupt. */
- void xen_poll_irq(int irq);
-
-+/* Poll waiting for an irq to become pending with a timeout. In the usual case, the
-+ irq will be disabled so it won't deliver an interrupt. */
-+void xen_poll_irq_timeout(int irq, u64 timeout);
-+
- /* Determine the IRQ which is bound to an event channel */
- unsigned irq_from_evtchn(unsigned int evtchn);
-
-+/* Allocate an irq for a physical interrupt, given a gsi. "Legacy"
-+ GSIs are identity mapped; others are dynamically allocated as
-+ usual. */
-+int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
-+
-+/* De-allocates the above mentioned physical interrupt. */
-+int xen_destroy_irq(int irq);
-+
-+/* Return vector allocated to pirq */
-+int xen_vector_from_irq(unsigned pirq);
-+
-+/* Return gsi allocated to pirq */
-+int xen_gsi_from_irq(unsigned pirq);
-+
-+#ifdef CONFIG_XEN_DOM0_PCI
-+void xen_setup_pirqs(void);
-+#else
-+static inline void xen_setup_pirqs(void)
-+{
-+}
-+#endif
-+
-+/* Determine whether to ignore this IRQ if passed to a guest. */
-+int xen_ignore_irq(int irq);
-+/* Xen HVM evtchn vector callback */
-+extern void xen_hvm_callback_vector(void);
-+extern int xen_have_vector_callback;
-+int xen_set_callback_via(uint64_t via);
-+void xen_evtchn_do_upcall(struct pt_regs *regs);
-+void xen_hvm_evtchn_do_upcall(void);
-+
- #endif /* _XEN_EVENTS_H */
-diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
-new file mode 100644
-index 0000000..8bd1467
---- /dev/null
-+++ b/include/xen/gntdev.h
-@@ -0,0 +1,119 @@
-+/******************************************************************************
-+ * gntdev.h
-+ *
-+ * Interface to /dev/xen/gntdev.
-+ *
-+ * Copyright (c) 2007, D G Murray
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#ifndef __LINUX_PUBLIC_GNTDEV_H__
-+#define __LINUX_PUBLIC_GNTDEV_H__
-+
-+struct ioctl_gntdev_grant_ref {
-+ /* The domain ID of the grant to be mapped. */
-+ uint32_t domid;
-+ /* The grant reference of the grant to be mapped. */
-+ uint32_t ref;
-+};
-+
-+/*
-+ * Inserts the grant references into the mapping table of an instance
-+ * of gntdev. N.B. This does not perform the mapping, which is deferred
-+ * until mmap() is called with @index as the offset.
-+ */
-+#define IOCTL_GNTDEV_MAP_GRANT_REF \
-+_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
-+struct ioctl_gntdev_map_grant_ref {
-+ /* IN parameters */
-+ /* The number of grants to be mapped. */
-+ uint32_t count;
-+ uint32_t pad;
-+ /* OUT parameters */
-+ /* The offset to be used on a subsequent call to mmap(). */
-+ uint64_t index;
-+ /* Variable IN parameter. */
-+ /* Array of grant references, of size @count. */
-+ struct ioctl_gntdev_grant_ref refs[1];
-+};
-+
-+/*
-+ * Removes the grant references from the mapping table of an instance
-+ * of gntdev. N.B. munmap() must be called on the relevant virtual address(es)
-+ * before this ioctl is called, or an error will result.
-+ */
-+#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
-+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))
-+struct ioctl_gntdev_unmap_grant_ref {
-+ /* IN parameters */
-+ /* The offset returned by the corresponding map operation. */
-+ uint64_t index;
-+ /* The number of pages to be unmapped. */
-+ uint32_t count;
-+ uint32_t pad;
-+};
-+
-+/*
-+ * Returns the offset in the driver's address space that corresponds
-+ * to @vaddr. This can be used to perform a munmap(), followed by an
-+ * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
-+ * the caller. The number of pages that were allocated at the same time as
-+ * @vaddr is returned in @count.
-+ *
-+ * N.B. Where more than one page has been mapped into a contiguous range, the
-+ * supplied @vaddr must correspond to the start of the range; otherwise
-+ * an error will result. It is only possible to munmap() the entire
-+ * contiguously-allocated range at once, and not any subrange thereof.
-+ */
-+#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
-+_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr))
-+struct ioctl_gntdev_get_offset_for_vaddr {
-+ /* IN parameters */
-+ /* The virtual address of the first mapped page in a range. */
-+ uint64_t vaddr;
-+ /* OUT parameters */
-+ /* The offset that was used in the initial mmap() operation. */
-+ uint64_t offset;
-+ /* The number of pages mapped in the VM area that begins at @vaddr. */
-+ uint32_t count;
-+ uint32_t pad;
-+};
-+
-+/*
-+ * Sets the maximum number of grants that may be mapped at once by this gntdev
-+ * instance.
-+ *
-+ * N.B. This must be called before any other ioctl is performed on the device.
-+ */
-+#define IOCTL_GNTDEV_SET_MAX_GRANTS \
-+_IOC(_IOC_NONE, 'G', 3, sizeof(struct ioctl_gntdev_set_max_grants))
-+struct ioctl_gntdev_set_max_grants {
-+ /* IN parameter */
-+ /* The maximum number of grants that may be mapped at once. */
-+ uint32_t count;
-+};
-+
-+#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
-diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
-index a40f1cd..871b553 100644
---- a/include/xen/grant_table.h
-+++ b/include/xen/grant_table.h
-@@ -37,10 +37,16 @@
- #ifndef __ASM_GNTTAB_H__
- #define __ASM_GNTTAB_H__
-
--#include <asm/xen/hypervisor.h>
-+#include <asm/page.h>
-+
-+#include <xen/interface/xen.h>
- #include <xen/interface/grant_table.h>
-+
-+#include <asm/xen/hypervisor.h>
- #include <asm/xen/grant_table.h>
-
-+#include <xen/features.h>
-+
- /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
- #define NR_GRANT_FRAMES 4
-
-@@ -51,6 +57,9 @@ struct gnttab_free_callback {
- u16 count;
- };
-
-+void gnttab_reset_grant_page(struct page *page);
-+
-+int gnttab_init(void);
- int gnttab_suspend(void);
- int gnttab_resume(void);
-
-@@ -80,6 +89,8 @@ unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
-
- int gnttab_query_foreign_access(grant_ref_t ref);
-
-+int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep);
-+
- /*
- * operations on reserved batches of grant references
- */
-@@ -106,12 +117,46 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
- void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
- unsigned long pfn);
-
-+static inline void
-+gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr,
-+ uint32_t flags, grant_ref_t ref, domid_t domid)
-+{
-+ if (flags & GNTMAP_contains_pte)
-+ map->host_addr = addr;
-+ else if (xen_feature(XENFEAT_auto_translated_physmap))
-+ map->host_addr = __pa(addr);
-+ else
-+ map->host_addr = addr;
-+
-+ map->flags = flags;
-+ map->ref = ref;
-+ map->dom = domid;
-+}
-+
-+static inline void
-+gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr,
-+ uint32_t flags, grant_handle_t handle)
-+{
-+ if (flags & GNTMAP_contains_pte)
-+ unmap->host_addr = addr;
-+ else if (xen_feature(XENFEAT_auto_translated_physmap))
-+ unmap->host_addr = __pa(addr);
-+ else
-+ unmap->host_addr = addr;
-+
-+ unmap->handle = handle;
-+ unmap->dev_bus_addr = 0;
-+}
-+
- int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
- unsigned long max_nr_gframes,
- struct grant_entry **__shared);
- void arch_gnttab_unmap_shared(struct grant_entry *shared,
- unsigned long nr_gframes);
-
-+extern unsigned long xen_hvm_resume_frames;
-+unsigned int gnttab_max_grant_frames(void);
-+
- #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
-
- #endif /* __ASM_GNTTAB_H__ */
-diff --git a/include/xen/hvm.h b/include/xen/hvm.h
-new file mode 100644
-index 0000000..b193fa2
---- /dev/null
-+++ b/include/xen/hvm.h
-@@ -0,0 +1,30 @@
-+/* Simple wrappers around HVM functions */
-+#ifndef XEN_HVM_H__
-+#define XEN_HVM_H__
-+
-+#include <xen/interface/hvm/params.h>
-+#include <asm/xen/hypercall.h>
-+
-+static inline int hvm_get_parameter(int idx, uint64_t *value)
-+{
-+ struct xen_hvm_param xhv;
-+ int r;
-+
-+ xhv.domid = DOMID_SELF;
-+ xhv.index = idx;
-+ r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
-+ if (r < 0) {
-+ printk(KERN_ERR "Cannot get hvm parameter %d: %d!\n",
-+ idx, r);
-+ return r;
-+ }
-+ *value = xhv.value;
-+ return r;
-+}
-+
-+#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2
-+#define HVM_CALLBACK_VIA_TYPE_SHIFT 56
-+#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\
-+ HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
-+
-+#endif /* XEN_HVM_H__ */
-diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
-index f51b641..70d2563 100644
---- a/include/xen/interface/features.h
-+++ b/include/xen/interface/features.h
-@@ -41,6 +41,12 @@
- /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
- #define XENFEAT_mmu_pt_update_preserve_ad 5
-
-+/* x86: Does this Xen host support the HVM callback vector type? */
-+#define XENFEAT_hvm_callback_vector 8
-+
-+/* x86: pvclock algorithm is safe to use on HVM */
-+#define XENFEAT_hvm_safe_pvclock 9
-+
- #define XENFEAT_NR_SUBMAPS 1
-
- #endif /* __XEN_PUBLIC_FEATURES_H__ */
-diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
-index 39da93c..c704fe5 100644
---- a/include/xen/interface/grant_table.h
-+++ b/include/xen/interface/grant_table.h
-@@ -28,6 +28,7 @@
- #ifndef __XEN_PUBLIC_GRANT_TABLE_H__
- #define __XEN_PUBLIC_GRANT_TABLE_H__
-
-+#include <xen/interface/xen.h>
-
- /***********************************
- * GRANT TABLE REPRESENTATION
-@@ -321,6 +322,28 @@ struct gnttab_query_size {
- DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
-
- /*
-+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
-+ * tracked by <handle> but atomically replace the page table entry with one
-+ * pointing to the machine address under <new_addr>. <new_addr> will be
-+ * redirected to the null entry.
-+ * NOTES:
-+ * 1. The call may fail in an undefined manner if either mapping is not
-+ * tracked by <handle>.
-+ * 2. After executing a batch of unmaps, it is guaranteed that no stale
-+ * mappings will remain in the device or host TLBs.
-+ */
-+#define GNTTABOP_unmap_and_replace 7
-+struct gnttab_unmap_and_replace {
-+ /* IN parameters. */
-+ uint64_t host_addr;
-+ uint64_t new_addr;
-+ grant_handle_t handle;
-+ /* OUT parameters. */
-+ int16_t status; /* GNTST_* */
-+};
-+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
-+
-+/*
- * Bitfield values for update_pin_status.flags.
- */
- /* Map the grant entry for access by I/O devices. */
-diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
-new file mode 100644
-index 0000000..a4827f4
---- /dev/null
-+++ b/include/xen/interface/hvm/hvm_op.h
-@@ -0,0 +1,46 @@
-+/*
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this software and associated documentation files (the "Software"), to
-+ * deal in the Software without restriction, including without limitation the
-+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+ * sell copies of the Software, and to permit persons to whom the Software is
-+ * furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+ * DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
-+#define __XEN_PUBLIC_HVM_HVM_OP_H__
-+
-+/* Get/set subcommands: the second argument of the hypercall is a
-+ * pointer to a xen_hvm_param struct. */
-+#define HVMOP_set_param 0
-+#define HVMOP_get_param 1
-+struct xen_hvm_param {
-+ domid_t domid; /* IN */
-+ uint32_t index; /* IN */
-+ uint64_t value; /* IN/OUT */
-+};
-+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
-+
-+/* Hint from PV drivers for pagetable destruction. */
-+#define HVMOP_pagetable_dying 9
-+struct xen_hvm_pagetable_dying {
-+ /* Domain with a pagetable about to be destroyed. */
-+ domid_t domid;
-+ /* guest physical address of the toplevel pagetable dying */
-+ aligned_u64 gpa;
-+};
-+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
-+
-+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
-diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
-new file mode 100644
-index 0000000..1888d8c
---- /dev/null
-+++ b/include/xen/interface/hvm/params.h
-@@ -0,0 +1,95 @@
-+/*
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this software and associated documentation files (the "Software"), to
-+ * deal in the Software without restriction, including without limitation the
-+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+ * sell copies of the Software, and to permit persons to whom the Software is
-+ * furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+ * DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
-+#define __XEN_PUBLIC_HVM_PARAMS_H__
-+
-+#include "hvm_op.h"
-+
-+/*
-+ * Parameter space for HVMOP_{set,get}_param.
-+ */
-+
-+/*
-+ * How should CPU0 event-channel notifications be delivered?
-+ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt).
-+ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows:
-+ * Domain = val[47:32], Bus = val[31:16],
-+ * DevFn = val[15: 8], IntX = val[ 1: 0]
-+ * val[63:56] == 2: val[7:0] is a vector number.
-+ * If val == 0 then CPU0 event-channel notifications are not delivered.
-+ */
-+#define HVM_PARAM_CALLBACK_IRQ 0
-+
-+#define HVM_PARAM_STORE_PFN 1
-+#define HVM_PARAM_STORE_EVTCHN 2
-+
-+#define HVM_PARAM_PAE_ENABLED 4
-+
-+#define HVM_PARAM_IOREQ_PFN 5
-+
-+#define HVM_PARAM_BUFIOREQ_PFN 6
-+
-+/*
-+ * Set mode for virtual timers (currently x86 only):
-+ * delay_for_missed_ticks (default):
-+ * Do not advance a vcpu's time beyond the correct delivery time for
-+ * interrupts that have been missed due to preemption. Deliver missed
-+ * interrupts when the vcpu is rescheduled and advance the vcpu's virtual
-+ * time stepwise for each one.
-+ * no_delay_for_missed_ticks:
-+ * As above, missed interrupts are delivered, but guest time always tracks
-+ * wallclock (i.e., real) time while doing so.
-+ * no_missed_ticks_pending:
-+ * No missed interrupts are held pending. Instead, to ensure ticks are
-+ * delivered at some non-zero rate, if we detect missed ticks then the
-+ * internal tick alarm is not disabled if the VCPU is preempted during the
-+ * next tick period.
-+ * one_missed_tick_pending:
-+ * Missed interrupts are collapsed together and delivered as one 'late tick'.
-+ * Guest time always tracks wallclock (i.e., real) time.
-+ */
-+#define HVM_PARAM_TIMER_MODE 10
-+#define HVMPTM_delay_for_missed_ticks 0
-+#define HVMPTM_no_delay_for_missed_ticks 1
-+#define HVMPTM_no_missed_ticks_pending 2
-+#define HVMPTM_one_missed_tick_pending 3
-+
-+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
-+#define HVM_PARAM_HPET_ENABLED 11
-+
-+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
-+#define HVM_PARAM_IDENT_PT 12
-+
-+/* Device Model domain, defaults to 0. */
-+#define HVM_PARAM_DM_DOMAIN 13
-+
-+/* ACPI S state: currently support S0 and S3 on x86. */
-+#define HVM_PARAM_ACPI_S_STATE 14
-+
-+/* TSS used on Intel when CR0.PE=0. */
-+#define HVM_PARAM_VM86_TSS 15
-+
-+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
-+#define HVM_PARAM_VPT_ALIGN 16
-+
-+#define HVM_NR_PARAMS 17
-+
-+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
-diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
-index c2d1fa4..68dd2b4 100644
---- a/include/xen/interface/io/blkif.h
-+++ b/include/xen/interface/io/blkif.h
-@@ -91,4 +91,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
- #define VDISK_REMOVABLE 0x2
- #define VDISK_READONLY 0x4
-
-+/* Xen-defined major numbers for virtual disks, they look strangely
-+ * familiar */
-+#define XEN_IDE0_MAJOR 3
-+#define XEN_IDE1_MAJOR 22
-+#define XEN_SCSI_DISK0_MAJOR 8
-+#define XEN_SCSI_DISK1_MAJOR 65
-+#define XEN_SCSI_DISK2_MAJOR 66
-+#define XEN_SCSI_DISK3_MAJOR 67
-+#define XEN_SCSI_DISK4_MAJOR 68
-+#define XEN_SCSI_DISK5_MAJOR 69
-+#define XEN_SCSI_DISK6_MAJOR 70
-+#define XEN_SCSI_DISK7_MAJOR 71
-+#define XEN_SCSI_DISK8_MAJOR 128
-+#define XEN_SCSI_DISK9_MAJOR 129
-+#define XEN_SCSI_DISK10_MAJOR 130
-+#define XEN_SCSI_DISK11_MAJOR 131
-+#define XEN_SCSI_DISK12_MAJOR 132
-+#define XEN_SCSI_DISK13_MAJOR 133
-+#define XEN_SCSI_DISK14_MAJOR 134
-+#define XEN_SCSI_DISK15_MAJOR 135
-+
- #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
-diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
-index 518481c..8309344 100644
---- a/include/xen/interface/io/netif.h
-+++ b/include/xen/interface/io/netif.h
-@@ -131,6 +131,10 @@ struct xen_netif_rx_request {
- #define _NETRXF_extra_info (3)
- #define NETRXF_extra_info (1U<<_NETRXF_extra_info)
-
-+/* GSO Prefix descriptor. */
-+#define _NETRXF_gso_prefix (4)
-+#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix)
-+
- struct xen_netif_rx_response {
- uint16_t id;
- uint16_t offset; /* Offset in page of start of received packet */
-diff --git a/include/xen/interface/io/pciif.h b/include/xen/interface/io/pciif.h
-new file mode 100644
-index 0000000..c4177f3
---- /dev/null
-+++ b/include/xen/interface/io/pciif.h
-@@ -0,0 +1,124 @@
-+/*
-+ * PCI Backend/Frontend Common Data Structures & Macros
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this software and associated documentation files (the "Software"), to
-+ * deal in the Software without restriction, including without limitation the
-+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+ * sell copies of the Software, and to permit persons to whom the Software is
-+ * furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+ * DEALINGS IN THE SOFTWARE.
-+ *
-+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
-+ */
-+#ifndef __XEN_PCI_COMMON_H__
-+#define __XEN_PCI_COMMON_H__
-+
-+/* Be sure to bump this number if you change this file */
-+#define XEN_PCI_MAGIC "7"
-+
-+/* xen_pci_sharedinfo flags */
-+#define _XEN_PCIF_active (0)
-+#define XEN_PCIF_active (1<<_XEN_PCIF_active)
-+#define _XEN_PCIB_AERHANDLER (1)
-+#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER)
-+#define _XEN_PCIB_active (2)
-+#define XEN_PCIB_active (1<<_XEN_PCIB_active)
-+
-+/* xen_pci_op commands */
-+#define XEN_PCI_OP_conf_read (0)
-+#define XEN_PCI_OP_conf_write (1)
-+#define XEN_PCI_OP_enable_msi (2)
-+#define XEN_PCI_OP_disable_msi (3)
-+#define XEN_PCI_OP_enable_msix (4)
-+#define XEN_PCI_OP_disable_msix (5)
-+#define XEN_PCI_OP_aer_detected (6)
-+#define XEN_PCI_OP_aer_resume (7)
-+#define XEN_PCI_OP_aer_mmio (8)
-+#define XEN_PCI_OP_aer_slotreset (9)
-+
-+/* xen_pci_op error numbers */
-+#define XEN_PCI_ERR_success (0)
-+#define XEN_PCI_ERR_dev_not_found (-1)
-+#define XEN_PCI_ERR_invalid_offset (-2)
-+#define XEN_PCI_ERR_access_denied (-3)
-+#define XEN_PCI_ERR_not_implemented (-4)
-+/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */
-+#define XEN_PCI_ERR_op_failed (-5)
-+
-+/*
-+ * it should be (PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry)
-+ * Should not exceed 128
-+ */
-+#define SH_INFO_MAX_VEC 128
-+
-+struct xen_msix_entry {
-+ uint16_t vector;
-+ uint16_t entry;
-+};
-+struct xen_pci_op {
-+ /* IN: what action to perform: XEN_PCI_OP_* */
-+ uint32_t cmd;
-+
-+ /* OUT: will contain an error number (if any) from errno.h */
-+ int32_t err;
-+
-+ /* IN: which device to touch */
-+ uint32_t domain; /* PCI Domain/Segment */
-+ uint32_t bus;
-+ uint32_t devfn;
-+
-+ /* IN: which configuration registers to touch */
-+ int32_t offset;
-+ int32_t size;
-+
-+ /* IN/OUT: Contains the result after a READ or the value to WRITE */
-+ uint32_t value;
-+ /* IN: Contains extra info for this operation */
-+ uint32_t info;
-+ /*IN: param for msi-x */
-+ struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC];
-+};
-+
-+/*used for pcie aer handling*/
-+struct xen_pcie_aer_op
-+{
-+
-+ /* IN: what action to perform: XEN_PCI_OP_* */
-+ uint32_t cmd;
-+ /*IN/OUT: return aer_op result or carry error_detected state as input*/
-+ int32_t err;
-+
-+ /* IN: which device to touch */
-+ uint32_t domain; /* PCI Domain/Segment*/
-+ uint32_t bus;
-+ uint32_t devfn;
-+};
-+struct xen_pci_sharedinfo {
-+ /* flags - XEN_PCIF_* */
-+ uint32_t flags;
-+ struct xen_pci_op op;
-+ struct xen_pcie_aer_op aer_op;
-+};
-+
-+#endif /* __XEN_PCI_COMMON_H__ */
-+
-+/*
-+ * Local variables:
-+ * mode: C
-+ * c-set-style: "BSD"
-+ * c-basic-offset: 4
-+ * tab-width: 4
-+ * indent-tabs-mode: nil
-+ * End:
-+ */
-diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
-index e8cbf43..c9ba846 100644
---- a/include/xen/interface/io/ring.h
-+++ b/include/xen/interface/io/ring.h
-@@ -24,8 +24,15 @@ typedef unsigned int RING_IDX;
- * A ring contains as many entries as will fit, rounded down to the nearest
- * power of two (so we can mask with (size-1) to loop around).
- */
--#define __RING_SIZE(_s, _sz) \
-- (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
-+#define __CONST_RING_SIZE(_s, _sz) \
-+ (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
-+ sizeof(((struct _s##_sring *)0)->ring[0])))
-+
-+/*
-+ * The same for passing in an actual pointer instead of a name tag.
-+ */
-+#define __RING_SIZE(_s, _sz) \
-+ (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
-
- /*
- * Macros to make the correct C datatypes for a new kind of ring.
-@@ -73,7 +80,16 @@ union __name##_sring_entry { \
- struct __name##_sring { \
- RING_IDX req_prod, req_event; \
- RING_IDX rsp_prod, rsp_event; \
-- uint8_t pad[48]; \
-+ union { \
-+ struct { \
-+ uint8_t smartpoll_active; \
-+ } netif; \
-+ struct { \
-+ uint8_t msg; \
-+ } tapif_user; \
-+ uint8_t pvt_pad[4]; \
-+ } private; \
-+ uint8_t pad[44]; \
- union __name##_sring_entry ring[1]; /* variable-length */ \
- }; \
- \
-diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h
-index 46508c7..9fda532 100644
---- a/include/xen/interface/io/xenbus.h
-+++ b/include/xen/interface/io/xenbus.h
-@@ -27,8 +27,14 @@ enum xenbus_state
- XenbusStateClosing = 5, /* The device is being closed
- due to an error or an unplug
- event. */
-- XenbusStateClosed = 6
-+ XenbusStateClosed = 6,
-
-+ /*
-+ * Reconfiguring: The device is being reconfigured.
-+ */
-+ XenbusStateReconfiguring = 7,
-+
-+ XenbusStateReconfigured = 8
- };
-
- #endif /* _XEN_PUBLIC_IO_XENBUS_H */
-diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
-index af36ead..aa4e368 100644
---- a/include/xen/interface/memory.h
-+++ b/include/xen/interface/memory.h
-@@ -9,6 +9,8 @@
- #ifndef __XEN_PUBLIC_MEMORY_H__
- #define __XEN_PUBLIC_MEMORY_H__
-
-+#include <linux/spinlock.h>
-+
- /*
- * Increase or decrease the specified domain's memory reservation. Returns a
- * -ve errcode on failure, or the # extents successfully allocated or freed.
-@@ -53,6 +55,48 @@ struct xen_memory_reservation {
- DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
-
- /*
-+ * An atomic exchange of memory pages. If return code is zero then
-+ * @out.extent_list provides GMFNs of the newly-allocated memory.
-+ * Returns zero on complete success, otherwise a negative error code.
-+ * On complete success then always @nr_exchanged == @in.nr_extents.
-+ * On partial success @nr_exchanged indicates how much work was done.
-+ */
-+#define XENMEM_exchange 11
-+struct xen_memory_exchange {
-+ /*
-+ * [IN] Details of memory extents to be exchanged (GMFN bases).
-+ * Note that @in.address_bits is ignored and unused.
-+ */
-+ struct xen_memory_reservation in;
-+
-+ /*
-+ * [IN/OUT] Details of new memory extents.
-+ * We require that:
-+ * 1. @in.domid == @out.domid
-+ * 2. @in.nr_extents << @in.extent_order ==
-+ * @out.nr_extents << @out.extent_order
-+ * 3. @in.extent_start and @out.extent_start lists must not overlap
-+ * 4. @out.extent_start lists GPFN bases to be populated
-+ * 5. @out.extent_start is overwritten with allocated GMFN bases
-+ */
-+ struct xen_memory_reservation out;
-+
-+ /*
-+ * [OUT] Number of input extents that were successfully exchanged:
-+ * 1. The first @nr_exchanged input extents were successfully
-+ * deallocated.
-+ * 2. The corresponding first entries in the output extent list correctly
-+ * indicate the GMFNs that were successfully exchanged.
-+ * 3. All other input and output extents are untouched.
-+ * 4. If not all input extents are exchanged then the return code of this
-+ * command will be non-zero.
-+ * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
-+ */
-+ unsigned long nr_exchanged;
-+};
-+
-+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
-+/*
- * Returns the maximum machine frame number of mapped RAM in this system.
- * This command always succeeds (it never returns an error code).
- * arg == NULL.
-@@ -97,6 +141,19 @@ struct xen_machphys_mfn_list {
- DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
-
- /*
-+ * Returns the location in virtual address space of the machine_to_phys
-+ * mapping table. Architectures which do not have a m2p table, or which do not
-+ * map it by default into guest address space, do not implement this command.
-+ * arg == addr of xen_machphys_mapping_t.
-+ */
-+#define XENMEM_machphys_mapping 12
-+struct xen_machphys_mapping {
-+ unsigned long v_start, v_end; /* Start and end virtual addresses. */
-+ unsigned long max_mfn; /* Maximum MFN that can be looked up. */
-+};
-+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
-+
-+/*
- * Sets the GPFN at which a particular page appears in the specified guest's
- * pseudophysical address space.
- * arg == addr of xen_add_to_physmap_t.
-@@ -142,4 +199,38 @@ struct xen_translate_gpfn_list {
- };
- DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
-
-+/*
-+ * Returns the pseudo-physical memory map as it was when the domain
-+ * was started (specified by XENMEM_set_memory_map).
-+ * arg == addr of struct xen_memory_map.
-+ */
-+#define XENMEM_memory_map 9
-+struct xen_memory_map {
-+ /*
-+ * On call the number of entries which can be stored in buffer. On
-+ * return the number of entries which have been stored in
-+ * buffer.
-+ */
-+ unsigned int nr_entries;
-+
-+ /*
-+ * Entries in the buffer are in the same format as returned by the
-+ * BIOS INT 0x15 EAX=0xE820 call.
-+ */
-+ GUEST_HANDLE(void) buffer;
-+};
-+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
-+
-+/*
-+ * Returns the real physical memory map. Passes the same structure as
-+ * XENMEM_memory_map.
-+ * arg == addr of struct xen_memory_map.
-+ */
-+#define XENMEM_machine_memory_map 10
-+
-+/*
-+ * Prevent the balloon driver from changing the memory reservation
-+ * during a driver critical region.
-+ */
-+extern spinlock_t xen_reservation_lock;
- #endif /* __XEN_PUBLIC_MEMORY_H__ */
-diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
-index cd69391..0703ef6 100644
---- a/include/xen/interface/physdev.h
-+++ b/include/xen/interface/physdev.h
-@@ -39,6 +39,19 @@ struct physdev_eoi {
- };
-
- /*
-+ * Register a shared page for the hypervisor to indicate whether the guest
-+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
-+ * once the guest used this function in that the associated event channel
-+ * will automatically get unmasked. The page registered is used as a bit
-+ * array indexed by Xen's PIRQ value.
-+ */
-+#define PHYSDEVOP_pirq_eoi_gmfn 17
-+struct physdev_pirq_eoi_gmfn {
-+ /* IN */
-+ unsigned long gmfn;
-+};
-+
-+/*
- * Query the status of an IRQ line.
- * @arg == pointer to physdev_irq_status_query structure.
- */
-@@ -106,6 +119,64 @@ struct physdev_irq {
- uint32_t vector;
- };
-
-+#define MAP_PIRQ_TYPE_MSI 0x0
-+#define MAP_PIRQ_TYPE_GSI 0x1
-+#define MAP_PIRQ_TYPE_UNKNOWN 0x2
-+
-+#define PHYSDEVOP_map_pirq 13
-+struct physdev_map_pirq {
-+ domid_t domid;
-+ /* IN */
-+ int type;
-+ /* IN */
-+ int index;
-+ /* IN or OUT */
-+ int pirq;
-+ /* IN */
-+ int bus;
-+ /* IN */
-+ int devfn;
-+ /* IN */
-+ int entry_nr;
-+ /* IN */
-+ uint64_t table_base;
-+};
-+
-+#define PHYSDEVOP_unmap_pirq 14
-+struct physdev_unmap_pirq {
-+ domid_t domid;
-+ /* IN */
-+ int pirq;
-+};
-+
-+#define PHYSDEVOP_manage_pci_add 15
-+#define PHYSDEVOP_manage_pci_remove 16
-+struct physdev_manage_pci {
-+ /* IN */
-+ uint8_t bus;
-+ uint8_t devfn;
-+};
-+
-+#define PHYSDEVOP_restore_msi 19
-+struct physdev_restore_msi {
-+ /* IN */
-+ uint8_t bus;
-+ uint8_t devfn;
-+};
-+
-+#define PHYSDEVOP_manage_pci_add_ext 20
-+struct physdev_manage_pci_ext {
-+ /* IN */
-+ uint8_t bus;
-+ uint8_t devfn;
-+ unsigned is_extfn;
-+ unsigned is_virtfn;
-+ struct {
-+ uint8_t bus;
-+ uint8_t devfn;
-+ } physfn;
-+};
-+
- /*
- * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
- * hypercall since 0x00030202.
-@@ -121,6 +192,16 @@ struct physdev_op {
- } u;
- };
-
-+#define PHYSDEVOP_setup_gsi 21
-+struct physdev_setup_gsi {
-+ int gsi;
-+ /* IN */
-+ uint8_t triggering;
-+ /* IN */
-+ uint8_t polarity;
-+ /* IN */
-+};
-+
- /*
- * Notify that some PIRQ-bound event channels have been unmasked.
- * ** This command is obsolete since interface version 0x00030202 and is **
-diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
-new file mode 100644
-index 0000000..17ae622
---- /dev/null
-+++ b/include/xen/interface/platform.h
-@@ -0,0 +1,381 @@
-+/******************************************************************************
-+ * platform.h
-+ *
-+ * Hardware platform operations. Intended for use by domain-0 kernel.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this software and associated documentation files (the "Software"), to
-+ * deal in the Software without restriction, including without limitation the
-+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+ * sell copies of the Software, and to permit persons to whom the Software is
-+ * furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+ * DEALINGS IN THE SOFTWARE.
-+ *
-+ * Copyright (c) 2002-2006, K Fraser
-+ */
-+
-+#ifndef __XEN_PUBLIC_PLATFORM_H__
-+#define __XEN_PUBLIC_PLATFORM_H__
-+
-+#include "xen.h"
-+
-+#define XENPF_INTERFACE_VERSION 0x03000001
-+
-+/*
-+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
-+ * 1 January, 1970 if the current system time was <system_time>.
-+ */
-+#define XENPF_settime 17
-+struct xenpf_settime {
-+ /* IN variables. */
-+ uint32_t secs;
-+ uint32_t nsecs;
-+ uint64_t system_time;
-+};
-+typedef struct xenpf_settime xenpf_settime_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t);
-+
-+/*
-+ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
-+ * On x86, @type is an architecture-defined MTRR memory type.
-+ * On success, returns the MTRR that was used (@reg) and a handle that can
-+ * be passed to XENPF_del_memtype to accurately tear down the new setting.
-+ * (x86-specific).
-+ */
-+#define XENPF_add_memtype 31
-+struct xenpf_add_memtype {
-+ /* IN variables. */
-+ unsigned long mfn;
-+ uint64_t nr_mfns;
-+ uint32_t type;
-+ /* OUT variables. */
-+ uint32_t handle;
-+ uint32_t reg;
-+};
-+typedef struct xenpf_add_memtype xenpf_add_memtype_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_add_memtype_t);
-+
-+/*
-+ * Tear down an existing memory-range type. If @handle is remembered then it
-+ * should be passed in to accurately tear down the correct setting (in case
-+ * of overlapping memory regions with differing types). If it is not known
-+ * then @handle should be set to zero. In all cases @reg must be set.
-+ * (x86-specific).
-+ */
-+#define XENPF_del_memtype 32
-+struct xenpf_del_memtype {
-+ /* IN variables. */
-+ uint32_t handle;
-+ uint32_t reg;
-+};
-+typedef struct xenpf_del_memtype xenpf_del_memtype_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_del_memtype_t);
-+
-+/* Read current type of an MTRR (x86-specific). */
-+#define XENPF_read_memtype 33
-+struct xenpf_read_memtype {
-+ /* IN variables. */
-+ uint32_t reg;
-+ /* OUT variables. */
-+ unsigned long mfn;
-+ uint64_t nr_mfns;
-+ uint32_t type;
-+};
-+typedef struct xenpf_read_memtype xenpf_read_memtype_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_read_memtype_t);
-+
-+#define XENPF_microcode_update 35
-+struct xenpf_microcode_update {
-+ /* IN variables. */
-+ GUEST_HANDLE(void) data; /* Pointer to microcode data */
-+ uint32_t length; /* Length of microcode data. */
-+};
-+typedef struct xenpf_microcode_update xenpf_microcode_update_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_microcode_update_t);
-+
-+#define XENPF_platform_quirk 39
-+#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */
-+#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */
-+#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */
-+struct xenpf_platform_quirk {
-+ /* IN variables. */
-+ uint32_t quirk_id;
-+};
-+typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t);
-+
-+#define XENPF_firmware_info 50
-+#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */
-+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
-+#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */
-+struct xenpf_firmware_info {
-+ /* IN variables. */
-+ uint32_t type;
-+ uint32_t index;
-+ /* OUT variables. */
-+ union {
-+ struct {
-+ /* Int13, Fn48: Check Extensions Present. */
-+ uint8_t device; /* %dl: bios device number */
-+ uint8_t version; /* %ah: major version */
-+ uint16_t interface_support; /* %cx: support bitmap */
-+ /* Int13, Fn08: Legacy Get Device Parameters. */
-+ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */
-+ uint8_t legacy_max_head; /* %dh: max head # */
-+ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */
-+ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
-+ /* NB. First uint16_t of buffer must be set to buffer size. */
-+ GUEST_HANDLE(void) edd_params;
-+ } disk_info; /* XEN_FW_DISK_INFO */
-+ struct {
-+ uint8_t device; /* bios device number */
-+ uint32_t mbr_signature; /* offset 0x1b8 in mbr */
-+ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
-+ struct {
-+ /* Int10, AX=4F15: Get EDID info. */
-+ uint8_t capabilities;
-+ uint8_t edid_transfer_time;
-+ /* must refer to 128-byte buffer */
-+ GUEST_HANDLE(uchar) edid;
-+ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
-+ } u;
-+};
-+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t);
-+
-+#define XENPF_enter_acpi_sleep 51
-+struct xenpf_enter_acpi_sleep {
-+ /* IN variables */
-+ uint16_t pm1a_cnt_val; /* PM1a control value. */
-+ uint16_t pm1b_cnt_val; /* PM1b control value. */
-+ uint32_t sleep_state; /* Which state to enter (Sn). */
-+ uint32_t flags; /* Must be zero. */
-+};
-+typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_enter_acpi_sleep_t);
-+
-+#define XENPF_change_freq 52
-+struct xenpf_change_freq {
-+ /* IN variables */
-+ uint32_t flags; /* Must be zero. */
-+ uint32_t cpu; /* Physical cpu. */
-+ uint64_t freq; /* New frequency (Hz). */
-+};
-+typedef struct xenpf_change_freq xenpf_change_freq_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_change_freq_t);
-+
-+/*
-+ * Get idle times (nanoseconds since boot) for physical CPUs specified in the
-+ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is
-+ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap
-+ * bit set are written to. On return, @cpumap_bitmap is modified so that any
-+ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry
-+ * cleared.
-+ */
-+#define XENPF_getidletime 53
-+struct xenpf_getidletime {
-+ /* IN/OUT variables */
-+ /* IN: CPUs to interrogate; OUT: subset of IN which are present */
-+ GUEST_HANDLE(uchar) cpumap_bitmap;
-+ /* IN variables */
-+ /* Size of cpumap bitmap. */
-+ uint32_t cpumap_nr_cpus;
-+ /* Must be indexable for every cpu in cpumap_bitmap. */
-+ GUEST_HANDLE(uint64_t) idletime;
-+ /* OUT variables */
-+ /* System time when the idletime snapshots were taken. */
-+ uint64_t now;
-+};
-+typedef struct xenpf_getidletime xenpf_getidletime_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
-+
-+#define XENPF_set_processor_pminfo 54
-+
-+/* ability bits */
-+#define XEN_PROCESSOR_PM_CX 1
-+#define XEN_PROCESSOR_PM_PX 2
-+#define XEN_PROCESSOR_PM_TX 4
-+
-+/* cmd type */
-+#define XEN_PM_CX 0
-+#define XEN_PM_PX 1
-+#define XEN_PM_TX 2
-+
-+/* Px sub info type */
-+#define XEN_PX_PCT 1
-+#define XEN_PX_PSS 2
-+#define XEN_PX_PPC 4
-+#define XEN_PX_PSD 8
-+
-+struct xen_power_register {
-+ uint32_t space_id;
-+ uint32_t bit_width;
-+ uint32_t bit_offset;
-+ uint32_t access_size;
-+ uint64_t address;
-+};
-+
-+struct xen_processor_csd {
-+ uint32_t domain; /* domain number of one dependent group */
-+ uint32_t coord_type; /* coordination type */
-+ uint32_t num; /* number of processors in same domain */
-+};
-+typedef struct xen_processor_csd xen_processor_csd_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_csd);
-+
-+struct xen_processor_cx {
-+ struct xen_power_register reg; /* GAS for Cx trigger register */
-+ uint8_t type; /* cstate value, c0: 0, c1: 1, ... */
-+ uint32_t latency; /* worst latency (ms) to enter/exit this cstate */
-+ uint32_t power; /* average power consumption(mW) */
-+ uint32_t dpcnt; /* number of dependency entries */
-+ GUEST_HANDLE(xen_processor_csd) dp; /* NULL if no dependency */
-+};
-+typedef struct xen_processor_cx xen_processor_cx_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_cx);
-+
-+struct xen_processor_flags {
-+ uint32_t bm_control:1;
-+ uint32_t bm_check:1;
-+ uint32_t has_cst:1;
-+ uint32_t power_setup_done:1;
-+ uint32_t bm_rld_set:1;
-+};
-+
-+struct xen_processor_power {
-+ uint32_t count; /* number of C state entries in array below */
-+ struct xen_processor_flags flags; /* global flags of this processor */
-+ GUEST_HANDLE(xen_processor_cx) states; /* supported c states */
-+};
-+
-+struct xen_pct_register {
-+ uint8_t descriptor;
-+ uint16_t length;
-+ uint8_t space_id;
-+ uint8_t bit_width;
-+ uint8_t bit_offset;
-+ uint8_t reserved;
-+ uint64_t address;
-+};
-+
-+struct xen_processor_px {
-+ uint64_t core_frequency; /* megahertz */
-+ uint64_t power; /* milliWatts */
-+ uint64_t transition_latency; /* microseconds */
-+ uint64_t bus_master_latency; /* microseconds */
-+ uint64_t control; /* control value */
-+ uint64_t status; /* success indicator */
-+};
-+typedef struct xen_processor_px xen_processor_px_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_px);
-+
-+struct xen_psd_package {
-+ uint64_t num_entries;
-+ uint64_t revision;
-+ uint64_t domain;
-+ uint64_t coord_type;
-+ uint64_t num_processors;
-+};
-+
-+struct xen_processor_performance {
-+ uint32_t flags; /* flag for Px sub info type */
-+ uint32_t platform_limit; /* Platform limitation on freq usage */
-+ struct xen_pct_register control_register;
-+ struct xen_pct_register status_register;
-+ uint32_t state_count; /* total available performance states */
-+ GUEST_HANDLE(xen_processor_px) states;
-+ struct xen_psd_package domain_info;
-+ uint32_t shared_type; /* coordination type of this processor */
-+};
-+typedef struct xen_processor_performance xen_processor_performance_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance);
-+
-+struct xenpf_set_processor_pminfo {
-+ /* IN variables */
-+ uint32_t id; /* ACPI CPU ID */
-+ uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */
-+ union {
-+ struct xen_processor_power power;/* Cx: _CST/_CSD */
-+ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
-+ };
-+};
-+typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
-+
-+#define XENPF_get_cpuinfo 55
-+struct xenpf_pcpuinfo {
-+ /* IN */
-+ uint32_t xen_cpuid;
-+ /* OUT */
-+ /* The maximum cpu_id that is present */
-+ uint32_t max_present;
-+#define XEN_PCPU_FLAGS_ONLINE 1
-+ /* Corresponding xen_cpuid is not present */
-+#define XEN_PCPU_FLAGS_INVALID 2
-+ uint32_t flags;
-+ uint32_t apic_id;
-+ uint32_t acpi_id;
-+};
-+typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo_t);
-+
-+#define XENPF_cpu_online 56
-+#define XENPF_cpu_offline 57
-+struct xenpf_cpu_ol {
-+ uint32_t cpuid;
-+};
-+typedef struct xenpf_cpu_ol xenpf_cpu_ol_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol_t);
-+
-+#define XENPF_cpu_hotadd 58
-+struct xenpf_cpu_hotadd {
-+ uint32_t apic_id;
-+ uint32_t acpi_id;
-+ uint32_t pxm;
-+};
-+
-+
-+#define XENPF_mem_hotadd 59
-+struct xenpf_mem_hotadd {
-+ uint64_t spfn;
-+ uint64_t epfn;
-+ uint32_t pxm;
-+ uint32_t flags;
-+};
-+
-+struct xen_platform_op {
-+ uint32_t cmd;
-+ uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
-+ union {
-+ struct xenpf_settime settime;
-+ struct xenpf_add_memtype add_memtype;
-+ struct xenpf_del_memtype del_memtype;
-+ struct xenpf_read_memtype read_memtype;
-+ struct xenpf_microcode_update microcode;
-+ struct xenpf_platform_quirk platform_quirk;
-+ struct xenpf_firmware_info firmware_info;
-+ struct xenpf_enter_acpi_sleep enter_acpi_sleep;
-+ struct xenpf_change_freq change_freq;
-+ struct xenpf_getidletime getidletime;
-+ struct xenpf_set_processor_pminfo set_pminfo;
-+ struct xenpf_pcpuinfo pcpu_info;
-+ struct xenpf_cpu_ol cpu_ol;
-+ struct xenpf_cpu_hotadd cpu_add;
-+ struct xenpf_mem_hotadd mem_add;
-+ uint8_t pad[128];
-+ } u;
-+};
-+typedef struct xen_platform_op xen_platform_op_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t);
-+
-+#endif /* __XEN_PUBLIC_PLATFORM_H__ */
-diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
-index 5fec575..dd55dac 100644
---- a/include/xen/interface/sched.h
-+++ b/include/xen/interface/sched.h
-@@ -65,6 +65,39 @@ struct sched_poll {
- DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
-
- /*
-+ * Declare a shutdown for another domain. The main use of this function is
-+ * in interpreting shutdown requests and reasons for fully-virtualized
-+ * domains. A para-virtualized domain may use SCHEDOP_shutdown directly.
-+ * @arg == pointer to sched_remote_shutdown structure.
-+ */
-+#define SCHEDOP_remote_shutdown 4
-+struct sched_remote_shutdown {
-+ domid_t domain_id; /* Remote domain ID */
-+ unsigned int reason; /* SHUTDOWN_xxx reason */
-+};
-+
-+/*
-+ * Latch a shutdown code, so that when the domain later shuts down it
-+ * reports this code to the control tools.
-+ * @arg == as for SCHEDOP_shutdown.
-+ */
-+#define SCHEDOP_shutdown_code 5
-+
-+/*
-+ * Setup, poke and destroy a domain watchdog timer.
-+ * @arg == pointer to sched_watchdog structure.
-+ * With id == 0, setup a domain watchdog timer to cause domain shutdown
-+ * after timeout, returns watchdog id.
-+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
-+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
-+ */
-+#define SCHEDOP_watchdog 6
-+struct sched_watchdog {
-+ uint32_t id; /* watchdog ID */
-+ uint32_t timeout; /* timeout */
-+};
-+
-+/*
- * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
- * software to determine the appropriate action. For the most part, Xen does
- * not care about the shutdown code.
-@@ -73,5 +106,6 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
- #define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */
- #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
- #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
-+#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */
-
- #endif /* __XEN_PUBLIC_SCHED_H__ */
-diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
-new file mode 100644
-index 0000000..f31fdab
---- /dev/null
-+++ b/include/xen/interface/xen-mca.h
-@@ -0,0 +1,429 @@
-+/******************************************************************************
-+ * arch-x86/mca.h
-+ *
-+ * Contributed by Advanced Micro Devices, Inc.
-+ * Author: Christoph Egger <Christoph.Egger@amd.com>
-+ *
-+ * Guest OS machine check interface to x86 Xen.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this software and associated documentation files (the "Software"), to
-+ * deal in the Software without restriction, including without limitation the
-+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+ * sell copies of the Software, and to permit persons to whom the Software is
-+ * furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+ * DEALINGS IN THE SOFTWARE.
-+ */
-+
-+/* Full MCA functionality has the following Usecases from the guest side:
-+ *
-+ * Must have's:
-+ * 1. Dom0 and DomU register machine check trap callback handlers
-+ * (already done via "set_trap_table" hypercall)
-+ * 2. Dom0 registers machine check event callback handler
-+ * (doable via EVTCHNOP_bind_virq)
-+ * 3. Dom0 and DomU fetch machine check data
-+ * 4. Dom0 wants Xen to notify a DomU
-+ * 5. Dom0 gets DomU ID from physical address
-+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
-+ *
-+ * Nice to have's:
-+ * 7. Dom0 wants Xen to deactivate a physical CPU
-+ * This is better done as separate task, physical CPU hotplugging,
-+ * and hypercall(s) should be sysctl's
-+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
-+ * move a DomU (or Dom0 itself) away from a malicious page
-+ * producing correctable errors.
-+ * 9. offlining physical page:
-+ * Xen free's and never re-uses a certain physical page.
-+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
-+ * and tell Xen to trigger a machine check
-+ */
-+
-+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
-+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
-+
-+/* Hypercall */
-+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
-+
-+/*
-+ * The xen-unstable repo has interface version 0x03000001; our interface
-+ * is incompatible with that and any future minor revisions, so we
-+ * choose a different version number range that is numerically less
-+ * than that used in xen-unstable.
-+ */
-+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
-+
-+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
-+#define XEN_MC_NONURGENT 0x0001
-+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */
-+#define XEN_MC_URGENT 0x0002
-+/* IN: Dom0 acknowledges previously-fetched error log entry */
-+#define XEN_MC_ACK 0x0004
-+
-+/* OUT: All is ok */
-+#define XEN_MC_OK 0x0
-+/* OUT: Domain could not fetch data. */
-+#define XEN_MC_FETCHFAILED 0x1
-+/* OUT: There was no machine check data to fetch. */
-+#define XEN_MC_NODATA 0x2
-+/* OUT: Between notification time and this hypercall another
-+ * (most likely) correctable error happened. The fetched data
-+ * does not match the original machine check data. */
-+#define XEN_MC_NOMATCH 0x4
-+
-+/* OUT: DomU did not register MC NMI handler. Try something else. */
-+#define XEN_MC_CANNOTHANDLE 0x8
-+/* OUT: Notifying DomU failed. Retry later or try something else. */
-+#define XEN_MC_NOTDELIVERED 0x10
-+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
-+
-+
-+#ifndef __ASSEMBLY__
-+
-+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
-+
-+/*
-+ * Machine Check Architecture:
-+ * structs are read-only and used to report all kinds of
-+ * correctable and uncorrectable errors detected by the HW.
-+ * Dom0 and DomU: register a handler to get notified.
-+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
-+ */
-+#define MC_TYPE_GLOBAL 0
-+#define MC_TYPE_BANK 1
-+#define MC_TYPE_EXTENDED 2
-+#define MC_TYPE_RECOVERY 3
-+
-+struct mcinfo_common {
-+ uint16_t type; /* structure type */
-+ uint16_t size; /* size of this struct in bytes */
-+};
-+
-+
-+#define MC_FLAG_CORRECTABLE (1 << 0)
-+#define MC_FLAG_UNCORRECTABLE (1 << 1)
-+#define MC_FLAG_RECOVERABLE (1 << 2)
-+#define MC_FLAG_POLLED (1 << 3)
-+#define MC_FLAG_RESET (1 << 4)
-+#define MC_FLAG_CMCI (1 << 5)
-+#define MC_FLAG_MCE (1 << 6)
-+/* contains global x86 mc information */
-+struct mcinfo_global {
-+ struct mcinfo_common common;
-+
-+ /* running domain at the time in error (most likely
-+ * the impacted one) */
-+ uint16_t mc_domid;
-+ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
-+ uint32_t mc_socketid; /* physical socket of the physical core */
-+ uint16_t mc_coreid; /* physical impacted core */
-+ uint16_t mc_core_threadid; /* core thread of physical core */
-+ uint32_t mc_apicid;
-+ uint32_t mc_flags;
-+ uint64_t mc_gstatus; /* global status */
-+};
-+
-+/* contains bank local x86 mc information */
-+struct mcinfo_bank {
-+ struct mcinfo_common common;
-+
-+ uint16_t mc_bank; /* bank nr */
-+ uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on
-+ * privileged pv-ops dom and if mc_addr is valid.
-+ * Never valid on DomU. */
-+ uint64_t mc_status; /* bank status */
-+ uint64_t mc_addr; /* bank address, only valid
-+ * if addr bit is set in mc_status */
-+ uint64_t mc_misc;
-+ uint64_t mc_ctrl2;
-+ uint64_t mc_tsc;
-+};
-+
-+
-+struct mcinfo_msr {
-+ uint64_t reg; /* MSR */
-+ uint64_t value; /* MSR value */
-+};
-+
-+/* contains mc information from other
-+ * or additional mc MSRs */
-+struct mcinfo_extended {
-+ struct mcinfo_common common;
-+
-+ /* You can fill up to five registers.
-+ * If you need more, then use this structure
-+ * multiple times. */
-+
-+ uint32_t mc_msrs; /* Number of msr with valid values. */
-+ /*
-+ * Currently Intel extended MSR (32/64) include all gp registers
-+ * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
-+ * useful at present. So expand this array to 16/32 to leave room.
-+ */
-+ struct mcinfo_msr mc_msr[sizeof(void *) * 4];
-+};
-+
-+/* Recovery Action flags. Giving recovery result information to DOM0 */
-+
-+/* Xen takes successful recovery action, the error is recovered */
-+#define REC_ACTION_RECOVERED (0x1 << 0)
-+/* No action is performed by XEN */
-+#define REC_ACTION_NONE (0x1 << 1)
-+/* It's possible DOM0 might take action ownership in some case */
-+#define REC_ACTION_NEED_RESET (0x1 << 2)
-+
-+/* Different Recovery Action types, if the action is performed successfully,
-+ * REC_ACTION_RECOVERED flag will be returned.
-+ */
-+
-+/* Page Offline Action */
-+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
-+/* CPU offline Action */
-+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
-+/* L3 cache disable Action */
-+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
-+
-+/* Below interface used between XEN/DOM0 for passing XEN's recovery action
-+ * information to DOM0.
-+ * Usage scenario: After offlining a broken page, XEN might pass its page offline
-+ * recovery action result to DOM0. DOM0 will save the information in
-+ * non-volatile memory for further proactive actions, such as offlining the
-+ * easy broken page earlier when doing next reboot.
-+*/
-+struct page_offline_action {
-+ /* Params for passing the offlined page number to DOM0 */
-+ uint64_t mfn;
-+ uint64_t status;
-+};
-+
-+struct cpu_offline_action {
-+ /* Params for passing the identity of the offlined CPU to DOM0 */
-+ uint32_t mc_socketid;
-+ uint16_t mc_coreid;
-+ uint16_t mc_core_threadid;
-+};
-+
-+#define MAX_UNION_SIZE 16
-+struct mcinfo_recovery {
-+ struct mcinfo_common common;
-+ uint16_t mc_bank; /* bank nr */
-+ /* Recovery Action Flags defined above such as REC_ACTION_RECOVERED */
-+ uint8_t action_flags;
-+ /* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */
-+ uint8_t action_types;
-+ /* In future if more than one recovery action permitted per error bank,
-+ * a mcinfo_recovery data array will be returned
-+ */
-+ union {
-+ struct page_offline_action page_retire;
-+ struct cpu_offline_action cpu_offline;
-+ uint8_t pad[MAX_UNION_SIZE];
-+ } action_info;
-+};
-+
-+
-+#define MCINFO_HYPERCALLSIZE 1024
-+#define MCINFO_MAXSIZE 768
-+
-+struct mc_info {
-+ /* Number of mcinfo_* entries in mi_data */
-+ uint32_t mi_nentries;
-+ uint32_t _pad0;
-+ uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
-+};
-+typedef struct mc_info mc_info_t;
-+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
-+
-+#define __MC_MSR_ARRAYSIZE 8
-+#define __MC_NMSRS 1
-+#define MC_NCAPS 7 /* 7 CPU feature flag words */
-+#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */
-+#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */
-+#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */
-+#define MC_CAPS_LINUX 3 /* Linux-defined */
-+#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */
-+#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */
-+#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */
-+
-+struct mcinfo_logical_cpu {
-+ uint32_t mc_cpunr;
-+ uint32_t mc_chipid;
-+ uint16_t mc_coreid;
-+ uint16_t mc_threadid;
-+ uint32_t mc_apicid;
-+ uint32_t mc_clusterid;
-+ uint32_t mc_ncores;
-+ uint32_t mc_ncores_active;
-+ uint32_t mc_nthreads;
-+ int32_t mc_cpuid_level;
-+ uint32_t mc_family;
-+ uint32_t mc_vendor;
-+ uint32_t mc_model;
-+ uint32_t mc_step;
-+ char mc_vendorid[16];
-+ char mc_brandid[64];
-+ uint32_t mc_cpu_caps[MC_NCAPS];
-+ uint32_t mc_cache_size;
-+ uint32_t mc_cache_alignment;
-+ int32_t mc_nmsrvals;
-+ struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
-+};
-+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t;
-+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
-+
-+
-+/*
-+ * OS's should use these instead of writing their own lookup function
-+ * each with its own bugs and drawbacks.
-+ * We use macros instead of static inline functions to allow guests
-+ * to include this header in assembly files (*.S).
-+ */
-+/* Prototype:
-+ * uint32_t x86_mcinfo_nentries(struct mc_info *mi);
-+ */
-+#define x86_mcinfo_nentries(_mi) \
-+ ((_mi)->mi_nentries)
-+/* Prototype:
-+ * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
-+ */
-+#define x86_mcinfo_first(_mi) \
-+ ((struct mcinfo_common *)(_mi)->mi_data)
-+/* Prototype:
-+ * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
-+ */
-+#define x86_mcinfo_next(_mic) \
-+ ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
-+
-+/* Prototype:
-+ * void x86_mcinfo_lookup(struct mcinfo_common **ret, struct mc_info *mi, uint16_t type);
-+ */
-+
-+static inline void x86_mcinfo_lookup
-+ (struct mcinfo_common **ret, struct mc_info *mi, uint16_t type)
-+{
-+ uint32_t found = 0, i;
-+ struct mcinfo_common *mic;
-+
-+ *ret = NULL;
-+ if (!mi)
-+ return;
-+ mic = x86_mcinfo_first(mi);
-+
-+ for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
-+ if (mic->type == type) {
-+ found = 1;
-+ break;
-+ }
-+ mic = x86_mcinfo_next(mic);
-+ }
-+
-+ *ret = found ? mic : NULL;
-+}
-+/* Usecase 1
-+ * Register machine check trap callback handler
-+ * (already done via "set_trap_table" hypercall)
-+ */
-+
-+/* Usecase 2
-+ * Dom0 registers machine check event callback handler
-+ * done by EVTCHNOP_bind_virq
-+ */
-+
-+/* Usecase 3
-+ * Fetch machine check data from hypervisor.
-+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
-+ */
-+#define XEN_MC_fetch 1
-+struct xen_mc_fetch {
-+ /* IN/OUT variables.
-+ * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
-+ * XEN_MC_ACK if ack'king an earlier fetch
-+ * OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
-+ * XEN_MC_NODATA, XEN_MC_NOMATCH
-+ */
-+ uint32_t flags;
-+ uint32_t _pad0;
-+ /* OUT: id for ack, IN: id we are ack'ing */
-+ uint64_t fetch_id;
-+
-+ /* OUT variables. */
-+ GUEST_HANDLE(mc_info) data;
-+};
-+typedef struct xen_mc_fetch xen_mc_fetch_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
-+
-+
-+/* Usecase 4
-+ * This tells the hypervisor to notify a DomU about the machine check error
-+ */
-+#define XEN_MC_notifydomain 2
-+struct xen_mc_notifydomain {
-+ /* IN variables. */
-+ uint16_t mc_domid;/* The unprivileged domain to notify. */
-+ uint16_t mc_vcpuid;/* The vcpu in mc_domid to notify.
-+ * Usually echo'd value from the fetch hypercall. */
-+
-+ /* IN/OUT variables. */
-+ uint32_t flags;
-+
-+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
-+};
-+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
-+
-+#define XEN_MC_physcpuinfo 3
-+struct xen_mc_physcpuinfo {
-+ /* IN/OUT */
-+ uint32_t ncpus;
-+ uint32_t _pad0;
-+ /* OUT */
-+ GUEST_HANDLE(mcinfo_logical_cpu) info;
-+};
-+
-+#define XEN_MC_msrinject 4
-+#define MC_MSRINJ_MAXMSRS 8
-+struct xen_mc_msrinject {
-+ /* IN */
-+ uint32_t mcinj_cpunr;/* target processor id */
-+ uint32_t mcinj_flags;/* see MC_MSRINJ_F_* below */
-+ uint32_t mcinj_count;/* 0 .. count-1 in array are valid */
-+ uint32_t _pad0;
-+ struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
-+};
-+
-+/* Flags for mcinj_flags above; bits 16-31 are reserved */
-+#define MC_MSRINJ_F_INTERPOSE 0x1
-+
-+#define XEN_MC_mceinject 5
-+struct xen_mc_mceinject {
-+ unsigned int mceinj_cpunr; /* target processor id */
-+};
-+
-+struct xen_mc {
-+ uint32_t cmd;
-+ uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
-+ union {
-+ struct xen_mc_fetch mc_fetch;
-+ struct xen_mc_notifydomain mc_notifydomain;
-+ struct xen_mc_physcpuinfo mc_physcpuinfo;
-+ struct xen_mc_msrinject mc_msrinject;
-+ struct xen_mc_mceinject mc_mceinject;
-+ } u;
-+};
-+typedef struct xen_mc xen_mc_t;
-+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
-+
-+#endif /* __ASSEMBLY__ */
-+
-+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
-diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
-index 2befa3e..9ffaee0 100644
---- a/include/xen/interface/xen.h
-+++ b/include/xen/interface/xen.h
-@@ -79,6 +79,7 @@
- #define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
- #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
- #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
-+#define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */
-
- /* Architecture-specific VIRQ definitions. */
- #define VIRQ_ARCH_0 16
-@@ -184,6 +185,8 @@
- #define MMUEXT_NEW_USER_BASEPTR 15
-
- #ifndef __ASSEMBLY__
-+#include <linux/types.h>
-+
- struct mmuext_op {
- unsigned int cmd;
- union {
-@@ -449,9 +452,49 @@ struct start_info {
- int8_t cmd_line[MAX_GUEST_CMDLINE];
- };
-
-+struct dom0_vga_console_info {
-+ uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
-+#define XEN_VGATYPE_TEXT_MODE_3 0x03
-+#define XEN_VGATYPE_VESA_LFB 0x23
-+
-+ union {
-+ struct {
-+ /* Font height, in pixels. */
-+ uint16_t font_height;
-+ /* Cursor location (column, row). */
-+ uint16_t cursor_x, cursor_y;
-+ /* Number of rows and columns (dimensions in characters). */
-+ uint16_t rows, columns;
-+ } text_mode_3;
-+
-+ struct {
-+ /* Width and height, in pixels. */
-+ uint16_t width, height;
-+ /* Bytes per scan line. */
-+ uint16_t bytes_per_line;
-+ /* Bits per pixel. */
-+ uint16_t bits_per_pixel;
-+ /* LFB physical address, and size (in units of 64kB). */
-+ uint32_t lfb_base;
-+ uint32_t lfb_size;
-+ /* RGB mask offsets and sizes, as defined by VBE 1.2+ */
-+ uint8_t red_pos, red_size;
-+ uint8_t green_pos, green_size;
-+ uint8_t blue_pos, blue_size;
-+ uint8_t rsvd_pos, rsvd_size;
-+
-+ /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
-+ uint32_t gbl_caps;
-+ /* Mode attributes (offset 0x0, VESA command 0x4f01). */
-+ uint16_t mode_attrs;
-+ } vesa_lfb;
-+ } u;
-+};
-+
- /* These flags are passed in the 'flags' field of start_info_t. */
- #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
- #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
-+#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
-
- typedef uint64_t cpumap_t;
-
-@@ -461,6 +504,8 @@ typedef uint8_t xen_domain_handle_t[16];
- #define __mk_unsigned_long(x) x ## UL
- #define mk_unsigned_long(x) __mk_unsigned_long(x)
-
-+DEFINE_GUEST_HANDLE(uint64_t);
-+
- #else /* __ASSEMBLY__ */
-
- /* In assembly code we cannot use C numeric constant suffixes. */
-diff --git a/include/xen/page.h b/include/xen/page.h
-index eaf85fa..0be36b9 100644
---- a/include/xen/page.h
-+++ b/include/xen/page.h
-@@ -1 +1,8 @@
-+#ifndef _XEN_PAGE_H
-+#define _XEN_PAGE_H
-+
- #include <asm/xen/page.h>
-+
-+extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
-+
-+#endif /* _XEN_PAGE_H */
-diff --git a/include/xen/pcpu.h b/include/xen/pcpu.h
-new file mode 100644
-index 0000000..7e8f9d1
---- /dev/null
-+++ b/include/xen/pcpu.h
-@@ -0,0 +1,32 @@
-+#ifndef _XEN_PCPU_H
-+#define _XEN_PCPU_H
-+
-+#include <xen/interface/platform.h>
-+#include <linux/sysdev.h>
-+
-+extern int xen_pcpu_hotplug(int type, uint32_t apic_id);
-+#define XEN_PCPU_ONLINE 0x01
-+#define XEN_PCPU_OFFLINE 0x02
-+#define XEN_PCPU_ADD 0x04
-+#define XEN_PCPU_REMOVE 0x08
-+
-+struct pcpu {
-+ struct list_head pcpu_list;
-+ struct sys_device sysdev;
-+ uint32_t xen_id;
-+ uint32_t apic_id;
-+ uint32_t acpi_id;
-+ uint32_t flags;
-+};
-+
-+static inline int xen_pcpu_online(uint32_t flags)
-+{
-+ return !!(flags & XEN_PCPU_FLAGS_ONLINE);
-+}
-+
-+extern int register_xen_pcpu_notifier(struct notifier_block *nb);
-+
-+extern void unregister_xen_pcpu_notifier(struct notifier_block *nb);
-+
-+extern int xen_pcpu_index(uint32_t acpi_id, int is_acpiid);
-+#endif
-diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
-new file mode 100644
-index 0000000..a785a3b
---- /dev/null
-+++ b/include/xen/platform_pci.h
-@@ -0,0 +1,53 @@
-+#ifndef _XEN_PLATFORM_PCI_H
-+#define _XEN_PLATFORM_PCI_H
-+
-+#define XEN_IOPORT_MAGIC_VAL 0x49d2
-+#define XEN_IOPORT_LINUX_PRODNUM 0x0003
-+#define XEN_IOPORT_LINUX_DRVVER 0x0001
-+
-+#define XEN_IOPORT_BASE 0x10
-+
-+#define XEN_IOPORT_PLATFLAGS (XEN_IOPORT_BASE + 0) /* 1 byte access (R/W) */
-+#define XEN_IOPORT_MAGIC (XEN_IOPORT_BASE + 0) /* 2 byte access (R) */
-+#define XEN_IOPORT_UNPLUG (XEN_IOPORT_BASE + 0) /* 2 byte access (W) */
-+#define XEN_IOPORT_DRVVER (XEN_IOPORT_BASE + 0) /* 4 byte access (W) */
-+
-+#define XEN_IOPORT_SYSLOG (XEN_IOPORT_BASE + 2) /* 1 byte access (W) */
-+#define XEN_IOPORT_PROTOVER (XEN_IOPORT_BASE + 2) /* 1 byte access (R) */
-+#define XEN_IOPORT_PRODNUM (XEN_IOPORT_BASE + 2) /* 2 byte access (W) */
-+
-+#define XEN_UNPLUG_ALL_IDE_DISKS (1<<0)
-+#define XEN_UNPLUG_ALL_NICS (1<<1)
-+#define XEN_UNPLUG_AUX_IDE_DISKS (1<<2)
-+#define XEN_UNPLUG_ALL (XEN_UNPLUG_ALL_IDE_DISKS|\
-+ XEN_UNPLUG_ALL_NICS|\
-+ XEN_UNPLUG_AUX_IDE_DISKS)
-+
-+#define XEN_UNPLUG_UNNECESSARY (1<<16)
-+#define XEN_UNPLUG_NEVER (1<<17)
-+
-+static inline int xen_must_unplug_nics(void) {
-+#if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \
-+ defined(CONFIG_XEN_NETDEV_FRONTEND_MODULE)) && \
-+ (defined(CONFIG_XEN_PLATFORM_PCI) || \
-+ defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
-+ return 1;
-+#else
-+ return 0;
-+#endif
-+}
-+
-+static inline int xen_must_unplug_disks(void) {
-+#if (defined(CONFIG_XEN_BLKDEV_FRONTEND) || \
-+ defined(CONFIG_XEN_BLKDEV_FRONTEND_MODULE)) && \
-+ (defined(CONFIG_XEN_PLATFORM_PCI) || \
-+ defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
-+ return 1;
-+#else
-+ return 0;
-+#endif
-+}
-+
-+extern int xen_platform_pci_unplug;
-+
-+#endif /* _XEN_PLATFORM_PCI_H */
-diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
-new file mode 100644
-index 0000000..b42cdfd
---- /dev/null
-+++ b/include/xen/privcmd.h
-@@ -0,0 +1,80 @@
-+/******************************************************************************
-+ * privcmd.h
-+ *
-+ * Interface to /proc/xen/privcmd.
-+ *
-+ * Copyright (c) 2003-2005, K A Fraser
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#ifndef __LINUX_PUBLIC_PRIVCMD_H__
-+#define __LINUX_PUBLIC_PRIVCMD_H__
-+
-+#include <linux/types.h>
-+
-+typedef unsigned long xen_pfn_t;
-+
-+#ifndef __user
-+#define __user
-+#endif
-+
-+struct privcmd_hypercall {
-+ __u64 op;
-+ __u64 arg[5];
-+};
-+
-+struct privcmd_mmap_entry {
-+ __u64 va;
-+ __u64 mfn;
-+ __u64 npages;
-+};
-+
-+struct privcmd_mmap {
-+ int num;
-+ domid_t dom; /* target domain */
-+ struct privcmd_mmap_entry __user *entry;
-+};
-+
-+struct privcmd_mmapbatch {
-+ int num; /* number of pages to populate */
-+ domid_t dom; /* target domain */
-+ __u64 addr; /* virtual address */
-+ xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
-+};
-+
-+/*
-+ * @cmd: IOCTL_PRIVCMD_HYPERCALL
-+ * @arg: pointer to struct privcmd_hypercall
-+ * Return: Value returned from execution of the specified hypercall.
-+ */
-+#define IOCTL_PRIVCMD_HYPERCALL \
-+ _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
-+#define IOCTL_PRIVCMD_MMAP \
-+ _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
-+#define IOCTL_PRIVCMD_MMAPBATCH \
-+ _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
-+
-+#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
-diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
-index 883a21b..7058f8a 100644
---- a/include/xen/xen-ops.h
-+++ b/include/xen/xen-ops.h
-@@ -7,6 +7,7 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
-
- void xen_pre_suspend(void);
- void xen_post_suspend(int suspend_cancelled);
-+void xen_hvm_post_suspend(int suspend_cancelled);
-
- void xen_mm_pin_all(void);
- void xen_mm_unpin_all(void);
-@@ -14,4 +15,16 @@ void xen_mm_unpin_all(void);
- void xen_timer_resume(void);
- void xen_arch_resume(void);
-
-+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
-+ unsigned long addr,
-+ unsigned long mfn, int nr,
-+ pgprot_t prot, unsigned domid);
-+
-+extern unsigned long *xen_contiguous_bitmap;
-+int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
-+ unsigned int address_bits);
-+
-+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
-+int xen_setup_shutdown_event(void);
-+
- #endif /* INCLUDE_XEN_OPS_H */
-diff --git a/include/xen/xen.h b/include/xen/xen.h
-new file mode 100644
-index 0000000..77604ed
---- /dev/null
-+++ b/include/xen/xen.h
-@@ -0,0 +1,34 @@
-+#ifndef _XEN_XEN_H
-+#define _XEN_XEN_H
-+
-+enum xen_domain_type {
-+ XEN_NATIVE, /* running on bare hardware */
-+ XEN_PV_DOMAIN, /* running in a PV domain */
-+ XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
-+};
-+
-+#ifdef CONFIG_XEN
-+extern enum xen_domain_type xen_domain_type;
-+extern void xen_hvm_guest_init(void);
-+#else
-+#define xen_domain_type XEN_NATIVE
-+#define xen_hvm_guest_init() do { } while (0)
-+#endif
-+
-+#define xen_domain() (xen_domain_type != XEN_NATIVE)
-+#define xen_pv_domain() (xen_domain() && \
-+ xen_domain_type == XEN_PV_DOMAIN)
-+#define xen_hvm_domain() (xen_domain() && \
-+ xen_domain_type == XEN_HVM_DOMAIN)
-+
-+#ifdef CONFIG_XEN_DOM0
-+#include <xen/interface/xen.h>
-+#include <asm/xen/hypervisor.h>
-+
-+#define xen_initial_domain() (xen_pv_domain() && \
-+ xen_start_info->flags & SIF_INITDOMAIN)
-+#else /* !CONFIG_XEN_DOM0 */
-+#define xen_initial_domain() (0)
-+#endif /* CONFIG_XEN_DOM0 */
-+
-+#endif /* _XEN_XEN_H */
-diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
-index b9763ba..542ca7c 100644
---- a/include/xen/xenbus.h
-+++ b/include/xen/xenbus.h
-@@ -93,7 +93,7 @@ struct xenbus_driver {
- int (*remove)(struct xenbus_device *dev);
- int (*suspend)(struct xenbus_device *dev, pm_message_t state);
- int (*resume)(struct xenbus_device *dev);
-- int (*uevent)(struct xenbus_device *, char **, int, char *, int);
-+ int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *);
- struct device_driver driver;
- int (*read_otherend_details)(struct xenbus_device *dev);
- int (*is_ready)(struct xenbus_device *dev);
-diff --git a/lib/Makefile b/lib/Makefile
-index 452f188..001e918 100644
---- a/lib/Makefile
-+++ b/lib/Makefile
-@@ -77,7 +77,8 @@ obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
- obj-$(CONFIG_SMP) += percpu_counter.o
- obj-$(CONFIG_AUDIT_GENERIC) += audit.o
-
--obj-$(CONFIG_SWIOTLB) += swiotlb.o
-+obj-$(CONFIG_SWIOTLB) += swiotlb-core.o swiotlb.o
-+obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
- obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
- obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
-
-diff --git a/lib/swiotlb-core.c b/lib/swiotlb-core.c
-new file mode 100644
-index 0000000..a17c89e
---- /dev/null
-+++ b/lib/swiotlb-core.c
-@@ -0,0 +1,572 @@
-+/*
-+ * Dynamic DMA mapping support.
-+ *
-+ * This implementation is a fallback for platforms that do not support
-+ * I/O TLBs (aka DMA address translation hardware).
-+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
-+ * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
-+ * Copyright (C) 2000, 2003 Hewlett-Packard Co
-+ * David Mosberger-Tang <davidm@hpl.hp.com>
-+ *
-+ * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
-+ * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
-+ * unnecessary i-cache flushing.
-+ * 04/07/.. ak Better overflow handling. Assorted fixes.
-+ * 05/09/10 linville Add support for syncing ranges, support syncing for
-+ * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
-+ * 08/12/11 beckyb Add highmem support
-+ */
-+
-+#include <linux/cache.h>
-+#include <linux/dma-mapping.h>
-+#include <linux/mm.h>
-+#include <linux/module.h>
-+#include <linux/spinlock.h>
-+#include <linux/string.h>
-+#include <linux/swiotlb.h>
-+#include <linux/pfn.h>
-+#include <linux/types.h>
-+#include <linux/ctype.h>
-+#include <linux/highmem.h>
-+
-+#include <linux/io.h>
-+#include <asm/dma.h>
-+#include <linux/scatterlist.h>
-+
-+#include <linux/init.h>
-+#include <linux/bootmem.h>
-+#include <linux/iommu-helper.h>
-+
-+#define OFFSET(val, align) ((unsigned long) ((val) & ((align) - 1)))
-+
-+#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
-+
-+/*
-+ * Minimum IO TLB size to bother booting with. Systems with mainly
-+ * 64bit capable cards will only lightly use the swiotlb. If we can't
-+ * allocate a contiguous 1MB, we're probably in trouble anyway.
-+ */
-+#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-+
-+int swiotlb_force;
-+
-+/*
-+ * Used to do a quick range check in do_unmap_single and
-+ * do_sync_single_*, to see if the memory was in fact allocated by this
-+ * API.
-+ */
-+char *io_tlb_start, *io_tlb_end;
-+
-+/*
-+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
-+ * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
-+ */
-+unsigned long io_tlb_nslabs;
-+
-+/*
-+ * When the IOMMU overflows we return a fallback buffer. This sets the size.
-+ */
-+unsigned long io_tlb_overflow = 32*1024;
-+
-+void *io_tlb_overflow_buffer;
-+
-+/*
-+ * This is a free list describing the number of free entries available from
-+ * each index
-+ */
-+static unsigned int *io_tlb_list;
-+static unsigned int io_tlb_index;
-+
-+/*
-+ * We need to save away the original address corresponding to a mapped entry
-+ * for the sync operations.
-+ */
-+static phys_addr_t *io_tlb_orig_addr;
-+
-+/*
-+ * Protect the above data structures in the map and unmap calls
-+ */
-+static DEFINE_SPINLOCK(io_tlb_lock);
-+
-+static int late_alloc;
-+
-+static int __init
-+setup_io_tlb_npages(char *str)
-+{
-+ int get_value(const char *token, char *str, char **endp)
-+ {
-+ ssize_t len;
-+ int val = 0;
-+
-+ len = strlen(token);
-+ if (!strncmp(str, token, len)) {
-+ str += len;
-+ if (*str == '=')
-+ ++str;
-+ if (*str != '\0')
-+ val = simple_strtoul(str, endp, 0);
-+ }
-+ *endp = str;
-+ return val;
-+ }
-+
-+ int val;
-+
-+ while (*str) {
-+ /* The old syntax */
-+ if (isdigit(*str)) {
-+ io_tlb_nslabs = simple_strtoul(str, &str, 0);
-+ /* avoid tail segment of size < IO_TLB_SEGSIZE */
-+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-+ }
-+ if (!strncmp(str, "force", 5))
-+ swiotlb_force = 1;
-+ /* The new syntax: swiotlb=nslabs=16384,overflow=32768,force */
-+ val = get_value("nslabs", str, &str);
-+ if (val)
-+ io_tlb_nslabs = ALIGN(val, IO_TLB_SEGSIZE);
-+
-+ val = get_value("overflow", str, &str);
-+ if (val)
-+ io_tlb_overflow = val;
-+ str = strpbrk(str, ",");
-+ if (!str)
-+ break;
-+ str++; /* skip ',' */
-+ }
-+ return 1;
-+}
-+__setup("swiotlb=", setup_io_tlb_npages);
-+
-+void swiotlb_print_info(void)
-+{
-+ unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-+ phys_addr_t pstart, pend;
-+
-+ pstart = virt_to_phys(io_tlb_start);
-+ pend = virt_to_phys(io_tlb_end);
-+
-+ printk(KERN_INFO "DMA: Placing %luMB software IO TLB between %p - %p\n",
-+ bytes >> 20, io_tlb_start, io_tlb_end);
-+ printk(KERN_INFO "DMA: software IO TLB at phys %#llx - %#llx\n",
-+ (unsigned long long)pstart,
-+ (unsigned long long)pend);
-+}
-+
-+/*
-+ * Statically reserve bounce buffer space and initialize bounce buffer data
-+ * structures for the software IO TLB used to implement the DMA API.
-+ */
-+void __init
-+swiotlb_init_early(size_t default_size, int verbose)
-+{
-+ unsigned long i, bytes;
-+
-+ if (!io_tlb_nslabs) {
-+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-+ }
-+
-+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-+
-+ /*
-+ * Get IO TLB memory from the low pages
-+ */
-+ io_tlb_start = alloc_bootmem_low_pages(bytes);
-+ if (!io_tlb_start)
-+ panic("DMA: Cannot allocate SWIOTLB buffer");
-+ io_tlb_end = io_tlb_start + bytes;
-+
-+ /*
-+ * Allocate and initialize the free list array. This array is used
-+ * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-+ * between io_tlb_start and io_tlb_end.
-+ */
-+ io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
-+ for (i = 0; i < io_tlb_nslabs; i++)
-+ io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-+ io_tlb_index = 0;
-+ io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
-+
-+ /*
-+ * Get the overflow emergency buffer
-+ */
-+ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
-+ if (!io_tlb_overflow_buffer)
-+ panic("DMA: Cannot allocate SWIOTLB overflow buffer!\n");
-+ if (verbose)
-+ swiotlb_print_info();
-+}
-+
-+void __init
-+swiotlb_init(int verbose)
-+{
-+ swiotlb_init_early(64 * (1<<20), verbose); /* default to 64MB */
-+}
-+
-+/*
-+ * Systems with larger DMA zones (those that don't support ISA) can
-+ * initialize the swiotlb later using the slab allocator if needed.
-+ * This should be just like above, but with some error catching.
-+ */
-+int
-+swiotlb_init_late(size_t default_size)
-+{
-+ unsigned long i, bytes, req_nslabs = io_tlb_nslabs;
-+ unsigned int order;
-+
-+ if (!io_tlb_nslabs) {
-+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-+ }
-+
-+ /*
-+ * Get IO TLB memory from the low pages
-+ */
-+ order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
-+ io_tlb_nslabs = SLABS_PER_PAGE << order;
-+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-+
-+ while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-+ io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-+ order);
-+ if (io_tlb_start)
-+ break;
-+ order--;
-+ }
-+
-+ if (!io_tlb_start)
-+ goto cleanup1;
-+
-+ if (order != get_order(bytes)) {
-+ printk(KERN_WARNING "DMA: Warning: only able to allocate %ld MB"
-+ " for software IO TLB\n", (PAGE_SIZE << order) >> 20);
-+ io_tlb_nslabs = SLABS_PER_PAGE << order;
-+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-+ }
-+ io_tlb_end = io_tlb_start + bytes;
-+ memset(io_tlb_start, 0, bytes);
-+
-+ /*
-+ * Allocate and initialize the free list array. This array is used
-+ * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-+ * between io_tlb_start and io_tlb_end.
-+ */
-+ io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-+ get_order(io_tlb_nslabs * sizeof(int)));
-+ if (!io_tlb_list)
-+ goto cleanup2;
-+
-+ for (i = 0; i < io_tlb_nslabs; i++)
-+ io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-+ io_tlb_index = 0;
-+
-+ io_tlb_orig_addr = (phys_addr_t *) __get_free_pages(GFP_KERNEL,
-+ get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-+ if (!io_tlb_orig_addr)
-+ goto cleanup3;
-+
-+ memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
-+
-+ /*
-+ * Get the overflow emergency buffer
-+ */
-+ io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-+ get_order(io_tlb_overflow));
-+ if (!io_tlb_overflow_buffer)
-+ goto cleanup4;
-+
-+ swiotlb_print_info();
-+
-+ late_alloc = 1;
-+
-+ return 0;
-+
-+cleanup4:
-+ free_pages((unsigned long)io_tlb_orig_addr,
-+ get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-+ io_tlb_orig_addr = NULL;
-+cleanup3:
-+ free_pages((unsigned long)io_tlb_list,
-+ get_order(io_tlb_nslabs * sizeof(int)));
-+ io_tlb_list = NULL;
-+cleanup2:
-+ io_tlb_end = NULL;
-+ free_pages((unsigned long)io_tlb_start, order);
-+ io_tlb_start = NULL;
-+cleanup1:
-+ io_tlb_nslabs = req_nslabs;
-+ return -ENOMEM;
-+}
-+
-+void __init swiotlb_free(void)
-+{
-+ if (!io_tlb_overflow_buffer)
-+ return;
-+
-+ if (late_alloc) {
-+ free_pages((unsigned long)io_tlb_overflow_buffer,
-+ get_order(io_tlb_overflow));
-+ free_pages((unsigned long)io_tlb_orig_addr,
-+ get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-+ free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-+ sizeof(int)));
-+ free_pages((unsigned long)io_tlb_start,
-+ get_order(io_tlb_nslabs << IO_TLB_SHIFT));
-+ } else {
-+ free_bootmem_late(__pa(io_tlb_overflow_buffer),
-+ io_tlb_overflow);
-+ free_bootmem_late(__pa(io_tlb_orig_addr),
-+ io_tlb_nslabs * sizeof(phys_addr_t));
-+ free_bootmem_late(__pa(io_tlb_list),
-+ io_tlb_nslabs * sizeof(int));
-+ free_bootmem_late(__pa(io_tlb_start),
-+ io_tlb_nslabs << IO_TLB_SHIFT);
-+ }
-+}
-+
-+int is_swiotlb_buffer(phys_addr_t paddr)
-+{
-+ return paddr >= virt_to_phys(io_tlb_start) &&
-+ paddr < virt_to_phys(io_tlb_end);
-+}
-+
-+/*
-+ * Bounce: copy the swiotlb buffer back to the original dma location
-+ */
-+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-+ enum dma_data_direction dir)
-+{
-+ unsigned long pfn = PFN_DOWN(phys);
-+
-+ if (PageHighMem(pfn_to_page(pfn))) {
-+ /* The buffer does not have a mapping. Map it in and copy */
-+ unsigned int offset = phys & ~PAGE_MASK;
-+ char *buffer;
-+ unsigned int sz = 0;
-+ unsigned long flags;
-+
-+ while (size) {
-+ sz = min_t(size_t, PAGE_SIZE - offset, size);
-+
-+ local_irq_save(flags);
-+ buffer = kmap_atomic(pfn_to_page(pfn),
-+ KM_BOUNCE_READ);
-+ if (dir == DMA_TO_DEVICE)
-+ memcpy(dma_addr, buffer + offset, sz);
-+ else
-+ memcpy(buffer + offset, dma_addr, sz);
-+ kunmap_atomic(buffer, KM_BOUNCE_READ);
-+ local_irq_restore(flags);
-+
-+ size -= sz;
-+ pfn++;
-+ dma_addr += sz;
-+ offset = 0;
-+ }
-+ } else {
-+ if (dir == DMA_TO_DEVICE)
-+ memcpy(dma_addr, phys_to_virt(phys), size);
-+ else
-+ memcpy(phys_to_virt(phys), dma_addr, size);
-+ }
-+}
-+
-+/*
-+ * Allocates bounce buffer and returns its kernel virtual address.
-+ */
-+void *
-+do_map_single(struct device *hwdev, phys_addr_t phys,
-+ unsigned long start_dma_addr, size_t size, int dir)
-+{
-+ unsigned long flags;
-+ char *dma_addr;
-+ unsigned int nslots, stride, index, wrap;
-+ int i;
-+ unsigned long mask;
-+ unsigned long offset_slots;
-+ unsigned long max_slots;
-+
-+ mask = dma_get_seg_boundary(hwdev);
-+ start_dma_addr = start_dma_addr & mask;
-+ offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-+
-+ /*
-+ * Carefully handle integer overflow which can occur when mask == ~0UL.
-+ */
-+ max_slots = mask + 1
-+ ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
-+ : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
-+
-+ /*
-+ * For mappings greater than a page, we limit the stride (and
-+ * hence alignment) to a page size.
-+ */
-+ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-+ if (size > PAGE_SIZE)
-+ stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-+ else
-+ stride = 1;
-+
-+ BUG_ON(!nslots);
-+
-+ /*
-+ * Find suitable number of IO TLB entries size that will fit this
-+ * request and allocate a buffer from that IO TLB pool.
-+ */
-+ spin_lock_irqsave(&io_tlb_lock, flags);
-+ index = ALIGN(io_tlb_index, stride);
-+ if (index >= io_tlb_nslabs)
-+ index = 0;
-+ wrap = index;
-+
-+ do {
-+ while (iommu_is_span_boundary(index, nslots, offset_slots,
-+ max_slots)) {
-+ index += stride;
-+ if (index >= io_tlb_nslabs)
-+ index = 0;
-+ if (index == wrap)
-+ goto not_found;
-+ }
-+
-+ /*
-+ * If we find a slot that indicates we have 'nslots' number of
-+ * contiguous buffers, we allocate the buffers from that slot
-+ * and mark the entries as '0' indicating unavailable.
-+ */
-+ if (io_tlb_list[index] >= nslots) {
-+ int count = 0;
-+
-+ for (i = index; i < (int) (index + nslots); i++)
-+ io_tlb_list[i] = 0;
-+ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE)
-+ != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-+ io_tlb_list[i] = ++count;
-+ dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-+
-+ /*
-+ * Update the indices to avoid searching in the next
-+ * round.
-+ */
-+ io_tlb_index = ((index + nslots) < io_tlb_nslabs
-+ ? (index + nslots) : 0);
-+
-+ goto found;
-+ }
-+ index += stride;
-+ if (index >= io_tlb_nslabs)
-+ index = 0;
-+ } while (index != wrap);
-+
-+not_found:
-+ spin_unlock_irqrestore(&io_tlb_lock, flags);
-+ return NULL;
-+found:
-+ spin_unlock_irqrestore(&io_tlb_lock, flags);
-+
-+ /*
-+ * Save away the mapping from the original address to the DMA address.
-+ * This is needed when we sync the memory. Then we sync the buffer if
-+ * needed.
-+ */
-+ for (i = 0; i < nslots; i++)
-+ io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
-+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
-+
-+ return dma_addr;
-+}
-+
-+/*
-+ * dma_addr is the kernel virtual address of the bounce buffer to unmap.
-+ */
-+void
-+do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
-+{
-+ unsigned long flags;
-+ int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-+ phys_addr_t phys = io_tlb_orig_addr[index];
-+
-+ /*
-+ * First, sync the memory before unmapping the entry
-+ */
-+ if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
-+
-+ /*
-+ * Return the buffer to the free list by setting the corresponding
-+ * entries to indicate the number of contiguous entries available.
-+ * While returning the entries to the free list, we merge the entries
-+ * with slots below and above the pool being returned.
-+ */
-+ spin_lock_irqsave(&io_tlb_lock, flags);
-+ {
-+ count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-+ io_tlb_list[index + nslots] : 0);
-+ /*
-+ * Step 1: return the slots to the free list, merging the
-+ * slots with succeeding slots
-+ */
-+ for (i = index + nslots - 1; i >= index; i--)
-+ io_tlb_list[i] = ++count;
-+ /*
-+ * Step 2: merge the returned slots with the preceding slots,
-+ * if available (non zero)
-+ */
-+ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) !=
-+ IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-+ io_tlb_list[i] = ++count;
-+ }
-+ spin_unlock_irqrestore(&io_tlb_lock, flags);
-+}
-+
-+void
-+do_sync_single(struct device *hwdev, char *dma_addr, size_t size,
-+ int dir, int target)
-+{
-+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-+ phys_addr_t phys = io_tlb_orig_addr[index];
-+
-+ phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
-+
-+ switch (target) {
-+ case SYNC_FOR_CPU:
-+ if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
-+ else
-+ BUG_ON(dir != DMA_TO_DEVICE);
-+ break;
-+ case SYNC_FOR_DEVICE:
-+ if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
-+ else
-+ BUG_ON(dir != DMA_FROM_DEVICE);
-+ break;
-+ default:
-+ BUG();
-+ }
-+}
-+void
-+swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
-+{
-+ /*
-+ * Called when no bounce-buffer space is left for a mapping of @size
-+ * bytes. Drivers often cannot cope with this (most never check
-+ * dma_mapping_error), so log it, then either return - for requests
-+ * that fit within io_tlb_overflow bytes, or when @do_panic is 0 -
-+ * or panic with a message chosen by the DMA direction @dir.
-+ */
-+ dev_err(dev, "DMA: Out of SW-IOMMU space for %zu bytes.\n", size);
-+
-+ if (size <= io_tlb_overflow || !do_panic)
-+ return;
-+
-+ if (dir == DMA_BIDIRECTIONAL)
-+ panic("DMA: Random memory could be DMA accessed\n");
-+ if (dir == DMA_FROM_DEVICE)
-+ panic("DMA: Random memory could be DMA written\n");
-+ if (dir == DMA_TO_DEVICE)
-+ panic("DMA: Random memory could be DMA read\n");
-+}
-diff --git a/lib/swiotlb-xen.c b/lib/swiotlb-xen.c
-new file mode 100644
-index 0000000..bee577f
---- /dev/null
-+++ b/lib/swiotlb-xen.c
-@@ -0,0 +1,504 @@
-+/* A software-based IOMMU that utilizes the swiotlb-core functionality.
-+ * It can function on Xen when there are PCI devices present. */
-+
-+
-+#include <linux/dma-mapping.h>
-+#include <linux/io.h>
-+#include <asm/dma.h>
-+#include <linux/scatterlist.h>
-+#include <xen/interface/xen.h>
-+#include <xen/grant_table.h>
-+
-+#include <asm/xen/page.h>
-+#include <xen/page.h>
-+#include <xen/xen-ops.h>
-+
-+static dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
-+{
-+ return phys_to_machine(XPADDR(paddr)).maddr; /* bus addr is the maddr */
-+}
-+
-+static phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
-+{
-+ return machine_to_phys(XMADDR(baddr)).paddr;
-+}
-+
-+static dma_addr_t xen_virt_to_bus(void *address)
-+{
-+ return xen_phys_to_bus(virt_to_phys(address));
-+}
-+
-+static int check_pages_physically_contiguous(unsigned long pfn,
-+ unsigned int offset,
-+ size_t length)
-+{
-+ unsigned long next_mfn;
-+ int i;
-+ int nr_pages;
-+
-+ next_mfn = pfn_to_mfn(pfn);
-+ nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
-+
-+ for (i = 1; i < nr_pages; i++) {
-+ if (pfn_to_mfn(++pfn) != ++next_mfn)
-+ return 0;
-+ }
-+ return 1;
-+}
-+
-+static int range_straddles_page_boundary(phys_addr_t p, size_t size)
-+{
-+ unsigned int offset = p & ~PAGE_MASK;
-+
-+ /* A range that ends within its first page cannot straddle anything. */
-+ if (offset + size <= PAGE_SIZE)
-+ return 0;
-+
-+ /* Otherwise it straddles unless the backing pages are contiguous. */
-+ return !check_pages_physically_contiguous(PFN_DOWN(p), offset, size);
-+}
-+
-+
-+bool xen_dma_capable(struct device *dev, dma_addr_t dev_addr,
-+ phys_addr_t phys, size_t size)
-+{
-+ /*
-+ * True only when dma_capable() accepts dev_addr/size for this device
-+ * and range_straddles_page_boundary() does not reject phys/size.
-+ */
-+ return dma_capable(dev, dev_addr, size) &&
-+ !range_straddles_page_boundary(phys, size);
-+}
-+
-+static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
-+{
-+ unsigned long mfn = PFN_DOWN(dma_addr);
-+ unsigned long pfn = mfn_to_local_pfn(mfn);
-+
-+ /* If the address is outside our domain, it CAN have the same virtual
-+ * address as another address in our domain. Hence only check address
-+ * within our domain. */
-+ if (pfn_valid(pfn))
-+ return is_swiotlb_buffer(PFN_PHYS(pfn));
-+
-+ return 0;
-+}
-+void *
-+xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-+ dma_addr_t *dma_handle, gfp_t flags)
-+{
-+ void *ret;
-+ int order = get_order(size);
-+ u64 dma_mask = DMA_BIT_MASK(32);
-+ unsigned long vstart;
-+
-+ /*
-+ * Ignore region specifiers - the kernel's ideas of
-+ * pseudo-phys memory layout has nothing to do with the
-+ * machine physical layout. We can't allocate highmem
-+ * because we can't return a pointer to it.
-+ */
-+ flags &= ~(__GFP_DMA | __GFP_HIGHMEM);
-+
-+ if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret))
-+ return ret;
-+
-+ vstart = __get_free_pages(flags, order);
-+ ret = (void *)vstart;
-+
-+ if (hwdev && hwdev->coherent_dma_mask)
-+ dma_mask = dma_alloc_coherent_mask(hwdev, flags);
-+
-+ if (ret) {
-+ if (xen_create_contiguous_region(vstart, order,
-+ fls64(dma_mask)) != 0) {
-+ free_pages(vstart, order);
-+ return NULL;
-+ }
-+ memset(ret, 0, size);
-+ *dma_handle = virt_to_machine(ret).maddr;
-+ }
-+ return ret;
-+}
-+EXPORT_SYMBOL(xen_swiotlb_alloc_coherent);
-+
-+void
-+xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
-+ dma_addr_t dev_addr)
-+{
-+ int order = get_order(size);
-+
-+ if (dma_release_from_coherent(hwdev, order, vaddr))
-+ return;
-+
-+ xen_destroy_contiguous_region((unsigned long)vaddr, order);
-+ free_pages((unsigned long)vaddr, order);
-+}
-+EXPORT_SYMBOL(xen_swiotlb_free_coherent);
-+
-+
-+static int max_dma_bits = 32;
-+
-+static int
-+xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
-+{
-+ int i, rc;
-+ int dma_bits;
-+
-+ printk(KERN_INFO "xen_swiotlb_fixup: buf=%p size=%zu\n",
-+ buf, size);
-+
-+ dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
-+
-+ i = 0;
-+ do {
-+ int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE);
-+
-+ do {
-+ rc = xen_create_contiguous_region(
-+ (unsigned long)buf + (i << IO_TLB_SHIFT),
-+ get_order(slabs << IO_TLB_SHIFT),
-+ dma_bits);
-+ } while (rc && dma_bits++ < max_dma_bits);
-+ if (rc)
-+ return rc;
-+
-+ i += slabs;
-+ } while(i < nslabs);
-+ return 0;
-+}
-+
-+void __init xen_swiotlb_init(int verbose)
-+{
-+ int rc = 0;
-+
-+ swiotlb_init_early(64 * (1<<20), verbose);
-+ /* Fix up the bounce pool, then the overflow buffer; failure is fatal. */
-+ rc = xen_swiotlb_fixup(io_tlb_start, io_tlb_nslabs << IO_TLB_SHIFT,
-+ io_tlb_nslabs);
-+ if (rc)
-+ goto error;
-+
-+ rc = xen_swiotlb_fixup(io_tlb_overflow_buffer, io_tlb_overflow,
-+ io_tlb_overflow >> IO_TLB_SHIFT);
-+ if (rc)
-+ goto error;
-+
-+ return;
-+error:
-+ panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
-+ "We either don't have the permission or you do not have enough "\
-+ "free memory under 4GB!\n", rc);
-+}
-+
-+/*
-+ * Map a single buffer of the indicated size for DMA in streaming mode. The
-+ * bus (DMA) address to use is returned.
-+ *
-+ * Once the device is given the dma address, the device owns this memory until
-+ * either xen_swiotlb_unmap_page or xen_swiotlb_sync_single_for_cpu is performed.
-+ */
-+dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
-+ unsigned long offset, size_t size,
-+ enum dma_data_direction dir,
-+ struct dma_attrs *attrs)
-+{
-+ unsigned long start_dma_addr;
-+ phys_addr_t phys = page_to_phys(page) + offset;
-+ dma_addr_t dev_addr = xen_phys_to_bus(phys);
-+ void *map;
-+
-+ BUG_ON(dir == DMA_NONE);
-+ /*
-+ * If the address happens to be in the device's DMA window,
-+ * we can safely return the device addr and not worry about bounce
-+ * buffering it.
-+ */
-+ if (dma_capable(dev, dev_addr, size) &&
-+ !range_straddles_page_boundary(phys, size) && !swiotlb_force)
-+ return dev_addr;
-+
-+ /*
-+ * Oh well, have to allocate and map a bounce buffer.
-+ */
-+ start_dma_addr = xen_virt_to_bus(io_tlb_start);
-+ map = do_map_single(dev, phys, start_dma_addr, size, dir);
-+ if (!map) {
-+ swiotlb_full(dev, size, dir, 1);
-+ map = io_tlb_overflow_buffer;
-+ }
-+
-+ dev_addr = xen_virt_to_bus(map);
-+
-+ /*
-+ * Ensure that the address returned is DMA'ble
-+ */
-+ if (!dma_capable(dev, dev_addr, size))
-+ panic("DMA: xen_swiotlb_map_single: bounce buffer is not " \
-+ "DMA'ble\n");
-+ return dev_addr;
-+}
-+EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
-+
-+/*
-+ * Unmap a single streaming mode DMA translation. The dma_addr and size must
-+ * match what was provided for in a previous xen_swiotlb_map_page call. All
-+ * other usages are undefined.
-+ *
-+ * After this call, reads by the cpu to the buffer are guaranteed to see
-+ * whatever the device wrote there.
-+ */
-+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-+ size_t size, int dir)
-+{
-+ phys_addr_t paddr = xen_bus_to_phys(dev_addr);
-+
-+ BUG_ON(dir == DMA_NONE);
-+
-+ /* NOTE: is_xen_swiotlb_buffer() takes the bus address: dev_addr, not paddr! */
-+ if (is_xen_swiotlb_buffer(dev_addr)) {
-+ do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
-+ return;
-+ }
-+
-+ if (dir != DMA_FROM_DEVICE)
-+ return;
-+
-+ /*
-+ * phys_to_virt doesn't work with highmem page but we could
-+ * call dma_mark_clean() with highmem page here. However, we
-+ * are fine since dma_mark_clean() is null on POWERPC. We can
-+ * make dma_mark_clean() take a physical address if necessary.
-+ */
-+ dma_mark_clean(phys_to_virt(paddr), size);
-+}
-+
-+void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-+ size_t size, enum dma_data_direction dir,
-+ struct dma_attrs *attrs)
-+{
-+ unmap_single(hwdev, dev_addr, size, dir);
-+}
-+EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_page);
-+
-+/*
-+ * Make physical memory consistent for a single streaming mode DMA translation
-+ * after a transfer.
-+ *
-+ * If you perform a xen_swiotlb_map_page() but wish to interrogate the buffer
-+ * using the cpu, yet do not wish to teardown the dma mapping, you must
-+ * call this function before doing so. At the next point you give the dma
-+ * address back to the card, you must first perform a
-+ * xen_swiotlb_sync_single_for_device, and then the device again owns the buffer
-+ */
-+static void
-+xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-+ size_t size, int dir, int target)
-+{
-+ phys_addr_t paddr = xen_bus_to_phys(dev_addr);
-+
-+ BUG_ON(dir == DMA_NONE);
-+
-+ if (is_xen_swiotlb_buffer(dev_addr)) {
-+ do_sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
-+ return;
-+ }
-+
-+ if (dir != DMA_FROM_DEVICE)
-+ return;
-+
-+ dma_mark_clean(phys_to_virt(paddr), size);
-+}
-+
-+void
-+xen_swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-+ size_t size, enum dma_data_direction dir)
-+{
-+ xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
-+}
-+EXPORT_SYMBOL(xen_swiotlb_sync_single_for_cpu);
-+
-+void
-+xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
-+ size_t size, enum dma_data_direction dir)
-+{
-+ xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
-+}
-+EXPORT_SYMBOL(xen_swiotlb_sync_single_for_device);
-+
-+/*
-+ * Same as above, but for a sub-range of the mapping.
-+ */
-+static void
-+xen_swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
-+ unsigned long offset, size_t size,
-+ int dir, int target)
-+{
-+ xen_swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
-+}
-+
-+void
-+xen_swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-+ unsigned long offset, size_t size,
-+ enum dma_data_direction dir)
-+{
-+ xen_swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-+ SYNC_FOR_CPU);
-+}
-+EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_range_for_cpu);
-+
-+void
-+xen_swiotlb_sync_single_range_for_device(struct device *hwdev,
-+ dma_addr_t dev_addr,
-+ unsigned long offset, size_t size,
-+ enum dma_data_direction dir)
-+{
-+ xen_swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-+ SYNC_FOR_DEVICE);
-+}
-+EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_range_for_device);
-+
-+/*
-+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
-+ * This is the scatter-gather version of the above xen_swiotlb_map_page
-+ * interface. Here the scatter gather list elements are each tagged with the
-+ * appropriate dma address and length. They are obtained via
-+ * sg_dma_{address,length}(SG).
-+ *
-+ * NOTE: An implementation may be able to use a smaller number of
-+ * DMA address/length pairs than there are SG table elements.
-+ * (for example via virtual mapping capabilities)
-+ * The routine returns the number of addr/length pairs actually
-+ * used, at most nents.
-+ *
-+ * Device ownership issues as mentioned above for xen_swiotlb_map_page are the
-+ * same here.
-+ */
-+int
-+xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-+ int nelems, enum dma_data_direction dir,
-+ struct dma_attrs *attrs)
-+{
-+ unsigned long start_dma_addr;
-+ struct scatterlist *sg;
-+ int i;
-+ BUG_ON(dir == DMA_NONE);
-+
-+ start_dma_addr = xen_virt_to_bus(io_tlb_start);
-+ for_each_sg(sgl, sg, nelems, i) {
-+ phys_addr_t paddr = sg_phys(sg);
-+ dma_addr_t dev_addr = xen_phys_to_bus(paddr);
-+
-+ if (swiotlb_force ||
-+ !dma_capable(hwdev, dev_addr, sg->length) ||
-+ range_straddles_page_boundary(paddr, sg->length)) {
-+ void *map = do_map_single(hwdev, sg_phys(sg),
-+ start_dma_addr,
-+ sg->length, dir);
-+ if (!map) {
-+ /* Don't panic here, we expect map_sg users
-+ to do proper error handling. */
-+ swiotlb_full(hwdev, sg->length, dir, 0);
-+ xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
-+ attrs);
-+ sgl[0].dma_length = 0;
-+ return 0;
-+ }
-+ sg->dma_address = xen_virt_to_bus(map);
-+ } else
-+ sg->dma_address = dev_addr;
-+ sg->dma_length = sg->length;
-+ }
-+ return nelems;
-+}
-+EXPORT_SYMBOL(xen_swiotlb_map_sg_attrs);
-+
-+int
-+xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-+ int dir)
-+{
-+ return xen_swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
-+}
-+EXPORT_SYMBOL(xen_swiotlb_map_sg);
-+
-+/*
-+ * Unmap a set of streaming mode DMA translations. Again, cpu read rules
-+ * concerning calls here are the same as for xen_swiotlb_unmap_page() above.
-+ */
-+void
-+xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-+ int nelems, enum dma_data_direction dir,
-+ struct dma_attrs *attrs)
-+{
-+ struct scatterlist *sg;
-+ int i;
-+
-+ BUG_ON(dir == DMA_NONE);
-+
-+ for_each_sg(sgl, sg, nelems, i)
-+ unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
-+
-+}
-+EXPORT_SYMBOL(xen_swiotlb_unmap_sg_attrs);
-+
-+void
-+xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-+ int dir)
-+{
-+ xen_swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); /* no attrs */
-+}
-+EXPORT_SYMBOL(xen_swiotlb_unmap_sg);
-+
-+/*
-+ * Make physical memory consistent for a set of streaming mode DMA translations
-+ * after a transfer.
-+ *
-+ * The same as xen_swiotlb_sync_single_* but for a scatter-gather list,
-+ * same rules and usage.
-+ */
-+static void
-+xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-+ int nelems, int dir, int target)
-+{
-+ struct scatterlist *sg;
-+ int i;
-+
-+ for_each_sg(sgl, sg, nelems, i)
-+ xen_swiotlb_sync_single(hwdev, sg->dma_address,
-+ sg->dma_length, dir, target);
-+}
-+
-+void
-+xen_swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-+ int nelems, enum dma_data_direction dir)
-+{
-+ xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
-+}
-+EXPORT_SYMBOL(xen_swiotlb_sync_sg_for_cpu);
-+
-+void
-+xen_swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
-+ int nelems, enum dma_data_direction dir)
-+{
-+ xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
-+}
-+EXPORT_SYMBOL(xen_swiotlb_sync_sg_for_device);
-+
-+int
-+xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-+{
-+ return (dma_addr == xen_virt_to_bus(io_tlb_overflow_buffer));
-+}
-+EXPORT_SYMBOL(xen_swiotlb_dma_mapping_error);
-+
-+/*
-+ * Return whether the given device DMA address mask can be supported
-+ * properly. For example, if your device can only drive the low 24-bits
-+ * during bus mastering, then you would pass 0x00ffffff as the mask to
-+ * this function.
-+ */
-+int
-+xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
-+{
-+ return xen_virt_to_bus(io_tlb_end - 1) <= mask;
-+}
-+EXPORT_SYMBOL(xen_swiotlb_dma_supported);
-diff --git a/lib/swiotlb.c b/lib/swiotlb.c
-index ac25cd2..f6bbcd1 100644
---- a/lib/swiotlb.c
-+++ b/lib/swiotlb.c
-@@ -1,118 +1,11 @@
--/*
-- * Dynamic DMA mapping support.
-- *
-- * This implementation is a fallback for platforms that do not support
-- * I/O TLBs (aka DMA address translation hardware).
-- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
-- * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
-- * Copyright (C) 2000, 2003 Hewlett-Packard Co
-- * David Mosberger-Tang <davidm@hpl.hp.com>
-- *
-- * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
-- * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
-- * unnecessary i-cache flushing.
-- * 04/07/.. ak Better overflow handling. Assorted fixes.
-- * 05/09/10 linville Add support for syncing ranges, support syncing for
-- * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
-- * 08/12/11 beckyb Add highmem support
-- */
-
--#include <linux/cache.h>
- #include <linux/dma-mapping.h>
--#include <linux/mm.h>
- #include <linux/module.h>
--#include <linux/spinlock.h>
--#include <linux/string.h>
- #include <linux/swiotlb.h>
--#include <linux/pfn.h>
--#include <linux/types.h>
--#include <linux/ctype.h>
--#include <linux/highmem.h>
-
--#include <asm/io.h>
--#include <asm/dma.h>
- #include <asm/scatterlist.h>
--
--#include <linux/init.h>
--#include <linux/bootmem.h>
- #include <linux/iommu-helper.h>
-
--#define OFFSET(val,align) ((unsigned long) \
-- ( (val) & ( (align) - 1)))
--
--#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
--
--/*
-- * Minimum IO TLB size to bother booting with. Systems with mainly
-- * 64bit capable cards will only lightly use the swiotlb. If we can't
-- * allocate a contiguous 1MB, we're probably in trouble anyway.
-- */
--#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
--
--/*
-- * Enumeration for sync targets
-- */
--enum dma_sync_target {
-- SYNC_FOR_CPU = 0,
-- SYNC_FOR_DEVICE = 1,
--};
--
--int swiotlb_force;
--
--/*
-- * Used to do a quick range check in unmap_single and
-- * sync_single_*, to see if the memory was in fact allocated by this
-- * API.
-- */
--static char *io_tlb_start, *io_tlb_end;
--
--/*
-- * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and
-- * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
-- */
--static unsigned long io_tlb_nslabs;
--
--/*
-- * When the IOMMU overflows we return a fallback buffer. This sets the size.
-- */
--static unsigned long io_tlb_overflow = 32*1024;
--
--void *io_tlb_overflow_buffer;
--
--/*
-- * This is a free list describing the number of free entries available from
-- * each index
-- */
--static unsigned int *io_tlb_list;
--static unsigned int io_tlb_index;
--
--/*
-- * We need to save away the original address corresponding to a mapped entry
-- * for the sync operations.
-- */
--static phys_addr_t *io_tlb_orig_addr;
--
--/*
-- * Protect the above data structures in the map and unmap calls
-- */
--static DEFINE_SPINLOCK(io_tlb_lock);
--
--static int __init
--setup_io_tlb_npages(char *str)
--{
-- if (isdigit(*str)) {
-- io_tlb_nslabs = simple_strtoul(str, &str, 0);
-- /* avoid tail segment of size < IO_TLB_SEGSIZE */
-- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-- }
-- if (*str == ',')
-- ++str;
-- if (!strcmp(str, "force"))
-- swiotlb_force = 1;
-- return 1;
--}
--__setup("swiotlb=", setup_io_tlb_npages);
--/* make io_tlb_overflow tunable too? */
-
- /* Note that this doesn't work with highmem page */
- static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
-@@ -120,390 +13,6 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
- {
- return phys_to_dma(hwdev, virt_to_phys(address));
- }
--
--static void swiotlb_print_info(unsigned long bytes)
--{
-- phys_addr_t pstart, pend;
--
-- pstart = virt_to_phys(io_tlb_start);
-- pend = virt_to_phys(io_tlb_end);
--
-- printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n",
-- bytes >> 20, io_tlb_start, io_tlb_end);
-- printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n",
-- (unsigned long long)pstart,
-- (unsigned long long)pend);
--}
--
--/*
-- * Statically reserve bounce buffer space and initialize bounce buffer data
-- * structures for the software IO TLB used to implement the DMA API.
-- */
--void __init
--swiotlb_init_with_default_size(size_t default_size)
--{
-- unsigned long i, bytes;
--
-- if (!io_tlb_nslabs) {
-- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-- }
--
-- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
--
-- /*
-- * Get IO TLB memory from the low pages
-- */
-- io_tlb_start = alloc_bootmem_low_pages(bytes);
-- if (!io_tlb_start)
-- panic("Cannot allocate SWIOTLB buffer");
-- io_tlb_end = io_tlb_start + bytes;
--
-- /*
-- * Allocate and initialize the free list array. This array is used
-- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-- * between io_tlb_start and io_tlb_end.
-- */
-- io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
-- for (i = 0; i < io_tlb_nslabs; i++)
-- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-- io_tlb_index = 0;
-- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
--
-- /*
-- * Get the overflow emergency buffer
-- */
-- io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
-- if (!io_tlb_overflow_buffer)
-- panic("Cannot allocate SWIOTLB overflow buffer!\n");
--
-- swiotlb_print_info(bytes);
--}
--
--void __init
--swiotlb_init(void)
--{
-- swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */
--}
--
--/*
-- * Systems with larger DMA zones (those that don't support ISA) can
-- * initialize the swiotlb later using the slab allocator if needed.
-- * This should be just like above, but with some error catching.
-- */
--int
--swiotlb_late_init_with_default_size(size_t default_size)
--{
-- unsigned long i, bytes, req_nslabs = io_tlb_nslabs;
-- unsigned int order;
--
-- if (!io_tlb_nslabs) {
-- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-- }
--
-- /*
-- * Get IO TLB memory from the low pages
-- */
-- order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
-- io_tlb_nslabs = SLABS_PER_PAGE << order;
-- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
--
-- while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-- io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-- order);
-- if (io_tlb_start)
-- break;
-- order--;
-- }
--
-- if (!io_tlb_start)
-- goto cleanup1;
--
-- if (order != get_order(bytes)) {
-- printk(KERN_WARNING "Warning: only able to allocate %ld MB "
-- "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
-- io_tlb_nslabs = SLABS_PER_PAGE << order;
-- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-- }
-- io_tlb_end = io_tlb_start + bytes;
-- memset(io_tlb_start, 0, bytes);
--
-- /*
-- * Allocate and initialize the free list array. This array is used
-- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-- * between io_tlb_start and io_tlb_end.
-- */
-- io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-- get_order(io_tlb_nslabs * sizeof(int)));
-- if (!io_tlb_list)
-- goto cleanup2;
--
-- for (i = 0; i < io_tlb_nslabs; i++)
-- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-- io_tlb_index = 0;
--
-- io_tlb_orig_addr = (phys_addr_t *)
-- __get_free_pages(GFP_KERNEL,
-- get_order(io_tlb_nslabs *
-- sizeof(phys_addr_t)));
-- if (!io_tlb_orig_addr)
-- goto cleanup3;
--
-- memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
--
-- /*
-- * Get the overflow emergency buffer
-- */
-- io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-- get_order(io_tlb_overflow));
-- if (!io_tlb_overflow_buffer)
-- goto cleanup4;
--
-- swiotlb_print_info(bytes);
--
-- return 0;
--
--cleanup4:
-- free_pages((unsigned long)io_tlb_orig_addr,
-- get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-- io_tlb_orig_addr = NULL;
--cleanup3:
-- free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-- sizeof(int)));
-- io_tlb_list = NULL;
--cleanup2:
-- io_tlb_end = NULL;
-- free_pages((unsigned long)io_tlb_start, order);
-- io_tlb_start = NULL;
--cleanup1:
-- io_tlb_nslabs = req_nslabs;
-- return -ENOMEM;
--}
--
--static int is_swiotlb_buffer(phys_addr_t paddr)
--{
-- return paddr >= virt_to_phys(io_tlb_start) &&
-- paddr < virt_to_phys(io_tlb_end);
--}
--
--/*
-- * Bounce: copy the swiotlb buffer back to the original dma location
-- */
--static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-- enum dma_data_direction dir)
--{
-- unsigned long pfn = PFN_DOWN(phys);
--
-- if (PageHighMem(pfn_to_page(pfn))) {
-- /* The buffer does not have a mapping. Map it in and copy */
-- unsigned int offset = phys & ~PAGE_MASK;
-- char *buffer;
-- unsigned int sz = 0;
-- unsigned long flags;
--
-- while (size) {
-- sz = min_t(size_t, PAGE_SIZE - offset, size);
--
-- local_irq_save(flags);
-- buffer = kmap_atomic(pfn_to_page(pfn),
-- KM_BOUNCE_READ);
-- if (dir == DMA_TO_DEVICE)
-- memcpy(dma_addr, buffer + offset, sz);
-- else
-- memcpy(buffer + offset, dma_addr, sz);
-- kunmap_atomic(buffer, KM_BOUNCE_READ);
-- local_irq_restore(flags);
--
-- size -= sz;
-- pfn++;
-- dma_addr += sz;
-- offset = 0;
-- }
-- } else {
-- if (dir == DMA_TO_DEVICE)
-- memcpy(dma_addr, phys_to_virt(phys), size);
-- else
-- memcpy(phys_to_virt(phys), dma_addr, size);
-- }
--}
--
--/*
-- * Allocates bounce buffer and returns its kernel virtual address.
-- */
--static void *
--map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
--{
-- unsigned long flags;
-- char *dma_addr;
-- unsigned int nslots, stride, index, wrap;
-- int i;
-- unsigned long start_dma_addr;
-- unsigned long mask;
-- unsigned long offset_slots;
-- unsigned long max_slots;
--
-- mask = dma_get_seg_boundary(hwdev);
-- start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;
--
-- offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
--
-- /*
-- * Carefully handle integer overflow which can occur when mask == ~0UL.
-- */
-- max_slots = mask + 1
-- ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
-- : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
--
-- /*
-- * For mappings greater than a page, we limit the stride (and
-- * hence alignment) to a page size.
-- */
-- nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-- if (size > PAGE_SIZE)
-- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-- else
-- stride = 1;
--
-- BUG_ON(!nslots);
--
-- /*
-- * Find suitable number of IO TLB entries size that will fit this
-- * request and allocate a buffer from that IO TLB pool.
-- */
-- spin_lock_irqsave(&io_tlb_lock, flags);
-- index = ALIGN(io_tlb_index, stride);
-- if (index >= io_tlb_nslabs)
-- index = 0;
-- wrap = index;
--
-- do {
-- while (iommu_is_span_boundary(index, nslots, offset_slots,
-- max_slots)) {
-- index += stride;
-- if (index >= io_tlb_nslabs)
-- index = 0;
-- if (index == wrap)
-- goto not_found;
-- }
--
-- /*
-- * If we find a slot that indicates we have 'nslots' number of
-- * contiguous buffers, we allocate the buffers from that slot
-- * and mark the entries as '0' indicating unavailable.
-- */
-- if (io_tlb_list[index] >= nslots) {
-- int count = 0;
--
-- for (i = index; i < (int) (index + nslots); i++)
-- io_tlb_list[i] = 0;
-- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-- io_tlb_list[i] = ++count;
-- dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
--
-- /*
-- * Update the indices to avoid searching in the next
-- * round.
-- */
-- io_tlb_index = ((index + nslots) < io_tlb_nslabs
-- ? (index + nslots) : 0);
--
-- goto found;
-- }
-- index += stride;
-- if (index >= io_tlb_nslabs)
-- index = 0;
-- } while (index != wrap);
--
--not_found:
-- spin_unlock_irqrestore(&io_tlb_lock, flags);
-- return NULL;
--found:
-- spin_unlock_irqrestore(&io_tlb_lock, flags);
--
-- /*
-- * Save away the mapping from the original address to the DMA address.
-- * This is needed when we sync the memory. Then we sync the buffer if
-- * needed.
-- */
-- for (i = 0; i < nslots; i++)
-- io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
-- if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-- swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
--
-- return dma_addr;
--}
--
--/*
-- * dma_addr is the kernel virtual address of the bounce buffer to unmap.
-- */
--static void
--do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
--{
-- unsigned long flags;
-- int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-- phys_addr_t phys = io_tlb_orig_addr[index];
--
-- /*
-- * First, sync the memory before unmapping the entry
-- */
-- if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-- swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
--
-- /*
-- * Return the buffer to the free list by setting the corresponding
-- * entries to indicate the number of contigous entries available.
-- * While returning the entries to the free list, we merge the entries
-- * with slots below and above the pool being returned.
-- */
-- spin_lock_irqsave(&io_tlb_lock, flags);
-- {
-- count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-- io_tlb_list[index + nslots] : 0);
-- /*
-- * Step 1: return the slots to the free list, merging the
-- * slots with superceeding slots
-- */
-- for (i = index + nslots - 1; i >= index; i--)
-- io_tlb_list[i] = ++count;
-- /*
-- * Step 2: merge the returned slots with the preceding slots,
-- * if available (non zero)
-- */
-- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-- io_tlb_list[i] = ++count;
-- }
-- spin_unlock_irqrestore(&io_tlb_lock, flags);
--}
--
--static void
--sync_single(struct device *hwdev, char *dma_addr, size_t size,
-- int dir, int target)
--{
-- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-- phys_addr_t phys = io_tlb_orig_addr[index];
--
-- phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
--
-- switch (target) {
-- case SYNC_FOR_CPU:
-- if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-- swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
-- else
-- BUG_ON(dir != DMA_TO_DEVICE);
-- break;
-- case SYNC_FOR_DEVICE:
-- if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-- swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
-- else
-- BUG_ON(dir != DMA_FROM_DEVICE);
-- break;
-- default:
-- BUG();
-- }
--}
--
- void *
- swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
-@@ -512,12 +21,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- void *ret;
- int order = get_order(size);
- u64 dma_mask = DMA_BIT_MASK(32);
-+ unsigned long start_dma_addr;
-
- if (hwdev && hwdev->coherent_dma_mask)
- dma_mask = hwdev->coherent_dma_mask;
-
- ret = (void *)__get_free_pages(flags, order);
-- if (ret && swiotlb_virt_to_bus(hwdev, ret) + size > dma_mask) {
-+ if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
- /*
- * The allocated memory isn't reachable by the device.
- */
-@@ -527,10 +37,12 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- if (!ret) {
- /*
- * We are either out of memory or the device can't DMA
-- * to GFP_DMA memory; fall back on map_single(), which
-+ * to GFP_DMA memory; fall back on do_map_single(), which
- * will grab memory from the lowest available address range.
- */
-- ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
-+ start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
-+ ret = do_map_single(hwdev, 0, start_dma_addr, size,
-+ DMA_FROM_DEVICE);
- if (!ret)
- return NULL;
- }
-@@ -539,12 +51,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dev_addr = swiotlb_virt_to_bus(hwdev, ret);
-
- /* Confirm address can be DMA'd by device */
-- if (dev_addr + size > dma_mask) {
-- printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-+ if (dev_addr + size - 1 > dma_mask) {
-+ dev_err(hwdev, "DMA: hwdev DMA mask = 0x%016Lx, " \
-+ "dev_addr = 0x%016Lx\n",
- (unsigned long long)dma_mask,
- (unsigned long long)dev_addr);
-
-- /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-+ /* DMA_TO_DEVICE to avoid memcpy in do_unmap_single */
- do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
- return NULL;
- }
-@@ -563,35 +76,11 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
- if (!is_swiotlb_buffer(paddr))
- free_pages((unsigned long)vaddr, get_order(size));
- else
-- /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-+ /* DMA_TO_DEVICE to avoid memcpy in do_unmap_single */
- do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
- }
- EXPORT_SYMBOL(swiotlb_free_coherent);
-
--static void
--swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
--{
-- /*
-- * Ran out of IOMMU space for this operation. This is very bad.
-- * Unfortunately the drivers cannot handle this operation properly.
-- * unless they check for dma_mapping_error (most don't)
-- * When the mapping is small enough return a static buffer to limit
-- * the damage, or panic when the transfer is too big.
-- */
-- printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
-- "device %s\n", size, dev ? dev_name(dev) : "?");
--
-- if (size <= io_tlb_overflow || !do_panic)
-- return;
--
-- if (dir == DMA_BIDIRECTIONAL)
-- panic("DMA: Random memory could be DMA accessed\n");
-- if (dir == DMA_FROM_DEVICE)
-- panic("DMA: Random memory could be DMA written\n");
-- if (dir == DMA_TO_DEVICE)
-- panic("DMA: Random memory could be DMA read\n");
--}
--
- /*
- * Map a single buffer of the indicated size for DMA in streaming mode. The
- * physical address to use is returned.
-@@ -604,6 +93,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
- enum dma_data_direction dir,
- struct dma_attrs *attrs)
- {
-+ unsigned long start_dma_addr;
- phys_addr_t phys = page_to_phys(page) + offset;
- dma_addr_t dev_addr = phys_to_dma(dev, phys);
- void *map;
-@@ -620,7 +110,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
- /*
- * Oh well, have to allocate and map a bounce buffer.
- */
-- map = map_single(dev, phys, size, dir);
-+ start_dma_addr = swiotlb_virt_to_bus(dev, io_tlb_start);
-+ map = do_map_single(dev, phys, start_dma_addr, size, dir);
- if (!map) {
- swiotlb_full(dev, size, dir, 1);
- map = io_tlb_overflow_buffer;
-@@ -632,7 +123,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
- * Ensure that the address returned is DMA'ble
- */
- if (!dma_capable(dev, dev_addr, size))
-- panic("map_single: bounce buffer is not DMA'ble");
-+ panic("DMA: swiotlb_map_single: bounce buffer is not DMA'ble");
-
- return dev_addr;
- }
-@@ -697,7 +188,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
- BUG_ON(dir == DMA_NONE);
-
- if (is_swiotlb_buffer(paddr)) {
-- sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
-+ do_sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
- return;
- }
-
-@@ -774,19 +265,22 @@ int
- swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir, struct dma_attrs *attrs)
- {
-+ unsigned long start_dma_addr;
- struct scatterlist *sg;
- int i;
-
- BUG_ON(dir == DMA_NONE);
-
-+ start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
- for_each_sg(sgl, sg, nelems, i) {
- phys_addr_t paddr = sg_phys(sg);
- dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
-
- if (swiotlb_force ||
- !dma_capable(hwdev, dev_addr, sg->length)) {
-- void *map = map_single(hwdev, sg_phys(sg),
-- sg->length, dir);
-+ void *map = do_map_single(hwdev, sg_phys(sg),
-+ start_dma_addr,
-+ sg->length, dir);
- if (!map) {
- /* Don't panic here, we expect map_sg users
- to do proper error handling. */
-@@ -819,7 +313,8 @@ EXPORT_SYMBOL(swiotlb_map_sg);
- */
- void
- swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-- int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
-+ int nelems, enum dma_data_direction dir,
-+ struct dma_attrs *attrs)
- {
- struct scatterlist *sg;
- int i;
-diff --git a/mm/bootmem.c b/mm/bootmem.c
-index 555d5d2..d1dc23c 100644
---- a/mm/bootmem.c
-+++ b/mm/bootmem.c
-@@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
- return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
- }
-
-+/*
-+ * free_bootmem_late - free bootmem pages directly to page allocator
-+ * @addr: starting address of the range
-+ * @size: size of the range in bytes
-+ *
-+ * This is only useful when the bootmem allocator has already been torn
-+ * down, but we are still initializing the system. Pages are given directly
-+ * to the page allocator, no bootmem metadata is updated because it is gone.
-+ */
-+void __init free_bootmem_late(unsigned long addr, unsigned long size)
-+{
-+ unsigned long cursor, end;
-+
-+ kmemleak_free_part(__va(addr), size);
-+
-+ cursor = PFN_UP(addr);
-+ end = PFN_DOWN(addr + size);
-+
-+ for (; cursor < end; cursor++) {
-+ __free_pages_bootmem(pfn_to_page(cursor), 0);
-+ totalram_pages++;
-+ }
-+}
-+
- static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
- {
- int aligned;
-diff --git a/mm/memory.c b/mm/memory.c
-index 53c1da0..c8741df 100644
---- a/mm/memory.c
-+++ b/mm/memory.c
-@@ -553,6 +553,13 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
- if (is_zero_pfn(pfn))
- return NULL;
- check_pfn:
-+
-+#if defined(CONFIG_XEN) && defined(CONFIG_X86)
-+ /* XEN: Covers user-space grant mappings (even of local pages). */
-+ if (unlikely(vma->vm_flags & VM_FOREIGN))
-+ return NULL;
-+#endif
-+
- if (unlikely(pfn > highest_memmap_pfn)) {
- print_bad_pte(vma, addr, pte, NULL);
- return NULL;
-@@ -839,8 +846,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
- page->index > details->last_index))
- continue;
- }
-- ptent = ptep_get_and_clear_full(mm, addr, pte,
-- tlb->fullmm);
-+ if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte))
-+ ptent = vma->vm_ops->zap_pte(vma, addr, pte,
-+ tlb->fullmm);
-+ else
-+ ptent = ptep_get_and_clear_full(mm, addr, pte,
-+ tlb->fullmm);
- tlb_remove_tlb_entry(tlb, pte, addr);
- if (unlikely(!page))
- continue;
-@@ -1100,6 +1111,7 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
- tlb_finish_mmu(tlb, address, end);
- return end;
- }
-+EXPORT_SYMBOL_GPL(zap_page_range);
-
- /**
- * zap_vma_ptes - remove ptes mapping the vma
-@@ -1306,6 +1318,29 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
- continue;
- }
-
-+#ifdef CONFIG_XEN
-+ if (vma && (vma->vm_flags & VM_FOREIGN)) {
-+ struct vm_foreign_map *foreign_map =
-+ vma->vm_private_data;
-+ struct page **map = foreign_map->map;
-+ int offset = (start - vma->vm_start) >> PAGE_SHIFT;
-+ if (map[offset] != NULL) {
-+ if (pages) {
-+ struct page *page = map[offset];
-+
-+ pages[i] = page;
-+ get_page(page);
-+ }
-+ if (vmas)
-+ vmas[i] = vma;
-+ i++;
-+ start += PAGE_SIZE;
-+ nr_pages--;
-+ continue;
-+ }
-+ }
-+#endif
-+
- if (!vma ||
- (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
- !(vm_flags & vma->vm_flags))
-@@ -1781,6 +1816,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
-
- vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
-
-+#ifdef CONFIG_XEN
-+ vma->vm_mm->context.has_foreign_mappings = 1;
-+#endif
-+
- err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
- if (err) {
- /*
-@@ -1896,11 +1935,10 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
- {
- pgd_t *pgd;
- unsigned long next;
-- unsigned long start = addr, end = addr + size;
-+ unsigned long end = addr + size;
- int err;
-
- BUG_ON(addr >= end);
-- mmu_notifier_invalidate_range_start(mm, start, end);
- pgd = pgd_offset(mm, addr);
- do {
- next = pgd_addr_end(addr, end);
-@@ -1908,7 +1946,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
- if (err)
- break;
- } while (pgd++, addr = next, addr != end);
-- mmu_notifier_invalidate_range_end(mm, start, end);
-+
- return err;
- }
- EXPORT_SYMBOL_GPL(apply_to_page_range);
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index 902e5fc..101715c 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -594,6 +594,13 @@ static void __free_pages_ok(struct page *page, unsigned int order)
- if (bad)
- return;
-
-+#ifdef CONFIG_XEN
-+ if (PageForeign(page)) {
-+ PageForeignDestructor(page, order);
-+ return;
-+ }
-+#endif
-+
- if (!PageHighMem(page)) {
- debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order);
- debug_check_no_obj_freed(page_address(page),
-@@ -1088,6 +1095,13 @@ static void free_hot_cold_page(struct page *page, int cold)
-
- kmemcheck_free_shadow(page, 0);
-
-+#ifdef CONFIG_XEN
-+ if (PageForeign(page)) {
-+ PageForeignDestructor(page, 0);
-+ return;
-+ }
-+#endif
-+
- if (PageAnon(page))
- page->mapping = NULL;
- if (free_pages_check(page))
-diff --git a/mm/vmalloc.c b/mm/vmalloc.c
-index 680dcbb..4f701c2 100644
---- a/mm/vmalloc.c
-+++ b/mm/vmalloc.c
-@@ -31,6 +31,7 @@
- #include <asm/tlbflush.h>
- #include <asm/shmparam.h>
-
-+bool vmap_lazy_unmap __read_mostly = true;
-
- /*** Page table manipulation functions ***/
-
-@@ -502,6 +503,9 @@ static unsigned long lazy_max_pages(void)
- {
- unsigned int log;
-
-+ if (!vmap_lazy_unmap)
-+ return 0;
-+
- log = fls(num_online_cpus());
-
- return log * (32UL * 1024 * 1024 / PAGE_SIZE);
-@@ -570,8 +574,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
- }
- rcu_read_unlock();
-
-- if (nr)
-+ if (nr) {
- atomic_sub(nr, &vmap_lazy_nr);
-+ }
-
- if (nr || force_flush)
- flush_tlb_kernel_range(*start, *end);
-diff --git a/net/core/ethtool.c b/net/core/ethtool.c
-index abbe8fa..e661dd7 100644
---- a/net/core/ethtool.c
-+++ b/net/core/ethtool.c
-@@ -179,14 +179,24 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
- struct ethtool_drvinfo info;
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
-- if (!ops->get_drvinfo)
-- return -EOPNOTSUPP;
--
- memset(&info, 0, sizeof(info));
- info.cmd = ETHTOOL_GDRVINFO;
-- ops->get_drvinfo(dev, &info);
-+ if (ops && ops->get_drvinfo) {
-+ ops->get_drvinfo(dev, &info);
-+ } else if (dev->dev.parent && dev->dev.parent->driver) {
-+ strlcpy(info.bus_info, dev_name(dev->dev.parent),
-+ sizeof(info.bus_info));
-+ strlcpy(info.driver, dev->dev.parent->driver->name,
-+ sizeof(info.driver));
-+ } else {
-+ return -EOPNOTSUPP;
-+ }
-
-- if (ops->get_sset_count) {
-+ /*
-+ * this method of obtaining string set info is deprecated;
-+ * Use ETHTOOL_GSSET_INFO instead.
-+ */
-+ if (ops && ops->get_sset_count) {
- int rc;
-
- rc = ops->get_sset_count(dev, ETH_SS_TEST);
-@@ -201,14 +211,14 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
- } else {
- /* code path for obsolete hooks */
-
-- if (ops->self_test_count)
-+ if (ops && ops->self_test_count)
- info.testinfo_len = ops->self_test_count(dev);
-- if (ops->get_stats_count)
-+ if (ops && ops->get_stats_count)
- info.n_stats = ops->get_stats_count(dev);
- }
-- if (ops->get_regs_len)
-+ if (ops && ops->get_regs_len)
- info.regdump_len = ops->get_regs_len(dev);
-- if (ops->get_eeprom_len)
-+ if (ops && ops->get_eeprom_len)
- info.eedump_len = ops->get_eeprom_len(dev);
-
- if (copy_to_user(useraddr, &info, sizeof(info)))
-@@ -945,12 +955,19 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
- if (!dev || !netif_device_present(dev))
- return -ENODEV;
-
-- if (!dev->ethtool_ops)
-- return -EOPNOTSUPP;
--
-- if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
-+ if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
- return -EFAULT;
-
-+ if (!dev->ethtool_ops) {
-+ /* ETHTOOL_GDRVINFO does not require any driver support.
-+ * It is also unprivileged and does not change anything,
-+ * so we can take a shortcut to it. */
-+ if (ethcmd == ETHTOOL_GDRVINFO)
-+ return ethtool_get_drvinfo(dev, useraddr);
-+ else
-+ return -EOPNOTSUPP;
-+ }
-+
- /* Allow some commands to be done by anyone */
- switch(ethcmd) {
- case ETHTOOL_GDRVINFO:
-diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
-index d4fd895..4ab8c97 100644
---- a/net/core/rtnetlink.c
-+++ b/net/core/rtnetlink.c
-@@ -35,6 +35,7 @@
- #include <linux/security.h>
- #include <linux/mutex.h>
- #include <linux/if_addr.h>
-+#include <linux/pci.h>
-
- #include <asm/uaccess.h>
- #include <asm/system.h>
-@@ -582,6 +583,22 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
- a->tx_compressed = b->tx_compressed;
- };
-
-+/* All VF info */
-+static inline int rtnl_vfinfo_size(const struct net_device *dev)
-+{
-+ if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
-+
-+ int num_vfs = dev_num_vf(dev->dev.parent);
-+ size_t size = nlmsg_total_size(sizeof(struct nlattr));
-+ size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
-+ size += num_vfs * (sizeof(struct ifla_vf_mac) +
-+ sizeof(struct ifla_vf_vlan) +
-+ sizeof(struct ifla_vf_tx_rate));
-+ return size;
-+ } else
-+ return 0;
-+}
-+
- static inline size_t if_nlmsg_size(const struct net_device *dev)
- {
- return NLMSG_ALIGN(sizeof(struct ifinfomsg))
-@@ -599,6 +616,8 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
- + nla_total_size(4) /* IFLA_MASTER */
- + nla_total_size(1) /* IFLA_OPERSTATE */
- + nla_total_size(1) /* IFLA_LINKMODE */
-+ + nla_total_size(4) /* IFLA_NUM_VF */
-+ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
- + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
- }
-
-@@ -667,6 +686,40 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
- stats = dev_get_stats(dev);
- copy_rtnl_link_stats(nla_data(attr), stats);
-
-+ if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
-+ int i;
-+
-+ struct nlattr *vfinfo, *vf;
-+ int num_vfs = dev_num_vf(dev->dev.parent);
-+
-+ NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
-+ vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
-+ if (!vfinfo)
-+ goto nla_put_failure;
-+ for (i = 0; i < num_vfs; i++) {
-+ struct ifla_vf_info ivi;
-+ struct ifla_vf_mac vf_mac;
-+ struct ifla_vf_vlan vf_vlan;
-+ struct ifla_vf_tx_rate vf_tx_rate;
-+ if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
-+ break;
-+ vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
-+ memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
-+ vf_vlan.vlan = ivi.vlan;
-+ vf_vlan.qos = ivi.qos;
-+ vf_tx_rate.rate = ivi.tx_rate;
-+ vf = nla_nest_start(skb, IFLA_VF_INFO);
-+ if (!vf) {
-+ nla_nest_cancel(skb, vfinfo);
-+ goto nla_put_failure;
-+ }
-+ NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
-+ NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
-+ NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
-+ nla_nest_end(skb, vf);
-+ }
-+ nla_nest_end(skb, vfinfo);
-+ }
- if (dev->rtnl_link_ops) {
- if (rtnl_link_fill(skb, dev) < 0)
- goto nla_put_failure;
-@@ -716,6 +769,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
- [IFLA_LINKINFO] = { .type = NLA_NESTED },
- [IFLA_NET_NS_PID] = { .type = NLA_U32 },
- [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
-+ [IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
- };
-
- static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
-@@ -723,6 +777,33 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
- [IFLA_INFO_DATA] = { .type = NLA_NESTED },
- };
-
-+static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
-+ [IFLA_VF_INFO] = { .type = NLA_NESTED },
-+};
-+
-+static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
-+ [IFLA_VF_MAC] = { .type = NLA_BINARY,
-+ .len = sizeof(struct ifla_vf_mac) },
-+ [IFLA_VF_VLAN] = { .type = NLA_BINARY,
-+ .len = sizeof(struct ifla_vf_vlan) },
-+ [IFLA_VF_TX_RATE] = { .type = NLA_BINARY,
-+ .len = sizeof(struct ifla_vf_tx_rate) },
-+};
-+
-+struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
-+{
-+ struct net *net;
-+ /* Examine the link attributes and figure out which
-+ * network namespace we are talking about.
-+ */
-+ if (tb[IFLA_NET_NS_PID])
-+ net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
-+ else
-+ net = get_net(src_net);
-+ return net;
-+}
-+EXPORT_SYMBOL(rtnl_link_get_net);
-+
- static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
- {
- if (dev) {
-@@ -738,6 +819,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
- return 0;
- }
-
-+static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
-+{
-+ int rem, err = -EINVAL;
-+ struct nlattr *vf;
-+ const struct net_device_ops *ops = dev->netdev_ops;
-+
-+ nla_for_each_nested(vf, attr, rem) {
-+ switch (nla_type(vf)) {
-+ case IFLA_VF_MAC: {
-+ struct ifla_vf_mac *ivm;
-+ ivm = nla_data(vf);
-+ err = -EOPNOTSUPP;
-+ if (ops->ndo_set_vf_mac)
-+ err = ops->ndo_set_vf_mac(dev, ivm->vf,
-+ ivm->mac);
-+ break;
-+ }
-+ case IFLA_VF_VLAN: {
-+ struct ifla_vf_vlan *ivv;
-+ ivv = nla_data(vf);
-+ err = -EOPNOTSUPP;
-+ if (ops->ndo_set_vf_vlan)
-+ err = ops->ndo_set_vf_vlan(dev, ivv->vf,
-+ ivv->vlan,
-+ ivv->qos);
-+ break;
-+ }
-+ case IFLA_VF_TX_RATE: {
-+ struct ifla_vf_tx_rate *ivt;
-+ ivt = nla_data(vf);
-+ err = -EOPNOTSUPP;
-+ if (ops->ndo_set_vf_tx_rate)
-+ err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
-+ ivt->rate);
-+ break;
-+ }
-+ default:
-+ err = -EINVAL;
-+ break;
-+ }
-+ if (err)
-+ break;
-+ }
-+ return err;
-+}
-+
- static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
- struct nlattr **tb, char *ifname, int modified)
- {
-@@ -875,6 +1002,18 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
- write_unlock_bh(&dev_base_lock);
- }
-
-+ if (tb[IFLA_VFINFO_LIST]) {
-+ struct nlattr *attr;
-+ int rem;
-+ nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
-+ if (nla_type(attr) != IFLA_VF_INFO)
-+ goto errout;
-+ err = do_setvfinfo(dev, attr);
-+ if (err < 0)
-+ goto errout;
-+ modified = 1;
-+ }
-+ }
- err = 0;
-
- errout:
-diff --git a/net/sched/Kconfig b/net/sched/Kconfig
-index 929218a..956cd0a 100644
---- a/net/sched/Kconfig
-+++ b/net/sched/Kconfig
-@@ -215,6 +215,26 @@ config NET_SCH_INGRESS
- To compile this code as a module, choose M here: the
- module will be called sch_ingress.
-
-+config NET_SCH_PLUG
-+ tristate "Plug network traffic until release"
-+ ---help---
-+ Say Y here if you are using this kernel for Xen dom0 and
-+ want to protect Xen guests with Remus.
-+
-+ This queueing discipline is controlled by netlink. When it receives an
-+ enqueue command it inserts a plug into the outbound queue that causes
-+ following packets to enqueue until a dequeue command arrives over
-+ netlink, releasing packets up to the plug for delivery.
-+
-+ Its intention is to support speculative execution by allowing generated
-+ network traffic to be rolled back. It is used to provide network
-+ protection for the Remus high availability project.
-+
-+ If unsure, say N.
-+
-+ To compile this code as a module, choose M here: the
-+ module will be called sch_plug.
-+
- comment "Classification"
-
- config NET_CLS
-diff --git a/net/sched/Makefile b/net/sched/Makefile
-index f14e71b..61ef5f7 100644
---- a/net/sched/Makefile
-+++ b/net/sched/Makefile
-@@ -31,6 +31,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
- obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
- obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
- obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
-+obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
- obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
- obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
- obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
-diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
-new file mode 100644
-index 0000000..86c3ee1
---- /dev/null
-+++ b/net/sched/sch_plug.c
-@@ -0,0 +1,156 @@
-+/*
-+ * sch_plug.c Queue traffic until an explicit release command
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version
-+ * 2 of the License, or (at your option) any later version.
-+ *
-+ * The operation of the buffer is as follows:
-+ * When a checkpoint begins, a plug is inserted into the
-+ * network queue by a netlink request (it operates by storing
-+ * a pointer to the next packet which arrives and blocking dequeue
-+ * when that packet is at the head of the queue).
-+ * When a checkpoint completes (the backup acknowledges receipt),
-+ * currently-queued packets are released.
-+ * So it supports two operations, plug and unplug.
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/types.h>
-+#include <linux/kernel.h>
-+#include <linux/errno.h>
-+#include <linux/netdevice.h>
-+#include <linux/skbuff.h>
-+#include <net/pkt_sched.h>
-+
-+#define FIFO_BUF (10*1024*1024)
-+
-+#define TCQ_PLUG 0
-+#define TCQ_UNPLUG 1
-+
-+struct plug_sched_data {
-+ /*
-+ * This packet is the first packet which should not be
-+ * delivered. If it is NULL, plug_enqueue will set it to the
-+ * next packet it sees.
-+ */
-+ struct sk_buff *stop;
-+};
-+
-+struct tc_plug_qopt {
-+ /* 0: reset stop packet pointer
-+ * 1: dequeue to stop pointer */
-+ int action;
-+};
-+
-+static int skb_remove_foreign_references(struct sk_buff *skb)
-+{
-+ return !skb_linearize(skb);
-+}
-+
-+static int plug_enqueue(struct sk_buff *skb, struct Qdisc* sch)
-+{
-+ struct plug_sched_data *q = qdisc_priv(sch);
-+
-+ if (likely(sch->qstats.backlog + skb->len <= FIFO_BUF)) {
-+ if (!q->stop)
-+ q->stop = skb;
-+
-+ if (!skb_remove_foreign_references(skb)) {
-+ printk(KERN_DEBUG "error removing foreign ref\n");
-+ return qdisc_reshape_fail(skb, sch);
-+ }
-+
-+ return qdisc_enqueue_tail(skb, sch);
-+ }
-+ printk(KERN_WARNING "queue reported full: %d,%d\n",
-+ sch->qstats.backlog, skb->len);
-+
-+ return qdisc_reshape_fail(skb, sch);
-+}
-+
-+/* dequeue doesn't actually dequeue until the release command is
-+ * received. */
-+static struct sk_buff *plug_dequeue(struct Qdisc* sch)
-+{
-+ struct plug_sched_data *q = qdisc_priv(sch);
-+ struct sk_buff *peek;
-+
-+ if (sch->flags & TCQ_F_THROTTLED)
-+ return NULL;
-+
-+ peek = (struct sk_buff *)((sch->q).next);
-+
-+ /* this pointer comparison may be shady */
-+ if (peek == q->stop) {
-+ /*
-+ * This is the tail of the last round. Release it and
-+ * block the queue
-+ */
-+ sch->flags |= TCQ_F_THROTTLED;
-+ return NULL;
-+ }
-+
-+ return qdisc_dequeue_head(sch);
-+}
-+
-+static int plug_init(struct Qdisc *sch, struct nlattr *opt)
-+{
-+ sch->flags |= TCQ_F_THROTTLED;
-+
-+ return 0;
-+}
-+
-+/*
-+ * receives two messages:
-+ * 0: checkpoint queue (set stop to next packet)
-+ * 1: dequeue until stop
-+ */
-+static int plug_change(struct Qdisc *sch, struct nlattr *opt)
-+{
-+ struct plug_sched_data *q = qdisc_priv(sch);
-+ struct tc_plug_qopt *msg;
-+
-+ if (!opt || nla_len(opt) < sizeof(*msg))
-+ return -EINVAL;
-+
-+ msg = nla_data(opt);
-+
-+ if (msg->action == TCQ_PLUG) {
-+ /* reset stop */
-+ q->stop = NULL;
-+ } else if (msg->action == TCQ_UNPLUG) {
-+ /* dequeue */
-+ sch->flags &= ~TCQ_F_THROTTLED;
-+ netif_schedule_queue(sch->dev_queue);
-+ } else {
-+ return -EINVAL;
-+ }
-+
-+ return 0;
-+}
-+
-+struct Qdisc_ops plug_qdisc_ops = {
-+ .id = "plug",
-+ .priv_size = sizeof(struct plug_sched_data),
-+ .enqueue = plug_enqueue,
-+ .dequeue = plug_dequeue,
-+ .peek = qdisc_peek_head,
-+ .init = plug_init,
-+ .change = plug_change,
-+ .owner = THIS_MODULE,
-+};
-+
-+static int __init plug_module_init(void)
-+{
-+ return register_qdisc(&plug_qdisc_ops);
-+}
-+
-+static void __exit plug_module_exit(void)
-+{
-+ unregister_qdisc(&plug_qdisc_ops);
-+}
-+module_init(plug_module_init)
-+module_exit(plug_module_exit)
-+MODULE_LICENSE("GPL");