diff options
-rw-r--r-- | testing/linux-xen0/APKBUILD | 147 | ||||
-rw-r--r-- | testing/linux-xen0/kernelconfig.x86_64 | 4241 | ||||
-rw-r--r-- | testing/linux-xen0/pvops.patch | 37837 |
3 files changed, 0 insertions, 42225 deletions
diff --git a/testing/linux-xen0/APKBUILD b/testing/linux-xen0/APKBUILD deleted file mode 100644 index a08692ad4..000000000 --- a/testing/linux-xen0/APKBUILD +++ /dev/null @@ -1,147 +0,0 @@ -# Maintainer: William Pitcock <nenolod@dereferenced.org> - -_flavor=xen0 -pkgname=linux-${_flavor} -pkgver=2.6.32.28 -_kernver=2.6.32 -pkgrel=1 -pkgdesc="Linux kernel with dom0 support (no grsecurity)" -url=http://grsecurity.net -depends="mkinitfs linux-firmware xen" -makedepends="perl installkernel bash xen" -options="!strip" -_config=${config:-kernelconfig.${CARCH}} -install= -source="ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-$_kernver.tar.bz2 - http://www.kernel.org/pub/linux/kernel/v2.6/longterm/v${pkgver%.*}/patch-$pkgver.bz2 - - pvops.patch - - kernelconfig.x86_64" -subpackages="$pkgname-dev linux-firmware:firmware" -arch="x86_64" -license="GPL-2" - -_abi_release=${pkgver}-${_flavor} - -prepare() { - local _patch_failed= - cd "$srcdir"/linux-$_kernver - if [ "$_kernver" != "$pkgver" ]; then - bunzip2 -c < ../patch-$pkgver.bz2 | patch -p1 -N || return 1 - fi - - # first apply patches in specified order - for i in $source; do - case $i in - *.patch) - bn=$(basename $i) - - msg "Applying $bn..." - if ! patch -s -p1 -N -i "$srcdir"/$bn; then - echo $bn >>failed - _patch_failed=1 - fi - ;; - esac - done - - if ! 
[ -z "$_patch_failed" ]; then - error "The following patches failed:" - cat failed - return 1 - fi - - echo "-xen0" > "$srcdir"/linux-$_kernver/localversion-xen0 - - mkdir -p "$srcdir"/build - cp "$srcdir"/$_config "$srcdir"/build/.config || return 1 - make -C "$srcdir"/linux-$_kernver O="$srcdir"/build HOSTCC="${CC:-gcc}" V=1 \ - silentoldconfig -} - -# this is so we can do: 'abuild menuconfig' to reconfigure kernel -menuconfig() { - cd "$srcdir"/build || return 1 - make menuconfig - cp .config "$startdir"/$_config -} - -build() { - cd "$srcdir"/build - make CC="${CC:-gcc}" \ - KBUILD_BUILD_VERSION="$((pkgrel + 1 ))-Alpine" V=1 \ - || return 1 -} - -package() { - cd "$srcdir"/build - mkdir -p "$pkgdir"/boot "$pkgdir"/lib/modules - make -j1 modules_install firmware_install install \ - INSTALL_MOD_PATH="$pkgdir" \ - INSTALL_PATH="$pkgdir"/boot \ - || return 1 - - rm -f "$pkgdir"/lib/modules/${_abi_release}/build \ - "$pkgdir"/lib/modules/${_abi_release}/source - install -D include/config/kernel.release \ - "$pkgdir"/usr/share/kernel/$_flavor/kernel.release -} - -dev() { - # copy the only the parts that we really need for build 3rd party - # kernel modules and install those as /usr/src/linux-headers, - # simlar to what ubuntu does - # - # this way you dont need to install the 300-400 kernel sources to - # build a tiny kernel module - # - pkgdesc="Headers and script for third party modules for grsec kernel" - local dir="$subpkgdir"/usr/src/linux-headers-${_abi_release} - - # first we import config, run prepare to set up for building - # external modules, and create the scripts - mkdir -p "$dir" - cp "$srcdir"/$_config "$dir"/.config - make -j1 -C "$srcdir"/linux-$_kernver O="$dir" HOSTCC="${CC:-gcc}" \ - silentoldconfig prepare scripts - - # remove the stuff that poits to real sources. 
we want 3rd party - # modules to believe this is the soruces - rm "$dir"/Makefile "$dir"/source - - # copy the needed stuff from real sources - # - # this is taken from ubuntu kernel build script - # http://kernel.ubuntu.com/git?p=ubuntu/ubuntu-jaunty.git;a=blob;f=debian/rules.d/3-binary-indep.mk;hb=HEAD - cd "$srcdir"/linux-$_kernver - find . -path './include/*' -prune -o -path './scripts/*' -prune \ - -o -type f \( -name 'Makefile*' -o -name 'Kconfig*' \ - -o -name 'Kbuild*' -o -name '*.sh' -o -name '*.pl' \ - -o -name '*.lds' \) | cpio -pdm "$dir" - cp -a drivers/media/dvb/dvb-core/*.h "$dir"/drivers/media/dvb/dvb-core - cp -a drivers/media/video/*.h "$dir"/drivers/media/video - cp -a drivers/media/dvb/frontends/*.h "$dir"/drivers/media/dvb/frontends - cp -a scripts include "$dir" - find $(find arch -name include -type d -print) -type f \ - | cpio -pdm "$dir" - - install -Dm644 "$srcdir"/build/Module.symvers \ - "$dir"/Module.symvers - - mkdir -p "$subpkgdir"/lib/modules/${_abi_release} - ln -sf /usr/src/linux-headers-${_abi_release} \ - "$subpkgdir"/lib/modules/${_abi_release}/build -} - -firmware() { - pkgdesc="Firmware for linux kernel" - replaces="linux-grsec linux-vserver" - mkdir -p "$subpkgdir"/lib - mv "$pkgdir"/lib/firmware "$subpkgdir"/lib/ -} - -md5sums="260551284ac224c3a43c4adac7df4879 linux-2.6.32.tar.bz2 -fc8c36b4638d8384a5d26a50413a1d11 patch-2.6.32.28.bz2 -2c678c4610b9d425fd3791e4ebaa0bdd pvops.patch -22f2c14e9ca592d668fc9aeda989f3e7 kernelconfig.x86_64" diff --git a/testing/linux-xen0/kernelconfig.x86_64 b/testing/linux-xen0/kernelconfig.x86_64 deleted file mode 100644 index 8749f2637..000000000 --- a/testing/linux-xen0/kernelconfig.x86_64 +++ /dev/null @@ -1,4241 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.32.27 -# Tue Dec 28 00:27:19 2010 -# -CONFIG_64BIT=y -# CONFIG_X86_32 is not set -CONFIG_X86_64=y -CONFIG_X86=y -CONFIG_OUTPUT_FORMAT="elf64-x86-64" 
-CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" -CONFIG_GENERIC_TIME=y -CONFIG_GENERIC_CMOS_UPDATE=y -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_HAVE_LATENCYTOP_SUPPORT=y -CONFIG_MMU=y -CONFIG_ZONE_DMA=y -CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_IOMAP=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_GPIO=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_TIME_VSYSCALL=y -CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HAS_DEFAULT_IDLE=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y -CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y -CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -CONFIG_ZONE_DMA32=y -CONFIG_ARCH_POPULATES_NODE_MAP=y -CONFIG_AUDIT_ARCH=y -CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_USE_GENERIC_SMP_HELPERS=y -CONFIG_X86_64_SMP=y -CONFIG_X86_HT=y -CONFIG_X86_TRAMPOLINE=y -# CONFIG_KTIME_SCALAR is not set -CONFIG_ARCH_CPU_PROBE_RELEASE=y -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" -CONFIG_CONSTRUCTORS=y - -# -# General setup -# -CONFIG_EXPERIMENTAL=y -CONFIG_LOCK_KERNEL=y -CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="" -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_KERNEL_GZIP=y -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -# CONFIG_POSIX_MQUEUE is not set -CONFIG_BSD_PROCESS_ACCT=y 
-CONFIG_BSD_PROCESS_ACCT_V3=y -# CONFIG_TASKSTATS is not set -# CONFIG_AUDIT is not set - -# -# RCU Subsystem -# -CONFIG_TREE_RCU=y -# CONFIG_TREE_PREEMPT_RCU is not set -# CONFIG_RCU_TRACE is not set -CONFIG_RCU_FANOUT=32 -# CONFIG_RCU_FANOUT_EXACT is not set -# CONFIG_TREE_RCU_TRACE is not set -CONFIG_IKCONFIG=m -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y -# CONFIG_GROUP_SCHED is not set -# CONFIG_CGROUPS is not set -# CONFIG_SYSFS_DEPRECATED_V2 is not set -# CONFIG_RELAY is not set -# CONFIG_NAMESPACES is not set -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_GZIP=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL=y -CONFIG_ANON_INODES=y -CONFIG_EMBEDDED=y -CONFIG_UID16=y -CONFIG_SYSCTL_SYSCALL=y -# CONFIG_KALLSYMS is not set -CONFIG_HOTPLUG=y -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_HAVE_PERF_EVENTS=y - -# -# Kernel Performance Events And Counters -# -CONFIG_PERF_EVENTS=y -CONFIG_PERF_COUNTERS=y -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_PCI_QUIRKS=y -# CONFIG_SLUB_DEBUG is not set -# CONFIG_COMPAT_BRK is not set -# CONFIG_SLAB is not set -CONFIG_SLUB=y -# CONFIG_SLOB is not set -CONFIG_PROFILING=y -CONFIG_OPROFILE=m -# CONFIG_OPROFILE_IBS is not set -# CONFIG_OPROFILE_EVENT_MULTIPLEX is not set -CONFIG_HAVE_OPROFILE=y -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_ATTRS=y -CONFIG_HAVE_DMA_API_DEBUG=y - -# -# GCOV-based kernel profiling -# -# CONFIG_GCOV_KERNEL is not set -CONFIG_SLOW_WORK=y -# CONFIG_SLOW_WORK_DEBUG is not set -# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0 -CONFIG_MODULES=y -# CONFIG_MODULE_FORCE_LOAD is not set 
-CONFIG_MODULE_UNLOAD=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -CONFIG_MODVERSIONS=y -# CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_STOP_MACHINE=y -CONFIG_BLOCK=y -CONFIG_BLK_DEV_BSG=y -# CONFIG_BLK_DEV_INTEGRITY is not set -CONFIG_BLOCK_COMPAT=y - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=m -CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_FREEZER=y - -# -# Processor type and features -# -CONFIG_TICK_ONESHOT=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_GENERIC_CLOCKEVENTS_BUILD=y -CONFIG_SMP=y -# CONFIG_SPARSE_IRQ is not set -CONFIG_X86_MPPARSE=y -CONFIG_X86_EXTENDED_PLATFORM=y -# CONFIG_X86_VSMP is not set -CONFIG_SCHED_OMIT_FRAME_POINTER=y -CONFIG_PARAVIRT_GUEST=y -CONFIG_XEN=y -CONFIG_XEN_PVHVM=y -CONFIG_XEN_MAX_DOMAIN_MEMORY=128 -CONFIG_XEN_SAVE_RESTORE=y -CONFIG_XEN_DEBUG_FS=y -CONFIG_SWIOTLB_XEN=y -CONFIG_MICROCODE_XEN=y -CONFIG_XEN_DOM0=y -CONFIG_XEN_PRIVILEGED_GUEST=y -CONFIG_XEN_DOM0_PCI=y -CONFIG_XEN_PCI_PASSTHROUGH=y -CONFIG_KVM_CLOCK=y -CONFIG_KVM_GUEST=y -CONFIG_PARAVIRT=y -# CONFIG_PARAVIRT_SPINLOCKS is not set -CONFIG_PARAVIRT_CLOCK=y -# CONFIG_MEMTEST is not set -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -# CONFIG_M686 is not set -# CONFIG_MPENTIUMII is not set -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUMM is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MEFFICEON is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MGEODEGX1 is not set -# CONFIG_MGEODE_LX is not set -# CONFIG_MCYRIXIII is not set -# CONFIG_MVIAC3_2 is not set -# CONFIG_MVIAC7 is not set -# CONFIG_MPSC is not set -# 
CONFIG_MCORE2 is not set -# CONFIG_MATOM is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_CPU=y -CONFIG_X86_L1_CACHE_BYTES=64 -CONFIG_X86_INTERNODE_CACHE_BYTES=64 -CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_XADD=y -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_TSC=y -CONFIG_X86_CMPXCHG64=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=64 -CONFIG_X86_DEBUGCTLMSR=y -# CONFIG_PROCESSOR_SELECT is not set -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_CENTAUR=y -# CONFIG_X86_DS is not set -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -CONFIG_GART_IOMMU=y -CONFIG_CALGARY_IOMMU=y -CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y -CONFIG_AMD_IOMMU=y -# CONFIG_AMD_IOMMU_STATS is not set -CONFIG_SWIOTLB=y -CONFIG_IOMMU_HELPER=y -CONFIG_IOMMU_API=y -CONFIG_NR_CPUS=8 -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_PREEMPT is not set -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -# CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS is not set -# CONFIG_X86_MCE is not set -CONFIG_I8K=m -CONFIG_MICROCODE=m -CONFIG_MICROCODE_INTEL=y -CONFIG_MICROCODE_AMD=y -CONFIG_MICROCODE_OLD_INTERFACE=y -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -CONFIG_ARCH_PHYS_ADDR_T_64BIT=y -CONFIG_DIRECT_GBPAGES=y -# CONFIG_NUMA is not set -CONFIG_ARCH_SPARSEMEM_DEFAULT=y -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 -CONFIG_SELECT_MEMORY_MODEL=y -# CONFIG_FLATMEM_MANUAL is not set -# CONFIG_DISCONTIGMEM_MANUAL is not set -CONFIG_SPARSEMEM_MANUAL=y -CONFIG_SPARSEMEM=y -CONFIG_HAVE_MEMORY_PRESENT=y -CONFIG_SPARSEMEM_EXTREME=y -CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -CONFIG_SPARSEMEM_VMEMMAP=y -# CONFIG_MEMORY_HOTPLUG is not set -CONFIG_PAGEFLAGS_EXTENDED=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_ZONE_DMA_FLAG=1 -CONFIG_BOUNCE=y -CONFIG_VIRT_TO_BUS=y -CONFIG_HAVE_MLOCK=y -CONFIG_HAVE_MLOCKED_PAGE_BIT=y -CONFIG_MMU_NOTIFIER=y -CONFIG_KSM=y 
-CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 -# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set -CONFIG_X86_RESERVE_LOW_64K=y -CONFIG_MTRR=y -CONFIG_MTRR_SANITIZER=y -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 -CONFIG_X86_PAT=y -CONFIG_ARCH_USES_PG_UNCACHED=y -# CONFIG_EFI is not set -# CONFIG_SECCOMP is not set -# CONFIG_CC_STACKPROTECTOR is not set -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -CONFIG_HZ_300=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=300 -CONFIG_SCHED_HRTICK=y -# CONFIG_KEXEC is not set -# CONFIG_CRASH_DUMP is not set -CONFIG_PHYSICAL_START=0x1000000 -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_ALIGN=0x1000000 -CONFIG_HOTPLUG_CPU=y -CONFIG_COMPAT_VDSO=y -# CONFIG_CMDLINE_BOOL is not set -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y - -# -# Power management and ACPI options -# -CONFIG_PM=y -# CONFIG_PM_DEBUG is not set -CONFIG_PM_SLEEP_SMP=y -CONFIG_PM_SLEEP=y -CONFIG_SUSPEND=y -CONFIG_SUSPEND_FREEZER=y -# CONFIG_HIBERNATION is not set -# CONFIG_PM_RUNTIME is not set -CONFIG_ACPI=y -CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_PROCFS=y -CONFIG_ACPI_PROCFS_POWER=y -# CONFIG_ACPI_POWER_METER is not set -CONFIG_ACPI_SYSFS_POWER=y -CONFIG_ACPI_PROC_EVENT=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_VIDEO=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_DOCK=y -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_HOTPLUG_CPU=y -# CONFIG_ACPI_PROCESSOR_AGGREGATOR is not set -CONFIG_ACPI_THERMAL=m -# CONFIG_ACPI_CUSTOM_DSDT is not set -CONFIG_ACPI_BLACKLIST_YEAR=0 -# CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_PCI_SLOT=m -CONFIG_X86_PM_TIMER=y -CONFIG_ACPI_CONTAINER=m -CONFIG_ACPI_SBS=m -# CONFIG_SFI is not set - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_TABLE=m -# CONFIG_CPU_FREQ_DEBUG is not set -CONFIG_CPU_FREQ_STAT=m -# CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set 
-# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_GOV_ONDEMAND=m -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m - -# -# CPUFreq processor drivers -# -CONFIG_X86_ACPI_CPUFREQ=m -CONFIG_X86_POWERNOW_K8=m -CONFIG_X86_SPEEDSTEP_CENTRINO=m -CONFIG_X86_P4_CLOCKMOD=m - -# -# shared options -# -CONFIG_X86_SPEEDSTEP_LIB=m -CONFIG_CPU_IDLE=y -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y - -# -# Memory power savings -# -CONFIG_I7300_IDLE_IOAT_CHANNEL=y -CONFIG_I7300_IDLE=m - -# -# Bus options (PCI etc.) -# -CONFIG_PCI=y -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_XEN=y -CONFIG_PCI_DOMAINS=y -# CONFIG_DMAR is not set -# CONFIG_INTR_REMAP is not set -CONFIG_PCIEPORTBUS=y -CONFIG_HOTPLUG_PCI_PCIE=m -# CONFIG_PCIEAER is not set -CONFIG_PCIEASPM=y -# CONFIG_PCIEASPM_DEBUG is not set -CONFIG_ARCH_SUPPORTS_MSI=y -CONFIG_PCI_MSI=y -CONFIG_PCI_LEGACY=y -CONFIG_PCI_STUB=m -CONFIG_XEN_PCIDEV_FRONTEND=y -CONFIG_HT_IRQ=y -# CONFIG_PCI_IOV is not set -CONFIG_ISA_DMA_API=y -CONFIG_K8_NB=y -CONFIG_PCCARD=m -# CONFIG_PCMCIA_DEBUG is not set -CONFIG_PCMCIA=m -CONFIG_PCMCIA_LOAD_CIS=y -CONFIG_PCMCIA_IOCTL=y -CONFIG_CARDBUS=y - -# -# PC-card bridges -# -CONFIG_YENTA=m -CONFIG_YENTA_O2=y -CONFIG_YENTA_RICOH=y -CONFIG_YENTA_TI=y -CONFIG_YENTA_ENE_TUNE=y -CONFIG_YENTA_TOSHIBA=y -CONFIG_PD6729=m -CONFIG_I82092=m -CONFIG_PCCARD_NONSTATIC=m -CONFIG_HOTPLUG_PCI=m -CONFIG_HOTPLUG_PCI_FAKE=m -CONFIG_HOTPLUG_PCI_ACPI=m -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -CONFIG_HOTPLUG_PCI_CPCI=y -CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m -CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m -CONFIG_HOTPLUG_PCI_SHPC=m - -# -# Executable file formats / Emulations -# -CONFIG_BINFMT_ELF=y -CONFIG_COMPAT_BINFMT_ELF=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -# CONFIG_HAVE_AOUT is not set -CONFIG_BINFMT_MISC=m -CONFIG_IA32_EMULATION=y -# CONFIG_IA32_AOUT is not set 
-CONFIG_COMPAT=y -CONFIG_COMPAT_FOR_U64_ALIGNMENT=y -CONFIG_SYSVIPC_COMPAT=y -CONFIG_NET=y -CONFIG_COMPAT_NETLINK_MESSAGES=y - -# -# Networking options -# -CONFIG_PACKET=m -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -CONFIG_XFRM=y -CONFIG_XFRM_USER=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_MIGRATE=y -# CONFIG_XFRM_STATISTICS is not set -CONFIG_XFRM_IPCOMP=m -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_ASK_IP_FIB_HASH=y -# CONFIG_IP_FIB_TRIE is not set -CONFIG_IP_FIB_HASH=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -# CONFIG_IP_PIMSM_V1 is not set -CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m -CONFIG_INET_LRO=y -CONFIG_INET_DIAG=m -CONFIG_INET_TCP_DIAG=m -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_BIC=m -CONFIG_TCP_CONG_CUBIC=y -CONFIG_TCP_CONG_WESTWOOD=m -CONFIG_TCP_CONG_HTCP=m -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_VEGAS=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -# CONFIG_DEFAULT_BIC is not set -CONFIG_DEFAULT_CUBIC=y -# CONFIG_DEFAULT_HTCP is not set -# CONFIG_DEFAULT_VEGAS is not set -# CONFIG_DEFAULT_WESTWOOD is not set -# CONFIG_DEFAULT_RENO is not set -CONFIG_DEFAULT_TCP_CONG="cubic" -CONFIG_TCP_MD5SIG=y -CONFIG_IPV6=m -CONFIG_IPV6_PRIVACY=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -# CONFIG_IPV6_OPTIMISTIC_DAD is not set -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_TUNNEL=m 
-CONFIG_INET6_TUNNEL=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_PIMSM_V2=y -CONFIG_NETLABEL=y -CONFIG_NETWORK_SECMARK=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_NETFILTER_ADVANCED=y -CONFIG_BRIDGE_NETFILTER=y - -# -# Core Netfilter Configuration -# -CONFIG_NETFILTER_NETLINK=m -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CT_ACCT=y -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_GRE=m -CONFIG_NF_CT_PROTO_SCTP=m -CONFIG_NF_CT_PROTO_UDPLITE=m -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NETFILTER_TPROXY=m -CONFIG_NETFILTER_XTABLES=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m 
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -# CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT is not set -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_IP_VS=m -CONFIG_IP_VS_IPV6=y -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=12 - -# -# IPVS transport protocol load balancing support -# -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_AH_ESP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m - -# -# IP: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_CONNTRACK_PROC_COMPAT=y -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m 
-CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_NF_NAT=m -CONFIG_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_PROTO_DCCP=m -CONFIG_NF_NAT_PROTO_GRE=m -CONFIG_NF_NAT_PROTO_UDPLITE=m -CONFIG_NF_NAT_PROTO_SCTP=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_H323=m -CONFIG_NF_NAT_SIP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m - -# -# IPv6: Netfilter Configuration -# -CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_IP6_NF_QUEUE=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m - -# -# DECnet: Netfilter Configuration -# -CONFIG_DECNET_NF_GRABULATOR=m -CONFIG_BRIDGE_NF_EBTABLES=m -CONFIG_BRIDGE_EBT_BROUTE=m -CONFIG_BRIDGE_EBT_T_FILTER=m -CONFIG_BRIDGE_EBT_T_NAT=m -CONFIG_BRIDGE_EBT_802_3=m -CONFIG_BRIDGE_EBT_AMONG=m -CONFIG_BRIDGE_EBT_ARP=m -CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BRIDGE_EBT_LIMIT=m -CONFIG_BRIDGE_EBT_MARK=m -CONFIG_BRIDGE_EBT_PKTTYPE=m -CONFIG_BRIDGE_EBT_STP=m -CONFIG_BRIDGE_EBT_VLAN=m -CONFIG_BRIDGE_EBT_ARPREPLY=m -CONFIG_BRIDGE_EBT_DNAT=m -CONFIG_BRIDGE_EBT_MARK_T=m -CONFIG_BRIDGE_EBT_REDIRECT=m -CONFIG_BRIDGE_EBT_SNAT=m -CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_ULOG=m -CONFIG_BRIDGE_EBT_NFLOG=m -CONFIG_IP_DCCP=m -CONFIG_INET_DCCP_DIAG=m - -# -# DCCP 
CCIDs Configuration (EXPERIMENTAL) -# -# CONFIG_IP_DCCP_CCID2_DEBUG is not set -CONFIG_IP_DCCP_CCID3=y -# CONFIG_IP_DCCP_CCID3_DEBUG is not set -CONFIG_IP_DCCP_CCID3_RTO=100 -CONFIG_IP_DCCP_TFRC_LIB=y -CONFIG_IP_SCTP=m -# CONFIG_SCTP_DBG_MSG is not set -# CONFIG_SCTP_DBG_OBJCNT is not set -# CONFIG_SCTP_HMAC_NONE is not set -CONFIG_SCTP_HMAC_SHA1=y -# CONFIG_SCTP_HMAC_MD5 is not set -CONFIG_RDS=m -# CONFIG_RDS_RDMA is not set -# CONFIG_RDS_TCP is not set -# CONFIG_RDS_DEBUG is not set -CONFIG_TIPC=m -# CONFIG_TIPC_ADVANCED is not set -# CONFIG_TIPC_DEBUG is not set -CONFIG_ATM=m -CONFIG_ATM_CLIP=m -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -# CONFIG_ATM_BR2684_IPFILTER is not set -CONFIG_STP=m -CONFIG_BRIDGE=m -# CONFIG_NET_DSA is not set -CONFIG_VLAN_8021Q=m -# CONFIG_VLAN_8021Q_GVRP is not set -CONFIG_DECNET=m -CONFIG_DECNET_ROUTER=y -CONFIG_LLC=m -CONFIG_LLC2=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m -CONFIG_DEV_APPLETALK=m -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_X25=m -CONFIG_LAPB=m -CONFIG_ECONET=m -CONFIG_ECONET_AUNUDP=y -CONFIG_ECONET_NATIVE=y -CONFIG_WAN_ROUTER=m -CONFIG_PHONET=m -CONFIG_IEEE802154=m -CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_ATM=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_INGRESS=m -# CONFIG_NET_SCH_PLUG is not set - -# -# Classification -# -CONFIG_NET_CLS=y -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_EMATCH=y 
-CONFIG_NET_EMATCH_STACK=32 -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_TEXT=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -# CONFIG_NET_CLS_IND is not set -CONFIG_NET_SCH_FIFO=y -# CONFIG_DCB is not set - -# -# Network testing -# -CONFIG_NET_PKTGEN=m -# CONFIG_HAMRADIO is not set -CONFIG_CAN=m -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m - -# -# CAN Device Drivers -# -CONFIG_CAN_VCAN=m -CONFIG_CAN_DEV=m -# CONFIG_CAN_CALC_BITTIMING is not set -CONFIG_CAN_SJA1000=m -CONFIG_CAN_SJA1000_PLATFORM=m -CONFIG_CAN_EMS_PCI=m -CONFIG_CAN_KVASER_PCI=m - -# -# CAN USB interfaces -# -# CONFIG_CAN_EMS_USB is not set -# CONFIG_CAN_DEBUG_DEVICES is not set -CONFIG_IRDA=m - -# -# IrDA protocols -# -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y - -# -# IrDA options -# -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# - -# -# SIR device drivers -# -CONFIG_IRTTY_SIR=m - -# -# Dongle support -# -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_TOIM3232_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MA600_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_KINGSUN_DONGLE=m -CONFIG_KSDAZZLE_DONGLE=m -CONFIG_KS959_DONGLE=m - -# -# FIR device drivers -# -CONFIG_USB_IRDA=m -CONFIG_SIGMATEL_FIR=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m -CONFIG_VIA_FIR=m -CONFIG_MCS_FIR=m -CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_CMTP=m -CONFIG_BT_HIDP=m - 
-# -# Bluetooth device drivers -# -CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBTSDIO=m -CONFIG_BT_HCIUART=m -CONFIG_BT_HCIUART_H4=y -CONFIG_BT_HCIUART_BCSP=y -CONFIG_BT_HCIUART_LL=y -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBPA10X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIBT3C=m -CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIBTUART=m -CONFIG_BT_HCIVHCI=m -# CONFIG_BT_MRVL is not set -CONFIG_AF_RXRPC=m -# CONFIG_AF_RXRPC_DEBUG is not set -CONFIG_RXKAD=m -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y -CONFIG_CFG80211=m -# CONFIG_NL80211_TESTMODE is not set -# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set -# CONFIG_CFG80211_REG_DEBUG is not set -CONFIG_CFG80211_DEFAULT_PS=y -CONFIG_CFG80211_DEFAULT_PS_VALUE=1 -# CONFIG_CFG80211_DEBUGFS is not set -# CONFIG_WIRELESS_OLD_REGULATORY is not set -CONFIG_WIRELESS_EXT=y -CONFIG_WIRELESS_EXT_SYSFS=y -CONFIG_LIB80211=m -# CONFIG_LIB80211_DEBUG is not set -CONFIG_MAC80211=m -CONFIG_MAC80211_HAS_RC=y -CONFIG_MAC80211_RC_PID=y -CONFIG_MAC80211_RC_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT_PID=y -# CONFIG_MAC80211_RC_DEFAULT_MINSTREL is not set -CONFIG_MAC80211_RC_DEFAULT="pid" -# CONFIG_MAC80211_MESH is not set -CONFIG_MAC80211_LEDS=y -# CONFIG_MAC80211_DEBUGFS is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_WIMAX=m -CONFIG_WIMAX_DEBUG_LEVEL=8 -CONFIG_RFKILL=m -CONFIG_RFKILL_LEDS=y -# CONFIG_RFKILL_INPUT is not set -CONFIG_NET_9P=m -CONFIG_NET_9P_VIRTIO=m -CONFIG_NET_9P_RDMA=m -# CONFIG_NET_9P_DEBUG is not set - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_DEVTMPFS is not set -CONFIG_STANDALONE=y -# CONFIG_PREVENT_FIRMWARE_BUILD is not set -CONFIG_FW_LOADER=m -# CONFIG_FIRMWARE_IN_KERNEL is not set -CONFIG_EXTRA_FIRMWARE="" -CONFIG_SYS_HYPERVISOR=y -CONFIG_CONNECTOR=m -CONFIG_MTD=m -# CONFIG_MTD_DEBUG is not set -CONFIG_MTD_TESTS=m -CONFIG_MTD_CONCAT=m -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_REDBOOT_PARTS=m -CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1 -# 
CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set -# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set -CONFIG_MTD_AR7_PARTS=m - -# -# User Modules And Translation Layers -# -CONFIG_MTD_CHAR=m -CONFIG_HAVE_MTD_OTP=y -CONFIG_MTD_BLKDEVS=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -CONFIG_NFTL_RW=y -CONFIG_INFTL=m -CONFIG_RFD_FTL=m -CONFIG_SSFDC=m -CONFIG_MTD_OOPS=m - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -# CONFIG_MTD_CFI_ADV_OPTIONS is not set -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set -# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set -# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y -# CONFIG_MTD_CFI_I4 is not set -# CONFIG_MTD_CFI_I8 is not set -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -CONFIG_MTD_CFI_UTIL=m -CONFIG_MTD_RAM=m -CONFIG_MTD_ROM=m -CONFIG_MTD_ABSENT=m - -# -# Mapping drivers for chip access -# -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=m -# CONFIG_MTD_PHYSMAP_COMPAT is not set -CONFIG_MTD_SC520CDP=m -CONFIG_MTD_NETSC520=m -CONFIG_MTD_TS5500=m -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_AMD76XROM=m -CONFIG_MTD_ICHXROM=m -CONFIG_MTD_ESB2ROM=m -CONFIG_MTD_CK804XROM=m -CONFIG_MTD_SCB2_FLASH=m -CONFIG_MTD_NETtel=m -CONFIG_MTD_L440GX=m -CONFIG_MTD_PCI=m -# CONFIG_MTD_GPIO_ADDR is not set -CONFIG_MTD_INTEL_VR_NOR=m -CONFIG_MTD_PLATRAM=m - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -CONFIG_MTD_PMC551_BUGFIX=y -# CONFIG_MTD_PMC551_DEBUG is not set -CONFIG_MTD_DATAFLASH=m -# CONFIG_MTD_DATAFLASH_WRITE_VERIFY is not set -# CONFIG_MTD_DATAFLASH_OTP is not set -CONFIG_MTD_M25P80=m -CONFIG_M25PXX_USE_FAST_READ=y -# CONFIG_MTD_SST25L is not set -CONFIG_MTD_SLRAM=m -CONFIG_MTD_PHRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -CONFIG_MTD_BLOCK2MTD=m - -# -# Disk-On-Chip 
Device Drivers -# -CONFIG_MTD_DOC2000=m -CONFIG_MTD_DOC2001=m -CONFIG_MTD_DOC2001PLUS=m -CONFIG_MTD_DOCPROBE=m -CONFIG_MTD_DOCECC=m -CONFIG_MTD_DOCPROBE_ADVANCED=y -CONFIG_MTD_DOCPROBE_ADDRESS=0x0000 -# CONFIG_MTD_DOCPROBE_HIGH is not set -# CONFIG_MTD_DOCPROBE_55AA is not set -CONFIG_MTD_NAND=m -# CONFIG_MTD_NAND_VERIFY_WRITE is not set -CONFIG_MTD_NAND_ECC_SMC=y -# CONFIG_MTD_NAND_MUSEUM_IDS is not set -CONFIG_MTD_NAND_IDS=m -CONFIG_MTD_NAND_DISKONCHIP=m -# CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED is not set -CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0 -# CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE is not set -CONFIG_MTD_NAND_CAFE=m -CONFIG_MTD_NAND_NANDSIM=m -CONFIG_MTD_NAND_PLATFORM=m -CONFIG_MTD_ALAUDA=m -CONFIG_MTD_ONENAND=m -# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set -# CONFIG_MTD_ONENAND_GENERIC is not set -CONFIG_MTD_ONENAND_OTP=y -CONFIG_MTD_ONENAND_2X_PROGRAM=y -CONFIG_MTD_ONENAND_SIM=m - -# -# LPDDR flash memory drivers -# -CONFIG_MTD_LPDDR=m -CONFIG_MTD_QINFO_PROBE=m - -# -# UBI - Unsorted block images -# -CONFIG_MTD_UBI=m -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_RESERVE=1 -# CONFIG_MTD_UBI_GLUEBI is not set - -# -# UBI debugging options -# -# CONFIG_MTD_UBI_DEBUG is not set -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_GSC is not set -CONFIG_PARPORT_AX88796=m -# CONFIG_PARPORT_1284 is not set -CONFIG_PARPORT_NOT_PC=y -CONFIG_PNP=y -# CONFIG_PNP_DEBUG_MESSAGES is not set - -# -# Protocols -# -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -CONFIG_BLK_DEV_FD=m -# CONFIG_PARIDE is not set -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -# CONFIG_BLK_DEV_COW_COMMON is not set -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_OSD=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_UB=m -CONFIG_BLK_DEV_RAM=y 
-CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -# CONFIG_BLK_DEV_XIP is not set -CONFIG_CDROM_PKTCDVD=m -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -CONFIG_ATA_OVER_ETH=m -CONFIG_XEN_BLKDEV_FRONTEND=y -CONFIG_VIRTIO_BLK=m -# CONFIG_BLK_DEV_HD is not set -CONFIG_MISC_DEVICES=y -CONFIG_IBM_ASM=m -CONFIG_PHANTOM=m -CONFIG_SGI_IOC4=m -CONFIG_TIFM_CORE=m -CONFIG_TIFM_7XX1=m -CONFIG_ICS932S401=m -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_HP_ILO=m -CONFIG_DELL_LAPTOP=m -CONFIG_ISL29003=m -CONFIG_C2PORT=m -CONFIG_C2PORT_DURAMAR_2150=m - -# -# EEPROM support -# -CONFIG_EEPROM_AT24=m -CONFIG_EEPROM_AT25=m -CONFIG_EEPROM_LEGACY=m -CONFIG_EEPROM_MAX6875=m -CONFIG_EEPROM_93CX6=m -CONFIG_CB710_CORE=m -# CONFIG_CB710_DEBUG is not set -CONFIG_CB710_DEBUG_ASSUMPTIONS=y -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=m -CONFIG_SCSI_DMA=y -CONFIG_SCSI_TGT=m -CONFIG_SCSI_NETLINK=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=m -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=m -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_MULTI_LUN=y -# CONFIG_SCSI_CONSTANTS is not set -# CONFIG_SCSI_LOGGING is not set -CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_WAIT_SCAN=m - -# -# SCSI Transports -# -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m -CONFIG_SCSI_FC_TGT_ATTRS=y -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SAS_ATA=y -CONFIG_SCSI_SAS_HOST_SMP=y -# CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set -CONFIG_SCSI_SRP_ATTRS=m -CONFIG_SCSI_SRP_TGT_ATTRS=y -CONFIG_SCSI_LOWLEVEL=y -CONFIG_ISCSI_TCP=m -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -# CONFIG_BE2ISCSI is not set -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_3W_9XXX=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 
-CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -CONFIG_AIC7XXX_DEBUG_ENABLE=y -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -CONFIG_AIC79XX_DEBUG_ENABLE=y -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC94XX=m -# CONFIG_AIC94XX_DEBUG is not set -CONFIG_SCSI_MVSAS=m -CONFIG_SCSI_MVSAS_DEBUG=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_ARCMSR=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_MAILBOX=m -CONFIG_MEGARAID_LEGACY=m -CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_MPT2SAS_MAX_SGE=128 -# CONFIG_SCSI_MPT2SAS_LOGGING is not set -CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_BUSLOGIC=m -CONFIG_LIBFC=m -CONFIG_LIBFCOE=m -CONFIG_FCOE=m -CONFIG_FCOE_FNIC=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_EATA=m -# CONFIG_SCSI_EATA_TAGGED_QUEUE is not set -# CONFIG_SCSI_EATA_LINKED_COMMANDS is not set -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_STEX=m -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_IPR=m -CONFIG_SCSI_IPR_TRACE=y -# CONFIG_SCSI_IPR_DUMP is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLA_FC=m -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_SCSI_LPFC=m -# CONFIG_SCSI_LPFC_DEBUG_FS is not set -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_DC390T=m -CONFIG_SCSI_DEBUG=m -# CONFIG_SCSI_PMCRAID is not set -CONFIG_SCSI_SRP=m -# CONFIG_SCSI_BFA_FC is not set -CONFIG_SCSI_LOWLEVEL_PCMCIA=y -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_QLOGIC=m 
-CONFIG_PCMCIA_SYM53C500=m -CONFIG_SCSI_DH=m -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -CONFIG_SCSI_OSD_INITIATOR=m -CONFIG_SCSI_OSD_ULD=m -CONFIG_SCSI_OSD_DPRINT_SENSE=1 -# CONFIG_SCSI_OSD_DEBUG is not set -CONFIG_ATA=m -# CONFIG_ATA_NONSTANDARD is not set -CONFIG_ATA_VERBOSE_ERROR=y -CONFIG_ATA_ACPI=y -CONFIG_SATA_PMP=y -CONFIG_SATA_AHCI=m -CONFIG_SATA_SIL24=m -CONFIG_ATA_SFF=y -CONFIG_SATA_SVW=m -CONFIG_ATA_PIIX=m -CONFIG_SATA_MV=m -CONFIG_SATA_NV=m -CONFIG_PDC_ADMA=m -CONFIG_SATA_QSTOR=m -CONFIG_SATA_PROMISE=m -CONFIG_SATA_SX4=m -CONFIG_SATA_SIL=m -CONFIG_SATA_SIS=m -CONFIG_SATA_ULI=m -CONFIG_SATA_VIA=m -CONFIG_SATA_VITESSE=m -CONFIG_SATA_INIC162X=m -CONFIG_PATA_ACPI=m -CONFIG_PATA_ALI=m -CONFIG_PATA_AMD=m -CONFIG_PATA_ARTOP=m -CONFIG_PATA_ATP867X=m -CONFIG_PATA_ATIIXP=m -CONFIG_PATA_CMD640_PCI=m -CONFIG_PATA_CMD64X=m -CONFIG_PATA_CS5520=m -CONFIG_PATA_CS5530=m -CONFIG_PATA_CYPRESS=m -CONFIG_PATA_EFAR=m -CONFIG_ATA_GENERIC=m -CONFIG_PATA_HPT366=m -CONFIG_PATA_HPT37X=m -CONFIG_PATA_HPT3X2N=m -CONFIG_PATA_HPT3X3=m -CONFIG_PATA_HPT3X3_DMA=y -CONFIG_PATA_IT821X=m -CONFIG_PATA_IT8213=m -CONFIG_PATA_JMICRON=m -CONFIG_PATA_TRIFLEX=m -CONFIG_PATA_MARVELL=m -CONFIG_PATA_MPIIX=m -CONFIG_PATA_OLDPIIX=m -CONFIG_PATA_NETCELL=m -CONFIG_PATA_NINJA32=m -CONFIG_PATA_NS87410=m -CONFIG_PATA_NS87415=m -CONFIG_PATA_OPTI=m -CONFIG_PATA_OPTIDMA=m -CONFIG_PATA_PCMCIA=m -CONFIG_PATA_PDC_OLD=m -CONFIG_PATA_RADISYS=m -CONFIG_PATA_RDC=m -CONFIG_PATA_RZ1000=m -CONFIG_PATA_SC1200=m -CONFIG_PATA_SERVERWORKS=m -CONFIG_PATA_PDC2027X=m -CONFIG_PATA_SIL680=m -CONFIG_PATA_SIS=m -CONFIG_PATA_VIA=m -CONFIG_PATA_WINBOND=m -CONFIG_PATA_PLATFORM=m -CONFIG_PATA_SCH=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -# CONFIG_MD_AUTODETECT is not set -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -# CONFIG_MULTICORE_RAID456 is not set -CONFIG_MD_RAID6_PQ=m -# CONFIG_ASYNC_RAID6_TEST is not set -CONFIG_MD_MULTIPATH=m 
-CONFIG_MD_FAULTY=m -CONFIG_BLK_DEV_DM=m -# CONFIG_DM_DEBUG is not set -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_DELAY=m -# CONFIG_DM_UEVENT is not set -CONFIG_FUSION=y -CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m -CONFIG_FUSION_SAS=m -CONFIG_FUSION_MAX_SGE=128 -CONFIG_FUSION_CTL=m -# CONFIG_FUSION_LOGGING is not set - -# -# IEEE 1394 (FireWire) support -# - -# -# You can enable one or both FireWire driver stacks. -# - -# -# See the help texts for more information. -# -CONFIG_FIREWIRE=m -CONFIG_FIREWIRE_OHCI=m -CONFIG_FIREWIRE_OHCI_DEBUG=y -CONFIG_FIREWIRE_SBP2=m -CONFIG_FIREWIRE_NET=m -CONFIG_IEEE1394=m -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_PCILYNX=m -CONFIG_IEEE1394_SBP2=m -# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set -CONFIG_IEEE1394_ETH1394_ROM_ENTRY=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_DV1394=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set -CONFIG_I2O=m -CONFIG_I2O_LCT_NOTIFY_ON_CHANGES=y -CONFIG_I2O_EXT_ADAPTEC=y -CONFIG_I2O_EXT_ADAPTEC_DMA64=y -CONFIG_I2O_CONFIG=m -CONFIG_I2O_CONFIG_OLD_IOCTL=y -CONFIG_I2O_BUS=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m -# CONFIG_MACINTOSH_DRIVERS is not set -CONFIG_NETDEVICES=y -CONFIG_IFB=m -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_MACVLAN=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_VETH=m -CONFIG_NET_SB1000=m -CONFIG_ARCNET=m -CONFIG_ARCNET_1201=m -CONFIG_ARCNET_1051=m -CONFIG_ARCNET_RAW=m -CONFIG_ARCNET_CAP=m -CONFIG_ARCNET_COM90xx=m -CONFIG_ARCNET_COM90xxIO=m -CONFIG_ARCNET_RIM_I=m -CONFIG_ARCNET_COM20020=m -CONFIG_ARCNET_COM20020_PCI=m -CONFIG_PHYLIB=m - -# -# MII PHY device drivers -# -CONFIG_MARVELL_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_LXT_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_SMSC_PHY=m -CONFIG_BROADCOM_PHY=m -CONFIG_ICPLUS_PHY=m -CONFIG_REALTEK_PHY=m 
-CONFIG_NATIONAL_PHY=m -CONFIG_STE10XP=m -CONFIG_LSI_ET1011C_PHY=m -CONFIG_MDIO_BITBANG=m -CONFIG_MDIO_GPIO=m -CONFIG_NET_ETHERNET=y -CONFIG_MII=m -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_CASSINI=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_ENC28J60=m -# CONFIG_ENC28J60_WRITEVERIFY is not set -CONFIG_ETHOC=m -CONFIG_DNET=m -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_DE2104X_DSL=0 -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -# CONFIG_TULIP_NAPI is not set -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -CONFIG_ULI526X=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_HP100=m -# CONFIG_IBM_NEW_EMAC_ZMII is not set -# CONFIG_IBM_NEW_EMAC_RGMII is not set -# CONFIG_IBM_NEW_EMAC_TAH is not set -# CONFIG_IBM_NEW_EMAC_EMAC4 is not set -# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set -# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set -# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y -CONFIG_FORCEDETH=m -# CONFIG_FORCEDETH_NAPI is not set -CONFIG_E100=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -CONFIG_8139TOO_PIO=y -# CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_R6040=m -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SMSC9420=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_KS8842=m -CONFIG_KS8851=m -CONFIG_KS8851_MLL=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_SC92031=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m -CONFIG_ATL2=m -CONFIG_NETDEV_1000=y -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000E=m -CONFIG_IP1000=m -CONFIG_IGB=m -CONFIG_IGB_DCA=y -CONFIG_IGBVF=m -CONFIG_NS83820=m -CONFIG_HAMACHI=m 
-CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_R8169_VLAN=y -CONFIG_SIS190=m -CONFIG_SKGE=m -# CONFIG_SKGE_DEBUG is not set -CONFIG_SKY2=m -# CONFIG_SKY2_DEBUG is not set -CONFIG_VIA_VELOCITY=m -CONFIG_TIGON3=m -CONFIG_BNX2=m -CONFIG_CNIC=m -CONFIG_QLA3XXX=m -CONFIG_ATL1=m -CONFIG_ATL1E=m -CONFIG_ATL1C=m -CONFIG_JME=m -CONFIG_NETDEV_10000=y -CONFIG_MDIO=m -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T3_DEPENDS=y -CONFIG_CHELSIO_T3=m -CONFIG_ENIC=m -CONFIG_IXGBE=m -CONFIG_IXGBE_DCA=y -CONFIG_IXGB=m -CONFIG_S2IO=m -CONFIG_VXGE=m -# CONFIG_VXGE_DEBUG_TRACE_ALL is not set -CONFIG_MYRI10GE=m -CONFIG_MYRI10GE_DCA=y -CONFIG_NETXEN_NIC=m -CONFIG_NIU=m -CONFIG_MLX4_EN=m -CONFIG_MLX4_CORE=m -CONFIG_MLX4_DEBUG=y -CONFIG_TEHUTI=m -CONFIG_BNX2X=m -CONFIG_QLGE=m -CONFIG_SFC=m -CONFIG_SFC_MTD=y -CONFIG_BE2NET=m -# CONFIG_TR is not set -CONFIG_WLAN=y -# CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set - -# -# WiMAX Wireless Broadband devices -# -CONFIG_WIMAX_I2400M=m -CONFIG_WIMAX_I2400M_USB=m -CONFIG_WIMAX_I2400M_SDIO=m -CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 - -# -# USB Network Adapters -# -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_USBNET=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_NET1080=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_NET_CDC_SUBSET=m -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_KC2190=y -CONFIG_USB_NET_ZAURUS=m -CONFIG_USB_HSO=m -CONFIG_USB_NET_INT51X1=m -CONFIG_USB_CDC_PHONET=m -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_PCMCIA_AXNET=m -CONFIG_ARCNET_COM20020_CS=m -CONFIG_WAN=y 
-CONFIG_LANMEDIA=m -CONFIG_HDLC=m -CONFIG_HDLC_RAW=m -CONFIG_HDLC_RAW_ETH=m -CONFIG_HDLC_CISCO=m -CONFIG_HDLC_FR=m -CONFIG_HDLC_PPP=m -CONFIG_HDLC_X25=m -CONFIG_PCI200SYN=m -CONFIG_WANXL=m -# CONFIG_WANXL_BUILD_FIRMWARE is not set -CONFIG_PC300TOO=m -CONFIG_FARSYNC=m -CONFIG_DSCC4=m -CONFIG_DSCC4_PCISYNC=y -CONFIG_DSCC4_PCI_RST=y -CONFIG_DLCI=m -CONFIG_DLCI_MAX=8 -CONFIG_WAN_ROUTER_DRIVERS=m -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -CONFIG_LAPBETHER=m -CONFIG_X25_ASY=m -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y -CONFIG_ATM_DRIVERS=y -CONFIG_ATM_DUMMY=m -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E=m -CONFIG_ATM_FORE200E_USE_TASKLET=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_HE=m -CONFIG_ATM_HE_USE_SUNI=y -CONFIG_ATM_SOLOS=m -CONFIG_IEEE802154_DRIVERS=m -CONFIG_IEEE802154_FAKEHARD=m -CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_FDDI=y -CONFIG_DEFXX=m -# CONFIG_DEFXX_MMIO is not set -CONFIG_SKFP=m -CONFIG_HIPPI=y -CONFIG_ROADRUNNER=m -# CONFIG_ROADRUNNER_LARGE_RINGS is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_MPPE=m -CONFIG_PPPOE=m -CONFIG_PPPOATM=m -CONFIG_PPPOL2TP=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLHC=m -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y -# CONFIG_NET_FC is not set -CONFIG_NETCONSOLE=m -CONFIG_NETCONSOLE_DYNAMIC=y -CONFIG_NETPOLL=y -# CONFIG_NETPOLL_TRAP is not set 
-CONFIG_NET_POLL_CONTROLLER=y -CONFIG_VIRTIO_NET=m -CONFIG_VMXNET3=m -CONFIG_ISDN=y -# CONFIG_ISDN_I4L is not set -CONFIG_MISDN=m -CONFIG_MISDN_DSP=m -CONFIG_MISDN_L1OIP=m - -# -# mISDN hardware drivers -# -CONFIG_MISDN_HFCPCI=m -CONFIG_MISDN_HFCMULTI=m -CONFIG_MISDN_HFCUSB=m -CONFIG_MISDN_AVMFRITZ=m -# CONFIG_MISDN_SPEEDFAX is not set -# CONFIG_MISDN_INFINEON is not set -# CONFIG_MISDN_W6692 is not set -# CONFIG_MISDN_NETJET is not set -CONFIG_MISDN_IPAC=m -CONFIG_ISDN_CAPI=m -# CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON is not set -# CONFIG_CAPI_TRACE is not set -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m - -# -# CAPI hardware drivers -# -CONFIG_CAPI_AVM=y -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_CAPI_EICON=y -CONFIG_ISDN_DIVAS=m -CONFIG_ISDN_DIVAS_BRIPCI=y -CONFIG_ISDN_DIVAS_PRIPCI=y -CONFIG_ISDN_DIVAS_DIVACAPI=m -CONFIG_ISDN_DIVAS_USERIDI=m -CONFIG_ISDN_DIVAS_MAINT=m -# CONFIG_PHONE is not set - -# -# Input device support -# -CONFIG_INPUT=y -CONFIG_INPUT_FF_MEMLESS=m -CONFIG_INPUT_POLLDEV=m - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m -CONFIG_INPUT_EVBUG=m -CONFIG_XEN_KBDDEV_FRONTEND=m - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -# CONFIG_KEYBOARD_ADP5588 is not set -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_QT2160 is not set -CONFIG_KEYBOARD_LKKBD=m -CONFIG_KEYBOARD_GPIO=m -CONFIG_KEYBOARD_MATRIX=m -CONFIG_KEYBOARD_LM8323=m -# CONFIG_KEYBOARD_MAX7359 is not set -CONFIG_KEYBOARD_NEWTON=m -# CONFIG_KEYBOARD_OPENCORES is not set -CONFIG_KEYBOARD_STOWAWAY=m -CONFIG_KEYBOARD_SUNKBD=m -CONFIG_KEYBOARD_XTKBD=m -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=m -CONFIG_MOUSE_PS2_ALPS=y 
-CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -# CONFIG_MOUSE_PS2_ELANTECH is not set -# CONFIG_MOUSE_PS2_SENTELIC is not set -# CONFIG_MOUSE_PS2_TOUCHKIT is not set -CONFIG_MOUSE_SERIAL=m -CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_BCM5974=m -CONFIG_MOUSE_VSXXXAA=m -CONFIG_MOUSE_GPIO=m -CONFIG_MOUSE_SYNAPTICS_I2C=m -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_ADS7846=m -CONFIG_TOUCHSCREEN_AD7877=m -CONFIG_TOUCHSCREEN_AD7879_I2C=m -CONFIG_TOUCHSCREEN_AD7879=m -CONFIG_TOUCHSCREEN_EETI=m -CONFIG_TOUCHSCREEN_FUJITSU=m -CONFIG_TOUCHSCREEN_GUNZE=m -CONFIG_TOUCHSCREEN_ELO=m -CONFIG_TOUCHSCREEN_WACOM_W8001=m -# CONFIG_TOUCHSCREEN_MCS5000 is not set -CONFIG_TOUCHSCREEN_MTOUCH=m -CONFIG_TOUCHSCREEN_INEXIO=m -CONFIG_TOUCHSCREEN_MK712=m -CONFIG_TOUCHSCREEN_PENMOUNT=m -CONFIG_TOUCHSCREEN_TOUCHRIGHT=m -CONFIG_TOUCHSCREEN_TOUCHWIN=m -CONFIG_TOUCHSCREEN_UCB1400=m -CONFIG_TOUCHSCREEN_WM97XX=m -CONFIG_TOUCHSCREEN_WM9705=y -CONFIG_TOUCHSCREEN_WM9712=y -CONFIG_TOUCHSCREEN_WM9713=y -CONFIG_TOUCHSCREEN_USB_COMPOSITE=m -CONFIG_TOUCHSCREEN_USB_EGALAX=y -CONFIG_TOUCHSCREEN_USB_PANJIT=y -CONFIG_TOUCHSCREEN_USB_3M=y -CONFIG_TOUCHSCREEN_USB_ITM=y -CONFIG_TOUCHSCREEN_USB_ETURBO=y -CONFIG_TOUCHSCREEN_USB_GUNZE=y -CONFIG_TOUCHSCREEN_USB_DMC_TSC10=y -CONFIG_TOUCHSCREEN_USB_IRTOUCH=y -CONFIG_TOUCHSCREEN_USB_IDEALTEK=y -CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH=y -CONFIG_TOUCHSCREEN_USB_GOTOP=y -CONFIG_TOUCHSCREEN_USB_JASTEC=y -CONFIG_TOUCHSCREEN_USB_E2I=y -CONFIG_TOUCHSCREEN_TOUCHIT213=m -CONFIG_TOUCHSCREEN_TSC2007=m -CONFIG_INPUT_MISC=y -CONFIG_INPUT_PCSPKR=m -CONFIG_INPUT_APANEL=m -CONFIG_INPUT_ATLAS_BTNS=m -CONFIG_INPUT_ATI_REMOTE=m -CONFIG_INPUT_ATI_REMOTE2=m -CONFIG_INPUT_KEYSPAN_REMOTE=m -CONFIG_INPUT_POWERMATE=m -CONFIG_INPUT_YEALINK=m -CONFIG_INPUT_CM109=m -CONFIG_INPUT_UINPUT=m -CONFIG_INPUT_WINBOND_CIR=m -CONFIG_INPUT_PCF50633_PMU=m 
-CONFIG_INPUT_GPIO_ROTARY_ENCODER=m - -# -# Hardware I/O ports -# -CONFIG_SERIO=y -CONFIG_SERIO_I8042=y -CONFIG_SERIO_SERPORT=m -CONFIG_SERIO_CT82C710=m -CONFIG_SERIO_PARKBD=m -CONFIG_SERIO_PCIPS2=m -CONFIG_SERIO_LIBPS2=y -CONFIG_SERIO_RAW=m -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -# CONFIG_DEVKMEM is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_COMPUTONE=m -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -CONFIG_SYNCLINK=m -CONFIG_SYNCLINKMP=m -CONFIG_SYNCLINK_GT=m -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_NOZOMI=m - -# -# Serial drivers -# -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_PNP=y -CONFIG_SERIAL_8250_CS=m -CONFIG_SERIAL_8250_NR_UARTS=16 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_SERIAL_8250_DETECT_IRQ is not set -CONFIG_SERIAL_8250_RSA=y - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_MAX3100=m -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_JSM=m -CONFIG_UNIX98_PTYS=y -# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set -# CONFIG_LEGACY_PTYS is not set -CONFIG_PRINTER=m -# CONFIG_LP_CONSOLE is not set -CONFIG_PPDEV=m -CONFIG_HVC_DRIVER=y -CONFIG_HVC_IRQ=y -CONFIG_HVC_XEN=y -CONFIG_VIRTIO_CONSOLE=m -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m -CONFIG_HW_RANDOM=m -CONFIG_HW_RANDOM_TIMERIOMEM=m -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM_AMD=m -CONFIG_HW_RANDOM_VIA=m -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_NVRAM=m -CONFIG_R3964=m 
-CONFIG_APPLICOM=m - -# -# PCMCIA character devices -# -CONFIG_SYNCLINK_CS=m -CONFIG_CARDMAN_4000=m -CONFIG_CARDMAN_4040=m -CONFIG_IPWIRELESS=m -CONFIG_MWAVE=m -CONFIG_PC8736x_GPIO=m -CONFIG_NSC_GPIO=m -CONFIG_RAW_DRIVER=m -CONFIG_MAX_RAW_DEVS=256 -CONFIG_HPET=y -CONFIG_HPET_MMAP=y -CONFIG_HANGCHECK_TIMER=m -CONFIG_TCG_TPM=m -CONFIG_TCG_TIS=m -CONFIG_TCG_NSC=m -CONFIG_TCG_ATMEL=m -CONFIG_TCG_INFINEON=m -CONFIG_TELCLOCK=m -CONFIG_DEVPORT=y -CONFIG_I2C=m -CONFIG_I2C_BOARDINFO=y -CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCA=m - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD756_S4882=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_NFORCE2=m -CONFIG_I2C_NFORCE2_S4985=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m - -# -# ACPI drivers -# -CONFIG_I2C_SCMI=m - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -CONFIG_I2C_GPIO=m -CONFIG_I2C_OCORES=m -CONFIG_I2C_SIMTEC=m - -# -# External I2C/SMBus adapter drivers -# -CONFIG_I2C_PARPORT=m -CONFIG_I2C_PARPORT_LIGHT=m -CONFIG_I2C_TAOS_EVM=m -CONFIG_I2C_TINY_USB=m - -# -# Graphics adapter I2C/DDC channel drivers -# -# CONFIG_I2C_VOODOO3 is not set - -# -# Other I2C/SMBus bus drivers -# -CONFIG_I2C_PCA_PLATFORM=m -CONFIG_I2C_STUB=m - -# -# Miscellaneous I2C Chip support -# -CONFIG_DS1682=m -CONFIG_SENSORS_TSL2550=m -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# CONFIG_I2C_DEBUG_CHIP is not set -CONFIG_SPI=y -CONFIG_SPI_MASTER=y - -# -# SPI Master Controller Drivers -# -CONFIG_SPI_BITBANG=m -CONFIG_SPI_BUTTERFLY=m -CONFIG_SPI_GPIO=m -CONFIG_SPI_LM70_LLP=m - -# -# SPI Protocol Masters -# -CONFIG_SPI_SPIDEV=m -CONFIG_SPI_TLE62X0=m - -# -# PPS support -# -# 
CONFIG_PPS is not set -CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y -CONFIG_GPIOLIB=y -# CONFIG_GPIO_SYSFS is not set - -# -# Memory mapped GPIO expanders: -# - -# -# I2C GPIO expanders: -# -CONFIG_GPIO_MAX732X=m -CONFIG_GPIO_PCA953X=m -CONFIG_GPIO_PCF857X=m - -# -# PCI GPIO expanders: -# -# CONFIG_GPIO_LANGWELL is not set - -# -# SPI GPIO expanders: -# -CONFIG_GPIO_MAX7301=m -CONFIG_GPIO_MCP23S08=m -# CONFIG_GPIO_MC33880 is not set - -# -# AC97 GPIO expanders: -# -# CONFIG_GPIO_UCB1400 is not set -CONFIG_W1=m -CONFIG_W1_CON=y - -# -# 1-wire Bus Masters -# -CONFIG_W1_MASTER_MATROX=m -CONFIG_W1_MASTER_DS2490=m -CONFIG_W1_MASTER_DS2482=m -CONFIG_W1_MASTER_GPIO=m - -# -# 1-wire Slaves -# -CONFIG_W1_SLAVE_THERM=m -CONFIG_W1_SLAVE_SMEM=m -CONFIG_W1_SLAVE_DS2431=m -CONFIG_W1_SLAVE_DS2433=m -# CONFIG_W1_SLAVE_DS2433_CRC is not set -CONFIG_W1_SLAVE_DS2760=m -CONFIG_W1_SLAVE_BQ27000=m -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -CONFIG_PDA_POWER=m -CONFIG_BATTERY_DS2760=m -CONFIG_BATTERY_DS2782=m -CONFIG_BATTERY_BQ27x00=m -CONFIG_BATTERY_MAX17040=m -CONFIG_CHARGER_PCF50633=m -CONFIG_HWMON=m -CONFIG_HWMON_VID=m -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Native drivers -# -CONFIG_SENSORS_ABITUGURU=m -CONFIG_SENSORS_ABITUGURU3=m -CONFIG_SENSORS_AD7414=m -CONFIG_SENSORS_AD7418=m -CONFIG_SENSORS_ADCXX=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1029=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_ADT7462=m -CONFIG_SENSORS_ADT7470=m -# CONFIG_SENSORS_ADT7473 is not set -CONFIG_SENSORS_ADT7475=m -CONFIG_SENSORS_K8TEMP=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_ATXP1=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_I5K_AMB=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m -CONFIG_SENSORS_F75375S=m -CONFIG_SENSORS_FSCHMD=m -CONFIG_SENSORS_G760A=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_CORETEMP=m -CONFIG_SENSORS_IBMAEM=m -CONFIG_SENSORS_IBMPEX=m -CONFIG_SENSORS_IT87=m 
-CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM70=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_LTC4215=m -CONFIG_SENSORS_LTC4245=m -CONFIG_SENSORS_LM95241=m -CONFIG_SENSORS_MAX1111=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m -CONFIG_SENSORS_PCF8591=m -CONFIG_SENSORS_SHT15=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47B397=m -CONFIG_SENSORS_ADS7828=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_TMP401=m -CONFIG_SENSORS_TMP421=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83791D=m -CONFIG_SENSORS_W83792D=m -CONFIG_SENSORS_W83793=m -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83627EHF=m -CONFIG_SENSORS_HDAPS=m -CONFIG_SENSORS_APPLESMC=m - -# -# ACPI drivers -# -CONFIG_SENSORS_ATK0110=m -CONFIG_SENSORS_LIS3LV02D=m -CONFIG_THERMAL=y -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set - -# -# Watchdog Device Drivers -# -CONFIG_SOFT_WATCHDOG=m -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -# CONFIG_SBC_FITPC2_WATCHDOG is not set -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_IBMASR=m -CONFIG_WAFER_WDT=m -CONFIG_I6300ESB_WDT=m -CONFIG_ITCO_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -# CONFIG_HP_WATCHDOG is not set -CONFIG_SC1200_WDT=m -CONFIG_PC87413_WDT=m -CONFIG_60XX_WDT=m -CONFIG_SBC8360_WDT=m -CONFIG_CPU5_WDT=m -CONFIG_SMSC_SCH311X_WDT=m -CONFIG_SMSC37B787_WDT=m -CONFIG_W83627HF_WDT=m -CONFIG_W83697HF_WDT=m -CONFIG_W83697UG_WDT=m -CONFIG_W83877F_WDT=m -CONFIG_W83977F_WDT=m -CONFIG_MACHZ_WDT=m 
-CONFIG_SBC_EPX_C3_WATCHDOG=m -# CONFIG_XEN_WDT is not set - -# -# PCI-based Watchdog Cards -# -CONFIG_PCIPCWATCHDOG=m -CONFIG_WDTPCI=m - -# -# USB-based Watchdog Cards -# -CONFIG_USBPCWATCHDOG=m -CONFIG_SSB_POSSIBLE=y - -# -# Sonics Silicon Backplane -# -CONFIG_SSB=m -CONFIG_SSB_SPROM=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -# CONFIG_SSB_B43_PCI_BRIDGE is not set -CONFIG_SSB_PCMCIAHOST_POSSIBLE=y -CONFIG_SSB_PCMCIAHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -CONFIG_SSB_SDIOHOST=y -# CONFIG_SSB_SILENT is not set -# CONFIG_SSB_DEBUG is not set -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y - -# -# Multifunction device drivers -# -CONFIG_MFD_CORE=m -CONFIG_MFD_SM501=m -# CONFIG_MFD_SM501_GPIO is not set -CONFIG_HTC_PASIC3=m -CONFIG_UCB1400_CORE=m -CONFIG_TPS65010=m -# CONFIG_MFD_TMIO is not set -CONFIG_MFD_WM8400=m -# CONFIG_MFD_WM831X is not set -# CONFIG_MFD_WM8350_I2C is not set -CONFIG_MFD_PCF50633=m -# CONFIG_MFD_MC13783 is not set -CONFIG_PCF50633_ADC=m -CONFIG_PCF50633_GPIO=m -# CONFIG_AB3100_CORE is not set -# CONFIG_EZX_PCAP is not set -CONFIG_REGULATOR=y -# CONFIG_REGULATOR_DEBUG is not set -# CONFIG_REGULATOR_FIXED_VOLTAGE is not set -CONFIG_REGULATOR_VIRTUAL_CONSUMER=m -CONFIG_REGULATOR_USERSPACE_CONSUMER=m -CONFIG_REGULATOR_BQ24022=m -CONFIG_REGULATOR_MAX1586=m -CONFIG_REGULATOR_WM8400=m -CONFIG_REGULATOR_PCF50633=m -CONFIG_REGULATOR_LP3971=m -# CONFIG_REGULATOR_TPS65023 is not set -# CONFIG_REGULATOR_TPS6507X is not set -CONFIG_MEDIA_SUPPORT=m - -# -# Multimedia core support -# -CONFIG_VIDEO_DEV=m -CONFIG_VIDEO_V4L2_COMMON=m -# CONFIG_VIDEO_ALLOW_V4L1 is not set -CONFIG_VIDEO_V4L1_COMPAT=y -CONFIG_DVB_CORE=m -CONFIG_VIDEO_MEDIA=m - -# -# Multimedia drivers -# -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -# CONFIG_MEDIA_ATTACH is not set -CONFIG_MEDIA_TUNER=m -# CONFIG_MEDIA_TUNER_CUSTOMISE is not set -CONFIG_MEDIA_TUNER_SIMPLE=m -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA18271=m 
-CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_TEA5761=m -CONFIG_MEDIA_TUNER_TEA5767=m -CONFIG_MEDIA_TUNER_MT20XX=m -CONFIG_MEDIA_TUNER_MT2060=m -CONFIG_MEDIA_TUNER_MT2266=m -CONFIG_MEDIA_TUNER_MT2131=m -CONFIG_MEDIA_TUNER_QT1010=m -CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_TUNER_MXL5005S=m -CONFIG_MEDIA_TUNER_MXL5007T=m -CONFIG_MEDIA_TUNER_MC44S803=m -CONFIG_VIDEO_V4L2=m -CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_DMA_SG=m -CONFIG_VIDEOBUF_VMALLOC=m -CONFIG_VIDEOBUF_DVB=m -CONFIG_VIDEO_BTCX=m -CONFIG_VIDEO_IR=m -CONFIG_VIDEO_TVEEPROM=m -CONFIG_VIDEO_TUNER=m -CONFIG_VIDEO_CAPTURE_DRIVERS=y -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set -CONFIG_VIDEO_IR_I2C=m - -# -# Encoders/decoders and other helper chips -# - -# -# Audio decoders -# -CONFIG_VIDEO_TVAUDIO=m -CONFIG_VIDEO_TDA7432=m -CONFIG_VIDEO_TDA9840=m -CONFIG_VIDEO_TDA9875=m -CONFIG_VIDEO_TEA6415C=m -CONFIG_VIDEO_TEA6420=m -CONFIG_VIDEO_MSP3400=m -CONFIG_VIDEO_CS5345=m -CONFIG_VIDEO_CS53L32A=m -CONFIG_VIDEO_M52790=m -CONFIG_VIDEO_TLV320AIC23B=m -CONFIG_VIDEO_WM8775=m -CONFIG_VIDEO_WM8739=m -CONFIG_VIDEO_VP27SMPX=m - -# -# RDS decoders -# -CONFIG_VIDEO_SAA6588=m - -# -# Video decoders -# -# CONFIG_VIDEO_ADV7180 is not set -CONFIG_VIDEO_BT819=m -CONFIG_VIDEO_BT856=m -CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_KS0127=m -CONFIG_VIDEO_OV7670=m -CONFIG_VIDEO_MT9V011=m -CONFIG_VIDEO_TCM825X=m -CONFIG_VIDEO_SAA7110=m -CONFIG_VIDEO_SAA711X=m -CONFIG_VIDEO_SAA717X=m -CONFIG_VIDEO_TVP514X=m -CONFIG_VIDEO_TVP5150=m -CONFIG_VIDEO_VPX3220=m - -# -# Video and audio decoders -# -CONFIG_VIDEO_CX25840=m - -# -# MPEG video encoders -# -CONFIG_VIDEO_CX2341X=m - -# -# Video encoders -# -CONFIG_VIDEO_SAA7127=m -CONFIG_VIDEO_SAA7185=m -CONFIG_VIDEO_ADV7170=m -CONFIG_VIDEO_ADV7175=m -CONFIG_VIDEO_THS7303=m -CONFIG_VIDEO_ADV7343=m - -# -# Video improvement chips -# -CONFIG_VIDEO_UPD64031A=m -CONFIG_VIDEO_UPD64083=m -# CONFIG_VIDEO_VIVI 
is not set -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_BT848_DVB=y -CONFIG_VIDEO_SAA5246A=m -CONFIG_VIDEO_SAA5249=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_DC30=m -CONFIG_VIDEO_ZORAN_ZR36060=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZORAN_LML33R10=m -CONFIG_VIDEO_ZORAN_AVS6EYES=m -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_SAA7134_ALSA=m -CONFIG_VIDEO_SAA7134_DVB=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_CX88=m -CONFIG_VIDEO_CX88_ALSA=m -CONFIG_VIDEO_CX88_BLACKBIRD=m -CONFIG_VIDEO_CX88_DVB=m -CONFIG_VIDEO_CX88_MPEG=m -CONFIG_VIDEO_CX88_VP3054=m -CONFIG_VIDEO_CX23885=m -CONFIG_VIDEO_AU0828=m -CONFIG_VIDEO_IVTV=m -CONFIG_VIDEO_FB_IVTV=m -CONFIG_VIDEO_CX18=m -CONFIG_VIDEO_SAA7164=m -CONFIG_VIDEO_CAFE_CCIC=m -CONFIG_SOC_CAMERA=m -CONFIG_SOC_CAMERA_MT9M001=m -CONFIG_SOC_CAMERA_MT9M111=m -CONFIG_SOC_CAMERA_MT9T031=m -CONFIG_SOC_CAMERA_MT9V022=m -CONFIG_SOC_CAMERA_TW9910=m -CONFIG_SOC_CAMERA_PLATFORM=m -CONFIG_SOC_CAMERA_OV772X=m -CONFIG_V4L_USB_DRIVERS=y -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -CONFIG_USB_GSPCA=m -CONFIG_USB_M5602=m -CONFIG_USB_STV06XX=m -CONFIG_USB_GL860=m -CONFIG_USB_GSPCA_CONEX=m -CONFIG_USB_GSPCA_ETOMS=m -CONFIG_USB_GSPCA_FINEPIX=m -CONFIG_USB_GSPCA_JEILINJ=m -CONFIG_USB_GSPCA_MARS=m -CONFIG_USB_GSPCA_MR97310A=m -CONFIG_USB_GSPCA_OV519=m -CONFIG_USB_GSPCA_OV534=m -CONFIG_USB_GSPCA_PAC207=m -CONFIG_USB_GSPCA_PAC7311=m -CONFIG_USB_GSPCA_SN9C20X=m -# CONFIG_USB_GSPCA_SN9C20X_EVDEV is not set -CONFIG_USB_GSPCA_SONIXB=m -CONFIG_USB_GSPCA_SONIXJ=m -CONFIG_USB_GSPCA_SPCA500=m -CONFIG_USB_GSPCA_SPCA501=m -CONFIG_USB_GSPCA_SPCA505=m -CONFIG_USB_GSPCA_SPCA506=m -CONFIG_USB_GSPCA_SPCA508=m -CONFIG_USB_GSPCA_SPCA561=m -CONFIG_USB_GSPCA_SQ905=m -CONFIG_USB_GSPCA_SQ905C=m -CONFIG_USB_GSPCA_STK014=m -CONFIG_USB_GSPCA_SUNPLUS=m -CONFIG_USB_GSPCA_T613=m -CONFIG_USB_GSPCA_TV8532=m -CONFIG_USB_GSPCA_VC032X=m -CONFIG_USB_GSPCA_ZC3XX=m -CONFIG_VIDEO_PVRUSB2=m 
-CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_HDPVR=m -CONFIG_VIDEO_EM28XX=m -CONFIG_VIDEO_EM28XX_ALSA=m -CONFIG_VIDEO_EM28XX_DVB=m -CONFIG_VIDEO_CX231XX=m -CONFIG_VIDEO_CX231XX_ALSA=m -CONFIG_VIDEO_CX231XX_DVB=m -CONFIG_VIDEO_USBVISION=m -CONFIG_USB_ET61X251=m -CONFIG_USB_SN9C102=m -CONFIG_USB_ZC0301=m -CONFIG_USB_PWC_INPUT_EVDEV=y -CONFIG_USB_ZR364XX=m -CONFIG_USB_STKWEBCAM=m -CONFIG_USB_S2255=m -CONFIG_RADIO_ADAPTERS=y -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -# CONFIG_I2C_SI4713 is not set -# CONFIG_RADIO_SI4713 is not set -CONFIG_USB_DSBR=m -# CONFIG_RADIO_SI470X is not set -CONFIG_USB_MR800=m -CONFIG_RADIO_TEA5764=m -CONFIG_DVB_MAX_ADAPTERS=8 -# CONFIG_DVB_DYNAMIC_MINORS is not set -CONFIG_DVB_CAPTURE_DRIVERS=y - -# -# Supported SAA7146 based PCI Adapters -# -CONFIG_TTPCI_EEPROM=m -CONFIG_DVB_AV7110=m -CONFIG_DVB_AV7110_OSD=y -CONFIG_DVB_BUDGET_CORE=m -CONFIG_DVB_BUDGET=m -CONFIG_DVB_BUDGET_CI=m -CONFIG_DVB_BUDGET_AV=m -CONFIG_DVB_BUDGET_PATCH=m - -# -# Supported USB Adapters -# -CONFIG_DVB_USB=m -# CONFIG_DVB_USB_DEBUG is not set -CONFIG_DVB_USB_A800=m -CONFIG_DVB_USB_DIBUSB_MB=m -# CONFIG_DVB_USB_DIBUSB_MB_FAULTY is not set -CONFIG_DVB_USB_DIBUSB_MC=m -CONFIG_DVB_USB_DIB0700=m -CONFIG_DVB_USB_UMT_010=m -CONFIG_DVB_USB_CXUSB=m -CONFIG_DVB_USB_M920X=m -CONFIG_DVB_USB_GL861=m -CONFIG_DVB_USB_AU6610=m -CONFIG_DVB_USB_DIGITV=m -CONFIG_DVB_USB_VP7045=m -CONFIG_DVB_USB_VP702X=m -CONFIG_DVB_USB_GP8PSK=m -CONFIG_DVB_USB_NOVA_T_USB2=m -CONFIG_DVB_USB_TTUSB2=m -CONFIG_DVB_USB_DTT200U=m -CONFIG_DVB_USB_OPERA1=m -CONFIG_DVB_USB_AF9005=m -CONFIG_DVB_USB_AF9005_REMOTE=m -CONFIG_DVB_USB_DW2102=m -CONFIG_DVB_USB_CINERGY_T2=m -CONFIG_DVB_USB_ANYSEE=m -CONFIG_DVB_USB_DTV5100=m -CONFIG_DVB_USB_AF9015=m -CONFIG_DVB_USB_CE6230=m -# CONFIG_DVB_USB_FRIIO is not set -CONFIG_DVB_TTUSB_BUDGET=m -CONFIG_DVB_TTUSB_DEC=m -CONFIG_SMS_SIANO_MDTV=m - -# -# Siano module components -# 
-CONFIG_SMS_USB_DRV=m -CONFIG_SMS_SDIO_DRV=m - -# -# Supported FlexCopII (B2C2) Adapters -# -CONFIG_DVB_B2C2_FLEXCOP=m -CONFIG_DVB_B2C2_FLEXCOP_PCI=m -CONFIG_DVB_B2C2_FLEXCOP_USB=m -# CONFIG_DVB_B2C2_FLEXCOP_DEBUG is not set - -# -# Supported BT878 Adapters -# -CONFIG_DVB_BT8XX=m - -# -# Supported Pluto2 Adapters -# -CONFIG_DVB_PLUTO2=m - -# -# Supported SDMC DM1105 Adapters -# -CONFIG_DVB_DM1105=m - -# -# Supported FireWire (IEEE 1394) Adapters -# -CONFIG_DVB_FIREDTV=m -CONFIG_DVB_FIREDTV_IEEE1394=y -CONFIG_DVB_FIREDTV_INPUT=y - -# -# Supported Earthsoft PT1 Adapters -# -# CONFIG_DVB_PT1 is not set - -# -# Supported DVB Frontends -# -# CONFIG_DVB_FE_CUSTOMISE is not set -CONFIG_DVB_STB0899=m -CONFIG_DVB_STB6100=m -CONFIG_DVB_STV090x=m -CONFIG_DVB_STV6110x=m -CONFIG_DVB_CX24110=m -CONFIG_DVB_CX24123=m -CONFIG_DVB_MT312=m -CONFIG_DVB_ZL10036=m -CONFIG_DVB_ZL10039=m -CONFIG_DVB_S5H1420=m -CONFIG_DVB_STV0288=m -CONFIG_DVB_STB6000=m -CONFIG_DVB_STV0299=m -CONFIG_DVB_STV6110=m -CONFIG_DVB_STV0900=m -CONFIG_DVB_TDA8083=m -CONFIG_DVB_TDA10086=m -CONFIG_DVB_TDA8261=m -CONFIG_DVB_VES1X93=m -CONFIG_DVB_TUNER_ITD1000=m -CONFIG_DVB_TUNER_CX24113=m -CONFIG_DVB_TDA826X=m -CONFIG_DVB_TUA6100=m -CONFIG_DVB_CX24116=m -CONFIG_DVB_SI21XX=m -CONFIG_DVB_SP8870=m -CONFIG_DVB_SP887X=m -CONFIG_DVB_CX22700=m -CONFIG_DVB_CX22702=m -CONFIG_DVB_L64781=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_NXT6000=m -CONFIG_DVB_MT352=m -CONFIG_DVB_ZL10353=m -CONFIG_DVB_DIB3000MB=m -CONFIG_DVB_DIB3000MC=m -CONFIG_DVB_DIB7000M=m -CONFIG_DVB_DIB7000P=m -CONFIG_DVB_TDA10048=m -CONFIG_DVB_AF9013=m -CONFIG_DVB_VES1820=m -CONFIG_DVB_TDA10021=m -CONFIG_DVB_TDA10023=m -CONFIG_DVB_STV0297=m -CONFIG_DVB_NXT200X=m -CONFIG_DVB_OR51211=m -CONFIG_DVB_OR51132=m -CONFIG_DVB_BCM3510=m -CONFIG_DVB_LGDT330X=m -CONFIG_DVB_LGDT3305=m -CONFIG_DVB_S5H1409=m -CONFIG_DVB_AU8522=m -CONFIG_DVB_S5H1411=m -CONFIG_DVB_DIB8000=m -CONFIG_DVB_PLL=m -CONFIG_DVB_TUNER_DIB0070=m -CONFIG_DVB_LNBP21=m -CONFIG_DVB_ISL6405=m -CONFIG_DVB_ISL6421=m 
-CONFIG_DVB_ISL6423=m -CONFIG_DVB_LGS8GXX=m -CONFIG_DAB=y -CONFIG_USB_DABUSB=m - -# -# Graphics support -# -CONFIG_AGP=m -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m -CONFIG_AGP_SIS=m -CONFIG_AGP_VIA=m -# CONFIG_VGA_ARB is not set -CONFIG_DRM=m -CONFIG_DRM_KMS_HELPER=m -CONFIG_DRM_TTM=m -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -CONFIG_DRM_I830=m -CONFIG_DRM_I915=m -# CONFIG_DRM_I915_KMS is not set -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m -CONFIG_DRM_VIA=m -CONFIG_DRM_SAVAGE=m -CONFIG_VGASTATE=m -CONFIG_VIDEO_OUTPUT_CONTROL=m -CONFIG_FB=m -# CONFIG_FIRMWARE_EDID is not set -CONFIG_FB_DDC=m -# CONFIG_FB_BOOT_VESA_SUPPORT is not set -CONFIG_FB_CFB_FILLRECT=m -CONFIG_FB_CFB_COPYAREA=m -CONFIG_FB_CFB_IMAGEBLIT=m -# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set -CONFIG_FB_SYS_FILLRECT=m -CONFIG_FB_SYS_COPYAREA=m -CONFIG_FB_SYS_IMAGEBLIT=m -# CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=m -CONFIG_FB_DEFERRED_IO=y -CONFIG_FB_HECUBA=m -CONFIG_FB_SVGALIB=m -# CONFIG_FB_MACMODES is not set -CONFIG_FB_BACKLIGHT=y -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_TILEBLITTING=y - -# -# Frame buffer hardware drivers -# -CONFIG_FB_CIRRUS=m -CONFIG_FB_PM2=m -CONFIG_FB_PM2_FIFO_DISCONNECT=y -CONFIG_FB_CYBER2000=m -CONFIG_FB_ARC=m -CONFIG_FB_VGA16=m -CONFIG_FB_UVESA=m -CONFIG_FB_N411=m -CONFIG_FB_HGA=m -# CONFIG_FB_HGA_ACCEL is not set -CONFIG_FB_S1D13XXX=m -CONFIG_FB_NVIDIA=m -CONFIG_FB_NVIDIA_I2C=y -# CONFIG_FB_NVIDIA_DEBUG is not set -CONFIG_FB_NVIDIA_BACKLIGHT=y -CONFIG_FB_RIVA=m -CONFIG_FB_RIVA_I2C=y -# CONFIG_FB_RIVA_DEBUG is not set -CONFIG_FB_RIVA_BACKLIGHT=y -CONFIG_FB_LE80578=m -CONFIG_FB_CARILLO_RANCH=m -CONFIG_FB_INTEL=m -# CONFIG_FB_INTEL_DEBUG is not set -CONFIG_FB_INTEL_I2C=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -CONFIG_FB_RADEON=m -CONFIG_FB_RADEON_I2C=y -CONFIG_FB_RADEON_BACKLIGHT=y -# CONFIG_FB_RADEON_DEBUG is not set 
-CONFIG_FB_ATY128=m -CONFIG_FB_ATY128_BACKLIGHT=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_GENERIC_LCD=y -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_BACKLIGHT=y -CONFIG_FB_S3=m -CONFIG_FB_SAVAGE=m -CONFIG_FB_SAVAGE_I2C=y -CONFIG_FB_SAVAGE_ACCEL=y -CONFIG_FB_SIS=m -CONFIG_FB_SIS_300=y -CONFIG_FB_SIS_315=y -CONFIG_FB_VIA=m -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_KYRO=m -CONFIG_FB_3DFX=m -CONFIG_FB_3DFX_ACCEL=y -CONFIG_FB_3DFX_I2C=y -CONFIG_FB_VOODOO1=m -CONFIG_FB_VT8623=m -CONFIG_FB_TRIDENT=m -CONFIG_FB_ARK=m -CONFIG_FB_PM3=m -CONFIG_FB_CARMINE=m -CONFIG_FB_CARMINE_DRAM_EVAL=y -# CONFIG_CARMINE_DRAM_CUSTOM is not set -CONFIG_FB_GEODE=y -CONFIG_FB_GEODE_LX=m -CONFIG_FB_GEODE_GX=m -CONFIG_FB_GEODE_GX1=m -CONFIG_FB_TMIO=m -CONFIG_FB_TMIO_ACCELL=y -CONFIG_FB_SM501=m -# CONFIG_FB_VIRTUAL is not set -CONFIG_XEN_FBDEV_FRONTEND=m -CONFIG_FB_METRONOME=m -CONFIG_FB_MB862XX=m -# CONFIG_FB_MB862XX_PCI_GDC is not set -CONFIG_FB_BROADSHEET=m -CONFIG_BACKLIGHT_LCD_SUPPORT=y -CONFIG_LCD_CLASS_DEVICE=m -CONFIG_LCD_LMS283GF05=m -CONFIG_LCD_LTV350QV=m -CONFIG_LCD_ILI9320=m -CONFIG_LCD_TDO24M=m -CONFIG_LCD_VGG2432A4=m -CONFIG_LCD_PLATFORM=m -CONFIG_BACKLIGHT_CLASS_DEVICE=m -CONFIG_BACKLIGHT_GENERIC=m -CONFIG_BACKLIGHT_PROGEAR=m -CONFIG_BACKLIGHT_CARILLO_RANCH=m -CONFIG_BACKLIGHT_MBP_NVIDIA=m -CONFIG_BACKLIGHT_SAHARA=m - -# -# Display device support -# -CONFIG_DISPLAY_SUPPORT=m - -# -# Display hardware drivers -# - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -# CONFIG_VGACON_SOFT_SCROLLBACK is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=m -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y -# CONFIG_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -# CONFIG_LOGO is not set -CONFIG_SOUND=m -CONFIG_SOUND_OSS_CORE=y -CONFIG_SOUND_OSS_CORE_PRECLAIM=y -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_HWDEP=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_JACK=y -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m 
-CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_PCM_OSS_PLUGINS=y -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_HRTIMER=m -CONFIG_SND_SEQ_HRTIMER_DEFAULT=y -CONFIG_SND_DYNAMIC_MINORS=y -# CONFIG_SND_SUPPORT_OLD_API is not set -# CONFIG_SND_VERBOSE_PROCFS is not set -# CONFIG_SND_VERBOSE_PRINTK is not set -# CONFIG_SND_DEBUG is not set -CONFIG_SND_VMASTER=y -CONFIG_SND_DMA_SGBUF=y -CONFIG_SND_RAWMIDI_SEQ=m -CONFIG_SND_OPL3_LIB_SEQ=m -# CONFIG_SND_OPL4_LIB_SEQ is not set -# CONFIG_SND_SBAWE_SEQ is not set -CONFIG_SND_EMU10K1_SEQ=m -CONFIG_SND_MPU401_UART=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_DRIVERS=y -CONFIG_SND_PCSP=m -CONFIG_SND_DUMMY=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_MTS64=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_PORTMAN2X4=m -CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 -CONFIG_SND_SB_COMMON=m -CONFIG_SND_SB16_DSP=m -CONFIG_SND_PCI=y -CONFIG_SND_AD1889=m -CONFIG_SND_ALS300=m -CONFIG_SND_ALS4000=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -# CONFIG_SND_BT87X_OVERCLOCK is not set -CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CS5530=m -CONFIG_SND_CS5535AUDIO=m -CONFIG_SND_CTXFI=m -CONFIG_SND_DARLA20=m -CONFIG_SND_GINA20=m -CONFIG_SND_LAYLA20=m -CONFIG_SND_DARLA24=m -CONFIG_SND_GINA24=m -CONFIG_SND_LAYLA24=m -CONFIG_SND_MONA=m -CONFIG_SND_MIA=m -CONFIG_SND_ECHO3G=m -CONFIG_SND_INDIGO=m -CONFIG_SND_INDIGOIO=m -CONFIG_SND_INDIGODJ=m -CONFIG_SND_INDIGOIOX=m -CONFIG_SND_INDIGODJX=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_EMU10K1X=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_FM801=m -# CONFIG_SND_FM801_TEA575X_BOOL is 
not set -CONFIG_SND_HDA_INTEL=m -CONFIG_SND_HDA_HWDEP=y -# CONFIG_SND_HDA_RECONFIG is not set -CONFIG_SND_HDA_INPUT_BEEP=y -CONFIG_SND_HDA_INPUT_JACK=y -# CONFIG_SND_HDA_PATCH_LOADER is not set -CONFIG_SND_HDA_CODEC_REALTEK=y -CONFIG_SND_HDA_CODEC_ANALOG=y -CONFIG_SND_HDA_CODEC_SIGMATEL=y -CONFIG_SND_HDA_CODEC_VIA=y -CONFIG_SND_HDA_CODEC_ATIHDMI=y -CONFIG_SND_HDA_CODEC_NVHDMI=y -CONFIG_SND_HDA_CODEC_INTELHDMI=y -CONFIG_SND_HDA_ELD=y -CONFIG_SND_HDA_CODEC_CIRRUS=y -CONFIG_SND_HDA_CODEC_CONEXANT=y -CONFIG_SND_HDA_CODEC_CA0110=y -CONFIG_SND_HDA_CODEC_CMEDIA=y -CONFIG_SND_HDA_CODEC_SI3054=y -CONFIG_SND_HDA_GENERIC=y -# CONFIG_SND_HDA_POWER_SAVE is not set -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -CONFIG_SND_HIFIER=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_KORG1212=m -CONFIG_SND_LX6464ES=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_PCXHR=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m -CONFIG_SND_VX222=m -CONFIG_SND_YMFPCI=m -CONFIG_SND_SPI=y -CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_USX2Y=m -CONFIG_SND_USB_CAIAQ=m -# CONFIG_SND_USB_CAIAQ_INPUT is not set -CONFIG_SND_USB_US122L=m -CONFIG_SND_PCMCIA=y -CONFIG_SND_VXPOCKET=m -CONFIG_SND_PDAUDIOCF=m -CONFIG_SND_SOC=m -CONFIG_SND_SOC_I2C_AND_SPI=m -CONFIG_SND_SOC_ALL_CODECS=m -CONFIG_SND_SOC_WM_HUBS=m -CONFIG_SND_SOC_AD1836=m -CONFIG_SND_SOC_AD1938=m -CONFIG_SND_SOC_AD73311=m -CONFIG_SND_SOC_AK4104=m -CONFIG_SND_SOC_AK4535=m -CONFIG_SND_SOC_AK4642=m -CONFIG_SND_SOC_CS4270=m -CONFIG_SND_SOC_L3=m -CONFIG_SND_SOC_PCM3008=m -CONFIG_SND_SOC_SPDIF=m -CONFIG_SND_SOC_SSM2602=m -CONFIG_SND_SOC_TLV320AIC23=m -CONFIG_SND_SOC_TLV320AIC26=m -CONFIG_SND_SOC_TLV320AIC3X=m -CONFIG_SND_SOC_UDA134X=m -CONFIG_SND_SOC_UDA1380=m -CONFIG_SND_SOC_WM8400=m -CONFIG_SND_SOC_WM8510=m 
-CONFIG_SND_SOC_WM8523=m -CONFIG_SND_SOC_WM8580=m -CONFIG_SND_SOC_WM8728=m -CONFIG_SND_SOC_WM8731=m -CONFIG_SND_SOC_WM8750=m -CONFIG_SND_SOC_WM8753=m -CONFIG_SND_SOC_WM8776=m -CONFIG_SND_SOC_WM8900=m -CONFIG_SND_SOC_WM8903=m -CONFIG_SND_SOC_WM8940=m -CONFIG_SND_SOC_WM8960=m -CONFIG_SND_SOC_WM8961=m -CONFIG_SND_SOC_WM8971=m -CONFIG_SND_SOC_WM8974=m -CONFIG_SND_SOC_WM8988=m -CONFIG_SND_SOC_WM8990=m -CONFIG_SND_SOC_WM8993=m -CONFIG_SND_SOC_WM9081=m -CONFIG_SND_SOC_MAX9877=m -# CONFIG_SOUND_PRIME is not set -CONFIG_AC97_BUS=m -CONFIG_HID_SUPPORT=y -CONFIG_HID=m -CONFIG_HIDRAW=y - -# -# USB Input Devices -# -CONFIG_USB_HID=m -# CONFIG_HID_PID is not set -# CONFIG_USB_HIDDEV is not set - -# -# USB HID Boot Protocol drivers -# -CONFIG_USB_KBD=m -CONFIG_USB_MOUSE=m - -# -# Special HID drivers -# -# CONFIG_HID_A4TECH is not set -# CONFIG_HID_APPLE is not set -# CONFIG_HID_BELKIN is not set -# CONFIG_HID_CHERRY is not set -# CONFIG_HID_CHICONY is not set -# CONFIG_HID_CYPRESS is not set -# CONFIG_HID_DRAGONRISE is not set -# CONFIG_HID_EZKEY is not set -# CONFIG_HID_KYE is not set -# CONFIG_HID_GYRATION is not set -# CONFIG_HID_TWINHAN is not set -# CONFIG_HID_KENSINGTON is not set -# CONFIG_HID_LOGITECH is not set -# CONFIG_HID_MICROSOFT is not set -# CONFIG_HID_MONTEREY is not set -# CONFIG_HID_NTRIG is not set -# CONFIG_HID_PANTHERLORD is not set -# CONFIG_HID_PETALYNX is not set -# CONFIG_HID_SAMSUNG is not set -# CONFIG_HID_SONY is not set -# CONFIG_HID_SUNPLUS is not set -# CONFIG_HID_GREENASIA is not set -# CONFIG_HID_SMARTJOYPLUS is not set -# CONFIG_HID_TOPSEED is not set -# CONFIG_HID_THRUSTMASTER is not set -# CONFIG_HID_WACOM is not set -# CONFIG_HID_ZEROPLUS is not set -CONFIG_USB_SUPPORT=y -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y -CONFIG_USB_ARCH_HAS_EHCI=y -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -CONFIG_USB_DEVICE_CLASS=y -# 
CONFIG_USB_DYNAMIC_MINORS is not set -# CONFIG_USB_SUSPEND is not set -# CONFIG_USB_OTG is not set -# CONFIG_USB_OTG_WHITELIST is not set -# CONFIG_USB_OTG_BLACKLIST_HUB is not set -CONFIG_USB_MON=m -CONFIG_USB_WUSB=m -CONFIG_USB_WUSB_CBAF=m -# CONFIG_USB_WUSB_CBAF_DEBUG is not set - -# -# USB Host Controller Drivers -# -CONFIG_USB_C67X00_HCD=m -CONFIG_USB_XHCI_HCD=m -# CONFIG_USB_XHCI_HCD_DEBUGGING is not set -CONFIG_USB_EHCI_HCD=m -# CONFIG_USB_EHCI_ROOT_HUB_TT is not set -# CONFIG_USB_EHCI_TT_NEWSCHED is not set -CONFIG_USB_OXU210HP_HCD=m -CONFIG_USB_ISP116X_HCD=m -CONFIG_USB_ISP1760_HCD=m -CONFIG_USB_ISP1362_HCD=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_SSB=y -# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set -# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set -CONFIG_USB_OHCI_LITTLE_ENDIAN=y -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_U132_HCD=m -CONFIG_USB_SL811_HCD=m -CONFIG_USB_SL811_CS=m -CONFIG_USB_R8A66597_HCD=m -CONFIG_USB_WHCI_HCD=m -CONFIG_USB_HWA_HCD=m - -# -# Enable Host or Gadget support to see Inventra options -# - -# -# USB Device Class drivers -# -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_WDM=m -CONFIG_USB_TMC=m - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -# -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=m -CONFIG_USB_STORAGE_FREECOM=m -CONFIG_USB_STORAGE_ISD200=m -CONFIG_USB_STORAGE_USBAT=m -CONFIG_USB_STORAGE_SDDR09=m -CONFIG_USB_STORAGE_SDDR55=m -CONFIG_USB_STORAGE_JUMPSHOT=m -CONFIG_USB_STORAGE_ALAUDA=m -CONFIG_USB_STORAGE_ONETOUCH=m -CONFIG_USB_STORAGE_KARMA=m -CONFIG_USB_STORAGE_CYPRESS_ATACB=m -CONFIG_USB_LIBUSUAL=y - -# -# USB Imaging devices -# -# CONFIG_USB_MDC800 is not set -# CONFIG_USB_MICROTEK is not set - -# -# USB port drivers -# -CONFIG_USB_USS720=m -CONFIG_USB_SERIAL=m -CONFIG_USB_EZUSB=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_AIRCABLE=m -CONFIG_USB_SERIAL_ARK3116=m -CONFIG_USB_SERIAL_BELKIN=m 
-CONFIG_USB_SERIAL_CH341=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_FUNSOFT=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_IUU=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_MOS7720=m -CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MOTOROLA=m -CONFIG_USB_SERIAL_NAVMAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_HP4X=m -CONFIG_USB_SERIAL_SAFE=m -CONFIG_USB_SERIAL_SAFE_PADDED=y -CONFIG_USB_SERIAL_SIEMENS_MPI=m -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_SYMBOL=m -CONFIG_USB_SERIAL_TI=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_OPTICON=m -CONFIG_USB_SERIAL_DEBUG=m - -# -# USB Miscellaneous drivers -# -CONFIG_USB_EMI62=m -CONFIG_USB_EMI26=m -CONFIG_USB_ADUTUX=m -CONFIG_USB_SEVSEG=m -CONFIG_USB_RIO500=m -# CONFIG_USB_LEGOTOWER is not set -CONFIG_USB_LCD=m -# CONFIG_USB_BERRY_CHARGE is not set -CONFIG_USB_LED=m -CONFIG_USB_CYPRESS_CY7C63=m -CONFIG_USB_CYTHERM=m -CONFIG_USB_IDMOUSE=m -CONFIG_USB_FTDI_ELAN=m -# CONFIG_USB_APPLEDISPLAY is not set -CONFIG_USB_SISUSBVGA=m -CONFIG_USB_SISUSBVGA_CON=y -CONFIG_USB_LD=m -# CONFIG_USB_TRANCEVIBRATOR is not set -CONFIG_USB_IOWARRIOR=m -CONFIG_USB_TEST=m -CONFIG_USB_ISIGHTFW=m -# CONFIG_USB_VST is not set -CONFIG_USB_ATM=m -CONFIG_USB_SPEEDTOUCH=m -CONFIG_USB_CXACRU=m -CONFIG_USB_UEAGLEATM=m -CONFIG_USB_XUSBATM=m -# CONFIG_USB_GADGET is not set - -# -# OTG and related infrastructure -# 
-CONFIG_USB_OTG_UTILS=y -CONFIG_USB_GPIO_VBUS=m -CONFIG_NOP_USB_XCEIV=m -CONFIG_UWB=m -CONFIG_UWB_HWA=m -CONFIG_UWB_WHCI=m -CONFIG_UWB_WLP=m -CONFIG_UWB_I1480U=m -CONFIG_UWB_I1480U_WLP=m -CONFIG_MMC=m -# CONFIG_MMC_DEBUG is not set -# CONFIG_MMC_UNSAFE_RESUME is not set - -# -# MMC/SD/SDIO Card Drivers -# -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_BOUNCE=y -CONFIG_SDIO_UART=m -CONFIG_MMC_TEST=m - -# -# MMC/SD/SDIO Host Controller Drivers -# -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_PCI=m -CONFIG_MMC_RICOH_MMC=m -CONFIG_MMC_SDHCI_PLTFM=m -CONFIG_MMC_WBSD=m -# CONFIG_MMC_AT91 is not set -# CONFIG_MMC_ATMELMCI is not set -CONFIG_MMC_TIFM_SD=m -# CONFIG_MMC_SPI is not set -CONFIG_MMC_SDRICOH_CS=m -CONFIG_MMC_CB710=m -CONFIG_MMC_VIA_SDMMC=m -CONFIG_MEMSTICK=m -# CONFIG_MEMSTICK_DEBUG is not set - -# -# MemoryStick drivers -# -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MSPRO_BLOCK=m - -# -# MemoryStick Host Controller Drivers -# -CONFIG_MEMSTICK_TIFM_MS=m -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=m - -# -# LED drivers -# -CONFIG_LEDS_ALIX2=m -CONFIG_LEDS_PCA9532=m -CONFIG_LEDS_GPIO=m -CONFIG_LEDS_GPIO_PLATFORM=y -CONFIG_LEDS_LP3944=m -CONFIG_LEDS_CLEVO_MAIL=m -CONFIG_LEDS_PCA955X=m -CONFIG_LEDS_DAC124S085=m -CONFIG_LEDS_BD2802=m - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -CONFIG_LEDS_TRIGGER_GPIO=m -CONFIG_LEDS_TRIGGER_DEFAULT_ON=m - -# -# iptables trigger is under Netfilter config (LED target) -# -CONFIG_ACCESSIBILITY=y -# CONFIG_A11Y_BRAILLE_CONSOLE is not set -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_MAD=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_INFINIBAND_USER_MEM=y -CONFIG_INFINIBAND_ADDR_TRANS=y -CONFIG_INFINIBAND_MTHCA=m -# CONFIG_INFINIBAND_MTHCA_DEBUG is not set -# CONFIG_INFINIBAND_IPATH is not set -CONFIG_INFINIBAND_AMSO1100=m -# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set -CONFIG_INFINIBAND_CXGB3=m -# CONFIG_INFINIBAND_CXGB3_DEBUG 
is not set -CONFIG_MLX4_INFINIBAND=m -CONFIG_INFINIBAND_NES=m -# CONFIG_INFINIBAND_NES_DEBUG is not set -CONFIG_INFINIBAND_IPOIB=m -# CONFIG_INFINIBAND_IPOIB_CM is not set -# CONFIG_INFINIBAND_IPOIB_DEBUG is not set -CONFIG_INFINIBAND_SRP=m -CONFIG_INFINIBAND_ISER=m -# CONFIG_EDAC is not set -CONFIG_RTC_LIB=m -CONFIG_RTC_CLASS=m - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -CONFIG_RTC_INTF_DEV_UIE_EMUL=y -CONFIG_RTC_DRV_TEST=m - -# -# I2C RTC drivers -# -CONFIG_RTC_DRV_DS1307=m -CONFIG_RTC_DRV_DS1374=m -CONFIG_RTC_DRV_DS1672=m -CONFIG_RTC_DRV_MAX6900=m -CONFIG_RTC_DRV_RS5C372=m -CONFIG_RTC_DRV_ISL1208=m -CONFIG_RTC_DRV_X1205=m -CONFIG_RTC_DRV_PCF8563=m -CONFIG_RTC_DRV_PCF8583=m -CONFIG_RTC_DRV_M41T80=m -CONFIG_RTC_DRV_M41T80_WDT=y -CONFIG_RTC_DRV_S35390A=m -CONFIG_RTC_DRV_FM3130=m -CONFIG_RTC_DRV_RX8581=m -CONFIG_RTC_DRV_RX8025=m - -# -# SPI RTC drivers -# -CONFIG_RTC_DRV_M41T94=m -CONFIG_RTC_DRV_DS1305=m -CONFIG_RTC_DRV_DS1390=m -CONFIG_RTC_DRV_MAX6902=m -CONFIG_RTC_DRV_R9701=m -CONFIG_RTC_DRV_RS5C348=m -CONFIG_RTC_DRV_DS3234=m -CONFIG_RTC_DRV_PCF2123=m - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=m -CONFIG_RTC_DRV_DS1286=m -CONFIG_RTC_DRV_DS1511=m -CONFIG_RTC_DRV_DS1553=m -CONFIG_RTC_DRV_DS1742=m -CONFIG_RTC_DRV_STK17TA8=m -CONFIG_RTC_DRV_M48T86=m -CONFIG_RTC_DRV_M48T35=m -CONFIG_RTC_DRV_M48T59=m -CONFIG_RTC_DRV_BQ4802=m -CONFIG_RTC_DRV_V3020=m -CONFIG_RTC_DRV_PCF50633=m - -# -# on-CPU RTC drivers -# -CONFIG_DMADEVICES=y - -# -# DMA Devices -# -CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y -CONFIG_INTEL_IOATDMA=m -CONFIG_DMA_ENGINE=y - -# -# DMA Clients -# -CONFIG_NET_DMA=y -# CONFIG_ASYNC_TX_DMA is not set -CONFIG_DMATEST=m -CONFIG_DCA=m -CONFIG_AUXDISPLAY=y -CONFIG_KS0108=m -CONFIG_KS0108_PORT=0x378 -CONFIG_KS0108_DELAY=2 -CONFIG_CFAG12864B=m -CONFIG_CFAG12864B_RATE=20 -CONFIG_UIO=m -CONFIG_UIO_CIF=m -CONFIG_UIO_PDRV=m -CONFIG_UIO_PDRV_GENIRQ=m -# CONFIG_UIO_SMX is not set -CONFIG_UIO_AEC=m 
-CONFIG_UIO_SERCOS3=m -# CONFIG_UIO_PCI_GENERIC is not set - -# -# TI VLYNQ -# -CONFIG_XEN_BALLOON=y -CONFIG_XEN_SCRUB_PAGES=y -CONFIG_XEN_DEV_EVTCHN=y -CONFIG_XEN_BACKEND=y -# CONFIG_XEN_NETDEV_BACKEND is not set -# CONFIG_XEN_BLKDEV_BACKEND is not set -# CONFIG_XEN_BLKDEV_TAP is not set -CONFIG_XEN_PCIDEV_BACKEND=y -CONFIG_XEN_PCIDEV_BACKEND_VPCI=y -# CONFIG_XEN_PCIDEV_BACKEND_PASS is not set -# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set -# CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER is not set -# CONFIG_XEN_PCIDEV_BE_DEBUG is not set -CONFIG_XENFS=y -CONFIG_XEN_COMPAT_XENFS=y -CONFIG_XEN_SYS_HYPERVISOR=y -CONFIG_XEN_XENBUS_FRONTEND=y -# CONFIG_XEN_GNTDEV is not set -CONFIG_XEN_S3=y -CONFIG_ACPI_PROCESSOR_XEN=m -CONFIG_XEN_PLATFORM_PCI=m -CONFIG_STAGING=y -# CONFIG_STAGING_EXCLUDE_BUILD is not set -# CONFIG_ET131X is not set -# CONFIG_SLICOSS is not set -# CONFIG_VIDEO_GO7007 is not set -# CONFIG_VIDEO_CX25821 is not set -# CONFIG_USB_IP_COMMON is not set -# CONFIG_W35UND is not set -# CONFIG_PRISM2_USB is not set -# CONFIG_ECHO is not set -# CONFIG_POCH is not set -# CONFIG_OTUS is not set -# CONFIG_RT2860 is not set -# CONFIG_RT2870 is not set -# CONFIG_RT3090 is not set -# CONFIG_COMEDI is not set -# CONFIG_ASUS_OLED is not set -# CONFIG_PANEL is not set -# CONFIG_ALTERA_PCIE_CHDMA is not set -# CONFIG_RTL8187SE is not set -# CONFIG_RTL8192SU is not set -# CONFIG_RTL8192E is not set -# CONFIG_TRANZPORT is not set - -# -# Android -# - -# -# Qualcomm MSM Camera And Video -# - -# -# Camera Sensor Selection -# -# CONFIG_INPUT_GPIO is not set -# CONFIG_DST is not set -# CONFIG_POHMELFS is not set -# CONFIG_B3DFG is not set -# CONFIG_IDE_PHISON is not set -# CONFIG_PLAN9AUTH is not set -# CONFIG_LINE6_USB is not set -# CONFIG_DRM_RADEON_KMS is not set -# CONFIG_USB_SERIAL_QUATECH2 is not set -# CONFIG_USB_SERIAL_QUATECH_USB2 is not set -# CONFIG_VT6655 is not set -# CONFIG_VT6656 is not set -# CONFIG_FB_UDL is not set -CONFIG_HYPERV=m -CONFIG_HYPERV_STORAGE=m 
-CONFIG_HYPERV_BLOCK=m -CONFIG_HYPERV_NET=m -# CONFIG_VME_BUS is not set - -# -# RAR Register Driver -# -# CONFIG_RAR_REGISTER is not set -# CONFIG_IIO is not set -CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_ACER_WMI=m -CONFIG_ASUS_LAPTOP=m -CONFIG_DELL_WMI=m -CONFIG_FUJITSU_LAPTOP=m -# CONFIG_FUJITSU_LAPTOP_DEBUG is not set -CONFIG_HP_WMI=m -CONFIG_MSI_LAPTOP=m -CONFIG_PANASONIC_LAPTOP=m -CONFIG_COMPAL_LAPTOP=m -CONFIG_SONY_LAPTOP=m -# CONFIG_SONYPI_COMPAT is not set -CONFIG_THINKPAD_ACPI=m -# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_INTEL_MENLOW=m -CONFIG_EEEPC_LAPTOP=m -CONFIG_ACPI_WMI=m -CONFIG_ACPI_ASUS=m -# CONFIG_TOPSTAR_LAPTOP is not set -CONFIG_ACPI_TOSHIBA=m - -# -# Firmware Drivers -# -CONFIG_EDD=m -# CONFIG_EDD_OFF is not set -CONFIG_FIRMWARE_MEMMAP=y -CONFIG_DELL_RBU=m -CONFIG_DCDBAS=m -CONFIG_DMIID=y -# CONFIG_ISCSI_IBFT_FIND is not set - -# -# File systems -# -CONFIG_EXT2_FS=m -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=m -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=m -CONFIG_EXT4_FS_XATTR=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_EXT4_DEBUG is not set -CONFIG_FS_XIP=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_JBD2=m -# CONFIG_JBD2_DEBUG is not set -CONFIG_FS_MBCACHE=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -# CONFIG_REISERFS_FS_SECURITY is not set -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_FS_POSIX_ACL=y -CONFIG_XFS_FS=m -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -# 
CONFIG_XFS_DEBUG is not set -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_OCFS2_FS_O2CB=m -CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m -CONFIG_OCFS2_FS_STATS=y -CONFIG_OCFS2_DEBUG_MASKLOG=y -# CONFIG_OCFS2_DEBUG_FS is not set -# CONFIG_OCFS2_FS_POSIX_ACL is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -CONFIG_NILFS2_FS=m -CONFIG_FILE_LOCKING=y -CONFIG_FSNOTIFY=y -# CONFIG_DNOTIFY is not set -CONFIG_INOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_QUOTA=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set -CONFIG_QUOTA_TREE=m -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_QUOTACTL=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m -# CONFIG_CUSE is not set - -# -# Caches -# -CONFIG_FSCACHE=m -CONFIG_FSCACHE_STATS=y -CONFIG_FSCACHE_HISTOGRAM=y -# CONFIG_FSCACHE_DEBUG is not set -# CONFIG_FSCACHE_OBJECT_LIST is not set -CONFIG_CACHEFILES=m -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_HISTOGRAM is not set - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_UDF_NLS=y - -# -# DOS/FAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -CONFIG_NTFS_FS=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -# CONFIG_PROC_KCORE is not set -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -# CONFIG_TMPFS_POSIX_ACL is not set -# CONFIG_HUGETLBFS is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_CONFIGFS_FS=m -CONFIG_MISC_FILESYSTEMS=y -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -CONFIG_ECRYPT_FS=m -CONFIG_HFS_FS=m -CONFIG_HFSPLUS_FS=m -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -CONFIG_EFS_FS=m -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_FS_DEBUG=0 -CONFIG_JFFS2_FS_WRITEBUFFER=y -# CONFIG_JFFS2_FS_WBUF_VERIFY is not set -CONFIG_JFFS2_SUMMARY=y 
-CONFIG_JFFS2_FS_XATTR=y -CONFIG_JFFS2_FS_POSIX_ACL=y -CONFIG_JFFS2_FS_SECURITY=y -CONFIG_JFFS2_COMPRESSION_OPTIONS=y -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_LZO=y -CONFIG_JFFS2_RTIME=y -CONFIG_JFFS2_RUBIN=y -# CONFIG_JFFS2_CMODE_NONE is not set -CONFIG_JFFS2_CMODE_PRIORITY=y -# CONFIG_JFFS2_CMODE_SIZE is not set -# CONFIG_JFFS2_CMODE_FAVOURLZO is not set -CONFIG_UBIFS_FS=m -# CONFIG_UBIFS_FS_XATTR is not set -# CONFIG_UBIFS_FS_ADVANCED_COMPR is not set -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y -# CONFIG_UBIFS_FS_DEBUG is not set -CONFIG_CRAMFS=m -CONFIG_SQUASHFS=m -# CONFIG_SQUASHFS_EMBEDDED is not set -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_VXFS_FS is not set -CONFIG_MINIX_FS=m -CONFIG_OMFS_FS=m -CONFIG_HPFS_FS=m -# CONFIG_QNX4FS_FS is not set -CONFIG_ROMFS_FS=m -CONFIG_ROMFS_BACKED_BY_BLOCK=y -# CONFIG_ROMFS_BACKED_BY_MTD is not set -# CONFIG_ROMFS_BACKED_BY_BOTH is not set -CONFIG_ROMFS_ON_BLOCK=y -CONFIG_SYSV_FS=m -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set -# CONFIG_UFS_DEBUG is not set -CONFIG_EXOFS_FS=m -# CONFIG_EXOFS_DEBUG is not set -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -# CONFIG_NFS_V3_ACL is not set -CONFIG_NFS_V4=y -# CONFIG_NFS_V4_1 is not set -# CONFIG_NFS_FSCACHE is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -# CONFIG_NFSD_V3_ACL is not set -CONFIG_NFSD_V4=y -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=m -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -CONFIG_SUNRPC_XPRT_RDMA=m -CONFIG_RPCSEC_GSS_KRB5=m -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -CONFIG_CIFS=m -# CONFIG_CIFS_STATS is not set -# CONFIG_CIFS_WEAK_PW_HASH is not set -# CONFIG_CIFS_UPCALL is not set -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -# CONFIG_CIFS_DEBUG2 is not set -CONFIG_CIFS_DFS_UPCALL=y -CONFIG_CIFS_EXPERIMENTAL=y -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set -# CONFIG_9P_FS is not set - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# 
CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_KARMA_PARTITION is not set -CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -CONFIG_NLS=m -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -# CONFIG_DLM_DEBUG is not set - -# -# Kernel hacking -# -CONFIG_TRACE_IRQFLAGS_SUPPORT=y -CONFIG_PRINTK_TIME=y -CONFIG_ENABLE_WARN_DEPRECATED=y -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=1024 -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_STRIP_ASM_SYMS is not set -# CONFIG_UNUSED_SYMBOLS is not set -CONFIG_DEBUG_FS=y -# CONFIG_HEADERS_CHECK is not set 
-# CONFIG_DEBUG_KERNEL is not set -# CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_DEBUG_MEMORY_INIT is not set -CONFIG_ARCH_WANT_FRAME_POINTERS=y -CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -# CONFIG_LATENCYTOP is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y -CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_RING_BUFFER=y -CONFIG_RING_BUFFER_ALLOW_SWAP=y -CONFIG_TRACING_SUPPORT=y -# CONFIG_FTRACE is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -# CONFIG_FIREWIRE_OHCI_REMOTE_DMA is not set -# CONFIG_DYNAMIC_DEBUG is not set -# CONFIG_DMA_API_DEBUG is not set -# CONFIG_SAMPLES is not set -CONFIG_HAVE_ARCH_KGDB=y -CONFIG_HAVE_ARCH_KMEMCHECK=y -CONFIG_STRICT_DEVMEM=y -# CONFIG_X86_VERBOSE_BOOTUP is not set -# CONFIG_EARLY_PRINTK is not set -# CONFIG_IOMMU_STRESS is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -CONFIG_IO_DELAY_TYPE_0X80=0 -CONFIG_IO_DELAY_TYPE_0XED=1 -CONFIG_IO_DELAY_TYPE_UDELAY=2 -CONFIG_IO_DELAY_TYPE_NONE=3 -CONFIG_IO_DELAY_0X80=y -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -# CONFIG_IO_DELAY_NONE is not set -CONFIG_DEFAULT_IO_DELAY_TYPE=0 -# CONFIG_OPTIMIZE_INLINING is not set - -# -# Security options -# -CONFIG_KEYS=y -# CONFIG_KEYS_DEBUG_PROC_KEYS is not set -CONFIG_SECURITY=y -CONFIG_SECURITYFS=y -# CONFIG_SECURITY_NETWORK is not set -# CONFIG_SECURITY_PATH is not set -# CONFIG_SECURITY_FILE_CAPABILITIES is not set -# CONFIG_SECURITY_TOMOYO is not set -# CONFIG_IMA is not set -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA=y -CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA=y -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -CONFIG_CRYPTO_FIPS=y 
-CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=m -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_BLKCIPHER=m -CONFIG_CRYPTO_BLKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG=m -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_PCOMP=y -CONFIG_CRYPTO_MANAGER=m -CONFIG_CRYPTO_MANAGER2=y -CONFIG_CRYPTO_GF128MUL=m -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_WORKQUEUE=y -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m -CONFIG_CRYPTO_TEST=m - -# -# Authenticated Encryption with Associated Data -# -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m -CONFIG_CRYPTO_SEQIV=m - -# -# Block modes -# -CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CTR=m -CONFIG_CRYPTO_CTS=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m - -# -# Hash modes -# -CONFIG_CRYPTO_HMAC=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m - -# -# Digest -# -CONFIG_CRYPTO_CRC32C=m -CONFIG_CRYPTO_CRC32C_INTEL=m -CONFIG_CRYPTO_GHASH=m -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m - -# -# Ciphers -# -CONFIG_CRYPTO_AES=m -# CONFIG_CRYPTO_AES_X86_64 is not set -# CONFIG_CRYPTO_AES_NI_INTEL is not set -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -# CONFIG_CRYPTO_SALSA20_X86_64 is not set -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -# CONFIG_CRYPTO_TWOFISH_X86_64 is not set - -# -# Compression -# -CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_ZLIB=m -CONFIG_CRYPTO_LZO=m - -# -# Random Number Generation -# -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_HW=y -CONFIG_CRYPTO_DEV_PADLOCK=m 
-CONFIG_CRYPTO_DEV_PADLOCK_AES=m -CONFIG_CRYPTO_DEV_PADLOCK_SHA=m -CONFIG_CRYPTO_DEV_HIFN_795X=m -CONFIG_CRYPTO_DEV_HIFN_795X_RNG=y -CONFIG_HAVE_KVM=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_KVM_APIC_ARCHITECTURE=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=m -CONFIG_KVM_INTEL=m -CONFIG_KVM_AMD=m -CONFIG_VIRTIO=m -CONFIG_VIRTIO_RING=m -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_BALLOON=m -# CONFIG_BINARY_PRINTF is not set - -# -# Library routines -# -CONFIG_BITREVERSE=y -CONFIG_GENERIC_FIND_FIRST_BIT=y -CONFIG_GENERIC_FIND_NEXT_BIT=y -CONFIG_GENERIC_FIND_LAST_BIT=y -CONFIG_CRC_CCITT=m -CONFIG_CRC16=m -CONFIG_CRC_T10DIF=m -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -CONFIG_CRC7=m -CONFIG_LIBCRC32C=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -CONFIG_LZO_COMPRESS=m -CONFIG_LZO_DECOMPRESS=m -CONFIG_DECOMPRESS_GZIP=y -CONFIG_DECOMPRESS_BZIP2=y -CONFIG_DECOMPRESS_LZMA=y -CONFIG_GENERIC_ALLOCATOR=y -CONFIG_REED_SOLOMON=m -CONFIG_REED_SOLOMON_DEC16=y -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT=y -CONFIG_HAS_DMA=y -CONFIG_CHECK_SIGNATURE=y -CONFIG_NLATTR=y diff --git a/testing/linux-xen0/pvops.patch b/testing/linux-xen0/pvops.patch deleted file mode 100644 index 49969705b..000000000 --- a/testing/linux-xen0/pvops.patch +++ /dev/null @@ -1,37837 +0,0 @@ -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 5f6aa11..9ec8558 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -113,6 +113,7 @@ parameter is applicable: - More X86-64 boot options can be found in - Documentation/x86/x86_64/boot-options.txt . - X86 Either 32bit or 64bit x86 (same as X86-32+X86-64) -+ XEN Xen support is enabled - - In addition, the following text indicates that the option: - -@@ -2760,6 +2761,18 @@ and is between 256 and 4096 characters. It is defined in the file - xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. 
- xd_geo= See header of drivers/block/xd.c. - -+ xen_emul_unplug= [HW,X86,XEN] -+ Unplug Xen emulated devices -+ Format: [unplug0,][unplug1] -+ ide-disks -- unplug primary master IDE devices -+ aux-ide-disks -- unplug non-primary-master IDE devices -+ nics -- unplug network devices -+ all -- unplug all emulated devices (NICs and IDE disks) -+ unnecessary -- unplugging emulated devices is -+ unnecessary even if the host did not respond to -+ the unplug protocol -+ never -- do not unplug even if version check succeeds -+ - xirc2ps_cs= [NET,PCMCIA] - Format: - <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] -diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt -index 29a6ff8..81f9b94 100644 ---- a/Documentation/x86/x86_64/boot-options.txt -+++ b/Documentation/x86/x86_64/boot-options.txt -@@ -267,10 +267,14 @@ IOMMU (input/output memory management unit) - - iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU - implementation: -- swiotlb=<pages>[,force] -+ swiotlb=[npages=<pages>] -+ swiotlb=[force] -+ swiotlb=[overflow=<size>] -+ - <pages> Prereserve that many 128K pages for the software IO - bounce buffering. - force Force all IO through the software TLB. -+ <size> Size in bytes of the overflow buffer. 
- - Settings for the IBM Calgary hardware IOMMU currently found in IBM - pSeries and xSeries machines: -diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h -index 8d3c79c..7d09a09 100644 ---- a/arch/ia64/include/asm/dma-mapping.h -+++ b/arch/ia64/include/asm/dma-mapping.h -@@ -73,7 +73,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) - if (!dev->dma_mask) - return 0; - -- return addr + size <= *dev->dma_mask; -+ return addr + size - 1 <= *dev->dma_mask; - } - - static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h -index dcbaea7..f0acde6 100644 ---- a/arch/ia64/include/asm/swiotlb.h -+++ b/arch/ia64/include/asm/swiotlb.h -@@ -4,8 +4,6 @@ - #include <linux/dma-mapping.h> - #include <linux/swiotlb.h> - --extern int swiotlb_force; -- - #ifdef CONFIG_SWIOTLB - extern int swiotlb; - extern void pci_swiotlb_init(void); -diff --git a/arch/ia64/include/asm/xen/events.h b/arch/ia64/include/asm/xen/events.h -index b8370c8..baa74c8 100644 ---- a/arch/ia64/include/asm/xen/events.h -+++ b/arch/ia64/include/asm/xen/events.h -@@ -36,10 +36,6 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) - return !(ia64_psr(regs)->i); - } - --static inline void handle_irq(int irq, struct pt_regs *regs) --{ -- __do_IRQ(irq); --} - #define irq_ctx_init(cpu) do { } while (0) - - #endif /* _ASM_IA64_XEN_EVENTS_H */ -diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c -index 285aae8..53292ab 100644 ---- a/arch/ia64/kernel/pci-swiotlb.c -+++ b/arch/ia64/kernel/pci-swiotlb.c -@@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = { - void __init swiotlb_dma_init(void) - { - dma_ops = &swiotlb_dma_ops; -- swiotlb_init(); -+ swiotlb_init(1); - } - - void __init pci_swiotlb_init(void) -@@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void) - swiotlb = 1; - printk(KERN_INFO "PCI-DMA: Re-initialize 
machine vector.\n"); - machvec_init("dig"); -- swiotlb_init(); -+ swiotlb_init(1); - dma_ops = &swiotlb_dma_ops; - #else - panic("Unable to find Intel IOMMU"); -diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h -index e281dae..80a973b 100644 ---- a/arch/powerpc/include/asm/dma-mapping.h -+++ b/arch/powerpc/include/asm/dma-mapping.h -@@ -197,7 +197,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) - if (!dev->dma_mask) - return 0; - -- return addr + size <= *dev->dma_mask; -+ return addr + size - 1 <= *dev->dma_mask; - } - - static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c -index 53bcf3d..b152de3 100644 ---- a/arch/powerpc/kernel/setup_32.c -+++ b/arch/powerpc/kernel/setup_32.c -@@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p) - - #ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) -- swiotlb_init(); -+ swiotlb_init(1); - #endif - - paging_init(); -diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c -index 04f638d..df2c9e9 100644 ---- a/arch/powerpc/kernel/setup_64.c -+++ b/arch/powerpc/kernel/setup_64.c -@@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p) - - #ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) -- swiotlb_init(); -+ swiotlb_init(1); - #endif - - paging_init(); -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index cb5a57c..a3b7475 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1885,6 +1885,10 @@ config PCI_OLPC - def_bool y - depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) - -+config PCI_XEN -+ bool -+ select SWIOTLB -+ - config PCI_DOMAINS - def_bool y - depends on PCI -diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h -index 18aa3f8..4413ba4 100644 ---- a/arch/x86/include/asm/amd_iommu.h -+++ b/arch/x86/include/asm/amd_iommu.h -@@ -23,20 +23,16 @@ - #include <linux/irqreturn.h> - 
- #ifdef CONFIG_AMD_IOMMU --extern int amd_iommu_init(void); - extern int amd_iommu_init_dma_ops(void); - extern int amd_iommu_init_passthrough(void); - extern void amd_iommu_detect(void); - extern irqreturn_t amd_iommu_int_handler(int irq, void *data); - extern void amd_iommu_flush_all_domains(void); - extern void amd_iommu_flush_all_devices(void); --extern void amd_iommu_shutdown(void); - extern void amd_iommu_apply_erratum_63(u16 devid); - extern void amd_iommu_init_api(void); - #else --static inline int amd_iommu_init(void) { return -ENODEV; } - static inline void amd_iommu_detect(void) { } --static inline void amd_iommu_shutdown(void) { } - #endif - - #endif /* _ASM_X86_AMD_IOMMU_H */ -diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h -index b03bedb..0918654 100644 ---- a/arch/x86/include/asm/calgary.h -+++ b/arch/x86/include/asm/calgary.h -@@ -62,10 +62,8 @@ struct cal_chipset_ops { - extern int use_calgary; - - #ifdef CONFIG_CALGARY_IOMMU --extern int calgary_iommu_init(void); - extern void detect_calgary(void); - #else --static inline int calgary_iommu_init(void) { return 1; } - static inline void detect_calgary(void) { return; } - #endif - -diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h -index 6a25d5d..ac91eed 100644 ---- a/arch/x86/include/asm/dma-mapping.h -+++ b/arch/x86/include/asm/dma-mapping.h -@@ -20,7 +20,8 @@ - # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) - #endif - --extern dma_addr_t bad_dma_address; -+#define DMA_ERROR_CODE 0 -+ - extern int iommu_merge; - extern struct device x86_dma_fallback_dev; - extern int panic_on_overflow; -@@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) - if (ops->mapping_error) - return ops->mapping_error(dev, dma_addr); - -- return (dma_addr == bad_dma_address); -+ return (dma_addr == DMA_ERROR_CODE); - } - - #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -@@ -66,7 +67,7 @@ static 
inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) - if (!dev->dma_mask) - return 0; - -- return addr + size <= *dev->dma_mask; -+ return addr + size - 1 <= *dev->dma_mask; - } - - static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h -index 40b4e61..fa3fd43 100644 ---- a/arch/x86/include/asm/e820.h -+++ b/arch/x86/include/asm/e820.h -@@ -109,6 +109,8 @@ extern void reserve_early(u64 start, u64 end, char *name); - extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); - extern void free_early(u64 start, u64 end); - extern void early_res_to_bootmem(u64 start, u64 end); -+extern u64 early_res_next_free(u64 start); -+extern u64 early_res_next_reserved(u64 addr, u64 max); - extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); - - extern unsigned long e820_end_of_ram_pfn(void); -diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h -index 6cfdafa..4ac5b0f 100644 ---- a/arch/x86/include/asm/gart.h -+++ b/arch/x86/include/asm/gart.h -@@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed; - extern int gart_iommu_aperture_disabled; - - extern void early_gart_iommu_check(void); --extern void gart_iommu_init(void); --extern void gart_iommu_shutdown(void); -+extern int gart_iommu_init(void); - extern void __init gart_parse_options(char *); - extern void gart_iommu_hole_init(void); - -@@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void); - static inline void early_gart_iommu_check(void) - { - } --static inline void gart_iommu_init(void) --{ --} --static inline void gart_iommu_shutdown(void) --{ --} - static inline void gart_parse_options(char *options) - { - } -diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h -index 3251e23..fa152cb 100644 ---- a/arch/x86/include/asm/hpet.h -+++ b/arch/x86/include/asm/hpet.h -@@ -68,6 +68,7 @@ extern unsigned long force_hpet_address; - extern int 
hpet_force_user; - extern u8 hpet_msi_disable; - extern int is_hpet_enabled(void); -+extern int disable_hpet(char *); - extern int hpet_enable(void); - extern void hpet_disable(void); - extern unsigned long hpet_readl(unsigned long a); -@@ -108,6 +109,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler); - #else /* CONFIG_HPET_TIMER */ - - static inline int hpet_enable(void) { return 0; } -+static inline int disable_hpet(char *s) { return 0; } - static inline int is_hpet_enabled(void) { return 0; } - #define hpet_readl(a) 0 - -diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h -index 439a9ac..bf88684 100644 ---- a/arch/x86/include/asm/hugetlb.h -+++ b/arch/x86/include/asm/hugetlb.h -@@ -36,16 +36,28 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - free_pgd_range(tlb, addr, end, floor, ceiling); - } - -+static inline pte_t huge_ptep_get(pte_t *ptep) -+{ -+ return *ptep; -+} -+ - static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) - { -- set_pte_at(mm, addr, ptep, pte); -+#if PAGETABLE_LEVELS >= 3 -+ set_pmd((pmd_t *)ptep, native_make_pmd(native_pte_val(pte))); -+#else -+ set_pgd((pgd_t *)ptep, native_make_pgd(native_pte_val(pte))); -+#endif - } - - static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) - { -- return ptep_get_and_clear(mm, addr, ptep); -+ pte_t pte = huge_ptep_get(ptep); -+ -+ set_huge_pte_at(mm, addr, ptep, __pte(0)); -+ return pte; - } - - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, -@@ -66,19 +78,25 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) - static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) - { -- ptep_set_wrprotect(mm, addr, ptep); -+ pte_t pte = huge_ptep_get(ptep); -+ -+ pte = pte_wrprotect(pte); -+ set_huge_pte_at(mm, addr, ptep, pte); - } - - static inline int huge_ptep_set_access_flags(struct 
vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) - { -- return ptep_set_access_flags(vma, addr, ptep, pte, dirty); --} -+ pte_t oldpte = huge_ptep_get(ptep); -+ int changed = !pte_same(oldpte, pte); - --static inline pte_t huge_ptep_get(pte_t *ptep) --{ -- return *ptep; -+ if (changed && dirty) { -+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte); -+ flush_tlb_page(vma, addr); -+ } -+ -+ return changed; - } - - static inline int arch_prepare_hugepage(struct page *page) -diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h -index 6a63b86..9ad387e 100644 ---- a/arch/x86/include/asm/io.h -+++ b/arch/x86/include/asm/io.h -@@ -7,6 +7,10 @@ - #include <asm-generic/int-ll64.h> - #include <asm/page.h> - -+#include <xen/xen.h> -+ -+extern int isapnp_disable; -+ - #define build_mmio_read(name, size, type, reg, barrier) \ - static inline type name(const volatile void __iomem *addr) \ - { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ -@@ -199,6 +203,18 @@ extern void __iomem *early_ioremap(resource_size_t phys_addr, - extern void __iomem *early_memremap(resource_size_t phys_addr, - unsigned long size); - extern void early_iounmap(void __iomem *addr, unsigned long size); -+extern bool is_early_ioremap_ptep(pte_t *ptep); -+ -+#ifdef CONFIG_XEN -+struct bio_vec; -+ -+extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, -+ const struct bio_vec *vec2); -+ -+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ -+ (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ -+ (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) -+#endif /* CONFIG_XEN */ - - #define IO_SPACE_LIMIT 0xffff - -diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h -index 5f61f6e..b852da9 100644 ---- a/arch/x86/include/asm/io_apic.h -+++ b/arch/x86/include/asm/io_apic.h -@@ -172,6 +172,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); - extern int restore_IO_APIC_setup(struct IO_APIC_route_entry 
**ioapic_entries); - - extern void probe_nr_irqs_gsi(void); -+extern int get_nr_irqs_gsi(void); - - extern int setup_ioapic_entry(int apic, int irq, - struct IO_APIC_route_entry *entry, -@@ -201,4 +202,6 @@ static inline void probe_nr_irqs_gsi(void) { } - - #endif - -+void xen_io_apic_init(void); -+ - #endif /* _ASM_X86_IO_APIC_H */ -diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h -index fd6d21b..345c99c 100644 ---- a/arch/x86/include/asm/iommu.h -+++ b/arch/x86/include/asm/iommu.h -@@ -1,8 +1,6 @@ - #ifndef _ASM_X86_IOMMU_H - #define _ASM_X86_IOMMU_H - --extern void pci_iommu_shutdown(void); --extern void no_iommu_init(void); - extern struct dma_map_ops nommu_dma_ops; - extern int force_iommu, no_iommu; - extern int iommu_detected; -diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h -index 6e90a04..ba4dc7b 100644 ---- a/arch/x86/include/asm/irq_vectors.h -+++ b/arch/x86/include/asm/irq_vectors.h -@@ -120,6 +120,12 @@ - */ - #define MCE_SELF_VECTOR 0xeb - -+#ifdef CONFIG_XEN -+/* Xen vector callback to receive events in a HVM domain */ -+#define XEN_HVM_EVTCHN_CALLBACK 0xe9 -+#endif -+ -+ - /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we - * start at 0x31(0x41) to spread out vectors evenly between priority -@@ -157,6 +163,14 @@ static inline int invalid_vm86_irq(int irq) - #define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) - #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) - -+#ifndef __ASSEMBLY__ -+# if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SPARSE_IRQ) -+extern int nr_dynamic_irqs; -+# else -+# define NR_DYNAMIC_IRQS 256 -+# endif -+#endif -+ - #ifdef CONFIG_X86_IO_APIC - # ifdef CONFIG_SPARSE_IRQ - # define NR_IRQS \ -@@ -165,13 +179,13 @@ static inline int invalid_vm86_irq(int irq) - (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) - # else - # if NR_CPUS < MAX_IO_APICS --# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) -+# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) + NR_DYNAMIC_IRQS - 
# else --# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) -+# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) + NR_DYNAMIC_IRQS - # endif - # endif - #else /* !CONFIG_X86_IO_APIC: */ --# define NR_IRQS NR_IRQS_LEGACY -+# define NR_IRQS NR_IRQS_LEGACY + NR_DYNAMIC_IRQS - #endif - - #endif /* _ASM_X86_IRQ_VECTORS_H */ -diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h -index ef51b50..e15fca1 100644 ---- a/arch/x86/include/asm/microcode.h -+++ b/arch/x86/include/asm/microcode.h -@@ -55,4 +55,13 @@ static inline struct microcode_ops * __init init_amd_microcode(void) - } - #endif - -+#ifdef CONFIG_MICROCODE_XEN -+extern struct microcode_ops * __init init_xen_microcode(void); -+#else -+static inline struct microcode_ops * __init init_xen_microcode(void) -+{ -+ return NULL; -+} -+#endif -+ - #endif /* _ASM_X86_MICROCODE_H */ -diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h -index 80a1dee..67eaa91 100644 ---- a/arch/x86/include/asm/mmu.h -+++ b/arch/x86/include/asm/mmu.h -@@ -13,6 +13,9 @@ typedef struct { - int size; - struct mutex lock; - void *vdso; -+#ifdef CONFIG_XEN -+ int has_foreign_mappings; -+#endif - } mm_context_t; - - #ifdef CONFIG_SMP -diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h -index efb3899..e571db4 100644 ---- a/arch/x86/include/asm/paravirt.h -+++ b/arch/x86/include/asm/paravirt.h -@@ -330,11 +330,18 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) - { - PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); - } -+ - static inline void set_iopl_mask(unsigned mask) - { - PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); - } - -+static inline void set_io_bitmap(struct thread_struct *thread, -+ unsigned long bytes_updated) -+{ -+ PVOP_VCALL2(pv_cpu_ops.set_io_bitmap, thread, bytes_updated); -+} -+ - /* The paravirtualized I/O functions */ - static inline void slow_down_io(void) - { -@@ -770,15 +777,28 @@ static __always_inline void 
__raw_spin_unlock(struct raw_spinlock *lock) - #define PV_RESTORE_REGS "popl %edx; popl %ecx;" - - /* save and restore all caller-save registers, except return value */ --#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" --#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" -+#define __PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" -+#define __PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" -+ -+#ifdef CONFIG_FRAME_POINTER -+#define PV_SAVE_ALL_CALLER_REGS \ -+ "push %ebp;" \ -+ "mov %esp, %ebp;" \ -+ __PV_SAVE_ALL_CALLER_REGS -+#define PV_RESTORE_ALL_CALLER_REGS \ -+ __PV_RESTORE_ALL_CALLER_REGS \ -+ "leave;" -+#else -+#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS -+#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS -+#endif - - #define PV_FLAGS_ARG "0" - #define PV_EXTRA_CLOBBERS - #define PV_VEXTRA_CLOBBERS - #else - /* save and restore all caller-save registers, except return value */ --#define PV_SAVE_ALL_CALLER_REGS \ -+#define __PV_SAVE_ALL_CALLER_REGS \ - "push %rcx;" \ - "push %rdx;" \ - "push %rsi;" \ -@@ -787,7 +807,7 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) - "push %r9;" \ - "push %r10;" \ - "push %r11;" --#define PV_RESTORE_ALL_CALLER_REGS \ -+#define __PV_RESTORE_ALL_CALLER_REGS \ - "pop %r11;" \ - "pop %r10;" \ - "pop %r9;" \ -@@ -797,6 +817,19 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) - "pop %rdx;" \ - "pop %rcx;" - -+#ifdef CONFIG_FRAME_POINTER -+#define PV_SAVE_ALL_CALLER_REGS \ -+ "push %rbp;" \ -+ "mov %rsp, %rbp;" \ -+ __PV_SAVE_ALL_CALLER_REGS -+#define PV_RESTORE_ALL_CALLER_REGS \ -+ __PV_RESTORE_ALL_CALLER_REGS \ -+ "leaveq;" -+#else -+#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS -+#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS -+#endif -+ - /* We save some registers, but all of them, that's too much. 
We clobber all - * caller saved registers but the argument parameter */ - #define PV_SAVE_REGS "pushq %%rdi;" -diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h -index 9357473..3202dcc 100644 ---- a/arch/x86/include/asm/paravirt_types.h -+++ b/arch/x86/include/asm/paravirt_types.h -@@ -135,6 +135,8 @@ struct pv_cpu_ops { - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); - - void (*set_iopl_mask)(unsigned mask); -+ void (*set_io_bitmap)(struct thread_struct *thread, -+ unsigned long bytes_updated); - - void (*wbinvd)(void); - void (*io_delay)(void); -diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h -index ada8c20..faa0af1 100644 ---- a/arch/x86/include/asm/pci.h -+++ b/arch/x86/include/asm/pci.h -@@ -21,6 +21,7 @@ struct pci_sysdata { - extern int pci_routeirq; - extern int noioapicquirk; - extern int noioapicreroute; -+extern int pci_scan_all_fns; - - /* scan a bus after allocating a pci_sysdata for it */ - extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, -@@ -49,6 +50,11 @@ extern unsigned int pcibios_assign_all_busses(void); - #define pcibios_assign_all_busses() 0 - #endif - -+static inline int pcibios_scan_all_fns(struct pci_bus *bus, int devfn) -+{ -+ return pci_scan_all_fns; -+} -+ - extern unsigned long pci_mem_start; - #define PCIBIOS_MIN_IO 0x1000 - #define PCIBIOS_MIN_MEM (pci_mem_start) -@@ -87,6 +93,7 @@ extern void pci_iommu_alloc(void); - - /* MSI arch hook */ - #define arch_setup_msi_irqs arch_setup_msi_irqs -+#define arch_teardown_msi_irqs arch_teardown_msi_irqs - - #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) - -@@ -128,6 +135,7 @@ extern void pci_iommu_alloc(void); - #include <asm-generic/pci-dma-compat.h> - - /* generic pci stuff */ -+#define HAVE_ARCH_PCIBIOS_SCAN_ALL_FNS - #include <asm-generic/pci.h> - #define PCIBIOS_MAX_MEM_32 0xffffffff - -diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h -index 
b399988..30cbf49 100644 ---- a/arch/x86/include/asm/pci_x86.h -+++ b/arch/x86/include/asm/pci_x86.h -@@ -45,6 +45,7 @@ enum pci_bf_sort_state { - extern unsigned int pcibios_max_latency; - - void pcibios_resource_survey(void); -+void pcibios_set_cache_line_size(void); - - /* pci-pc.c */ - -@@ -106,6 +107,7 @@ extern int pci_direct_probe(void); - extern void pci_direct_init(int type); - extern void pci_pcbios_init(void); - extern int pci_olpc_init(void); -+extern int pci_xen_init(void); - extern void __init dmi_check_pciprobe(void); - extern void __init dmi_check_skip_isa_align(void); - -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index af6fd36..430e3cc 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -15,7 +15,6 @@ - : (prot)) - - #ifndef __ASSEMBLY__ -- - /* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. -@@ -26,6 +25,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; - extern spinlock_t pgd_lock; - extern struct list_head pgd_list; - -+extern struct mm_struct *pgd_page_get_mm(struct page *page); -+ - #ifdef CONFIG_PARAVIRT - #include <asm/paravirt.h> - #else /* !CONFIG_PARAVIRT */ -@@ -76,6 +77,11 @@ extern struct list_head pgd_list; - - #endif /* CONFIG_PARAVIRT */ - -+static inline pteval_t pte_flags(pte_t pte) -+{ -+ return pte_val(pte) & PTE_FLAGS_MASK; -+} -+ - /* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. 
-@@ -397,6 +403,9 @@ static inline unsigned long pages_to_mb(unsigned long npg) - #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - -+#define arch_vm_get_page_prot arch_vm_get_page_prot -+extern pgprot_t arch_vm_get_page_prot(unsigned vm_flags); -+ - #if PAGETABLE_LEVELS > 2 - static inline int pud_none(pud_t pud) - { -@@ -616,6 +625,9 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) - memcpy(dst, src, count * sizeof(pgd_t)); - } - -+int create_lookup_pte_addr(struct mm_struct *mm, -+ unsigned long address, -+ uint64_t *ptep); - - #include <asm-generic/pgtable.h> - #endif /* __ASSEMBLY__ */ -diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h -index c57a301..4e46931 100644 ---- a/arch/x86/include/asm/pgtable_64.h -+++ b/arch/x86/include/asm/pgtable_64.h -@@ -160,7 +160,7 @@ extern void cleanup_highmap(void); - #define pgtable_cache_init() do { } while (0) - #define check_pgt_cache() do { } while (0) - --#define PAGE_AGP PAGE_KERNEL_NOCACHE -+#define PAGE_AGP PAGE_KERNEL_IO_NOCACHE - #define HAVE_PAGE_AGP 1 - - /* fs/proc/kcore.c */ -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index d1f4a76..a81b0ed 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -265,11 +265,6 @@ static inline pteval_t native_pte_val(pte_t pte) - return pte.pte; - } - --static inline pteval_t pte_flags(pte_t pte) --{ -- return native_pte_val(pte) & PTE_FLAGS_MASK; --} -- - #define pgprot_val(x) ((x).pgprot) - #define __pgprot(x) ((pgprot_t) { (x) } ) - -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index 78bb4d7..2232bd2 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -551,6 +551,9 @@ static inline void native_set_iopl_mask(unsigned mask) - #endif - } - -+extern void native_set_io_bitmap(struct thread_struct 
*thread, -+ unsigned long updated_bytes); -+ - static inline void - native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) - { -@@ -592,6 +595,7 @@ static inline void load_sp0(struct tss_struct *tss, - } - - #define set_iopl_mask native_set_iopl_mask -+#define set_io_bitmap native_set_io_bitmap - #endif /* CONFIG_PARAVIRT */ - - /* -diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h -index 53235fd..daaacab 100644 ---- a/arch/x86/include/asm/pvclock.h -+++ b/arch/x86/include/asm/pvclock.h -@@ -10,5 +10,6 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); - void pvclock_read_wallclock(struct pvclock_wall_clock *wall, - struct pvclock_vcpu_time_info *vcpu, - struct timespec *ts); -+void pvclock_resume(void); - - #endif /* _ASM_X86_PVCLOCK_H */ -diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h -index 18e496c..154a5f1 100644 ---- a/arch/x86/include/asm/setup.h -+++ b/arch/x86/include/asm/setup.h -@@ -95,6 +95,11 @@ void *extend_brk(size_t size, size_t align); - : : "i" (sz)); \ - } - -+/* Helper for reserving space for arrays of things */ -+#define RESERVE_BRK_ARRAY(type, name, entries) \ -+ type *name; \ -+ RESERVE_BRK(name, sizeof(type) * entries) -+ - #ifdef __i386__ - - void __init i386_start_kernel(void); -diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h -index b9e4e20..8085277 100644 ---- a/arch/x86/include/asm/swiotlb.h -+++ b/arch/x86/include/asm/swiotlb.h -@@ -3,15 +3,16 @@ - - #include <linux/swiotlb.h> - --/* SWIOTLB interface */ -- --extern int swiotlb_force; -- - #ifdef CONFIG_SWIOTLB - extern int swiotlb; --extern void pci_swiotlb_init(void); -+extern int __init pci_swiotlb_detect(void); -+extern void __init pci_swiotlb_init(void); - #else - #define swiotlb 0 -+static inline int pci_swiotlb_detect(void) -+{ -+ return 0; -+} - static inline void pci_swiotlb_init(void) - { - } -diff --git a/arch/x86/include/asm/syscalls.h 
b/arch/x86/include/asm/syscalls.h -index 1bb6e39..ef0fa4d 100644 ---- a/arch/x86/include/asm/syscalls.h -+++ b/arch/x86/include/asm/syscalls.h -@@ -33,11 +33,11 @@ long sys_rt_sigreturn(struct pt_regs *); - asmlinkage int sys_set_thread_area(struct user_desc __user *); - asmlinkage int sys_get_thread_area(struct user_desc __user *); - --/* X86_32 only */ --#ifdef CONFIG_X86_32 - /* kernel/ioport.c */ --long sys_iopl(struct pt_regs *); -+asmlinkage long sys_iopl(unsigned int); - -+/* X86_32 only */ -+#ifdef CONFIG_X86_32 - /* kernel/process_32.c */ - int sys_clone(struct pt_regs *); - int sys_execve(struct pt_regs *); -@@ -68,8 +68,6 @@ int sys_vm86(struct pt_regs *); - #else /* CONFIG_X86_32 */ - - /* X86_64 only */ --/* kernel/ioport.c */ --asmlinkage long sys_iopl(unsigned int, struct pt_regs *); - - /* kernel/process_64.c */ - asmlinkage long sys_clone(unsigned long, unsigned long, -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 7f3eba0..e4fc8ea 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -89,6 +89,10 @@ static inline void __flush_tlb_one(unsigned long addr) - - #ifndef CONFIG_SMP - -+static inline void __init init_smp_flush(void) -+{ -+} -+ - #define flush_tlb() __flush_tlb() - #define flush_tlb_all() __flush_tlb_all() - #define local_flush_tlb() __flush_tlb() -@@ -129,6 +133,8 @@ static inline void reset_lazy_tlbstate(void) - - #define local_flush_tlb() __flush_tlb() - -+extern void init_smp_flush(void); -+ - extern void flush_tlb_all(void); - extern void flush_tlb_current_task(void); - extern void flush_tlb_mm(struct mm_struct *); -diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h -index 2c756fd..d8e7145 100644 ---- a/arch/x86/include/asm/x86_init.h -+++ b/arch/x86/include/asm/x86_init.h -@@ -91,6 +91,14 @@ struct x86_init_timers { - }; - - /** -+ * struct x86_init_iommu - platform specific iommu setup -+ * @iommu_init: platform specific iommu 
setup -+ */ -+struct x86_init_iommu { -+ int (*iommu_init)(void); -+}; -+ -+/** - * struct x86_init_ops - functions for platform specific setup - * - */ -@@ -101,6 +109,7 @@ struct x86_init_ops { - struct x86_init_oem oem; - struct x86_init_paging paging; - struct x86_init_timers timers; -+ struct x86_init_iommu iommu; - }; - - /** -@@ -121,6 +130,7 @@ struct x86_platform_ops { - unsigned long (*calibrate_tsc)(void); - unsigned long (*get_wallclock)(void); - int (*set_wallclock)(unsigned long nowtime); -+ void (*iommu_shutdown)(void); - }; - - extern struct x86_init_ops x86_init; -diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h -index 9c371e4..41c4be0 100644 ---- a/arch/x86/include/asm/xen/hypercall.h -+++ b/arch/x86/include/asm/xen/hypercall.h -@@ -45,6 +45,8 @@ - #include <xen/interface/xen.h> - #include <xen/interface/sched.h> - #include <xen/interface/physdev.h> -+#include <xen/interface/platform.h> -+#include <xen/interface/xen-mca.h> - - /* - * The hypercall asms have to meet several constraints: -@@ -200,6 +202,23 @@ extern struct { char _entry[32]; } hypercall_page[]; - (type)__res; \ - }) - -+static inline long -+privcmd_call(unsigned call, -+ unsigned long a1, unsigned long a2, -+ unsigned long a3, unsigned long a4, -+ unsigned long a5) -+{ -+ __HYPERCALL_DECLS; -+ __HYPERCALL_5ARG(a1, a2, a3, a4, a5); -+ -+ asm volatile("call *%[call]" -+ : __HYPERCALL_5PARAM -+ : [call] "a" (&hypercall_page[call]) -+ : __HYPERCALL_CLOBBER5); -+ -+ return (long)__res; -+} -+ - static inline int - HYPERVISOR_set_trap_table(struct trap_info *table) - { -@@ -282,6 +301,20 @@ HYPERVISOR_set_timer_op(u64 timeout) - } - - static inline int -+HYPERVISOR_mca(struct xen_mc *mc_op) -+{ -+ mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; -+ return _hypercall1(int, mca, mc_op); -+} -+ -+static inline int -+HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) -+{ -+ platform_op->interface_version = XENPF_INTERFACE_VERSION; -+ return 
_hypercall1(int, dom0_op, platform_op); -+} -+ -+static inline int - HYPERVISOR_set_debugreg(int reg, unsigned long value) - { - return _hypercall2(int, set_debugreg, reg, value); -@@ -417,6 +450,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) - return _hypercall2(int, nmi_op, op, arg); - } - -+static inline unsigned long __must_check -+HYPERVISOR_hvm_op(int op, void *arg) -+{ -+ return _hypercall2(unsigned long, hvm_op, op, arg); -+} -+ - static inline void - MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) - { -@@ -424,6 +463,14 @@ MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) - mcl->args[0] = set; - } - -+#if defined(CONFIG_X86_64) -+#define MULTI_UVMFLAGS_INDEX 2 -+#define MULTI_UVMDOMID_INDEX 3 -+#else -+#define MULTI_UVMFLAGS_INDEX 3 -+#define MULTI_UVMDOMID_INDEX 4 -+#endif -+ - static inline void - MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, - pte_t new_val, unsigned long flags) -@@ -432,12 +479,11 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, - mcl->args[0] = va; - if (sizeof(new_val) == sizeof(long)) { - mcl->args[1] = new_val.pte; -- mcl->args[2] = flags; - } else { - mcl->args[1] = new_val.pte; - mcl->args[2] = new_val.pte >> 32; -- mcl->args[3] = flags; - } -+ mcl->args[MULTI_UVMFLAGS_INDEX] = flags; - } - - static inline void -diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h -index d5b7e90..396ff4c 100644 ---- a/arch/x86/include/asm/xen/hypervisor.h -+++ b/arch/x86/include/asm/xen/hypervisor.h -@@ -37,31 +37,4 @@ - extern struct shared_info *HYPERVISOR_shared_info; - extern struct start_info *xen_start_info; - --enum xen_domain_type { -- XEN_NATIVE, /* running on bare hardware */ -- XEN_PV_DOMAIN, /* running in a PV domain */ -- XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ --}; -- --#ifdef CONFIG_XEN --extern enum xen_domain_type xen_domain_type; --#else --#define xen_domain_type XEN_NATIVE --#endif -- --#define 
xen_domain() (xen_domain_type != XEN_NATIVE) --#define xen_pv_domain() (xen_domain() && \ -- xen_domain_type == XEN_PV_DOMAIN) --#define xen_hvm_domain() (xen_domain() && \ -- xen_domain_type == XEN_HVM_DOMAIN) -- --#ifdef CONFIG_XEN_DOM0 --#include <xen/interface/xen.h> -- --#define xen_initial_domain() (xen_pv_domain() && \ -- xen_start_info->flags & SIF_INITDOMAIN) --#else /* !CONFIG_XEN_DOM0 */ --#define xen_initial_domain() (0) --#endif /* CONFIG_XEN_DOM0 */ -- - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ -diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h -index e8506c1..9539998 100644 ---- a/arch/x86/include/asm/xen/interface.h -+++ b/arch/x86/include/asm/xen/interface.h -@@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); - #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) - #endif - --#ifndef machine_to_phys_mapping --#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) --#endif -+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) - - /* Maximum number of virtual CPUs in multi-processor guests. */ - #define MAX_VIRT_CPUS 32 -@@ -97,6 +97,8 @@ DEFINE_GUEST_HANDLE(void); - #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) - - #ifndef __ASSEMBLY__ -+#include <linux/types.h> -+ - struct trap_info { - uint8_t vector; /* exception vector */ - uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ -diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h -index 42a7e00..8413688 100644 ---- a/arch/x86/include/asm/xen/interface_32.h -+++ b/arch/x86/include/asm/xen/interface_32.h -@@ -32,6 +32,11 @@ - /* And the trap vector is... 
*/ - #define TRAP_INSTR "int $0x82" - -+#define __MACH2PHYS_VIRT_START 0xF5800000 -+#define __MACH2PHYS_VIRT_END 0xF6800000 -+ -+#define __MACH2PHYS_SHIFT 2 -+ - /* - * Virtual addresses beyond this are not modifiable by guest OSes. The - * machine->physical mapping table starts at this address, read-only. -diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h -index 100d266..839a481 100644 ---- a/arch/x86/include/asm/xen/interface_64.h -+++ b/arch/x86/include/asm/xen/interface_64.h -@@ -39,18 +39,7 @@ - #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 - #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 - #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 -- --#ifndef HYPERVISOR_VIRT_START --#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) --#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) --#endif -- --#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) --#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) --#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) --#ifndef machine_to_phys_mapping --#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) --#endif -+#define __MACH2PHYS_SHIFT 3 - - /* - * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) -diff --git a/arch/x86/include/asm/xen/iommu.h b/arch/x86/include/asm/xen/iommu.h -new file mode 100644 -index 0000000..75df312 ---- /dev/null -+++ b/arch/x86/include/asm/xen/iommu.h -@@ -0,0 +1,12 @@ -+#ifndef ASM_X86__XEN_IOMMU_H -+ -+#ifdef CONFIG_PCI_XEN -+extern void xen_iommu_init(void); -+#else -+static inline void xen_iommu_init(void) -+{ -+} -+#endif -+ -+#endif -+ -diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h -index 018a0a4..05c5cf5 100644 ---- a/arch/x86/include/asm/xen/page.h -+++ b/arch/x86/include/asm/xen/page.h -@@ -5,6 +5,7 @@ - #include <linux/types.h> - #include <linux/spinlock.h> - #include 
<linux/pfn.h> -+#include <linux/mm.h> - - #include <asm/uaccess.h> - #include <asm/page.h> -@@ -28,23 +29,32 @@ typedef struct xpaddr { - - /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ - #define INVALID_P2M_ENTRY (~0UL) --#define FOREIGN_FRAME_BIT (1UL<<31) -+#define FOREIGN_FRAME_BIT (1UL << (sizeof(unsigned long) * 8 - 1)) - #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) - - /* Maximum amount of memory we can handle in a domain in pages */ - #define MAX_DOMAIN_PAGES \ - ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) - -+extern unsigned long *machine_to_phys_mapping; -+extern unsigned int machine_to_phys_order; - - extern unsigned long get_phys_to_machine(unsigned long pfn); --extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); -+extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); - - static inline unsigned long pfn_to_mfn(unsigned long pfn) - { -+ unsigned long mfn; -+ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return pfn; - -- return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT; -+ mfn = get_phys_to_machine(pfn); -+ -+ if (mfn != INVALID_P2M_ENTRY) -+ mfn &= ~FOREIGN_FRAME_BIT; -+ -+ return mfn; - } - - static inline int phys_to_machine_mapping_valid(unsigned long pfn) -@@ -62,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) - if (xen_feature(XENFEAT_auto_translated_physmap)) - return mfn; - --#if 0 - if (unlikely((mfn >> machine_to_phys_order) != 0)) -- return max_mapnr; --#endif -+ return ~0; - - pfn = 0; - /* -@@ -112,13 +120,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) - */ - static inline unsigned long mfn_to_local_pfn(unsigned long mfn) - { -- extern unsigned long max_mapnr; - unsigned long pfn = mfn_to_pfn(mfn); -- if ((pfn < max_mapnr) -- && !xen_feature(XENFEAT_auto_translated_physmap) -- && (get_phys_to_machine(pfn) != mfn)) -- return max_mapnr; /* force !pfn_valid() */ -- /* XXX fixme; not true with sparsemem */ -+ 
if (get_phys_to_machine(pfn) != mfn) -+ return -1; /* force !pfn_valid() */ - return pfn; - } - -@@ -163,6 +167,7 @@ static inline pte_t __pte_ma(pteval_t x) - - #define pgd_val_ma(x) ((x).pgd) - -+void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid); - - xmaddr_t arbitrary_virt_to_machine(void *address); - unsigned long arbitrary_virt_to_mfn(void *vaddr); -diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h -new file mode 100644 -index 0000000..6683196 ---- /dev/null -+++ b/arch/x86/include/asm/xen/pci.h -@@ -0,0 +1,104 @@ -+#ifndef _ASM_X86_XEN_PCI_H -+#define _ASM_X86_XEN_PCI_H -+ -+#if defined(CONFIG_PCI_MSI) -+#if defined(CONFIG_PCI_XEN) -+int xen_register_pirq(u32 gsi, int triggering); -+int xen_register_gsi(u32 gsi, int triggering, int polarity); -+int xen_create_msi_irq(struct pci_dev *dev, -+ struct msi_desc *msidesc, -+ int type); -+void xen_pci_teardown_msi_dev(struct pci_dev *dev); -+void xen_pci_teardown_msi_irq(int irq); -+int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -+ -+/* The drivers/pci/xen-pcifront.c sets this structure to -+ * its own functions. 
-+ */ -+struct xen_pci_frontend_ops { -+ int (*enable_msi)(struct pci_dev *dev, int **vectors); -+ void (*disable_msi)(struct pci_dev *dev); -+ int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); -+ void (*disable_msix)(struct pci_dev *dev); -+}; -+ -+extern struct xen_pci_frontend_ops *xen_pci_frontend; -+ -+static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, -+ int **vectors) -+{ -+ if (xen_pci_frontend && xen_pci_frontend->enable_msi) -+ return xen_pci_frontend->enable_msi(dev, vectors); -+ return -ENODEV; -+} -+static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) -+{ -+ if (xen_pci_frontend && xen_pci_frontend->disable_msi) -+ xen_pci_frontend->disable_msi(dev); -+} -+static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, -+ int **vectors, int nvec) -+{ -+ if (xen_pci_frontend && xen_pci_frontend->enable_msix) -+ return xen_pci_frontend->enable_msix(dev, vectors, nvec); -+ return -ENODEV; -+} -+static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev) -+{ -+ if (xen_pci_frontend && xen_pci_frontend->disable_msix) -+ xen_pci_frontend->disable_msix(dev); -+} -+#else -+static inline int xen_create_msi_irq(struct pci_dev *dev, -+ struct msi_desc *msidesc, -+ int type) -+{ -+ return -1; -+} -+static inline void xen_pci_teardown_msi_dev(struct pci_dev *dev) { } -+static inline void xen_pci_teardown_msi_irq(int irq) { } -+static inline int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -+{ -+ return -ENODEV; -+} -+#endif /* CONFIG_PCI_XEN */ -+ -+#endif /* CONFIG_PCI_MSI */ -+ -+#ifdef CONFIG_XEN_DOM0_PCI -+int xen_register_gsi(u32 gsi, int triggering, int polarity); -+int xen_find_device_domain_owner(struct pci_dev *dev); -+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); -+int xen_unregister_device_domain_owner(struct pci_dev *dev); -+ -+#else -+static inline int xen_register_gsi(u32 gsi, int triggering, int polarity) -+{ -+ return -1; -+} -+ -+static 
inline int xen_find_device_domain_owner(struct pci_dev *dev) -+{ -+ return -1; -+} -+static inline int xen_register_device_domain_owner(struct pci_dev *dev, -+ uint16_t domain) -+{ -+ return -1; -+} -+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) -+{ -+ return -1; -+} -+#endif -+ -+#if defined(CONFIG_PCI_MSI) && defined(CONFIG_XEN_DOM0_PCI) -+int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -+#else -+static inline int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -+{ -+ return -1; -+} -+#endif -+ -+#endif /* _ASM_X86_XEN_PCI_H */ -diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h -new file mode 100644 -index 0000000..e4fe299 ---- /dev/null -+++ b/arch/x86/include/asm/xen/swiotlb-xen.h -@@ -0,0 +1,14 @@ -+#ifndef _ASM_X86_SWIOTLB_XEN_H -+#define _ASM_X86_SWIOTLB_XEN_H -+ -+#ifdef CONFIG_PCI_XEN -+extern int xen_swiotlb; -+extern int __init pci_xen_swiotlb_detect(void); -+extern void __init pci_xen_swiotlb_init(void); -+#else -+#define xen_swiotlb 0 -+static inline int __init pci_xen_swiotlb_detect(void) { return 0; } -+static inline void __init pci_xen_swiotlb_init(void) { } -+#endif -+ -+#endif /* _ASM_X86_SWIOTLB_XEN_H */ -diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile -index d1911ab..cfe00bc 100644 ---- a/arch/x86/kernel/Makefile -+++ b/arch/x86/kernel/Makefile -@@ -113,6 +113,7 @@ obj-$(CONFIG_X86_MRST) += mrst.o - microcode-y := microcode_core.o - microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o - microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o -+microcode-$(CONFIG_MICROCODE_XEN) += microcode_xen.o - obj-$(CONFIG_MICROCODE) += microcode.o - - obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o -diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c -index 23c2da8..a2a5125 100644 ---- a/arch/x86/kernel/acpi/boot.c -+++ b/arch/x86/kernel/acpi/boot.c -@@ -42,6 +42,10 @@ - #include <asm/mpspec.h> - #include <asm/smp.h> - 
-+#include <asm/xen/pci.h> -+ -+#include <asm/xen/hypervisor.h> -+ - static int __initdata acpi_force = 0; - u32 acpi_rsdt_forced; - int acpi_disabled; -@@ -149,6 +153,10 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) - { - unsigned int ver = 0; - -+ /* We don't want to register lapics when in Xen dom0 */ -+ if (xen_initial_domain()) -+ return; -+ - if (!enabled) { - ++disabled_cpus; - return; -@@ -461,9 +469,13 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) - */ - int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) - { -- unsigned int irq; -+ int irq; - unsigned int plat_gsi = gsi; - -+ irq = xen_register_gsi(gsi, trigger, polarity); -+ if (irq >= 0) -+ return irq; -+ - #ifdef CONFIG_PCI - /* - * Make sure all (legacy) PCI IRQs are set as level-triggered. -@@ -740,6 +752,10 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) - - static void __init acpi_register_lapic_address(unsigned long address) - { -+ /* Xen dom0 doesn't have usable lapics */ -+ if (xen_initial_domain()) -+ return; -+ - mp_lapic_addr = address; - - set_fixmap_nocache(FIX_APIC_BASE, address); -@@ -860,6 +876,9 @@ int __init acpi_probe_gsi(void) - max_gsi = gsi; - } - -+ if (xen_initial_domain()) -+ max_gsi += 255; /* Plus maximum entries of an ioapic. 
*/ -+ - return max_gsi + 1; - } - -diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c -index d85d1b2..8aabedd 100644 ---- a/arch/x86/kernel/acpi/processor.c -+++ b/arch/x86/kernel/acpi/processor.c -@@ -12,6 +12,8 @@ - #include <acpi/processor.h> - #include <asm/acpi.h> - -+#include <xen/xen.h> -+ - static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) - { - struct acpi_object_list *obj_list; -@@ -59,7 +61,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) - /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled - */ -- if (!cpu_has(c, X86_FEATURE_MWAIT)) -+ if (!cpu_has(c, X86_FEATURE_MWAIT) && !xen_initial_domain()) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); - - obj->type = ACPI_TYPE_BUFFER; -@@ -88,6 +90,19 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) - - EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - -+/* Initialize _PDC data based on the CPU vendor */ -+void xen_arch_acpi_processor_init_pdc(struct acpi_processor *pr) -+{ -+ struct cpuinfo_x86 *c = &cpu_data(0); -+ -+ pr->pdc = NULL; -+ if (c->x86_vendor == X86_VENDOR_INTEL) -+ init_intel_pdc(pr, c); -+ -+ return; -+} -+EXPORT_SYMBOL(xen_arch_acpi_processor_init_pdc); -+ - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) - { - if (pr->pdc) { -diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c -index ca93638..9eff23c 100644 ---- a/arch/x86/kernel/acpi/sleep.c -+++ b/arch/x86/kernel/acpi/sleep.c -@@ -12,6 +12,8 @@ - #include <asm/segment.h> - #include <asm/desc.h> - -+#include <xen/acpi.h> -+ - #include "realmode/wakeup.h" - #include "sleep.h" - -diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c -index 7cd33f7..b8497c6 100644 ---- a/arch/x86/kernel/amd_iommu.c -+++ b/arch/x86/kernel/amd_iommu.c -@@ -928,7 +928,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, - } - - if (unlikely(address == -1)) -- address = bad_dma_address; -+ 
address = DMA_ERROR_CODE; - - WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); - -@@ -1545,7 +1545,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, - - pte = dma_ops_get_pte(dom, address); - if (!pte) -- return bad_dma_address; -+ return DMA_ERROR_CODE; - - __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; - -@@ -1626,7 +1626,7 @@ static dma_addr_t __map_single(struct device *dev, - retry: - address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, - dma_mask); -- if (unlikely(address == bad_dma_address)) { -+ if (unlikely(address == DMA_ERROR_CODE)) { - /* - * setting next_address here will let the address - * allocator only scan the new allocated range in the -@@ -1647,7 +1647,7 @@ retry: - start = address; - for (i = 0; i < pages; ++i) { - ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); -- if (ret == bad_dma_address) -+ if (ret == DMA_ERROR_CODE) - goto out_unmap; - - paddr += PAGE_SIZE; -@@ -1675,7 +1675,7 @@ out_unmap: - - dma_ops_free_addresses(dma_dom, address, pages); - -- return bad_dma_address; -+ return DMA_ERROR_CODE; - } - - /* -@@ -1692,7 +1692,7 @@ static void __unmap_single(struct amd_iommu *iommu, - dma_addr_t i, start; - unsigned int pages; - -- if ((dma_addr == bad_dma_address) || -+ if ((dma_addr == DMA_ERROR_CODE) || - (dma_addr + size > dma_dom->aperture_size)) - return; - -@@ -1735,7 +1735,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, - INC_STATS_COUNTER(cnt_map_single); - - if (!check_device(dev)) -- return bad_dma_address; -+ return DMA_ERROR_CODE; - - dma_mask = *dev->dma_mask; - -@@ -1746,12 +1746,12 @@ static dma_addr_t map_page(struct device *dev, struct page *page, - return (dma_addr_t)paddr; - - if (!dma_ops_domain(domain)) -- return bad_dma_address; -+ return DMA_ERROR_CODE; - - spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, - dma_mask); -- if (addr == bad_dma_address) -+ if (addr == 
DMA_ERROR_CODE) - goto out; - - iommu_completion_wait(iommu); -@@ -1960,7 +1960,7 @@ static void *alloc_coherent(struct device *dev, size_t size, - *dma_addr = __map_single(dev, iommu, domain->priv, paddr, - size, DMA_BIDIRECTIONAL, true, dma_mask); - -- if (*dma_addr == bad_dma_address) { -+ if (*dma_addr == DMA_ERROR_CODE) { - spin_unlock_irqrestore(&domain->lock, flags); - goto out_free; - } -@@ -2122,8 +2122,7 @@ int __init amd_iommu_init_dma_ops(void) - prealloc_protection_domains(); - - iommu_detected = 1; -- force_iommu = 1; -- bad_dma_address = 0; -+ swiotlb = 0; - #ifdef CONFIG_GART_IOMMU - gart_iommu_aperture_disabled = 1; - gart_iommu_aperture = 0; -diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c -index 400be99..0069df5 100644 ---- a/arch/x86/kernel/amd_iommu_init.c -+++ b/arch/x86/kernel/amd_iommu_init.c -@@ -29,6 +29,7 @@ - #include <asm/amd_iommu.h> - #include <asm/iommu.h> - #include <asm/gart.h> -+#include <asm/x86_init.h> - - /* - * definitions for the ACPI scanning code -@@ -1206,19 +1207,10 @@ static struct sys_device device_amd_iommu = { - * functions. Finally it prints some information about AMD IOMMUs and - * the driver state and enables the hardware. - */ --int __init amd_iommu_init(void) -+static int __init amd_iommu_init(void) - { - int i, ret = 0; - -- -- if (no_iommu) { -- printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); -- return 0; -- } -- -- if (!amd_iommu_detected) -- return -ENODEV; -- - /* - * First parse ACPI tables to find the largest Bus/Dev/Func - * we need to handle. 
Upon this information the shared data -@@ -1333,6 +1325,7 @@ int __init amd_iommu_init(void) - else - printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); - -+ x86_platform.iommu_shutdown = disable_iommus; - out: - return ret; - -@@ -1361,11 +1354,6 @@ free: - goto out; - } - --void amd_iommu_shutdown(void) --{ -- disable_iommus(); --} -- - /**************************************************************************** - * - * Early detect code. This code runs at IOMMU detection time in the DMA -@@ -1380,16 +1368,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) - - void __init amd_iommu_detect(void) - { -- if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) -+ if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; - - if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { - iommu_detected = 1; - amd_iommu_detected = 1; --#ifdef CONFIG_GART_IOMMU -- gart_iommu_aperture_disabled = 1; -- gart_iommu_aperture = 0; --#endif -+ x86_init.iommu.iommu_init = amd_iommu_init; - } - } - -diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c -index 082089e..8d34362 100644 ---- a/arch/x86/kernel/aperture_64.c -+++ b/arch/x86/kernel/aperture_64.c -@@ -28,6 +28,7 @@ - #include <asm/pci-direct.h> - #include <asm/dma.h> - #include <asm/k8.h> -+#include <asm/x86_init.h> - - int gart_iommu_aperture; - int gart_iommu_aperture_disabled __initdata; -@@ -401,6 +402,7 @@ void __init gart_iommu_hole_init(void) - - iommu_detected = 1; - gart_iommu_aperture = 1; -+ x86_init.iommu.iommu_init = gart_iommu_init; - - ctl = read_pci_config(bus, slot, 3, - AMD64_GARTAPERTURECTL); -@@ -469,7 +471,7 @@ out: - - if (aper_alloc) { - /* Got the aperture from the AGP bridge */ -- } else if (swiotlb && !valid_agp) { -+ } else if (!valid_agp) { - /* Do nothing */ - } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || - force_iommu || -diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c 
-index 8928d97..4848d5d 100644 ---- a/arch/x86/kernel/apic/io_apic.c -+++ b/arch/x86/kernel/apic/io_apic.c -@@ -63,7 +63,12 @@ - #include <asm/uv/uv_hub.h> - #include <asm/uv/uv_irq.h> - -+#include <asm/xen/hypervisor.h> - #include <asm/apic.h> -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/pci.h> -+ -+#include <asm/xen/pci.h> - - #define __apicdebuginit(type) static type __init - #define for_each_irq_pin(entry, head) \ -@@ -395,14 +400,18 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector) - - static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) - { -- struct io_apic __iomem *io_apic = io_apic_base(apic); -+ struct io_apic __iomem *io_apic; -+ -+ io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); - } - - static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) - { -- struct io_apic __iomem *io_apic = io_apic_base(apic); -+ struct io_apic __iomem *io_apic; -+ -+ io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - writel(value, &io_apic->data); - } -@@ -415,7 +424,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i - */ - static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) - { -- struct io_apic __iomem *io_apic = io_apic_base(apic); -+ struct io_apic __iomem *io_apic; -+ -+ io_apic = io_apic_base(apic); - - if (sis_apic_bug) - writel(reg, &io_apic->index); -@@ -3494,6 +3505,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - -+ if (xen_pv_domain()) -+ return xen_pci_setup_msi_irqs(dev, nvec, type); -+ - node = dev_to_node(&dev->dev); - irq_want = nr_irqs_gsi; - sub_handle = 0; -@@ -3543,7 +3557,29 @@ error: - - void arch_teardown_msi_irq(unsigned int irq) - { -- destroy_irq(irq); -+ if (xen_domain()) -+ xen_pci_teardown_msi_irq(irq); -+ else -+ destroy_irq(irq); -+} -+ -+void 
arch_teardown_msi_irqs(struct pci_dev *dev) -+{ -+ struct msi_desc *entry; -+ -+ /* If we are non-privileged PV domain, we have to -+ * to call xen_teardown_msi_dev first. */ -+ if (xen_domain()) -+ xen_pci_teardown_msi_dev(dev); -+ -+ list_for_each_entry(entry, &dev->msi_list, list) { -+ int i, nvec; -+ if (entry->irq == 0) -+ continue; -+ nvec = 1 << entry->msi_attrib.multiple; -+ for (i = 0; i < nvec; i++) -+ arch_teardown_msi_irq(entry->irq + i); -+ } - } - - #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) -@@ -3860,7 +3896,14 @@ void __init probe_nr_irqs_gsi(void) - printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); - } - -+int get_nr_irqs_gsi(void) -+{ -+ return nr_irqs_gsi; -+} -+ - #ifdef CONFIG_SPARSE_IRQ -+int nr_dynamic_irqs; -+ - int __init arch_probe_nr_irqs(void) - { - int nr; -@@ -3878,6 +3921,8 @@ int __init arch_probe_nr_irqs(void) - if (nr < nr_irqs) - nr_irqs = nr; - -+ nr_irqs += nr_dynamic_irqs; -+ - return 0; - } - #endif -diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c -index 7ff61d6..d1e6e60 100644 ---- a/arch/x86/kernel/apic/nmi.c -+++ b/arch/x86/kernel/apic/nmi.c -@@ -558,6 +558,9 @@ void arch_trigger_all_cpu_backtrace(void) - { - int i; - -+ if (!cpu_has_apic) -+ return; -+ - cpumask_copy(&backtrace_mask, cpu_online_mask); - - printk(KERN_INFO "sending NMI to all CPUs:\n"); -diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile -index f4361b5..404e458 100644 ---- a/arch/x86/kernel/cpu/mtrr/Makefile -+++ b/arch/x86/kernel/cpu/mtrr/Makefile -@@ -1,3 +1,4 @@ - obj-y := main.o if.o generic.o state.o cleanup.o - obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o -+obj-$(CONFIG_XEN_DOM0) += xen.o - -diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c -index 33af141..378f8dc 100644 ---- a/arch/x86/kernel/cpu/mtrr/amd.c -+++ b/arch/x86/kernel/cpu/mtrr/amd.c -@@ -108,6 +108,11 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) - 
return 0; - } - -+static int amd_num_var_ranges(void) -+{ -+ return 2; -+} -+ - static struct mtrr_ops amd_mtrr_ops = { - .vendor = X86_VENDOR_AMD, - .set = amd_set_mtrr, -@@ -115,6 +120,7 @@ static struct mtrr_ops amd_mtrr_ops = { - .get_free_region = generic_get_free_region, - .validate_add_page = amd_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -+ .num_var_ranges = amd_num_var_ranges, - }; - - int __init amd_init_mtrr(void) -diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c -index de89f14..7c686a0 100644 ---- a/arch/x86/kernel/cpu/mtrr/centaur.c -+++ b/arch/x86/kernel/cpu/mtrr/centaur.c -@@ -110,6 +110,11 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t - return 0; - } - -+static int centaur_num_var_ranges(void) -+{ -+ return 8; -+} -+ - static struct mtrr_ops centaur_mtrr_ops = { - .vendor = X86_VENDOR_CENTAUR, - .set = centaur_set_mcr, -@@ -117,6 +122,7 @@ static struct mtrr_ops centaur_mtrr_ops = { - .get_free_region = centaur_get_free_region, - .validate_add_page = centaur_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -+ .num_var_ranges = centaur_num_var_ranges, - }; - - int __init centaur_init_mtrr(void) -diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c -index 228d982..fd6edcc 100644 ---- a/arch/x86/kernel/cpu/mtrr/cyrix.c -+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c -@@ -265,6 +265,11 @@ static void cyrix_set_all(void) - post_set(); - } - -+static int cyrix_num_var_ranges(void) -+{ -+ return 8; -+} -+ - static struct mtrr_ops cyrix_mtrr_ops = { - .vendor = X86_VENDOR_CYRIX, - .set_all = cyrix_set_all, -@@ -273,6 +278,7 @@ static struct mtrr_ops cyrix_mtrr_ops = { - .get_free_region = cyrix_get_free_region, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -+ .num_var_ranges = cyrix_num_var_ranges, - }; - - int __init cyrix_init_mtrr(void) -diff --git a/arch/x86/kernel/cpu/mtrr/generic.c 
b/arch/x86/kernel/cpu/mtrr/generic.c -index 55da0c5..42f30cd 100644 ---- a/arch/x86/kernel/cpu/mtrr/generic.c -+++ b/arch/x86/kernel/cpu/mtrr/generic.c -@@ -749,8 +749,16 @@ int positive_have_wrcomb(void) - return 1; - } - --/* -- * Generic structure... -+static int generic_num_var_ranges(void) -+{ -+ unsigned long config = 0, dummy; -+ -+ rdmsr(MSR_MTRRcap, config, dummy); -+ -+ return config & 0xff; -+} -+ -+/* generic structure... - */ - struct mtrr_ops generic_mtrr_ops = { - .use_intel_if = 1, -@@ -760,4 +768,5 @@ struct mtrr_ops generic_mtrr_ops = { - .set = generic_set_mtrr, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = generic_have_wrcomb, -+ .num_var_ranges = generic_num_var_ranges, - }; -diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c -index 84e83de..c8cb9ed 100644 ---- a/arch/x86/kernel/cpu/mtrr/main.c -+++ b/arch/x86/kernel/cpu/mtrr/main.c -@@ -110,21 +110,6 @@ static int have_wrcomb(void) - return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; - } - --/* This function returns the number of variable MTRRs */ --static void __init set_num_var_ranges(void) --{ -- unsigned long config = 0, dummy; -- -- if (use_intel()) -- rdmsr(MSR_MTRRcap, config, dummy); -- else if (is_cpu(AMD)) -- config = 2; -- else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) -- config = 8; -- -- num_var_ranges = config & 0xff; --} -- - static void __init init_table(void) - { - int i, max; -@@ -711,8 +696,11 @@ void __init mtrr_bp_init(void) - } - } - -+ /* Let Xen code override the above if it wants */ -+ xen_init_mtrr(); -+ - if (mtrr_if) { -- set_num_var_ranges(); -+ num_var_ranges = mtrr_if->num_var_ranges(); - init_table(); - if (use_intel()) { - get_mtrr_state(); -diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h -index a501dee..98569c3 100644 ---- a/arch/x86/kernel/cpu/mtrr/mtrr.h -+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h -@@ -5,6 +5,8 @@ - #include <linux/types.h> - #include <linux/stddef.h> - -+#include 
<asm/mtrr.h> -+ - #define MTRR_CHANGE_MASK_FIXED 0x01 - #define MTRR_CHANGE_MASK_VARIABLE 0x02 - #define MTRR_CHANGE_MASK_DEFTYPE 0x04 -@@ -25,6 +27,8 @@ struct mtrr_ops { - int (*validate_add_page)(unsigned long base, unsigned long size, - unsigned int type); - int (*have_wrcomb)(void); -+ -+ int (*num_var_ranges)(void); - }; - - extern int generic_get_free_region(unsigned long base, unsigned long size, -@@ -73,6 +77,13 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned); - int amd_init_mtrr(void); - int cyrix_init_mtrr(void); - int centaur_init_mtrr(void); -+#ifdef CONFIG_XEN_DOM0 -+void xen_init_mtrr(void); -+#else -+static inline void xen_init_mtrr(void) -+{ -+} -+#endif - - extern int changed_by_mtrr_cleanup; - extern int mtrr_cleanup(unsigned address_bits); -diff --git a/arch/x86/kernel/cpu/mtrr/xen.c b/arch/x86/kernel/cpu/mtrr/xen.c -new file mode 100644 -index 0000000..852018b ---- /dev/null -+++ b/arch/x86/kernel/cpu/mtrr/xen.c -@@ -0,0 +1,109 @@ -+#include <linux/init.h> -+#include <linux/mm.h> -+ -+#include <asm/pat.h> -+ -+#include "mtrr.h" -+ -+#include <xen/xen.h> -+#include <xen/interface/platform.h> -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/hypercall.h> -+ -+static void xen_set_mtrr(unsigned int reg, unsigned long base, -+ unsigned long size, mtrr_type type) -+{ -+ struct xen_platform_op op; -+ int error; -+ -+ /* mtrr_ops->set() is called once per CPU, -+ * but Xen's ops apply to all CPUs. 
-+ */ -+ if (smp_processor_id()) -+ return; -+ -+ if (size == 0) { -+ op.cmd = XENPF_del_memtype; -+ op.u.del_memtype.handle = 0; -+ op.u.del_memtype.reg = reg; -+ } else { -+ op.cmd = XENPF_add_memtype; -+ op.u.add_memtype.mfn = base; -+ op.u.add_memtype.nr_mfns = size; -+ op.u.add_memtype.type = type; -+ } -+ -+ error = HYPERVISOR_dom0_op(&op); -+ BUG_ON(error != 0); -+} -+ -+static void xen_get_mtrr(unsigned int reg, unsigned long *base, -+ unsigned long *size, mtrr_type *type) -+{ -+ struct xen_platform_op op; -+ -+ op.cmd = XENPF_read_memtype; -+ op.u.read_memtype.reg = reg; -+ if (HYPERVISOR_dom0_op(&op) != 0) { -+ *base = 0; -+ *size = 0; -+ *type = 0; -+ return; -+ } -+ -+ *size = op.u.read_memtype.nr_mfns; -+ *base = op.u.read_memtype.mfn; -+ *type = op.u.read_memtype.type; -+} -+ -+static int __init xen_num_var_ranges(void) -+{ -+ int ranges; -+ struct xen_platform_op op; -+ -+ op.cmd = XENPF_read_memtype; -+ -+ for (ranges = 0; ; ranges++) { -+ op.u.read_memtype.reg = ranges; -+ if (HYPERVISOR_dom0_op(&op) != 0) -+ break; -+ } -+ return ranges; -+} -+ -+/* -+ * DOM0 TODO: Need to fill in the remaining mtrr methods to have full -+ * working userland mtrr support. -+ */ -+static struct mtrr_ops xen_mtrr_ops = { -+ .vendor = X86_VENDOR_UNKNOWN, -+ .get_free_region = generic_get_free_region, -+ .set = xen_set_mtrr, -+ .get = xen_get_mtrr, -+ .have_wrcomb = positive_have_wrcomb, -+ .validate_add_page = generic_validate_add_page, -+ .use_intel_if = 0, -+ .num_var_ranges = xen_num_var_ranges, -+}; -+ -+void __init xen_init_mtrr(void) -+{ -+ /* -+ * Check that we're running under Xen, and privileged enough -+ * to play with MTRRs. -+ */ -+ if (!xen_initial_domain()) -+ return; -+ -+ /* -+ * Check that the CPU has an MTRR implementation we can -+ * support. 
-+ */ -+ if (cpu_has_mtrr || -+ cpu_has_k6_mtrr || -+ cpu_has_cyrix_arr || -+ cpu_has_centaur_mcr) { -+ mtrr_if = &xen_mtrr_ops; -+ pat_init(); -+ } -+} -diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c -index ff95824..ebd4c51 100644 ---- a/arch/x86/kernel/crash.c -+++ b/arch/x86/kernel/crash.c -@@ -28,7 +28,6 @@ - #include <asm/reboot.h> - #include <asm/virtext.h> - -- - #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) - - static void kdump_nmi_callback(int cpu, struct die_args *args) -diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c -index d17d482..4d0aded 100644 ---- a/arch/x86/kernel/e820.c -+++ b/arch/x86/kernel/e820.c -@@ -750,6 +750,36 @@ static int __init find_overlapped_early(u64 start, u64 end) - return i; - } - -+u64 __init early_res_next_free(u64 addr) -+{ -+ int i; -+ u64 end = addr; -+ struct early_res *r; -+ -+ for (i = 0; i < MAX_EARLY_RES; i++) { -+ r = &early_res[i]; -+ if (addr >= r->start && addr < r->end) { -+ end = r->end; -+ break; -+ } -+ } -+ return end; -+} -+ -+u64 __init early_res_next_reserved(u64 addr, u64 max) -+{ -+ int i; -+ struct early_res *r; -+ u64 next_res = max; -+ -+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { -+ r = &early_res[i]; -+ if ((r->start >= addr) && (r->start < next_res)) -+ next_res = r->start; -+ } -+ return next_res; -+} -+ - /* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and -diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S -index c097e7d..7764118 100644 ---- a/arch/x86/kernel/entry_32.S -+++ b/arch/x86/kernel/entry_32.S -@@ -1088,6 +1088,9 @@ ENTRY(xen_failsafe_callback) - .previous - ENDPROC(xen_failsafe_callback) - -+BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, -+ xen_evtchn_do_upcall) -+ - #endif /* CONFIG_XEN */ - - #ifdef CONFIG_FUNCTION_TRACER -diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S -index b5c061f..a626344 100644 ---- 
a/arch/x86/kernel/entry_64.S -+++ b/arch/x86/kernel/entry_64.S -@@ -1364,6 +1364,9 @@ ENTRY(xen_failsafe_callback) - CFI_ENDPROC - END(xen_failsafe_callback) - -+apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ -+ xen_hvm_callback_vector xen_evtchn_do_upcall -+ - #endif /* CONFIG_XEN */ - - /* -diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c -index 0b06cd7..f59b07a 100644 ---- a/arch/x86/kernel/head64.c -+++ b/arch/x86/kernel/head64.c -@@ -79,6 +79,8 @@ void __init x86_64_start_kernel(char * real_mode_data) - /* Cleanup the over mapped high alias */ - cleanup_highmap(); - -+ max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; -+ - for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { - #ifdef CONFIG_EARLY_PRINTK - set_intr_gate(i, &early_idt_handlers[i]); -diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c -index c771e1a..8b970b8 100644 ---- a/arch/x86/kernel/hpet.c -+++ b/arch/x86/kernel/hpet.c -@@ -98,7 +98,7 @@ static int __init hpet_setup(char *str) - } - __setup("hpet=", hpet_setup); - --static int __init disable_hpet(char *str) -+int __init disable_hpet(char *str) - { - boot_hpet_disable = 1; - return 1; -diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c -index 99c4d30..919c1a8 100644 ---- a/arch/x86/kernel/ioport.c -+++ b/arch/x86/kernel/ioport.c -@@ -30,13 +30,29 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base, - } - } - -+void native_set_io_bitmap(struct thread_struct *t, -+ unsigned long bytes_updated) -+{ -+ struct tss_struct *tss; -+ -+ if (!bytes_updated) -+ return; -+ -+ tss = &__get_cpu_var(init_tss); -+ -+ /* Update the TSS: */ -+ if (t->io_bitmap_ptr) -+ memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); -+ else -+ memset(tss->io_bitmap, 0xff, bytes_updated); -+} -+ - /* - * this changes the io permissions bitmap in the current task. 
- */ - asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) - { - struct thread_struct *t = &current->thread; -- struct tss_struct *tss; - unsigned int i, max_long, bytes, bytes_updated; - - if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) -@@ -61,13 +77,13 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) - } - - /* -- * do it in the per-thread copy and in the TSS ... -+ * do it in the per-thread copy - * -- * Disable preemption via get_cpu() - we must not switch away -+ * Disable preemption - we must not switch away - * because the ->io_bitmap_max value must match the bitmap - * contents: - */ -- tss = &per_cpu(init_tss, get_cpu()); -+ preempt_disable(); - - set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); - -@@ -85,10 +101,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) - - t->io_bitmap_max = bytes; - -- /* Update the TSS: */ -- memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); -+ set_io_bitmap(t, bytes_updated); - -- put_cpu(); -+ preempt_enable(); - - return 0; - } -@@ -119,11 +134,10 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) - return 0; - } - --#ifdef CONFIG_X86_32 --long sys_iopl(struct pt_regs *regs) -+asmlinkage long sys_iopl(unsigned int level) - { -- unsigned int level = regs->bx; - struct thread_struct *t = &current->thread; -+ struct pt_regs *regs = task_pt_regs(current); - int rc; - - rc = do_iopl(level, regs); -@@ -135,9 +149,3 @@ long sys_iopl(struct pt_regs *regs) - out: - return rc; - } --#else --asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) --{ -- return do_iopl(level, regs); --} --#endif -diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c -index ec6ef60..fa5b061 100644 ---- a/arch/x86/kernel/ldt.c -+++ b/arch/x86/kernel/ldt.c -@@ -109,6 +109,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) - - mutex_init(&mm->context.lock); - mm->context.size = 0; -+#ifdef CONFIG_XEN -+ 
mm->context.has_foreign_mappings = 0; -+#endif - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); -diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c -index 378e9a8..86ca771 100644 ---- a/arch/x86/kernel/microcode_core.c -+++ b/arch/x86/kernel/microcode_core.c -@@ -81,6 +81,8 @@ - #include <linux/fs.h> - #include <linux/mm.h> - -+#include <xen/xen.h> -+#include <asm/xen/hypervisor.h> - #include <asm/microcode.h> - #include <asm/processor.h> - -@@ -503,7 +505,9 @@ static int __init microcode_init(void) - struct cpuinfo_x86 *c = &cpu_data(0); - int error; - -- if (c->x86_vendor == X86_VENDOR_INTEL) -+ if (xen_pv_domain()) -+ microcode_ops = init_xen_microcode(); -+ else if (c->x86_vendor == X86_VENDOR_INTEL) - microcode_ops = init_intel_microcode(); - else if (c->x86_vendor == X86_VENDOR_AMD) - microcode_ops = init_amd_microcode(); -diff --git a/arch/x86/kernel/microcode_xen.c b/arch/x86/kernel/microcode_xen.c -new file mode 100644 -index 0000000..16c742e ---- /dev/null -+++ b/arch/x86/kernel/microcode_xen.c -@@ -0,0 +1,201 @@ -+/* -+ * Xen microcode update driver -+ * -+ * Xen does most of the work here. We just pass the whole blob into -+ * Xen, and it will apply it to all CPUs as appropriate. Xen will -+ * worry about how different CPU models are actually updated. 
-+ */ -+#include <linux/sched.h> -+#include <linux/module.h> -+#include <linux/firmware.h> -+#include <linux/vmalloc.h> -+#include <linux/uaccess.h> -+ -+#include <asm/microcode.h> -+ -+#include <xen/xen.h> -+#include <xen/interface/platform.h> -+#include <xen/interface/xen.h> -+ -+#include <asm/xen/hypercall.h> -+#include <asm/xen/hypervisor.h> -+ -+MODULE_DESCRIPTION("Xen microcode update driver"); -+MODULE_LICENSE("GPL"); -+ -+struct xen_microcode { -+ size_t len; -+ char data[0]; -+}; -+ -+static int xen_microcode_update(int cpu) -+{ -+ int err; -+ struct xen_platform_op op; -+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu; -+ struct xen_microcode *uc = uci->mc; -+ -+ if (uc == NULL || uc->len == 0) { -+ /* -+ * We do all cpus at once, so we don't need to do -+ * other cpus explicitly (besides, these vcpu numbers -+ * have no relationship to underlying physical cpus). -+ */ -+ return 0; -+ } -+ -+ op.cmd = XENPF_microcode_update; -+ set_xen_guest_handle(op.u.microcode.data, uc->data); -+ op.u.microcode.length = uc->len; -+ -+ err = HYPERVISOR_dom0_op(&op); -+ -+ if (err != 0) -+ printk(KERN_WARNING "microcode_xen: microcode update failed: %d\n", err); -+ -+ return err; -+} -+ -+static enum ucode_state xen_request_microcode_fw(int cpu, struct device *device) -+{ -+ char name[30]; -+ struct cpuinfo_x86 *c = &cpu_data(cpu); -+ const struct firmware *firmware; -+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu; -+ enum ucode_state ret; -+ struct xen_microcode *uc; -+ size_t size; -+ int err; -+ -+ switch (c->x86_vendor) { -+ case X86_VENDOR_INTEL: -+ snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x", -+ c->x86, c->x86_model, c->x86_mask); -+ break; -+ -+ case X86_VENDOR_AMD: -+ snprintf(name, sizeof(name), "amd-ucode/microcode_amd.bin"); -+ break; -+ -+ default: -+ return UCODE_NFOUND; -+ } -+ -+ err = request_firmware(&firmware, name, device); -+ if (err) { -+ pr_debug("microcode: data file %s load failed\n", name); -+ return UCODE_NFOUND; -+ } -+ -+ 
/* -+ * Only bother getting real firmware for cpu 0; the others get -+ * dummy placeholders. -+ */ -+ if (cpu == 0) -+ size = firmware->size; -+ else -+ size = 0; -+ -+ if (uci->mc != NULL) { -+ vfree(uci->mc); -+ uci->mc = NULL; -+ } -+ -+ ret = UCODE_ERROR; -+ uc = vmalloc(sizeof(*uc) + size); -+ if (uc == NULL) -+ goto out; -+ -+ ret = UCODE_OK; -+ uc->len = size; -+ memcpy(uc->data, firmware->data, uc->len); -+ -+ uci->mc = uc; -+ -+out: -+ release_firmware(firmware); -+ -+ return ret; -+} -+ -+static enum ucode_state xen_request_microcode_user(int cpu, -+ const void __user *buf, size_t size) -+{ -+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu; -+ struct xen_microcode *uc; -+ enum ucode_state ret; -+ size_t unread; -+ -+ if (cpu != 0) { -+ /* No real firmware for non-zero cpus; just store a -+ placeholder */ -+ size = 0; -+ } -+ -+ if (uci->mc != NULL) { -+ vfree(uci->mc); -+ uci->mc = NULL; -+ } -+ -+ ret = UCODE_ERROR; -+ uc = vmalloc(sizeof(*uc) + size); -+ if (uc == NULL) -+ goto out; -+ -+ uc->len = size; -+ -+ ret = UCODE_NFOUND; -+ -+ /* XXX This sporadically returns uncopied bytes, so we return -+ EFAULT. As far as I can see, the usermode code -+ (microcode_ctl) isn't doing anything wrong... 
*/ -+ unread = copy_from_user(uc->data, buf, size); -+ -+ if (unread != 0) { -+ printk(KERN_WARNING "failed to read %zd of %zd bytes at %p -> %p\n", -+ unread, size, buf, uc->data); -+ goto out; -+ } -+ -+ ret = UCODE_OK; -+ -+out: -+ if (ret == 0) -+ uci->mc = uc; -+ else -+ vfree(uc); -+ -+ return ret; -+} -+ -+static void xen_microcode_fini_cpu(int cpu) -+{ -+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu; -+ -+ vfree(uci->mc); -+ uci->mc = NULL; -+} -+ -+static int xen_collect_cpu_info(int cpu, struct cpu_signature *sig) -+{ -+ sig->sig = 0; -+ sig->pf = 0; -+ sig->rev = 0; -+ -+ return 0; -+} -+ -+static struct microcode_ops microcode_xen_ops = { -+ .request_microcode_user = xen_request_microcode_user, -+ .request_microcode_fw = xen_request_microcode_fw, -+ .collect_cpu_info = xen_collect_cpu_info, -+ .apply_microcode = xen_microcode_update, -+ .microcode_fini_cpu = xen_microcode_fini_cpu, -+}; -+ -+struct microcode_ops * __init init_xen_microcode(void) -+{ -+ if (!xen_initial_domain()) -+ return NULL; -+ return &microcode_xen_ops; -+} -diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c -index 1b1739d..f7e115c 100644 ---- a/arch/x86/kernel/paravirt.c -+++ b/arch/x86/kernel/paravirt.c -@@ -376,6 +376,7 @@ struct pv_cpu_ops pv_cpu_ops = { - .swapgs = native_swapgs, - - .set_iopl_mask = native_set_iopl_mask, -+ .set_io_bitmap = native_set_io_bitmap, - .io_delay = native_io_delay, - - .start_context_switch = paravirt_nop, -diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c -index 1a2d4b1..2f158a5 100644 ---- a/arch/x86/kernel/pci-calgary_64.c -+++ b/arch/x86/kernel/pci-calgary_64.c -@@ -46,6 +46,7 @@ - #include <asm/dma.h> - #include <asm/rio.h> - #include <asm/bios_ebda.h> -+#include <asm/x86_init.h> - - #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT - int use_calgary __read_mostly = 1; -@@ -249,7 +250,7 @@ static unsigned long iommu_range_alloc(struct device *dev, - if (panic_on_overflow) - panic("Calgary: fix the 
allocator.\n"); - else -- return bad_dma_address; -+ return DMA_ERROR_CODE; - } - } - -@@ -265,11 +266,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, - void *vaddr, unsigned int npages, int direction) - { - unsigned long entry; -- dma_addr_t ret = bad_dma_address; -+ dma_addr_t ret = DMA_ERROR_CODE; - - entry = iommu_range_alloc(dev, tbl, npages); - -- if (unlikely(entry == bad_dma_address)) -+ if (unlikely(entry == DMA_ERROR_CODE)) - goto error; - - /* set the return dma address */ -@@ -284,7 +285,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, - error: - printk(KERN_WARNING "Calgary: failed to allocate %u pages in " - "iommu %p\n", npages, tbl); -- return bad_dma_address; -+ return DMA_ERROR_CODE; - } - - static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, -@@ -295,8 +296,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, - unsigned long flags; - - /* were we called with bad_dma_address? 
*/ -- badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); -- if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { -+ badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); -+ if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { - WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " - "address 0x%Lx\n", dma_addr); - return; -@@ -380,7 +381,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, - npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); - - entry = iommu_range_alloc(dev, tbl, npages); -- if (entry == bad_dma_address) { -+ if (entry == DMA_ERROR_CODE) { - /* makes sure unmap knows to stop */ - s->dma_length = 0; - goto error; -@@ -398,7 +399,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, - error: - calgary_unmap_sg(dev, sg, nelems, dir, NULL); - for_each_sg(sg, s, nelems, i) { -- sg->dma_address = bad_dma_address; -+ sg->dma_address = DMA_ERROR_CODE; - sg->dma_length = 0; - } - return 0; -@@ -453,7 +454,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, - - /* set up tces to cover the allocated range */ - mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); -- if (mapping == bad_dma_address) -+ if (mapping == DMA_ERROR_CODE) - goto free; - *dma_handle = mapping; - return ret; -@@ -734,7 +735,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) - struct iommu_table *tbl = pci_iommu(dev->bus); - - /* reserve EMERGENCY_PAGES from bad_dma_address and up */ -- iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); -+ iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); - - /* avoid the BIOS/VGA first 640KB-1MB region */ - /* for CalIOC2 - avoid the entire first MB */ -@@ -1349,6 +1350,23 @@ static void __init get_tce_space_from_tar(void) - return; - } - -+static int __init calgary_iommu_init(void) -+{ -+ int ret; -+ -+ /* ok, we're trying to use Calgary - let's roll */ -+ printk(KERN_INFO "PCI-DMA: Using 
Calgary IOMMU\n"); -+ -+ ret = calgary_init(); -+ if (ret) { -+ printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " -+ "falling back to no_iommu\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ - void __init detect_calgary(void) - { - int bus; -@@ -1362,7 +1380,7 @@ void __init detect_calgary(void) - * if the user specified iommu=off or iommu=soft or we found - * another HW IOMMU already, bail out. - */ -- if (swiotlb || no_iommu || iommu_detected) -+ if (no_iommu || iommu_detected) - return; - - if (!use_calgary) -@@ -1447,9 +1465,7 @@ void __init detect_calgary(void) - printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", - specified_table_size); - -- /* swiotlb for devices that aren't behind the Calgary. */ -- if (max_pfn > MAX_DMA32_PFN) -- swiotlb = 1; -+ x86_init.iommu.iommu_init = calgary_iommu_init; - } - return; - -@@ -1462,35 +1478,6 @@ cleanup: - } - } - --int __init calgary_iommu_init(void) --{ -- int ret; -- -- if (no_iommu || (swiotlb && !calgary_detected)) -- return -ENODEV; -- -- if (!calgary_detected) -- return -ENODEV; -- -- /* ok, we're trying to use Calgary - let's roll */ -- printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); -- -- ret = calgary_init(); -- if (ret) { -- printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " -- "falling back to no_iommu\n", ret); -- return ret; -- } -- -- force_iommu = 1; -- bad_dma_address = 0x0; -- /* dma_ops is set to swiotlb or nommu */ -- if (!dma_ops) -- dma_ops = &nommu_dma_ops; -- -- return 0; --} -- - static int __init calgary_parse_options(char *p) - { - unsigned int bridge; -diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c -index 6ac3931..3e57c58 100644 ---- a/arch/x86/kernel/pci-dma.c -+++ b/arch/x86/kernel/pci-dma.c -@@ -11,10 +11,12 @@ - #include <asm/gart.h> - #include <asm/calgary.h> - #include <asm/amd_iommu.h> -+#include <asm/x86_init.h> -+#include <asm/xen/swiotlb-xen.h> - - static int forbid_dac __read_mostly; - --struct dma_map_ops *dma_ops; -+struct dma_map_ops 
*dma_ops = &nommu_dma_ops; - EXPORT_SYMBOL(dma_ops); - - static int iommu_sac_force __read_mostly; -@@ -42,9 +44,6 @@ int iommu_detected __read_mostly = 0; - */ - int iommu_pass_through __read_mostly; - --dma_addr_t bad_dma_address __read_mostly = 0; --EXPORT_SYMBOL(bad_dma_address); -- - /* Dummy device used for NULL arguments (normally ISA). */ - struct device x86_dma_fallback_dev = { - .init_name = "fallback device", -@@ -126,18 +125,19 @@ void __init pci_iommu_alloc(void) - /* free the range so iommu could get some range less than 4G */ - dma32_free_bootmem(); - #endif -+ if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) -+ goto out; - -- /* -- * The order of these functions is important for -- * fall-back/fail-over reasons -- */ - gart_iommu_hole_init(); - - detect_calgary(); - - detect_intel_iommu(); - -+ /* needs to be called after gart_iommu_hole_init */ - amd_iommu_detect(); -+out: -+ pci_xen_swiotlb_init(); - - pci_swiotlb_init(); - } -@@ -289,25 +289,17 @@ static int __init pci_iommu_init(void) - #ifdef CONFIG_PCI - dma_debug_add_bus(&pci_bus_type); - #endif -+ x86_init.iommu.iommu_init(); - -- calgary_iommu_init(); -- -- intel_iommu_init(); -- -- amd_iommu_init(); -+ if (swiotlb || xen_swiotlb) { -+ printk(KERN_INFO "PCI-DMA: " -+ "Using software bounce buffering for IO (SWIOTLB)\n"); -+ swiotlb_print_info(); -+ } else -+ swiotlb_free(); - -- gart_iommu_init(); -- -- no_iommu_init(); - return 0; - } -- --void pci_iommu_shutdown(void) --{ -- gart_iommu_shutdown(); -- -- amd_iommu_shutdown(); --} - /* Must execute after PCI subsystem */ - rootfs_initcall(pci_iommu_init); - -diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c -index 1c76691..8c9dd05 100644 ---- a/arch/x86/kernel/pci-gart_64.c -+++ b/arch/x86/kernel/pci-gart_64.c -@@ -39,6 +39,7 @@ - #include <asm/swiotlb.h> - #include <asm/dma.h> - #include <asm/k8.h> -+#include <asm/x86_init.h> - - static unsigned long iommu_bus_base; /* GART remapping area (physical) */ - 
static unsigned long iommu_size; /* size of remapping area bytes */ -@@ -46,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */ - - static u32 *iommu_gatt_base; /* Remapping table */ - -+static dma_addr_t bad_dma_addr; -+ - /* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is -@@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - if (panic_on_overflow) - panic("dma_map_area overflow %lu bytes\n", size); - iommu_full(dev, size, dir); -- return bad_dma_address; -+ return bad_dma_addr; - } - - for (i = 0; i < npages; i++) { -@@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, - - if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0); -- if (addr == bad_dma_address) { -+ if (addr == bad_dma_addr) { - if (i > 0) - gart_unmap_sg(dev, sg, i, dir, NULL); - nents = 0; -@@ -455,7 +458,7 @@ error: - - iommu_full(dev, pages << PAGE_SHIFT, dir); - for_each_sg(sg, s, nents, i) -- s->dma_address = bad_dma_address; -+ s->dma_address = bad_dma_addr; - return 0; - } - -@@ -479,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, - DMA_BIDIRECTIONAL, align_mask); - - flush_gart(); -- if (paddr != bad_dma_address) { -+ if (paddr != bad_dma_addr) { - *dma_addr = paddr; - return page_address(page); - } -@@ -499,6 +502,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, - free_pages((unsigned long)vaddr, get_order(size)); - } - -+static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) -+{ -+ return (dma_addr == bad_dma_addr); -+} -+ - static int no_agp; - - static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) -@@ -689,14 +697,15 @@ static struct dma_map_ops gart_dma_ops = { - .unmap_page = gart_unmap_page, - .alloc_coherent = gart_alloc_coherent, - 
.free_coherent = gart_free_coherent, -+ .mapping_error = gart_mapping_error, - }; - --void gart_iommu_shutdown(void) -+static void gart_iommu_shutdown(void) - { - struct pci_dev *dev; - int i; - -- if (no_agp && (dma_ops != &gart_dma_ops)) -+ if (no_agp) - return; - - for (i = 0; i < num_k8_northbridges; i++) { -@@ -711,7 +720,7 @@ void gart_iommu_shutdown(void) - } - } - --void __init gart_iommu_init(void) -+int __init gart_iommu_init(void) - { - struct agp_kern_info info; - unsigned long iommu_start; -@@ -721,7 +730,7 @@ void __init gart_iommu_init(void) - long i; - - if (num_k8_northbridges == 0) -- return; -+ return 0; - - #ifndef CONFIG_AGP_AMD64 - no_agp = 1; -@@ -733,13 +742,6 @@ void __init gart_iommu_init(void) - (agp_copy_info(agp_bridge, &info) < 0); - #endif - -- if (swiotlb) -- return; -- -- /* Did we detect a different HW IOMMU? */ -- if (iommu_detected && !gart_iommu_aperture) -- return; -- - if (no_iommu || - (!force_iommu && max_pfn <= MAX_DMA32_PFN) || - !gart_iommu_aperture || -@@ -749,7 +751,7 @@ void __init gart_iommu_init(void) - "but GART IOMMU not available.\n"); - printk(KERN_WARNING "falling back to iommu=soft.\n"); - } -- return; -+ return 0; - } - - /* need to map that range */ -@@ -794,7 +796,7 @@ void __init gart_iommu_init(void) - - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; -- bad_dma_address = iommu_bus_base; -+ bad_dma_addr = iommu_bus_base; - iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); - - /* -@@ -841,6 +843,10 @@ void __init gart_iommu_init(void) - - flush_gart(); - dma_ops = &gart_dma_ops; -+ x86_platform.iommu_shutdown = gart_iommu_shutdown; -+ swiotlb = 0; -+ -+ return 0; - } - - void __init gart_parse_options(char *p) -diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c -index a3933d4..22be12b 100644 ---- a/arch/x86/kernel/pci-nommu.c -+++ b/arch/x86/kernel/pci-nommu.c -@@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct 
page *page, - dma_addr_t bus = page_to_phys(page) + offset; - WARN_ON(size == 0); - if (!check_addr("map_single", dev, bus, size)) -- return bad_dma_address; -+ return DMA_ERROR_CODE; - flush_write_buffers(); - return bus; - } -@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = { - .sync_sg_for_device = nommu_sync_sg_for_device, - .is_phys = 1, - }; -- --void __init no_iommu_init(void) --{ -- if (dma_ops) -- return; -- -- force_iommu = 0; /* no HW IOMMU */ -- dma_ops = &nommu_dma_ops; --} -diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c -index aaa6b78..7d2829d 100644 ---- a/arch/x86/kernel/pci-swiotlb.c -+++ b/arch/x86/kernel/pci-swiotlb.c -@@ -42,18 +42,31 @@ static struct dma_map_ops swiotlb_dma_ops = { - .dma_supported = NULL, - }; - --void __init pci_swiotlb_init(void) -+/* -+ * pci_swiotlb_detect - set swiotlb to 1 if necessary -+ * -+ * This returns non-zero if we are forced to use swiotlb (by the boot -+ * option). -+ */ -+int __init pci_swiotlb_detect(void) - { -+ int use_swiotlb = swiotlb | swiotlb_force; -+ - /* don't initialize swiotlb if iommu=off (no_iommu=1) */ - #ifdef CONFIG_X86_64 -- if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) -+ if (!no_iommu && max_pfn > MAX_DMA32_PFN) - swiotlb = 1; - #endif - if (swiotlb_force) - swiotlb = 1; -+ -+ return use_swiotlb; -+} -+ -+void __init pci_swiotlb_init(void) -+{ - if (swiotlb) { -- printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); -- swiotlb_init(); -+ swiotlb_init(0); - dma_ops = &swiotlb_dma_ops; - } - } -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 5fd5b07..11d8667 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -73,16 +73,12 @@ void exit_thread(void) - unsigned long *bp = t->io_bitmap_ptr; - - if (bp) { -- struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); -- -+ preempt_disable(); - t->io_bitmap_ptr = NULL; - clear_thread_flag(TIF_IO_BITMAP); -- /* -- * Careful, 
clear this in the TSS too: -- */ -- memset(tss->io_bitmap, 0xff, t->io_bitmap_max); -+ set_io_bitmap(t, t->io_bitmap_max); - t->io_bitmap_max = 0; -- put_cpu(); -+ preempt_enable(); - kfree(bp); - } - } -@@ -199,19 +195,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - hard_enable_TSC(); - } - -- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { -- /* -- * Copy the relevant range of the IO bitmap. -- * Normally this is 128 bytes or less: -- */ -- memcpy(tss->io_bitmap, next->io_bitmap_ptr, -- max(prev->io_bitmap_max, next->io_bitmap_max)); -- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { -- /* -- * Clear any possible leftover bits: -- */ -- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); -- } -+ if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP) || -+ test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) -+ set_io_bitmap(next, -+ max(prev->io_bitmap_max, next->io_bitmap_max)); - } - - int sys_fork(struct pt_regs *regs) -diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c -index dfdfe46..b12fe8d 100644 ---- a/arch/x86/kernel/pvclock.c -+++ b/arch/x86/kernel/pvclock.c -@@ -111,6 +111,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) - - static atomic64_t last_value = ATOMIC64_INIT(0); - -+void pvclock_resume(void) -+{ -+ atomic64_set(&last_value, 0); -+} -+ - cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) - { - struct pvclock_shadow_time shadow; -diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c -index 200fcde..ff8cc40 100644 ---- a/arch/x86/kernel/reboot.c -+++ b/arch/x86/kernel/reboot.c -@@ -23,7 +23,7 @@ - # include <linux/ctype.h> - # include <linux/mc146818rtc.h> - #else --# include <asm/iommu.h> -+# include <asm/x86_init.h> - #endif - - /* -@@ -647,7 +647,7 @@ void native_machine_shutdown(void) - #endif - - #ifdef CONFIG_X86_64 -- pci_iommu_shutdown(); -+ x86_platform.iommu_shutdown(); - #endif - } - -diff --git a/arch/x86/kernel/setup.c 
b/arch/x86/kernel/setup.c -index 5449a26..56b4707 100644 ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -70,6 +70,7 @@ - #include <linux/tboot.h> - - #include <video/edid.h> -+#include <xen/xen.h> - - #include <asm/mtrr.h> - #include <asm/apic.h> -@@ -89,6 +90,7 @@ - #include <asm/cacheflush.h> - #include <asm/processor.h> - #include <asm/bugs.h> -+#include <asm/tlbflush.h> - - #include <asm/system.h> - #include <asm/vsyscall.h> -@@ -909,7 +911,6 @@ void __init setup_arch(char **cmdline_p) - max_low_pfn = max_pfn; - - high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; -- max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; - #endif - - #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION -@@ -967,6 +968,9 @@ void __init setup_arch(char **cmdline_p) - - initmem_init(0, max_pfn); - -+ /* Initialize cross-cpu tlb flushes */ -+ init_smp_flush(); -+ - #ifdef CONFIG_ACPI_SLEEP - /* - * Reserve low memory region for sleep support. -@@ -1037,6 +1041,7 @@ void __init setup_arch(char **cmdline_p) - probe_nr_irqs_gsi(); - - kvm_guest_init(); -+ xen_hvm_guest_init(); - - e820_reserve_resources(); - e820_mark_nosave_regions(max_low_pfn); -diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c -index 4449a4a..d11c5ff 100644 ---- a/arch/x86/kernel/x86_init.c -+++ b/arch/x86/kernel/x86_init.c -@@ -14,10 +14,13 @@ - #include <asm/time.h> - #include <asm/irq.h> - #include <asm/tsc.h> -+#include <asm/iommu.h> - - void __cpuinit x86_init_noop(void) { } - void __init x86_init_uint_noop(unsigned int unused) { } - void __init x86_init_pgd_noop(pgd_t *unused) { } -+int __init iommu_init_noop(void) { return 0; } -+void iommu_shutdown_noop(void) { } - - /* - * The platform setup functions are preset with the default functions -@@ -62,6 +65,10 @@ struct x86_init_ops x86_init __initdata = { - .tsc_pre_init = x86_init_noop, - .timer_init = hpet_time_init, - }, -+ -+ .iommu = { -+ .iommu_init = iommu_init_noop, -+ }, - }; - - struct x86_cpuinit_ops x86_cpuinit 
__cpuinitdata = { -@@ -72,4 +79,5 @@ struct x86_platform_ops x86_platform = { - .calibrate_tsc = native_calibrate_tsc, - .get_wallclock = mach_get_cmos_time, - .set_wallclock = mach_set_rtc_mmss, -+ .iommu_shutdown = iommu_shutdown_noop, - }; -diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile -index 06630d2..ad895ae 100644 ---- a/arch/x86/mm/Makefile -+++ b/arch/x86/mm/Makefile -@@ -6,6 +6,11 @@ nostackp := $(call cc-option, -fno-stack-protector) - CFLAGS_physaddr.o := $(nostackp) - CFLAGS_setup_nx.o := $(nostackp) - -+# Make sure __phys_addr has no stackprotector -+nostackp := $(call cc-option, -fno-stack-protector) -+CFLAGS_ioremap.o := $(nostackp) -+CFLAGS_init.o := $(nostackp) -+ - obj-$(CONFIG_SMP) += tlb.o - - obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o -diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c -index 1739358..e003b83 100644 ---- a/arch/x86/mm/fault.c -+++ b/arch/x86/mm/fault.c -@@ -228,7 +228,16 @@ void vmalloc_sync_all(void) - - spin_lock_irqsave(&pgd_lock, flags); - list_for_each_entry(page, &pgd_list, lru) { -- if (!vmalloc_sync_one(page_address(page), address)) -+ spinlock_t *pgt_lock; -+ int ret; -+ -+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; -+ -+ spin_lock(pgt_lock); -+ ret = vmalloc_sync_one(page_address(page), address); -+ spin_unlock(pgt_lock); -+ -+ if (!ret) - break; - } - spin_unlock_irqrestore(&pgd_lock, flags); -@@ -340,11 +349,19 @@ void vmalloc_sync_all(void) - spin_lock_irqsave(&pgd_lock, flags); - list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; -+ spinlock_t *pgt_lock; -+ - pgd = (pgd_t *)page_address(page) + pgd_index(address); -+ -+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; -+ spin_lock(pgt_lock); -+ - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); -+ -+ spin_unlock(pgt_lock); - } - spin_unlock_irqrestore(&pgd_lock, flags); - } -diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c -index 71da1bc..892b8eb 100644 ---- 
a/arch/x86/mm/gup.c -+++ b/arch/x86/mm/gup.c -@@ -313,6 +313,11 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, - goto slow_irqon; - #endif - -+#ifdef CONFIG_XEN -+ if (unlikely(mm->context.has_foreign_mappings)) -+ goto slow_irqon; -+#endif -+ - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by -diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c -index 30938c1..10c3719 100644 ---- a/arch/x86/mm/init_32.c -+++ b/arch/x86/mm/init_32.c -@@ -430,22 +430,45 @@ static int __init add_highpages_work_fn(unsigned long start_pfn, - { - int node_pfn; - struct page *page; -+ phys_addr_t chunk_end, chunk_max; - unsigned long final_start_pfn, final_end_pfn; -- struct add_highpages_data *data; -- -- data = (struct add_highpages_data *)datax; -+ struct add_highpages_data *data = (struct add_highpages_data *)datax; - - final_start_pfn = max(start_pfn, data->start_pfn); - final_end_pfn = min(end_pfn, data->end_pfn); - if (final_start_pfn >= final_end_pfn) - return 0; - -- for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; -- node_pfn++) { -- if (!pfn_valid(node_pfn)) -- continue; -- page = pfn_to_page(node_pfn); -- add_one_highpage_init(page, node_pfn); -+ chunk_end = PFN_PHYS(final_start_pfn); -+ chunk_max = PFN_PHYS(final_end_pfn); -+ -+ /* -+ * Check for reserved areas. -+ */ -+ for (;;) { -+ phys_addr_t chunk_start; -+ chunk_start = early_res_next_free(chunk_end); -+ -+ /* -+ * Reserved area. Just count high mem pages. 
-+ */ -+ for (node_pfn = PFN_DOWN(chunk_end); -+ node_pfn < PFN_DOWN(chunk_start); node_pfn++) { -+ if (pfn_valid(node_pfn)) -+ totalhigh_pages++; -+ } -+ -+ if (chunk_start >= chunk_max) -+ break; -+ -+ chunk_end = early_res_next_reserved(chunk_start, chunk_max); -+ for (node_pfn = PFN_DOWN(chunk_start); -+ node_pfn < PFN_DOWN(chunk_end); node_pfn++) { -+ if (!pfn_valid(node_pfn)) -+ continue; -+ page = pfn_to_page(node_pfn); -+ add_one_highpage_init(page, node_pfn); -+ } - } - - return 0; -@@ -459,7 +482,6 @@ void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, - - data.start_pfn = start_pfn; - data.end_pfn = end_pfn; -- - work_with_active_regions(nid, add_highpages_work_fn, &data); - } - -diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c -index 2feb9bd..2601df2 100644 ---- a/arch/x86/mm/ioremap.c -+++ b/arch/x86/mm/ioremap.c -@@ -425,6 +425,11 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) - return &bm_pte[pte_index(addr)]; - } - -+bool __init is_early_ioremap_ptep(pte_t *ptep) -+{ -+ return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; -+} -+ - static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; - - void __init early_ioremap_init(void) -diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c -index e78cd0e..fb91994 100644 ---- a/arch/x86/mm/pat.c -+++ b/arch/x86/mm/pat.c -@@ -666,7 +666,7 @@ void io_free_memtype(resource_size_t start, resource_size_t end) - pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot) - { -- return vma_prot; -+ return __pgprot(pgprot_val(vma_prot) | _PAGE_IOMAP); - } - - #ifdef CONFIG_STRICT_DEVMEM -diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c -index c9ba9de..1fcc191 100644 ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -4,6 +4,9 @@ - #include <asm/tlb.h> - #include <asm/fixmap.h> - -+#include <xen/xen.h> -+#include <asm/xen/hypervisor.h> -+ - #define PGALLOC_GFP GFP_KERNEL 
| __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO - - #ifdef CONFIG_HIGHPTE -@@ -14,6 +17,16 @@ - - gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; - -+pgprot_t arch_vm_get_page_prot(unsigned vm_flags) -+{ -+ pgprot_t ret = __pgprot(0); -+ -+ if (vm_flags & VM_IO) -+ ret = __pgprot(_PAGE_IOMAP); -+ -+ return ret; -+} -+ - pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) - { - return (pte_t *)__get_free_page(PGALLOC_GFP); -@@ -86,7 +99,19 @@ static inline void pgd_list_del(pgd_t *pgd) - #define UNSHARED_PTRS_PER_PGD \ - (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) - --static void pgd_ctor(pgd_t *pgd) -+ -+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) -+{ -+ BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm)); -+ virt_to_page(pgd)->index = (pgoff_t)mm; -+} -+ -+struct mm_struct *pgd_page_get_mm(struct page *page) -+{ -+ return (struct mm_struct *)page->index; -+} -+ -+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) - { - /* If the pgd points to a shared pagetable level (either the - ptes in non-PAE, or shared PMD in PAE), then just copy the -@@ -104,8 +129,10 @@ static void pgd_ctor(pgd_t *pgd) - } - - /* list required to sync kernel mapping updates */ -- if (!SHARED_KERNEL_PMD) -+ if (!SHARED_KERNEL_PMD) { -+ pgd_set_mm(pgd, mm); - pgd_list_add(pgd); -+ } - } - - static void pgd_dtor(pgd_t *pgd) -@@ -271,7 +298,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) - */ - spin_lock_irqsave(&pgd_lock, flags); - -- pgd_ctor(pgd); -+ pgd_ctor(mm, pgd); - pgd_prepopulate_pmd(mm, pgd, pmds); - - spin_unlock_irqrestore(&pgd_lock, flags); -@@ -288,6 +315,12 @@ out: - - void pgd_free(struct mm_struct *mm, pgd_t *pgd) - { -+#ifdef CONFIG_XEN -+ /* EEW */ -+ extern void xen_late_unpin_pgd(struct mm_struct *mm, pgd_t *pgd); -+ if (xen_pv_domain()) -+ xen_late_unpin_pgd(mm, pgd); -+#endif - pgd_mop_up_pmds(mm, pgd); - pgd_dtor(pgd); - paravirt_pgd_free(mm, pgd); -diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c -index 
36fe08e..7317947 100644 ---- a/arch/x86/mm/tlb.c -+++ b/arch/x86/mm/tlb.c -@@ -148,13 +148,25 @@ void smp_invalidate_interrupt(struct pt_regs *regs) - * BUG(); - */ - -- if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { -- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { -+ if (f->flush_mm == NULL || -+ f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { -+ int tlbstate = percpu_read(cpu_tlbstate.state); -+ -+ /* -+ * flush_mm == NULL means flush everything, including -+ * global tlbs, which will only happen when flushing -+ * kernel mappings. -+ */ -+ if (f->flush_mm == NULL) -+ __flush_tlb_all(); -+ else if (tlbstate == TLBSTATE_OK) { - if (f->flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(f->flush_va); -- } else -+ } -+ -+ if (tlbstate == TLBSTATE_LAZY) - leave_mm(cpu); - } - out: -@@ -217,16 +229,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask, - flush_tlb_others_ipi(cpumask, mm, va); - } - --static int __cpuinit init_smp_flush(void) -+void __init init_smp_flush(void) - { - int i; - - for (i = 0; i < ARRAY_SIZE(flush_state); i++) - spin_lock_init(&flush_state[i].tlbstate_lock); -- -- return 0; - } --core_initcall(init_smp_flush); - - void flush_tlb_current_task(void) - { -@@ -274,17 +283,19 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) - - preempt_enable(); - } -+EXPORT_SYMBOL_GPL(flush_tlb_page); - --static void do_flush_tlb_all(void *info) -+void flush_tlb_all(void) - { -- unsigned long cpu = smp_processor_id(); -+ /* flush_tlb_others expects preempt to be disabled */ -+ int cpu = get_cpu(); -+ -+ flush_tlb_others(cpu_online_mask, NULL, TLB_FLUSH_ALL); - - __flush_tlb_all(); - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) - leave_mm(cpu); --} - --void flush_tlb_all(void) --{ -- on_each_cpu(do_flush_tlb_all, NULL, 1); -+ put_cpu(); - } -+EXPORT_SYMBOL_GPL(flush_tlb_all); -diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile -index d49202e..64182c5 100644 ---- 
a/arch/x86/pci/Makefile -+++ b/arch/x86/pci/Makefile -@@ -4,6 +4,7 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o - obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o - obj-$(CONFIG_PCI_DIRECT) += direct.o - obj-$(CONFIG_PCI_OLPC) += olpc.o -+obj-$(CONFIG_PCI_XEN) += xen.o - - obj-y += fixup.o - obj-$(CONFIG_ACPI) += acpi.o -diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c -index 1331fcf..30a9808 100644 ---- a/arch/x86/pci/common.c -+++ b/arch/x86/pci/common.c -@@ -22,6 +22,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | - unsigned int pci_early_dump_regs; - static int pci_bf_sort; - int pci_routeirq; -+int pci_scan_all_fns; - int noioapicquirk; - #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS - int noioapicreroute = 0; -@@ -412,26 +413,31 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) - - extern u8 pci_cache_line_size; - --int __init pcibios_init(void) -+void __init pcibios_set_cache_line_size(void) - { - struct cpuinfo_x86 *c = &boot_cpu_data; - -- if (!raw_pci_ops) { -- printk(KERN_WARNING "PCI: System does not support PCI\n"); -- return 0; -- } -- - /* - * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8 - * and P4. It's also good for 386/486s (which actually have 16) - * as quite a few PCI devices do not support smaller values. 
- */ -+ - pci_cache_line_size = 32 >> 2; - if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_cache_line_size = 64 >> 2; /* K7 & K8 */ - else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_cache_line_size = 128 >> 2; /* P4 */ -+} -+ -+int __init pcibios_init(void) -+{ -+ if (!raw_pci_ops) { -+ printk(KERN_WARNING "PCI: System does not support PCI\n"); -+ return 0; -+ } - -+ pcibios_set_cache_line_size(); - pcibios_resource_survey(); - - if (pci_bf_sort >= pci_force_bf) -diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c -index a672f12..91d040e 100644 ---- a/arch/x86/pci/i386.c -+++ b/arch/x86/pci/i386.c -@@ -283,6 +283,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - - prot = pgprot_val(vma->vm_page_prot); - -+ prot |= _PAGE_IOMAP; /* creating a mapping for IO */ -+ - /* - * Return error if pat is not enabled and write_combine is requested. - * Caller can followup with UC MINUS request and add a WC mtrr if there -diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c -index 25a1f8e..4e2f90a 100644 ---- a/arch/x86/pci/init.c -+++ b/arch/x86/pci/init.c -@@ -15,10 +15,16 @@ static __init int pci_arch_init(void) - if (!(pci_probe & PCI_PROBE_NOEARLY)) - pci_mmcfg_early_init(); - -+#ifdef CONFIG_PCI_XEN -+ if (!pci_xen_init()) -+ return 0; -+#endif -+ - #ifdef CONFIG_PCI_OLPC - if (!pci_olpc_init()) - return 0; /* skip additional checks if it's an XO */ - #endif -+ - #ifdef CONFIG_PCI_BIOS - pci_pcbios_init(); - #endif -diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c -new file mode 100644 -index 0000000..67fa926 ---- /dev/null -+++ b/arch/x86/pci/xen.c -@@ -0,0 +1,154 @@ -+/* -+ * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux -+ * x86 PCI core to support the Xen PCI Frontend -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/pci.h> -+#include <linux/acpi.h> -+ -+#include <asm/io.h> -+#include <asm/pci_x86.h> -+ 
-+#include <asm/xen/hypervisor.h> -+ -+#include <xen/events.h> -+#include <asm/xen/pci.h> -+ -+#if defined(CONFIG_PCI_MSI) -+#include <linux/msi.h> -+ -+struct xen_pci_frontend_ops *xen_pci_frontend; -+EXPORT_SYMBOL_GPL(xen_pci_frontend); -+ -+/* -+ * For MSI interrupts we have to use drivers/xen/event.s functions to -+ * allocate an irq_desc and setup the right */ -+ -+ -+int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -+{ -+ int irq, ret, i; -+ struct msi_desc *msidesc; -+ int *v; -+ -+ /* Dom0 has another mechanism for this. The exit path -+ * (xen_pci_teardown_msi_irq) is shared with Dom0. -+ */ -+ if (xen_initial_domain()) -+ return xen_setup_msi_irqs(dev, nvec, type); -+ -+ v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); -+ if (!v) -+ return -ENOMEM; -+ -+ if (!xen_initial_domain()) { -+ if (type == PCI_CAP_ID_MSIX) -+ ret = xen_pci_frontend_enable_msix(dev, &v, nvec); -+ else -+ ret = xen_pci_frontend_enable_msi(dev, &v); -+ if (ret) -+ goto error; -+ } -+ i = 0; -+ list_for_each_entry(msidesc, &dev->msi_list, list) { -+ irq = xen_allocate_pirq(v[i], 0, /* not sharable */ -+ (type == PCI_CAP_ID_MSIX) ? 
-+ "pcifront-msi-x":"pcifront-msi"); -+ if (irq < 0) -+ return -1; -+ -+ ret = set_irq_msi(irq, msidesc); -+ if (ret) -+ goto error_while; -+ i++; -+ } -+ kfree(v); -+ return 0; -+ -+error_while: -+ unbind_from_irqhandler(irq, NULL); -+error: -+ if (ret == -ENODEV) -+ dev_err(&dev->dev,"Xen PCI frontend has not registered" \ -+ " MSI/MSI-X support!\n"); -+ -+ kfree(v); -+ return ret; -+} -+ -+void xen_pci_teardown_msi_dev(struct pci_dev *dev) -+{ -+ /* Only do this when were are in non-privileged mode.*/ -+ if (!xen_initial_domain()) { -+ struct msi_desc *msidesc; -+ -+ msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); -+ if (msidesc->msi_attrib.is_msix) -+ xen_pci_frontend_disable_msix(dev); -+ else -+ xen_pci_frontend_disable_msi(dev); -+ } -+ -+} -+ -+void xen_pci_teardown_msi_irq(int irq) -+{ -+ xen_destroy_irq(irq); -+} -+#endif -+ -+static int xen_pcifront_enable_irq(struct pci_dev *dev) -+{ -+ int rc; -+ int share = 1; -+ -+ dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); -+ -+ if (dev->irq < 0) -+ return -EINVAL; -+ -+ if (dev->irq < NR_IRQS_LEGACY) -+ share = 0; -+ -+ rc = xen_allocate_pirq(dev->irq, share, "pcifront"); -+ if (rc < 0) { -+ dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", -+ dev->irq, rc); -+ return rc; -+ } -+ return 0; -+} -+ -+int __init pci_xen_init(void) -+{ -+ if (!xen_pv_domain() || xen_initial_domain()) -+ return -ENODEV; -+ -+ printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n"); -+ -+ pcibios_set_cache_line_size(); -+ -+ pcibios_enable_irq = xen_pcifront_enable_irq; -+ pcibios_disable_irq = NULL; -+ -+#ifdef CONFIG_ACPI -+ /* Keep ACPI out of the picture */ -+ acpi_noirq = 1; -+#endif -+ -+#ifdef CONFIG_ISAPNP -+ /* Stop isapnp from probing */ -+ isapnp_disable = 1; -+#endif -+ -+ /* Ensure a device still gets scanned even if it's fn number -+ * is non-zero. 
-+ */ -+ pci_scan_all_fns = 1; -+ -+ return 0; -+} -+ -diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig -index b83e119..3f9f4a0 100644 ---- a/arch/x86/xen/Kconfig -+++ b/arch/x86/xen/Kconfig -@@ -13,16 +13,18 @@ config XEN - kernel to boot in a paravirtualized environment under the - Xen hypervisor. - -+config XEN_PVHVM -+ def_bool y -+ depends on XEN -+ depends on X86_LOCAL_APIC -+ - config XEN_MAX_DOMAIN_MEMORY -- int "Maximum allowed size of a domain in gigabytes" -- default 8 if X86_32 -- default 32 if X86_64 -+ int -+ default 128 - depends on XEN - help -- The pseudo-physical to machine address array is sized -- according to the maximum possible memory size of a Xen -- domain. This array uses 1 page per gigabyte, so there's no -- need to be too stingy here. -+ This only affects the sizing of some bss arrays, the unused -+ portions of which are freed. - - config XEN_SAVE_RESTORE - bool -@@ -36,3 +38,40 @@ config XEN_DEBUG_FS - help - Enable statistics output and various tuning options in debugfs. - Enabling this option may incur a significant performance overhead. -+ -+config SWIOTLB_XEN -+ def_bool y -+ depends on XEN && SWIOTLB -+ -+config MICROCODE_XEN -+ def_bool y -+ depends on XEN_DOM0 && MICROCODE -+ -+config XEN_DOM0 -+ bool "Enable Xen privileged domain support" -+ depends on XEN && X86_IO_APIC && ACPI -+ help -+ The Xen hypervisor requires a privileged domain ("dom0") to -+ actually manage the machine, provide devices drivers, etc. -+ This option enables dom0 support. A dom0 kernel can also -+ run as an unprivileged domU kernel, or a kernel running -+ native on bare hardware. -+ -+# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST -+# name in tools. 
-+config XEN_PRIVILEGED_GUEST -+ def_bool XEN_DOM0 -+ -+config XEN_DOM0_PCI -+ def_bool y -+ depends on XEN_DOM0 && PCI -+ select PCI_XEN -+ -+config XEN_PCI_PASSTHROUGH -+ bool "Enable support for Xen PCI passthrough devices" -+ depends on XEN && PCI -+ select PCI_XEN -+ select SWIOTLB_XEN -+ help -+ Enable support for passing PCI devices through to -+ unprivileged domains. (COMPLETELY UNTESTED) -diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile -index 3bb4fc2..13ca65c 100644 ---- a/arch/x86/xen/Makefile -+++ b/arch/x86/xen/Makefile -@@ -12,9 +12,12 @@ CFLAGS_mmu.o := $(nostackp) - - obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ - time.o xen-asm.o xen-asm_$(BITS).o \ -- grant-table.o suspend.o -+ grant-table.o suspend.o platform-pci-unplug.o - - obj-$(CONFIG_SMP) += smp.o - obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o - obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -- -+obj-$(CONFIG_XEN_DOM0) += vga.o -+obj-$(CONFIG_XEN_DOM0) += apic.o -+obj-$(CONFIG_SWIOTLB) += pci-swiotlb-xen.o -+obj-$(CONFIG_XEN_DOM0_PCI) += pci.o -\ No newline at end of file -diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c -new file mode 100644 -index 0000000..21a3089 ---- /dev/null -+++ b/arch/x86/xen/apic.c -@@ -0,0 +1,33 @@ -+#include <linux/kernel.h> -+#include <linux/threads.h> -+#include <linux/bitmap.h> -+ -+#include <asm/io_apic.h> -+#include <asm/acpi.h> -+#include <asm/hw_irq.h> -+ -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/hypercall.h> -+ -+#include <xen/xen.h> -+#include <xen/interface/xen.h> -+#include <xen/interface/physdev.h> -+ -+void __init xen_io_apic_init(void) -+{ -+ enable_IO_APIC(); -+} -+ -+void xen_init_apic(void) -+{ -+ if (!xen_initial_domain()) -+ return; -+ -+#ifdef CONFIG_ACPI -+ /* -+ * Pretend ACPI found our lapic even though we've disabled it, -+ * to prevent MP tables from setting up lapics. 
-+ */ -+ acpi_lapic = 1; -+#endif -+} -diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c -index 0087b00..070f138 100644 ---- a/arch/x86/xen/enlighten.c -+++ b/arch/x86/xen/enlighten.c -@@ -11,6 +11,7 @@ - * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 - */ - -+#include <linux/cpu.h> - #include <linux/kernel.h> - #include <linux/init.h> - #include <linux/smp.h> -@@ -28,12 +29,15 @@ - #include <linux/highmem.h> - #include <linux/console.h> - -+#include <xen/xen.h> - #include <xen/interface/xen.h> - #include <xen/interface/version.h> - #include <xen/interface/physdev.h> - #include <xen/interface/vcpu.h> -+#include <xen/interface/memory.h> - #include <xen/features.h> - #include <xen/page.h> -+#include <xen/hvm.h> - #include <xen/hvc-console.h> - - #include <asm/paravirt.h> -@@ -53,6 +57,7 @@ - #include <asm/tlbflush.h> - #include <asm/reboot.h> - #include <asm/stackprotector.h> -+#include <asm/hypervisor.h> - - #include "xen-ops.h" - #include "mmu.h" -@@ -66,6 +71,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); - enum xen_domain_type xen_domain_type = XEN_NATIVE; - EXPORT_SYMBOL_GPL(xen_domain_type); - -+unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; -+EXPORT_SYMBOL(machine_to_phys_mapping); -+unsigned int machine_to_phys_order; -+EXPORT_SYMBOL(machine_to_phys_order); -+ - struct start_info *xen_start_info; - EXPORT_SYMBOL_GPL(xen_start_info); - -@@ -73,6 +83,9 @@ struct shared_info xen_dummy_shared_info; - - void *xen_initial_gdt; - -+__read_mostly int xen_have_vector_callback; -+EXPORT_SYMBOL_GPL(xen_have_vector_callback); -+ - /* - * Point at some empty memory to start with. We map the real shared_info - * page as soon as fixmap is up and running. 
-@@ -94,6 +107,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; - */ - static int have_vcpu_info_placement = 1; - -+static void clamp_max_cpus(void) -+{ -+#ifdef CONFIG_SMP -+ if (setup_max_cpus > MAX_VIRT_CPUS) -+ setup_max_cpus = MAX_VIRT_CPUS; -+#endif -+} -+ - static void xen_vcpu_setup(int cpu) - { - struct vcpu_register_vcpu_info info; -@@ -101,19 +122,20 @@ static void xen_vcpu_setup(int cpu) - struct vcpu_info *vcpup; - - BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); -- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; - -- if (!have_vcpu_info_placement) -- return; /* already tested, not available */ -+ if (cpu < MAX_VIRT_CPUS) -+ per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; - -- vcpup = &per_cpu(xen_vcpu_info, cpu); -+ if (!have_vcpu_info_placement) { -+ if (cpu >= MAX_VIRT_CPUS) -+ clamp_max_cpus(); -+ return; -+ } - -+ vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = arbitrary_virt_to_mfn(vcpup); - info.offset = offset_in_page(vcpup); - -- printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", -- cpu, vcpup, info.mfn, info.offset); -- - /* Check to see if the hypervisor will put the vcpu_info - structure where we want it, which allows direct access via - a percpu-variable. */ -@@ -122,13 +144,11 @@ static void xen_vcpu_setup(int cpu) - if (err) { - printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); - have_vcpu_info_placement = 0; -+ clamp_max_cpus(); - } else { - /* This cpu is using the registered vcpu info, even if - later ones fail to. 
*/ - per_cpu(xen_vcpu, cpu) = vcpup; -- -- printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n", -- cpu, vcpup); - } - } - -@@ -167,13 +187,16 @@ static void __init xen_banner(void) - - printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - pv_info.name); -- printk(KERN_INFO "Xen version: %d.%d%s%s\n", -+ printk(KERN_INFO "Xen version: %d.%d%s%s%s\n", - version >> 16, version & 0xffff, extra.extraversion, -- xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); -+ xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? -+ " (preserve-AD)" : "", -+ xen_initial_domain() ? " (dom0)" : ""); - } - - static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; - static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; -+static __read_mostly unsigned int cpuid_leaf81_edx_mask = ~0; - - static void xen_cpuid(unsigned int *ax, unsigned int *bx, - unsigned int *cx, unsigned int *dx) -@@ -187,7 +210,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, - * unsupported kernel subsystems as possible. 
- */ - switch (*ax) { -- case 1: -+ case 0x1: - maskecx = cpuid_leaf1_ecx_mask; - maskedx = cpuid_leaf1_edx_mask; - break; -@@ -196,6 +219,10 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, - /* Suppress extended topology stuff */ - maskebx = 0; - break; -+ -+ case 0x80000001: -+ maskedx = cpuid_leaf81_edx_mask; -+ break; - } - - asm(XEN_EMULATE_PREFIX "cpuid" -@@ -213,34 +240,29 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, - static __init void xen_init_cpuid_mask(void) - { - unsigned int ax, bx, cx, dx; -+ unsigned int xsave_mask; - - cpuid_leaf1_edx_mask = -- ~((1 << X86_FEATURE_MCE) | /* disable MCE */ -- (1 << X86_FEATURE_MCA) | /* disable MCA */ -- (1 << X86_FEATURE_ACC)); /* thermal monitoring */ -+ ~(1 << X86_FEATURE_ACC); /* thermal monitoring */ -+ -+ cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32)); - - if (!xen_initial_domain()) - cpuid_leaf1_edx_mask &= -- ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ -+ ~((1 << X86_FEATURE_MCE) | /* disable MCE */ -+ (1 << X86_FEATURE_MCA) | /* disable MCA */ -+ (1 << X86_FEATURE_APIC) | /* disable local APIC */ - (1 << X86_FEATURE_ACPI)); /* disable ACPI */ -- - ax = 1; -- cx = 0; - xen_cpuid(&ax, &bx, &cx, &dx); - -- /* cpuid claims we support xsave; try enabling it to see what happens */ -- if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { -- unsigned long cr4; -- -- set_in_cr4(X86_CR4_OSXSAVE); -- -- cr4 = read_cr4(); -- -- if ((cr4 & X86_CR4_OSXSAVE) == 0) -- cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); -+ xsave_mask = -+ (1 << (X86_FEATURE_XSAVE % 32)) | -+ (1 << (X86_FEATURE_OSXSAVE % 32)); - -- clear_in_cr4(X86_CR4_OSXSAVE); -- } -+ /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ -+ if ((cx & xsave_mask) != xsave_mask) -+ cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ - } - - static void xen_set_debugreg(int reg, unsigned long val) -@@ -406,7 +428,7 @@ static __init void xen_load_gdt_boot(const struct desc_ptr *dtr) - 
- pte = pfn_pte(pfn, PAGE_KERNEL_RO); - -- if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) -+ if (HYPERVISOR_update_va_mapping(va, pte, 0)) - BUG(); - - frames[f] = mfn; -@@ -517,13 +539,13 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, - return 0; - #ifdef CONFIG_X86_MCE - } else if (addr == (unsigned long)machine_check) { -- return 0; -+ /* We can use the original machine_check handler, -+ despite IST. */ - #endif -- } else { -- /* Some other trap using IST? */ -- if (WARN_ON(val->ist != 0)) -- return 0; -- } -+ } else if (WARN(val->ist != 0, -+ "Unknown IST-using trap: vector %d, %pF, val->ist=%d\n", -+ vector, (void *)addr, val->ist)) -+ return 0; - #endif /* CONFIG_X86_64 */ - info->address = addr; - -@@ -679,6 +701,18 @@ static void xen_set_iopl_mask(unsigned mask) - HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - } - -+static void xen_set_io_bitmap(struct thread_struct *thread, -+ unsigned long bytes_updated) -+{ -+ struct physdev_set_iobitmap set_iobitmap; -+ -+ set_xen_guest_handle(set_iobitmap.bitmap, -+ (char *)thread->io_bitmap_ptr); -+ set_iobitmap.nr_ports = thread->io_bitmap_ptr ? IO_BITMAP_BITS : 0; -+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, -+ &set_iobitmap)); -+} -+ - static void xen_io_delay(void) - { - } -@@ -716,7 +750,7 @@ static u32 xen_safe_apic_wait_icr_idle(void) - return 0; - } - --static void set_xen_basic_apic_ops(void) -+static __init void set_xen_basic_apic_ops(void) - { - apic->read = xen_apic_read; - apic->write = xen_apic_write; -@@ -728,7 +762,6 @@ static void set_xen_basic_apic_ops(void) - - #endif - -- - static void xen_clts(void) - { - struct multicall_space mcs; -@@ -811,6 +844,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) - Xen console noise. 
*/ - break; - -+ case MSR_IA32_CR_PAT: -+ if (smp_processor_id() == 0) -+ xen_set_pat(((u64)high << 32) | low); -+ break; -+ - default: - ret = native_write_msr_safe(msr, low, high); - } -@@ -849,8 +887,6 @@ void xen_setup_vcpu_info_placement(void) - /* xen_vcpu_setup managed to place the vcpu_info within the - percpu area for all cpus, so make use of it */ - if (have_vcpu_info_placement) { -- printk(KERN_INFO "Xen: using vcpu_info placement\n"); -- - pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); - pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); - pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); -@@ -923,10 +959,6 @@ static const struct pv_init_ops xen_init_ops __initdata = { - .patch = xen_patch, - }; - --static const struct pv_time_ops xen_time_ops __initdata = { -- .sched_clock = xen_clocksource_read, --}; -- - static const struct pv_cpu_ops xen_cpu_ops __initdata = { - .cpuid = xen_cpuid, - -@@ -978,6 +1010,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { - .load_sp0 = xen_load_sp0, - - .set_iopl_mask = xen_set_iopl_mask, -+ .set_io_bitmap = xen_set_io_bitmap, - .io_delay = xen_io_delay, - - /* Xen takes care of %gs when switching to usermode for us */ -@@ -1016,15 +1049,40 @@ static void xen_machine_halt(void) - xen_reboot(SHUTDOWN_poweroff); - } - -+static void xen_machine_power_off(void) -+{ -+ if (pm_power_off) -+ pm_power_off(); -+ else -+ xen_reboot(SHUTDOWN_poweroff); -+} -+ - static void xen_crash_shutdown(struct pt_regs *regs) - { - xen_reboot(SHUTDOWN_crash); - } - -+static int -+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) -+{ -+ xen_reboot(SHUTDOWN_crash); -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block xen_panic_block = { -+ .notifier_call= xen_panic_event, -+}; -+ -+int xen_panic_handler_init(void) -+{ -+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); -+ return 0; -+} -+ - static const struct machine_ops 
__initdata xen_machine_ops = { - .restart = xen_restart, - .halt = xen_machine_halt, -- .power_off = xen_machine_halt, -+ .power_off = xen_machine_power_off, - .shutdown = xen_machine_halt, - .crash_shutdown = xen_crash_shutdown, - .emergency_restart = xen_emergency_restart, -@@ -1057,10 +1115,11 @@ asmlinkage void __init xen_start_kernel(void) - - xen_domain_type = XEN_PV_DOMAIN; - -+ xen_setup_machphys_mapping(); -+ - /* Install Xen paravirt ops */ - pv_info = xen_info; - pv_init_ops = xen_init_ops; -- pv_time_ops = xen_time_ops; - pv_cpu_ops = xen_cpu_ops; - pv_apic_ops = xen_apic_ops; - -@@ -1068,13 +1127,7 @@ asmlinkage void __init xen_start_kernel(void) - x86_init.oem.arch_setup = xen_arch_setup; - x86_init.oem.banner = xen_banner; - -- x86_init.timers.timer_init = xen_time_init; -- x86_init.timers.setup_percpu_clockev = x86_init_noop; -- x86_cpuinit.setup_percpu_clockev = x86_init_noop; -- -- x86_platform.calibrate_tsc = xen_tsc_khz; -- x86_platform.get_wallclock = xen_get_wallclock; -- x86_platform.set_wallclock = xen_set_wallclock; -+ xen_init_time_ops(); - - /* - * Set up some pagetable state before starting to set any ptes. -@@ -1112,6 +1165,10 @@ asmlinkage void __init xen_start_kernel(void) - */ - xen_setup_stackprotector(); - -+#ifdef CONFIG_SPARSE_IRQ -+ nr_dynamic_irqs += 256; -+#endif -+ - xen_init_irq_ops(); - xen_init_cpuid_mask(); - -@@ -1138,8 +1195,19 @@ asmlinkage void __init xen_start_kernel(void) - - xen_smp_init(); - -+#ifdef CONFIG_ACPI_NUMA -+ /* -+ * The pages we from Xen are not related to machine pages, so -+ * any NUMA information the kernel tries to get from ACPI will -+ * be meaningless. Prevent it from trying. -+ */ -+ acpi_numa = -1; -+#endif -+ - pgd = (pgd_t *)xen_start_info->pt_base; - -+ __supported_pte_mask |= _PAGE_IOMAP; -+ - /* Don't do the full vcpu_info placement stuff until we have a - possible map and a non-dummy shared_info. 
*/ - per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; -@@ -1149,6 +1217,10 @@ asmlinkage void __init xen_start_kernel(void) - - xen_raw_console_write("mapping kernel into physical memory\n"); - pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); -+ xen_ident_map_ISA(); -+ -+ /* Allocate and initialize top and mid mfn levels for p2m structure */ -+ xen_build_mfn_list_list(); - - init_mm.pgd = pgd; - -@@ -1158,6 +1230,14 @@ asmlinkage void __init xen_start_kernel(void) - if (xen_feature(XENFEAT_supervisor_mode_kernel)) - pv_info.kernel_rpl = 0; - -+ if (xen_initial_domain()) { -+ struct physdev_set_iopl set_iopl; -+ set_iopl.iopl = 1; -+ if (HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl) == -1) -+ BUG(); -+ xen_init_apic(); -+ } -+ - /* set the limit of our address space */ - xen_reserve_top(); - -@@ -1180,6 +1260,16 @@ asmlinkage void __init xen_start_kernel(void) - add_preferred_console("xenboot", 0, NULL); - add_preferred_console("tty", 0, NULL); - add_preferred_console("hvc", 0, NULL); -+ -+ boot_params.screen_info.orig_video_isVGA = 0; -+ } else { -+ const struct dom0_vga_console_info *info = -+ (void *)((char *)xen_start_info + -+ xen_start_info->console.dom0.info_off); -+ -+ xen_init_vga(info, xen_start_info->console.dom0.info_size); -+ xen_start_info->console.domU.mfn = 0; -+ xen_start_info->console.domU.evtchn = 0; - } - - xen_raw_console_write("about to get started...\n"); -@@ -1193,3 +1283,126 @@ asmlinkage void __init xen_start_kernel(void) - x86_64_start_reservations((char *)__pa_symbol(&boot_params)); - #endif - } -+ -+static uint32_t xen_cpuid_base(void) -+{ -+ uint32_t base, eax, ebx, ecx, edx; -+ char signature[13]; -+ -+ for (base = 0x40000000; base < 0x40010000; base += 0x100) { -+ cpuid(base, &eax, &ebx, &ecx, &edx); -+ *(uint32_t *)(signature + 0) = ebx; -+ *(uint32_t *)(signature + 4) = ecx; -+ *(uint32_t *)(signature + 8) = edx; -+ signature[12] = 0; -+ -+ if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) 
>= 2)) -+ return base; -+ } -+ -+ return 0; -+} -+ -+static int init_hvm_pv_info(int *major, int *minor) -+{ -+ uint32_t eax, ebx, ecx, edx, pages, msr, base; -+ u64 pfn; -+ -+ base = xen_cpuid_base(); -+ if (!base) -+ return -EINVAL; -+ -+ cpuid(base + 1, &eax, &ebx, &ecx, &edx); -+ -+ *major = eax >> 16; -+ *minor = eax & 0xffff; -+ printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); -+ -+ cpuid(base + 2, &pages, &msr, &ecx, &edx); -+ -+ pfn = __pa(hypercall_page); -+ wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); -+ -+ xen_setup_features(); -+ -+ pv_info = xen_info; -+ pv_info.kernel_rpl = 0; -+ -+ xen_domain_type = XEN_HVM_DOMAIN; -+ -+ return 0; -+} -+ -+void xen_hvm_init_shared_info(void) -+{ -+ int cpu; -+ struct xen_add_to_physmap xatp; -+ static struct shared_info *shared_info_page = 0; -+ -+ if (!shared_info_page) -+ shared_info_page = (struct shared_info *) alloc_bootmem_pages(PAGE_SIZE); -+ xatp.domid = DOMID_SELF; -+ xatp.idx = 0; -+ xatp.space = XENMAPSPACE_shared_info; -+ xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; -+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) -+ BUG(); -+ -+ HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; -+ -+ /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info -+ * page, we use it in the event channel upcall and in some pvclock -+ * related functions. We don't need the vcpu_info placement -+ * optimizations because we don't use any pv_mmu or pv_irq op on -+ * HVM. -+ * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is -+ * online but xen_hvm_init_shared_info is run at resume time too and -+ * in that case multiple vcpus might be online. 
*/ -+ for_each_online_cpu(cpu) { -+ per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; -+ } -+} -+ -+#ifdef CONFIG_XEN_PVHVM -+static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, -+ unsigned long action, void *hcpu) -+{ -+ int cpu = (long)hcpu; -+ switch (action) { -+ case CPU_UP_PREPARE: -+ per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; -+ break; -+ default: -+ break; -+ } -+ return NOTIFY_OK; -+} -+ -+static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = { -+ .notifier_call = xen_hvm_cpu_notify, -+}; -+ -+void __init xen_hvm_guest_init(void) -+{ -+ int r; -+ int major, minor; -+ -+ if (xen_pv_domain()) -+ return; -+ -+ r = init_hvm_pv_info(&major, &minor); -+ if (r < 0) -+ return; -+ -+ xen_hvm_init_shared_info(); -+ -+ if (xen_feature(XENFEAT_hvm_callback_vector)) -+ xen_have_vector_callback = 1; -+ register_cpu_notifier(&xen_hvm_cpu_notifier); -+ xen_unplug_emulated_devices(); -+ have_vcpu_info_placement = 0; -+ x86_init.irqs.intr_init = xen_init_IRQ; -+ xen_hvm_init_time_ops(); -+ xen_hvm_init_mmu_ops(); -+} -+#endif -diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c -index 350a3de..32a1c65 100644 ---- a/arch/x86/xen/mmu.c -+++ b/arch/x86/xen/mmu.c -@@ -42,6 +42,7 @@ - #include <linux/highmem.h> - #include <linux/debugfs.h> - #include <linux/bug.h> -+#include <linux/vmalloc.h> - #include <linux/module.h> - - #include <asm/pgtable.h> -@@ -50,14 +51,20 @@ - #include <asm/mmu_context.h> - #include <asm/setup.h> - #include <asm/paravirt.h> -+#include <asm/e820.h> - #include <asm/linkage.h> -+#include <asm/pat.h> -+#include <asm/init.h> -+#include <asm/page.h> - - #include <asm/xen/hypercall.h> - #include <asm/xen/hypervisor.h> - - #include <xen/page.h> - #include <xen/interface/xen.h> -+#include <xen/interface/hvm/hvm_op.h> - #include <xen/interface/version.h> -+#include <xen/interface/memory.h> - #include <xen/hvc-console.h> - - #include "multicalls.h" -@@ -66,6 +73,13 @@ - - #define 
MMU_UPDATE_HISTO 30 - -+/* -+ * Protects atomic reservation decrease/increase against concurrent increases. -+ * Also protects non-atomic updates of current_pages and driver_pages, and -+ * balloon lists. -+ */ -+DEFINE_SPINLOCK(xen_reservation_lock); -+ - #ifdef CONFIG_XEN_DEBUG_FS - - static struct { -@@ -124,7 +138,8 @@ static inline void check_zero(void) - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ --static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; -+#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) -+static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); - - #ifdef CONFIG_X86_64 - /* l3 pud for userspace vsyscall mapping */ -@@ -155,49 +170,202 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ - */ - #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) - -+/* -+ * Xen leaves the responsibility for maintaining p2m mappings to the -+ * guests themselves, but it must also access and update the p2m array -+ * during suspend/resume when all the pages are reallocated. -+ * -+ * The p2m table is logically a flat array, but we implement it as a -+ * three-level tree to allow the address space to be sparse. -+ * -+ * Xen -+ * | -+ * p2m_top p2m_top_mfn -+ * / \ / \ -+ * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn -+ * / \ / \ / / -+ * p2m p2m p2m p2m p2m p2m p2m ... -+ * -+ * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. -+ * -+ * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the -+ * maximum representable pseudo-physical address space is: -+ * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages -+ * -+ * P2M_PER_PAGE depends on the architecture, as a mfn is always -+ * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to -+ * 512 and 1024 entries respectively. 
-+ */ -+ -+unsigned long xen_max_p2m_pfn __read_mostly; - --#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) --#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) -+#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) -+#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) -+#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) - --/* Placeholder for holes in the address space */ --static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data = -- { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; -+#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) - -- /* Array of pointers to pages containing p2m entries */ --static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data = -- { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; -+/* Placeholders for holes in the address space */ -+static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); -+static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); -+static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); - --/* Arrays of p2m arrays expressed in mfns used for save/restore */ --static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss; -+static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); -+static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); -+static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); - --static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE] -- __page_aligned_bss; -+RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); -+RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); - - static inline unsigned p2m_top_index(unsigned long pfn) - { -- BUG_ON(pfn >= MAX_DOMAIN_PAGES); -- return pfn / P2M_ENTRIES_PER_PAGE; -+ BUG_ON(pfn >= MAX_P2M_PFN); -+ return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); -+} -+ -+static inline 
unsigned p2m_mid_index(unsigned long pfn) -+{ -+ return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; - } - - static inline unsigned p2m_index(unsigned long pfn) - { -- return pfn % P2M_ENTRIES_PER_PAGE; -+ return pfn % P2M_PER_PAGE; -+} -+ -+static void p2m_top_init(unsigned long ***top) -+{ -+ unsigned i; -+ -+ for (i = 0; i < P2M_TOP_PER_PAGE; i++) -+ top[i] = p2m_mid_missing; -+} -+ -+static void p2m_top_mfn_init(unsigned long *top) -+{ -+ unsigned i; -+ -+ for (i = 0; i < P2M_TOP_PER_PAGE; i++) -+ top[i] = virt_to_mfn(p2m_mid_missing_mfn); -+} -+ -+static void p2m_top_mfn_p_init(unsigned long **top) -+{ -+ unsigned i; -+ -+ for (i = 0; i < P2M_TOP_PER_PAGE; i++) -+ top[i] = p2m_mid_missing_mfn; -+} -+ -+static void p2m_mid_init(unsigned long **mid) -+{ -+ unsigned i; -+ -+ for (i = 0; i < P2M_MID_PER_PAGE; i++) -+ mid[i] = p2m_missing; -+} -+ -+static void p2m_mid_mfn_init(unsigned long *mid) -+{ -+ unsigned i; -+ -+ for (i = 0; i < P2M_MID_PER_PAGE; i++) -+ mid[i] = virt_to_mfn(p2m_missing); -+} -+ -+static void p2m_init(unsigned long *p2m) -+{ -+ unsigned i; -+ -+ for (i = 0; i < P2M_MID_PER_PAGE; i++) -+ p2m[i] = INVALID_P2M_ENTRY; -+} -+ -+static int lookup_pte_fn( -+ pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) -+{ -+ uint64_t *ptep = (uint64_t *)data; -+ if (ptep) -+ *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pmd_page)) << -+ PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK); -+ return 0; - } - --/* Build the parallel p2m_top_mfn structures */ -+int create_lookup_pte_addr(struct mm_struct *mm, -+ unsigned long address, -+ uint64_t *ptep) -+{ -+ return apply_to_page_range(mm, address, PAGE_SIZE, -+ lookup_pte_fn, ptep); -+} -+ -+EXPORT_SYMBOL(create_lookup_pte_addr); -+ -+/* -+ * Build the parallel p2m_top_mfn and p2m_mid_mfn structures -+ * -+ * This is called both at boot time, and after resuming from suspend: -+ * - At boot time we're called very early, and must use extend_brk() -+ * to allocate memory. 
-+ * -+ * - After resume we're called from within stop_machine, but the mfn -+ * tree should alreay be completely allocated. -+ */ - void xen_build_mfn_list_list(void) - { -- unsigned pfn, idx; -+ unsigned long pfn; - -- for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { -- unsigned topidx = p2m_top_index(pfn); -+ /* Pre-initialize p2m_top_mfn to be completely missing */ -+ if (p2m_top_mfn == NULL) { -+ p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); -+ p2m_mid_mfn_init(p2m_mid_missing_mfn); -+ -+ p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); -+ p2m_top_mfn_p_init(p2m_top_mfn_p); - -- p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); -+ p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); -+ p2m_top_mfn_init(p2m_top_mfn); -+ } else { -+ /* Reinitialise, mfn's all change after migration */ -+ p2m_mid_mfn_init(p2m_mid_missing_mfn); - } - -- for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { -- unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; -- p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); -+ for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { -+ unsigned topidx = p2m_top_index(pfn); -+ unsigned mididx = p2m_mid_index(pfn); -+ unsigned long **mid; -+ unsigned long *mid_mfn_p; -+ -+ mid = p2m_top[topidx]; -+ mid_mfn_p = p2m_top_mfn_p[topidx]; -+ -+ /* Don't bother allocating any mfn mid levels if -+ * they're just missing, just update the stored mfn, -+ * since all could have changed over a migrate. -+ */ -+ if (mid == p2m_mid_missing) { -+ BUG_ON(mididx); -+ BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); -+ p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); -+ pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; -+ continue; -+ } -+ -+ if (mid_mfn_p == p2m_mid_missing_mfn) { -+ /* -+ * XXX boot-time only! We should never find -+ * missing parts of the mfn tree after -+ * runtime. extend_brk() will BUG if we call -+ * it too late. 
-+ */ -+ mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); -+ p2m_mid_mfn_init(mid_mfn_p); -+ -+ p2m_top_mfn_p[topidx] = mid_mfn_p; -+ } -+ -+ p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); -+ mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); - } - } - -@@ -206,8 +374,8 @@ void xen_setup_mfn_list_list(void) - BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); - - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = -- virt_to_mfn(p2m_top_mfn_list); -- HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; -+ virt_to_mfn(p2m_top_mfn); -+ HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; - } - - /* Set up p2m_top to point to the domain-builder provided p2m pages */ -@@ -215,98 +383,176 @@ void __init xen_build_dynamic_phys_to_machine(void) - { - unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; - unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); -- unsigned pfn; -+ unsigned long pfn; -+ -+ xen_max_p2m_pfn = max_pfn; - -- for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { -+ p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); -+ p2m_init(p2m_missing); -+ -+ p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); -+ p2m_mid_init(p2m_mid_missing); -+ -+ p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); -+ p2m_top_init(p2m_top); -+ -+ /* -+ * The domain builder gives us a pre-constructed p2m array in -+ * mfn_list for all the pages initially given to us, so we just -+ * need to graft that into our tree structure. 
-+ */ -+ for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); -+ unsigned mididx = p2m_mid_index(pfn); - -- p2m_top[topidx] = &mfn_list[pfn]; -- } -+ if (p2m_top[topidx] == p2m_mid_missing) { -+ unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); -+ p2m_mid_init(mid); -+ -+ p2m_top[topidx] = mid; -+ } - -- xen_build_mfn_list_list(); -+ p2m_top[topidx][mididx] = &mfn_list[pfn]; -+ } - } - - unsigned long get_phys_to_machine(unsigned long pfn) - { -- unsigned topidx, idx; -+ unsigned topidx, mididx, idx; - -- if (unlikely(pfn >= MAX_DOMAIN_PAGES)) -+ if (unlikely(pfn >= MAX_P2M_PFN)) - return INVALID_P2M_ENTRY; - - topidx = p2m_top_index(pfn); -+ mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); -- return p2m_top[topidx][idx]; -+ -+ return p2m_top[topidx][mididx][idx]; - } - EXPORT_SYMBOL_GPL(get_phys_to_machine); - --/* install a new p2m_top page */ --bool install_p2mtop_page(unsigned long pfn, unsigned long *p) -+static void *alloc_p2m_page(void) - { -- unsigned topidx = p2m_top_index(pfn); -- unsigned long **pfnp, *mfnp; -- unsigned i; -+ return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); -+} - -- pfnp = &p2m_top[topidx]; -- mfnp = &p2m_top_mfn[topidx]; -+static void free_p2m_page(void *p) -+{ -+ free_page((unsigned long)p); -+} - -- for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) -- p[i] = INVALID_P2M_ENTRY; -+/* -+ * Fully allocate the p2m structure for a given pfn. We need to check -+ * that both the top and mid levels are allocated, and make sure the -+ * parallel mfn tree is kept in sync. We may race with other cpus, so -+ * the new pages are installed with cmpxchg; if we lose the race then -+ * simply free the page we allocated and use the one that's there. 
-+ */ -+static bool alloc_p2m(unsigned long pfn) -+{ -+ unsigned topidx, mididx; -+ unsigned long ***top_p, **mid; -+ unsigned long *top_mfn_p, *mid_mfn; - -- if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { -- *mfnp = virt_to_mfn(p); -- return true; -+ topidx = p2m_top_index(pfn); -+ mididx = p2m_mid_index(pfn); -+ -+ top_p = &p2m_top[topidx]; -+ mid = *top_p; -+ -+ if (mid == p2m_mid_missing) { -+ /* Mid level is missing, allocate a new one */ -+ mid = alloc_p2m_page(); -+ if (!mid) -+ return false; -+ -+ p2m_mid_init(mid); -+ -+ if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) -+ free_p2m_page(mid); - } - -- return false; --} -+ top_mfn_p = &p2m_top_mfn[topidx]; -+ mid_mfn = p2m_top_mfn_p[topidx]; - --static void alloc_p2m(unsigned long pfn) --{ -- unsigned long *p; -+ BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); -+ -+ if (mid_mfn == p2m_mid_missing_mfn) { -+ /* Separately check the mid mfn level */ -+ unsigned long missing_mfn; -+ unsigned long mid_mfn_mfn; -+ -+ mid_mfn = alloc_p2m_page(); -+ if (!mid_mfn) -+ return false; - -- p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); -- BUG_ON(p == NULL); -+ p2m_mid_mfn_init(mid_mfn); - -- if (!install_p2mtop_page(pfn, p)) -- free_page((unsigned long)p); -+ missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); -+ mid_mfn_mfn = virt_to_mfn(mid_mfn); -+ if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) -+ free_p2m_page(mid_mfn); -+ else -+ p2m_top_mfn_p[topidx] = mid_mfn; -+ } -+ -+ if (p2m_top[topidx][mididx] == p2m_missing) { -+ /* p2m leaf page is missing */ -+ unsigned long *p2m; -+ -+ p2m = alloc_p2m_page(); -+ if (!p2m) -+ return false; -+ -+ p2m_init(p2m); -+ -+ if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) -+ free_p2m_page(p2m); -+ else -+ mid_mfn[mididx] = virt_to_mfn(p2m); -+ } -+ -+ return true; - } - - /* Try to install p2m mapping; fail if intermediate bits missing */ - bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) - { -- unsigned topidx, idx; 
-+ unsigned topidx, mididx, idx; - -- if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { -+ if (unlikely(pfn >= MAX_P2M_PFN)) { - BUG_ON(mfn != INVALID_P2M_ENTRY); - return true; - } - - topidx = p2m_top_index(pfn); -- if (p2m_top[topidx] == p2m_missing) { -- if (mfn == INVALID_P2M_ENTRY) -- return true; -- return false; -- } -- -+ mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); -- p2m_top[topidx][idx] = mfn; -+ -+ if (p2m_top[topidx][mididx] == p2m_missing) -+ return mfn == INVALID_P2M_ENTRY; -+ -+ p2m_top[topidx][mididx][idx] = mfn; - - return true; - } - --void set_phys_to_machine(unsigned long pfn, unsigned long mfn) -+bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) - { - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); -- return; -+ return true; - } - - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { -- alloc_p2m(pfn); -+ if (!alloc_p2m(pfn)) -+ return false; - - if (!__set_phys_to_machine(pfn, mfn)) -- BUG(); -+ return false; - } -+ -+ return true; - } - - unsigned long arbitrary_virt_to_mfn(void *vaddr) -@@ -315,6 +561,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr) - - return PFN_DOWN(maddr.maddr); - } -+EXPORT_SYMBOL_GPL(set_phys_to_machine); - - xmaddr_t arbitrary_virt_to_machine(void *vaddr) - { -@@ -345,7 +592,8 @@ void make_lowmem_page_readonly(void *vaddr) - unsigned int level; - - pte = lookup_address(address, &level); -- BUG_ON(pte == NULL); -+ if (pte == NULL) -+ return; /* vaddr missing */ - - ptev = pte_wrprotect(*pte); - -@@ -360,7 +608,8 @@ void make_lowmem_page_readwrite(void *vaddr) - unsigned int level; - - pte = lookup_address(address, &level); -- BUG_ON(pte == NULL); -+ if (pte == NULL) -+ return; /* vaddr missing */ - - ptev = pte_mkwrite(*pte); - -@@ -376,6 +625,24 @@ static bool xen_page_pinned(void *ptr) - return PagePinned(page); - } - -+void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid) -+{ -+ struct multicall_space mcs; -+ struct 
mmu_update *u; -+ -+ mcs = xen_mc_entry(sizeof(*u)); -+ u = mcs.args; -+ -+ /* ptep might be kmapped when using 32-bit HIGHPTE */ -+ u->ptr = arbitrary_virt_to_machine(ptep).maddr; -+ u->val = pte_val_ma(pteval); -+ -+ MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid); -+ -+ xen_mc_issue(PARAVIRT_LAZY_MMU); -+} -+EXPORT_SYMBOL_GPL(xen_set_domain_pte); -+ - static void xen_extend_mmu_update(const struct mmu_update *update) - { - struct multicall_space mcs; -@@ -516,7 +783,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) - if (val & _PAGE_PRESENT) { - unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; - pteval_t flags = val & PTE_FLAGS_MASK; -- val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; -+ unsigned long mfn = pfn_to_mfn(pfn); -+ -+ /* -+ * If there's no mfn for the pfn, then just create an -+ * empty non-present pte. Unfortunately this loses -+ * information about the original pfn, so -+ * pte_mfn_to_pfn is asymmetric. -+ */ -+ if (unlikely(mfn == INVALID_P2M_ENTRY)) { -+ mfn = 0; -+ flags = 0; -+ } -+ -+ val = ((pteval_t)mfn << PAGE_SHIFT) | flags; -+ } -+ -+ return val; -+} -+ -+static pteval_t iomap_pte(pteval_t val) -+{ -+ if (val & _PAGE_PRESENT) { -+ unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; -+ pteval_t flags = val & PTE_FLAGS_MASK; -+ -+ /* We assume the pte frame number is a MFN, so -+ just use it as-is. 
*/ -+ val = ((pteval_t)pfn << PAGE_SHIFT) | flags; - } - - return val; -@@ -524,7 +818,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) - - pteval_t xen_pte_val(pte_t pte) - { -- return pte_mfn_to_pfn(pte.pte); -+ pteval_t pteval = pte.pte; -+ -+ /* If this is a WC pte, convert back from Xen WC to Linux WC */ -+ if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) { -+ WARN_ON(!pat_enabled); -+ pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; -+ } -+ -+ if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) -+ return pteval; -+ -+ return pte_mfn_to_pfn(pteval); - } - PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); - -@@ -534,9 +839,62 @@ pgdval_t xen_pgd_val(pgd_t pgd) - } - PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); - -+/* -+ * Xen's PAT setup is part of its ABI, though I assume entries 6 & 7 -+ * are reserved for now, to correspond to the Intel-reserved PAT -+ * types. -+ * -+ * We expect Linux's PAT set as follows: -+ * -+ * Idx PTE flags Linux Xen Default -+ * 0 WB WB WB -+ * 1 PWT WC WT WT -+ * 2 PCD UC- UC- UC- -+ * 3 PCD PWT UC UC UC -+ * 4 PAT WB WC WB -+ * 5 PAT PWT WC WP WT -+ * 6 PAT PCD UC- UC UC- -+ * 7 PAT PCD PWT UC UC UC -+ */ -+ -+void xen_set_pat(u64 pat) -+{ -+ /* We expect Linux to use a PAT setting of -+ * UC UC- WC WB (ignoring the PAT flag) */ -+ WARN_ON(pat != 0x0007010600070106ull); -+} -+ - pte_t xen_make_pte(pteval_t pte) - { -- pte = pte_pfn_to_mfn(pte); -+ phys_addr_t addr = (pte & PTE_PFN_MASK); -+ -+ /* If Linux is trying to set a WC pte, then map to the Xen WC. -+ * If _PAGE_PAT is set, then it probably means it is really -+ * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope -+ * things work out OK... -+ * -+ * (We should never see kernel mappings with _PAGE_PSE set, -+ * but we could see hugetlbfs mappings, I think.). 
-+ */ -+ if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) { -+ if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT) -+ pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; -+ } -+ -+ /* -+ * Unprivileged domains are allowed to do IOMAPpings for -+ * PCI passthrough, but not map ISA space. The ISA -+ * mappings are just dummy local mappings to keep other -+ * parts of the kernel happy. -+ */ -+ if (unlikely(pte & _PAGE_IOMAP) && -+ (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { -+ pte = iomap_pte(pte); -+ } else { -+ pte &= ~_PAGE_IOMAP; -+ pte = pte_pfn_to_mfn(pte); -+ } -+ - return native_make_pte(pte); - } - PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); -@@ -934,8 +1292,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, - read-only, and can be pinned. */ - static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) - { -- vm_unmap_aliases(); -- - xen_mc_batch(); - - if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { -@@ -1219,7 +1575,7 @@ void xen_exit_mmap(struct mm_struct *mm) - spin_lock(&mm->page_table_lock); - - /* pgd may not be pinned in the error exit path of execve */ -- if (xen_page_pinned(mm->pgd)) -+ if (xen_page_pinned(mm->pgd) && !mm->context.has_foreign_mappings) - xen_pgd_unpin(mm); - - spin_unlock(&mm->page_table_lock); -@@ -1288,12 +1644,19 @@ static void xen_flush_tlb_single(unsigned long addr) - preempt_enable(); - } - -+/* -+ * Flush tlb on other cpus. Xen can do this via a single hypercall -+ * rather than explicit IPIs, which has the nice property of avoiding -+ * any cpus which don't actually have dirty tlbs. Unfortunately it -+ * doesn't give us an opportunity to kick out cpus which are in lazy -+ * tlb state, so we may end up reflushing some cpus unnecessarily. 
-+ */ - static void xen_flush_tlb_others(const struct cpumask *cpus, - struct mm_struct *mm, unsigned long va) - { - struct { - struct mmuext_op op; -- DECLARE_BITMAP(mask, NR_CPUS); -+ DECLARE_BITMAP(mask, num_processors); - } *args; - struct multicall_space mcs; - -@@ -1417,6 +1780,13 @@ static int xen_pgd_alloc(struct mm_struct *mm) - return ret; - } - -+void xen_late_unpin_pgd(struct mm_struct *mm, pgd_t *pgd) -+{ -+ if (xen_page_pinned(pgd)) -+ __xen_pgd_unpin(mm, pgd); -+ -+} -+ - static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) - { - #ifdef CONFIG_X86_64 -@@ -1445,13 +1815,29 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) - } - #endif - --#ifdef CONFIG_X86_32 - static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) - { -- /* If there's an existing pte, then don't allow _PAGE_RW to be set */ -- if (pte_val_ma(*ptep) & _PAGE_PRESENT) -+ unsigned long pfn = pte_pfn(pte); -+ pte_t oldpte = *ptep; -+ -+ if (pte_flags(oldpte) & _PAGE_PRESENT) { -+ /* Don't allow existing IO mappings to be overridden */ -+ if (pte_flags(oldpte) & _PAGE_IOMAP) -+ pte = oldpte; -+ -+ /* Don't allow _PAGE_RW to be set on existing pte */ - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); -+ } -+ -+ /* -+ * If the new pfn is within the range of the newly allocated -+ * kernel pagetable, and it isn't being mapped into an -+ * early_ioremap fixmap slot, make sure it is RO. 
-+ */ -+ if (!is_early_ioremap_ptep(ptep) && -+ pfn >= e820_table_start && pfn < e820_table_end) -+ pte = pte_wrprotect(pte); - - return pte; - } -@@ -1464,7 +1850,6 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) - - xen_set_pte(ptep, pte); - } --#endif - - static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) - { -@@ -1517,7 +1902,6 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l - if (PagePinned(virt_to_page(mm->pgd))) { - SetPagePinned(page); - -- vm_unmap_aliases(); - if (!PageHighMem(page)) { - make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) -@@ -1620,6 +2004,7 @@ static void *m2v(phys_addr_t maddr) - return __ka(m2p(maddr)); - } - -+/* Set the page permissions on an identity-mapped pages */ - static void set_page_prot(void *addr, pgprot_t prot) - { - unsigned long pfn = __pa(addr) >> PAGE_SHIFT; -@@ -1635,6 +2020,9 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) - unsigned ident_pte; - unsigned long pfn; - -+ level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES, -+ PAGE_SIZE); -+ - ident_pte = 0; - pfn = 0; - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { -@@ -1645,7 +2033,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ -- if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) -+ if (ident_pte == LEVEL1_IDENT_ENTRIES) - break; - - pte_page = &level1_ident_pgt[ident_pte]; -@@ -1675,6 +2063,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) - set_page_prot(pmd, PAGE_KERNEL_RO); - } - -+void __init xen_setup_machphys_mapping(void) -+{ -+ struct xen_machphys_mapping mapping; -+ unsigned long machine_to_phys_nr_ents; -+ -+ if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { -+ machine_to_phys_mapping = (unsigned long 
*)mapping.v_start; -+ machine_to_phys_nr_ents = mapping.max_mfn + 1; -+ } else { -+ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; -+ } -+ machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); -+} -+ - #ifdef CONFIG_X86_64 - static void convert_pfn_mfn(void *v) - { -@@ -1760,12 +2162,15 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - return pgd; - } - #else /* !CONFIG_X86_64 */ --static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; -+static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); - - __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) - { - pmd_t *kernel_pmd; -+ int i; -+ -+ level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + - xen_start_info->nr_pt_frames * PAGE_SIZE + -@@ -1777,6 +2182,20 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - xen_map_identity_early(level2_kernel_pgt, max_pfn); - - memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); -+ -+ /* -+ * When running a 32 bit domain 0 on a 64 bit hypervisor a -+ * pinned L3 (such as the initial pgd here) contains bits -+ * which are reserved in the PAE layout but not in the 64 bit -+ * layout. Unfortunately some versions of the hypervisor -+ * (incorrectly) validate compat mode guests against the PAE -+ * layout and hence will not allow such a pagetable to be -+ * pinned by the guest. Therefore we mask off only the PFN and -+ * Present bits of the supplied L3. 
-+ */ -+ for (i = 0; i < PTRS_PER_PGD; i++) -+ swapper_pg_dir[i].pgd &= (PTE_PFN_MASK | _PAGE_PRESENT); -+ - set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], - __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); - -@@ -1799,6 +2218,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - } - #endif /* CONFIG_X86_64 */ - -+static unsigned char dummy_ioapic_mapping[PAGE_SIZE] __page_aligned_bss; -+ - static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) - { - pte_t pte; -@@ -1828,9 +2249,26 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) - pte = pfn_pte(phys, prot); - break; - -- default: -+#ifdef CONFIG_X86_IO_APIC -+ case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END: -+ /* -+ * We just don't map the IO APIC - all access is via -+ * hypercalls. Keep the address in the pte for reference. -+ */ -+ pte = pfn_pte(PFN_DOWN(__pa(dummy_ioapic_mapping)), PAGE_KERNEL); -+ break; -+#endif -+ -+ case FIX_PARAVIRT_BOOTMAP: -+ /* This is an MFN, but it isn't an IO mapping from the -+ IO domain */ - pte = mfn_pte(phys, prot); - break; -+ -+ default: -+ /* By default, set_fixmap is used for hardware mappings */ -+ pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP)); -+ break; - } - - __native_set_fixmap(idx, pte); -@@ -1845,6 +2283,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) - #endif - } - -+__init void xen_ident_map_ISA(void) -+{ -+ unsigned long pa; -+ -+ /* -+ * If we're dom0, then linear map the ISA machine addresses into -+ * the kernel's address space. 
-+ */ -+ if (!xen_initial_domain()) -+ return; -+ -+ xen_raw_printk("Xen: setup ISA identity maps\n"); -+ -+ for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) { -+ pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO); -+ -+ if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0)) -+ BUG(); -+ } -+ -+ xen_flush_tlb(); -+} -+ - static __init void xen_post_allocator_init(void) - { - pv_mmu_ops.set_pte = xen_set_pte; -@@ -1907,11 +2368,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { - .kmap_atomic_pte = xen_kmap_atomic_pte, - #endif - --#ifdef CONFIG_X86_64 -- .set_pte = xen_set_pte, --#else - .set_pte = xen_set_pte_init, --#endif - .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd_hyper, - -@@ -1960,8 +2417,305 @@ void __init xen_init_mmu_ops(void) - x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; - x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; - pv_mmu_ops = xen_mmu_ops; -+ -+ vmap_lazy_unmap = false; -+} -+ -+/* Protected by xen_reservation_lock. */ -+#define MAX_CONTIG_ORDER 9 /* 2MB */ -+static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; -+ -+#define VOID_PTE (mfn_pte(0, __pgprot(0))) -+static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, -+ unsigned long *in_frames, -+ unsigned long *out_frames) -+{ -+ int i; -+ struct multicall_space mcs; -+ -+ xen_mc_batch(); -+ for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) { -+ mcs = __xen_mc_entry(0); -+ -+ if (in_frames) -+ in_frames[i] = virt_to_mfn(vaddr); -+ -+ MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); -+ set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); -+ -+ if (out_frames) -+ out_frames[i] = virt_to_pfn(vaddr); -+ } -+ xen_mc_issue(0); -+} -+ -+/* -+ * Update the pfn-to-mfn mappings for a virtual address range, either to -+ * point to an array of mfns, or contiguously from a single starting -+ * mfn. 
-+ */ -+static void xen_remap_exchanged_ptes(unsigned long vaddr, int order, -+ unsigned long *mfns, -+ unsigned long first_mfn) -+{ -+ unsigned i, limit; -+ unsigned long mfn; -+ -+ xen_mc_batch(); -+ -+ limit = 1u << order; -+ for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) { -+ struct multicall_space mcs; -+ unsigned flags; -+ -+ mcs = __xen_mc_entry(0); -+ if (mfns) -+ mfn = mfns[i]; -+ else -+ mfn = first_mfn + i; -+ -+ if (i < (limit - 1)) -+ flags = 0; -+ else { -+ if (order == 0) -+ flags = UVMF_INVLPG | UVMF_ALL; -+ else -+ flags = UVMF_TLB_FLUSH | UVMF_ALL; -+ } -+ -+ MULTI_update_va_mapping(mcs.mc, vaddr, -+ mfn_pte(mfn, PAGE_KERNEL), flags); -+ -+ set_phys_to_machine(virt_to_pfn(vaddr), mfn); -+ } -+ -+ xen_mc_issue(0); -+} -+ -+/* -+ * Perform the hypercall to exchange a region of our pfns to point to -+ * memory with the required contiguous alignment. Takes the pfns as -+ * input, and populates mfns as output. -+ * -+ * Returns a success code indicating whether the hypervisor was able to -+ * satisfy the request or not. 
-+ */ -+static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in, -+ unsigned long *pfns_in, -+ unsigned long extents_out, unsigned int order_out, -+ unsigned long *mfns_out, -+ unsigned int address_bits) -+{ -+ long rc; -+ int success; -+ -+ struct xen_memory_exchange exchange = { -+ .in = { -+ .nr_extents = extents_in, -+ .extent_order = order_in, -+ .extent_start = pfns_in, -+ .domid = DOMID_SELF -+ }, -+ .out = { -+ .nr_extents = extents_out, -+ .extent_order = order_out, -+ .extent_start = mfns_out, -+ .address_bits = address_bits, -+ .domid = DOMID_SELF -+ } -+ }; -+ -+ BUG_ON(extents_in << order_in != extents_out << order_out); -+ -+ rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); -+ success = (exchange.nr_exchanged == extents_in); -+ -+ BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); -+ BUG_ON(success && (rc != 0)); -+ -+ return success; - } - -+int xen_create_contiguous_region(unsigned long vstart, unsigned int order, -+ unsigned int address_bits) -+{ -+ unsigned long *in_frames = discontig_frames, out_frame; -+ unsigned long flags; -+ int success; -+ -+ /* -+ * Currently an auto-translated guest will not perform I/O, nor will -+ * it require PAE page directories below 4GB. Therefore any calls to -+ * this function are redundant and can be ignored. -+ */ -+ -+ if (xen_feature(XENFEAT_auto_translated_physmap)) -+ return 0; -+ -+ if (unlikely(order > MAX_CONTIG_ORDER)) -+ return -ENOMEM; -+ -+ memset((void *) vstart, 0, PAGE_SIZE << order); -+ -+ spin_lock_irqsave(&xen_reservation_lock, flags); -+ -+ /* 1. Zap current PTEs, remembering MFNs. */ -+ xen_zap_pfn_range(vstart, order, in_frames, NULL); -+ -+ /* 2. Get a new contiguous memory extent. */ -+ out_frame = virt_to_pfn(vstart); -+ success = xen_exchange_memory(1UL << order, 0, in_frames, -+ 1, order, &out_frame, -+ address_bits); -+ -+ /* 3. Map the new extent in place of old pages. 
*/ -+ if (success) -+ xen_remap_exchanged_ptes(vstart, order, NULL, out_frame); -+ else -+ xen_remap_exchanged_ptes(vstart, order, in_frames, 0); -+ -+ spin_unlock_irqrestore(&xen_reservation_lock, flags); -+ -+ return success ? 0 : -ENOMEM; -+} -+EXPORT_SYMBOL_GPL(xen_create_contiguous_region); -+ -+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) -+{ -+ unsigned long *out_frames = discontig_frames, in_frame; -+ unsigned long flags; -+ int success; -+ -+ if (xen_feature(XENFEAT_auto_translated_physmap)) -+ return; -+ -+ if (unlikely(order > MAX_CONTIG_ORDER)) -+ return; -+ -+ memset((void *) vstart, 0, PAGE_SIZE << order); -+ -+ spin_lock_irqsave(&xen_reservation_lock, flags); -+ -+ /* 1. Find start MFN of contiguous extent. */ -+ in_frame = virt_to_mfn(vstart); -+ -+ /* 2. Zap current PTEs. */ -+ xen_zap_pfn_range(vstart, order, NULL, out_frames); -+ -+ /* 3. Do the exchange for non-contiguous MFNs. */ -+ success = xen_exchange_memory(1, order, &in_frame, 1UL << order, -+ 0, out_frames, 0); -+ -+ /* 4. Map new pages in place of old pages. 
*/ -+ if (success) -+ xen_remap_exchanged_ptes(vstart, order, out_frames, 0); -+ else -+ xen_remap_exchanged_ptes(vstart, order, NULL, in_frame); -+ -+ spin_unlock_irqrestore(&xen_reservation_lock, flags); -+} -+EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); -+ -+#define REMAP_BATCH_SIZE 16 -+ -+struct remap_data { -+ unsigned long mfn; -+ pgprot_t prot; -+ struct mmu_update *mmu_update; -+}; -+ -+static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, -+ unsigned long addr, void *data) -+{ -+ struct remap_data *rmd = data; -+ pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); -+ -+ rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr; -+ rmd->mmu_update->val = pte_val_ma(pte); -+ rmd->mmu_update++; -+ -+ return 0; -+} -+ -+int xen_remap_domain_mfn_range(struct vm_area_struct *vma, -+ unsigned long addr, -+ unsigned long mfn, int nr, -+ pgprot_t prot, unsigned domid) -+{ -+ struct remap_data rmd; -+ struct mmu_update mmu_update[REMAP_BATCH_SIZE]; -+ int batch; -+ unsigned long range; -+ int err = 0; -+ -+ prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); -+ -+ vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; -+ -+ rmd.mfn = mfn; -+ rmd.prot = prot; -+ -+ while (nr) { -+ batch = min(REMAP_BATCH_SIZE, nr); -+ range = (unsigned long)batch << PAGE_SHIFT; -+ -+ rmd.mmu_update = mmu_update; -+ err = apply_to_page_range(vma->vm_mm, addr, range, -+ remap_area_mfn_pte_fn, &rmd); -+ if (err) -+ goto out; -+ -+ err = -EFAULT; -+ if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) -+ goto out; -+ -+ nr -= batch; -+ addr += range; -+ } -+ -+ err = 0; -+out: -+ -+ flush_tlb_all(); -+ -+ return err; -+} -+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); -+ -+#ifdef CONFIG_XEN_PVHVM -+static void xen_hvm_exit_mmap(struct mm_struct *mm) -+{ -+ struct xen_hvm_pagetable_dying a; -+ int rc; -+ -+ a.domid = DOMID_SELF; -+ a.gpa = __pa(mm->pgd); -+ rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); -+ WARN_ON_ONCE(rc < 0); -+} -+ -+static int 
is_pagetable_dying_supported(void) -+{ -+ struct xen_hvm_pagetable_dying a; -+ int rc = 0; -+ -+ a.domid = DOMID_SELF; -+ a.gpa = 0x00; -+ rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a); -+ if (rc < 0) { -+ printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n"); -+ return 0; -+ } -+ return 1; -+} -+ -+void __init xen_hvm_init_mmu_ops(void) -+{ -+ if (is_pagetable_dying_supported()) -+ pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; -+} -+#endif -+ - #ifdef CONFIG_XEN_DEBUG_FS - - static struct dentry *d_mmu_debug; -diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h -index 5fe6bc7..537bb9a 100644 ---- a/arch/x86/xen/mmu.h -+++ b/arch/x86/xen/mmu.h -@@ -12,7 +12,6 @@ enum pt_level { - - - bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); --bool install_p2mtop_page(unsigned long pfn, unsigned long *p); - - void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - -@@ -60,4 +59,5 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, - unsigned long xen_read_cr2_direct(void); - - extern void xen_init_mmu_ops(void); -+extern void xen_hvm_init_mmu_ops(void); - #endif /* _XEN_MMU_H */ -diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c -new file mode 100644 -index 0000000..4d55524 ---- /dev/null -+++ b/arch/x86/xen/pci-swiotlb-xen.c -@@ -0,0 +1,52 @@ -+/* Glue code to lib/swiotlb-xen.c */ -+ -+#include <linux/dma-mapping.h> -+#include <linux/swiotlb.h> -+ -+#include <asm/xen/hypervisor.h> -+ -+int xen_swiotlb __read_mostly; -+ -+static struct dma_map_ops xen_swiotlb_dma_ops = { -+ .mapping_error = xen_swiotlb_dma_mapping_error, -+ .alloc_coherent = xen_swiotlb_alloc_coherent, -+ .free_coherent = xen_swiotlb_free_coherent, -+ .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, -+ .sync_single_for_device = xen_swiotlb_sync_single_for_device, -+ .sync_single_range_for_cpu = xen_swiotlb_sync_single_range_for_cpu, -+ .sync_single_range_for_device = 
xen_swiotlb_sync_single_range_for_device, -+ .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, -+ .sync_sg_for_device = xen_swiotlb_sync_sg_for_device, -+ .map_sg = xen_swiotlb_map_sg_attrs, -+ .unmap_sg = xen_swiotlb_unmap_sg_attrs, -+ .map_page = xen_swiotlb_map_page, -+ .unmap_page = xen_swiotlb_unmap_page, -+ .dma_supported = xen_swiotlb_dma_supported, -+}; -+ -+/* -+ * pci_swiotlb_detect - set swiotlb to 1 if necessary -+ * -+ * This returns non-zero if we are forced to use swiotlb (by the boot -+ * option). -+ */ -+int __init pci_xen_swiotlb_detect(void) -+{ -+ -+ if (xen_pv_domain() && (xen_initial_domain() || swiotlb)) -+ xen_swiotlb = 1; -+ -+ /* If we are running under Xen, we MUST disable the native SWIOTLB */ -+ if (xen_pv_domain()) -+ swiotlb = 0; -+ -+ return xen_swiotlb; -+} -+ -+void __init pci_xen_swiotlb_init(void) -+{ -+ if (xen_swiotlb) { -+ xen_swiotlb_init(1); -+ dma_ops = &xen_swiotlb_dma_ops; -+ } -+} -diff --git a/arch/x86/xen/pci.c b/arch/x86/xen/pci.c -new file mode 100644 -index 0000000..8ca31f1 ---- /dev/null -+++ b/arch/x86/xen/pci.c -@@ -0,0 +1,296 @@ -+#include <linux/kernel.h> -+#include <linux/acpi.h> -+#include <linux/pci.h> -+#include <linux/msi.h> -+#include <linux/slab.h> -+ -+#include <asm/mpspec.h> -+#include <asm/io_apic.h> -+#include <asm/pci_x86.h> -+ -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/pci.h> -+ -+#include <xen/interface/xen.h> -+#include <xen/events.h> -+ -+#include "xen-ops.h" -+ -+int xen_register_pirq(u32 gsi, int triggering) -+{ -+ int rc, irq; -+ struct physdev_map_pirq map_irq; -+ int shareable = 0; -+ char *name; -+ -+ if (!xen_pv_domain()) -+ return -1; -+ -+ if (triggering == ACPI_EDGE_SENSITIVE) { -+ shareable = 0; -+ name = "ioapic-edge"; -+ } else { -+ shareable = 1; -+ name = "ioapic-level"; -+ } -+ -+ irq = xen_allocate_pirq(gsi, shareable, name); -+ -+ printk(KERN_DEBUG "xen: --> irq=%d\n", irq); -+ -+ if (irq < 0) -+ goto out; -+ -+ map_irq.domid = DOMID_SELF; -+ map_irq.type = 
MAP_PIRQ_TYPE_GSI; -+ map_irq.index = gsi; -+ map_irq.pirq = irq; -+ -+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); -+ if (rc) { -+ printk(KERN_WARNING "xen map irq failed %d\n", rc); -+ return -1; -+ } -+ -+out: -+ return irq; -+} -+ -+int xen_register_gsi(u32 gsi, int triggering, int polarity) -+{ -+ int rc, irq; -+ struct physdev_setup_gsi setup_gsi; -+ -+ if (!xen_pv_domain()) -+ return -1; -+ -+ printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", -+ gsi, triggering, polarity); -+ -+ irq = xen_register_pirq(gsi, triggering); -+ -+ setup_gsi.gsi = gsi; -+ setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); -+ setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); -+ -+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); -+ if (rc == -EEXIST) -+ printk(KERN_INFO "Already setup the GSI :%d\n", gsi); -+ else if (rc) { -+ printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", -+ gsi, rc); -+ } -+ -+ return irq; -+} -+ -+#ifdef CONFIG_ACPI -+#define BAD_MADT_ENTRY(entry, end) ( \ -+ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ -+ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) -+ -+ -+static int __init -+xen_acpi_parse_int_src_ovr(struct acpi_subtable_header * header, -+ const unsigned long end) -+{ -+ struct acpi_madt_interrupt_override *intsrc = NULL; -+ -+ intsrc = (struct acpi_madt_interrupt_override *)header; -+ -+ if (BAD_MADT_ENTRY(intsrc, end)) -+ return -EINVAL; -+ -+ acpi_table_print_madt_entry(header); -+ -+ if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { -+ int gsi; -+ int trigger, polarity; -+ -+ trigger = intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK; -+ polarity = intsrc->inti_flags & ACPI_MADT_POLARITY_MASK; -+ -+ /* Command-line over-ride via acpi_sci= */ -+ if (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) -+ trigger = acpi_sci_flags & ACPI_MADT_TRIGGER_MASK; -+ -+ if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) -+ polarity = acpi_sci_flags & 
ACPI_MADT_POLARITY_MASK; -+ -+ printk("xen: sci override: source_irq=%d global_irq=%d trigger=%x polarity=%x\n", -+ intsrc->source_irq, intsrc->global_irq, -+ trigger, polarity); -+ -+ switch (polarity) { -+ case ACPI_MADT_POLARITY_CONFORMS: -+ case ACPI_MADT_POLARITY_ACTIVE_LOW: -+ polarity = ACPI_ACTIVE_LOW; -+ break; -+ -+ case ACPI_MADT_POLARITY_ACTIVE_HIGH: -+ polarity = ACPI_ACTIVE_HIGH; -+ break; -+ -+ default: -+ return 0; -+ } -+ -+ switch (trigger) { -+ case ACPI_MADT_TRIGGER_CONFORMS: -+ case ACPI_MADT_TRIGGER_LEVEL: -+ trigger = ACPI_LEVEL_SENSITIVE; -+ break; -+ -+ case ACPI_MADT_TRIGGER_EDGE: -+ trigger = ACPI_EDGE_SENSITIVE; -+ break; -+ -+ default: -+ return 0; -+ } -+ -+ gsi = xen_register_gsi(intsrc->global_irq, -+ trigger, polarity); -+ /* -+ * stash over-ride to indicate we've been here -+ * and for later update of acpi_gbl_FADT -+ */ -+ acpi_sci_override_gsi = gsi; -+ -+ printk("xen: acpi sci %d\n", gsi); -+ } -+ -+ return 0; -+} -+ -+static __init void xen_setup_acpi_sci(void) -+{ -+ acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, -+ xen_acpi_parse_int_src_ovr, -+ nr_irqs); -+} -+#else -+static __init void xen_setup_acpi_sci(void) -+{ -+} -+#endif -+ -+void __init xen_setup_pirqs(void) -+{ -+ int irq; -+ -+ if (0 == nr_ioapics) { -+ for (irq = 0; irq < NR_IRQS_LEGACY; irq++) -+ xen_allocate_pirq(irq, 0, "xt-pic"); -+ return; -+ } -+ -+ /* Pre-allocate legacy irqs */ -+ for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { -+ int trigger, polarity; -+ -+ if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) -+ continue; -+ -+ xen_register_pirq(irq, -+ trigger ? 
ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE); -+ } -+ -+ xen_setup_acpi_sci(); -+} -+ -+#ifdef CONFIG_PCI_MSI -+int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -+{ -+ int irq, ret; -+ struct msi_desc *msidesc; -+ -+ list_for_each_entry(msidesc, &dev->msi_list, list) { -+ irq = xen_create_msi_irq(dev, msidesc, type); -+ if (irq < 0) -+ return -1; -+ -+ ret = set_irq_msi(irq, msidesc); -+ if (ret) -+ goto error; -+ } -+ return 0; -+ -+error: -+ xen_destroy_irq(irq); -+ return ret; -+} -+#endif -+ -+struct xen_device_domain_owner { -+ domid_t domain; -+ struct pci_dev *dev; -+ struct list_head list; -+}; -+ -+static DEFINE_SPINLOCK(dev_domain_list_spinlock); -+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list); -+ -+static struct xen_device_domain_owner *find_device(struct pci_dev *dev) -+{ -+ struct xen_device_domain_owner *owner; -+ -+ list_for_each_entry(owner, &dev_domain_list, list) { -+ if (owner->dev == dev) -+ return owner; -+ } -+ return NULL; -+} -+ -+int xen_find_device_domain_owner(struct pci_dev *dev) -+{ -+ struct xen_device_domain_owner *owner; -+ int domain = -ENODEV; -+ -+ spin_lock(&dev_domain_list_spinlock); -+ owner = find_device(dev); -+ if (owner) -+ domain = owner->domain; -+ spin_unlock(&dev_domain_list_spinlock); -+ return domain; -+} -+EXPORT_SYMBOL(xen_find_device_domain_owner); -+ -+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain) -+{ -+ struct xen_device_domain_owner *owner; -+ -+ owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL); -+ if (!owner) -+ return -ENODEV; -+ -+ spin_lock(&dev_domain_list_spinlock); -+ if (find_device(dev)) { -+ spin_unlock(&dev_domain_list_spinlock); -+ kfree(owner); -+ return -EEXIST; -+ } -+ owner->domain = domain; -+ owner->dev = dev; -+ list_add_tail(&owner->list, &dev_domain_list); -+ spin_unlock(&dev_domain_list_spinlock); -+ return 0; -+} -+EXPORT_SYMBOL(xen_register_device_domain_owner); -+ -+int 
xen_unregister_device_domain_owner(struct pci_dev *dev) -+{ -+ struct xen_device_domain_owner *owner; -+ -+ spin_lock(&dev_domain_list_spinlock); -+ owner = find_device(dev); -+ if (!owner) { -+ spin_unlock(&dev_domain_list_spinlock); -+ return -ENODEV; -+ } -+ list_del(&owner->list); -+ spin_unlock(&dev_domain_list_spinlock); -+ kfree(owner); -+ return 0; -+} -+EXPORT_SYMBOL(xen_unregister_device_domain_owner); -diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c -new file mode 100644 -index 0000000..0f45638 ---- /dev/null -+++ b/arch/x86/xen/platform-pci-unplug.c -@@ -0,0 +1,143 @@ -+/****************************************************************************** -+ * platform-pci-unplug.c -+ * -+ * Xen platform PCI device driver -+ * Copyright (c) 2010, Citrix -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms and conditions of the GNU General Public License, -+ * version 2, as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple -+ * Place - Suite 330, Boston, MA 02111-1307 USA. 
-+ * -+ */ -+ -+#include <linux/init.h> -+#include <linux/io.h> -+#include <linux/module.h> -+ -+#include <xen/platform_pci.h> -+ -+#define XEN_PLATFORM_ERR_MAGIC -1 -+#define XEN_PLATFORM_ERR_PROTOCOL -2 -+#define XEN_PLATFORM_ERR_BLACKLIST -3 -+ -+/* store the value of xen_emul_unplug after the unplug is done */ -+int xen_platform_pci_unplug; -+EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); -+#ifdef CONFIG_XEN_PVHVM -+static int xen_emul_unplug; -+ -+static int __init check_platform_magic(void) -+{ -+ short magic; -+ char protocol; -+ -+ magic = inw(XEN_IOPORT_MAGIC); -+ if (magic != XEN_IOPORT_MAGIC_VAL) { -+ printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n"); -+ return XEN_PLATFORM_ERR_MAGIC; -+ } -+ -+ protocol = inb(XEN_IOPORT_PROTOVER); -+ -+ printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n", -+ protocol); -+ -+ switch (protocol) { -+ case 1: -+ outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM); -+ outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER); -+ if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) { -+ printk(KERN_ERR "Xen Platform: blacklisted by host\n"); -+ return XEN_PLATFORM_ERR_BLACKLIST; -+ } -+ break; -+ default: -+ printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version"); -+ return XEN_PLATFORM_ERR_PROTOCOL; -+ } -+ -+ return 0; -+} -+ -+void __init xen_unplug_emulated_devices(void) -+{ -+ int r; -+ -+ /* user explicitly requested no unplug */ -+ if (xen_emul_unplug & XEN_UNPLUG_NEVER) -+ return; -+ /* check the version of the xen platform PCI device */ -+ r = check_platform_magic(); -+ /* If the version matches enable the Xen platform PCI driver. -+ * Also enable the Xen platform PCI driver if the host does -+ * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC) -+ * but the user told us that unplugging is unnecessary. 
*/ -+ if (r && !(r == XEN_PLATFORM_ERR_MAGIC && -+ (xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))) -+ return; -+ /* Set the default value of xen_emul_unplug depending on whether or -+ * not the Xen PV frontends and the Xen platform PCI driver have -+ * been compiled for this kernel (modules or built-in are both OK). */ -+ if (!xen_emul_unplug) { -+ if (xen_must_unplug_nics()) { -+ printk(KERN_INFO "Netfront and the Xen platform PCI driver have " -+ "been compiled for this kernel: unplug emulated NICs.\n"); -+ xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; -+ } -+ if (xen_must_unplug_disks()) { -+ printk(KERN_INFO "Blkfront and the Xen platform PCI driver have " -+ "been compiled for this kernel: unplug emulated disks.\n" -+ "You might have to change the root device\n" -+ "from /dev/hd[a-d] to /dev/xvd[a-d]\n" -+ "in your root= kernel command line option\n"); -+ xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; -+ } -+ } -+ /* Now unplug the emulated devices */ -+ if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)) -+ outw(xen_emul_unplug, XEN_IOPORT_UNPLUG); -+ xen_platform_pci_unplug = xen_emul_unplug; -+} -+ -+static int __init parse_xen_emul_unplug(char *arg) -+{ -+ char *p, *q; -+ int l; -+ -+ for (p = arg; p; p = q) { -+ q = strchr(p, ','); -+ if (q) { -+ l = q - p; -+ q++; -+ } else { -+ l = strlen(p); -+ } -+ if (!strncmp(p, "all", l)) -+ xen_emul_unplug |= XEN_UNPLUG_ALL; -+ else if (!strncmp(p, "ide-disks", l)) -+ xen_emul_unplug |= XEN_UNPLUG_ALL_IDE_DISKS; -+ else if (!strncmp(p, "aux-ide-disks", l)) -+ xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS; -+ else if (!strncmp(p, "nics", l)) -+ xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; -+ else if (!strncmp(p, "unnecessary", l)) -+ xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY; -+ else if (!strncmp(p, "never", l)) -+ xen_emul_unplug |= XEN_UNPLUG_NEVER; -+ else -+ printk(KERN_WARNING "unrecognised option '%s' " -+ "in parameter 'xen_emul_unplug'\n", p); -+ } -+ return 0; -+} -+early_param("xen_emul_unplug", parse_xen_emul_unplug); 
-+#endif -diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c -index ad0047f..1a1934a 100644 ---- a/arch/x86/xen/setup.c -+++ b/arch/x86/xen/setup.c -@@ -10,6 +10,7 @@ - #include <linux/pm.h> - - #include <asm/elf.h> -+#include <asm/hpet.h> - #include <asm/vdso.h> - #include <asm/e820.h> - #include <asm/setup.h> -@@ -19,7 +20,9 @@ - - #include <xen/page.h> - #include <xen/interface/callback.h> -+#include <xen/interface/memory.h> - #include <xen/interface/physdev.h> -+#include <xen/interface/memory.h> - #include <xen/features.h> - - #include "xen-ops.h" -@@ -32,25 +35,178 @@ extern void xen_sysenter_target(void); - extern void xen_syscall_target(void); - extern void xen_syscall32_target(void); - -+/* Amount of extra memory space we add to the e820 ranges */ -+phys_addr_t xen_extra_mem_start, xen_extra_mem_size; -+ -+/* -+ * The maximum amount of extra memory compared to the base size. The -+ * main scaling factor is the size of struct page. At extreme ratios -+ * of base:extra, all the base memory can be filled with page -+ * structures for the extra memory, leaving no space for anything -+ * else. -+ * -+ * 10x seems like a reasonable balance between scaling flexibility and -+ * leaving a practically usable system. 
-+ */ -+#define EXTRA_MEM_RATIO (10) -+ -+static __init void xen_add_extra_mem(unsigned long pages) -+{ -+ u64 size = (u64)pages * PAGE_SIZE; -+ u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; -+ -+ if (!pages) -+ return; -+ -+ e820_add_region(extra_start, size, E820_RAM); -+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); -+ -+ reserve_early(extra_start, extra_start + size, "XEN EXTRA"); -+ -+ xen_extra_mem_size += size; -+ -+ xen_max_p2m_pfn = PFN_DOWN(extra_start + size); -+} -+ -+static unsigned long __init xen_release_chunk(phys_addr_t start_addr, -+ phys_addr_t end_addr) -+{ -+ struct xen_memory_reservation reservation = { -+ .address_bits = 0, -+ .extent_order = 0, -+ .domid = DOMID_SELF -+ }; -+ unsigned long start, end; -+ unsigned long len = 0; -+ unsigned long pfn; -+ int ret; -+ -+ start = PFN_UP(start_addr); -+ end = PFN_DOWN(end_addr); -+ -+ if (end <= start) -+ return 0; -+ -+ printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ", -+ start, end); -+ for(pfn = start; pfn < end; pfn++) { -+ unsigned long mfn = pfn_to_mfn(pfn); -+ -+ /* Make sure pfn exists to start with */ -+ if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) -+ continue; -+ -+ set_xen_guest_handle(reservation.extent_start, &mfn); -+ reservation.nr_extents = 1; -+ -+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, -+ &reservation); -+ WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", -+ start, end, ret); -+ if (ret == 1) { -+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY); -+ len++; -+ } -+ } -+ printk(KERN_CONT "%ld pages freed\n", len); -+ -+ return len; -+} -+ -+static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, -+ const struct e820map *e820) -+{ -+ phys_addr_t max_addr = PFN_PHYS(max_pfn); -+ phys_addr_t last_end = ISA_END_ADDRESS; -+ unsigned long released = 0; -+ int i; -+ -+ /* Free any unused memory above the low 1Mbyte. 
*/ -+ for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { -+ phys_addr_t end = e820->map[i].addr; -+ end = min(max_addr, end); -+ -+ if (last_end < end) -+ released += xen_release_chunk(last_end, end); -+ last_end = max(last_end, e820->map[i].addr + e820->map[i].size); -+ } -+ -+ if (last_end < max_addr) -+ released += xen_release_chunk(last_end, max_addr); -+ -+ printk(KERN_INFO "released %ld pages of unused memory\n", released); -+ return released; -+} - - /** - * machine_specific_memory_setup - Hook for machine specific memory setup. - **/ -- - char * __init xen_memory_setup(void) - { -+ static struct e820entry map[E820MAX] __initdata; -+ - unsigned long max_pfn = xen_start_info->nr_pages; -+ unsigned long long mem_end; -+ int rc; -+ struct xen_memory_map memmap; -+ unsigned long extra_pages = 0; -+ unsigned long extra_limit; -+ int op; -+ int i; - - max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); -+ mem_end = PFN_PHYS(max_pfn); -+ -+ memmap.nr_entries = E820MAX; -+ set_xen_guest_handle(memmap.buffer, map); -+ -+ op = xen_initial_domain() ? -+ XENMEM_machine_memory_map : -+ XENMEM_memory_map; -+ rc = HYPERVISOR_memory_op(op, &memmap); -+ if (rc == -ENOSYS) { -+ BUG_ON(xen_initial_domain()); -+ memmap.nr_entries = 1; -+ map[0].addr = 0ULL; -+ map[0].size = mem_end; -+ /* 8MB slack (to balance backend allocations). 
*/ -+ map[0].size += 8ULL << 20; -+ map[0].type = E820_RAM; -+ rc = 0; -+ } -+ BUG_ON(rc); - - e820.nr_map = 0; -+ xen_extra_mem_start = mem_end; -+ for (i = 0; i < memmap.nr_entries; i++) { -+ unsigned long long end = map[i].addr + map[i].size; -+ -+ if (map[i].type == E820_RAM && end > mem_end) { -+ /* RAM off the end - may be partially included */ -+ u64 delta = min(map[i].size, end - mem_end); - -- e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM); -+ map[i].size -= delta; -+ end -= delta; -+ -+ extra_pages += PFN_DOWN(delta); -+ } -+ -+ if (map[i].size > 0 && end > xen_extra_mem_start) -+ xen_extra_mem_start = end; -+ -+ /* Add region if any remains */ -+ if (map[i].size > 0) -+ e820_add_region(map[i].addr, map[i].size, map[i].type); -+ } - - /* -- * Even though this is normal, usable memory under Xen, reserve -- * ISA memory anyway because too many things think they can poke -+ * In domU, the ISA region is normal, usable memory, but we -+ * reserve ISA memory anyway because too many things poke - * about in there. -+ * -+ * In Dom0, the host E820 information can leave gaps in the -+ * ISA range, which would cause us to release those pages. To -+ * avoid this, we unconditionally reserve them here. - */ - e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, - E820_RESERVED); -@@ -67,21 +223,30 @@ char * __init xen_memory_setup(void) - - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); - -- return "Xen"; --} -+ extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820); - --static void xen_idle(void) --{ -- local_irq_disable(); -- -- if (need_resched()) -- local_irq_enable(); -- else { -- current_thread_info()->status &= ~TS_POLLING; -- smp_mb__after_clear_bit(); -- safe_halt(); -- current_thread_info()->status |= TS_POLLING; -- } -+ /* -+ * Clamp the amount of extra memory to a EXTRA_MEM_RATIO -+ * factor the base size. 
On non-highmem systems, the base -+ * size is the full initial memory allocation; on highmem it -+ * is limited to the max size of lowmem, so that it doesn't -+ * get completely filled. -+ * -+ * In principle there could be a problem in lowmem systems if -+ * the initial memory is also very large with respect to -+ * lowmem, but we won't try to deal with that here. -+ */ -+ extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), -+ max_pfn + extra_pages); -+ -+ if (extra_limit >= max_pfn) -+ extra_pages = extra_limit - max_pfn; -+ else -+ extra_pages = 0; -+ -+ xen_add_extra_mem(extra_pages); -+ -+ return "Xen"; - } - - /* -@@ -156,6 +321,8 @@ void __init xen_arch_setup(void) - struct physdev_set_iopl set_iopl; - int rc; - -+ xen_panic_handler_init(); -+ - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); - -@@ -182,13 +349,21 @@ void __init xen_arch_setup(void) - } - #endif - -+ /* -+ * Xen hypervisor uses HPET to wakeup cpu from deep c-states, -+ * so the HPET usage in dom0 must be forbidden. -+ */ -+ disable_hpet(NULL); -+ - memcpy(boot_command_line, xen_start_info->cmd_line, - MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? 
- COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); - -- pm_idle = xen_idle; -- -- paravirt_disable_iospace(); -+ /* Set up idle, making sure it calls safe_halt() pvop */ -+#ifdef CONFIG_X86_32 -+ boot_cpu_data.hlt_works_ok = 1; -+#endif -+ pm_idle = default_idle; - - fiddle_vdso(); - } -diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c -index ca5f56e..3e06a9e 100644 ---- a/arch/x86/xen/smp.c -+++ b/arch/x86/xen/smp.c -@@ -178,11 +178,18 @@ static void __init xen_smp_prepare_boot_cpu(void) - static void __init xen_smp_prepare_cpus(unsigned int max_cpus) - { - unsigned cpu; -+ unsigned int i; - - xen_init_lock_cpu(0); - - smp_store_cpu_info(0); - cpu_data(0).x86_max_cores = 1; -+ -+ for_each_possible_cpu(i) { -+ zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); -+ zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); -+ zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); -+ } - set_cpu_sibling_map(0); - - if (xen_smp_intr_init(0)) -@@ -299,6 +306,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) - xen_setup_timer(cpu); - xen_init_lock_cpu(cpu); - -+ cpumask_set_cpu(cpu, cpu_callout_mask); -+ - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - - /* make sure interrupts start blocked */ -@@ -392,6 +401,8 @@ static void stop_self(void *v) - load_cr3(swapper_pg_dir); - /* should set up a minimal gdt */ - -+ set_cpu_online(cpu, false); -+ - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); - BUG(); - } -diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c -index a9c6611..1d789d5 100644 ---- a/arch/x86/xen/suspend.c -+++ b/arch/x86/xen/suspend.c -@@ -26,6 +26,18 @@ void xen_pre_suspend(void) - BUG(); - } - -+void xen_hvm_post_suspend(int suspend_cancelled) -+{ -+ int cpu; -+ xen_hvm_init_shared_info(); -+ xen_callback_vector(); -+ if (xen_feature(XENFEAT_hvm_safe_pvclock)) { -+ for_each_online_cpu(cpu) { -+ xen_setup_runstate_info(cpu); -+ } -+ } -+} -+ - void xen_post_suspend(int suspend_cancelled) - { - xen_build_mfn_list_list(); -diff --git 
a/arch/x86/xen/time.c b/arch/x86/xen/time.c -index 8e04980..ab35140 100644 ---- a/arch/x86/xen/time.c -+++ b/arch/x86/xen/time.c -@@ -19,6 +19,7 @@ - #include <asm/xen/hypercall.h> - - #include <xen/events.h> -+#include <xen/features.h> - #include <xen/interface/xen.h> - #include <xen/interface/vcpu.h> - -@@ -155,7 +156,7 @@ static void do_stolen_accounting(void) - } - - /* Get the TSC speed from Xen */ --unsigned long xen_tsc_khz(void) -+static unsigned long xen_tsc_khz(void) - { - struct pvclock_vcpu_time_info *info = - &HYPERVISOR_shared_info->vcpu_info[0].time; -@@ -190,7 +191,7 @@ static void xen_read_wallclock(struct timespec *ts) - put_cpu_var(xen_vcpu); - } - --unsigned long xen_get_wallclock(void) -+static unsigned long xen_get_wallclock(void) - { - struct timespec ts; - -@@ -198,10 +199,24 @@ unsigned long xen_get_wallclock(void) - return ts.tv_sec; - } - --int xen_set_wallclock(unsigned long now) -+static int xen_set_wallclock(unsigned long now) - { -+ struct xen_platform_op op; -+ int rc; -+ - /* do nothing for domU */ -- return -1; -+ if (!xen_initial_domain()) -+ return -1; -+ -+ op.cmd = XENPF_settime; -+ op.u.settime.secs = now; -+ op.u.settime.nsecs = 0; -+ op.u.settime.system_time = xen_clocksource_read(); -+ -+ rc = HYPERVISOR_dom0_op(&op); -+ WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now); -+ -+ return rc; - } - - static struct clocksource xen_clocksource __read_mostly = { -@@ -403,6 +418,8 @@ void xen_setup_timer(int cpu) - - evt->cpumask = cpumask_of(cpu); - evt->irq = irq; -+ -+ xen_setup_runstate_info(cpu); - } - - void xen_teardown_timer(int cpu) -@@ -424,6 +441,8 @@ void xen_timer_resume(void) - { - int cpu; - -+ pvclock_resume(); -+ - if (xen_clockevent != &xen_vcpuop_clockevent) - return; - -@@ -433,7 +452,7 @@ void xen_timer_resume(void) - } - } - --__init void xen_time_init(void) -+static __init void xen_time_init(void) - { - int cpu = smp_processor_id(); - -@@ -457,3 +476,51 @@ __init void xen_time_init(void) - 
xen_setup_timer(cpu); - xen_setup_cpu_clockevents(); - } -+ -+static const struct pv_time_ops xen_time_ops __initdata = { -+ .sched_clock = xen_clocksource_read, -+}; -+ -+__init void xen_init_time_ops(void) -+{ -+ pv_time_ops = xen_time_ops; -+ -+ x86_init.timers.timer_init = xen_time_init; -+ x86_init.timers.setup_percpu_clockev = x86_init_noop; -+ x86_cpuinit.setup_percpu_clockev = x86_init_noop; -+ -+ x86_platform.calibrate_tsc = xen_tsc_khz; -+ x86_platform.get_wallclock = xen_get_wallclock; -+ x86_platform.set_wallclock = xen_set_wallclock; -+} -+ -+#ifdef CONFIG_XEN_PVHVM -+static void xen_hvm_setup_cpu_clockevents(void) -+{ -+ int cpu = smp_processor_id(); -+ xen_setup_runstate_info(cpu); -+ xen_setup_timer(cpu); -+ xen_setup_cpu_clockevents(); -+} -+ -+__init void xen_hvm_init_time_ops(void) -+{ -+ /* vector callback is needed otherwise we cannot receive interrupts -+ * on cpu > 0 */ -+ if (!xen_have_vector_callback && num_present_cpus() > 1) -+ return; -+ if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { -+ printk(KERN_INFO "Xen doesn't support pvclock on HVM," -+ "disable pv timer\n"); -+ return; -+ } -+ -+ pv_time_ops = xen_time_ops; -+ x86_init.timers.setup_percpu_clockev = xen_time_init; -+ x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; -+ -+ x86_platform.calibrate_tsc = xen_tsc_khz; -+ x86_platform.get_wallclock = xen_get_wallclock; -+ x86_platform.set_wallclock = xen_set_wallclock; -+} -+#endif -diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c -new file mode 100644 -index 0000000..1cd7f4d ---- /dev/null -+++ b/arch/x86/xen/vga.c -@@ -0,0 +1,67 @@ -+#include <linux/screen_info.h> -+#include <linux/init.h> -+ -+#include <asm/bootparam.h> -+#include <asm/setup.h> -+ -+#include <xen/interface/xen.h> -+ -+#include "xen-ops.h" -+ -+void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) -+{ -+ struct screen_info *screen_info = &boot_params.screen_info; -+ -+ /* This is drawn from a dump from vgacon:startup in -+ 
* standard Linux. */ -+ screen_info->orig_video_mode = 3; -+ screen_info->orig_video_isVGA = 1; -+ screen_info->orig_video_lines = 25; -+ screen_info->orig_video_cols = 80; -+ screen_info->orig_video_ega_bx = 3; -+ screen_info->orig_video_points = 16; -+ screen_info->orig_y = screen_info->orig_video_lines - 1; -+ -+ switch (info->video_type) { -+ case XEN_VGATYPE_TEXT_MODE_3: -+ if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3) -+ + sizeof(info->u.text_mode_3)) -+ break; -+ screen_info->orig_video_lines = info->u.text_mode_3.rows; -+ screen_info->orig_video_cols = info->u.text_mode_3.columns; -+ screen_info->orig_x = info->u.text_mode_3.cursor_x; -+ screen_info->orig_y = info->u.text_mode_3.cursor_y; -+ screen_info->orig_video_points = -+ info->u.text_mode_3.font_height; -+ break; -+ -+ case XEN_VGATYPE_VESA_LFB: -+ if (size < offsetof(struct dom0_vga_console_info, -+ u.vesa_lfb.gbl_caps)) -+ break; -+ screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB; -+ screen_info->lfb_width = info->u.vesa_lfb.width; -+ screen_info->lfb_height = info->u.vesa_lfb.height; -+ screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel; -+ screen_info->lfb_base = info->u.vesa_lfb.lfb_base; -+ screen_info->lfb_size = info->u.vesa_lfb.lfb_size; -+ screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line; -+ screen_info->red_size = info->u.vesa_lfb.red_size; -+ screen_info->red_pos = info->u.vesa_lfb.red_pos; -+ screen_info->green_size = info->u.vesa_lfb.green_size; -+ screen_info->green_pos = info->u.vesa_lfb.green_pos; -+ screen_info->blue_size = info->u.vesa_lfb.blue_size; -+ screen_info->blue_pos = info->u.vesa_lfb.blue_pos; -+ screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; -+ screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; -+ if (size >= offsetof(struct dom0_vga_console_info, -+ u.vesa_lfb.gbl_caps) -+ + sizeof(info->u.vesa_lfb.gbl_caps)) -+ screen_info->capabilities = info->u.vesa_lfb.gbl_caps; -+ if (size >= offsetof(struct dom0_vga_console_info, -+ 
u.vesa_lfb.mode_attrs) -+ + sizeof(info->u.vesa_lfb.mode_attrs)) -+ screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; -+ break; -+ } -+} -diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h -index f9153a3..ebbee21 100644 ---- a/arch/x86/xen/xen-ops.h -+++ b/arch/x86/xen/xen-ops.h -@@ -30,6 +30,10 @@ void xen_setup_machphys_mapping(void); - pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); - void xen_ident_map_ISA(void); - void xen_reserve_top(void); -+void xen_ident_map_ISA(void); -+extern unsigned long xen_max_p2m_pfn; -+ -+void xen_set_pat(u64); - - char * __init xen_memory_setup(void); - void __init xen_arch_setup(void); -@@ -38,6 +42,10 @@ void xen_enable_sysenter(void); - void xen_enable_syscall(void); - void xen_vcpu_restore(void); - -+void xen_callback_vector(void); -+void xen_hvm_init_shared_info(void); -+void __init xen_unplug_emulated_devices(void); -+ - void __init xen_build_dynamic_phys_to_machine(void); - - void xen_init_irq_ops(void); -@@ -46,11 +54,8 @@ void xen_setup_runstate_info(int cpu); - void xen_teardown_timer(int cpu); - cycle_t xen_clocksource_read(void); - void xen_setup_cpu_clockevents(void); --unsigned long xen_tsc_khz(void); --void __init xen_time_init(void); --unsigned long xen_get_wallclock(void); --int xen_set_wallclock(unsigned long time); --unsigned long long xen_sched_clock(void); -+void __init xen_init_time_ops(void); -+void __init xen_hvm_init_time_ops(void); - - irqreturn_t xen_debug_interrupt(int irq, void *dev_id); - -@@ -82,6 +87,23 @@ static inline void xen_uninit_lock_cpu(int cpu) - } - #endif - -+struct dom0_vga_console_info; -+ -+#ifdef CONFIG_XEN_DOM0 -+void xen_init_vga(const struct dom0_vga_console_info *, size_t size); -+#else -+static inline void xen_init_vga(const struct dom0_vga_console_info *info, -+ size_t size) -+{ -+} -+#endif -+ -+#ifdef CONFIG_XEN_DOM0 -+void xen_init_apic(void); -+#else -+static inline void xen_init_apic(void) {} -+#endif -+ - /* Declare an asm 
function, along with symbols needed to make it - inlineable */ - #define DECL_ASM(ret, name, ...) \ -@@ -101,4 +123,6 @@ void xen_sysret32(void); - void xen_sysret64(void); - void xen_adjust_exception_frame(void); - -+extern int xen_panic_handler_init(void); -+ - #endif /* XEN_OPS_H */ -diff --git a/block/blk-core.c b/block/blk-core.c -index 71da511..32d305c 100644 ---- a/block/blk-core.c -+++ b/block/blk-core.c -@@ -439,6 +439,7 @@ void blk_put_queue(struct request_queue *q) - { - kobject_put(&q->kobj); - } -+EXPORT_SYMBOL_GPL(blk_put_queue); - - void blk_cleanup_queue(struct request_queue *q) - { -@@ -612,6 +613,7 @@ int blk_get_queue(struct request_queue *q) - - return 1; - } -+EXPORT_SYMBOL_GPL(blk_get_queue); - - static inline void blk_free_request(struct request_queue *q, struct request *rq) - { -diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile -index 7702118..1be123c 100644 ---- a/drivers/acpi/Makefile -+++ b/drivers/acpi/Makefile -@@ -61,6 +61,7 @@ obj-$(CONFIG_ACPI_POWER_METER) += power_meter.o - # processor has its own "processor." 
module_param namespace - processor-y := processor_core.o processor_throttling.o - processor-y += processor_idle.o processor_thermal.o -+processor-y += processor_xen.o - processor-$(CONFIG_CPU_FREQ) += processor_perflib.o - - obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o -diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c -index 28ccdbc..b0f9ed6 100644 ---- a/drivers/acpi/acpi_memhotplug.c -+++ b/drivers/acpi/acpi_memhotplug.c -@@ -31,6 +31,7 @@ - #include <linux/types.h> - #include <linux/memory_hotplug.h> - #include <acpi/acpi_drivers.h> -+#include <xen/acpi.h> - - #define ACPI_MEMORY_DEVICE_CLASS "memory" - #define ACPI_MEMORY_DEVICE_HID "PNP0C80" -@@ -70,21 +71,6 @@ static struct acpi_driver acpi_memory_device_driver = { - }, - }; - --struct acpi_memory_info { -- struct list_head list; -- u64 start_addr; /* Memory Range start physical addr */ -- u64 length; /* Memory Range length */ -- unsigned short caching; /* memory cache attribute */ -- unsigned short write_protect; /* memory read/write attribute */ -- unsigned int enabled:1; --}; -- --struct acpi_memory_device { -- struct acpi_device * device; -- unsigned int state; /* State of the memory device */ -- struct list_head res_list; --}; -- - static int acpi_hotmem_initialized; - - static acpi_status -@@ -228,6 +214,9 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) - return result; - } - -+ if (xen_initial_domain()) -+ return xen_hotadd_memory(mem_device); -+ - node = acpi_get_node(mem_device->device->handle); - /* - * Tell the VM there is more memory here... 
-diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c -index cc22f9a..747d96f 100644 ---- a/drivers/acpi/acpica/hwsleep.c -+++ b/drivers/acpi/acpica/hwsleep.c -@@ -47,6 +47,9 @@ - #include "actables.h" - #include <linux/tboot.h> - -+#include <xen/acpi.h> -+#include <asm/xen/hypervisor.h> -+ - #define _COMPONENT ACPI_HARDWARE - ACPI_MODULE_NAME("hwsleep") - -@@ -346,6 +349,19 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state) - tboot_sleep(sleep_state, pm1a_control, pm1b_control); - - /* Write #2: Write both SLP_TYP + SLP_EN */ -+ if (xen_pv_acpi()) { -+ int err; -+ -+ err = acpi_notify_hypervisor_state(sleep_state, -+ pm1a_control, pm1b_control); -+ if (err) { -+ ACPI_DEBUG_PRINT((ACPI_DB_INIT, -+ "Hypervisor failure [%d]\n", err)); -+ return_ACPI_STATUS(AE_ERROR); -+ } -+ -+ return_ACPI_STATUS(AE_OK); -+ } - - status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control); - if (ACPI_FAILURE(status)) { -diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c -index 7102474..2428cc0 100644 ---- a/drivers/acpi/processor_core.c -+++ b/drivers/acpi/processor_core.c -@@ -58,6 +58,7 @@ - #include <acpi/acpi_bus.h> - #include <acpi/acpi_drivers.h> - #include <acpi/processor.h> -+#include <xen/acpi.h> - - #define PREFIX "ACPI: " - -@@ -81,11 +82,9 @@ MODULE_DESCRIPTION("ACPI Processor Driver"); - MODULE_LICENSE("GPL"); - - static int acpi_processor_add(struct acpi_device *device); --static int acpi_processor_remove(struct acpi_device *device, int type); - #ifdef CONFIG_ACPI_PROCFS - static int acpi_processor_info_open_fs(struct inode *inode, struct file *file); - #endif --static void acpi_processor_notify(struct acpi_device *device, u32 event); - static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu); - static int acpi_processor_handle_eject(struct acpi_processor *pr); - -@@ -247,7 +246,7 @@ static int acpi_processor_errata_piix4(struct pci_dev *dev) - return 0; - } - --static int 
acpi_processor_errata(struct acpi_processor *pr) -+int acpi_processor_errata(struct acpi_processor *pr) - { - int result = 0; - struct pci_dev *dev = NULL; -@@ -278,7 +277,7 @@ static int acpi_processor_errata(struct acpi_processor *pr) - * _PDC is required for a BIOS-OS handshake for most of the newer - * ACPI processor features. - */ --static int acpi_processor_set_pdc(struct acpi_processor *pr) -+int acpi_processor_set_pdc(struct acpi_processor *pr) - { - struct acpi_object_list *pdc_in = pr->pdc; - acpi_status status = AE_OK; -@@ -347,7 +346,7 @@ static int acpi_processor_info_open_fs(struct inode *inode, struct file *file) - PDE(inode)->data); - } - --static int acpi_processor_add_fs(struct acpi_device *device) -+int acpi_processor_add_fs(struct acpi_device *device) - { - struct proc_dir_entry *entry = NULL; - -@@ -386,7 +385,7 @@ static int acpi_processor_add_fs(struct acpi_device *device) - return -EIO; - return 0; - } --static int acpi_processor_remove_fs(struct acpi_device *device) -+int acpi_processor_remove_fs(struct acpi_device *device) - { - - if (acpi_device_dir(device)) { -@@ -402,15 +401,6 @@ static int acpi_processor_remove_fs(struct acpi_device *device) - - return 0; - } --#else --static inline int acpi_processor_add_fs(struct acpi_device *device) --{ -- return 0; --} --static inline int acpi_processor_remove_fs(struct acpi_device *device) --{ -- return 0; --} - #endif - - /* Use the acpiid in MADT to map cpus in case of SMP */ -@@ -705,7 +695,7 @@ static int acpi_processor_get_info(struct acpi_device *device) - - static DEFINE_PER_CPU(void *, processor_device_array); - --static void acpi_processor_notify(struct acpi_device *device, u32 event) -+void acpi_processor_notify(struct acpi_device *device, u32 event) - { - struct acpi_processor *pr = acpi_driver_data(device); - int saved; -@@ -873,7 +863,7 @@ err_free_cpumask: - return result; - } - --static int acpi_processor_remove(struct acpi_device *device, int type) -+int 
acpi_processor_remove(struct acpi_device *device, int type) - { - struct acpi_processor *pr = NULL; - -@@ -1148,7 +1138,11 @@ static int __init acpi_processor_init(void) - if (result < 0) - goto out_proc; - -- result = acpi_bus_register_driver(&acpi_processor_driver); -+ if (xen_initial_domain()) -+ result = xen_acpi_processor_init(); -+ else -+ result = acpi_bus_register_driver(&acpi_processor_driver); -+ - if (result < 0) - goto out_cpuidle; - -@@ -1184,7 +1178,10 @@ static void __exit acpi_processor_exit(void) - - acpi_processor_uninstall_hotplug_notify(); - -- acpi_bus_unregister_driver(&acpi_processor_driver); -+ if (xen_initial_domain()) -+ xen_acpi_processor_exit(); -+ else -+ acpi_bus_unregister_driver(&acpi_processor_driver); - - cpuidle_unregister_driver(&acpi_idle_driver); - -diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c -index a6ad608..3c32e87 100644 ---- a/drivers/acpi/processor_idle.c -+++ b/drivers/acpi/processor_idle.c -@@ -58,6 +58,7 @@ - - #include <acpi/acpi_bus.h> - #include <acpi/processor.h> -+#include <xen/acpi.h> - #include <asm/processor.h> - - #define PREFIX "ACPI: " -@@ -439,7 +440,8 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) - cx.entry_method = ACPI_CSTATE_HALT; - snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT"); - } else { -- continue; -+ if (!xen_initial_domain()) -+ continue; - } - if (cx.type == ACPI_STATE_C1 && - (idle_halt || idle_nomwait)) { -@@ -477,6 +479,9 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) - - cx.power = obj->integer.value; - -+ /* cache control methods to notify xen*/ -+ processor_cntl_xen_power_cache(pr->acpi_id, i, reg); -+ - current_count++; - memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx)); - -@@ -653,7 +658,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) - return (working); - } - --static int acpi_processor_get_power_info(struct acpi_processor *pr) -+int 
acpi_processor_get_power_info(struct acpi_processor *pr) - { - unsigned int i; - int result; -@@ -1223,9 +1228,14 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, - * platforms that only support C1. - */ - if (pr->flags.power) { -- acpi_processor_setup_cpuidle(pr); -- if (cpuidle_register_device(&pr->power.dev)) -- return -EIO; -+ if (xen_initial_domain()) { -+ processor_cntl_xen_notify(pr, -+ PROCESSOR_PM_INIT, PM_TYPE_IDLE); -+ } else { -+ acpi_processor_setup_cpuidle(pr); -+ if (cpuidle_register_device(&pr->power.dev)) -+ return -EIO; -+ } - } - #ifdef CONFIG_ACPI_PROCFS - /* 'power' [R] */ -diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c -index 40d395e..7ba143d 100644 ---- a/drivers/acpi/processor_perflib.c -+++ b/drivers/acpi/processor_perflib.c -@@ -332,7 +332,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) - return result; - } - --static int acpi_processor_get_performance_info(struct acpi_processor *pr) -+int acpi_processor_get_performance_info(struct acpi_processor *pr) - { - int result = 0; - acpi_status status = AE_OK; -@@ -438,7 +438,7 @@ int acpi_processor_notify_smm(struct module *calling_module) - - EXPORT_SYMBOL(acpi_processor_notify_smm); - --static int acpi_processor_get_psd(struct acpi_processor *pr) -+int acpi_processor_get_psd(struct acpi_processor *pr) - { - int result = 0; - acpi_status status = AE_OK; -diff --git a/drivers/acpi/processor_xen.c b/drivers/acpi/processor_xen.c -new file mode 100644 -index 0000000..305398d ---- /dev/null -+++ b/drivers/acpi/processor_xen.c -@@ -0,0 +1,651 @@ -+/* -+ * processor_xen.c - ACPI Processor Driver for xen -+ * -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or (at -+ * 
your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -+ * -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/types.h> -+#include <linux/pci.h> -+#include <linux/pm.h> -+#include <linux/cpufreq.h> -+#include <linux/cpu.h> -+#include <linux/proc_fs.h> -+#include <linux/seq_file.h> -+#include <linux/dmi.h> -+#include <linux/moduleparam.h> -+#include <linux/cpuidle.h> -+#include <linux/acpi.h> -+ -+#include <acpi/acpi_bus.h> -+#include <acpi/acpi_drivers.h> -+#include <acpi/processor.h> -+#include <xen/acpi.h> -+#include <xen/pcpu.h> -+ -+#define PREFIX "ACPI: " -+ -+#define ACPI_PROCESSOR_CLASS "processor" -+#define ACPI_PROCESSOR_DEVICE_NAME "Processor" -+#define ACPI_PROCESSOR_FILE_INFO "info" -+#define ACPI_PROCESSOR_FILE_THROTTLING "throttling" -+#define ACPI_PROCESSOR_FILE_LIMIT "limit" -+#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 -+#define ACPI_PROCESSOR_NOTIFY_POWER 0x81 -+#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 -+ -+#define _COMPONENT ACPI_PROCESSOR_COMPONENT -+ACPI_MODULE_NAME("processor_xen"); -+ -+static const struct acpi_device_id processor_device_ids[] = { -+ {ACPI_PROCESSOR_OBJECT_HID, 0}, -+ {"ACPI0007", 0}, -+ {"", 0}, -+}; -+ -+/* -+ * Xen ACPI processor driver -+ */ -+ -+/* from processor_core.c */ -+ -+static int xen_acpi_processor_add(struct acpi_device *device); -+static void xen_acpi_processor_notify(struct acpi_device *device, u32 event); -+ -+struct 
acpi_driver xen_acpi_processor_driver = { -+ .name = "processor", -+ .class = ACPI_PROCESSOR_CLASS, -+ .ids = processor_device_ids, -+ .ops = { -+ .add = xen_acpi_processor_add, -+ .remove = acpi_processor_remove, -+ .suspend = acpi_processor_suspend, -+ .resume = acpi_processor_resume, -+ .notify = xen_acpi_processor_notify, -+ }, -+}; -+ -+static int is_processor_present(acpi_handle handle) -+{ -+ acpi_status status; -+ unsigned long long sta = 0; -+ -+ -+ status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); -+ -+ if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT)) -+ return 1; -+ -+ /* -+ * _STA is mandatory for a processor that supports hot plug -+ */ -+ if (status == AE_NOT_FOUND) -+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, -+ "Processor does not support hot plug\n")); -+ else -+ ACPI_EXCEPTION((AE_INFO, status, -+ "Processor Device is not present")); -+ return 0; -+} -+ -+static acpi_status -+xen_acpi_processor_hotadd_init(struct acpi_processor *pr, int *p_cpu) -+{ -+ if (!is_processor_present(pr->handle)) -+ return AE_ERROR; -+ -+ if (processor_cntl_xen_notify(pr, -+ PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD)) -+ return AE_ERROR; -+ -+ return AE_OK; -+} -+ -+static int xen_acpi_processor_get_info(struct acpi_device *device) -+{ -+ acpi_status status = 0; -+ union acpi_object object = { 0 }; -+ struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; -+ struct acpi_processor *pr; -+ int cpu_index, device_declaration = 0; -+ static int cpu0_initialized; -+ -+ pr = acpi_driver_data(device); -+ if (!pr) -+ return -EINVAL; -+ -+ if (num_online_cpus() > 1) -+ errata.smp = TRUE; -+ -+ acpi_processor_errata(pr); -+ -+ /* -+ * Check to see if we have bus mastering arbitration control. This -+ * is required for proper C3 usage (to maintain cache coherency). 
-+ */ -+ if (acpi_gbl_FADT.pm2_control_block && -+ acpi_gbl_FADT.pm2_control_length) { -+ pr->flags.bm_control = 1; -+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, -+ "Bus mastering arbitration control present\n" -+ )); -+ } else -+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, -+ "No bus mastering arbitration control\n")); -+ -+ if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_OBJECT_HID)) { -+ /* Declared with "Processor" statement; match ProcessorID */ -+ status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer); -+ if (ACPI_FAILURE(status)) { -+ printk(KERN_ERR PREFIX "Evaluating processor object\n"); -+ return -ENODEV; -+ } -+ -+ /* -+ * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP. -+ * >>> 'acpi_get_processor_id(acpi_id, &id)' in -+ * arch/xxx/acpi.c -+ */ -+ pr->acpi_id = object.processor.proc_id; -+ } else { -+ /* -+ * Declared with "Device" statement; match _UID. -+ * Note that we don't handle string _UIDs yet. -+ */ -+ unsigned long long value; -+ status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID, -+ NULL, &value); -+ if (ACPI_FAILURE(status)) { -+ printk(KERN_ERR PREFIX -+ "Evaluating processor _UID [%#x]\n", status); -+ return -ENODEV; -+ } -+ device_declaration = 1; -+ pr->acpi_id = value; -+ } -+ -+ /* TBD: add Xen specific code to query cpu_index */ -+ cpu_index = -1; -+ -+ /* Handle UP system running SMP kernel, with no LAPIC in MADT */ -+ if (!cpu0_initialized && (cpu_index == -1) && -+ (num_online_cpus() == 1)) { -+ cpu_index = 0; -+ } -+ -+ cpu0_initialized = 1; -+ -+ pr->id = cpu_index; -+ -+ /* -+ * Extra Processor objects may be enumerated on MP systems with -+ * less than the max # of CPUs, or Xen vCPU < pCPU. -+ * They should be ignored _iff they are physically not present. -+ * -+ */ -+ if (xen_pcpu_index(pr->acpi_id, 1) == -1) { -+ if (ACPI_FAILURE -+ (xen_acpi_processor_hotadd_init(pr, &pr->id))) { -+ return -ENODEV; -+ } -+ } -+ -+ /* -+ * On some boxes several processors use the same processor bus id. 
-+ * But they are located in different scope. For example: -+ * \_SB.SCK0.CPU0 -+ * \_SB.SCK1.CPU0 -+ * Rename the processor device bus id. And the new bus id will be -+ * generated as the following format: -+ * CPU+CPU ID. -+ */ -+ sprintf(acpi_device_bid(device), "CPU%X", pr->id); -+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id, -+ pr->acpi_id)); -+ -+ if (!object.processor.pblk_address) -+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No PBLK (NULL address)\n")); -+ else if (object.processor.pblk_length != 6) -+ printk(KERN_ERR PREFIX "Invalid PBLK length [%d]\n", -+ object.processor.pblk_length); -+ else { -+ pr->throttling.address = object.processor.pblk_address; -+ pr->throttling.duty_offset = acpi_gbl_FADT.duty_offset; -+ pr->throttling.duty_width = acpi_gbl_FADT.duty_width; -+ -+ pr->pblk = object.processor.pblk_address; -+ -+ /* -+ * We don't care about error returns - we just try to mark -+ * these reserved so that nobody else is confused into thinking -+ * that this region might be unused.. 
-+ * -+ * (In particular, allocating the IO range for Cardbus) -+ */ -+ request_region(pr->throttling.address, 6, "ACPI CPU throttle"); -+ } -+ -+ /* -+ * If ACPI describes a slot number for this CPU, we can use it -+ * ensure we get the right value in the "physical id" field -+ * of /proc/cpuinfo -+ */ -+ status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer); -+ if (ACPI_SUCCESS(status)) -+ arch_fix_phys_package_id(pr->id, object.integer.value); -+ -+ return 0; -+} -+ -+static struct acpi_device *processor_device_array[XEN_MAX_ACPI_ID + 1]; -+ -+static int __cpuinit xen_acpi_processor_add(struct acpi_device *device) -+{ -+ struct acpi_processor *pr = NULL; -+ int result = 0; -+ struct sys_device *sysdev; -+ -+ pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); -+ if (!pr) -+ return -ENOMEM; -+ -+ if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { -+ kfree(pr); -+ return -ENOMEM; -+ } -+ -+ pr->handle = device->handle; -+ strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME); -+ strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS); -+ device->driver_data = pr; -+ -+ result = xen_acpi_processor_get_info(device); -+ if (result) { -+ /* Processor is physically not present */ -+ return 0; -+ } -+ -+ /* -+ * Buggy BIOS check -+ * ACPI id of processors can be reported wrongly by the BIOS. 
-+ * Don't trust it blindly -+ */ -+ if (pr->acpi_id > XEN_MAX_ACPI_ID || -+ (processor_device_array[pr->acpi_id] != NULL && -+ processor_device_array[pr->acpi_id] != device)) { -+ printk(KERN_WARNING "BIOS reported wrong ACPI id " -+ "for the processor\n"); -+ result = -ENODEV; -+ goto err_free_cpumask; -+ } -+ -+ processor_device_array[pr->acpi_id] = device; -+ -+ if (pr->id != -1) { -+ per_cpu(processors, pr->id) = pr; -+ -+ result = acpi_processor_add_fs(device); -+ if (result) -+ goto err_free_cpumask; -+ -+ sysdev = get_cpu_sysdev(pr->id); -+ if (sysdev != NULL && sysfs_create_link(&device->dev.kobj, -+ &sysdev->kobj, "sysdev")) { -+ result = -EFAULT; -+ goto err_remove_fs; -+ } -+ } -+ -+ /* _PDC call should be done before doing anything else (if reqd.). */ -+ xen_arch_acpi_processor_init_pdc(pr); -+ acpi_processor_set_pdc(pr); -+ arch_acpi_processor_cleanup_pdc(pr); -+ -+#ifdef CONFIG_CPU_FREQ -+ xen_acpi_processor_ppc_has_changed(pr); -+ result = xen_acpi_processor_get_performance(pr); -+ if (result) -+ goto err_remove_fs; -+#endif -+ -+ if (pr->id != -1) { -+ acpi_processor_get_throttling_info(pr); -+ acpi_processor_get_limit_info(pr); -+ } -+ -+ xen_acpi_processor_power_init(pr, device); -+ -+ if (pr->id != -1) { -+ pr->cdev = thermal_cooling_device_register("Processor", device, -+ &processor_cooling_ops); -+ if (IS_ERR(pr->cdev)) { -+ result = PTR_ERR(pr->cdev); -+ goto err_power_exit; -+ } -+ -+ dev_info(&device->dev, "registered as cooling_device%d\n", -+ pr->cdev->id); -+ -+ result = sysfs_create_link(&device->dev.kobj, -+ &pr->cdev->device.kobj, -+ "thermal_cooling"); -+ if (result) { -+ printk(KERN_ERR PREFIX "Create sysfs link\n"); -+ goto err_thermal_unregister; -+ } -+ result = sysfs_create_link(&pr->cdev->device.kobj, -+ &device->dev.kobj, -+ "device"); -+ if (result) { -+ printk(KERN_ERR PREFIX "Create sysfs link\n"); -+ goto err_remove_sysfs; -+ } -+ } -+ -+ return 0; -+ -+err_remove_sysfs: -+ sysfs_remove_link(&device->dev.kobj, 
"thermal_cooling"); -+err_thermal_unregister: -+ thermal_cooling_device_unregister(pr->cdev); -+err_power_exit: -+ acpi_processor_power_exit(pr, device); -+err_remove_fs: -+ acpi_processor_remove_fs(device); -+err_free_cpumask: -+ free_cpumask_var(pr->throttling.shared_cpu_map); -+ -+ return result; -+} -+ -+static void xen_acpi_processor_notify(struct acpi_device *device, u32 event) -+{ -+ struct acpi_processor *pr = acpi_driver_data(device); -+ int saved; -+ -+ if (!pr) -+ return; -+ -+ switch (event) { -+ case ACPI_PROCESSOR_NOTIFY_PERFORMANCE: -+ saved = pr->performance_platform_limit; -+ xen_acpi_processor_ppc_has_changed(pr); -+ if (saved == pr->performance_platform_limit) -+ break; -+ acpi_bus_generate_proc_event(device, event, -+ pr->performance_platform_limit); -+ acpi_bus_generate_netlink_event(device->pnp.device_class, -+ dev_name(&device->dev), event, -+ pr->performance_platform_limit); -+ break; -+ case ACPI_PROCESSOR_NOTIFY_POWER: -+ xen_acpi_processor_cst_has_changed(pr); -+ acpi_bus_generate_proc_event(device, event, 0); -+ acpi_bus_generate_netlink_event(device->pnp.device_class, -+ dev_name(&device->dev), event, 0); -+ break; -+ case ACPI_PROCESSOR_NOTIFY_THROTTLING: -+ acpi_processor_tstate_has_changed(pr); -+ acpi_bus_generate_proc_event(device, event, 0); -+ acpi_bus_generate_netlink_event(device->pnp.device_class, -+ dev_name(&device->dev), event, 0); -+ default: -+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, -+ "Unsupported event [0x%x]\n", event)); -+ break; -+ } -+ -+ return; -+} -+ -+/* from processor_idle.c */ -+ -+static int xen_acpi_processor_get_power_info(struct acpi_processor *pr) -+{ -+ int ret; -+ int invalid_pr_id = 0; -+ -+ /* -+ * acpi_processor_get_power_info need valid pr->id -+ * so set pr->id=0 temporarily -+ */ -+ if (pr->id == -1) { -+ invalid_pr_id = 1; -+ pr->id = 0; -+ } -+ -+ ret = acpi_processor_get_power_info(pr); -+ -+ if (invalid_pr_id) -+ pr->id = -1; -+ -+ return ret; -+} -+ -+int xen_acpi_processor_cst_has_changed(struct 
acpi_processor *pr) -+{ -+ if (!pr) -+ return -EINVAL; -+ -+ if (!pr->flags.power_setup_done) -+ return -ENODEV; -+ -+ xen_acpi_processor_get_power_info(pr); -+ -+ processor_cntl_xen_notify(pr, -+ PROCESSOR_PM_CHANGE, PM_TYPE_IDLE); -+ -+ return 0; -+} -+ -+ -+int __cpuinit xen_acpi_processor_power_init(struct acpi_processor *pr, -+ struct acpi_device *device) -+{ -+ acpi_status status = 0; -+ unsigned int i; -+ -+ if (!pr) -+ return -EINVAL; -+ -+ if (acpi_gbl_FADT.cst_control) { -+ status = acpi_os_write_port(acpi_gbl_FADT.smi_command, -+ acpi_gbl_FADT.cst_control, 8); -+ if (ACPI_FAILURE(status)) { -+ ACPI_EXCEPTION((AE_INFO, status, -+ "Notifying BIOS of _CST ability failed")); -+ } -+ } -+ -+ xen_acpi_processor_get_power_info(pr); -+ -+ pr->flags.power_setup_done = 1; -+ -+ if (pr->flags.power) { -+ processor_cntl_xen_notify(pr, -+ PROCESSOR_PM_INIT, PM_TYPE_IDLE); -+ -+ printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id); -+ for (i = 1; i <= pr->power.count; i++) -+ if (pr->power.states[i].valid) -+ printk(" C%d[C%d]", i, -+ pr->power.states[i].type); -+ printk(")\n"); -+ } -+ -+ return 0; -+} -+ -+/* from processor_perflib.c */ -+ -+#ifdef CONFIG_CPU_FREQ -+static int xen_processor_notify_smm(void) -+{ -+ acpi_status status; -+ static int is_done; -+ -+ /* only need successfully notify BIOS once */ -+ /* avoid double notification which may lead to unexpected result */ -+ if (is_done) -+ return 0; -+ -+ /* Can't write pstate_cnt to smi_cmd if either value is zero */ -+ if ((!acpi_gbl_FADT.smi_command) || (!acpi_gbl_FADT.pstate_control)) { -+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No SMI port or pstate_cnt\n")); -+ return 0; -+ } -+ -+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, -+ "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n", -+ acpi_gbl_FADT.pstate_control, acpi_gbl_FADT.smi_command)); -+ -+ status = acpi_os_write_port(acpi_gbl_FADT.smi_command, -+ (u32) acpi_gbl_FADT.pstate_control, 8); -+ if (ACPI_FAILURE(status)) -+ return status; -+ -+ is_done = 1; -+ -+ return 0; 
-+} -+ -+static int xen_acpi_processor_get_platform_limit(struct acpi_processor *pr) -+{ -+ acpi_status status = 0; -+ unsigned long long ppc = 0; -+ -+ if (!pr) -+ return -EINVAL; -+ -+ /* -+ * _PPC indicates the maximum state currently supported by the platform -+ * (e.g. 0 = states 0..n; 1 = states 1..n; etc. -+ */ -+ status = acpi_evaluate_integer(pr->handle, "_PPC", NULL, &ppc); -+ -+ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { -+ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PPC")); -+ return -ENODEV; -+ } -+ -+ pr->performance_platform_limit = (int)ppc; -+ -+ return 0; -+} -+ -+int xen_acpi_processor_ppc_has_changed(struct acpi_processor *pr) -+{ -+ int ret; -+ -+ ret = xen_acpi_processor_get_platform_limit(pr); -+ -+ if (ret < 0) -+ return ret; -+ else -+ return processor_cntl_xen_notify(pr, -+ PROCESSOR_PM_CHANGE, PM_TYPE_PERF); -+} -+ -+/* -+ * Existing ACPI module does parse performance states at some point, -+ * when acpi-cpufreq driver is loaded which however is something -+ * we'd like to disable to avoid confliction with xen PM -+ * logic. So we have to collect raw performance information here -+ * when ACPI processor object is found and started. -+ */ -+int xen_acpi_processor_get_performance(struct acpi_processor *pr) -+{ -+ int ret; -+ struct acpi_processor_performance *perf; -+ struct acpi_psd_package *pdomain; -+ -+ if (pr->performance) -+ return -EBUSY; -+ -+ perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL); -+ if (!perf) -+ return -ENOMEM; -+ -+ pr->performance = perf; -+ /* Get basic performance state information */ -+ ret = acpi_processor_get_performance_info(pr); -+ if (ret < 0) -+ goto err_out; -+ -+ /* -+ * Well, here we need retrieve performance dependency information -+ * from _PSD object. 
The reason why existing interface is not used -+ * is due to the reason that existing interface sticks to Linux cpu -+ * id to construct some bitmap, however we want to split ACPI -+ * processor objects from Linux cpu id logic. For example, even -+ * when Linux is configured as UP, we still want to parse all ACPI -+ * processor objects to xen. In this case, it's preferred -+ * to use ACPI ID instead. -+ */ -+ pdomain = &pr->performance->domain_info; -+ pdomain->num_processors = 0; -+ ret = acpi_processor_get_psd(pr); -+ if (ret < 0) { -+ /* -+ * _PSD is optional - assume no coordination if absent (or -+ * broken), matching native kernels' behavior. -+ */ -+ pdomain->num_entries = ACPI_PSD_REV0_ENTRIES; -+ pdomain->revision = ACPI_PSD_REV0_REVISION; -+ pdomain->domain = pr->acpi_id; -+ pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL; -+ pdomain->num_processors = 1; -+ } -+ -+ /* Some sanity check */ -+ if ((pdomain->revision != ACPI_PSD_REV0_REVISION) || -+ (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) || -+ ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) && -+ (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) && -+ (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) { -+ ret = -EINVAL; -+ goto err_out; -+ } -+ -+ /* Last step is to notify BIOS that xen exists */ -+ xen_processor_notify_smm(); -+ -+ processor_cntl_xen_notify(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF); -+ -+ return 0; -+err_out: -+ pr->performance = NULL; -+ kfree(perf); -+ return ret; -+} -+#endif /* CONFIG_CPU_FREQ */ -+ -+/* init and exit */ -+ -+int xen_acpi_processor_init(void) -+{ -+ return acpi_bus_register_driver(&xen_acpi_processor_driver); -+} -+ -+void xen_acpi_processor_exit(void) -+{ -+ acpi_bus_unregister_driver(&xen_acpi_processor_driver); -+} -diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c -index 0458094..85a1308 100644 ---- a/drivers/acpi/sleep.c -+++ b/drivers/acpi/sleep.c -@@ -19,6 +19,8 @@ - - #include <asm/io.h> - -+#include <xen/acpi.h> -+ - #include <acpi/acpi_bus.h> - 
#include <acpi/acpi_drivers.h> - -@@ -200,6 +202,21 @@ static int acpi_suspend_begin(suspend_state_t pm_state) - return error; - } - -+static void do_suspend(void) -+{ -+ if (!xen_pv_acpi()) { -+ do_suspend_lowlevel(); -+ return; -+ } -+ -+ /* -+ * Xen will save and restore CPU context, so -+ * we can skip that and just go straight to -+ * the suspend. -+ */ -+ acpi_enter_sleep_state(ACPI_STATE_S3); -+} -+ - /** - * acpi_suspend_enter - Actually enter a sleep state. - * @pm_state: ignored -@@ -233,7 +250,7 @@ static int acpi_suspend_enter(suspend_state_t pm_state) - break; - - case ACPI_STATE_S3: -- do_suspend_lowlevel(); -+ do_suspend(); - break; - } - -diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig -index 1d886e0..f4a2b10 100644 ---- a/drivers/block/Kconfig -+++ b/drivers/block/Kconfig -@@ -462,6 +462,7 @@ config XEN_BLKDEV_FRONTEND - tristate "Xen virtual block device support" - depends on XEN - default y -+ select XEN_XENBUS_FRONTEND - help - This driver implements the front-end of the Xen virtual - block device driver. It communicates with a back-end driver -diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c -index b8578bb..9679ffa 100644 ---- a/drivers/block/xen-blkfront.c -+++ b/drivers/block/xen-blkfront.c -@@ -42,10 +42,12 @@ - #include <linux/module.h> - #include <linux/scatterlist.h> - -+#include <xen/xen.h> - #include <xen/xenbus.h> - #include <xen/grant_table.h> - #include <xen/events.h> - #include <xen/page.h> -+#include <xen/platform_pci.h> - - #include <xen/interface/grant_table.h> - #include <xen/interface/io/blkif.h> -@@ -67,7 +69,7 @@ struct blk_shadow { - - static const struct block_device_operations xlvbd_block_fops; - --#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) -+#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) - - /* - * We have one of these per vbd, whether ide, scsi or 'other'. 
They -@@ -76,6 +78,7 @@ static const struct block_device_operations xlvbd_block_fops; - */ - struct blkfront_info - { -+ struct mutex mutex; - struct xenbus_device *xbdev; - struct gendisk *gd; - int vdevice; -@@ -85,6 +88,7 @@ struct blkfront_info - struct blkif_front_ring ring; - struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned int evtchn, irq; -+ struct tasklet_struct tasklet; - struct request_queue *rq; - struct work_struct work; - struct gnttab_free_callback callback; -@@ -93,14 +97,12 @@ struct blkfront_info - int feature_barrier; - int is_ready; - -- /** -- * The number of people holding this device open. We won't allow a -- * hot-unplug unless this is 0. -- */ -- int users; -+ spinlock_t io_lock; - }; - --static DEFINE_SPINLOCK(blkif_io_lock); -+static unsigned int nr_minors; -+static unsigned long *minors; -+static DEFINE_SPINLOCK(minor_lock); - - #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) -@@ -116,6 +118,10 @@ static DEFINE_SPINLOCK(blkif_io_lock); - #define EXTENDED (1<<EXT_SHIFT) - #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) - #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) -+#define EMULATED_HD_DISK_MINOR_OFFSET (0) -+#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) -+#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16)) -+#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4) - - #define DEV_NAME "xvd" /* name in /dev */ - -@@ -136,6 +142,55 @@ static void add_id_to_freelist(struct blkfront_info *info, - info->shadow_free = id; - } - -+static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) -+{ -+ unsigned int end = minor + nr; -+ int rc; -+ -+ if (end > nr_minors) { -+ unsigned long *bitmap, *old; -+ -+ bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap), -+ GFP_KERNEL); -+ if (bitmap == NULL) -+ return -ENOMEM; -+ -+ spin_lock(&minor_lock); -+ if (end > nr_minors) { -+ old = minors; -+ 
memcpy(bitmap, minors, -+ BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); -+ minors = bitmap; -+ nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; -+ } else -+ old = bitmap; -+ spin_unlock(&minor_lock); -+ kfree(old); -+ } -+ -+ spin_lock(&minor_lock); -+ if (find_next_bit(minors, end, minor) >= end) { -+ for (; minor < end; ++minor) -+ __set_bit(minor, minors); -+ rc = 0; -+ } else -+ rc = -EBUSY; -+ spin_unlock(&minor_lock); -+ -+ return rc; -+} -+ -+static void xlbd_release_minors(unsigned int minor, unsigned int nr) -+{ -+ unsigned int end = minor + nr; -+ -+ BUG_ON(end > nr_minors); -+ spin_lock(&minor_lock); -+ for (; minor < end; ++minor) -+ __clear_bit(minor, minors); -+ spin_unlock(&minor_lock); -+} -+ - static void blkif_restart_queue_callback(void *arg) - { - struct blkfront_info *info = (struct blkfront_info *)arg; -@@ -333,11 +388,12 @@ wait: - flush_requests(info); - } - --static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) -+static int xlvbd_init_blk_queue(struct blkfront_info *info, -+ struct gendisk *gd, u16 sector_size) - { - struct request_queue *rq; - -- rq = blk_init_queue(do_blkif_request, &blkif_io_lock); -+ rq = blk_init_queue(do_blkif_request, &info->io_lock); - if (rq == NULL) - return -1; - -@@ -370,20 +426,84 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) - static int xlvbd_barrier(struct blkfront_info *info) - { - int err; -+ const char *barrier; -+ -+ switch (info->feature_barrier) { -+ case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break; -+ case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break; -+ case QUEUE_ORDERED_NONE: barrier = "disabled"; break; -+ default: return -EINVAL; -+ } - -- err = blk_queue_ordered(info->rq, -- info->feature_barrier ? 
QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, -- NULL); -+ err = blk_queue_ordered(info->rq, info->feature_barrier, NULL); - - if (err) - return err; - - printk(KERN_INFO "blkfront: %s: barriers %s\n", -- info->gd->disk_name, -- info->feature_barrier ? "enabled" : "disabled"); -+ info->gd->disk_name, barrier); - return 0; - } - -+static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) -+{ -+ int major; -+ major = BLKIF_MAJOR(vdevice); -+ *minor = BLKIF_MINOR(vdevice); -+ switch (major) { -+ case XEN_IDE0_MAJOR: -+ *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET; -+ *minor = ((*minor / 64) * PARTS_PER_DISK) + -+ EMULATED_HD_DISK_MINOR_OFFSET; -+ break; -+ case XEN_IDE1_MAJOR: -+ *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET; -+ *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) + -+ EMULATED_HD_DISK_MINOR_OFFSET; -+ break; -+ case XEN_SCSI_DISK0_MAJOR: -+ *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET; -+ *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET; -+ break; -+ case XEN_SCSI_DISK1_MAJOR: -+ case XEN_SCSI_DISK2_MAJOR: -+ case XEN_SCSI_DISK3_MAJOR: -+ case XEN_SCSI_DISK4_MAJOR: -+ case XEN_SCSI_DISK5_MAJOR: -+ case XEN_SCSI_DISK6_MAJOR: -+ case XEN_SCSI_DISK7_MAJOR: -+ *offset = (*minor / PARTS_PER_DISK) + -+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) + -+ EMULATED_SD_DISK_NAME_OFFSET; -+ *minor = *minor + -+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) + -+ EMULATED_SD_DISK_MINOR_OFFSET; -+ break; -+ case XEN_SCSI_DISK8_MAJOR: -+ case XEN_SCSI_DISK9_MAJOR: -+ case XEN_SCSI_DISK10_MAJOR: -+ case XEN_SCSI_DISK11_MAJOR: -+ case XEN_SCSI_DISK12_MAJOR: -+ case XEN_SCSI_DISK13_MAJOR: -+ case XEN_SCSI_DISK14_MAJOR: -+ case XEN_SCSI_DISK15_MAJOR: -+ *offset = (*minor / PARTS_PER_DISK) + -+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) + -+ EMULATED_SD_DISK_NAME_OFFSET; -+ *minor = *minor + -+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) + -+ EMULATED_SD_DISK_MINOR_OFFSET; -+ break; -+ case 
XENVBD_MAJOR: -+ *offset = *minor / PARTS_PER_DISK; -+ break; -+ default: -+ printk(KERN_WARNING "blkfront: your disk configuration is " -+ "incorrect, please use an xvd device instead\n"); -+ return -ENODEV; -+ } -+ return 0; -+} - - static int xlvbd_alloc_gendisk(blkif_sector_t capacity, - struct blkfront_info *info, -@@ -391,7 +511,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, - { - struct gendisk *gd; - int nr_minors = 1; -- int err = -ENODEV; -+ int err; - unsigned int offset; - int minor; - int nr_parts; -@@ -406,21 +526,33 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, - } - - if (!VDEV_IS_EXTENDED(info->vdevice)) { -- minor = BLKIF_MINOR(info->vdevice); -- nr_parts = PARTS_PER_DISK; -+ err = xen_translate_vdev(info->vdevice, &minor, &offset); -+ if (err) -+ return err; -+ nr_parts = PARTS_PER_DISK; - } else { - minor = BLKIF_MINOR_EXT(info->vdevice); - nr_parts = PARTS_PER_EXT_DISK; -+ offset = minor / nr_parts; -+ if (xen_hvm_domain() && minor >= EMULATED_HD_DISK_MINOR_OFFSET) { -+ printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " -+ "emulated IDE and SCSI disks; ignoring", info->vdevice); -+ return -ENODEV; -+ } - } -+ err = -ENODEV; - - if ((minor % nr_parts) == 0) - nr_minors = nr_parts; - -- gd = alloc_disk(nr_minors); -- if (gd == NULL) -+ err = xlbd_reserve_minors(minor, nr_minors); -+ if (err) - goto out; -+ err = -ENODEV; - -- offset = minor / nr_parts; -+ gd = alloc_disk(nr_minors); -+ if (gd == NULL) -+ goto release; - - if (nr_minors > 1) { - if (offset < 26) -@@ -447,16 +579,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, - gd->driverfs_dev = &(info->xbdev->dev); - set_capacity(gd, capacity); - -- if (xlvbd_init_blk_queue(gd, sector_size)) { -+ if (xlvbd_init_blk_queue(info, gd, sector_size)) { - del_gendisk(gd); -- goto out; -+ goto release; - } - - info->rq = gd->queue; - info->gd = gd; - -- if (info->feature_barrier) -- xlvbd_barrier(info); -+ xlvbd_barrier(info); - - if (vdisk_info & 
VDISK_READONLY) - set_disk_ro(gd, 1); -@@ -469,10 +600,45 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, - - return 0; - -+ release: -+ xlbd_release_minors(minor, nr_minors); - out: - return err; - } - -+static void xlvbd_release_gendisk(struct blkfront_info *info) -+{ -+ unsigned int minor, nr_minors; -+ unsigned long flags; -+ -+ if (info->rq == NULL) -+ return; -+ -+ spin_lock_irqsave(&info->io_lock, flags); -+ -+ /* No more blkif_request(). */ -+ blk_stop_queue(info->rq); -+ -+ /* No more gnttab callback work. */ -+ gnttab_cancel_free_callback(&info->callback); -+ spin_unlock_irqrestore(&info->io_lock, flags); -+ -+ /* Flush gnttab callback work. Must be done with no locks held. */ -+ flush_scheduled_work(); -+ -+ del_gendisk(info->gd); -+ -+ minor = info->gd->first_minor; -+ nr_minors = info->gd->minors; -+ xlbd_release_minors(minor, nr_minors); -+ -+ blk_cleanup_queue(info->rq); -+ info->rq = NULL; -+ -+ put_disk(info->gd); -+ info->gd = NULL; -+} -+ - static void kick_pending_request_queues(struct blkfront_info *info) - { - if (!RING_FULL(&info->ring)) { -@@ -487,16 +653,16 @@ static void blkif_restart_queue(struct work_struct *work) - { - struct blkfront_info *info = container_of(work, struct blkfront_info, work); - -- spin_lock_irq(&blkif_io_lock); -+ spin_lock_irq(&info->io_lock); - if (info->connected == BLKIF_STATE_CONNECTED) - kick_pending_request_queues(info); -- spin_unlock_irq(&blkif_io_lock); -+ spin_unlock_irq(&info->io_lock); - } - - static void blkif_free(struct blkfront_info *info, int suspend) - { - /* Prevent new requests being issued until we fix things up. */ -- spin_lock_irq(&blkif_io_lock); -+ spin_lock_irq(&info->io_lock); - info->connected = suspend ? - BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; - /* No more blkif_request(). */ -@@ -504,7 +670,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) - blk_stop_queue(info->rq); - /* No more gnttab callback work. 
*/ - gnttab_cancel_free_callback(&info->callback); -- spin_unlock_irq(&blkif_io_lock); -+ spin_unlock_irq(&info->io_lock); - - /* Flush gnttab callback work. Must be done with no locks held. */ - flush_scheduled_work(); -@@ -529,21 +695,20 @@ static void blkif_completion(struct blk_shadow *s) - gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); - } - --static irqreturn_t blkif_interrupt(int irq, void *dev_id) -+static void -+blkif_do_interrupt(unsigned long data) - { -+ struct blkfront_info *info = (struct blkfront_info *)data; - struct request *req; - struct blkif_response *bret; - RING_IDX i, rp; - unsigned long flags; -- struct blkfront_info *info = (struct blkfront_info *)dev_id; - int error; - -- spin_lock_irqsave(&blkif_io_lock, flags); -+ spin_lock_irqsave(&info->io_lock, flags); - -- if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { -- spin_unlock_irqrestore(&blkif_io_lock, flags); -- return IRQ_HANDLED; -- } -+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) -+ goto out; - - again: - rp = info->ring.sring->rsp_prod; -@@ -567,7 +732,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) - printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", - info->gd->disk_name); - error = -EOPNOTSUPP; -- info->feature_barrier = 0; -+ info->feature_barrier = QUEUE_ORDERED_NONE; - xlvbd_barrier(info); - } - /* fall through */ -@@ -596,7 +761,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) - - kick_pending_request_queues(info); - -- spin_unlock_irqrestore(&blkif_io_lock, flags); -+out: -+ spin_unlock_irqrestore(&info->io_lock, flags); -+} -+ -+ -+static irqreturn_t -+blkif_interrupt(int irq, void *dev_id) -+{ -+ struct blkfront_info *info = (struct blkfront_info *)dev_id; -+ -+ tasklet_schedule(&info->tasklet); - - return IRQ_HANDLED; - } -@@ -650,7 +825,7 @@ fail: - - - /* Common code used when first setting up, and when resuming. 
*/ --static int talk_to_backend(struct xenbus_device *dev, -+static int talk_to_blkback(struct xenbus_device *dev, - struct blkfront_info *info) - { - const char *message = NULL; -@@ -710,7 +885,6 @@ again: - return err; - } - -- - /** - * Entry point to this code when a new device is created. Allocate the basic - * structures and the ring buffer for communication with the backend, and -@@ -736,16 +910,48 @@ static int blkfront_probe(struct xenbus_device *dev, - } - } - -+ if (xen_hvm_domain()) { -+ char *type; -+ int len; -+ /* no unplug has been done: do not hook devices != xen vbds */ -+ if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { -+ int major; -+ -+ if (!VDEV_IS_EXTENDED(vdevice)) -+ major = BLKIF_MAJOR(vdevice); -+ else -+ major = XENVBD_MAJOR; -+ -+ if (major != XENVBD_MAJOR) { -+ printk(KERN_INFO -+ "%s: HVM does not support vbd %d as xen block device\n", -+ __FUNCTION__, vdevice); -+ return -ENODEV; -+ } -+ } -+ /* do not create a PV cdrom device if we are an HVM guest */ -+ type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len); -+ if (IS_ERR(type)) -+ return -ENODEV; -+ if (strncmp(type, "cdrom", 5) == 0) { -+ kfree(type); -+ return -ENODEV; -+ } -+ kfree(type); -+ } - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); - return -ENOMEM; - } - -+ mutex_init(&info->mutex); - info->xbdev = dev; - info->vdevice = vdevice; - info->connected = BLKIF_STATE_DISCONNECTED; - INIT_WORK(&info->work, blkif_restart_queue); -+ spin_lock_init(&info->io_lock); -+ tasklet_init(&info->tasklet, blkif_do_interrupt, (unsigned long)info); - - for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.id = i+1; -@@ -755,7 +961,7 @@ static int blkfront_probe(struct xenbus_device *dev, - info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); - dev_set_drvdata(&dev->dev, info); - -- err = talk_to_backend(dev, info); -+ err = talk_to_blkback(dev, info); - if (err) { - 
kfree(info); - dev_set_drvdata(&dev->dev, NULL); -@@ -819,7 +1025,7 @@ static int blkif_recover(struct blkfront_info *info) - - xenbus_switch_state(info->xbdev, XenbusStateConnected); - -- spin_lock_irq(&blkif_io_lock); -+ spin_lock_irq(&info->io_lock); - - /* Now safe for us to use the shared ring */ - info->connected = BLKIF_STATE_CONNECTED; -@@ -830,7 +1036,7 @@ static int blkif_recover(struct blkfront_info *info) - /* Kick any other new requests queued since we resumed */ - kick_pending_request_queues(info); - -- spin_unlock_irq(&blkif_io_lock); -+ spin_unlock_irq(&info->io_lock); - - return 0; - } -@@ -850,13 +1056,50 @@ static int blkfront_resume(struct xenbus_device *dev) - - blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); - -- err = talk_to_backend(dev, info); -+ err = talk_to_blkback(dev, info); - if (info->connected == BLKIF_STATE_SUSPENDED && !err) - err = blkif_recover(info); - - return err; - } - -+static void -+blkfront_closing(struct blkfront_info *info) -+{ -+ struct xenbus_device *xbdev = info->xbdev; -+ struct block_device *bdev = NULL; -+ -+ mutex_lock(&info->mutex); -+ -+ if (xbdev->state == XenbusStateClosing) { -+ mutex_unlock(&info->mutex); -+ return; -+ } -+ -+ if (info->gd) -+ bdev = bdget_disk(info->gd, 0); -+ -+ mutex_unlock(&info->mutex); -+ -+ if (!bdev) { -+ xenbus_frontend_closed(xbdev); -+ return; -+ } -+ -+ mutex_lock(&bdev->bd_mutex); -+ -+ if (bdev->bd_openers) { -+ xenbus_dev_error(xbdev, -EBUSY, -+ "Device in use; refusing to close"); -+ xenbus_switch_state(xbdev, XenbusStateClosing); -+ } else { -+ xlvbd_release_gendisk(info); -+ xenbus_frontend_closed(xbdev); -+ } -+ -+ mutex_unlock(&bdev->bd_mutex); -+ bdput(bdev); -+} - - /* - * Invoked when the backend is finally 'ready' (and has told produced -@@ -868,11 +1111,31 @@ static void blkfront_connect(struct blkfront_info *info) - unsigned long sector_size; - unsigned int binfo; - int err; -- -- if ((info->connected == BLKIF_STATE_CONNECTED) || -- (info->connected == 
BLKIF_STATE_SUSPENDED) ) -+ int barrier; -+ -+ switch (info->connected) { -+ case BLKIF_STATE_CONNECTED: -+ /* -+ * Potentially, the back-end may be signalling -+ * a capacity change; update the capacity. -+ */ -+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, -+ "sectors", "%Lu", §ors); -+ if (XENBUS_EXIST_ERR(err)) -+ return; -+ printk(KERN_INFO "Setting capacity to %Lu\n", -+ sectors); -+ set_capacity(info->gd, sectors); -+ revalidate_disk(info->gd); -+ -+ /* fall through */ -+ case BLKIF_STATE_SUSPENDED: - return; - -+ default: -+ break; -+ } -+ - dev_dbg(&info->xbdev->dev, "%s:%s.\n", - __func__, info->xbdev->otherend); - -@@ -889,10 +1152,26 @@ static void blkfront_connect(struct blkfront_info *info) - } - - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, -- "feature-barrier", "%lu", &info->feature_barrier, -+ "feature-barrier", "%lu", &barrier, - NULL); -+ -+ /* -+ * If there's no "feature-barrier" defined, then it means -+ * we're dealing with a very old backend which writes -+ * synchronously; draining will do what needs to get done. -+ * -+ * If there are barriers, then we can do full queued writes -+ * with tagged barriers. -+ * -+ * If barriers are not supported, then there's no much we can -+ * do, so just set ordering to NONE. -+ */ - if (err) -- info->feature_barrier = 0; -+ info->feature_barrier = QUEUE_ORDERED_DRAIN; -+ else if (barrier) -+ info->feature_barrier = QUEUE_ORDERED_TAG; -+ else -+ info->feature_barrier = QUEUE_ORDERED_NONE; - - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); - if (err) { -@@ -904,10 +1183,10 @@ static void blkfront_connect(struct blkfront_info *info) - xenbus_switch_state(info->xbdev, XenbusStateConnected); - - /* Kick pending requests. 
*/ -- spin_lock_irq(&blkif_io_lock); -+ spin_lock_irq(&info->io_lock); - info->connected = BLKIF_STATE_CONNECTED; - kick_pending_request_queues(info); -- spin_unlock_irq(&blkif_io_lock); -+ spin_unlock_irq(&info->io_lock); - - add_disk(info->gd); - -@@ -915,57 +1194,21 @@ static void blkfront_connect(struct blkfront_info *info) - } - - /** -- * Handle the change of state of the backend to Closing. We must delete our -- * device-layer structures now, to ensure that writes are flushed through to -- * the backend. Once is this done, we can switch to Closed in -- * acknowledgement. -- */ --static void blkfront_closing(struct xenbus_device *dev) --{ -- struct blkfront_info *info = dev_get_drvdata(&dev->dev); -- unsigned long flags; -- -- dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename); -- -- if (info->rq == NULL) -- goto out; -- -- spin_lock_irqsave(&blkif_io_lock, flags); -- -- /* No more blkif_request(). */ -- blk_stop_queue(info->rq); -- -- /* No more gnttab callback work. */ -- gnttab_cancel_free_callback(&info->callback); -- spin_unlock_irqrestore(&blkif_io_lock, flags); -- -- /* Flush gnttab callback work. Must be done with no locks held. */ -- flush_scheduled_work(); -- -- blk_cleanup_queue(info->rq); -- info->rq = NULL; -- -- del_gendisk(info->gd); -- -- out: -- xenbus_frontend_closed(dev); --} -- --/** - * Callback received when the backend's state changes. 
- */ --static void backend_changed(struct xenbus_device *dev, -+static void blkback_changed(struct xenbus_device *dev, - enum xenbus_state backend_state) - { - struct blkfront_info *info = dev_get_drvdata(&dev->dev); -- struct block_device *bd; - -- dev_dbg(&dev->dev, "blkfront:backend_changed.\n"); -+ dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); - - switch (backend_state) { - case XenbusStateInitialising: - case XenbusStateInitWait: - case XenbusStateInitialised: -+ case XenbusStateReconfiguring: -+ case XenbusStateReconfigured: - case XenbusStateUnknown: - case XenbusStateClosed: - break; -@@ -975,35 +1218,56 @@ static void backend_changed(struct xenbus_device *dev, - break; - - case XenbusStateClosing: -- if (info->gd == NULL) { -- xenbus_frontend_closed(dev); -- break; -- } -- bd = bdget_disk(info->gd, 0); -- if (bd == NULL) -- xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); -- -- mutex_lock(&bd->bd_mutex); -- if (info->users > 0) -- xenbus_dev_error(dev, -EBUSY, -- "Device in use; refusing to close"); -- else -- blkfront_closing(dev); -- mutex_unlock(&bd->bd_mutex); -- bdput(bd); -+ blkfront_closing(info); - break; - } - } - --static int blkfront_remove(struct xenbus_device *dev) -+static int blkfront_remove(struct xenbus_device *xbdev) - { -- struct blkfront_info *info = dev_get_drvdata(&dev->dev); -+ struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); -+ struct block_device *bdev = NULL; -+ struct gendisk *disk; - -- dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename); -+ dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); - - blkif_free(info, 0); - -- kfree(info); -+ mutex_lock(&info->mutex); -+ -+ disk = info->gd; -+ if (disk) -+ bdev = bdget_disk(disk, 0); -+ -+ info->xbdev = NULL; -+ mutex_unlock(&info->mutex); -+ -+ if (!bdev) { -+ kfree(info); -+ return 0; -+ } -+ -+ /* -+ * The xbdev was removed before we reached the Closed -+ * state. See if it's safe to remove the disk. 
If the bdev -+ * isn't closed yet, we let release take care of it. -+ */ -+ -+ mutex_lock(&bdev->bd_mutex); -+ info = disk->private_data; -+ -+ dev_warn(disk_to_dev(disk), -+ "%s was hot-unplugged, %d stale handles\n", -+ xbdev->nodename, bdev->bd_openers); -+ -+ if (info && !bdev->bd_openers) { -+ xlvbd_release_gendisk(info); -+ disk->private_data = NULL; -+ kfree(info); -+ } -+ -+ mutex_unlock(&bdev->bd_mutex); -+ bdput(bdev); - - return 0; - } -@@ -1012,30 +1276,68 @@ static int blkfront_is_ready(struct xenbus_device *dev) - { - struct blkfront_info *info = dev_get_drvdata(&dev->dev); - -- return info->is_ready; -+ return info->is_ready && info->xbdev; - } - - static int blkif_open(struct block_device *bdev, fmode_t mode) - { -- struct blkfront_info *info = bdev->bd_disk->private_data; -- info->users++; -- return 0; -+ struct gendisk *disk = bdev->bd_disk; -+ struct blkfront_info *info; -+ int err = 0; -+ -+ info = disk->private_data; -+ if (!info) -+ /* xbdev gone */ -+ return -ERESTARTSYS; -+ -+ mutex_lock(&info->mutex); -+ -+ if (!info->gd) -+ /* xbdev is closed */ -+ err = -ERESTARTSYS; -+ -+ mutex_unlock(&info->mutex); -+ -+ return err; - } - - static int blkif_release(struct gendisk *disk, fmode_t mode) - { - struct blkfront_info *info = disk->private_data; -- info->users--; -- if (info->users == 0) { -- /* Check whether we have been instructed to close. We will -- have ignored this request initially, as the device was -- still mounted. */ -- struct xenbus_device *dev = info->xbdev; -- enum xenbus_state state = xenbus_read_driver_state(dev->otherend); -- -- if (state == XenbusStateClosing && info->is_ready) -- blkfront_closing(dev); -+ struct block_device *bdev; -+ struct xenbus_device *xbdev; -+ -+ bdev = bdget_disk(disk, 0); -+ bdput(bdev); -+ -+ if (bdev->bd_openers) -+ return 0; -+ -+ /* -+ * Check if we have been instructed to close. We will have -+ * deferred this request, because the bdev was still open. 
-+ */ -+ -+ mutex_lock(&info->mutex); -+ xbdev = info->xbdev; -+ -+ if (xbdev && xbdev->state == XenbusStateClosing) { -+ /* pending switch to state closed */ -+ dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); -+ xlvbd_release_gendisk(info); -+ xenbus_frontend_closed(info->xbdev); - } -+ -+ mutex_unlock(&info->mutex); -+ -+ if (!xbdev) { -+ /* sudden device removal */ -+ dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); -+ xlvbd_release_gendisk(info); -+ disk->private_data = NULL; -+ kfree(info); -+ } -+ - return 0; - } - -@@ -1061,7 +1363,7 @@ static struct xenbus_driver blkfront = { - .probe = blkfront_probe, - .remove = blkfront_remove, - .resume = blkfront_resume, -- .otherend_changed = backend_changed, -+ .otherend_changed = blkback_changed, - .is_ready = blkfront_is_ready, - }; - -diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c -index c496c8a..4064d95 100644 ---- a/drivers/char/agp/amd64-agp.c -+++ b/drivers/char/agp/amd64-agp.c -@@ -18,6 +18,8 @@ - #include <asm/k8.h> - #include <asm/gart.h> - #include "agp.h" -+#include <xen/page.h> -+#include <asm/xen/page.h> - - /* NVIDIA K8 registers */ - #define NVIDIA_X86_64_0_APBASE 0x10 -@@ -78,8 +80,21 @@ static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) - } - - for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { -+ phys_addr_t phys = page_to_phys(mem->pages[i]); -+ if (xen_pv_domain()) { -+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn( -+ page_to_pfn(mem->pages[i]))); -+ if (phys != xen_phys) { -+ printk(KERN_ERR "Fixing up GART: (0x%lx->0x%lx)." 
\ -+ " CODE UNTESTED!\n", -+ (unsigned long)phys, -+ (unsigned long)xen_phys); -+ WARN_ON_ONCE(phys != xen_phys); -+ phys = xen_phys; -+ } -+ } - tmp = agp_bridge->driver->mask_memory(agp_bridge, -- page_to_phys(mem->pages[i]), -+ phys, - mask_type); - - BUG_ON(tmp & 0xffffff0000000ffcULL); -@@ -181,6 +196,20 @@ static int amd_8151_configure(void) - unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); - int i; - -+ if (xen_pv_domain()) { -+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn( -+ virt_to_pfn(agp_bridge->gatt_table_real))); -+ /* Future thoughts: Perhaps use the gatt_table_bus that -+ * agp_generic_create_gatt_table has setup instead of -+ * doing the virt_to_phys once more? */ -+ if (gatt_bus != xen_phys) { -+ printk(KERN_ERR "Fixing up GATT: (0x%lx->0x%lx)." \ -+ " CODE UNTESTED!\n", gatt_bus, -+ (unsigned long)xen_phys); -+ WARN_ON_ONCE(gatt_bus != xen_phys); -+ gatt_bus = xen_phys; -+ } -+ } - /* Configure AGP regs in each x86-64 host bridge. */ - for (i = 0; i < num_k8_northbridges; i++) { - agp_bridge->gart_bus_addr = -diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c -index a56ca08..30fc4b6 100644 ---- a/drivers/char/agp/backend.c -+++ b/drivers/char/agp/backend.c -@@ -38,6 +38,8 @@ - #include <linux/vmalloc.h> - #include <asm/io.h> - #include "agp.h" -+#include <xen/page.h> -+#include <asm/xen/page.h> - - /* Due to XFree86 brain-damage, we can't go to 1.0 until they - * fix some real stupidity. 
It's only by chance we can bump -@@ -160,8 +162,13 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge) - } - } else { - bridge->scratch_page_dma = page_to_phys(page); -+ if (xen_pv_domain()) { -+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn( -+ page_to_pfn(page))); -+ if (bridge->scratch_page_dma != xen_phys) -+ bridge->scratch_page_dma = xen_phys; -+ } - } -- - bridge->scratch_page = bridge->driver->mask_memory(bridge, - bridge->scratch_page_dma, 0); - } -diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c -index c505439..2434c91 100644 ---- a/drivers/char/agp/generic.c -+++ b/drivers/char/agp/generic.c -@@ -42,6 +42,8 @@ - #include <asm/cacheflush.h> - #include <asm/pgtable.h> - #include "agp.h" -+#include <xen/page.h> -+#include <asm/xen/page.h> - - __u32 *agp_gatt_table; - int agp_memory_reserved; -@@ -1002,6 +1004,14 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge) - return -ENOMEM; - } - bridge->gatt_bus_addr = virt_to_phys(bridge->gatt_table_real); -+ /* KRW: virt_to_phys under Xen is not safe. */ -+ if (xen_pv_domain()) { -+ /* Use back-door to get the "real" PFN. */ -+ phys_addr_t pfn = virt_to_pfn(bridge->gatt_table_real); -+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn(pfn)); -+ if (bridge->gatt_bus_addr != xen_phys) -+ bridge->gatt_bus_addr = xen_phys; -+ } - - /* AK: bogus, should encode addresses > 4GB */ - for (i = 0; i < num_entries; i++) { -@@ -1141,8 +1151,17 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type) - } - - for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { -+ phys_addr_t phys = page_to_phys(mem->pages[i]); -+ -+ /* HACK: Via a back-door we get the bus address. 
*/ -+ if (xen_pv_domain()) { -+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn( -+ page_to_pfn(mem->pages[i]))); -+ if (phys != xen_phys) -+ phys = xen_phys; -+ } - writel(bridge->driver->mask_memory(bridge, -- page_to_phys(mem->pages[i]), -+ phys, - mask_type), - bridge->gatt_table+j); - } -@@ -1235,7 +1254,16 @@ int agp_generic_alloc_pages(struct agp_bridge_data *bridge, struct agp_memory *m - int i, ret = -ENOMEM; - - for (i = 0; i < num_pages; i++) { -- page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); -+ if (xen_pv_domain()) { -+ void *addr; -+ dma_addr_t _d; -+ -+ addr = dma_alloc_coherent(NULL, PAGE_SIZE, &_d, GFP_KERNEL); -+ if (!addr) -+ goto out; -+ page = virt_to_page(addr); -+ } else -+ page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); - /* agp_free_memory() needs gart address */ - if (page == NULL) - goto out; -@@ -1263,7 +1291,17 @@ struct page *agp_generic_alloc_page(struct agp_bridge_data *bridge) - { - struct page * page; - -- page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); -+ if (xen_pv_domain()) { -+ void *addr; -+ dma_addr_t _d; -+ -+ addr = dma_alloc_coherent(NULL, PAGE_SIZE, &_d, GFP_KERNEL); -+ if (!addr) -+ return NULL; -+ page = virt_to_page(addr); -+ } else -+ page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); -+ - if (page == NULL) - return NULL; - -@@ -1294,7 +1332,12 @@ void agp_generic_destroy_pages(struct agp_memory *mem) - unmap_page_from_agp(page); - #endif - put_page(page); -- __free_page(page); -+ if (xen_pv_domain()) { -+ void *addr = page_address(page); -+ dma_free_coherent(NULL, PAGE_SIZE, addr, -+ virt_to_bus(addr)); -+ } else -+ __free_page(page); - atomic_dec(&agp_bridge->current_memory_agp); - mem->pages[i] = NULL; - } -@@ -1311,7 +1354,12 @@ void agp_generic_destroy_page(struct page *page, int flags) - - if (flags & AGP_PAGE_DESTROY_FREE) { - put_page(page); -- __free_page(page); -+ if (xen_pv_domain()) { -+ void *addr = page_address(page); -+ dma_free_coherent(NULL, PAGE_SIZE, addr, -+ 
virt_to_bus(addr)); -+ } else -+ __free_page(page); - atomic_dec(&agp_bridge->current_memory_agp); - } - } -diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c -index b8e0219..7a62c3c 100644 ---- a/drivers/char/agp/intel-agp.c -+++ b/drivers/char/agp/intel-agp.c -@@ -10,14 +10,20 @@ - #include <linux/agp_backend.h> - #include <asm/smp.h> - #include "agp.h" -+#include <xen/page.h> -+#include <asm/xen/page.h> - - /* - * If we have Intel graphics, we're not going to have anything other than - * an Intel IOMMU. So make the correct use of the PCI DMA API contingent - * on the Intel IOMMU support (CONFIG_DMAR). - * Only newer chipsets need to bother with this, of course. -+ * -+ * Xen guests accessing graphics hardware also need proper translation -+ * between pseudo-physical addresses and real machine addresses, which -+ * is also achieved by using the DMA API. - */ --#ifdef CONFIG_DMAR -+#if defined(CONFIG_DMAR) || defined(CONFIG_XEN) - #define USE_PCI_DMA_API 1 - #endif - -@@ -296,8 +302,20 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem, - int i, j; - - for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { -+ phys_addr_t phys = page_to_phys(mem->pages[i]); -+ if (xen_pv_domain()) { -+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn( -+ page_to_pfn(mem->pages[i]))); -+ if (xen_phys != phys) { -+ printk(KERN_ERR "Compile kernel with " \ -+ "CONFIG_DMAR to get rid of this " \ -+ "warning!\n"); -+ WARN_ON_ONCE(xen_phys != phys); -+ /* Fixup: */ -+ phys = xen_phys; -+ } - writel(agp_bridge->driver->mask_memory(agp_bridge, -- page_to_phys(mem->pages[i]), mask_type), -+ phys, mask_type), - intel_private.gtt+j); - } - -@@ -395,15 +413,19 @@ static void intel_i810_agp_enable(struct agp_bridge_data *bridge, u32 mode) - /* Exists to support ARGB cursors */ - static struct page *i8xx_alloc_pages(void) - { -+ void *addr; -+ dma_addr_t _d; - struct page *page; - -- page = alloc_pages(GFP_KERNEL | GFP_DMA32, 2); -- if (page == NULL) -+ addr = 
dma_alloc_coherent(NULL, 4 * PAGE_SIZE, &_d, GFP_KERNEL); -+ if (addr == NULL) - return NULL; - -+ page = virt_to_page(addr); -+ - if (set_pages_uc(page, 4) < 0) { - set_pages_wb(page, 4); -- __free_pages(page, 2); -+ dma_free_coherent(NULL, 4 * PAGE_SIZE, addr, _d); - return NULL; - } - get_page(page); -@@ -413,12 +435,17 @@ static struct page *i8xx_alloc_pages(void) - - static void i8xx_destroy_pages(struct page *page) - { -+ void *addr; -+ - if (page == NULL) - return; - - set_pages_wb(page, 4); - put_page(page); -- __free_pages(page, 2); -+ -+ addr = page_address(page); -+ -+ dma_free_coherent(NULL, 4 * PAGE_SIZE, addr, virt_to_bus(addr)); - atomic_dec(&agp_bridge->current_memory_agp); - } - -@@ -478,8 +505,16 @@ static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, - if (!mem->is_flushed) - global_cache_flush(); - for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { -+ phys_addr_t phys = page_to_phys(mem->pages[i]); -+ if (xen_pv_domain()) { -+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn( -+ page_to_pfn(mem->pages[i]))); -+ /* Fixup: */ -+ if (xen_phys != phys) -+ phys = xen_phys; -+ } - writel(agp_bridge->driver->mask_memory(agp_bridge, -- page_to_phys(mem->pages[i]), mask_type), -+ phys, mask_type), - intel_private.registers+I810_PTE_BASE+(j*4)); - } - readl(intel_private.registers+I810_PTE_BASE+((j-1)*4)); -@@ -552,6 +587,12 @@ static struct agp_memory *alloc_agpphysmem_i8xx(size_t pg_count, int type) - new->num_scratch_pages = pg_count; - new->type = AGP_PHYS_MEMORY; - new->physical = page_to_phys(new->pages[0]); -+ if (xen_pv_domain()) { -+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn( -+ page_to_pfn(new->pages[0]))); -+ if (xen_phys != new->physical) -+ new->physical = xen_phys; -+ } - return new; - } - -@@ -992,8 +1033,16 @@ static int intel_i830_insert_entries(struct agp_memory *mem, off_t pg_start, - global_cache_flush(); - - for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { -+ phys_addr_t phys = 
page_to_phys(mem->pages[i]); -+ if (xen_pv_domain()) { -+ phys_addr_t xen_phys = PFN_PHYS(pfn_to_mfn( -+ page_to_pfn(mem->pages[i]))); -+ /* Fixup: */ -+ if (xen_phys != phys) -+ phys = xen_phys; -+ } - writel(agp_bridge->driver->mask_memory(agp_bridge, -- page_to_phys(mem->pages[i]), mask_type), -+ phys, mask_type), - intel_private.registers+I810_PTE_BASE+(j*4)); - } - readl(intel_private.registers+I810_PTE_BASE+((j-1)*4)); -diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c -index a6ee32b..a7c6529 100644 ---- a/drivers/char/hvc_xen.c -+++ b/drivers/char/hvc_xen.c -@@ -25,6 +25,8 @@ - #include <linux/types.h> - - #include <asm/xen/hypervisor.h> -+ -+#include <xen/xen.h> - #include <xen/page.h> - #include <xen/events.h> - #include <xen/interface/io/console.h> -@@ -72,11 +74,12 @@ static int __write_console(const char *data, int len) - wmb(); /* write ring before updating pointer */ - intf->out_prod = prod; - -- notify_daemon(); -+ if (sent) -+ notify_daemon(); - return sent; - } - --static int write_console(uint32_t vtermno, const char *data, int len) -+static int domU_write_console(uint32_t vtermno, const char *data, int len) - { - int ret = len; - -@@ -99,7 +102,7 @@ static int write_console(uint32_t vtermno, const char *data, int len) - return ret; - } - --static int read_console(uint32_t vtermno, char *buf, int len) -+static int domU_read_console(uint32_t vtermno, char *buf, int len) - { - struct xencons_interface *intf = xencons_interface(); - XENCONS_RING_IDX cons, prod; -@@ -120,28 +123,63 @@ static int read_console(uint32_t vtermno, char *buf, int len) - return recv; - } - --static struct hv_ops hvc_ops = { -- .get_chars = read_console, -- .put_chars = write_console, -+static struct hv_ops domU_hvc_ops = { -+ .get_chars = domU_read_console, -+ .put_chars = domU_write_console, - .notifier_add = notifier_add_irq, - .notifier_del = notifier_del_irq, - .notifier_hangup = notifier_hangup_irq, - }; - --static int __init xen_init(void) -+static int 
dom0_read_console(uint32_t vtermno, char *buf, int len) -+{ -+ return HYPERVISOR_console_io(CONSOLEIO_read, len, buf); -+} -+ -+/* -+ * Either for a dom0 to write to the system console, or a domU with a -+ * debug version of Xen -+ */ -+static int dom0_write_console(uint32_t vtermno, const char *str, int len) -+{ -+ int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); -+ if (rc < 0) -+ return 0; -+ -+ return len; -+} -+ -+static struct hv_ops dom0_hvc_ops = { -+ .get_chars = dom0_read_console, -+ .put_chars = dom0_write_console, -+ .notifier_add = notifier_add_irq, -+ .notifier_del = notifier_del_irq, -+ .notifier_hangup = notifier_hangup_irq, -+}; -+ -+static int __init xen_hvc_init(void) - { - struct hvc_struct *hp; -+ struct hv_ops *ops; - -- if (!xen_pv_domain() || -- xen_initial_domain() || -- !xen_start_info->console.domU.evtchn) -+ if (!xen_pv_domain()) - return -ENODEV; - -- xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); -+ if (xen_initial_domain()) { -+ ops = &dom0_hvc_ops; -+ xencons_irq = bind_virq_to_irq(VIRQ_CONSOLE, 0); -+ } else { -+ if (!xen_start_info->console.domU.evtchn) -+ return -ENODEV; -+ -+ ops = &domU_hvc_ops; -+ xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); -+ } -+ - if (xencons_irq < 0) - xencons_irq = 0; /* NO_IRQ */ - -- hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); -+ hp = hvc_alloc(HVC_COOKIE, xencons_irq, ops, 256); - if (IS_ERR(hp)) - return PTR_ERR(hp); - -@@ -158,7 +196,7 @@ void xen_console_resume(void) - rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq); - } - --static void __exit xen_fini(void) -+static void __exit xen_hvc_fini(void) - { - if (hvc) - hvc_remove(hvc); -@@ -166,29 +204,24 @@ static void __exit xen_fini(void) - - static int xen_cons_init(void) - { -+ struct hv_ops *ops; -+ - if (!xen_pv_domain()) - return 0; - -- hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); -+ ops = &domU_hvc_ops; -+ if (xen_initial_domain()) -+ ops = 
&dom0_hvc_ops; -+ -+ hvc_instantiate(HVC_COOKIE, 0, ops); -+ - return 0; - } - --module_init(xen_init); --module_exit(xen_fini); -+module_init(xen_hvc_init); -+module_exit(xen_hvc_fini); - console_initcall(xen_cons_init); - --static void raw_console_write(const char *str, int len) --{ -- while(len > 0) { -- int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); -- if (rc <= 0) -- break; -- -- str += rc; -- len -= rc; -- } --} -- - #ifdef CONFIG_EARLY_PRINTK - static void xenboot_write_console(struct console *console, const char *string, - unsigned len) -@@ -196,19 +229,22 @@ static void xenboot_write_console(struct console *console, const char *string, - unsigned int linelen, off = 0; - const char *pos; - -- raw_console_write(string, len); -+ dom0_write_console(0, string, len); -+ -+ if (xen_initial_domain()) -+ return; - -- write_console(0, "(early) ", 8); -+ domU_write_console(0, "(early) ", 8); - while (off < len && NULL != (pos = strchr(string+off, '\n'))) { - linelen = pos-string+off; - if (off + linelen > len) - break; -- write_console(0, string+off, linelen); -- write_console(0, "\r\n", 2); -+ domU_write_console(0, string+off, linelen); -+ domU_write_console(0, "\r\n", 2); - off += linelen + 1; - } - if (off < len) -- write_console(0, string+off, len-off); -+ domU_write_console(0, string+off, len-off); - } - - struct console xenboot_console = { -@@ -220,7 +256,7 @@ struct console xenboot_console = { - - void xen_raw_console_write(const char *str) - { -- raw_console_write(str, strlen(str)); -+ dom0_write_console(0, str, strlen(str)); - } - - void xen_raw_printk(const char *fmt, ...) 
-diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c -index cbaf420..163459d 100644 ---- a/drivers/firewire/net.c -+++ b/drivers/firewire/net.c -@@ -8,7 +8,6 @@ - - #include <linux/bug.h> - #include <linux/device.h> --#include <linux/ethtool.h> - #include <linux/firewire.h> - #include <linux/firewire-constants.h> - #include <linux/highmem.h> -@@ -1333,17 +1332,6 @@ static int fwnet_change_mtu(struct net_device *net, int new_mtu) - return 0; - } - --static void fwnet_get_drvinfo(struct net_device *net, -- struct ethtool_drvinfo *info) --{ -- strcpy(info->driver, KBUILD_MODNAME); -- strcpy(info->bus_info, "ieee1394"); --} -- --static const struct ethtool_ops fwnet_ethtool_ops = { -- .get_drvinfo = fwnet_get_drvinfo, --}; -- - static const struct net_device_ops fwnet_netdev_ops = { - .ndo_open = fwnet_open, - .ndo_stop = fwnet_stop, -@@ -1362,7 +1350,6 @@ static void fwnet_init_dev(struct net_device *net) - net->hard_header_len = FWNET_HLEN; - net->type = ARPHRD_IEEE1394; - net->tx_queue_len = 10; -- SET_ETHTOOL_OPS(net, &fwnet_ethtool_ops); - } - - /* caller must hold fwnet_device_mutex */ -diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c -index 0e27d98..f5e2572 100644 ---- a/drivers/gpu/drm/drm_drv.c -+++ b/drivers/gpu/drm/drm_drv.c -@@ -201,7 +201,7 @@ int drm_lastclose(struct drm_device * dev) - } - if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg && - !drm_core_check_feature(dev, DRIVER_MODESET)) { -- drm_sg_cleanup(dev->sg); -+ drm_sg_cleanup(dev, dev->sg); - dev->sg = NULL; - } - -diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c -index 8bf3770..dde5f66 100644 ---- a/drivers/gpu/drm/drm_gem.c -+++ b/drivers/gpu/drm/drm_gem.c -@@ -539,7 +539,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) - vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; - vma->vm_ops = obj->dev->driver->gem_vm_ops; - vma->vm_private_data = map->handle; -- vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); -+ vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); - - /* Take a ref for this mapping of the object, so that the fault - * handler can dereference the mmap offset's pointer to the object. -diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c -index c7823c8..95ffb8a 100644 ---- a/drivers/gpu/drm/drm_scatter.c -+++ b/drivers/gpu/drm/drm_scatter.c -@@ -32,20 +32,73 @@ - */ - - #include <linux/vmalloc.h> -+#include <linux/mm.h> - #include "drmP.h" - - #define DEBUG_SCATTER 0 - --static inline void *drm_vmalloc_dma(unsigned long size) -+static void *drm_vmalloc_dma(struct drm_device *drmdev, unsigned long size) - { - #if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE) - return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL | _PAGE_NO_CACHE); - #else -- return vmalloc_32(size); -+ struct device *dev = &drmdev->pdev->dev; -+ struct page **pages; -+ void *addr; -+ const int npages = PFN_UP(size); -+ int i; -+ -+ pages = kmalloc(npages * sizeof(*pages), GFP_KERNEL); -+ if (!pages) -+ goto fail; -+ -+ for (i = 0; i < npages; i++) { -+ dma_addr_t phys; -+ void *addr; -+ addr = dma_alloc_coherent(dev, PAGE_SIZE, &phys, GFP_KERNEL); -+ if (addr == NULL) -+ goto out_free_pages; -+ -+ pages[i] = virt_to_page(addr); -+ } -+ -+ addr = vmap(pages, npages, VM_MAP | VM_IOREMAP, PAGE_KERNEL); -+ -+ kfree(pages); -+ -+ return addr; -+ -+out_free_pages: -+ while (i > 0) { -+ void *addr = page_address(pages[--i]); -+ dma_free_coherent(dev, PAGE_SIZE, addr, virt_to_bus(addr)); -+ } -+ -+ kfree(pages); -+ -+fail: -+ return NULL; -+#endif -+} -+ -+static void drm_vfree_dma(struct drm_device *drmdev, void *addr, int npages, -+ struct page **pages) -+{ -+#if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE) -+ vfree(addr); -+#else -+ struct device *dev = &drmdev->pdev->dev; -+ int i; -+ -+ for (i = 0; i < npages; i++) { -+ void *addr = page_address(pages[i]); -+ 
dma_free_coherent(dev, PAGE_SIZE, addr, virt_to_bus(addr)); -+ } -+ vunmap(addr); - #endif - } - --void drm_sg_cleanup(struct drm_sg_mem * entry) -+void drm_sg_cleanup(struct drm_device *drmdev, struct drm_sg_mem * entry) - { - struct page *page; - int i; -@@ -56,7 +109,7 @@ void drm_sg_cleanup(struct drm_sg_mem * entry) - ClearPageReserved(page); - } - -- vfree(entry->virtual); -+ drm_vfree_dma(drmdev, entry->virtual, entry->pages, entry->pagelist); - - kfree(entry->busaddr); - kfree(entry->pagelist); -@@ -107,7 +160,7 @@ int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request) - } - memset((void *)entry->busaddr, 0, pages * sizeof(*entry->busaddr)); - -- entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT); -+ entry->virtual = drm_vmalloc_dma(dev, pages << PAGE_SHIFT); - if (!entry->virtual) { - kfree(entry->busaddr); - kfree(entry->pagelist); -@@ -180,7 +233,7 @@ int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request) - return 0; - - failed: -- drm_sg_cleanup(entry); -+ drm_sg_cleanup(dev, entry); - return -ENOMEM; - } - EXPORT_SYMBOL(drm_sg_alloc); -@@ -212,7 +265,7 @@ int drm_sg_free(struct drm_device *dev, void *data, - - DRM_DEBUG("virtual = %p\n", entry->virtual); - -- drm_sg_cleanup(entry); -+ drm_sg_cleanup(dev, entry); - - return 0; - } -diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c -index 1c040d0..e3555bf 100644 ---- a/drivers/gpu/drm/ttm/ttm_bo_vm.c -+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c -@@ -87,6 +87,9 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) - bool is_iomem; - unsigned long address = (unsigned long)vmf->virtual_address; - int retval = VM_FAULT_NOPAGE; -+ bool vm_io = (vma->vm_flags & VM_IO) && VM_IO; -+ bool pte_iomap = (pgprot_val(vma->vm_page_prot) & _PAGE_IOMAP) -+ && _PAGE_IOMAP; - - /* - * Work around locking order reversal in fault / nopfn -@@ -158,11 +161,30 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct 
vm_fault *vmf) - if (is_iomem) { - vma->vm_page_prot = ttm_io_prot(bo->mem.placement, - vma->vm_page_prot); -+ if (!vm_io || !pte_iomap) { -+ vma->vm_flags |= VM_IO; -+ pgprot_val(vma->vm_page_prot) |= _PAGE_IOMAP; -+ } - } else { - ttm = bo->ttm; - vma->vm_page_prot = (bo->mem.placement & TTM_PL_FLAG_CACHED) ? - vm_get_page_prot(vma->vm_flags) : - ttm_io_prot(bo->mem.placement, vma->vm_page_prot); -+ /* -+ * During PCI suspend the graphic cards purge their VRAM and -+ * move their graphic objects to the TT. They also unmap all -+ * of the objects, meaning that when an user application is -+ * unfrozen it will re-fault and call here. -+ * -+ * What this means is that the VMA for the graphic object might -+ * have been set for VRAM TTM but now it is with the TT -+ * (normal RAM) meaning that the vma->vm_flags could be -+ * inappropiate (say, VM_IO on TT - no good). -+ */ -+ if (vm_io || pte_iomap) { -+ vma->vm_flags &= ~VM_IO; -+ pgprot_val(vma->vm_page_prot) &= ~_PAGE_IOMAP; -+ } - } - - /* -@@ -239,6 +261,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, - { - struct ttm_bo_driver *driver; - struct ttm_buffer_object *bo; -+ struct ttm_mem_type_manager *man; - int ret; - - read_lock(&bdev->vm_lock); -@@ -271,7 +294,11 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, - */ - - vma->vm_private_data = bo; -- vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; -+ vma->vm_flags |= VM_RESERVED | VM_MIXEDMAP | VM_DONTEXPAND; -+ man = &bdev->man[bo->mem.mem_type]; -+ if (man->flags & TTM_MEMTYPE_FLAG_NEEDS_IOREMAP) -+ vma->vm_flags |= VM_IO; -+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - return 0; - out_unref: - ttm_bo_unref(&bo); -diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c -index 3d5b8b0..8b05e38 100644 ---- a/drivers/gpu/drm/ttm/ttm_tt.c -+++ b/drivers/gpu/drm/ttm/ttm_tt.c -@@ -38,7 +38,8 @@ - #include "ttm/ttm_module.h" - #include "ttm/ttm_bo_driver.h" - #include 
"ttm/ttm_placement.h" -- -+#include <linux/dma-mapping.h> -+#include <xen/xen.h> - static int ttm_tt_swapin(struct ttm_tt *ttm); - - /** -@@ -84,6 +85,16 @@ static struct page *ttm_tt_alloc_page(unsigned page_flags) - else - gfp_flags |= __GFP_HIGHMEM; - -+ if ((page_flags & TTM_PAGE_FLAG_DMA32) && xen_pv_domain()) -+ { -+ void *addr; -+ dma_addr_t _d; -+ -+ addr = dma_alloc_coherent(NULL, PAGE_SIZE, &_d, GFP_KERNEL); -+ if (addr == NULL) -+ return NULL; -+ return virt_to_page(addr); -+ } - return alloc_page(gfp_flags); - } - -@@ -286,6 +297,7 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) - int i; - struct page *cur_page; - struct ttm_backend *be = ttm->be; -+ void *addr; - - if (be) - be->func->clear(be); -@@ -300,7 +312,16 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) - "Leaking pages.\n"); - ttm_mem_global_free_page(ttm->glob->mem_glob, - cur_page); -- __free_page(cur_page); -+ -+ if ((ttm->page_flags & TTM_PAGE_FLAG_DMA32) && -+ xen_pv_domain()) { -+ addr = page_address(cur_page); -+ WARN_ON(!addr); -+ if (addr) -+ dma_free_coherent(NULL, PAGE_SIZE, addr, -+ virt_to_bus(addr)); -+ } else -+ __free_page(cur_page); - } - } - ttm->state = tt_unpopulated; -diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c -index a4e9dcb..62ab09e 100644 ---- a/drivers/ieee1394/eth1394.c -+++ b/drivers/ieee1394/eth1394.c -@@ -58,7 +58,6 @@ - #include <linux/tcp.h> - #include <linux/skbuff.h> - #include <linux/bitops.h> --#include <linux/ethtool.h> - #include <asm/uaccess.h> - #include <asm/delay.h> - #include <asm/unaligned.h> -@@ -173,8 +172,6 @@ static netdev_tx_t ether1394_tx(struct sk_buff *skb, - struct net_device *dev); - static void ether1394_iso(struct hpsb_iso *iso); - --static const struct ethtool_ops ethtool_ops; -- - static int ether1394_write(struct hpsb_host *host, int srcid, int destid, - quadlet_t *data, u64 addr, size_t len, u16 flags); - static void ether1394_add_host(struct hpsb_host *host); -@@ -525,8 +522,6 @@ 
static void ether1394_init_dev(struct net_device *dev) - dev->header_ops = ðer1394_header_ops; - dev->netdev_ops = ðer1394_netdev_ops; - -- SET_ETHTOOL_OPS(dev, ðtool_ops); -- - dev->watchdog_timeo = ETHER1394_TIMEOUT; - dev->flags = IFF_BROADCAST | IFF_MULTICAST; - dev->features = NETIF_F_HIGHDMA; -@@ -1698,17 +1693,6 @@ fail: - return NETDEV_TX_OK; - } - --static void ether1394_get_drvinfo(struct net_device *dev, -- struct ethtool_drvinfo *info) --{ -- strcpy(info->driver, driver_name); -- strcpy(info->bus_info, "ieee1394"); /* FIXME provide more detail? */ --} -- --static const struct ethtool_ops ethtool_ops = { -- .get_drvinfo = ether1394_get_drvinfo --}; -- - static int __init ether1394_init_module(void) - { - int err; -diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c -index b115726..80a072e 100644 ---- a/drivers/input/xen-kbdfront.c -+++ b/drivers/input/xen-kbdfront.c -@@ -21,7 +21,10 @@ - #include <linux/errno.h> - #include <linux/module.h> - #include <linux/input.h> -+ - #include <asm/xen/hypervisor.h> -+ -+#include <xen/xen.h> - #include <xen/events.h> - #include <xen/page.h> - #include <xen/interface/io/fbif.h> -@@ -272,6 +275,8 @@ static void xenkbd_backend_changed(struct xenbus_device *dev, - switch (backend_state) { - case XenbusStateInitialising: - case XenbusStateInitialised: -+ case XenbusStateReconfiguring: -+ case XenbusStateReconfigured: - case XenbusStateUnknown: - case XenbusStateClosed: - break; -@@ -335,7 +340,7 @@ static struct xenbus_driver xenkbd_driver = { - - static int __init xenkbd_init(void) - { -- if (!xen_domain()) -+ if (!xen_domain() || xen_hvm_domain()) - return -ENODEV; - - /* Nothing to do if running in dom0. 
*/ -diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig -index b2f71f7..b7feb84 100644 ---- a/drivers/net/Kconfig -+++ b/drivers/net/Kconfig -@@ -2787,6 +2787,7 @@ source "drivers/s390/net/Kconfig" - config XEN_NETDEV_FRONTEND - tristate "Xen network device frontend driver" - depends on XEN -+ select XEN_XENBUS_FRONTEND - default y - help - The network device frontend driver allows the kernel to -diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c -index 406f064..c063b53 100644 ---- a/drivers/net/bmac.c -+++ b/drivers/net/bmac.c -@@ -1236,15 +1236,8 @@ static void bmac_reset_and_enable(struct net_device *dev) - } - spin_unlock_irqrestore(&bp->lock, flags); - } --static void bmac_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) --{ -- struct bmac_data *bp = netdev_priv(dev); -- strcpy(info->driver, "bmac"); -- strcpy(info->bus_info, dev_name(&bp->mdev->ofdev.dev)); --} - - static const struct ethtool_ops bmac_ethtool_ops = { -- .get_drvinfo = bmac_get_drvinfo, - .get_link = ethtool_op_get_link, - }; - -diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c -index 66dace6..8238fa2 100644 ---- a/drivers/net/fec_mpc52xx.c -+++ b/drivers/net/fec_mpc52xx.c -@@ -772,11 +772,6 @@ static void mpc52xx_fec_reset(struct net_device *dev) - - - /* ethtool interface */ --static void mpc52xx_fec_get_drvinfo(struct net_device *dev, -- struct ethtool_drvinfo *info) --{ -- strcpy(info->driver, DRIVER_NAME); --} - - static int mpc52xx_fec_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) - { -@@ -811,7 +806,6 @@ static void mpc52xx_fec_set_msglevel(struct net_device *dev, u32 level) - } - - static const struct ethtool_ops mpc52xx_fec_ethtool_ops = { -- .get_drvinfo = mpc52xx_fec_get_drvinfo, - .get_settings = mpc52xx_fec_get_settings, - .set_settings = mpc52xx_fec_set_settings, - .get_link = ethtool_op_get_link, -diff --git a/drivers/net/pasemi_mac_ethtool.c b/drivers/net/pasemi_mac_ethtool.c -index 28a8622..29ff9ad 100644 ---- 
a/drivers/net/pasemi_mac_ethtool.c -+++ b/drivers/net/pasemi_mac_ethtool.c -@@ -77,21 +77,6 @@ pasemi_mac_ethtool_get_settings(struct net_device *netdev, - return phy_ethtool_gset(phydev, cmd); - } - --static void --pasemi_mac_ethtool_get_drvinfo(struct net_device *netdev, -- struct ethtool_drvinfo *drvinfo) --{ -- struct pasemi_mac *mac; -- mac = netdev_priv(netdev); -- -- /* clear and fill out info */ -- memset(drvinfo, 0, sizeof(struct ethtool_drvinfo)); -- strncpy(drvinfo->driver, "pasemi_mac", 12); -- strcpy(drvinfo->version, "N/A"); -- strcpy(drvinfo->fw_version, "N/A"); -- strncpy(drvinfo->bus_info, pci_name(mac->pdev), 32); --} -- - static u32 - pasemi_mac_ethtool_get_msglevel(struct net_device *netdev) - { -@@ -150,7 +135,6 @@ static void pasemi_mac_get_strings(struct net_device *netdev, u32 stringset, - - const struct ethtool_ops pasemi_mac_ethtool_ops = { - .get_settings = pasemi_mac_ethtool_get_settings, -- .get_drvinfo = pasemi_mac_ethtool_get_drvinfo, - .get_msglevel = pasemi_mac_ethtool_get_msglevel, - .set_msglevel = pasemi_mac_ethtool_set_msglevel, - .get_link = ethtool_op_get_link, -diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c -index b58965a..7f9a4f4 100644 ---- a/drivers/net/pcmcia/3c574_cs.c -+++ b/drivers/net/pcmcia/3c574_cs.c -@@ -83,7 +83,6 @@ earlier 3Com products. 
- #include <linux/skbuff.h> - #include <linux/if_arp.h> - #include <linux/ioport.h> --#include <linux/ethtool.h> - #include <linux/bitops.h> - #include <linux/mii.h> - -@@ -249,7 +248,6 @@ static int el3_rx(struct net_device *dev, int worklimit); - static int el3_close(struct net_device *dev); - static void el3_tx_timeout(struct net_device *dev); - static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); --static const struct ethtool_ops netdev_ethtool_ops; - static void set_rx_mode(struct net_device *dev); - static void set_multicast_list(struct net_device *dev); - -@@ -300,7 +298,6 @@ static int tc574_probe(struct pcmcia_device *link) - link->conf.ConfigIndex = 1; - - dev->netdev_ops = &el3_netdev_ops; -- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); - dev->watchdog_timeo = TX_TIMEOUT; - - return tc574_config(link); -@@ -1083,16 +1080,6 @@ static int el3_rx(struct net_device *dev, int worklimit) - return worklimit; - } - --static void netdev_get_drvinfo(struct net_device *dev, -- struct ethtool_drvinfo *info) --{ -- strcpy(info->driver, "3c574_cs"); --} -- --static const struct ethtool_ops netdev_ethtool_ops = { -- .get_drvinfo = netdev_get_drvinfo, --}; -- - /* Provide ioctl() calls to examine the MII xcvr state. 
*/ - static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) - { -diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c -index 3131a59..40e5e7c 100644 ---- a/drivers/net/pcmcia/axnet_cs.c -+++ b/drivers/net/pcmcia/axnet_cs.c -@@ -33,7 +33,6 @@ - #include <linux/timer.h> - #include <linux/delay.h> - #include <linux/spinlock.h> --#include <linux/ethtool.h> - #include <linux/netdevice.h> - #include <linux/etherdevice.h> - #include <linux/crc32.h> -@@ -98,7 +97,6 @@ static netdev_tx_t axnet_start_xmit(struct sk_buff *skb, - static struct net_device_stats *get_stats(struct net_device *dev); - static void set_multicast_list(struct net_device *dev); - static void axnet_tx_timeout(struct net_device *dev); --static const struct ethtool_ops netdev_ethtool_ops; - static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); - static void ei_watchdog(u_long arg); - static void axnet_reset_8390(struct net_device *dev); -@@ -186,7 +184,6 @@ static int axnet_probe(struct pcmcia_device *link) - - dev->netdev_ops = &axnet_netdev_ops; - -- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); - dev->watchdog_timeo = TX_TIMEOUT; - - return axnet_config(link); -@@ -683,16 +680,6 @@ reschedule: - add_timer(&info->watchdog); - } - --static void netdev_get_drvinfo(struct net_device *dev, -- struct ethtool_drvinfo *info) --{ -- strcpy(info->driver, "axnet_cs"); --} -- --static const struct ethtool_ops netdev_ethtool_ops = { -- .get_drvinfo = netdev_get_drvinfo, --}; -- - /*====================================================================*/ - - static int axnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c -index 06618af..db0c890 100644 ---- a/drivers/net/pcmcia/ibmtr_cs.c -+++ b/drivers/net/pcmcia/ibmtr_cs.c -@@ -52,7 +52,6 @@ - #include <linux/string.h> - #include <linux/timer.h> - #include <linux/module.h> --#include <linux/ethtool.h> - #include <linux/netdevice.h> - #include 
<linux/trdevice.h> - #include <linux/ibmtr.h> -@@ -120,16 +119,6 @@ typedef struct ibmtr_dev_t { - struct tok_info *ti; - } ibmtr_dev_t; - --static void netdev_get_drvinfo(struct net_device *dev, -- struct ethtool_drvinfo *info) --{ -- strcpy(info->driver, "ibmtr_cs"); --} -- --static const struct ethtool_ops netdev_ethtool_ops = { -- .get_drvinfo = netdev_get_drvinfo, --}; -- - /*====================================================================== - - ibmtr_attach() creates an "instance" of the driver, allocating -@@ -170,8 +159,6 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link) - - link->irq.Instance = info->dev = dev; - -- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); -- - return ibmtr_config(link); - } /* ibmtr_attach */ - -diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c -index 94c9ad2..1b673b0 100644 ---- a/drivers/net/pcmcia/pcnet_cs.c -+++ b/drivers/net/pcmcia/pcnet_cs.c -@@ -36,7 +36,6 @@ - #include <linux/string.h> - #include <linux/timer.h> - #include <linux/delay.h> --#include <linux/ethtool.h> - #include <linux/netdevice.h> - #include <linux/log2.h> - #include <linux/etherdevice.h> -@@ -111,7 +110,6 @@ static void pcnet_release(struct pcmcia_device *link); - static int pcnet_open(struct net_device *dev); - static int pcnet_close(struct net_device *dev); - static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); --static const struct ethtool_ops netdev_ethtool_ops; - static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); - static void ei_watchdog(u_long arg); - static void pcnet_reset_8390(struct net_device *dev); -@@ -654,8 +652,6 @@ static int pcnet_config(struct pcmcia_device *link) - ei_status.word16 = 1; - ei_status.reset_8390 = &pcnet_reset_8390; - -- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); -- - if (info->flags & (IS_DL10019|IS_DL10022)) - mii_phy_probe(dev); - -@@ -1175,18 +1171,6 @@ reschedule: - - /*====================================================================*/ - --static 
void netdev_get_drvinfo(struct net_device *dev, -- struct ethtool_drvinfo *info) --{ -- strcpy(info->driver, "pcnet_cs"); --} -- --static const struct ethtool_ops netdev_ethtool_ops = { -- .get_drvinfo = netdev_get_drvinfo, --}; -- --/*====================================================================*/ -- - - static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) - { -diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c -index 8d60300..0926832 100644 ---- a/drivers/net/sc92031.c -+++ b/drivers/net/sc92031.c -@@ -1255,16 +1255,6 @@ static int sc92031_ethtool_set_settings(struct net_device *dev, - return 0; - } - --static void sc92031_ethtool_get_drvinfo(struct net_device *dev, -- struct ethtool_drvinfo *drvinfo) --{ -- struct sc92031_priv *priv = netdev_priv(dev); -- struct pci_dev *pdev = priv->pdev; -- -- strcpy(drvinfo->driver, SC92031_NAME); -- strcpy(drvinfo->bus_info, pci_name(pdev)); --} -- - static void sc92031_ethtool_get_wol(struct net_device *dev, - struct ethtool_wolinfo *wolinfo) - { -@@ -1386,7 +1376,6 @@ static void sc92031_ethtool_get_ethtool_stats(struct net_device *dev, - static const struct ethtool_ops sc92031_ethtool_ops = { - .get_settings = sc92031_ethtool_get_settings, - .set_settings = sc92031_ethtool_set_settings, -- .get_drvinfo = sc92031_ethtool_get_drvinfo, - .get_wol = sc92031_ethtool_get_wol, - .set_wol = sc92031_ethtool_set_wol, - .nway_reset = sc92031_ethtool_nway_reset, -diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c -index 0f2ca598..44159be 100644 ---- a/drivers/net/tulip/xircom_cb.c -+++ b/drivers/net/tulip/xircom_cb.c -@@ -27,7 +27,6 @@ - #include <linux/skbuff.h> - #include <linux/delay.h> - #include <linux/init.h> --#include <linux/ethtool.h> - #include <linux/bitops.h> - - #include <asm/uaccess.h> -@@ -179,19 +178,6 @@ static void print_binary(unsigned int number) - } - #endif - --static void netdev_get_drvinfo(struct net_device *dev, -- struct ethtool_drvinfo *info) --{ -- 
struct xircom_private *private = netdev_priv(dev); -- -- strcpy(info->driver, "xircom_cb"); -- strcpy(info->bus_info, pci_name(private->pdev)); --} -- --static const struct ethtool_ops netdev_ethtool_ops = { -- .get_drvinfo = netdev_get_drvinfo, --}; -- - static const struct net_device_ops netdev_ops = { - .ndo_open = xircom_open, - .ndo_stop = xircom_close, -@@ -277,7 +263,6 @@ static int __devinit xircom_probe(struct pci_dev *pdev, const struct pci_device_ - setup_descriptors(private); - - dev->netdev_ops = &netdev_ops; -- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); - pci_set_drvdata(pdev, dev); - - if (register_netdev(dev)) { -diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c -index f450bc9..2109514 100644 ---- a/drivers/net/usb/hso.c -+++ b/drivers/net/usb/hso.c -@@ -820,17 +820,7 @@ static netdev_tx_t hso_net_start_xmit(struct sk_buff *skb, - return NETDEV_TX_OK; - } - --static void hso_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) --{ -- struct hso_net *odev = netdev_priv(net); -- -- strncpy(info->driver, driver_name, ETHTOOL_BUSINFO_LEN); -- strncpy(info->version, DRIVER_VERSION, ETHTOOL_BUSINFO_LEN); -- usb_make_path(odev->parent->usb, info->bus_info, sizeof info->bus_info); --} -- - static const struct ethtool_ops ops = { -- .get_drvinfo = hso_get_drvinfo, - .get_link = ethtool_op_get_link - }; - -diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c -index e391ef9..47d1926 100644 ---- a/drivers/net/usb/kaweth.c -+++ b/drivers/net/usb/kaweth.c -@@ -767,14 +767,6 @@ static int kaweth_close(struct net_device *net) - return 0; - } - --static void kaweth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) --{ -- struct kaweth_device *kaweth = netdev_priv(dev); -- -- strlcpy(info->driver, driver_name, sizeof(info->driver)); -- usb_make_path(kaweth->dev, info->bus_info, sizeof (info->bus_info)); --} -- - static u32 kaweth_get_link(struct net_device *dev) - { - struct kaweth_device *kaweth = netdev_priv(dev); 
-@@ -783,7 +775,6 @@ static u32 kaweth_get_link(struct net_device *dev) - } - - static const struct ethtool_ops ops = { -- .get_drvinfo = kaweth_get_drvinfo, - .get_link = kaweth_get_link - }; - -diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c -index 1c88c2e..2e65100 100644 ---- a/drivers/net/wireless/ray_cs.c -+++ b/drivers/net/wireless/ray_cs.c -@@ -44,7 +44,6 @@ - #include <linux/if_arp.h> - #include <linux/ioport.h> - #include <linux/skbuff.h> --#include <linux/ethtool.h> - #include <linux/ieee80211.h> - - #include <pcmcia/cs_types.h> -@@ -101,8 +100,6 @@ static int ray_dev_config(struct net_device *dev, struct ifmap *map); - static struct net_device_stats *ray_get_stats(struct net_device *dev); - static int ray_dev_init(struct net_device *dev); - --static const struct ethtool_ops netdev_ethtool_ops; -- - static int ray_open(struct net_device *dev); - static netdev_tx_t ray_dev_start_xmit(struct sk_buff *skb, - struct net_device *dev); -@@ -362,7 +359,6 @@ static int ray_probe(struct pcmcia_device *p_dev) - - /* Raylink entries in the device structure */ - dev->netdev_ops = &ray_netdev_ops; -- SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); - dev->wireless_handlers = &ray_handler_def; - #ifdef WIRELESS_SPY - local->wireless_data.spy_data = &local->spy_data; -@@ -1106,18 +1102,6 @@ AP to AP 1 1 dest AP src AP dest source - } - } /* end encapsulate_frame */ - --/*===========================================================================*/ -- --static void netdev_get_drvinfo(struct net_device *dev, -- struct ethtool_drvinfo *info) --{ -- strcpy(info->driver, "ray_cs"); --} -- --static const struct ethtool_ops netdev_ethtool_ops = { -- .get_drvinfo = netdev_get_drvinfo, --}; -- - /*====================================================================*/ - - /*------------------------------------------------------------------*/ -diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c -index 4f1e0cf..22b2b43 100644 ---- 
a/drivers/net/wireless/wl3501_cs.c -+++ b/drivers/net/wireless/wl3501_cs.c -@@ -29,7 +29,6 @@ - - #include <linux/delay.h> - #include <linux/types.h> --#include <linux/ethtool.h> - #include <linux/init.h> - #include <linux/interrupt.h> - #include <linux/in.h> -@@ -1436,15 +1435,6 @@ static struct iw_statistics *wl3501_get_wireless_stats(struct net_device *dev) - return wstats; - } - --static void wl3501_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) --{ -- strlcpy(info->driver, wl3501_dev_info, sizeof(info->driver)); --} -- --static const struct ethtool_ops ops = { -- .get_drvinfo = wl3501_get_drvinfo --}; -- - /** - * wl3501_detach - deletes a driver "instance" - * @link - FILL_IN -@@ -1936,7 +1926,6 @@ static int wl3501_probe(struct pcmcia_device *p_dev) - this->p_dev = p_dev; - dev->wireless_data = &this->wireless_data; - dev->wireless_handlers = &wl3501_handler_def; -- SET_ETHTOOL_OPS(dev, &ops); - netif_stop_queue(dev); - p_dev->priv = p_dev->irq.Instance = dev; - -diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c -index 1a11d95..3f71199 100644 ---- a/drivers/net/xen-netfront.c -+++ b/drivers/net/xen-netfront.c -@@ -42,6 +42,7 @@ - #include <linux/mm.h> - #include <net/ip.h> - -+#include <xen/xen.h> - #include <xen/xenbus.h> - #include <xen/events.h> - #include <xen/page.h> -@@ -53,19 +54,36 @@ - - static const struct ethtool_ops xennet_ethtool_ops; - -+static int use_smartpoll = 0; -+module_param(use_smartpoll, int, 0600); -+MODULE_PARM_DESC (use_smartpoll, "Use smartpoll mechanism if available"); -+ - struct netfront_cb { - struct page *page; - unsigned offset; - }; - -+#define MICRO_SECOND 1000000UL -+#define NANO_SECOND 1000000000UL -+#define DEFAULT_SMART_POLL_FREQ 1000UL -+ -+struct netfront_smart_poll { -+ struct hrtimer timer; -+ struct net_device *netdev; -+ unsigned int smart_poll_freq; -+ unsigned int feature_smart_poll; -+ unsigned int active; -+ unsigned long counter; -+}; -+ - #define NETFRONT_SKB_CB(skb) 
((struct netfront_cb *)((skb)->cb)) - - #define RX_COPY_THRESHOLD 256 - - #define GRANT_INVALID_REF 0 - --#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) --#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) -+#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) -+#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) - #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) - - struct netfront_info { -@@ -104,7 +122,7 @@ struct netfront_info { - - /* Receive-ring batched refills. */ - #define RX_MIN_TARGET 8 --#define RX_DFL_MIN_TARGET 64 -+#define RX_DFL_MIN_TARGET 80 - #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) - unsigned rx_min_target, rx_max_target, rx_target; - struct sk_buff_head rx_batch; -@@ -118,6 +136,8 @@ struct netfront_info { - unsigned long rx_pfn_array[NET_RX_RING_SIZE]; - struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; - struct mmu_update rx_mmu[NET_RX_RING_SIZE]; -+ -+ struct netfront_smart_poll smart_poll; - }; - - struct netfront_rx_info { -@@ -337,15 +357,17 @@ static int xennet_open(struct net_device *dev) - return 0; - } - --static void xennet_tx_buf_gc(struct net_device *dev) -+static int xennet_tx_buf_gc(struct net_device *dev) - { - RING_IDX cons, prod; -+ RING_IDX cons_begin, cons_end; - unsigned short id; - struct netfront_info *np = netdev_priv(dev); - struct sk_buff *skb; - - BUG_ON(!netif_carrier_ok(dev)); - -+ cons_begin = np->tx.rsp_cons; - do { - prod = np->tx.sring->rsp_prod; - rmb(); /* Ensure we see responses up to 'rp'. 
*/ -@@ -390,7 +412,11 @@ static void xennet_tx_buf_gc(struct net_device *dev) - mb(); /* update shared area */ - } while ((cons == prod) && (prod != np->tx.sring->rsp_prod)); - -+ cons_end = np->tx.rsp_cons; -+ - xennet_maybe_wake_tx(dev); -+ -+ return (cons_begin == cons_end); - } - - static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, -@@ -1267,6 +1293,14 @@ static void xennet_disconnect_backend(struct netfront_info *info) - info->rx.sring = NULL; - } - -+static int netfront_suspend(struct xenbus_device *dev, pm_message_t state) -+{ -+ struct netfront_info *info = dev_get_drvdata(&dev->dev); -+ struct hrtimer *timer = &info->smart_poll.timer; -+ hrtimer_cancel(timer); -+ return 0; -+} -+ - /** - * We are reconnecting to the backend, due to a suspend/resume, or a backend - * driver restart. We tear down our netif structure and recreate it, but -@@ -1305,6 +1339,59 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) - return 0; - } - -+static enum hrtimer_restart smart_poll_function(struct hrtimer *timer) -+{ -+ struct netfront_smart_poll *psmart_poll; -+ struct net_device *dev; -+ struct netfront_info *np; -+ unsigned long flags; -+ unsigned int tx_active = 0, rx_active = 0; -+ -+ psmart_poll = container_of(timer, struct netfront_smart_poll, timer); -+ dev = psmart_poll->netdev; -+ np = netdev_priv(dev); -+ -+ spin_lock_irqsave(&np->tx_lock, flags); -+ -+ if (!np->rx.sring) -+ goto end; -+ -+ np->smart_poll.counter++; -+ -+ if (likely(netif_carrier_ok(dev))) { -+ tx_active = !(xennet_tx_buf_gc(dev)); -+ /* Under tx_lock: protects access to rx shared-ring indexes. 
*/ -+ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { -+ rx_active = 1; -+ napi_schedule(&np->napi); -+ } -+ } -+ -+ np->smart_poll.active |= (tx_active || rx_active); -+ if (np->smart_poll.counter % -+ (np->smart_poll.smart_poll_freq / 10) == 0) { -+ if (!np->smart_poll.active) { -+ np->rx.sring->private.netif.smartpoll_active = 0; -+ goto end; -+ } -+ np->smart_poll.active = 0; -+ } -+ -+ if (np->rx.sring->private.netif.smartpoll_active) { -+ if ( hrtimer_start(timer, -+ ktime_set(0, NANO_SECOND/psmart_poll->smart_poll_freq), -+ HRTIMER_MODE_REL) ) { -+ printk(KERN_DEBUG "Failed to start hrtimer," -+ "use interrupt mode for this packet\n"); -+ np->rx.sring->private.netif.smartpoll_active = 0; -+ } -+ } -+ -+end: -+ spin_unlock_irqrestore(&np->tx_lock, flags); -+ return HRTIMER_NORESTART; -+} -+ - static irqreturn_t xennet_interrupt(int irq, void *dev_id) - { - struct net_device *dev = dev_id; -@@ -1320,6 +1407,16 @@ static irqreturn_t xennet_interrupt(int irq, void *dev_id) - napi_schedule(&np->napi); - } - -+ if (np->smart_poll.feature_smart_poll) { -+ if ( hrtimer_start(&np->smart_poll.timer, -+ ktime_set(0,NANO_SECOND/np->smart_poll.smart_poll_freq), -+ HRTIMER_MODE_REL) ) { -+ printk(KERN_DEBUG "Failed to start hrtimer," -+ "use interrupt mode for this packet\n"); -+ np->rx.sring->private.netif.smartpoll_active = 0; -+ } -+ } -+ - spin_unlock_irqrestore(&np->tx_lock, flags); - - return IRQ_HANDLED; -@@ -1393,7 +1490,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) - } - - /* Common code used when first setting up, and when resuming. 
*/ --static int talk_to_backend(struct xenbus_device *dev, -+static int talk_to_netback(struct xenbus_device *dev, - struct netfront_info *info) - { - const char *message; -@@ -1456,6 +1553,12 @@ again: - goto abort_transaction; - } - -+ err = xenbus_printf(xbt, dev->nodename, "feature-smart-poll", "%d", use_smartpoll); -+ if (err) { -+ message = "writing feature-smart-poll"; -+ goto abort_transaction; -+ } -+ - err = xenbus_transaction_end(xbt, 0); - if (err) { - if (err == -EAGAIN) -@@ -1543,7 +1646,26 @@ static int xennet_connect(struct net_device *dev) - return -ENODEV; - } - -- err = talk_to_backend(np->xbdev, np); -+ np->smart_poll.feature_smart_poll = 0; -+ if (use_smartpoll) { -+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, -+ "feature-smart-poll", "%u", -+ &np->smart_poll.feature_smart_poll); -+ if (err != 1) -+ np->smart_poll.feature_smart_poll = 0; -+ } -+ -+ hrtimer_init(&np->smart_poll.timer, CLOCK_MONOTONIC, -+ HRTIMER_MODE_REL); -+ if (np->smart_poll.feature_smart_poll) { -+ np->smart_poll.timer.function = smart_poll_function; -+ np->smart_poll.netdev = dev; -+ np->smart_poll.smart_poll_freq = DEFAULT_SMART_POLL_FREQ; -+ np->smart_poll.active = 0; -+ np->smart_poll.counter = 0; -+ } -+ -+ err = talk_to_netback(np->xbdev, np); - if (err) - return err; - -@@ -1597,7 +1719,7 @@ static int xennet_connect(struct net_device *dev) - /** - * Callback received when the backend's state changes. 
- */ --static void backend_changed(struct xenbus_device *dev, -+static void netback_changed(struct xenbus_device *dev, - enum xenbus_state backend_state) - { - struct netfront_info *np = dev_get_drvdata(&dev->dev); -@@ -1608,6 +1730,8 @@ static void backend_changed(struct xenbus_device *dev, - switch (backend_state) { - case XenbusStateInitialising: - case XenbusStateInitialised: -+ case XenbusStateReconfiguring: -+ case XenbusStateReconfigured: - case XenbusStateConnected: - case XenbusStateUnknown: - case XenbusStateClosed: -@@ -1628,12 +1752,30 @@ static void backend_changed(struct xenbus_device *dev, - } - } - -+static int xennet_get_coalesce(struct net_device *netdev, -+ struct ethtool_coalesce *ec) -+{ -+ struct netfront_info *np = netdev_priv(netdev); -+ ec->rx_coalesce_usecs = MICRO_SECOND / np->smart_poll.smart_poll_freq; -+ return 0; -+} -+ -+static int xennet_set_coalesce(struct net_device *netdev, -+ struct ethtool_coalesce *ec) -+{ -+ struct netfront_info *np = netdev_priv(netdev); -+ np->smart_poll.smart_poll_freq = MICRO_SECOND / ec->rx_coalesce_usecs; -+ return 0; -+} -+ - static const struct ethtool_ops xennet_ethtool_ops = - { - .set_tx_csum = ethtool_op_set_tx_csum, - .set_sg = xennet_set_sg, - .set_tso = xennet_set_tso, - .get_link = ethtool_op_get_link, -+ .get_coalesce = xennet_get_coalesce, -+ .set_coalesce = xennet_set_coalesce, - }; - - #ifdef CONFIG_SYSFS -@@ -1798,8 +1940,9 @@ static struct xenbus_driver netfront_driver = { - .ids = netfront_ids, - .probe = netfront_probe, - .remove = __devexit_p(xennet_remove), -+ .suspend = netfront_suspend, - .resume = netfront_resume, -- .otherend_changed = backend_changed, -+ .otherend_changed = netback_changed, - }; - - static int __init netif_init(void) -diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig -index fdc864f..7802fcd 100644 ---- a/drivers/pci/Kconfig -+++ b/drivers/pci/Kconfig -@@ -51,6 +51,16 @@ config PCI_STUB - - When in doubt, say N. 
- -+config XEN_PCIDEV_FRONTEND -+ tristate "Xen PCI Frontend" -+ depends on XEN && PCI && X86 -+ select HOTPLUG -+ select XEN_XENBUS_FRONTEND -+ default y -+ help -+ The PCI device frontend driver allows the kernel to import arbitrary -+ PCI devices from a PCI backend to support PCI driver domains. -+ - config HT_IRQ - bool "Interrupts on hypertransport devices" - default y -diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile -index 4a7f11d..b70aa4d 100644 ---- a/drivers/pci/Makefile -+++ b/drivers/pci/Makefile -@@ -31,6 +31,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o - # Build Intel IOMMU support - obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o - -+# Build Xen IOMMU support -+obj-$(CONFIG_PCI_XEN) += xen-iommu.o - obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o - - obj-$(CONFIG_PCI_IOV) += iov.o -@@ -60,6 +62,8 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o - - obj-$(CONFIG_PCI_STUB) += pci-stub.o - -+obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o -+ - ifeq ($(CONFIG_PCI_DEBUG),y) - EXTRA_CFLAGS += -DDEBUG - endif -diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c -index cef28a7..1940183 100644 ---- a/drivers/pci/bus.c -+++ b/drivers/pci/bus.c -@@ -249,6 +249,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - up_read(&pci_bus_sem); - } - -+EXPORT_SYMBOL_GPL(pci_walk_bus); - EXPORT_SYMBOL(pci_bus_alloc_resource); - EXPORT_SYMBOL_GPL(pci_bus_add_device); - EXPORT_SYMBOL(pci_bus_add_devices); -diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c -index 91d0390..24f6f28 100644 ---- a/drivers/pci/dmar.c -+++ b/drivers/pci/dmar.c -@@ -673,10 +673,13 @@ void __init detect_intel_iommu(void) - "x2apic and Intr-remapping.\n"); - #endif - #ifdef CONFIG_DMAR -- if (ret && !no_iommu && !iommu_detected && !swiotlb && -- !dmar_disabled) -+ if (ret && !no_iommu && !iommu_detected && !dmar_disabled) - iommu_detected = 1; - #endif -+#ifdef CONFIG_X86 -+ if (ret) -+ x86_init.iommu.iommu_init = intel_iommu_init; -+#endif - } - 
early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); - dmar_tbl = NULL; -diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c -index ba83495..1506d4a 100644 ---- a/drivers/pci/intel-iommu.c -+++ b/drivers/pci/intel-iommu.c -@@ -3278,7 +3278,7 @@ int __init intel_iommu_init(void) - * Check the need for DMA-remapping initialization now. - * Above initialization will also be used by Interrupt-remapping. - */ -- if (no_iommu || swiotlb || dmar_disabled) -+ if (no_iommu || dmar_disabled) - return -ENODEV; - - iommu_init_mempool(); -@@ -3299,7 +3299,9 @@ int __init intel_iommu_init(void) - "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); - - init_timer(&unmap_timer); -- force_iommu = 1; -+#ifdef CONFIG_SWIOTLB -+ swiotlb = 0; -+#endif - dma_ops = &intel_dma_ops; - - init_iommu_sysfs(); -diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c -index e03fe98..f9db891 100644 ---- a/drivers/pci/iov.c -+++ b/drivers/pci/iov.c -@@ -706,6 +706,21 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev) - } - EXPORT_SYMBOL_GPL(pci_sriov_migration); - -+/** -+ * pci_num_vf - return number of VFs associated with a PF device_release_driver -+ * @dev: the PCI device -+ * -+ * Returns number of VFs, or 0 if SR-IOV is not enabled. 
-+ */ -+int pci_num_vf(struct pci_dev *dev) -+{ -+ if (!dev || !dev->is_physfn) -+ return 0; -+ else -+ return dev->sriov->nr_virtfn; -+} -+EXPORT_SYMBOL_GPL(pci_num_vf); -+ - static int ats_alloc_one(struct pci_dev *dev, int ps) - { - int pos; -diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c -index 0fb1d05..c7e8a69 100644 ---- a/drivers/pci/msi.c -+++ b/drivers/pci/msi.c -@@ -19,6 +19,9 @@ - #include <linux/errno.h> - #include <linux/io.h> - -+#include <asm/xen/hypercall.h> -+#include <asm/xen/hypervisor.h> -+ - #include "pci.h" - #include "msi.h" - -@@ -391,6 +394,20 @@ static void __pci_restore_msix_state(struct pci_dev *dev) - - void pci_restore_msi_state(struct pci_dev *dev) - { -+ if (xen_initial_domain()) { -+ struct physdev_restore_msi physdev; -+ -+ if (!dev->msi_enabled && !dev->msix_enabled) -+ return; -+ -+ pci_intx_for_msi(dev, 0); -+ -+ physdev.bus = dev->bus->number; -+ physdev.devfn = dev->devfn; -+ HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &physdev); -+ -+ return; -+ } - __pci_restore_msi_state(dev); - __pci_restore_msix_state(dev); - } -diff --git a/drivers/pci/xen-iommu.c b/drivers/pci/xen-iommu.c -new file mode 100644 -index 0000000..ac6bcdb ---- /dev/null -+++ b/drivers/pci/xen-iommu.c -@@ -0,0 +1,271 @@ -+#include <linux/types.h> -+#include <linux/mm.h> -+#include <linux/string.h> -+#include <linux/pci.h> -+#include <linux/module.h> -+#include <linux/version.h> -+#include <linux/scatterlist.h> -+#include <linux/io.h> -+#include <linux/bug.h> -+ -+#include <xen/interface/xen.h> -+#include <xen/grant_table.h> -+#include <xen/page.h> -+#include <xen/xen-ops.h> -+ -+#include <asm/iommu.h> -+#include <asm/swiotlb.h> -+#include <asm/tlbflush.h> -+ -+#define IOMMU_BUG_ON(test) \ -+do { \ -+ if (unlikely(test)) { \ -+ printk(KERN_ALERT "Fatal DMA error! 
" \ -+ "Please use 'swiotlb=force'\n"); \ -+ BUG(); \ -+ } \ -+} while (0) -+ -+/* Print address range with message */ -+#define PAR(msg, addr, size) \ -+do { \ -+ printk(msg "[%#llx - %#llx]\n", \ -+ (unsigned long long)addr, \ -+ (unsigned long long)addr + size); \ -+} while (0) -+ -+static inline int address_needs_mapping(struct device *hwdev, -+ dma_addr_t addr) -+{ -+ dma_addr_t mask = DMA_BIT_MASK(32); -+ int ret; -+ -+ /* If the device has a mask, use it, otherwise default to 32 bits */ -+ if (hwdev) -+ mask = *hwdev->dma_mask; -+ -+ ret = (addr & ~mask) != 0; -+ -+ if (ret) { -+ printk(KERN_ERR "dma address needs mapping\n"); -+ printk(KERN_ERR "mask: %#llx\n address: [%#llx]\n", mask, addr); -+ } -+ return ret; -+} -+ -+static int check_pages_physically_contiguous(unsigned long pfn, -+ unsigned int offset, -+ size_t length) -+{ -+ unsigned long next_mfn; -+ int i; -+ int nr_pages; -+ -+ next_mfn = pfn_to_mfn(pfn); -+ nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; -+ -+ for (i = 1; i < nr_pages; i++) { -+ if (pfn_to_mfn(++pfn) != ++next_mfn) -+ return 0; -+ } -+ return 1; -+} -+ -+static int range_straddles_page_boundary(phys_addr_t p, size_t size) -+{ -+ unsigned long pfn = PFN_DOWN(p); -+ unsigned int offset = p & ~PAGE_MASK; -+ -+ if (offset + size <= PAGE_SIZE) -+ return 0; -+ if (check_pages_physically_contiguous(pfn, offset, size)) -+ return 0; -+ return 1; -+} -+ -+static inline void xen_dma_unmap_page(struct page *page) -+{ -+ /* Xen TODO: 2.6.18 xen calls __gnttab_dma_unmap_page here -+ * to deal with foreign pages. We'll need similar logic here at -+ * some point. -+ */ -+} -+ -+/* Gets dma address of a page */ -+static inline dma_addr_t xen_dma_map_page(struct page *page) -+{ -+ /* Xen TODO: 2.6.18 xen calls __gnttab_dma_map_page here to deal -+ * with foreign pages. We'll need similar logic here at some -+ * point. 
-+ */ -+ return ((dma_addr_t)pfn_to_mfn(page_to_pfn(page))) << PAGE_SHIFT; -+} -+ -+static int xen_map_sg(struct device *hwdev, struct scatterlist *sg, -+ int nents, -+ enum dma_data_direction direction, -+ struct dma_attrs *attrs) -+{ -+ struct scatterlist *s; -+ struct page *page; -+ int i, rc; -+ -+ BUG_ON(direction == DMA_NONE); -+ WARN_ON(nents == 0 || sg[0].length == 0); -+ -+ for_each_sg(sg, s, nents, i) { -+ BUG_ON(!sg_page(s)); -+ page = sg_page(s); -+ s->dma_address = xen_dma_map_page(page) + s->offset; -+ s->dma_length = s->length; -+ IOMMU_BUG_ON(range_straddles_page_boundary( -+ page_to_phys(page), s->length)); -+ } -+ -+ rc = nents; -+ -+ flush_write_buffers(); -+ return rc; -+} -+ -+static void xen_unmap_sg(struct device *hwdev, struct scatterlist *sg, -+ int nents, -+ enum dma_data_direction direction, -+ struct dma_attrs *attrs) -+{ -+ struct scatterlist *s; -+ struct page *page; -+ int i; -+ -+ for_each_sg(sg, s, nents, i) { -+ page = pfn_to_page(mfn_to_pfn(PFN_DOWN(s->dma_address))); -+ xen_dma_unmap_page(page); -+ } -+} -+ -+static void *xen_alloc_coherent(struct device *dev, size_t size, -+ dma_addr_t *dma_handle, gfp_t gfp) -+{ -+ void *ret; -+ unsigned int order = get_order(size); -+ unsigned long vstart; -+ u64 mask; -+ -+ /* ignore region specifiers */ -+ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); -+ -+ if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) -+ return ret; -+ -+ if (dev == NULL || (dev->coherent_dma_mask < DMA_BIT_MASK(32))) -+ gfp |= GFP_DMA; -+ -+ vstart = __get_free_pages(gfp, order); -+ ret = (void *)vstart; -+ -+ if (dev != NULL && dev->coherent_dma_mask) -+ mask = dev->coherent_dma_mask; -+ else -+ mask = DMA_BIT_MASK(32); -+ -+ if (ret != NULL) { -+ if (xen_create_contiguous_region(vstart, order, -+ fls64(mask)) != 0) { -+ free_pages(vstart, order); -+ return NULL; -+ } -+ memset(ret, 0, size); -+ *dma_handle = virt_to_machine(ret).maddr; -+ } -+ return ret; -+} -+ -+static void xen_free_coherent(struct device *dev, 
size_t size, -+ void *vaddr, dma_addr_t dma_addr) -+{ -+ int order = get_order(size); -+ -+ if (dma_release_from_coherent(dev, order, vaddr)) -+ return; -+ -+ xen_destroy_contiguous_region((unsigned long)vaddr, order); -+ free_pages((unsigned long)vaddr, order); -+} -+ -+static dma_addr_t xen_map_page(struct device *dev, struct page *page, -+ unsigned long offset, size_t size, -+ enum dma_data_direction direction, -+ struct dma_attrs *attrs) -+{ -+ dma_addr_t dma; -+ -+ BUG_ON(direction == DMA_NONE); -+ -+ WARN_ON(size == 0); -+ -+ dma = xen_dma_map_page(page) + offset; -+ -+ IOMMU_BUG_ON(address_needs_mapping(dev, dma)); -+ flush_write_buffers(); -+ return dma; -+} -+ -+static void xen_unmap_page(struct device *dev, dma_addr_t dma_addr, -+ size_t size, -+ enum dma_data_direction direction, -+ struct dma_attrs *attrs) -+{ -+ BUG_ON(direction == DMA_NONE); -+ xen_dma_unmap_page(pfn_to_page(mfn_to_pfn(PFN_DOWN(dma_addr)))); -+} -+ -+static struct dma_map_ops xen_dma_ops = { -+ .dma_supported = NULL, -+ -+ .alloc_coherent = xen_alloc_coherent, -+ .free_coherent = xen_free_coherent, -+ -+ .map_page = xen_map_page, -+ .unmap_page = xen_unmap_page, -+ -+ .map_sg = xen_map_sg, -+ .unmap_sg = xen_unmap_sg, -+ -+ .mapping_error = NULL, -+ -+ .is_phys = 0, -+}; -+ -+static struct dma_map_ops xen_swiotlb_dma_ops = { -+ .dma_supported = swiotlb_dma_supported, -+ -+ .alloc_coherent = xen_alloc_coherent, -+ .free_coherent = xen_free_coherent, -+ -+ .map_page = swiotlb_map_page, -+ .unmap_page = swiotlb_unmap_page, -+ -+ .map_sg = swiotlb_map_sg_attrs, -+ .unmap_sg = swiotlb_unmap_sg_attrs, -+ -+ .mapping_error = swiotlb_dma_mapping_error, -+ -+ .is_phys = 0, -+}; -+ -+void __init xen_iommu_init(void) -+{ -+ if (!xen_pv_domain()) -+ return; -+ -+ printk(KERN_INFO "Xen: Initializing Xen DMA ops\n"); -+ -+ force_iommu = 0; -+ dma_ops = &xen_dma_ops; -+ -+ if (swiotlb) { -+ printk(KERN_INFO "Xen: Enabling DMA fallback to swiotlb\n"); -+ dma_ops = &xen_swiotlb_dma_ops; -+ } -+} -+ 
-diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c -new file mode 100644 -index 0000000..76d0bdd ---- /dev/null -+++ b/drivers/pci/xen-pcifront.c -@@ -0,0 +1,1157 @@ -+/* -+ * PCI Frontend Xenbus Setup - handles setup with backend (imports page/evtchn) -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/mm.h> -+#include <xen/xenbus.h> -+#include <xen/events.h> -+#include <xen/grant_table.h> -+#include <xen/page.h> -+#include <linux/spinlock.h> -+#include <linux/pci.h> -+#include <linux/msi.h> -+#include <xen/xenbus.h> -+#include <xen/interface/io/pciif.h> -+#include <asm/xen/pci.h> -+#include <linux/interrupt.h> -+#include <asm/atomic.h> -+#include <linux/workqueue.h> -+#include <linux/bitops.h> -+#include <linux/time.h> -+ -+ -+#ifndef __init_refok -+#define __init_refok -+#endif -+ -+#define INVALID_GRANT_REF (0) -+#define INVALID_EVTCHN (-1) -+ -+ -+struct pci_bus_entry { -+ struct list_head list; -+ struct pci_bus *bus; -+}; -+ -+#define _PDEVB_op_active (0) -+#define PDEVB_op_active (1 << (_PDEVB_op_active)) -+ -+struct pcifront_device { -+ struct xenbus_device *xdev; -+ struct list_head root_buses; -+ -+ int evtchn; -+ int gnt_ref; -+ -+ int irq; -+ -+ /* Lock this when doing any operations in sh_info */ -+ spinlock_t sh_info_lock; -+ struct xen_pci_sharedinfo *sh_info; -+ struct work_struct op_work; -+ unsigned long flags; -+ -+}; -+ -+struct pcifront_sd { -+ int domain; -+ struct pcifront_device *pdev; -+}; -+ -+static inline struct pcifront_device * -+pcifront_get_pdev(struct pcifront_sd *sd) -+{ -+ return sd->pdev; -+} -+ -+static inline void pcifront_init_sd(struct pcifront_sd *sd, -+ unsigned int domain, unsigned int bus, -+ struct pcifront_device *pdev) -+{ -+ sd->domain = domain; -+ sd->pdev = pdev; -+} -+ -+static inline void pcifront_setup_root_resources(struct pci_bus *bus, -+ struct pcifront_sd *sd) -+{ -+} -+ -+ -+DEFINE_SPINLOCK(pcifront_dev_lock); 
-+static struct pcifront_device *pcifront_dev; -+ -+static int verbose_request; -+module_param(verbose_request, int, 0644); -+ -+static int errno_to_pcibios_err(int errno) -+{ -+ switch (errno) { -+ case XEN_PCI_ERR_success: -+ return PCIBIOS_SUCCESSFUL; -+ -+ case XEN_PCI_ERR_dev_not_found: -+ return PCIBIOS_DEVICE_NOT_FOUND; -+ -+ case XEN_PCI_ERR_invalid_offset: -+ case XEN_PCI_ERR_op_failed: -+ return PCIBIOS_BAD_REGISTER_NUMBER; -+ -+ case XEN_PCI_ERR_not_implemented: -+ return PCIBIOS_FUNC_NOT_SUPPORTED; -+ -+ case XEN_PCI_ERR_access_denied: -+ return PCIBIOS_SET_FAILED; -+ } -+ return errno; -+} -+ -+static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev) -+{ -+ if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) -+ && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) { -+ dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n"); -+ schedule_work(&pdev->op_work); -+ } -+} -+ -+static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) -+{ -+ int err = 0; -+ struct xen_pci_op *active_op = &pdev->sh_info->op; -+ unsigned long irq_flags; -+ evtchn_port_t port = pdev->evtchn; -+ unsigned irq = pdev->irq; -+ s64 ns, ns_timeout; -+ struct timeval tv; -+ -+ spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); -+ -+ memcpy(active_op, op, sizeof(struct xen_pci_op)); -+ -+ /* Go */ -+ wmb(); -+ set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); -+ notify_remote_via_evtchn(port); -+ -+ /* -+ * We set a poll timeout of 3 seconds but give up on return after -+ * 2 seconds. It is better to time out too late rather than too early -+ * (in the latter case we end up continually re-executing poll() with a -+ * timeout in the past). 1s difference gives plenty of slack for error. 
-+ */ -+ do_gettimeofday(&tv); -+ ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC; -+ -+ xen_clear_irq_pending(irq); -+ -+ while (test_bit(_XEN_PCIF_active, -+ (unsigned long *)&pdev->sh_info->flags)) { -+ xen_poll_irq_timeout(irq, jiffies + 3*HZ); -+ xen_clear_irq_pending(irq); -+ do_gettimeofday(&tv); -+ ns = timeval_to_ns(&tv); -+ if (ns > ns_timeout) { -+ dev_err(&pdev->xdev->dev, -+ "pciback not responding!!!\n"); -+ clear_bit(_XEN_PCIF_active, -+ (unsigned long *)&pdev->sh_info->flags); -+ err = XEN_PCI_ERR_dev_not_found; -+ goto out; -+ } -+ } -+ -+ /* -+ * We might lose backend service request since we -+ * reuse same evtchn with pci_conf backend response. So re-schedule -+ * aer pcifront service. -+ */ -+ if (test_bit(_XEN_PCIB_active, -+ (unsigned long *)&pdev->sh_info->flags)) { -+ dev_err(&pdev->xdev->dev, -+ "schedule aer pcifront service\n"); -+ schedule_pcifront_aer_op(pdev); -+ } -+ -+ memcpy(op, active_op, sizeof(struct xen_pci_op)); -+ -+ err = op->err; -+out: -+ spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags); -+ return err; -+} -+ -+/* Access to this function is spinlocked in drivers/pci/access.c */ -+static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn, -+ int where, int size, u32 *val) -+{ -+ int err = 0; -+ struct xen_pci_op op = { -+ .cmd = XEN_PCI_OP_conf_read, -+ .domain = pci_domain_nr(bus), -+ .bus = bus->number, -+ .devfn = devfn, -+ .offset = where, -+ .size = size, -+ }; -+ struct pcifront_sd *sd = bus->sysdata; -+ struct pcifront_device *pdev = pcifront_get_pdev(sd); -+ -+ if (verbose_request) -+ dev_info(&pdev->xdev->dev, -+ "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n", -+ pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), -+ PCI_FUNC(devfn), where, size); -+ -+ err = do_pci_op(pdev, &op); -+ -+ if (likely(!err)) { -+ if (verbose_request) -+ dev_info(&pdev->xdev->dev, "read got back value %x\n", -+ op.value); -+ -+ *val = op.value; -+ } else if (err == -ENODEV) { -+ /* No device here, pretend 
that it just returned 0 */ -+ err = 0; -+ *val = 0; -+ } -+ -+ return errno_to_pcibios_err(err); -+} -+ -+/* Access to this function is spinlocked in drivers/pci/access.c */ -+static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn, -+ int where, int size, u32 val) -+{ -+ struct xen_pci_op op = { -+ .cmd = XEN_PCI_OP_conf_write, -+ .domain = pci_domain_nr(bus), -+ .bus = bus->number, -+ .devfn = devfn, -+ .offset = where, -+ .size = size, -+ .value = val, -+ }; -+ struct pcifront_sd *sd = bus->sysdata; -+ struct pcifront_device *pdev = pcifront_get_pdev(sd); -+ -+ if (verbose_request) -+ dev_info(&pdev->xdev->dev, -+ "write dev=%04x:%02x:%02x.%01x - " -+ "offset %x size %d val %x\n", -+ pci_domain_nr(bus), bus->number, -+ PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); -+ -+ return errno_to_pcibios_err(do_pci_op(pdev, &op)); -+} -+ -+struct pci_ops pcifront_bus_ops = { -+ .read = pcifront_bus_read, -+ .write = pcifront_bus_write, -+}; -+ -+#ifdef CONFIG_PCI_MSI -+static int pci_frontend_enable_msix(struct pci_dev *dev, -+ int **vector, int nvec) -+{ -+ int err; -+ int i; -+ struct xen_pci_op op = { -+ .cmd = XEN_PCI_OP_enable_msix, -+ .domain = pci_domain_nr(dev->bus), -+ .bus = dev->bus->number, -+ .devfn = dev->devfn, -+ .value = nvec, -+ }; -+ struct pcifront_sd *sd = dev->bus->sysdata; -+ struct pcifront_device *pdev = pcifront_get_pdev(sd); -+ struct msi_desc *entry; -+ -+ if (nvec > SH_INFO_MAX_VEC) { -+ dev_err(&dev->dev, "too much vector for pci frontend: %x." -+ " Increase SH_INFO_MAX_VEC.\n", nvec); -+ return -EINVAL; -+ } -+ -+ i = 0; -+ list_for_each_entry(entry, &dev->msi_list, list) { -+ op.msix_entries[i].entry = entry->msi_attrib.entry_nr; -+ /* Vector is useless at this point. 
*/ -+ op.msix_entries[i].vector = -1; -+ i++; -+ } -+ -+ err = do_pci_op(pdev, &op); -+ -+ if (likely(!err)) { -+ if (likely(!op.value)) { -+ /* we get the result */ -+ for (i = 0; i < nvec; i++) -+ *(*vector+i) = op.msix_entries[i].vector; -+ return 0; -+ } else { -+ printk(KERN_DEBUG "enable msix get value %x\n", -+ op.value); -+ return op.value; -+ } -+ } else { -+ dev_err(&dev->dev, "enable msix get err %x\n", err); -+ return err; -+ } -+} -+ -+static void pci_frontend_disable_msix(struct pci_dev *dev) -+{ -+ int err; -+ struct xen_pci_op op = { -+ .cmd = XEN_PCI_OP_disable_msix, -+ .domain = pci_domain_nr(dev->bus), -+ .bus = dev->bus->number, -+ .devfn = dev->devfn, -+ }; -+ struct pcifront_sd *sd = dev->bus->sysdata; -+ struct pcifront_device *pdev = pcifront_get_pdev(sd); -+ -+ err = do_pci_op(pdev, &op); -+ -+ /* What should do for error ? */ -+ if (err) -+ dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); -+} -+ -+static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) -+{ -+ int err; -+ struct xen_pci_op op = { -+ .cmd = XEN_PCI_OP_enable_msi, -+ .domain = pci_domain_nr(dev->bus), -+ .bus = dev->bus->number, -+ .devfn = dev->devfn, -+ }; -+ struct pcifront_sd *sd = dev->bus->sysdata; -+ struct pcifront_device *pdev = pcifront_get_pdev(sd); -+ -+ err = do_pci_op(pdev, &op); -+ if (likely(!err)) { -+ *(*vector) = op.value; -+ } else { -+ dev_err(&dev->dev, "pci frontend enable msi failed for dev " -+ "%x:%x \n", op.bus, op.devfn); -+ err = -EINVAL; -+ } -+ return err; -+} -+ -+static void pci_frontend_disable_msi(struct pci_dev *dev) -+{ -+ int err; -+ struct xen_pci_op op = { -+ .cmd = XEN_PCI_OP_disable_msi, -+ .domain = pci_domain_nr(dev->bus), -+ .bus = dev->bus->number, -+ .devfn = dev->devfn, -+ }; -+ struct pcifront_sd *sd = dev->bus->sysdata; -+ struct pcifront_device *pdev = pcifront_get_pdev(sd); -+ -+ err = do_pci_op(pdev, &op); -+ if (err == XEN_PCI_ERR_dev_not_found) { -+ /* XXX No response from backend, what shall we do? 
*/ -+ printk(KERN_DEBUG "get no response from backend for disable MSI\n"); -+ return; -+ } -+ if (err) -+ /* how can pciback notify us fail? */ -+ printk(KERN_DEBUG "get fake response frombackend \n"); -+} -+ -+static struct xen_pci_frontend_ops pci_frontend_ops = { -+ .enable_msi = pci_frontend_enable_msi, -+ .disable_msi = pci_frontend_disable_msi, -+ .enable_msix = pci_frontend_enable_msix, -+ .disable_msix = pci_frontend_disable_msix, -+}; -+ -+static void pci_frontend_registrar(int enable) -+{ -+ if (enable) -+ xen_pci_frontend = &pci_frontend_ops; -+ else -+ xen_pci_frontend = NULL; -+}; -+#else -+static inline void pci_frontend_registrar(int enable) { }; -+#endif /* CONFIG_PCI_MSI */ -+ -+/* Claim resources for the PCI frontend as-is, backend won't allow changes */ -+static int pcifront_claim_resource(struct pci_dev *dev, void *data) -+{ -+ struct pcifront_device *pdev = data; -+ int i; -+ struct resource *r; -+ -+ for (i = 0; i < PCI_NUM_RESOURCES; i++) { -+ r = &dev->resource[i]; -+ -+ if (!r->parent && r->start && r->flags) { -+ dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n", -+ pci_name(dev), i); -+ if (pci_claim_resource(dev, i)) { -+ dev_err(&pdev->xdev->dev, "Could not claim " -+ "resource %s/%d! Device offline. Try " -+ "giving less than 4GB to domain.\n", -+ pci_name(dev), i); -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+int __devinit pcifront_scan_bus(struct pcifront_device *pdev, -+ unsigned int domain, unsigned int bus, -+ struct pci_bus *b) -+{ -+ struct pci_dev *d; -+ unsigned int devfn; -+ int err; -+ -+ /* Scan the bus for functions and add. -+ * We omit handling of PCI bridge attachment because pciback prevents -+ * bridges from being exported. -+ */ -+ for (devfn = 0; devfn < 0x100; devfn++) { -+ d = pci_get_slot(b, devfn); -+ if (d) { -+ /* Device is already known. 
*/ -+ pci_dev_put(d); -+ continue; -+ } -+ -+ d = pci_scan_single_device(b, devfn); -+ if (d) -+ dev_info(&pdev->xdev->dev, "New device on " -+ "%04x:%02x:%02x.%02x found.\n", domain, bus, -+ PCI_SLOT(devfn), PCI_FUNC(devfn)); -+ } -+ -+ return 0; -+} -+ -+int __devinit pcifront_scan_root(struct pcifront_device *pdev, -+ unsigned int domain, unsigned int bus) -+{ -+ struct pci_bus *b; -+ struct pcifront_sd *sd = NULL; -+ struct pci_bus_entry *bus_entry = NULL; -+ int err = 0; -+ -+#ifndef CONFIG_PCI_DOMAINS -+ if (domain != 0) { -+ dev_err(&pdev->xdev->dev, -+ "PCI Root in non-zero PCI Domain! domain=%d\n", domain); -+ dev_err(&pdev->xdev->dev, -+ "Please compile with CONFIG_PCI_DOMAINS\n"); -+ err = -EINVAL; -+ goto err_out; -+ } -+#endif -+ -+ dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n", -+ domain, bus); -+ -+ bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL); -+ sd = kmalloc(sizeof(*sd), GFP_KERNEL); -+ if (!bus_entry || !sd) { -+ err = -ENOMEM; -+ goto err_out; -+ } -+ pcifront_init_sd(sd, domain, bus, pdev); -+ -+ b = pci_scan_bus_parented(&pdev->xdev->dev, bus, -+ &pcifront_bus_ops, sd); -+ if (!b) { -+ dev_err(&pdev->xdev->dev, -+ "Error creating PCI Frontend Bus!\n"); -+ err = -ENOMEM; -+ goto err_out; -+ } -+ -+ pcifront_setup_root_resources(b, sd); -+ bus_entry->bus = b; -+ -+ list_add(&bus_entry->list, &pdev->root_buses); -+ -+ /* pci_scan_bus_parented skips devices which do not have a have -+ * devfn==0. The pcifront_scan_bus enumerates all devfn. */ -+ err = pcifront_scan_bus(pdev, domain, bus, b); -+ -+ /* Claim resources before going "live" with our devices */ -+ pci_walk_bus(b, pcifront_claim_resource, pdev); -+ -+ /* Create SysFS and notify udev of the devices. 
Aka: "going live" */ -+ pci_bus_add_devices(b); -+ -+ return err; -+ -+err_out: -+ kfree(bus_entry); -+ kfree(sd); -+ -+ return err; -+} -+ -+int __devinit pcifront_rescan_root(struct pcifront_device *pdev, -+ unsigned int domain, unsigned int bus) -+{ -+ int err; -+ struct pci_bus *b; -+ -+#ifndef CONFIG_PCI_DOMAINS -+ if (domain != 0) { -+ dev_err(&pdev->xdev->dev, -+ "PCI Root in non-zero PCI Domain! domain=%d\n", domain); -+ dev_err(&pdev->xdev->dev, -+ "Please compile with CONFIG_PCI_DOMAINS\n"); -+ return -EINVAL; -+ } -+#endif -+ -+ dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n", -+ domain, bus); -+ -+ b = pci_find_bus(domain, bus); -+ if (!b) -+ /* If the bus is unknown, create it. */ -+ return pcifront_scan_root(pdev, domain, bus); -+ -+ err = pcifront_scan_bus(pdev, domain, bus, b); -+ -+ /* Claim resources before going "live" with our devices */ -+ pci_walk_bus(b, pcifront_claim_resource, pdev); -+ -+ /* Create SysFS and notify udev of the devices. Aka: "going live" */ -+ pci_bus_add_devices(b); -+ -+ return err; -+} -+ -+static void free_root_bus_devs(struct pci_bus *bus) -+{ -+ struct pci_dev *dev; -+ -+ while (!list_empty(&bus->devices)) { -+ dev = container_of(bus->devices.next, struct pci_dev, -+ bus_list); -+ dev_dbg(&dev->dev, "removing device\n"); -+ pci_remove_bus_device(dev); -+ } -+} -+ -+void pcifront_free_roots(struct pcifront_device *pdev) -+{ -+ struct pci_bus_entry *bus_entry, *t; -+ -+ dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n"); -+ -+ list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) { -+ list_del(&bus_entry->list); -+ -+ free_root_bus_devs(bus_entry->bus); -+ -+ kfree(bus_entry->bus->sysdata); -+ -+ device_unregister(bus_entry->bus->bridge); -+ pci_remove_bus(bus_entry->bus); -+ -+ kfree(bus_entry); -+ } -+} -+ -+static pci_ers_result_t pcifront_common_process(int cmd, -+ struct pcifront_device *pdev, -+ pci_channel_state_t state) -+{ -+ pci_ers_result_t result; -+ struct pci_driver *pdrv; 
-+ int bus = pdev->sh_info->aer_op.bus; -+ int devfn = pdev->sh_info->aer_op.devfn; -+ struct pci_dev *pcidev; -+ int flag = 0; -+ -+ dev_dbg(&pdev->xdev->dev, -+ "pcifront AER process: cmd %x (bus:%x, devfn%x)", -+ cmd, bus, devfn); -+ result = PCI_ERS_RESULT_NONE; -+ -+ pcidev = pci_get_bus_and_slot(bus, devfn); -+ if (!pcidev || !pcidev->driver) { -+ dev_err(&pcidev->dev, -+ "device or driver is NULL\n"); -+ return result; -+ } -+ pdrv = pcidev->driver; -+ -+ if (get_driver(&pdrv->driver)) { -+ if (pdrv->err_handler && pdrv->err_handler->error_detected) { -+ dev_dbg(&pcidev->dev, -+ "trying to call AER service\n"); -+ if (pcidev) { -+ flag = 1; -+ switch (cmd) { -+ case XEN_PCI_OP_aer_detected: -+ result = pdrv->err_handler-> -+ error_detected(pcidev, state); -+ break; -+ case XEN_PCI_OP_aer_mmio: -+ result = pdrv->err_handler-> -+ mmio_enabled(pcidev); -+ break; -+ case XEN_PCI_OP_aer_slotreset: -+ result = pdrv->err_handler-> -+ slot_reset(pcidev); -+ break; -+ case XEN_PCI_OP_aer_resume: -+ pdrv->err_handler->resume(pcidev); -+ break; -+ default: -+ dev_err(&pdev->xdev->dev, -+ "bad request in aer recovery " -+ "operation!\n"); -+ -+ } -+ } -+ } -+ put_driver(&pdrv->driver); -+ } -+ if (!flag) -+ result = PCI_ERS_RESULT_NONE; -+ -+ return result; -+} -+ -+ -+void pcifront_do_aer(struct work_struct *data) -+{ -+ struct pcifront_device *pdev = -+ container_of(data, struct pcifront_device, op_work); -+ int cmd = pdev->sh_info->aer_op.cmd; -+ pci_channel_state_t state = -+ (pci_channel_state_t)pdev->sh_info->aer_op.err; -+ -+ /*If a pci_conf op is in progress, -+ we have to wait until it is done before service aer op*/ -+ dev_dbg(&pdev->xdev->dev, -+ "pcifront service aer bus %x devfn %x\n", -+ pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn); -+ -+ pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state); -+ -+ wmb(); -+ clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags); -+ notify_remote_via_evtchn(pdev->evtchn); -+ -+ /*in 
case of we lost an aer request in four lines time_window*/ -+ smp_mb__before_clear_bit(); -+ clear_bit(_PDEVB_op_active, &pdev->flags); -+ smp_mb__after_clear_bit(); -+ -+ schedule_pcifront_aer_op(pdev); -+ -+} -+ -+irqreturn_t pcifront_handler_aer(int irq, void *dev) -+{ -+ struct pcifront_device *pdev = dev; -+ schedule_pcifront_aer_op(pdev); -+ return IRQ_HANDLED; -+} -+int pcifront_connect(struct pcifront_device *pdev) -+{ -+ int err = 0; -+ -+ spin_lock(&pcifront_dev_lock); -+ -+ if (!pcifront_dev) { -+ dev_info(&pdev->xdev->dev, "Installing PCI frontend\n"); -+ pcifront_dev = pdev; -+ } else { -+ dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); -+ err = -EEXIST; -+ } -+ -+ spin_unlock(&pcifront_dev_lock); -+ -+ return err; -+} -+ -+void pcifront_disconnect(struct pcifront_device *pdev) -+{ -+ spin_lock(&pcifront_dev_lock); -+ -+ if (pdev == pcifront_dev) { -+ dev_info(&pdev->xdev->dev, -+ "Disconnecting PCI Frontend Buses\n"); -+ pcifront_dev = NULL; -+ } -+ -+ spin_unlock(&pcifront_dev_lock); -+} -+static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev) -+{ -+ struct pcifront_device *pdev; -+ -+ pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL); -+ if (pdev == NULL) -+ goto out; -+ -+ pdev->sh_info = -+ (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL); -+ if (pdev->sh_info == NULL) { -+ kfree(pdev); -+ pdev = NULL; -+ goto out; -+ } -+ pdev->sh_info->flags = 0; -+ -+ /*Flag for registering PV AER handler*/ -+ set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags); -+ -+ dev_set_drvdata(&xdev->dev, pdev); -+ pdev->xdev = xdev; -+ -+ INIT_LIST_HEAD(&pdev->root_buses); -+ -+ spin_lock_init(&pdev->sh_info_lock); -+ -+ pdev->evtchn = INVALID_EVTCHN; -+ pdev->gnt_ref = INVALID_GRANT_REF; -+ pdev->irq = -1; -+ -+ INIT_WORK(&pdev->op_work, pcifront_do_aer); -+ -+ dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n", -+ pdev, pdev->sh_info); -+out: -+ return pdev; -+} -+ -+static void free_pdev(struct 
pcifront_device *pdev) -+{ -+ dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev); -+ -+ pcifront_free_roots(pdev); -+ -+ /*For PCIE_AER error handling job*/ -+ flush_scheduled_work(); -+ unbind_from_irqhandler(pdev->irq, pdev); -+ -+ if (pdev->evtchn != INVALID_EVTCHN) -+ xenbus_free_evtchn(pdev->xdev, pdev->evtchn); -+ -+ if (pdev->gnt_ref != INVALID_GRANT_REF) -+ gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */, -+ (unsigned long)pdev->sh_info); -+ -+ dev_set_drvdata(&pdev->xdev->dev, NULL); -+ kfree(pdev); -+} -+ -+static int pcifront_publish_info(struct pcifront_device *pdev) -+{ -+ int err = 0; -+ struct xenbus_transaction trans; -+ -+ err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); -+ if (err < 0) -+ goto out; -+ -+ pdev->gnt_ref = err; -+ -+ err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); -+ if (err) -+ goto out; -+ -+ err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer, -+ 0, "pcifront", pdev); -+ if (err < 0) { -+ xenbus_free_evtchn(pdev->xdev, pdev->evtchn); -+ xenbus_dev_fatal(pdev->xdev, err, "Failed to bind evtchn to " -+ "irqhandler.\n"); -+ return err; -+ } -+ pdev->irq = err; -+ -+do_publish: -+ err = xenbus_transaction_start(&trans); -+ if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error writing configuration for backend " -+ "(start transaction)"); -+ goto out; -+ } -+ -+ err = xenbus_printf(trans, pdev->xdev->nodename, -+ "pci-op-ref", "%u", pdev->gnt_ref); -+ if (!err) -+ err = xenbus_printf(trans, pdev->xdev->nodename, -+ "event-channel", "%u", pdev->evtchn); -+ if (!err) -+ err = xenbus_printf(trans, pdev->xdev->nodename, -+ "magic", XEN_PCI_MAGIC); -+ -+ if (err) { -+ xenbus_transaction_end(trans, 1); -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error writing configuration for backend"); -+ goto out; -+ } else { -+ err = xenbus_transaction_end(trans, 0); -+ if (err == -EAGAIN) -+ goto do_publish; -+ else if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error completing transaction " -+ 
"for backend"); -+ goto out; -+ } -+ } -+ -+ xenbus_switch_state(pdev->xdev, XenbusStateInitialised); -+ -+ dev_dbg(&pdev->xdev->dev, "publishing successful!\n"); -+ -+out: -+ return err; -+} -+ -+static int __devinit pcifront_try_connect(struct pcifront_device *pdev) -+{ -+ int err = -EFAULT; -+ int i, num_roots, len; -+ char str[64]; -+ unsigned int domain, bus; -+ -+ -+ /* Only connect once */ -+ if (xenbus_read_driver_state(pdev->xdev->nodename) != -+ XenbusStateInitialised) -+ goto out; -+ -+ err = pcifront_connect(pdev); -+ if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error connecting PCI Frontend"); -+ goto out; -+ } -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, -+ "root_num", "%d", &num_roots); -+ if (err == -ENOENT) { -+ xenbus_dev_error(pdev->xdev, err, -+ "No PCI Roots found, trying 0000:00"); -+ err = pcifront_scan_root(pdev, 0, 0); -+ num_roots = 0; -+ } else if (err != 1) { -+ if (err == 0) -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading number of PCI roots"); -+ goto out; -+ } -+ -+ for (i = 0; i < num_roots; i++) { -+ len = snprintf(str, sizeof(str), "root-%d", i); -+ if (unlikely(len >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, -+ "%x:%x", &domain, &bus); -+ if (err != 2) { -+ if (err >= 0) -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading PCI root %d", i); -+ goto out; -+ } -+ -+ err = pcifront_scan_root(pdev, domain, bus); -+ if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error scanning PCI root %04x:%02x", -+ domain, bus); -+ goto out; -+ } -+ } -+ -+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); -+ -+out: -+ return err; -+} -+ -+static int pcifront_try_disconnect(struct pcifront_device *pdev) -+{ -+ int err = 0; -+ enum xenbus_state prev_state; -+ -+ -+ prev_state = xenbus_read_driver_state(pdev->xdev->nodename); -+ -+ if (prev_state >= XenbusStateClosing) -+ goto out; -+ -+ if 
(prev_state == XenbusStateConnected) { -+ pcifront_free_roots(pdev); -+ pcifront_disconnect(pdev); -+ } -+ -+ err = xenbus_switch_state(pdev->xdev, XenbusStateClosed); -+ -+out: -+ -+ return err; -+} -+ -+static int __devinit pcifront_attach_devices(struct pcifront_device *pdev) -+{ -+ int err = -EFAULT; -+ int i, num_roots, len; -+ unsigned int domain, bus; -+ char str[64]; -+ -+ if (xenbus_read_driver_state(pdev->xdev->nodename) != -+ XenbusStateReconfiguring) -+ goto out; -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, -+ "root_num", "%d", &num_roots); -+ if (err == -ENOENT) { -+ xenbus_dev_error(pdev->xdev, err, -+ "No PCI Roots found, trying 0000:00"); -+ err = pcifront_rescan_root(pdev, 0, 0); -+ num_roots = 0; -+ } else if (err != 1) { -+ if (err == 0) -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading number of PCI roots"); -+ goto out; -+ } -+ -+ for (i = 0; i < num_roots; i++) { -+ len = snprintf(str, sizeof(str), "root-%d", i); -+ if (unlikely(len >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, -+ "%x:%x", &domain, &bus); -+ if (err != 2) { -+ if (err >= 0) -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading PCI root %d", i); -+ goto out; -+ } -+ -+ err = pcifront_rescan_root(pdev, domain, bus); -+ if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error scanning PCI root %04x:%02x", -+ domain, bus); -+ goto out; -+ } -+ } -+ -+ xenbus_switch_state(pdev->xdev, XenbusStateConnected); -+ -+out: -+ return err; -+} -+ -+static int pcifront_detach_devices(struct pcifront_device *pdev) -+{ -+ int err = 0; -+ int i, num_devs; -+ unsigned int domain, bus, slot, func; -+ struct pci_bus *pci_bus; -+ struct pci_dev *pci_dev; -+ char str[64]; -+ -+ if (xenbus_read_driver_state(pdev->xdev->nodename) != -+ XenbusStateConnected) -+ goto out; -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d", -+ &num_devs); -+ if (err != 1) 
{ -+ if (err >= 0) -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading number of PCI devices"); -+ goto out; -+ } -+ -+ /* Find devices being detached and remove them. */ -+ for (i = 0; i < num_devs; i++) { -+ int l, state; -+ l = snprintf(str, sizeof(str), "state-%d", i); -+ if (unlikely(l >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d", -+ &state); -+ if (err != 1) -+ state = XenbusStateUnknown; -+ -+ if (state != XenbusStateClosing) -+ continue; -+ -+ /* Remove device. */ -+ l = snprintf(str, sizeof(str), "vdev-%d", i); -+ if (unlikely(l >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, -+ "%x:%x:%x.%x", &domain, &bus, &slot, &func); -+ if (err != 4) { -+ if (err >= 0) -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading PCI device %d", i); -+ goto out; -+ } -+ -+ pci_bus = pci_find_bus(domain, bus); -+ if (!pci_bus) { -+ dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n", -+ domain, bus); -+ continue; -+ } -+ pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func)); -+ if (!pci_dev) { -+ dev_dbg(&pdev->xdev->dev, -+ "Cannot get PCI device %04x:%02x:%02x.%02x\n", -+ domain, bus, slot, func); -+ continue; -+ } -+ pci_remove_bus_device(pci_dev); -+ pci_dev_put(pci_dev); -+ -+ dev_dbg(&pdev->xdev->dev, -+ "PCI device %04x:%02x:%02x.%02x removed.\n", -+ domain, bus, slot, func); -+ } -+ -+ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring); -+ -+out: -+ return err; -+} -+ -+static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev, -+ enum xenbus_state be_state) -+{ -+ struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); -+ -+ switch (be_state) { -+ case XenbusStateUnknown: -+ case XenbusStateInitialising: -+ case XenbusStateInitWait: -+ case XenbusStateInitialised: -+ case XenbusStateClosed: -+ break; -+ -+ case XenbusStateConnected: -+ 
pcifront_try_connect(pdev); -+ break; -+ -+ case XenbusStateClosing: -+ dev_warn(&xdev->dev, "backend going away!\n"); -+ pcifront_try_disconnect(pdev); -+ break; -+ -+ case XenbusStateReconfiguring: -+ pcifront_detach_devices(pdev); -+ break; -+ -+ case XenbusStateReconfigured: -+ pcifront_attach_devices(pdev); -+ break; -+ } -+} -+ -+static int pcifront_xenbus_probe(struct xenbus_device *xdev, -+ const struct xenbus_device_id *id) -+{ -+ int err = 0; -+ struct pcifront_device *pdev = alloc_pdev(xdev); -+ -+ if (pdev == NULL) { -+ err = -ENOMEM; -+ xenbus_dev_fatal(xdev, err, -+ "Error allocating pcifront_device struct"); -+ goto out; -+ } -+ -+ err = pcifront_publish_info(pdev); -+ -+out: -+ return err; -+} -+ -+static int pcifront_xenbus_remove(struct xenbus_device *xdev) -+{ -+ struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); -+ -+ if (pdev) -+ free_pdev(pdev); -+ -+ return 0; -+} -+ -+static const struct xenbus_device_id xenpci_ids[] = { -+ {"pci"}, -+ {""}, -+}; -+ -+static struct xenbus_driver xenbus_pcifront_driver = { -+ .name = "pcifront", -+ .owner = THIS_MODULE, -+ .ids = xenpci_ids, -+ .probe = pcifront_xenbus_probe, -+ .remove = pcifront_xenbus_remove, -+ .otherend_changed = pcifront_backend_changed, -+}; -+ -+static int __init pcifront_init(void) -+{ -+ if (!xen_domain()) -+ return -ENODEV; -+ -+ pci_frontend_registrar(1 /* enable */); -+ -+ return xenbus_register_frontend(&xenbus_pcifront_driver); -+} -+ -+static void __exit pcifront_cleanup(void) -+{ -+ xenbus_unregister_driver(&xenbus_pcifront_driver); -+ pci_frontend_registrar(0 /* disable */); -+} -+module_init(pcifront_init); -+module_exit(pcifront_cleanup); -+ -+MODULE_DESCRIPTION("Xen PCI passthrough frontend."); -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS("xen:pci"); -diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig -index 188e1ba..efac9e3 100644 ---- a/drivers/video/Kconfig -+++ b/drivers/video/Kconfig -@@ -2063,6 +2063,7 @@ config XEN_FBDEV_FRONTEND - select 
FB_SYS_IMAGEBLIT - select FB_SYS_FOPS - select FB_DEFERRED_IO -+ select XEN_XENBUS_FRONTEND - default y - help - This driver implements the front-end of the Xen virtual -diff --git a/drivers/video/broadsheetfb.c b/drivers/video/broadsheetfb.c -index 509cb92..df9ccb9 100644 ---- a/drivers/video/broadsheetfb.c -+++ b/drivers/video/broadsheetfb.c -@@ -470,7 +470,7 @@ static int __devinit broadsheetfb_probe(struct platform_device *dev) - par->read_reg = broadsheet_read_reg; - init_waitqueue_head(&par->waitq); - -- info->flags = FBINFO_FLAG_DEFAULT; -+ info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB; - - info->fbdefio = &broadsheetfb_defio; - fb_deferred_io_init(info); -diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c -index c27ab1e..94414fc 100644 ---- a/drivers/video/fb_defio.c -+++ b/drivers/video/fb_defio.c -@@ -144,7 +144,9 @@ static const struct address_space_operations fb_deferred_io_aops = { - static int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma) - { - vma->vm_ops = &fb_deferred_io_vm_ops; -- vma->vm_flags |= ( VM_IO | VM_RESERVED | VM_DONTEXPAND ); -+ vma->vm_flags |= ( VM_RESERVED | VM_DONTEXPAND ); -+ if (!(info->flags & FBINFO_VIRTFB)) -+ vma->vm_flags |= VM_IO; - vma->vm_private_data = info; - return 0; - } -diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c -index 99bbd28..057433a 100644 ---- a/drivers/video/fbmem.c -+++ b/drivers/video/fbmem.c -@@ -1362,6 +1362,7 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) - vma->vm_pgoff = off >> PAGE_SHIFT; - /* This is an IO map - tell maydump to skip this VMA */ - vma->vm_flags |= VM_IO | VM_RESERVED; -+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - fb_pgprotect(file, vma, off); - if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, vma->vm_page_prot)) -diff --git a/drivers/video/hecubafb.c b/drivers/video/hecubafb.c -index 0b4bffb..f9d77ad 100644 ---- a/drivers/video/hecubafb.c -+++ 
b/drivers/video/hecubafb.c -@@ -253,7 +253,7 @@ static int __devinit hecubafb_probe(struct platform_device *dev) - par->send_command = apollo_send_command; - par->send_data = apollo_send_data; - -- info->flags = FBINFO_FLAG_DEFAULT; -+ info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB; - - info->fbdefio = &hecubafb_defio; - fb_deferred_io_init(info); -diff --git a/drivers/video/metronomefb.c b/drivers/video/metronomefb.c -index df1f757..661bfd2 100644 ---- a/drivers/video/metronomefb.c -+++ b/drivers/video/metronomefb.c -@@ -700,7 +700,7 @@ static int __devinit metronomefb_probe(struct platform_device *dev) - if (retval < 0) - goto err_free_irq; - -- info->flags = FBINFO_FLAG_DEFAULT; -+ info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB; - - info->fbdefio = &metronomefb_defio; - fb_deferred_io_init(info); -diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c -index 54cd916..dc72563 100644 ---- a/drivers/video/xen-fbfront.c -+++ b/drivers/video/xen-fbfront.c -@@ -25,7 +25,10 @@ - #include <linux/module.h> - #include <linux/vmalloc.h> - #include <linux/mm.h> -+ - #include <asm/xen/hypervisor.h> -+ -+#include <xen/xen.h> - #include <xen/events.h> - #include <xen/page.h> - #include <xen/interface/io/fbif.h> -@@ -440,7 +443,7 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, - fb_info->fix.type = FB_TYPE_PACKED_PIXELS; - fb_info->fix.accel = FB_ACCEL_NONE; - -- fb_info->flags = FBINFO_FLAG_DEFAULT; -+ fb_info->flags = FBINFO_FLAG_DEFAULT | FBINFO_VIRTFB; - - ret = fb_alloc_cmap(&fb_info->cmap, 256, 0); - if (ret < 0) { -@@ -627,6 +630,8 @@ static void xenfb_backend_changed(struct xenbus_device *dev, - switch (backend_state) { - case XenbusStateInitialising: - case XenbusStateInitialised: -+ case XenbusStateReconfiguring: -+ case XenbusStateReconfigured: - case XenbusStateUnknown: - case XenbusStateClosed: - break; -@@ -680,7 +685,7 @@ static struct xenbus_driver xenfb_driver = { - - static int __init xenfb_init(void) - { -- if 
(!xen_domain()) -+ if (!xen_domain() || xen_hvm_domain()) - return -ENODEV; - - /* Nothing to do if running in dom0. */ -diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig -index 3711b88..4fcb4c5 100644 ---- a/drivers/watchdog/Kconfig -+++ b/drivers/watchdog/Kconfig -@@ -975,6 +975,16 @@ config WATCHDOG_RIO - - # XTENSA Architecture - -+# Xen Architecture -+ -+config XEN_WDT -+ tristate "Xen Watchdog support" -+ depends on XEN -+ help -+ Say Y here to support the hypervisor watchdog capability provided -+ by Xen 4.0 and newer. The watchdog timeout period is normally one -+ minute but can be changed with a boot-time parameter. -+ - # - # ISA-based Watchdog Cards - # -diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile -index 699199b..2f6739a 100644 ---- a/drivers/watchdog/Makefile -+++ b/drivers/watchdog/Makefile -@@ -141,6 +141,9 @@ obj-$(CONFIG_WATCHDOG_CP1XXX) += cpwd.o - - # XTENSA Architecture - -+# Xen -+obj-$(CONFIG_XEN_WDT) += xen_wdt.o -+ - # Architecture Independant - obj-$(CONFIG_WM831X_WATCHDOG) += wm831x_wdt.o - obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o -diff --git a/drivers/watchdog/xen_wdt.c b/drivers/watchdog/xen_wdt.c -new file mode 100644 -index 0000000..bcfaafb ---- /dev/null -+++ b/drivers/watchdog/xen_wdt.c -@@ -0,0 +1,359 @@ -+/* -+ * Xen Watchdog Driver -+ * -+ * (c) Copyright 2010 Novell, Inc. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ */ -+ -+#define DRV_NAME "wdt" -+#define DRV_VERSION "0.01" -+#define PFX DRV_NAME ": " -+ -+#include <linux/bug.h> -+#include <linux/errno.h> -+#include <linux/fs.h> -+#include <linux/hrtimer.h> -+#include <linux/kernel.h> -+#include <linux/ktime.h> -+#include <linux/init.h> -+#include <linux/miscdevice.h> -+#include <linux/module.h> -+#include <linux/moduleparam.h> -+#include <linux/platform_device.h> -+#include <linux/spinlock.h> -+#include <linux/uaccess.h> -+#include <linux/watchdog.h> -+#include <xen/xen.h> -+#include <asm/xen/hypercall.h> -+#include <xen/interface/sched.h> -+ -+static struct platform_device *platform_device; -+static DEFINE_SPINLOCK(wdt_lock); -+static struct sched_watchdog wdt; -+static __kernel_time_t wdt_expires; -+static bool is_active, expect_release; -+ -+#define WATCHDOG_TIMEOUT 60 /* in seconds */ -+static unsigned int timeout = WATCHDOG_TIMEOUT; -+module_param(timeout, uint, S_IRUGO); -+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds " -+ "(default=" __MODULE_STRING(WATCHDOG_TIMEOUT) ")"); -+ -+static bool nowayout = WATCHDOG_NOWAYOUT; -+module_param(nowayout, bool, S_IRUGO); -+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " -+ "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -+ -+static inline __kernel_time_t set_timeout(void) -+{ -+ wdt.timeout = timeout; -+ return ktime_to_timespec(ktime_get()).tv_sec + timeout; -+} -+ -+static int xen_wdt_start(void) -+{ -+ __kernel_time_t expires; -+ int err; -+ -+ spin_lock(&wdt_lock); -+ -+ expires = set_timeout(); -+ if (!wdt.id) -+ err = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wdt); -+ else -+ err = -EBUSY; -+ if (err > 0) { -+ wdt.id = err; -+ wdt_expires = expires; -+ err = 0; -+ } else -+ BUG_ON(!err); -+ -+ spin_unlock(&wdt_lock); -+ -+ return err; -+} -+ -+static int xen_wdt_stop(void) -+{ -+ int err = 0; -+ -+ spin_lock(&wdt_lock); -+ -+ wdt.timeout = 0; -+ if (wdt.id) -+ err = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wdt); -+ if (!err) -+ wdt.id = 0; 
-+ -+ spin_unlock(&wdt_lock); -+ -+ return err; -+} -+ -+static int xen_wdt_kick(void) -+{ -+ __kernel_time_t expires; -+ int err; -+ -+ spin_lock(&wdt_lock); -+ -+ expires = set_timeout(); -+ if (wdt.id) -+ err = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wdt); -+ else -+ err = -ENXIO; -+ if (!err) -+ wdt_expires = expires; -+ -+ spin_unlock(&wdt_lock); -+ -+ return err; -+} -+ -+static int xen_wdt_open(struct inode *inode, struct file *file) -+{ -+ int err; -+ -+ /* /dev/watchdog can only be opened once */ -+ if (xchg(&is_active, true)) -+ return -EBUSY; -+ -+ err = xen_wdt_start(); -+ if (err == -EBUSY) -+ err = xen_wdt_kick(); -+ return err ?: nonseekable_open(inode, file); -+} -+ -+static int xen_wdt_release(struct inode *inode, struct file *file) -+{ -+ if (expect_release) -+ xen_wdt_stop(); -+ else { -+ printk(KERN_CRIT PFX -+ "unexpected close, not stopping watchdog!\n"); -+ xen_wdt_kick(); -+ } -+ is_active = false; -+ expect_release = false; -+ return 0; -+} -+ -+static ssize_t xen_wdt_write(struct file *file, const char __user *data, -+ size_t len, loff_t *ppos) -+{ -+ /* See if we got the magic character 'V' and reload the timer */ -+ if (len) { -+ if (!nowayout) { -+ size_t i; -+ -+ /* in case it was set long ago */ -+ expect_release = false; -+ -+ /* scan to see whether or not we got the magic -+ character */ -+ for (i = 0; i != len; i++) { -+ char c; -+ if (get_user(c, data + i)) -+ return -EFAULT; -+ if (c == 'V') -+ expect_release = true; -+ } -+ } -+ -+ /* someone wrote to us, we should reload the timer */ -+ xen_wdt_kick(); -+ } -+ return len; -+} -+ -+static long xen_wdt_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ int new_options, retval = -EINVAL; -+ int new_timeout; -+ int __user *argp = (void __user *)arg; -+ static const struct watchdog_info ident = { -+ .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE, -+ .firmware_version = 0, -+ .identity = DRV_NAME, -+ }; -+ -+ switch (cmd) { -+ case WDIOC_GETSUPPORT: -+ return 
copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; -+ -+ case WDIOC_GETSTATUS: -+ case WDIOC_GETBOOTSTATUS: -+ return put_user(0, argp); -+ -+ case WDIOC_SETOPTIONS: -+ if (get_user(new_options, argp)) -+ return -EFAULT; -+ -+ if (new_options & WDIOS_DISABLECARD) -+ retval = xen_wdt_stop(); -+ if (new_options & WDIOS_ENABLECARD) { -+ retval = xen_wdt_start(); -+ if (retval == -EBUSY) -+ retval = xen_wdt_kick(); -+ } -+ return retval; -+ -+ case WDIOC_KEEPALIVE: -+ xen_wdt_kick(); -+ return 0; -+ -+ case WDIOC_SETTIMEOUT: -+ if (get_user(new_timeout, argp)) -+ return -EFAULT; -+ if (!new_timeout) -+ return -EINVAL; -+ timeout = new_timeout; -+ xen_wdt_kick(); -+ /* fall through */ -+ case WDIOC_GETTIMEOUT: -+ return put_user(timeout, argp); -+ -+ case WDIOC_GETTIMELEFT: -+ retval = wdt_expires - ktime_to_timespec(ktime_get()).tv_sec; -+ return put_user(retval, argp); -+ } -+ -+ return -ENOTTY; -+} -+ -+static const struct file_operations xen_wdt_fops = { -+ .owner = THIS_MODULE, -+ .llseek = no_llseek, -+ .write = xen_wdt_write, -+ .unlocked_ioctl = xen_wdt_ioctl, -+ .open = xen_wdt_open, -+ .release = xen_wdt_release, -+}; -+ -+static struct miscdevice xen_wdt_miscdev = { -+ .minor = WATCHDOG_MINOR, -+ .name = "watchdog", -+ .fops = &xen_wdt_fops, -+}; -+ -+static int __devinit xen_wdt_probe(struct platform_device *dev) -+{ -+ struct sched_watchdog wd = { .id = ~0 }; -+ int ret = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd); -+ -+ switch (ret) { -+ case -EINVAL: -+ if (!timeout) { -+ timeout = WATCHDOG_TIMEOUT; -+ printk(KERN_INFO PFX -+ "timeout value invalid, using %d\n", timeout); -+ } -+ -+ ret = misc_register(&xen_wdt_miscdev); -+ if (ret) { -+ printk(KERN_ERR PFX -+ "cannot register miscdev on minor=%d (%d)\n", -+ WATCHDOG_MINOR, ret); -+ break; -+ } -+ -+ printk(KERN_INFO PFX -+ "initialized (timeout=%ds, nowayout=%d)\n", -+ timeout, nowayout); -+ break; -+ -+ case -ENOSYS: -+ printk(KERN_INFO PFX "not supported\n"); -+ ret = -ENODEV; -+ break; -+ -+ 
default: -+ printk(KERN_INFO PFX "bogus return value %d\n", ret); -+ break; -+ } -+ -+ return ret; -+} -+ -+static int __devexit xen_wdt_remove(struct platform_device *dev) -+{ -+ /* Stop the timer before we leave */ -+ if (!nowayout) -+ xen_wdt_stop(); -+ -+ misc_deregister(&xen_wdt_miscdev); -+ -+ return 0; -+} -+ -+static void xen_wdt_shutdown(struct platform_device *dev) -+{ -+ xen_wdt_stop(); -+} -+ -+static int xen_wdt_suspend(struct platform_device *dev, pm_message_t state) -+{ -+ return xen_wdt_stop(); -+} -+ -+static int xen_wdt_resume(struct platform_device *dev) -+{ -+ return xen_wdt_start(); -+} -+ -+static struct platform_driver xen_wdt_driver = { -+ .probe = xen_wdt_probe, -+ .remove = __devexit_p(xen_wdt_remove), -+ .shutdown = xen_wdt_shutdown, -+ .suspend = xen_wdt_suspend, -+ .resume = xen_wdt_resume, -+ .driver = { -+ .owner = THIS_MODULE, -+ .name = DRV_NAME, -+ }, -+}; -+ -+static int __init xen_wdt_init_module(void) -+{ -+ int err; -+ -+ if (!xen_domain()) -+ return -ENODEV; -+ -+ printk(KERN_INFO PFX "Xen WatchDog Timer Driver v%s\n", DRV_VERSION); -+ -+ err = platform_driver_register(&xen_wdt_driver); -+ if (err) -+ return err; -+ -+ platform_device = platform_device_register_simple(DRV_NAME, -+ -1, NULL, 0); -+ if (IS_ERR(platform_device)) { -+ err = PTR_ERR(platform_device); -+ platform_driver_unregister(&xen_wdt_driver); -+ } -+ -+ return err; -+} -+ -+static void __exit xen_wdt_cleanup_module(void) -+{ -+ platform_device_unregister(platform_device); -+ platform_driver_unregister(&xen_wdt_driver); -+ printk(KERN_INFO PFX "module unloaded\n"); -+} -+ -+module_init(xen_wdt_init_module); -+module_exit(xen_wdt_cleanup_module); -+ -+MODULE_AUTHOR("Jen Beulich <jbeulich@novell.com>"); -+MODULE_DESCRIPTION("Xen WatchDog Timer Driver"); -+MODULE_VERSION(DRV_VERSION); -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); -diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig -index cab100a..fa9982e 100644 ---- a/drivers/xen/Kconfig 
-+++ b/drivers/xen/Kconfig -@@ -28,6 +28,110 @@ config XEN_DEV_EVTCHN - firing. - If in doubt, say yes. - -+config XEN_BACKEND -+ bool "Backend driver support" -+ depends on XEN_DOM0 -+ default y -+ help -+ Support for backend device drivers that provide I/O services -+ to other virtual machines. -+ -+config XEN_NETDEV_BACKEND -+ tristate "Xen backend network device" -+ depends on XEN_BACKEND && NET -+ help -+ Implement the network backend driver, which passes packets -+ from the guest domain's frontend drivers to the network. -+ -+config XEN_BLKDEV_BACKEND -+ tristate "Block-device backend driver" -+ depends on XEN_BACKEND && BLOCK -+ help -+ The block-device backend driver allows the kernel to export its -+ block devices to other guests via a high-performance shared-memory -+ interface. -+ -+ -+config XEN_BLKDEV_TAP -+ tristate "Block-device tap backend driver" -+ depends on XEN_BACKEND && BLOCK -+ help -+ The block tap driver is an alternative to the block back driver -+ and allows VM block requests to be redirected to userspace through -+ a device interface. The tap allows user-space development of -+ high-performance block backends, where disk images may be implemented -+ as files, in memory, or on other hosts across the network. This -+ driver can safely coexist with the existing blockback driver. -+ -+config XEN_BLKBACK_PAGEMAP -+ tristate -+ depends on XEN_BLKDEV_BACKEND != n && XEN_BLKDEV_TAP != n -+ default XEN_BLKDEV_BACKEND || XEN_BLKDEV_TAP -+ -+config XEN_PCIDEV_BACKEND -+ tristate "PCI-device backend driver" -+ depends on PCI && XEN_BACKEND -+ default XEN_BACKEND -+ help -+ The PCI device backend driver allows the kernel to export arbitrary -+ PCI devices to other guests. If you select this to be a module, you -+ will need to make sure no other driver has bound to the device(s) -+ you want to make visible to other guests. 
-+ -+choice -+ prompt "PCI Backend Mode" -+ depends on XEN_PCIDEV_BACKEND -+ default XEN_PCIDEV_BACKEND_VPCI if !IA64 -+ default XEN_PCIDEV_BACKEND_CONTROLLER if IA64 -+ -+config XEN_PCIDEV_BACKEND_VPCI -+ bool "Virtual PCI" -+ ---help--- -+ This PCI Backend hides the true PCI topology and makes the frontend -+ think there is a single PCI bus with only the exported devices on it. -+ For example, a device at 03:05.0 will be re-assigned to 00:00.0. A -+ second device at 02:1a.1 will be re-assigned to 00:01.1. -+ -+config XEN_PCIDEV_BACKEND_PASS -+ bool "Passthrough" -+ ---help--- -+ This PCI Backend provides a real view of the PCI topology to the -+ frontend (for example, a device at 06:01.b will still appear at -+ 06:01.b to the frontend). This is similar to how Xen 2.0.x exposed -+ PCI devices to its driver domains. This may be required for drivers -+ which depend on finding their hardward in certain bus/slot -+ locations. -+ -+config XEN_PCIDEV_BACKEND_SLOT -+ bool "Slot" -+ ---help--- -+ This PCI Backend hides the true PCI topology and makes the frontend -+ think there is a single PCI bus with only the exported devices on it. -+ Contrary to the virtual PCI backend, a function becomes a new slot. -+ For example, a device at 03:05.2 will be re-assigned to 00:00.0. A -+ second device at 02:1a.1 will be re-assigned to 00:01.0. -+ -+config XEN_PCIDEV_BACKEND_CONTROLLER -+ bool "Controller" -+ depends on IA64 -+ ---help--- -+ This PCI backend virtualizes the PCI bus topology by providing a -+ virtual bus per PCI root device. Devices which are physically under -+ the same root bus will appear on the same virtual bus. For systems -+ with complex I/O addressing, this is the only backend which supports -+ extended I/O port spaces and MMIO translation offsets. This backend -+ also supports slot virtualization. For example, a device at -+ 0000:01:02.1 will be re-assigned to 0000:00:00.0. 
A second device -+ at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be -+ re-assigned to 0000:00:01.0. A third device at 0000:16:05.0 (under -+ a different PCI root bus) will be re-assigned to 0000:01:00.0. -+ -+endchoice -+ -+config XEN_PCIDEV_BE_DEBUG -+ bool "PCI Backend Debugging" -+ depends on XEN_PCIDEV_BACKEND -+ - config XENFS - tristate "Xen filesystem" - depends on XEN -@@ -60,4 +164,37 @@ config XEN_SYS_HYPERVISOR - Create entries under /sys/hypervisor describing the Xen - hypervisor environment. When running native or in another - virtual environment, /sys/hypervisor will still be present, -- but will have no xen contents. -\ No newline at end of file -+ but will have no xen contents. -+ -+config XEN_MCE -+ def_bool y -+ depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL -+ -+config XEN_XENBUS_FRONTEND -+ tristate -+ -+config XEN_GNTDEV -+ tristate "userspace grant access device driver" -+ depends on XEN -+ select MMU_NOTIFIER -+ help -+ Allows userspace processes use grants. -+ -+config XEN_S3 -+ def_bool y -+ depends on XEN_DOM0 && ACPI -+ -+config ACPI_PROCESSOR_XEN -+ tristate -+ depends on XEN_DOM0 && ACPI_PROCESSOR && CPU_FREQ -+ default y -+ -+config XEN_PLATFORM_PCI -+ tristate "xen platform pci device driver" -+ depends on XEN_PVHVM -+ default m -+ help -+ Driver for the Xen PCI Platform device: it is responsible for -+ initializing xenbus and grant_table when running in a Xen HVM -+ domain. As a consequence this driver is required to run any Xen PV -+ frontend on Xen HVM. 
-diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile -index 7c28434..ef1ea63 100644 ---- a/drivers/xen/Makefile -+++ b/drivers/xen/Makefile -@@ -1,12 +1,27 @@ --obj-y += grant-table.o features.o events.o manage.o -+obj-y += grant-table.o features.o events.o manage.o biomerge.o pcpu.o - obj-y += xenbus/ - - nostackp := $(call cc-option, -fno-stack-protector) - CFLAGS_features.o := $(nostackp) - --obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o --obj-$(CONFIG_XEN_XENCOMM) += xencomm.o --obj-$(CONFIG_XEN_BALLOON) += balloon.o --obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o --obj-$(CONFIG_XENFS) += xenfs/ --obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o -\ No newline at end of file -+obj-$(CONFIG_PCI) += pci.o -+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -+obj-$(CONFIG_XEN_XENCOMM) += xencomm.o -+obj-$(CONFIG_XEN_BALLOON) += balloon.o -+obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o -+obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o -+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ -+obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ -+obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ -+obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ -+obj-$(CONFIG_XENFS) += xenfs/ -+obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o -+obj-$(CONFIG_XEN_MCE) += mce.o -+ -+obj-$(CONFIG_XEN_S3) += acpi.o -+obj-$(CONFIG_ACPI_PROCESSOR_XEN) += acpi_processor.o -+obj-$(CONFIG_ACPI_HOTPLUG_MEMORY) += xen_acpi_memhotplug.o -+obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o -+ -+xen-evtchn-y := evtchn.o -+xen-gntdev-y := gntdev.o -diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c -new file mode 100644 -index 0000000..e6d3d0e ---- /dev/null -+++ b/drivers/xen/acpi.c -@@ -0,0 +1,23 @@ -+#include <xen/acpi.h> -+ -+#include <xen/interface/platform.h> -+#include <asm/xen/hypercall.h> -+#include <asm/xen/hypervisor.h> -+ -+int acpi_notify_hypervisor_state(u8 sleep_state, -+ u32 pm1a_cnt, u32 pm1b_cnt) -+{ -+ struct xen_platform_op op = { -+ .cmd = XENPF_enter_acpi_sleep, -+ .interface_version = XENPF_INTERFACE_VERSION, -+ .u 
= { -+ .enter_acpi_sleep = { -+ .pm1a_cnt_val = (u16)pm1a_cnt, -+ .pm1b_cnt_val = (u16)pm1b_cnt, -+ .sleep_state = sleep_state, -+ }, -+ }, -+ }; -+ -+ return HYPERVISOR_dom0_op(&op); -+} -diff --git a/drivers/xen/acpi_processor.c b/drivers/xen/acpi_processor.c -new file mode 100644 -index 0000000..e83b615 ---- /dev/null -+++ b/drivers/xen/acpi_processor.c -@@ -0,0 +1,417 @@ -+/* -+ * acpi_processor.c - interface to notify Xen on acpi processor object -+ * info parsing -+ * -+ * Copyright (C) 2008, Intel corporation -+ * -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or (at -+ * your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
-+ * -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <linux/types.h> -+#include <linux/acpi.h> -+#include <linux/pm.h> -+#include <linux/cpu.h> -+ -+#include <linux/cpufreq.h> -+#include <acpi/processor.h> -+#include <xen/acpi.h> -+#include <xen/pcpu.h> -+ -+#include <asm/xen/hypercall.h> -+#include <asm/xen/hypervisor.h> -+ -+static int xen_hotplug_notifier(struct acpi_processor *pr, int event); -+ -+static struct processor_cntl_xen_ops xen_ops = { -+ .hotplug = xen_hotplug_notifier, -+}; -+ -+static struct acpi_power_register *power_registers[XEN_MAX_ACPI_ID + 1]; -+ -+int processor_cntl_xen_power_cache(int cpu, int cx, -+ struct acpi_power_register *reg) -+{ -+ struct acpi_power_register *buf; -+ -+ if (cpu < 0 || cpu > XEN_MAX_ACPI_ID || -+ cx < 1 || cx > ACPI_PROCESSOR_MAX_POWER) { -+ return -EINVAL; -+ } -+ -+ if (power_registers[cpu] == NULL) { -+ buf = kzalloc(ACPI_PROCESSOR_MAX_POWER * -+ sizeof(struct xen_processor_cx), GFP_KERNEL); -+ if (buf == NULL) -+ return -ENOMEM; -+ -+ power_registers[cpu] = buf; -+ } -+ -+ memcpy(power_registers[cpu]+cx-1, reg, sizeof(*reg)); -+ -+ return 0; -+} -+EXPORT_SYMBOL(processor_cntl_xen_power_cache); -+ -+#ifdef CONFIG_ACPI_HOTPLUG_CPU -+static int xen_get_apic_id(acpi_handle handle) -+{ -+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; -+ union acpi_object *obj; -+ struct acpi_madt_local_apic *lapic; -+ u8 physid; -+ -+ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) -+ return -EINVAL; -+ -+ if (!buffer.length || !buffer.pointer) -+ return -EINVAL; -+ -+ obj = buffer.pointer; -+ if (obj->type != ACPI_TYPE_BUFFER || -+ obj->buffer.length < sizeof(*lapic)) { -+ kfree(buffer.pointer); -+ return -EINVAL; -+ } -+ -+ lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer; -+ -+ if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC || -+ !(lapic->lapic_flags & ACPI_MADT_ENABLED)) { -+ kfree(buffer.pointer); -+ return -EINVAL; -+ } -+ -+ physid = lapic->id; -+ 
kfree(buffer.pointer); -+ buffer.length = ACPI_ALLOCATE_BUFFER; -+ buffer.pointer = NULL; -+ -+ return physid; -+} -+#else -+static int xen_get_apic_id(acpi_handle handle) -+{ -+ return -1; -+} -+#endif -+ -+int processor_cntl_xen_notify(struct acpi_processor *pr, int event, int type) -+{ -+ int ret = -EINVAL; -+ -+ switch (event) { -+ case PROCESSOR_PM_INIT: -+ case PROCESSOR_PM_CHANGE: -+ if ((type >= PM_TYPE_MAX) || -+ !xen_ops.pm_ops[type]) -+ break; -+ -+ ret = xen_ops.pm_ops[type](pr, event); -+ break; -+ case PROCESSOR_HOTPLUG: -+ { -+ int apic_id; -+ -+ apic_id = xen_get_apic_id(pr->handle); -+ if (apic_id < 0) -+ break; -+ if (xen_ops.hotplug) -+ ret = xen_ops.hotplug(pr, type); -+ xen_pcpu_hotplug(type, apic_id); -+ break; -+ } -+ default: -+ printk(KERN_ERR "Unsupport processor events %d.\n", event); -+ break; -+ } -+ -+ return ret; -+} -+EXPORT_SYMBOL(processor_cntl_xen_notify); -+ -+static inline void xen_convert_pct_reg(struct xen_pct_register *xpct, -+ struct acpi_pct_register *apct) -+{ -+ xpct->descriptor = apct->descriptor; -+ xpct->length = apct->length; -+ xpct->space_id = apct->space_id; -+ xpct->bit_width = apct->bit_width; -+ xpct->bit_offset = apct->bit_offset; -+ xpct->reserved = apct->reserved; -+ xpct->address = apct->address; -+} -+ -+static inline void xen_convert_pss_states(struct xen_processor_px *xpss, -+ struct acpi_processor_px *apss, int state_count) -+{ -+ int i; -+ for (i = 0; i < state_count; i++) { -+ xpss->core_frequency = apss->core_frequency; -+ xpss->power = apss->power; -+ xpss->transition_latency = apss->transition_latency; -+ xpss->bus_master_latency = apss->bus_master_latency; -+ xpss->control = apss->control; -+ xpss->status = apss->status; -+ xpss++; -+ apss++; -+ } -+} -+ -+static inline void xen_convert_psd_pack(struct xen_psd_package *xpsd, -+ struct acpi_psd_package *apsd) -+{ -+ xpsd->num_entries = apsd->num_entries; -+ xpsd->revision = apsd->revision; -+ xpsd->domain = apsd->domain; -+ xpsd->coord_type = 
apsd->coord_type; -+ xpsd->num_processors = apsd->num_processors; -+} -+ -+static int xen_cx_notifier(struct acpi_processor *pr, int action) -+{ -+ int ret, count = 0, i; -+ xen_platform_op_t op = { -+ .cmd = XENPF_set_processor_pminfo, -+ .interface_version = XENPF_INTERFACE_VERSION, -+ .u.set_pminfo.id = pr->acpi_id, -+ .u.set_pminfo.type = XEN_PM_CX, -+ }; -+ struct xen_processor_cx *data, *buf; -+ struct acpi_processor_cx *cx; -+ struct acpi_power_register *reg; -+ -+ if (action == PROCESSOR_PM_CHANGE) -+ return -EINVAL; -+ -+ if (power_registers[pr->acpi_id] == NULL) { -+ printk(KERN_WARNING "No C state info for acpi processor %d\n", -+ pr->acpi_id); -+ return -EINVAL; -+ } -+ -+ /* Convert to Xen defined structure and hypercall */ -+ buf = kzalloc(pr->power.count * sizeof(struct xen_processor_cx), -+ GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; -+ -+ data = buf; -+ for (i = 1; i <= pr->power.count; i++) { -+ cx = &pr->power.states[i]; -+ reg = power_registers[pr->acpi_id]+i-1; -+ /* Skip invalid cstate entry */ -+ if (!cx->valid) -+ continue; -+ -+ data->type = cx->type; -+ data->latency = cx->latency; -+ data->power = cx->power; -+ data->reg.space_id = reg->space_id; -+ data->reg.bit_width = reg->bit_width; -+ data->reg.bit_offset = reg->bit_offset; -+ data->reg.access_size = reg->access_size; -+ data->reg.address = reg->address; -+ -+ /* Get dependency relationships, _CSD is not supported yet */ -+ data->dpcnt = 0; -+ set_xen_guest_handle(data->dp, NULL); -+ -+ data++; -+ count++; -+ } -+ -+ if (!count) { -+ printk(KERN_ERR "No available Cx info for cpu %d\n", -+ pr->acpi_id); -+ kfree(buf); -+ return -EINVAL; -+ } -+ -+ op.u.set_pminfo.power.count = count; -+ op.u.set_pminfo.power.flags.bm_control = pr->flags.bm_control; -+ op.u.set_pminfo.power.flags.bm_check = pr->flags.bm_check; -+ op.u.set_pminfo.power.flags.has_cst = pr->flags.has_cst; -+ op.u.set_pminfo.power.flags.power_setup_done = -+ pr->flags.power_setup_done; -+ -+ 
set_xen_guest_handle(op.u.set_pminfo.power.states, buf); -+ ret = HYPERVISOR_dom0_op(&op); -+ kfree(buf); -+ return ret; -+} -+ -+static int xen_px_notifier(struct acpi_processor *pr, int action) -+{ -+ int ret = -EINVAL; -+ xen_platform_op_t op = { -+ .cmd = XENPF_set_processor_pminfo, -+ .interface_version = XENPF_INTERFACE_VERSION, -+ .u.set_pminfo.id = pr->acpi_id, -+ .u.set_pminfo.type = XEN_PM_PX, -+ }; -+ struct xen_processor_performance *perf; -+ struct xen_processor_px *states = NULL; -+ struct acpi_processor_performance *px; -+ struct acpi_psd_package *pdomain; -+ -+ if (!pr) -+ return -EINVAL; -+ -+ perf = &op.u.set_pminfo.perf; -+ px = pr->performance; -+ -+ switch (action) { -+ case PROCESSOR_PM_CHANGE: -+ /* ppc dynamic handle */ -+ perf->flags = XEN_PX_PPC; -+ perf->platform_limit = pr->performance_platform_limit; -+ -+ ret = HYPERVISOR_dom0_op(&op); -+ break; -+ -+ case PROCESSOR_PM_INIT: -+ /* px normal init */ -+ perf->flags = XEN_PX_PPC | -+ XEN_PX_PCT | -+ XEN_PX_PSS | -+ XEN_PX_PSD; -+ -+ /* ppc */ -+ perf->platform_limit = pr->performance_platform_limit; -+ -+ /* pct */ -+ xen_convert_pct_reg(&perf->control_register, -+ &px->control_register); -+ xen_convert_pct_reg(&perf->status_register, -+ &px->status_register); -+ -+ /* pss */ -+ perf->state_count = px->state_count; -+ states = kzalloc(px->state_count*sizeof(xen_processor_px_t), -+ GFP_KERNEL); -+ if (!states) -+ return -ENOMEM; -+ xen_convert_pss_states(states, px->states, px->state_count); -+ set_xen_guest_handle(perf->states, states); -+ -+ /* psd */ -+ pdomain = &px->domain_info; -+ xen_convert_psd_pack(&perf->domain_info, pdomain); -+ if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) -+ perf->shared_type = CPUFREQ_SHARED_TYPE_ALL; -+ else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) -+ perf->shared_type = CPUFREQ_SHARED_TYPE_ANY; -+ else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) -+ perf->shared_type = CPUFREQ_SHARED_TYPE_HW; -+ else { -+ ret = -ENODEV; -+ 
kfree(states); -+ break; -+ } -+ -+ ret = HYPERVISOR_dom0_op(&op); -+ kfree(states); -+ break; -+ -+ default: -+ break; -+ } -+ -+ return ret; -+} -+ -+static int xen_tx_notifier(struct acpi_processor *pr, int action) -+{ -+ return -EINVAL; -+} -+ -+#ifdef CONFIG_ACPI_HOTPLUG_CPU -+static int xen_hotplug_notifier(struct acpi_processor *pr, int event) -+{ -+ int ret = -EINVAL; -+ uint32_t apic_id; -+ unsigned long long pxm; -+ acpi_status status = 0; -+ -+ xen_platform_op_t op = { -+ .interface_version = XENPF_INTERFACE_VERSION, -+ }; -+ -+ apic_id = xen_get_apic_id(pr->handle); -+ if (apic_id < 0) { -+ printk(KERN_WARNING "Can't get apic_id for acpi_id %x\n", -+ pr->acpi_id); -+ return -1; -+ } -+ -+ status = acpi_evaluate_integer(pr->handle, "_PXM", -+ NULL, &pxm); -+ if (ACPI_FAILURE(status)) { -+ printk(KERN_WARNING "can't get pxm for acpi_id %x\n", -+ pr->acpi_id); -+ return -1; -+ } -+ -+ switch (event) { -+ case HOTPLUG_TYPE_ADD: -+ op.cmd = XENPF_cpu_hotadd; -+ op.u.cpu_add.apic_id = apic_id; -+ op.u.cpu_add.acpi_id = pr->acpi_id; -+ op.u.cpu_add.pxm = pxm; -+ ret = HYPERVISOR_dom0_op(&op); -+ break; -+ case HOTPLUG_TYPE_REMOVE: -+ printk(KERN_WARNING "Xen not support CPU hotremove\n"); -+ ret = -ENOSYS; -+ break; -+ } -+ -+ return ret; -+} -+#else -+static int xen_hotplug_notifier(struct acpi_processor *pr, int event) -+{ -+ return -ENOSYS; -+} -+#endif -+ -+static int __init xen_acpi_processor_extcntl_init(void) -+{ -+ unsigned int pmbits; -+ -+ /* Only xen dom0 is allowed to handle ACPI processor info */ -+ if (!xen_initial_domain()) -+ return 0; -+ -+ pmbits = (xen_start_info->flags & SIF_PM_MASK) >> 8; -+ -+ if (pmbits & XEN_PROCESSOR_PM_CX) -+ xen_ops.pm_ops[PM_TYPE_IDLE] = xen_cx_notifier; -+ if (pmbits & XEN_PROCESSOR_PM_PX) -+ xen_ops.pm_ops[PM_TYPE_PERF] = xen_px_notifier; -+ if (pmbits & XEN_PROCESSOR_PM_TX) -+ xen_ops.pm_ops[PM_TYPE_THR] = xen_tx_notifier; -+ -+ return 0; -+} -+ -+subsys_initcall(xen_acpi_processor_extcntl_init); 
-+MODULE_LICENSE("GPL"); -diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c -index 4204336..158cdd1 100644 ---- a/drivers/xen/balloon.c -+++ b/drivers/xen/balloon.c -@@ -43,22 +43,26 @@ - #include <linux/mutex.h> - #include <linux/list.h> - #include <linux/sysdev.h> -+#include <linux/swap.h> - - #include <asm/page.h> - #include <asm/pgalloc.h> - #include <asm/pgtable.h> - #include <asm/uaccess.h> - #include <asm/tlb.h> -+#include <asm/e820.h> - - #include <asm/xen/hypervisor.h> - #include <asm/xen/hypercall.h> -+ -+#include <xen/xen.h> - #include <xen/interface/xen.h> - #include <xen/interface/memory.h> - #include <xen/xenbus.h> - #include <xen/features.h> - #include <xen/page.h> - --#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) -+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10)) - - #define BALLOON_CLASS_NAME "xen_memory" - -@@ -82,14 +86,15 @@ static struct sys_device balloon_sysdev; - - static int register_balloon(struct sys_device *sysdev); - -+static struct balloon_stats balloon_stats; -+ - /* -- * Protects atomic reservation decrease/increase against concurrent increases. -- * Also protects non-atomic updates of current_pages and driver_pages, and -- * balloon lists. -+ * Work in pages of this order. Can be either 0 for normal pages -+ * or 9 for hugepages. 
- */ --static DEFINE_SPINLOCK(balloon_lock); -- --static struct balloon_stats balloon_stats; -+static int balloon_order; -+static unsigned long balloon_npages; -+static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)]; - - /* We increase/decrease in batches which fit in a page */ - static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; -@@ -118,12 +123,43 @@ static struct timer_list balloon_timer; - static void scrub_page(struct page *page) - { - #ifdef CONFIG_XEN_SCRUB_PAGES -- clear_highpage(page); -+ int i; -+ -+ for (i = 0; i < balloon_npages; i++) -+ clear_highpage(page++); - #endif - } - -+static void free_discontig_frame(void) -+{ -+ int rc; -+ struct xen_memory_reservation reservation = { -+ .address_bits = 0, -+ .domid = DOMID_SELF, -+ .nr_extents = balloon_npages, -+ .extent_order = 0 -+ }; -+ -+ set_xen_guest_handle(reservation.extent_start, discontig_frame_list); -+ rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); -+ BUG_ON(rc != balloon_npages); -+} -+ -+static unsigned long shrink_frame(unsigned long nr_pages) -+{ -+ unsigned long i, j; -+ -+ for (i = 0, j = 0; i < nr_pages; i++, j++) { -+ if (frame_list[i] == 0) -+ j++; -+ if (i != j) -+ frame_list[i] = frame_list[j]; -+ } -+ return i; -+} -+ - /* balloon_append: add the given page to the balloon. */ --static void balloon_append(struct page *page) -+static void __balloon_append(struct page *page) - { - /* Lowmem is re-populated first, so highmem pages go at list tail. 
*/ - if (PageHighMem(page)) { -@@ -134,7 +170,11 @@ static void balloon_append(struct page *page) - list_add(&page->lru, &ballooned_pages); - balloon_stats.balloon_low++; - } -+} - -+static void balloon_append(struct page *page) -+{ -+ __balloon_append(page); - totalram_pages--; - } - -@@ -195,20 +235,17 @@ static unsigned long current_target(void) - - static int increase_reservation(unsigned long nr_pages) - { -- unsigned long pfn, i, flags; -+ unsigned long pfn, mfn, i, j; - struct page *page; - long rc; - struct xen_memory_reservation reservation = { - .address_bits = 0, -- .extent_order = 0, - .domid = DOMID_SELF - }; - - if (nr_pages > ARRAY_SIZE(frame_list)) - nr_pages = ARRAY_SIZE(frame_list); - -- spin_lock_irqsave(&balloon_lock, flags); -- - page = balloon_first_page(); - for (i = 0; i < nr_pages; i++) { - BUG_ON(page == NULL); -@@ -218,6 +255,8 @@ static int increase_reservation(unsigned long nr_pages) - - set_xen_guest_handle(reservation.extent_start, frame_list); - reservation.nr_extents = nr_pages; -+ reservation.extent_order = balloon_order; -+ - rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); - if (rc < 0) - goto out; -@@ -227,19 +266,22 @@ static int increase_reservation(unsigned long nr_pages) - BUG_ON(page == NULL); - - pfn = page_to_pfn(page); -+ mfn = frame_list[i]; - BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && - phys_to_machine_mapping_valid(pfn)); - -- set_phys_to_machine(pfn, frame_list[i]); -- -- /* Link back into the page tables if not highmem. */ -- if (pfn < max_low_pfn) { -- int ret; -- ret = HYPERVISOR_update_va_mapping( -- (unsigned long)__va(pfn << PAGE_SHIFT), -- mfn_pte(frame_list[i], PAGE_KERNEL), -- 0); -- BUG_ON(ret); -+ for (j = 0; j < balloon_npages; j++, pfn++, mfn++) { -+ set_phys_to_machine(pfn, mfn); -+ -+ /* Link back into the page tables if not highmem. 
*/ -+ if (pfn < max_low_pfn) { -+ int ret; -+ ret = HYPERVISOR_update_va_mapping( -+ (unsigned long)__va(pfn << PAGE_SHIFT), -+ mfn_pte(mfn, PAGE_KERNEL), -+ 0); -+ BUG_ON(ret); -+ } - } - - /* Relinquish the page back to the allocator. */ -@@ -251,20 +293,18 @@ static int increase_reservation(unsigned long nr_pages) - balloon_stats.current_pages += rc; - - out: -- spin_unlock_irqrestore(&balloon_lock, flags); -- - return rc < 0 ? rc : rc != nr_pages; - } - - static int decrease_reservation(unsigned long nr_pages) - { -- unsigned long pfn, i, flags; -- struct page *page; -+ unsigned long pfn, lpfn, mfn, i, j; -+ struct page *page = NULL; - int need_sleep = 0; -- int ret; -+ int discontig, discontig_free; -+ int ret; - struct xen_memory_reservation reservation = { - .address_bits = 0, -- .extent_order = 0, - .domid = DOMID_SELF - }; - -@@ -272,7 +312,7 @@ static int decrease_reservation(unsigned long nr_pages) - nr_pages = ARRAY_SIZE(frame_list); - - for (i = 0; i < nr_pages; i++) { -- if ((page = alloc_page(GFP_BALLOON)) == NULL) { -+ if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) { - nr_pages = i; - need_sleep = 1; - break; -@@ -282,38 +322,49 @@ static int decrease_reservation(unsigned long nr_pages) - frame_list[i] = pfn_to_mfn(pfn); - - scrub_page(page); -- -- if (!PageHighMem(page)) { -- ret = HYPERVISOR_update_va_mapping( -- (unsigned long)__va(pfn << PAGE_SHIFT), -- __pte_ma(0), 0); -- BUG_ON(ret); -- } -- - } - - /* Ensure that ballooned highmem pages don't have kmaps. */ - kmap_flush_unused(); - flush_tlb_all(); - -- spin_lock_irqsave(&balloon_lock, flags); -- - /* No more mappings: invalidate P2M and add to balloon. 
*/ - for (i = 0; i < nr_pages; i++) { -- pfn = mfn_to_pfn(frame_list[i]); -- set_phys_to_machine(pfn, INVALID_P2M_ENTRY); -+ mfn = frame_list[i]; -+ lpfn = pfn = mfn_to_pfn(mfn); - balloon_append(pfn_to_page(pfn)); -+ discontig_free = 0; -+ for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { -+ if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) != mfn) -+ discontig_free = 1; -+ -+ set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); -+ page = pfn_to_page(lpfn); -+ -+ if (!PageHighMem(page)) { -+ ret = HYPERVISOR_update_va_mapping( -+ (unsigned long)__va(lpfn << PAGE_SHIFT), -+ __pte_ma(0), 0); -+ BUG_ON(ret); -+ } -+ } -+ if (discontig_free) { -+ free_discontig_frame(); -+ frame_list[i] = 0; -+ discontig = 1; -+ } - } -+ balloon_stats.current_pages -= nr_pages; -+ -+ if (discontig) -+ nr_pages = shrink_frame(nr_pages); - - set_xen_guest_handle(reservation.extent_start, frame_list); - reservation.nr_extents = nr_pages; -+ reservation.extent_order = balloon_order; - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); - BUG_ON(ret != nr_pages); - -- balloon_stats.current_pages -= nr_pages; -- -- spin_unlock_irqrestore(&balloon_lock, flags); -- - return need_sleep; - } - -@@ -379,7 +430,7 @@ static void watch_target(struct xenbus_watch *watch, - /* The given memory/target value is in KiB, so it needs converting to - * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 
- */ -- balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); -+ balloon_set_new_target(new_target >> ((PAGE_SHIFT - 10) + balloon_order)); - } - - static int balloon_init_watcher(struct notifier_block *notifier, -@@ -399,15 +450,18 @@ static struct notifier_block xenstore_notifier; - - static int __init balloon_init(void) - { -- unsigned long pfn; -+ unsigned long pfn, extra_pfn_end; - struct page *page; - - if (!xen_pv_domain()) - return -ENODEV; - -- pr_info("xen_balloon: Initialising balloon driver.\n"); -+ pr_info("xen_balloon: Initialising balloon driver with page order %d.\n", -+ balloon_order); -+ -+ balloon_npages = 1 << balloon_order; - -- balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); -+ balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order; - balloon_stats.target_pages = balloon_stats.current_pages; - balloon_stats.balloon_low = 0; - balloon_stats.balloon_high = 0; -@@ -419,11 +473,24 @@ static int __init balloon_init(void) - - register_balloon(&balloon_sysdev); - -- /* Initialise the balloon with excess memory space. */ -- for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { -+ /* -+ * Initialise the balloon with excess memory space. We need -+ * to make sure we don't add memory which doesn't exist or -+ * logically exist. The E820 map can be trimmed to be smaller -+ * than the amount of physical memory due to the mem= command -+ * line parameter. And if this is a 32-bit non-HIGHMEM kernel -+ * on a system with memory which requires highmem to access, -+ * don't try to use it. -+ */ -+ extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()), -+ (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size)); -+ for (pfn = PFN_UP(xen_extra_mem_start); -+ pfn < extra_pfn_end; -+ pfn += balloon_npages) { - page = pfn_to_page(pfn); -- if (!PageReserved(page)) -- balloon_append(page); -+ /* totalram_pages doesn't include the boot-time -+ balloon extension, so don't subtract from it. 
*/ -+ __balloon_append(page); - } - - target_watch.callback = watch_target; -@@ -444,6 +511,121 @@ static void balloon_exit(void) - - module_exit(balloon_exit); - -+static int __init balloon_parse_huge(char *s) -+{ -+ balloon_order = 9; -+ return 1; -+} -+ -+__setup("balloon_hugepages", balloon_parse_huge); -+ -+static int dealloc_pte_fn(pte_t *pte, struct page *pmd_page, -+ unsigned long addr, void *data) -+{ -+ unsigned long mfn = pte_mfn(*pte); -+ int ret; -+ struct xen_memory_reservation reservation = { -+ .nr_extents = 1, -+ .extent_order = 0, -+ .domid = DOMID_SELF -+ }; -+ -+ set_xen_guest_handle(reservation.extent_start, &mfn); -+ set_pte_at(&init_mm, addr, pte, __pte_ma(0)); -+ set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); -+ -+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); -+ BUG_ON(ret != 1); -+ -+ return 0; -+} -+ -+struct page **alloc_empty_pages_and_pagevec(int nr_pages) -+{ -+ struct page *page, **pagevec; -+ int npages; -+ int i, j, ret; -+ -+ /* Round up to next number of balloon_order pages */ -+ npages = (nr_pages + (balloon_npages-1)) >> balloon_order; -+ -+ pagevec = kmalloc(sizeof(page) * nr_pages << balloon_order, GFP_KERNEL); -+ if (pagevec == NULL) -+ return NULL; -+ -+ for (i = 0; i < nr_pages; i++) { -+ void *v; -+ -+ page = alloc_pages(GFP_KERNEL|__GFP_COLD, balloon_order); -+ if (page == NULL) -+ goto err; -+ -+ scrub_page(page); -+ -+ mutex_lock(&balloon_mutex); -+ -+ v = page_address(page); -+ -+ ret = apply_to_page_range(&init_mm, (unsigned long)v, -+ PAGE_SIZE << balloon_order, -+ dealloc_pte_fn, NULL); -+ -+ if (ret != 0) { -+ mutex_unlock(&balloon_mutex); -+ //balloon_free_page(page); /* tries to use free_cold_page */ -+ __free_page(page); -+ goto err; -+ } -+ for (j = 0; j < balloon_npages; j++) -+ pagevec[(i<<balloon_order)+j] = page++; -+ -+ totalram_pages = balloon_stats.current_pages -= balloon_npages; -+ -+ mutex_unlock(&balloon_mutex); -+ } -+ -+ out: -+ 
schedule_work(&balloon_worker); -+ flush_tlb_all(); -+ return pagevec; -+ -+ err: -+ mutex_lock(&balloon_mutex); -+ while (--i >= 0) -+ balloon_append(pagevec[i << balloon_order]); -+ mutex_unlock(&balloon_mutex); -+ kfree(pagevec); -+ pagevec = NULL; -+ goto out; -+} -+EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec); -+ -+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) -+{ -+ struct page *page; -+ int i; -+ int npages; -+ -+ if (pagevec == NULL) -+ return; -+ -+ /* Round up to next number of balloon_order pages */ -+ npages = (nr_pages + (balloon_npages-1)) >> balloon_order; -+ -+ mutex_lock(&balloon_mutex); -+ for (i = 0; i < nr_pages; i++) { -+ page = pagevec[i << balloon_order]; -+ BUG_ON(page_count(page) != 1); -+ balloon_append(page); -+ } -+ mutex_unlock(&balloon_mutex); -+ -+ kfree(pagevec); -+ -+ schedule_work(&balloon_worker); -+} -+EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec); -+ - #define BALLOON_SHOW(name, format, args...) \ - static ssize_t show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ -@@ -477,7 +659,7 @@ static ssize_t store_target_kb(struct sys_device *dev, - - target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; - -- balloon_set_new_target(target_bytes >> PAGE_SHIFT); -+ balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order)); - - return count; - } -@@ -491,7 +673,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr - { - return sprintf(buf, "%llu\n", - (unsigned long long)balloon_stats.target_pages -- << PAGE_SHIFT); -+ << (PAGE_SHIFT + balloon_order)); - } - - static ssize_t store_target(struct sys_device *dev, -@@ -507,7 +689,7 @@ static ssize_t store_target(struct sys_device *dev, - - target_bytes = memparse(buf, &endchar); - -- balloon_set_new_target(target_bytes >> PAGE_SHIFT); -+ balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order)); - - return count; - } -diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c -new 
file mode 100644 -index 0000000..d40f534 ---- /dev/null -+++ b/drivers/xen/biomerge.c -@@ -0,0 +1,14 @@ -+#include <linux/bio.h> -+#include <asm/io.h> -+#include <xen/page.h> -+ -+bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, -+ const struct bio_vec *vec2) -+{ -+ unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page)); -+ unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page)); -+ -+ return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && -+ ((mfn1 == mfn2) || ((mfn1+1) == mfn2)); -+} -+ -diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile -new file mode 100644 -index 0000000..dee55ba ---- /dev/null -+++ b/drivers/xen/blkback/Makefile -@@ -0,0 +1,4 @@ -+obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o -+obj-$(CONFIG_XEN_BLKBACK_PAGEMAP) += blkback-pagemap.o -+ -+xen-blkback-y := blkback.o xenbus.o interface.o vbd.o -diff --git a/drivers/xen/blkback/blkback-pagemap.c b/drivers/xen/blkback/blkback-pagemap.c -new file mode 100644 -index 0000000..45f6eb2 ---- /dev/null -+++ b/drivers/xen/blkback/blkback-pagemap.c -@@ -0,0 +1,109 @@ -+#include <linux/module.h> -+#include "blkback-pagemap.h" -+ -+static int blkback_pagemap_size; -+static struct blkback_pagemap *blkback_pagemap; -+ -+static inline int -+blkback_pagemap_entry_clear(struct blkback_pagemap *map) -+{ -+ static struct blkback_pagemap zero; -+ return !memcmp(map, &zero, sizeof(zero)); -+} -+ -+int -+blkback_pagemap_init(int pages) -+{ -+ blkback_pagemap = kzalloc(pages * sizeof(struct blkback_pagemap), -+ GFP_KERNEL); -+ if (!blkback_pagemap) -+ return -ENOMEM; -+ -+ blkback_pagemap_size = pages; -+ return 0; -+} -+EXPORT_SYMBOL_GPL(blkback_pagemap_init); -+ -+void -+blkback_pagemap_set(int idx, struct page *page, -+ domid_t domid, busid_t busid, grant_ref_t gref) -+{ -+ struct blkback_pagemap *entry; -+ -+ BUG_ON(!blkback_pagemap); -+ BUG_ON(idx >= blkback_pagemap_size); -+ -+ set_page_private(page, idx); -+ -+ entry = blkback_pagemap + idx; -+ if 
(!blkback_pagemap_entry_clear(entry)) { -+ printk("overwriting pagemap %d: d %u b %u g %u\n", -+ idx, entry->domid, entry->busid, entry->gref); -+ BUG(); -+ } -+ -+ entry->page = page; -+ entry->domid = domid; -+ entry->busid = busid; -+ entry->gref = gref; -+} -+EXPORT_SYMBOL_GPL(blkback_pagemap_set); -+ -+void -+blkback_pagemap_clear(struct page *page) -+{ -+ int idx; -+ struct blkback_pagemap *entry; -+ -+ idx = (int)page_private(page); -+ -+ BUG_ON(!blkback_pagemap); -+ BUG_ON(idx >= blkback_pagemap_size); -+ -+ entry = blkback_pagemap + idx; -+ if (blkback_pagemap_entry_clear(entry)) { -+ printk("clearing empty pagemap %d\n", idx); -+ BUG(); -+ } -+ -+ memset(entry, 0, sizeof(*entry)); -+} -+EXPORT_SYMBOL_GPL(blkback_pagemap_clear); -+ -+struct blkback_pagemap -+blkback_pagemap_read(struct page *page) -+{ -+ int idx; -+ struct blkback_pagemap *entry; -+ -+ idx = (int)page_private(page); -+ -+ BUG_ON(!blkback_pagemap); -+ BUG_ON(idx >= blkback_pagemap_size); -+ -+ entry = blkback_pagemap + idx; -+ if (blkback_pagemap_entry_clear(entry)) { -+ printk("reading empty pagemap %d\n", idx); -+ BUG(); -+ } -+ -+ return *entry; -+} -+EXPORT_SYMBOL(blkback_pagemap_read); -+ -+MODULE_LICENSE("Dual BSD/GPL"); -+ -+int -+blkback_pagemap_contains_page(struct page *page) -+{ -+ struct blkback_pagemap *entry; -+ int idx = (int)page_private(page); -+ -+ if (idx < 0 || idx >= blkback_pagemap_size) -+ return 0; -+ -+ entry = blkback_pagemap + idx; -+ -+ return (entry->page == page); -+} -+EXPORT_SYMBOL(blkback_pagemap_contains_page); -diff --git a/drivers/xen/blkback/blkback-pagemap.h b/drivers/xen/blkback/blkback-pagemap.h -new file mode 100644 -index 0000000..7f97d15 ---- /dev/null -+++ b/drivers/xen/blkback/blkback-pagemap.h -@@ -0,0 +1,36 @@ -+#ifndef _BLKBACK_PAGEMAP_H_ -+#define _BLKBACK_PAGEMAP_H_ -+ -+#include <linux/mm.h> -+#include <xen/interface/xen.h> -+#include <xen/interface/grant_table.h> -+ -+typedef unsigned int busid_t; -+ -+struct blkback_pagemap { -+ struct 
page *page; -+ domid_t domid; -+ busid_t busid; -+ grant_ref_t gref; -+}; -+ -+#if defined(CONFIG_XEN_BLKBACK_PAGEMAP) || defined(CONFIG_XEN_BLKBACK_PAGEMAP_MODULE) -+ -+int blkback_pagemap_init(int); -+void blkback_pagemap_set(int, struct page *, domid_t, busid_t, grant_ref_t); -+void blkback_pagemap_clear(struct page *); -+struct blkback_pagemap blkback_pagemap_read(struct page *); -+int blkback_pagemap_contains_page(struct page *page); -+ -+#else /* CONFIG_XEN_BLKBACK_PAGEMAP */ -+ -+static inline int blkback_pagemap_init(int pages) { return 0; } -+static inline void blkback_pagemap_set(int idx, struct page *page, domid_t dom, -+ busid_t bus, grant_ref_t gnt) {} -+static inline void blkback_pagemap_clear(struct page *page) {} -+#define blkback_pagemap_read(_page) ({ BUG(); (struct blkback_pagemap){0}; }) -+static inline int blkback_pagemap_contains_page(struct page *page) { return 0; } -+ -+#endif /* CONFIG_XEN_BLKBACK_PAGEMAP */ -+ -+#endif -diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c -new file mode 100644 -index 0000000..0bef445 ---- /dev/null -+++ b/drivers/xen/blkback/blkback.c -@@ -0,0 +1,675 @@ -+/****************************************************************************** -+ * arch/xen/drivers/blkif/backend/main.c -+ * -+ * Back-end of the driver for virtual block devices. This portion of the -+ * driver exports a 'unified' block-device interface that can be accessed -+ * by any operating system that implements a compatible front end. 
A -+ * reference front-end implementation can be found in: -+ * arch/xen/drivers/blkif/frontend -+ * -+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand -+ * Copyright (c) 2005, Christopher Clark -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. -+ */ -+ -+#include <linux/spinlock.h> -+#include <linux/kthread.h> -+#include <linux/list.h> -+#include <linux/delay.h> -+#include <linux/freezer.h> -+ -+#include <xen/balloon.h> -+#include <xen/events.h> -+#include <xen/page.h> -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/hypercall.h> -+#include "common.h" -+ -+/* -+ * These are rather arbitrary. 
They are fairly large because adjacent requests -+ * pulled from a communication ring are quite likely to end up being part of -+ * the same scatter/gather request at the disc. -+ * -+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** -+ * -+ * This will increase the chances of being able to write whole tracks. -+ * 64 should be enough to keep us competitive with Linux. -+ */ -+static int blkif_reqs = 64; -+module_param_named(reqs, blkif_reqs, int, 0); -+MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); -+ -+/* Run-time switchable: /sys/module/blkback/parameters/ */ -+static unsigned int log_stats = 0; -+static unsigned int debug_lvl = 0; -+module_param(log_stats, int, 0644); -+module_param(debug_lvl, int, 0644); -+ -+/* -+ * Each outstanding request that we've passed to the lower device layers has a -+ * 'pending_req' allocated to it. Each buffer_head that completes decrements -+ * the pendcnt towards zero. When it hits zero, the specified domain has a -+ * response queued for it, with the saved 'id' passed back. 
-+ */ -+typedef struct { -+ blkif_t *blkif; -+ u64 id; -+ int nr_pages; -+ atomic_t pendcnt; -+ unsigned short operation; -+ int status; -+ struct list_head free_list; -+} pending_req_t; -+ -+static pending_req_t *pending_reqs; -+static struct list_head pending_free; -+static DEFINE_SPINLOCK(pending_free_lock); -+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); -+ -+#define BLKBACK_INVALID_HANDLE (~0) -+ -+static struct page **pending_pages; -+static grant_handle_t *pending_grant_handles; -+ -+static inline int vaddr_pagenr(pending_req_t *req, int seg) -+{ -+ return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; -+} -+ -+#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] -+ -+static inline unsigned long vaddr(pending_req_t *req, int seg) -+{ -+ unsigned long pfn = page_to_pfn(pending_page(req, seg)); -+ return (unsigned long)pfn_to_kaddr(pfn); -+} -+ -+#define pending_handle(_req, _seg) \ -+ (pending_grant_handles[vaddr_pagenr(_req, _seg)]) -+ -+ -+static int do_block_io_op(blkif_t *blkif); -+static void dispatch_rw_block_io(blkif_t *blkif, -+ struct blkif_request *req, -+ pending_req_t *pending_req); -+static void make_response(blkif_t *blkif, u64 id, -+ unsigned short op, int st); -+ -+/****************************************************************** -+ * misc small helpers -+ */ -+static pending_req_t* alloc_req(void) -+{ -+ pending_req_t *req = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&pending_free_lock, flags); -+ if (!list_empty(&pending_free)) { -+ req = list_entry(pending_free.next, pending_req_t, free_list); -+ list_del(&req->free_list); -+ } -+ spin_unlock_irqrestore(&pending_free_lock, flags); -+ return req; -+} -+ -+static void free_req(pending_req_t *req) -+{ -+ unsigned long flags; -+ int was_empty; -+ -+ spin_lock_irqsave(&pending_free_lock, flags); -+ was_empty = list_empty(&pending_free); -+ list_add(&req->free_list, &pending_free); -+ spin_unlock_irqrestore(&pending_free_lock, flags); -+ if 
(was_empty) -+ wake_up(&pending_free_wq); -+} -+ -+static void unplug_queue(blkif_t *blkif) -+{ -+ if (blkif->plug == NULL) -+ return; -+ if (blkif->plug->unplug_fn) -+ blkif->plug->unplug_fn(blkif->plug); -+ blk_put_queue(blkif->plug); -+ blkif->plug = NULL; -+} -+ -+static void plug_queue(blkif_t *blkif, struct block_device *bdev) -+{ -+ struct request_queue *q = bdev_get_queue(bdev); -+ -+ if (q == blkif->plug) -+ return; -+ unplug_queue(blkif); -+ blk_get_queue(q); -+ blkif->plug = q; -+} -+ -+static void fast_flush_area(pending_req_t *req) -+{ -+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -+ unsigned int i, invcount = 0; -+ grant_handle_t handle; -+ int ret; -+ -+ for (i = 0; i < req->nr_pages; i++) { -+ handle = pending_handle(req, i); -+ if (handle == BLKBACK_INVALID_HANDLE) -+ continue; -+ blkback_pagemap_clear(pending_page(req, i)); -+ gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), -+ GNTMAP_host_map, handle); -+ pending_handle(req, i) = BLKBACK_INVALID_HANDLE; -+ invcount++; -+ } -+ -+ ret = HYPERVISOR_grant_table_op( -+ GNTTABOP_unmap_grant_ref, unmap, invcount); -+ BUG_ON(ret); -+} -+ -+/****************************************************************** -+ * SCHEDULER FUNCTIONS -+ */ -+ -+static void print_stats(blkif_t *blkif) -+{ -+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", -+ current->comm, blkif->st_oo_req, -+ blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); -+ blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); -+ blkif->st_rd_req = 0; -+ blkif->st_wr_req = 0; -+ blkif->st_oo_req = 0; -+} -+ -+int blkif_schedule(void *arg) -+{ -+ blkif_t *blkif = arg; -+ struct vbd *vbd = &blkif->vbd; -+ -+ blkif_get(blkif); -+ -+ if (debug_lvl) -+ printk(KERN_DEBUG "%s: started\n", current->comm); -+ -+ while (!kthread_should_stop()) { -+ if (try_to_freeze()) -+ continue; -+ if (unlikely(vbd->size != vbd_size(vbd))) -+ vbd_resize(blkif); -+ -+ wait_event_interruptible( -+ blkif->wq, -+ 
blkif->waiting_reqs || kthread_should_stop()); -+ wait_event_interruptible( -+ pending_free_wq, -+ !list_empty(&pending_free) || kthread_should_stop()); -+ -+ blkif->waiting_reqs = 0; -+ smp_mb(); /* clear flag *before* checking for work */ -+ -+ if (do_block_io_op(blkif)) -+ blkif->waiting_reqs = 1; -+ unplug_queue(blkif); -+ -+ if (log_stats && time_after(jiffies, blkif->st_print)) -+ print_stats(blkif); -+ } -+ -+ if (log_stats) -+ print_stats(blkif); -+ if (debug_lvl) -+ printk(KERN_DEBUG "%s: exiting\n", current->comm); -+ -+ blkif->xenblkd = NULL; -+ blkif_put(blkif); -+ -+ return 0; -+} -+ -+/****************************************************************** -+ * COMPLETION CALLBACK -- Called as bh->b_end_io() -+ */ -+ -+static void __end_block_io_op(pending_req_t *pending_req, int error) -+{ -+ /* An error fails the entire request. */ -+ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && -+ (error == -EOPNOTSUPP)) { -+ DPRINTK("blkback: write barrier op failed, not supported\n"); -+ blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); -+ pending_req->status = BLKIF_RSP_EOPNOTSUPP; -+ } else if (error) { -+ DPRINTK("Buffer not up-to-date at end of operation, " -+ "error=%d\n", error); -+ pending_req->status = BLKIF_RSP_ERROR; -+ } -+ -+ if (atomic_dec_and_test(&pending_req->pendcnt)) { -+ fast_flush_area(pending_req); -+ make_response(pending_req->blkif, pending_req->id, -+ pending_req->operation, pending_req->status); -+ blkif_put(pending_req->blkif); -+ free_req(pending_req); -+ } -+} -+ -+static void end_block_io_op(struct bio *bio, int error) -+{ -+ __end_block_io_op(bio->bi_private, error); -+ bio_put(bio); -+} -+ -+ -+/****************************************************************************** -+ * NOTIFICATION FROM GUEST OS. 
-+ */ -+ -+static void blkif_notify_work(blkif_t *blkif) -+{ -+ blkif->waiting_reqs = 1; -+ wake_up(&blkif->wq); -+} -+ -+irqreturn_t blkif_be_int(int irq, void *dev_id) -+{ -+ blkif_notify_work(dev_id); -+ return IRQ_HANDLED; -+} -+ -+ -+ -+/****************************************************************** -+ * DOWNWARD CALLS -- These interface with the block-device layer proper. -+ */ -+ -+static int do_block_io_op(blkif_t *blkif) -+{ -+ union blkif_back_rings *blk_rings = &blkif->blk_rings; -+ struct blkif_request req; -+ pending_req_t *pending_req; -+ RING_IDX rc, rp; -+ int more_to_do = 0; -+ -+ rc = blk_rings->common.req_cons; -+ rp = blk_rings->common.sring->req_prod; -+ rmb(); /* Ensure we see queued requests up to 'rp'. */ -+ -+ while (rc != rp) { -+ -+ if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) -+ break; -+ -+ if (kthread_should_stop()) { -+ more_to_do = 1; -+ break; -+ } -+ -+ pending_req = alloc_req(); -+ if (NULL == pending_req) { -+ blkif->st_oo_req++; -+ more_to_do = 1; -+ break; -+ } -+ -+ switch (blkif->blk_protocol) { -+ case BLKIF_PROTOCOL_NATIVE: -+ memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); -+ break; -+ case BLKIF_PROTOCOL_X86_32: -+ blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); -+ break; -+ case BLKIF_PROTOCOL_X86_64: -+ blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); -+ break; -+ default: -+ BUG(); -+ } -+ blk_rings->common.req_cons = ++rc; /* before make_response() */ -+ -+ /* Apply all sanity checks to /private copy/ of request. 
*/ -+ barrier(); -+ -+ switch (req.operation) { -+ case BLKIF_OP_READ: -+ blkif->st_rd_req++; -+ dispatch_rw_block_io(blkif, &req, pending_req); -+ break; -+ case BLKIF_OP_WRITE_BARRIER: -+ blkif->st_br_req++; -+ /* fall through */ -+ case BLKIF_OP_WRITE: -+ blkif->st_wr_req++; -+ dispatch_rw_block_io(blkif, &req, pending_req); -+ break; -+ default: -+ /* A good sign something is wrong: sleep for a while to -+ * avoid excessive CPU consumption by a bad guest. */ -+ msleep(1); -+ DPRINTK("error: unknown block io operation [%d]\n", -+ req.operation); -+ make_response(blkif, req.id, req.operation, -+ BLKIF_RSP_ERROR); -+ free_req(pending_req); -+ break; -+ } -+ -+ /* Yield point for this unbounded loop. */ -+ cond_resched(); -+ } -+ -+ return more_to_do; -+} -+ -+static void dispatch_rw_block_io(blkif_t *blkif, -+ struct blkif_request *req, -+ pending_req_t *pending_req) -+{ -+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -+ struct phys_req preq; -+ struct { -+ unsigned long buf; unsigned int nsec; -+ } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -+ unsigned int nseg; -+ struct bio *bio = NULL; -+ int ret, i; -+ int operation; -+ -+ switch (req->operation) { -+ case BLKIF_OP_READ: -+ operation = READ; -+ break; -+ case BLKIF_OP_WRITE: -+ operation = WRITE; -+ break; -+ case BLKIF_OP_WRITE_BARRIER: -+ operation = WRITE_BARRIER; -+ break; -+ default: -+ operation = 0; /* make gcc happy */ -+ BUG(); -+ } -+ -+ /* Check that number of segments is sane. 
*/ -+ nseg = req->nr_segments; -+ if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || -+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { -+ DPRINTK("Bad number of segments in request (%d)\n", nseg); -+ goto fail_response; -+ } -+ -+ preq.dev = req->handle; -+ preq.sector_number = req->sector_number; -+ preq.nr_sects = 0; -+ -+ pending_req->blkif = blkif; -+ pending_req->id = req->id; -+ pending_req->operation = req->operation; -+ pending_req->status = BLKIF_RSP_OKAY; -+ pending_req->nr_pages = nseg; -+ -+ for (i = 0; i < nseg; i++) { -+ uint32_t flags; -+ -+ seg[i].nsec = req->seg[i].last_sect - -+ req->seg[i].first_sect + 1; -+ -+ if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || -+ (req->seg[i].last_sect < req->seg[i].first_sect)) -+ goto fail_response; -+ preq.nr_sects += seg[i].nsec; -+ -+ flags = GNTMAP_host_map; -+ if (operation != READ) -+ flags |= GNTMAP_readonly; -+ gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, -+ req->seg[i].gref, blkif->domid); -+ } -+ -+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); -+ BUG_ON(ret); -+ -+ for (i = 0; i < nseg; i++) { -+ if (unlikely(map[i].status != 0)) { -+ DPRINTK("invalid buffer -- could not remap it\n"); -+ map[i].handle = BLKBACK_INVALID_HANDLE; -+ ret |= 1; -+ continue; -+ } -+ -+ set_phys_to_machine( -+ page_to_pfn(pending_page(pending_req, i)), -+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); -+ seg[i].buf = map[i].dev_bus_addr | -+ (req->seg[i].first_sect << 9); -+ blkback_pagemap_set(vaddr_pagenr(pending_req, i), -+ pending_page(pending_req, i), -+ blkif->domid, req->handle, -+ req->seg[i].gref); -+ pending_handle(pending_req, i) = map[i].handle; -+ } -+ -+ if (ret) -+ goto fail_flush; -+ -+ if (vbd_translate(&preq, blkif, operation) != 0) { -+ DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", -+ operation == READ ? 
"read" : "write", -+ preq.sector_number, -+ preq.sector_number + preq.nr_sects, preq.dev); -+ goto fail_flush; -+ } -+ -+ plug_queue(blkif, preq.bdev); -+ atomic_set(&pending_req->pendcnt, 1); -+ blkif_get(blkif); -+ -+ for (i = 0; i < nseg; i++) { -+ if (((int)preq.sector_number|(int)seg[i].nsec) & -+ ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { -+ DPRINTK("Misaligned I/O request from domain %d", -+ blkif->domid); -+ goto fail_put_bio; -+ } -+ -+ while ((bio == NULL) || -+ (bio_add_page(bio, -+ pending_page(pending_req, i), -+ seg[i].nsec << 9, -+ seg[i].buf & ~PAGE_MASK) == 0)) { -+ if (bio) { -+ atomic_inc(&pending_req->pendcnt); -+ submit_bio(operation, bio); -+ } -+ -+ bio = bio_alloc(GFP_KERNEL, nseg-i); -+ if (unlikely(bio == NULL)) -+ goto fail_put_bio; -+ -+ bio->bi_bdev = preq.bdev; -+ bio->bi_private = pending_req; -+ bio->bi_end_io = end_block_io_op; -+ bio->bi_sector = preq.sector_number; -+ } -+ -+ preq.sector_number += seg[i].nsec; -+ } -+ -+ if (!bio) { -+ BUG_ON(operation != WRITE_BARRIER); -+ bio = bio_alloc(GFP_KERNEL, 0); -+ if (unlikely(bio == NULL)) -+ goto fail_put_bio; -+ -+ bio->bi_bdev = preq.bdev; -+ bio->bi_private = pending_req; -+ bio->bi_end_io = end_block_io_op; -+ bio->bi_sector = -1; -+ } -+ -+ submit_bio(operation, bio); -+ -+ if (operation == READ) -+ blkif->st_rd_sect += preq.nr_sects; -+ else if (operation == WRITE || operation == WRITE_BARRIER) -+ blkif->st_wr_sect += preq.nr_sects; -+ -+ return; -+ -+ fail_flush: -+ fast_flush_area(pending_req); -+ fail_response: -+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); -+ free_req(pending_req); -+ msleep(1); /* back off a bit */ -+ return; -+ -+ fail_put_bio: -+ __end_block_io_op(pending_req, -EINVAL); -+ if (bio) -+ bio_put(bio); -+ unplug_queue(blkif); -+ msleep(1); /* back off a bit */ -+ return; -+} -+ -+ -+ -+/****************************************************************** -+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING -+ */ -+ -+ -+static void 
make_response(blkif_t *blkif, u64 id, -+ unsigned short op, int st) -+{ -+ struct blkif_response resp; -+ unsigned long flags; -+ union blkif_back_rings *blk_rings = &blkif->blk_rings; -+ int more_to_do = 0; -+ int notify; -+ -+ resp.id = id; -+ resp.operation = op; -+ resp.status = st; -+ -+ spin_lock_irqsave(&blkif->blk_ring_lock, flags); -+ /* Place on the response ring for the relevant domain. */ -+ switch (blkif->blk_protocol) { -+ case BLKIF_PROTOCOL_NATIVE: -+ memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), -+ &resp, sizeof(resp)); -+ break; -+ case BLKIF_PROTOCOL_X86_32: -+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), -+ &resp, sizeof(resp)); -+ break; -+ case BLKIF_PROTOCOL_X86_64: -+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), -+ &resp, sizeof(resp)); -+ break; -+ default: -+ BUG(); -+ } -+ blk_rings->common.rsp_prod_pvt++; -+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); -+ if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { -+ /* -+ * Tail check for pending requests. Allows frontend to avoid -+ * notifications if requests are already in flight (lower -+ * overheads and promotes batching). 
-+ */ -+ RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); -+ -+ } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { -+ more_to_do = 1; -+ } -+ -+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); -+ -+ if (more_to_do) -+ blkif_notify_work(blkif); -+ if (notify) -+ notify_remote_via_irq(blkif->irq); -+} -+ -+static int __init blkif_init(void) -+{ -+ int i, mmap_pages; -+ int rc = 0; -+ -+ if (!xen_pv_domain()) -+ return -ENODEV; -+ -+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; -+ -+ pending_reqs = kmalloc(sizeof(pending_reqs[0]) * -+ blkif_reqs, GFP_KERNEL); -+ pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * -+ mmap_pages, GFP_KERNEL); -+ pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); -+ -+ if (blkback_pagemap_init(mmap_pages)) -+ goto out_of_memory; -+ -+ if (!pending_reqs || !pending_grant_handles || !pending_pages) { -+ rc = -ENOMEM; -+ goto out_of_memory; -+ } -+ -+ for (i = 0; i < mmap_pages; i++) -+ pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; -+ -+ rc = blkif_interface_init(); -+ if (rc) -+ goto failed_init; -+ -+ memset(pending_reqs, 0, sizeof(pending_reqs)); -+ INIT_LIST_HEAD(&pending_free); -+ -+ for (i = 0; i < blkif_reqs; i++) -+ list_add_tail(&pending_reqs[i].free_list, &pending_free); -+ -+ rc = blkif_xenbus_init(); -+ if (rc) -+ goto failed_init; -+ -+ return 0; -+ -+ out_of_memory: -+ printk(KERN_ERR "%s: out of memory\n", __func__); -+ failed_init: -+ kfree(pending_reqs); -+ kfree(pending_grant_handles); -+ free_empty_pages_and_pagevec(pending_pages, mmap_pages); -+ return rc; -+} -+ -+module_init(blkif_init); -+ -+MODULE_LICENSE("Dual BSD/GPL"); -diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h -new file mode 100644 -index 0000000..531ba81 ---- /dev/null -+++ b/drivers/xen/blkback/common.h -@@ -0,0 +1,143 @@ -+/* -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License 
version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. -+ */ -+ -+#ifndef __BLKIF__BACKEND__COMMON_H__ -+#define __BLKIF__BACKEND__COMMON_H__ -+ -+#include <linux/version.h> -+#include <linux/module.h> -+#include <linux/interrupt.h> -+#include <linux/slab.h> -+#include <linux/blkdev.h> -+#include <linux/vmalloc.h> -+#include <linux/wait.h> -+#include <asm/io.h> -+#include <asm/setup.h> -+#include <asm/pgalloc.h> -+#include <asm/hypervisor.h> -+#include <xen/blkif.h> -+#include <xen/grant_table.h> -+#include <xen/xenbus.h> -+#include "blkback-pagemap.h" -+ -+ -+#define DPRINTK(_f, _a...) 
\ -+ pr_debug("(file=%s, line=%d) " _f, \ -+ __FILE__ , __LINE__ , ## _a ) -+ -+struct vbd { -+ blkif_vdev_t handle; /* what the domain refers to this vbd as */ -+ unsigned char readonly; /* Non-zero -> read-only */ -+ unsigned char type; /* VDISK_xxx */ -+ u32 pdevice; /* phys device that this vbd maps to */ -+ struct block_device *bdev; -+ sector_t size; /* Cached size parameter */ -+}; -+ -+struct backend_info; -+ -+typedef struct blkif_st { -+ /* Unique identifier for this interface. */ -+ domid_t domid; -+ unsigned int handle; -+ /* Physical parameters of the comms window. */ -+ unsigned int irq; -+ /* Comms information. */ -+ enum blkif_protocol blk_protocol; -+ union blkif_back_rings blk_rings; -+ struct vm_struct *blk_ring_area; -+ /* The VBD attached to this interface. */ -+ struct vbd vbd; -+ /* Back pointer to the backend_info. */ -+ struct backend_info *be; -+ /* Private fields. */ -+ spinlock_t blk_ring_lock; -+ atomic_t refcnt; -+ -+ wait_queue_head_t wq; -+ struct task_struct *xenblkd; -+ unsigned int waiting_reqs; -+ struct request_queue *plug; -+ -+ /* statistics */ -+ unsigned long st_print; -+ int st_rd_req; -+ int st_wr_req; -+ int st_oo_req; -+ int st_br_req; -+ int st_rd_sect; -+ int st_wr_sect; -+ -+ wait_queue_head_t waiting_to_free; -+ -+ grant_handle_t shmem_handle; -+ grant_ref_t shmem_ref; -+} blkif_t; -+ -+blkif_t *blkif_alloc(domid_t domid); -+void blkif_disconnect(blkif_t *blkif); -+void blkif_free(blkif_t *blkif); -+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); -+void vbd_resize(blkif_t *blkif); -+ -+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) -+#define blkif_put(_b) \ -+ do { \ -+ if (atomic_dec_and_test(&(_b)->refcnt)) \ -+ wake_up(&(_b)->waiting_to_free);\ -+ } while (0) -+ -+/* Create a vbd. 
*/ -+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, -+ unsigned minor, int readonly, int cdrom); -+void vbd_free(struct vbd *vbd); -+ -+unsigned long long vbd_size(struct vbd *vbd); -+unsigned int vbd_info(struct vbd *vbd); -+unsigned long vbd_secsize(struct vbd *vbd); -+ -+struct phys_req { -+ unsigned short dev; -+ unsigned short nr_sects; -+ struct block_device *bdev; -+ blkif_sector_t sector_number; -+}; -+ -+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); -+ -+int blkif_interface_init(void); -+ -+int blkif_xenbus_init(void); -+ -+irqreturn_t blkif_be_int(int irq, void *dev_id); -+int blkif_schedule(void *arg); -+ -+int blkback_barrier(struct xenbus_transaction xbt, -+ struct backend_info *be, int state); -+ -+struct xenbus_device *blkback_xenbus(struct backend_info *be); -+ -+#endif /* __BLKIF__BACKEND__COMMON_H__ */ -diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c -new file mode 100644 -index 0000000..e397a41 ---- /dev/null -+++ b/drivers/xen/blkback/interface.c -@@ -0,0 +1,186 @@ -+/****************************************************************************** -+ * arch/xen/drivers/blkif/backend/interface.c -+ * -+ * Block-device interface management. 
-+ * -+ * Copyright (c) 2004, Keir Fraser -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. 
-+ */ -+ -+#include "common.h" -+#include <xen/events.h> -+#include <xen/grant_table.h> -+#include <linux/kthread.h> -+ -+static struct kmem_cache *blkif_cachep; -+ -+blkif_t *blkif_alloc(domid_t domid) -+{ -+ blkif_t *blkif; -+ -+ blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); -+ if (!blkif) -+ return ERR_PTR(-ENOMEM); -+ -+ memset(blkif, 0, sizeof(*blkif)); -+ blkif->domid = domid; -+ spin_lock_init(&blkif->blk_ring_lock); -+ atomic_set(&blkif->refcnt, 1); -+ init_waitqueue_head(&blkif->wq); -+ blkif->st_print = jiffies; -+ init_waitqueue_head(&blkif->waiting_to_free); -+ -+ return blkif; -+} -+ -+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) -+{ -+ struct gnttab_map_grant_ref op; -+ -+ gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, -+ GNTMAP_host_map, shared_page, blkif->domid); -+ -+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) -+ BUG(); -+ -+ if (op.status) { -+ DPRINTK(" Grant table operation failure !\n"); -+ return op.status; -+ } -+ -+ blkif->shmem_ref = shared_page; -+ blkif->shmem_handle = op.handle; -+ -+ return 0; -+} -+ -+static void unmap_frontend_page(blkif_t *blkif) -+{ -+ struct gnttab_unmap_grant_ref op; -+ -+ gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, -+ GNTMAP_host_map, blkif->shmem_handle); -+ -+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) -+ BUG(); -+} -+ -+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) -+{ -+ int err; -+ -+ /* Already connected through? 
*/ -+ if (blkif->irq) -+ return 0; -+ -+ if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) -+ return -ENOMEM; -+ -+ err = map_frontend_page(blkif, shared_page); -+ if (err) { -+ free_vm_area(blkif->blk_ring_area); -+ return err; -+ } -+ -+ switch (blkif->blk_protocol) { -+ case BLKIF_PROTOCOL_NATIVE: -+ { -+ struct blkif_sring *sring; -+ sring = (struct blkif_sring *)blkif->blk_ring_area->addr; -+ BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); -+ break; -+ } -+ case BLKIF_PROTOCOL_X86_32: -+ { -+ struct blkif_x86_32_sring *sring_x86_32; -+ sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; -+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); -+ break; -+ } -+ case BLKIF_PROTOCOL_X86_64: -+ { -+ struct blkif_x86_64_sring *sring_x86_64; -+ sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; -+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); -+ break; -+ } -+ default: -+ BUG(); -+ } -+ -+ err = bind_interdomain_evtchn_to_irqhandler( -+ blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); -+ if (err < 0) -+ { -+ unmap_frontend_page(blkif); -+ free_vm_area(blkif->blk_ring_area); -+ blkif->blk_rings.common.sring = NULL; -+ return err; -+ } -+ blkif->irq = err; -+ -+ return 0; -+} -+ -+void blkif_disconnect(blkif_t *blkif) -+{ -+ if (blkif->xenblkd) { -+ kthread_stop(blkif->xenblkd); -+ blkif->xenblkd = NULL; -+ } -+ -+ atomic_dec(&blkif->refcnt); -+ wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); -+ atomic_inc(&blkif->refcnt); -+ -+ if (blkif->irq) { -+ unbind_from_irqhandler(blkif->irq, blkif); -+ blkif->irq = 0; -+ } -+ -+ if (blkif->blk_rings.common.sring) { -+ unmap_frontend_page(blkif); -+ free_vm_area(blkif->blk_ring_area); -+ blkif->blk_rings.common.sring = NULL; -+ } -+} -+ -+void blkif_free(blkif_t *blkif) -+{ -+ if (!atomic_dec_and_test(&blkif->refcnt)) -+ BUG(); -+ kmem_cache_free(blkif_cachep, blkif); -+} -+ -+int __init 
blkif_interface_init(void) -+{ -+ blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), -+ 0, 0, NULL); -+ if (!blkif_cachep) -+ return -ENOMEM; -+ -+ return 0; -+} -diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c -new file mode 100644 -index 0000000..943ec23 ---- /dev/null -+++ b/drivers/xen/blkback/vbd.c -@@ -0,0 +1,161 @@ -+/****************************************************************************** -+ * blkback/vbd.c -+ * -+ * Routines for managing virtual block devices (VBDs). -+ * -+ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. -+ */ -+ -+#include "common.h" -+ -+#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ -+ (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk)) -+ -+unsigned long long vbd_size(struct vbd *vbd) -+{ -+ return vbd_sz(vbd); -+} -+ -+unsigned int vbd_info(struct vbd *vbd) -+{ -+ return vbd->type | (vbd->readonly?VDISK_READONLY:0); -+} -+ -+unsigned long vbd_secsize(struct vbd *vbd) -+{ -+ return bdev_logical_block_size(vbd->bdev); -+} -+ -+int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, -+ unsigned minor, int readonly, int cdrom) -+{ -+ struct vbd *vbd; -+ struct block_device *bdev; -+ -+ vbd = &blkif->vbd; -+ vbd->handle = handle; -+ vbd->readonly = readonly; -+ vbd->type = 0; -+ -+ vbd->pdevice = MKDEV(major, minor); -+ -+ bdev = open_by_devnum(vbd->pdevice, -+ vbd->readonly ? FMODE_READ : FMODE_WRITE); -+ -+ if (IS_ERR(bdev)) { -+ DPRINTK("vbd_creat: device %08x could not be opened.\n", -+ vbd->pdevice); -+ return -ENOENT; -+ } -+ -+ vbd->bdev = bdev; -+ vbd->size = vbd_size(vbd); -+ -+ if (vbd->bdev->bd_disk == NULL) { -+ DPRINTK("vbd_creat: device %08x doesn't exist.\n", -+ vbd->pdevice); -+ vbd_free(vbd); -+ return -ENOENT; -+ } -+ -+ if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) -+ vbd->type |= VDISK_CDROM; -+ if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) -+ vbd->type |= VDISK_REMOVABLE; -+ -+ DPRINTK("Successful creation of handle=%04x (dom=%u)\n", -+ handle, blkif->domid); -+ return 0; -+} -+ -+void vbd_free(struct vbd *vbd) -+{ -+ if (vbd->bdev) -+ blkdev_put(vbd->bdev, vbd->readonly ? 
FMODE_READ : FMODE_WRITE); -+ vbd->bdev = NULL; -+} -+ -+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) -+{ -+ struct vbd *vbd = &blkif->vbd; -+ int rc = -EACCES; -+ -+ if ((operation != READ) && vbd->readonly) -+ goto out; -+ -+ if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) -+ goto out; -+ -+ req->dev = vbd->pdevice; -+ req->bdev = vbd->bdev; -+ rc = 0; -+ -+ out: -+ return rc; -+} -+ -+void vbd_resize(blkif_t *blkif) -+{ -+ struct vbd *vbd = &blkif->vbd; -+ struct xenbus_transaction xbt; -+ int err; -+ struct xenbus_device *dev = blkback_xenbus(blkif->be); -+ unsigned long long new_size = vbd_size(vbd); -+ -+ printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); -+ vbd->size = new_size; -+again: -+ err = xenbus_transaction_start(&xbt); -+ if (err) { -+ printk(KERN_WARNING "Error starting transaction"); -+ return; -+ } -+ err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu", -+ vbd_size(vbd)); -+ if (err) { -+ printk(KERN_WARNING "Error writing new size"); -+ goto abort; -+ } -+ /* -+ * Write the current state; we will use this to synchronize -+ * the front-end. If the current state is "connected" the -+ * front-end will get the new size information online. 
-+ */ -+ err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); -+ if (err) { -+ printk(KERN_WARNING "Error writing the state"); -+ goto abort; -+ } -+ -+ err = xenbus_transaction_end(xbt, 0); -+ if (err == -EAGAIN) -+ goto again; -+ if (err) -+ printk(KERN_WARNING "Error ending transaction"); -+abort: -+ xenbus_transaction_end(xbt, 1); -+} -diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c -new file mode 100644 -index 0000000..a0534fc ---- /dev/null -+++ b/drivers/xen/blkback/xenbus.c -@@ -0,0 +1,553 @@ -+/* Xenbus code for blkif backend -+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> -+ Copyright (C) 2005 XenSource Ltd -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 2 of the License, or -+ (at your option) any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+*/ -+ -+#include <stdarg.h> -+#include <linux/module.h> -+#include <linux/kthread.h> -+#include "common.h" -+ -+#undef DPRINTK -+#define DPRINTK(fmt, args...) 
\ -+ pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ -+ __FUNCTION__, __LINE__, ##args) -+ -+struct backend_info -+{ -+ struct xenbus_device *dev; -+ blkif_t *blkif; -+ struct xenbus_watch backend_watch; -+ unsigned major; -+ unsigned minor; -+ char *mode; -+}; -+ -+static void connect(struct backend_info *); -+static int connect_ring(struct backend_info *); -+static void backend_changed(struct xenbus_watch *, const char **, -+ unsigned int); -+ -+struct xenbus_device *blkback_xenbus(struct backend_info *be) -+{ -+ return be->dev; -+} -+ -+static int blkback_name(blkif_t *blkif, char *buf) -+{ -+ char *devpath, *devname; -+ struct xenbus_device *dev = blkif->be->dev; -+ -+ devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); -+ if (IS_ERR(devpath)) -+ return PTR_ERR(devpath); -+ -+ if ((devname = strstr(devpath, "/dev/")) != NULL) -+ devname += strlen("/dev/"); -+ else -+ devname = devpath; -+ -+ snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); -+ kfree(devpath); -+ -+ return 0; -+} -+ -+static void update_blkif_status(blkif_t *blkif) -+{ -+ int err; -+ char name[TASK_COMM_LEN]; -+ -+ /* Not ready to connect? */ -+ if (!blkif->irq || !blkif->vbd.bdev) -+ return; -+ -+ /* Already connected? */ -+ if (blkif->be->dev->state == XenbusStateConnected) -+ return; -+ -+ /* Attempt to connect: exit if we fail to. 
*/ -+ connect(blkif->be); -+ if (blkif->be->dev->state != XenbusStateConnected) -+ return; -+ -+ err = blkback_name(blkif, name); -+ if (err) { -+ xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); -+ return; -+ } -+ -+ err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); -+ if (err) { -+ xenbus_dev_error(blkif->be->dev, err, "block flush"); -+ return; -+ } -+ invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); -+ -+ blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); -+ if (IS_ERR(blkif->xenblkd)) { -+ err = PTR_ERR(blkif->xenblkd); -+ blkif->xenblkd = NULL; -+ xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); -+ } -+} -+ -+ -+/**************************************************************** -+ * sysfs interface for VBD I/O requests -+ */ -+ -+#define VBD_SHOW(name, format, args...) \ -+ static ssize_t show_##name(struct device *_dev, \ -+ struct device_attribute *attr, \ -+ char *buf) \ -+ { \ -+ struct xenbus_device *dev = to_xenbus_device(_dev); \ -+ struct backend_info *be = dev_get_drvdata(&dev->dev); \ -+ \ -+ return sprintf(buf, format, ##args); \ -+ } \ -+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) -+ -+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); -+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); -+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); -+VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); -+VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); -+VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); -+ -+static struct attribute *vbdstat_attrs[] = { -+ &dev_attr_oo_req.attr, -+ &dev_attr_rd_req.attr, -+ &dev_attr_wr_req.attr, -+ &dev_attr_br_req.attr, -+ &dev_attr_rd_sect.attr, -+ &dev_attr_wr_sect.attr, -+ NULL -+}; -+ -+static struct attribute_group vbdstat_group = { -+ .name = "statistics", -+ .attrs = vbdstat_attrs, -+}; -+ -+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); -+VBD_SHOW(mode, "%s\n", be->mode); -+ -+int xenvbd_sysfs_addif(struct xenbus_device *dev) -+{ -+ int error; 
-+ -+ error = device_create_file(&dev->dev, &dev_attr_physical_device); -+ if (error) -+ goto fail1; -+ -+ error = device_create_file(&dev->dev, &dev_attr_mode); -+ if (error) -+ goto fail2; -+ -+ error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); -+ if (error) -+ goto fail3; -+ -+ return 0; -+ -+fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); -+fail2: device_remove_file(&dev->dev, &dev_attr_mode); -+fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); -+ return error; -+} -+ -+void xenvbd_sysfs_delif(struct xenbus_device *dev) -+{ -+ sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); -+ device_remove_file(&dev->dev, &dev_attr_mode); -+ device_remove_file(&dev->dev, &dev_attr_physical_device); -+} -+ -+static int blkback_remove(struct xenbus_device *dev) -+{ -+ struct backend_info *be = dev_get_drvdata(&dev->dev); -+ -+ DPRINTK(""); -+ -+ if (be->major || be->minor) -+ xenvbd_sysfs_delif(dev); -+ -+ if (be->backend_watch.node) { -+ unregister_xenbus_watch(&be->backend_watch); -+ kfree(be->backend_watch.node); -+ be->backend_watch.node = NULL; -+ } -+ -+ if (be->blkif) { -+ blkif_disconnect(be->blkif); -+ vbd_free(&be->blkif->vbd); -+ blkif_free(be->blkif); -+ be->blkif = NULL; -+ } -+ -+ kfree(be); -+ dev_set_drvdata(&dev->dev, NULL); -+ return 0; -+} -+ -+int blkback_barrier(struct xenbus_transaction xbt, -+ struct backend_info *be, int state) -+{ -+ struct xenbus_device *dev = be->dev; -+ int err; -+ -+ err = xenbus_printf(xbt, dev->nodename, "feature-barrier", -+ "%d", state); -+ if (err) -+ xenbus_dev_fatal(dev, err, "writing feature-barrier"); -+ -+ return err; -+} -+ -+/** -+ * Entry point to this code when a new device is created. Allocate the basic -+ * structures, and watch the store waiting for the hotplug scripts to tell us -+ * the device's physical major and minor numbers. Switch to InitWait. 
-+ */ -+static int blkback_probe(struct xenbus_device *dev, -+ const struct xenbus_device_id *id) -+{ -+ int err; -+ struct backend_info *be = kzalloc(sizeof(struct backend_info), -+ GFP_KERNEL); -+ if (!be) { -+ xenbus_dev_fatal(dev, -ENOMEM, -+ "allocating backend structure"); -+ return -ENOMEM; -+ } -+ be->dev = dev; -+ dev_set_drvdata(&dev->dev, be); -+ -+ be->blkif = blkif_alloc(dev->otherend_id); -+ if (IS_ERR(be->blkif)) { -+ err = PTR_ERR(be->blkif); -+ be->blkif = NULL; -+ xenbus_dev_fatal(dev, err, "creating block interface"); -+ goto fail; -+ } -+ -+ /* setup back pointer */ -+ be->blkif->be = be; -+ -+ err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, -+ "%s/%s", dev->nodename, "physical-device"); -+ if (err) -+ goto fail; -+ -+ err = xenbus_switch_state(dev, XenbusStateInitWait); -+ if (err) -+ goto fail; -+ -+ return 0; -+ -+fail: -+ DPRINTK("failed"); -+ blkback_remove(dev); -+ return err; -+} -+ -+ -+/** -+ * Callback received when the hotplug scripts have placed the physical-device -+ * node. Read it and the mode node, and create a vbd. If the frontend is -+ * ready, connect. -+ */ -+static void backend_changed(struct xenbus_watch *watch, -+ const char **vec, unsigned int len) -+{ -+ int err; -+ unsigned major; -+ unsigned minor; -+ struct backend_info *be -+ = container_of(watch, struct backend_info, backend_watch); -+ struct xenbus_device *dev = be->dev; -+ int cdrom = 0; -+ char *device_type; -+ -+ DPRINTK(""); -+ -+ err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", -+ &major, &minor); -+ if (XENBUS_EXIST_ERR(err)) { -+ /* Since this watch will fire once immediately after it is -+ registered, we expect this. Ignore it, and wait for the -+ hotplug scripts. 
*/ -+ return; -+ } -+ if (err != 2) { -+ xenbus_dev_fatal(dev, err, "reading physical-device"); -+ return; -+ } -+ -+ if ((be->major || be->minor) && -+ ((be->major != major) || (be->minor != minor))) { -+ printk(KERN_WARNING -+ "blkback: changing physical device (from %x:%x to " -+ "%x:%x) not supported.\n", be->major, be->minor, -+ major, minor); -+ return; -+ } -+ -+ be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); -+ if (IS_ERR(be->mode)) { -+ err = PTR_ERR(be->mode); -+ be->mode = NULL; -+ xenbus_dev_fatal(dev, err, "reading mode"); -+ return; -+ } -+ -+ device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); -+ if (!IS_ERR(device_type)) { -+ cdrom = strcmp(device_type, "cdrom") == 0; -+ kfree(device_type); -+ } -+ -+ if (be->major == 0 && be->minor == 0) { -+ /* Front end dir is a number, which is used as the handle. */ -+ -+ char *p = strrchr(dev->otherend, '/') + 1; -+ long handle = simple_strtoul(p, NULL, 0); -+ -+ be->major = major; -+ be->minor = minor; -+ -+ err = vbd_create(be->blkif, handle, major, minor, -+ (NULL == strchr(be->mode, 'w')), cdrom); -+ if (err) { -+ be->major = be->minor = 0; -+ xenbus_dev_fatal(dev, err, "creating vbd structure"); -+ return; -+ } -+ -+ err = xenvbd_sysfs_addif(dev); -+ if (err) { -+ vbd_free(&be->blkif->vbd); -+ be->major = be->minor = 0; -+ xenbus_dev_fatal(dev, err, "creating sysfs entries"); -+ return; -+ } -+ -+ /* We're potentially connected now */ -+ update_blkif_status(be->blkif); -+ } -+} -+ -+ -+/** -+ * Callback received when the frontend's state changes. 
-+ */ -+static void frontend_changed(struct xenbus_device *dev, -+ enum xenbus_state frontend_state) -+{ -+ struct backend_info *be = dev_get_drvdata(&dev->dev); -+ int err; -+ -+ DPRINTK("%s", xenbus_strstate(frontend_state)); -+ -+ switch (frontend_state) { -+ case XenbusStateInitialising: -+ if (dev->state == XenbusStateClosed) { -+ printk(KERN_INFO "%s: %s: prepare for reconnect\n", -+ __FUNCTION__, dev->nodename); -+ xenbus_switch_state(dev, XenbusStateInitWait); -+ } -+ break; -+ -+ case XenbusStateInitialised: -+ case XenbusStateConnected: -+ /* Ensure we connect even when two watches fire in -+ close successsion and we miss the intermediate value -+ of frontend_state. */ -+ if (dev->state == XenbusStateConnected) -+ break; -+ -+ err = connect_ring(be); -+ if (err) -+ break; -+ update_blkif_status(be->blkif); -+ break; -+ -+ case XenbusStateClosing: -+ blkif_disconnect(be->blkif); -+ xenbus_switch_state(dev, XenbusStateClosing); -+ break; -+ -+ case XenbusStateClosed: -+ xenbus_switch_state(dev, XenbusStateClosed); -+ if (xenbus_dev_is_online(dev)) -+ break; -+ /* fall through if not online */ -+ case XenbusStateUnknown: -+ device_unregister(&dev->dev); -+ break; -+ -+ default: -+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", -+ frontend_state); -+ break; -+ } -+} -+ -+ -+/* ** Connection ** */ -+ -+ -+/** -+ * Write the physical details regarding the block device to the store, and -+ * switch to Connected state. 
-+ */ -+static void connect(struct backend_info *be) -+{ -+ struct xenbus_transaction xbt; -+ int err; -+ struct xenbus_device *dev = be->dev; -+ -+ DPRINTK("%s", dev->otherend); -+ -+ /* Supply the information about the device the frontend needs */ -+again: -+ err = xenbus_transaction_start(&xbt); -+ if (err) { -+ xenbus_dev_fatal(dev, err, "starting transaction"); -+ return; -+ } -+ -+ err = blkback_barrier(xbt, be, 1); -+ if (err) -+ goto abort; -+ -+ err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", -+ vbd_size(&be->blkif->vbd)); -+ if (err) { -+ xenbus_dev_fatal(dev, err, "writing %s/sectors", -+ dev->nodename); -+ goto abort; -+ } -+ -+ /* FIXME: use a typename instead */ -+ err = xenbus_printf(xbt, dev->nodename, "info", "%u", -+ vbd_info(&be->blkif->vbd)); -+ if (err) { -+ xenbus_dev_fatal(dev, err, "writing %s/info", -+ dev->nodename); -+ goto abort; -+ } -+ err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", -+ vbd_secsize(&be->blkif->vbd)); -+ if (err) { -+ xenbus_dev_fatal(dev, err, "writing %s/sector-size", -+ dev->nodename); -+ goto abort; -+ } -+ -+ err = xenbus_transaction_end(xbt, 0); -+ if (err == -EAGAIN) -+ goto again; -+ if (err) -+ xenbus_dev_fatal(dev, err, "ending transaction"); -+ -+ err = xenbus_switch_state(dev, XenbusStateConnected); -+ if (err) -+ xenbus_dev_fatal(dev, err, "switching to Connected state", -+ dev->nodename); -+ -+ return; -+ abort: -+ xenbus_transaction_end(xbt, 1); -+} -+ -+ -+static int connect_ring(struct backend_info *be) -+{ -+ struct xenbus_device *dev = be->dev; -+ unsigned long ring_ref; -+ unsigned int evtchn; -+ char protocol[64] = ""; -+ int err; -+ -+ DPRINTK("%s", dev->otherend); -+ -+ err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, -+ "event-channel", "%u", &evtchn, NULL); -+ if (err) { -+ xenbus_dev_fatal(dev, err, -+ "reading %s/ring-ref and event-channel", -+ dev->otherend); -+ return err; -+ } -+ -+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; -+ err = 
xenbus_gather(XBT_NIL, dev->otherend, "protocol", -+ "%63s", protocol, NULL); -+ if (err) -+ strcpy(protocol, "unspecified, assuming native"); -+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) -+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; -+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) -+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; -+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) -+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; -+ else { -+ xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); -+ return -1; -+ } -+ printk(KERN_INFO -+ "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", -+ ring_ref, evtchn, be->blkif->blk_protocol, protocol); -+ -+ /* Map the shared frame, irq etc. */ -+ err = blkif_map(be->blkif, ring_ref, evtchn); -+ if (err) { -+ xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", -+ ring_ref, evtchn); -+ return err; -+ } -+ -+ return 0; -+} -+ -+ -+/* ** Driver Registration ** */ -+ -+ -+static const struct xenbus_device_id blkback_ids[] = { -+ { "vbd" }, -+ { "" } -+}; -+ -+ -+static struct xenbus_driver blkback = { -+ .name = "vbd", -+ .owner = THIS_MODULE, -+ .ids = blkback_ids, -+ .probe = blkback_probe, -+ .remove = blkback_remove, -+ .otherend_changed = frontend_changed -+}; -+ -+ -+int blkif_xenbus_init(void) -+{ -+ return xenbus_register_backend(&blkback); -+} -diff --git a/drivers/xen/blktap/Makefile b/drivers/xen/blktap/Makefile -new file mode 100644 -index 0000000..822b4e4 ---- /dev/null -+++ b/drivers/xen/blktap/Makefile -@@ -0,0 +1,3 @@ -+obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o -+ -+blktap-objs := control.o ring.o device.o request.o sysfs.o -diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h -new file mode 100644 -index 0000000..fe63fc9 ---- /dev/null -+++ b/drivers/xen/blktap/blktap.h -@@ -0,0 +1,209 @@ -+#ifndef _BLKTAP_H_ -+#define _BLKTAP_H_ -+ -+#include <linux/mm.h> -+#include <linux/fs.h> -+#include <linux/cdev.h> -+#include 
<linux/init.h> -+#include <linux/scatterlist.h> -+#include <xen/blkif.h> -+ -+extern int blktap_debug_level; -+extern int blktap_ring_major; -+extern int blktap_device_major; -+ -+#define BTPRINTK(level, tag, force, _f, _a...) \ -+ do { \ -+ if (blktap_debug_level > level && \ -+ (force || printk_ratelimit())) \ -+ printk(tag "%s: " _f, __func__, ##_a); \ -+ } while (0) -+ -+#define BTDBG(_f, _a...) BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a) -+#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a) -+#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a) -+#define BTERR(_f, _a...) BTPRINTK(0, KERN_ERR, 0, _f, ##_a) -+ -+#define MAX_BLKTAP_DEVICE 1024 -+ -+#define BLKTAP_DEVICE 4 -+#define BLKTAP_DEVICE_CLOSED 5 -+#define BLKTAP_SHUTDOWN_REQUESTED 8 -+ -+/* blktap IOCTLs: */ -+#define BLKTAP2_IOCTL_KICK_FE 1 -+#define BLKTAP2_IOCTL_ALLOC_TAP 200 -+#define BLKTAP2_IOCTL_FREE_TAP 201 -+#define BLKTAP2_IOCTL_CREATE_DEVICE 202 -+#define BLKTAP2_IOCTL_REMOVE_DEVICE 207 -+ -+#define BLKTAP2_MAX_MESSAGE_LEN 256 -+ -+#define BLKTAP2_RING_MESSAGE_CLOSE 3 -+ -+#define BLKTAP_REQUEST_FREE 0 -+#define BLKTAP_REQUEST_PENDING 1 -+ -+/* -+ * The maximum number of requests that can be outstanding at any time -+ * is determined by -+ * -+ * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] -+ * -+ * where mmap_alloc < MAX_DYNAMIC_MEM. -+ * -+ * TODO: -+ * mmap_alloc is initialised to 2 and should be adjustable on the fly via -+ * sysfs. 
-+ */ -+#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) -+#define MAX_DYNAMIC_MEM BLK_RING_SIZE -+#define MAX_PENDING_REQS BLK_RING_SIZE -+#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) -+#define MMAP_VADDR(_start, _req, _seg) \ -+ (_start + \ -+ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ -+ ((_seg) * PAGE_SIZE)) -+ -+struct grant_handle_pair { -+ grant_handle_t kernel; -+ grant_handle_t user; -+}; -+#define INVALID_GRANT_HANDLE 0xFFFF -+ -+struct blktap_handle { -+ unsigned int ring; -+ unsigned int device; -+ unsigned int minor; -+}; -+ -+struct blktap_params { -+ char name[BLKTAP2_MAX_MESSAGE_LEN]; -+ unsigned long long capacity; -+ unsigned long sector_size; -+}; -+ -+struct blktap_device { -+ spinlock_t lock; -+ struct gendisk *gd; -+}; -+ -+struct blktap_ring { -+ struct task_struct *task; -+ -+ struct vm_area_struct *vma; -+ struct blkif_front_ring ring; -+ unsigned long ring_vstart; -+ unsigned long user_vstart; -+ -+ int n_pending; -+ struct blktap_request *pending[MAX_PENDING_REQS]; -+ -+ wait_queue_head_t poll_wait; -+ -+ dev_t devno; -+ struct device *dev; -+}; -+ -+struct blktap_statistics { -+ unsigned long st_print; -+ int st_rd_req; -+ int st_wr_req; -+ int st_oo_req; -+ int st_rd_sect; -+ int st_wr_sect; -+ s64 st_rd_cnt; -+ s64 st_rd_sum_usecs; -+ s64 st_rd_max_usecs; -+ s64 st_wr_cnt; -+ s64 st_wr_sum_usecs; -+ s64 st_wr_max_usecs; -+}; -+ -+struct blktap_request { -+ struct blktap *tap; -+ struct request *rq; -+ int usr_idx; -+ -+ int operation; -+ struct timeval time; -+ -+ struct scatterlist sg_table[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -+ struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -+ int nr_pages; -+}; -+ -+#define blktap_for_each_sg(_sg, _req, _i) \ -+ for (_sg = (_req)->sg_table, _i = 0; \ -+ _i < (_req)->nr_pages; \ -+ (_sg)++, (_i)++) -+ -+struct blktap { -+ int minor; -+ unsigned long dev_inuse; -+ -+ struct blktap_ring ring; -+ struct blktap_device device; -+ struct 
blktap_page_pool *pool; -+ -+ wait_queue_head_t remove_wait; -+ struct work_struct remove_work; -+ char name[BLKTAP2_MAX_MESSAGE_LEN]; -+ -+ struct blktap_statistics stats; -+}; -+ -+struct blktap_page_pool { -+ struct mempool_s *bufs; -+ spinlock_t lock; -+ struct kobject kobj; -+ wait_queue_head_t wait; -+}; -+ -+extern struct mutex blktap_lock; -+extern struct blktap **blktaps; -+extern int blktap_max_minor; -+ -+int blktap_control_destroy_tap(struct blktap *); -+size_t blktap_control_debug(struct blktap *, char *, size_t); -+ -+int blktap_ring_init(void); -+void blktap_ring_exit(void); -+size_t blktap_ring_debug(struct blktap *, char *, size_t); -+int blktap_ring_create(struct blktap *); -+int blktap_ring_destroy(struct blktap *); -+struct blktap_request *blktap_ring_make_request(struct blktap *); -+void blktap_ring_free_request(struct blktap *,struct blktap_request *); -+void blktap_ring_submit_request(struct blktap *, struct blktap_request *); -+int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *, int); -+int blktap_ring_map_request(struct blktap *, struct blktap_request *); -+void blktap_ring_unmap_request(struct blktap *, struct blktap_request *); -+void blktap_ring_set_message(struct blktap *, int); -+void blktap_ring_kick_user(struct blktap *); -+ -+int blktap_sysfs_init(void); -+void blktap_sysfs_exit(void); -+int blktap_sysfs_create(struct blktap *); -+void blktap_sysfs_destroy(struct blktap *); -+ -+int blktap_device_init(void); -+void blktap_device_exit(void); -+size_t blktap_device_debug(struct blktap *, char *, size_t); -+int blktap_device_create(struct blktap *, struct blktap_params *); -+int blktap_device_destroy(struct blktap *); -+void blktap_device_destroy_sync(struct blktap *); -+void blktap_device_run_queue(struct blktap *); -+void blktap_device_end_request(struct blktap *, struct blktap_request *, int); -+ -+int blktap_page_pool_init(struct kobject *); -+void blktap_page_pool_exit(void); -+struct blktap_page_pool 
*blktap_page_pool_get(const char *); -+ -+size_t blktap_request_debug(struct blktap *, char *, size_t); -+struct blktap_request *blktap_request_alloc(struct blktap *); -+int blktap_request_get_pages(struct blktap *, struct blktap_request *, int); -+void blktap_request_free(struct blktap *, struct blktap_request *); -+void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int); -+ -+ -+#endif -diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c -new file mode 100644 -index 0000000..f339bba ---- /dev/null -+++ b/drivers/xen/blktap/control.c -@@ -0,0 +1,315 @@ -+#include <linux/module.h> -+#include <linux/sched.h> -+#include <linux/miscdevice.h> -+#include <linux/device.h> -+#include <asm/uaccess.h> -+ -+#include "blktap.h" -+ -+DEFINE_MUTEX(blktap_lock); -+ -+struct blktap **blktaps; -+int blktap_max_minor; -+static struct blktap_page_pool *default_pool; -+ -+static struct blktap * -+blktap_control_get_minor(void) -+{ -+ int minor; -+ struct blktap *tap; -+ -+ tap = kzalloc(sizeof(*tap), GFP_KERNEL); -+ if (unlikely(!tap)) -+ return NULL; -+ -+ mutex_lock(&blktap_lock); -+ -+ for (minor = 0; minor < blktap_max_minor; minor++) -+ if (!blktaps[minor]) -+ break; -+ -+ if (minor == MAX_BLKTAP_DEVICE) -+ goto fail; -+ -+ if (minor == blktap_max_minor) { -+ void *p; -+ int n; -+ -+ n = min(2 * blktap_max_minor, MAX_BLKTAP_DEVICE); -+ p = krealloc(blktaps, n * sizeof(blktaps[0]), GFP_KERNEL); -+ if (!p) -+ goto fail; -+ -+ blktaps = p; -+ minor = blktap_max_minor; -+ blktap_max_minor = n; -+ -+ memset(&blktaps[minor], 0, (n - minor) * sizeof(blktaps[0])); -+ } -+ -+ tap->minor = minor; -+ blktaps[minor] = tap; -+ -+ __module_get(THIS_MODULE); -+out: -+ mutex_unlock(&blktap_lock); -+ return tap; -+ -+fail: -+ mutex_unlock(&blktap_lock); -+ kfree(tap); -+ tap = NULL; -+ goto out; -+} -+ -+static void -+blktap_control_put_minor(struct blktap* tap) -+{ -+ blktaps[tap->minor] = NULL; -+ kfree(tap); -+ -+ module_put(THIS_MODULE); -+} -+ 
-+static struct blktap* -+blktap_control_create_tap(void) -+{ -+ struct blktap *tap; -+ int err; -+ -+ tap = blktap_control_get_minor(); -+ if (!tap) -+ return NULL; -+ -+ kobject_get(&default_pool->kobj); -+ tap->pool = default_pool; -+ -+ err = blktap_ring_create(tap); -+ if (err) -+ goto fail_tap; -+ -+ err = blktap_sysfs_create(tap); -+ if (err) -+ goto fail_ring; -+ -+ return tap; -+ -+fail_ring: -+ blktap_ring_destroy(tap); -+fail_tap: -+ blktap_control_put_minor(tap); -+ -+ return NULL; -+} -+ -+int -+blktap_control_destroy_tap(struct blktap *tap) -+{ -+ int err; -+ -+ err = blktap_ring_destroy(tap); -+ if (err) -+ return err; -+ -+ kobject_put(&tap->pool->kobj); -+ -+ blktap_sysfs_destroy(tap); -+ -+ blktap_control_put_minor(tap); -+ -+ return 0; -+} -+ -+static int -+blktap_control_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct blktap *tap; -+ -+ switch (cmd) { -+ case BLKTAP2_IOCTL_ALLOC_TAP: { -+ struct blktap_handle h; -+ void __user *ptr = (void __user*)arg; -+ -+ tap = blktap_control_create_tap(); -+ if (!tap) -+ return -ENOMEM; -+ -+ h.ring = blktap_ring_major; -+ h.device = blktap_device_major; -+ h.minor = tap->minor; -+ -+ if (copy_to_user(ptr, &h, sizeof(h))) { -+ blktap_control_destroy_tap(tap); -+ return -EFAULT; -+ } -+ -+ return 0; -+ } -+ -+ case BLKTAP2_IOCTL_FREE_TAP: { -+ int minor = arg; -+ -+ if (minor > MAX_BLKTAP_DEVICE) -+ return -EINVAL; -+ -+ tap = blktaps[minor]; -+ if (!tap) -+ return -ENODEV; -+ -+ return blktap_control_destroy_tap(tap); -+ } -+ } -+ -+ return -ENOIOCTLCMD; -+} -+ -+static struct file_operations blktap_control_file_operations = { -+ .owner = THIS_MODULE, -+ .ioctl = blktap_control_ioctl, -+}; -+ -+static struct miscdevice blktap_control = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "blktap-control", -+ .fops = &blktap_control_file_operations, -+}; -+ -+static struct device *control_device; -+ -+static ssize_t -+blktap_control_show_default_pool(struct device 
*device, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ return sprintf(buf, "%s", kobject_name(&default_pool->kobj)); -+} -+ -+static ssize_t -+blktap_control_store_default_pool(struct device *device, -+ struct device_attribute *attr, -+ const char *buf, size_t size) -+{ -+ struct blktap_page_pool *pool, *tmp = default_pool; -+ -+ pool = blktap_page_pool_get(buf); -+ if (IS_ERR(pool)) -+ return PTR_ERR(pool); -+ -+ default_pool = pool; -+ kobject_put(&tmp->kobj); -+ -+ return size; -+} -+ -+static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, -+ blktap_control_show_default_pool, -+ blktap_control_store_default_pool); -+ -+size_t -+blktap_control_debug(struct blktap *tap, char *buf, size_t size) -+{ -+ char *s = buf, *end = buf + size; -+ -+ s += snprintf(s, end - s, -+ "tap %u:%u name:'%s' flags:%#08lx\n", -+ MAJOR(tap->ring.devno), MINOR(tap->ring.devno), -+ tap->name, tap->dev_inuse); -+ -+ return s - buf; -+} -+ -+static int __init -+blktap_control_init(void) -+{ -+ int err; -+ -+ err = misc_register(&blktap_control); -+ if (err) -+ return err; -+ -+ control_device = blktap_control.this_device; -+ -+ blktap_max_minor = min(64, MAX_BLKTAP_DEVICE); -+ blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL); -+ if (!blktaps) { -+ BTERR("failed to allocate blktap minor map"); -+ return -ENOMEM; -+ } -+ -+ err = blktap_page_pool_init(&control_device->kobj); -+ if (err) -+ return err; -+ -+ default_pool = blktap_page_pool_get("default"); -+ if (!default_pool) -+ return -ENOMEM; -+ -+ err = device_create_file(control_device, &dev_attr_default_pool); -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+static void -+blktap_control_exit(void) -+{ -+ if (default_pool) { -+ kobject_put(&default_pool->kobj); -+ default_pool = NULL; -+ } -+ -+ blktap_page_pool_exit(); -+ -+ if (blktaps) { -+ kfree(blktaps); -+ blktaps = NULL; -+ } -+ -+ if (control_device) { -+ misc_deregister(&blktap_control); -+ control_device = NULL; -+ } -+} -+ -+static 
void -+blktap_exit(void) -+{ -+ blktap_control_exit(); -+ blktap_ring_exit(); -+ blktap_sysfs_exit(); -+ blktap_device_exit(); -+} -+ -+static int __init -+blktap_init(void) -+{ -+ int err; -+ -+ err = blktap_device_init(); -+ if (err) -+ goto fail; -+ -+ err = blktap_ring_init(); -+ if (err) -+ goto fail; -+ -+ err = blktap_sysfs_init(); -+ if (err) -+ goto fail; -+ -+ err = blktap_control_init(); -+ if (err) -+ goto fail; -+ -+ return 0; -+ -+fail: -+ blktap_exit(); -+ return err; -+} -+ -+module_init(blktap_init); -+module_exit(blktap_exit); -+MODULE_LICENSE("Dual BSD/GPL"); -diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c -new file mode 100644 -index 0000000..fce2769 ---- /dev/null -+++ b/drivers/xen/blktap/device.c -@@ -0,0 +1,564 @@ -+#include <linux/fs.h> -+#include <linux/blkdev.h> -+#include <linux/cdrom.h> -+#include <linux/hdreg.h> -+#include <scsi/scsi.h> -+#include <scsi/scsi_ioctl.h> -+ -+#include "blktap.h" -+ -+int blktap_device_major; -+ -+#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device) -+ -+static int -+blktap_device_open(struct block_device *bdev, fmode_t mode) -+{ -+ struct gendisk *disk = bdev->bd_disk; -+ struct blktap_device *tapdev = disk->private_data; -+ -+ if (!tapdev) -+ return -ENXIO; -+ -+ /* NB. we might have bounced a bd trylock by tapdisk. when -+ * failing for reasons not !tapdev, make sure to kick tapdisk -+ * out of destroy wait state again. 
*/ -+ -+ return 0; -+} -+ -+static int -+blktap_device_release(struct gendisk *disk, fmode_t mode) -+{ -+ struct blktap_device *tapdev = disk->private_data; -+ struct block_device *bdev = bdget_disk(disk, 0); -+ struct blktap *tap = dev_to_blktap(tapdev); -+ -+ bdput(bdev); -+ -+ if (!bdev->bd_openers) { -+ set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse); -+ blktap_ring_kick_user(tap); -+ } -+ -+ return 0; -+} -+ -+static int -+blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg) -+{ -+ /* We don't have real geometry info, but let's at least return -+ values consistent with the size of the device */ -+ sector_t nsect = get_capacity(bd->bd_disk); -+ sector_t cylinders = nsect; -+ -+ hg->heads = 0xff; -+ hg->sectors = 0x3f; -+ sector_div(cylinders, hg->heads * hg->sectors); -+ hg->cylinders = cylinders; -+ if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) -+ hg->cylinders = 0xffff; -+ return 0; -+} -+ -+static int -+blktap_device_ioctl(struct block_device *bd, fmode_t mode, -+ unsigned command, unsigned long argument) -+{ -+ int i; -+ -+ switch (command) { -+ case CDROMMULTISESSION: -+ BTDBG("FIXME: support multisession CDs later\n"); -+ for (i = 0; i < sizeof(struct cdrom_multisession); i++) -+ if (put_user(0, (char __user *)(argument + i))) -+ return -EFAULT; -+ return 0; -+ -+ case SCSI_IOCTL_GET_IDLUN: -+ if (!access_ok(VERIFY_WRITE, argument, -+ sizeof(struct scsi_idlun))) -+ return -EFAULT; -+ -+ /* return 0 for now. 
*/ -+ __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id); -+ __put_user(0, -+ &((struct scsi_idlun __user *)argument)->host_unique_id); -+ return 0; -+ -+ default: -+ /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", -+ command);*/ -+ return -EINVAL; /* same return as native Linux */ -+ } -+ -+ return 0; -+} -+ -+static struct block_device_operations blktap_device_file_operations = { -+ .owner = THIS_MODULE, -+ .open = blktap_device_open, -+ .release = blktap_device_release, -+ .ioctl = blktap_device_ioctl, -+ .getgeo = blktap_device_getgeo -+}; -+ -+/* NB. __blktap holding the queue lock; blktap where unlocked */ -+ -+static inline struct request* -+__blktap_next_queued_rq(struct request_queue *q) -+{ -+ return blk_peek_request(q); -+} -+ -+static inline void -+__blktap_dequeue_rq(struct request *rq) -+{ -+ blk_start_request(rq); -+} -+ -+/* NB. err == 0 indicates success, failures < 0 */ -+ -+static inline void -+__blktap_end_queued_rq(struct request *rq, int err) -+{ -+ blk_start_request(rq); -+ __blk_end_request(rq, err, blk_rq_bytes(rq)); -+} -+ -+static inline void -+__blktap_end_rq(struct request *rq, int err) -+{ -+ __blk_end_request(rq, err, blk_rq_bytes(rq)); -+} -+ -+static inline void -+blktap_end_rq(struct request *rq, int err) -+{ -+ spin_lock_irq(rq->q->queue_lock); -+ __blktap_end_rq(rq, err); -+ spin_unlock_irq(rq->q->queue_lock); -+} -+ -+void -+blktap_device_end_request(struct blktap *tap, -+ struct blktap_request *request, -+ int error) -+{ -+ struct blktap_device *tapdev = &tap->device; -+ struct request *rq = request->rq; -+ -+ blktap_ring_unmap_request(tap, request); -+ -+ blktap_ring_free_request(tap, request); -+ -+ dev_dbg(disk_to_dev(tapdev->gd), -+ "end_request: op=%d error=%d bytes=%d\n", -+ rq_data_dir(rq), error, blk_rq_bytes(rq)); -+ -+ blktap_end_rq(rq, error); -+} -+ -+int -+blktap_device_make_request(struct blktap *tap, struct request *rq) -+{ -+ struct blktap_device *tapdev = &tap->device; -+ struct 
blktap_request *request; -+ int write, nsegs; -+ int err; -+ -+ request = blktap_ring_make_request(tap); -+ if (IS_ERR(request)) { -+ err = PTR_ERR(request); -+ request = NULL; -+ -+ if (err == -ENOSPC || err == -ENOMEM) -+ goto stop; -+ -+ goto fail; -+ } -+ -+ write = rq_data_dir(rq) == WRITE; -+ nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table); -+ -+ dev_dbg(disk_to_dev(tapdev->gd), -+ "make_request: op=%c bytes=%d nsegs=%d\n", -+ write ? 'w' : 'r', blk_rq_bytes(rq), nsegs); -+ -+ request->rq = rq; -+ request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; -+ -+ err = blktap_request_get_pages(tap, request, nsegs); -+ if (err) -+ goto stop; -+ -+ err = blktap_ring_map_request(tap, request); -+ if (err) -+ goto fail; -+ -+ blktap_ring_submit_request(tap, request); -+ -+ return 0; -+ -+stop: -+ tap->stats.st_oo_req++; -+ err = -EBUSY; -+ -+_out: -+ if (request) -+ blktap_ring_free_request(tap, request); -+ -+ return err; -+fail: -+ if (printk_ratelimit()) -+ dev_warn(disk_to_dev(tapdev->gd), -+ "make request: %d, failing\n", err); -+ goto _out; -+} -+ -+/* -+ * called from tapdisk context -+ */ -+void -+blktap_device_run_queue(struct blktap *tap) -+{ -+ struct blktap_device *tapdev = &tap->device; -+ struct request_queue *q; -+ struct request *rq; -+ int err; -+ -+ if (!tapdev->gd) -+ return; -+ -+ q = tapdev->gd->queue; -+ -+ spin_lock_irq(&tapdev->lock); -+ queue_flag_clear(QUEUE_FLAG_STOPPED, q); -+ -+ do { -+ rq = __blktap_next_queued_rq(q); -+ if (!rq) -+ break; -+ -+ if (!blk_fs_request(rq)) { -+ __blktap_end_queued_rq(rq, -EOPNOTSUPP); -+ continue; -+ } -+ -+ spin_unlock_irq(&tapdev->lock); -+ -+ err = blktap_device_make_request(tap, rq); -+ -+ spin_lock_irq(&tapdev->lock); -+ -+ if (err == -EBUSY) { -+ blk_stop_queue(q); -+ break; -+ } -+ -+ __blktap_dequeue_rq(rq); -+ -+ if (unlikely(err)) -+ __blktap_end_rq(rq, err); -+ } while (1); -+ -+ spin_unlock_irq(&tapdev->lock); -+} -+ -+static void -+blktap_device_do_request(struct request_queue *rq) -+{ 
-+ struct blktap_device *tapdev = rq->queuedata; -+ struct blktap *tap = dev_to_blktap(tapdev); -+ -+ blktap_ring_kick_user(tap); -+} -+ -+static void -+blktap_device_configure(struct blktap *tap, -+ struct blktap_params *params) -+{ -+ struct request_queue *rq; -+ struct blktap_device *dev = &tap->device; -+ -+ dev = &tap->device; -+ rq = dev->gd->queue; -+ -+ spin_lock_irq(&dev->lock); -+ -+ set_capacity(dev->gd, params->capacity); -+ -+ /* Hard sector size and max sectors impersonate the equiv. hardware. */ -+ blk_queue_logical_block_size(rq, params->sector_size); -+ blk_queue_max_sectors(rq, 512); -+ -+ /* Each segment in a request is up to an aligned page in size. */ -+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1); -+ blk_queue_max_segment_size(rq, PAGE_SIZE); -+ -+ /* Ensure a merged request will fit in a single I/O ring slot. */ -+ blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); -+ blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); -+ -+ /* Make sure buffer addresses are sector-aligned. */ -+ blk_queue_dma_alignment(rq, 511); -+ -+ /* We are reordering, but cacheless. 
*/ -+ blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL); -+ -+ spin_unlock_irq(&dev->lock); -+} -+ -+static int -+blktap_device_validate_params(struct blktap *tap, -+ struct blktap_params *params) -+{ -+ struct device *dev = tap->ring.dev; -+ int sector_order, name_sz; -+ -+ sector_order = ffs(params->sector_size) - 1; -+ -+ if (sector_order < 9 || -+ sector_order > 12 || -+ params->sector_size != 1U<<sector_order) -+ goto fail; -+ -+ if (!params->capacity || -+ (params->capacity > ULLONG_MAX >> sector_order)) -+ goto fail; -+ -+ name_sz = min(sizeof(params->name), sizeof(tap->name)); -+ if (strnlen(params->name, name_sz) >= name_sz) -+ goto fail; -+ -+ return 0; -+ -+fail: -+ params->name[name_sz-1] = 0; -+ dev_err(dev, "capacity: %llu, sector-size: %lu, name: %s\n", -+ params->capacity, params->sector_size, params->name); -+ return -EINVAL; -+} -+ -+int -+blktap_device_destroy(struct blktap *tap) -+{ -+ struct blktap_device *tapdev = &tap->device; -+ struct block_device *bdev; -+ struct gendisk *gd; -+ int err; -+ -+ gd = tapdev->gd; -+ if (!gd) -+ return 0; -+ -+ bdev = bdget_disk(gd, 0); -+ -+ err = !mutex_trylock(&bdev->bd_mutex); -+ if (err) { -+ /* NB. avoid a deadlock. the last opener syncs the -+ * bdev holding bd_mutex. 
*/ -+ err = -EBUSY; -+ goto out_nolock; -+ } -+ -+ if (bdev->bd_openers) { -+ err = -EBUSY; -+ goto out; -+ } -+ -+ del_gendisk(gd); -+ gd->private_data = NULL; -+ -+ blk_cleanup_queue(gd->queue); -+ -+ put_disk(gd); -+ tapdev->gd = NULL; -+ -+ clear_bit(BLKTAP_DEVICE, &tap->dev_inuse); -+ err = 0; -+out: -+ mutex_unlock(&bdev->bd_mutex); -+out_nolock: -+ bdput(bdev); -+ -+ return err; -+} -+ -+static void -+blktap_device_fail_queue(struct blktap *tap) -+{ -+ struct blktap_device *tapdev = &tap->device; -+ struct request_queue *q = tapdev->gd->queue; -+ -+ spin_lock_irq(&tapdev->lock); -+ queue_flag_clear(QUEUE_FLAG_STOPPED, q); -+ -+ do { -+ struct request *rq = __blktap_next_queued_rq(q); -+ if (!rq) -+ break; -+ -+ __blktap_end_queued_rq(rq, -EIO); -+ } while (1); -+ -+ spin_unlock_irq(&tapdev->lock); -+} -+ -+static int -+blktap_device_try_destroy(struct blktap *tap) -+{ -+ int err; -+ -+ err = blktap_device_destroy(tap); -+ if (err) -+ blktap_device_fail_queue(tap); -+ -+ return err; -+} -+ -+void -+blktap_device_destroy_sync(struct blktap *tap) -+{ -+ wait_event(tap->ring.poll_wait, -+ !blktap_device_try_destroy(tap)); -+} -+ -+int -+blktap_device_create(struct blktap *tap, struct blktap_params *params) -+{ -+ int minor, err; -+ struct gendisk *gd; -+ struct request_queue *rq; -+ struct blktap_device *tapdev; -+ -+ gd = NULL; -+ rq = NULL; -+ tapdev = &tap->device; -+ minor = tap->minor; -+ -+ if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) -+ return -EEXIST; -+ -+ if (blktap_device_validate_params(tap, params)) -+ return -EINVAL; -+ -+ gd = alloc_disk(1); -+ if (!gd) { -+ err = -ENOMEM; -+ goto fail; -+ } -+ -+ if (minor < 26) { -+ sprintf(gd->disk_name, "td%c", 'a' + minor % 26); -+ } else if (minor < (26 + 1) * 26) { -+ sprintf(gd->disk_name, "td%c%c", -+ 'a' + minor / 26 - 1,'a' + minor % 26); -+ } else { -+ const unsigned int m1 = (minor / 26 - 1) / 26 - 1; -+ const unsigned int m2 = (minor / 26 - 1) % 26; -+ const unsigned int m3 = minor % 26; -+ 
sprintf(gd->disk_name, "td%c%c%c", -+ 'a' + m1, 'a' + m2, 'a' + m3); -+ } -+ -+ gd->major = blktap_device_major; -+ gd->first_minor = minor; -+ gd->fops = &blktap_device_file_operations; -+ gd->private_data = tapdev; -+ -+ spin_lock_init(&tapdev->lock); -+ rq = blk_init_queue(blktap_device_do_request, &tapdev->lock); -+ if (!rq) { -+ err = -ENOMEM; -+ goto fail; -+ } -+ elevator_init(rq, "noop"); -+ -+ gd->queue = rq; -+ rq->queuedata = tapdev; -+ tapdev->gd = gd; -+ -+ blktap_device_configure(tap, params); -+ add_disk(gd); -+ -+ if (params->name[0]) -+ strncpy(tap->name, params->name, sizeof(tap->name)-1); -+ -+ set_bit(BLKTAP_DEVICE, &tap->dev_inuse); -+ -+ dev_info(disk_to_dev(gd), "sector-size: %u capacity: %llu\n", -+ queue_logical_block_size(rq), -+ (unsigned long long)get_capacity(gd)); -+ -+ return 0; -+ -+fail: -+ if (gd) -+ del_gendisk(gd); -+ if (rq) -+ blk_cleanup_queue(rq); -+ -+ return err; -+} -+ -+size_t -+blktap_device_debug(struct blktap *tap, char *buf, size_t size) -+{ -+ struct gendisk *disk = tap->device.gd; -+ struct request_queue *q; -+ struct block_device *bdev; -+ char *s = buf, *end = buf + size; -+ -+ if (!disk) -+ return 0; -+ -+ q = disk->queue; -+ -+ s += snprintf(s, end - s, -+ "disk capacity:%llu sector size:%u\n", -+ (unsigned long long)get_capacity(disk), -+ queue_logical_block_size(q)); -+ -+ s += snprintf(s, end - s, -+ "queue flags:%#lx plugged:%d stopped:%d empty:%d\n", -+ q->queue_flags, -+ blk_queue_plugged(q), blk_queue_stopped(q), -+ elv_queue_empty(q)); -+ -+ bdev = bdget_disk(disk, 0); -+ if (bdev) { -+ s += snprintf(s, end - s, -+ "bdev openers:%d closed:%d\n", -+ bdev->bd_openers, -+ test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)); -+ bdput(bdev); -+ } -+ -+ return s - buf; -+} -+ -+int __init -+blktap_device_init() -+{ -+ int major; -+ -+ /* Dynamically allocate a major for this device */ -+ major = register_blkdev(0, "tapdev"); -+ if (major < 0) { -+ BTERR("Couldn't register blktap device\n"); -+ return -ENOMEM; -+ 
} -+ -+ blktap_device_major = major; -+ BTINFO("blktap device major %d\n", major); -+ -+ return 0; -+} -+ -+void -+blktap_device_exit(void) -+{ -+ if (blktap_device_major) -+ unregister_blkdev(blktap_device_major, "tapdev"); -+} -diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c -new file mode 100644 -index 0000000..9bef48c ---- /dev/null -+++ b/drivers/xen/blktap/request.c -@@ -0,0 +1,418 @@ -+#include <linux/mempool.h> -+#include <linux/spinlock.h> -+#include <linux/mutex.h> -+#include <linux/sched.h> -+#include <linux/device.h> -+ -+#include "blktap.h" -+ -+/* max pages per shared pool. just to prevent accidental dos. */ -+#define POOL_MAX_PAGES (256*BLKIF_MAX_SEGMENTS_PER_REQUEST) -+ -+/* default page pool size. when considering to shrink a shared pool, -+ * note that paused tapdisks may grab a whole lot of pages for a long -+ * time. */ -+#define POOL_DEFAULT_PAGES (2 * MMAP_PAGES) -+ -+/* max number of pages allocatable per request. */ -+#define POOL_MAX_REQUEST_PAGES BLKIF_MAX_SEGMENTS_PER_REQUEST -+ -+/* min request structs per pool. These grow dynamically. */ -+#define POOL_MIN_REQS BLK_RING_SIZE -+ -+static struct kset *pool_set; -+ -+#define kobj_to_pool(_kobj) \ -+ container_of(_kobj, struct blktap_page_pool, kobj) -+ -+static struct kmem_cache *request_cache; -+static mempool_t *request_pool; -+ -+static void -+__page_pool_wake(struct blktap_page_pool *pool) -+{ -+ mempool_t *mem = pool->bufs; -+ -+ /* -+ NB. slightly wasteful to always wait for a full segment -+ set. but this ensures the next disk makes -+ progress. presently, the repeated request struct -+ alloc/release cycles would otherwise keep everyone spinning. 
-+ */ -+ -+ if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES) -+ wake_up(&pool->wait); -+} -+ -+int -+blktap_request_get_pages(struct blktap *tap, -+ struct blktap_request *request, int nr_pages) -+{ -+ struct blktap_page_pool *pool = tap->pool; -+ mempool_t *mem = pool->bufs; -+ struct page *page; -+ -+ BUG_ON(request->nr_pages != 0); -+ BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES); -+ -+ if (mem->curr_nr < nr_pages) -+ return -ENOMEM; -+ -+ /* NB. avoid thundering herds of tapdisks colliding. */ -+ spin_lock(&pool->lock); -+ -+ if (mem->curr_nr < nr_pages) { -+ spin_unlock(&pool->lock); -+ return -ENOMEM; -+ } -+ -+ while (request->nr_pages < nr_pages) { -+ page = mempool_alloc(mem, GFP_NOWAIT); -+ BUG_ON(!page); -+ request->pages[request->nr_pages++] = page; -+ } -+ -+ spin_unlock(&pool->lock); -+ -+ return 0; -+} -+ -+static void -+blktap_request_put_pages(struct blktap *tap, -+ struct blktap_request *request) -+{ -+ struct blktap_page_pool *pool = tap->pool; -+ struct page *page; -+ -+ while (request->nr_pages) { -+ page = request->pages[--request->nr_pages]; -+ mempool_free(page, pool->bufs); -+ } -+} -+ -+size_t -+blktap_request_debug(struct blktap *tap, char *buf, size_t size) -+{ -+ struct blktap_page_pool *pool = tap->pool; -+ mempool_t *mem = pool->bufs; -+ char *s = buf, *end = buf + size; -+ -+ s += snprintf(buf, end - s, -+ "pool:%s pages:%d free:%d\n", -+ kobject_name(&pool->kobj), -+ mem->min_nr, mem->curr_nr); -+ -+ return s - buf; -+} -+ -+struct blktap_request* -+blktap_request_alloc(struct blktap *tap) -+{ -+ struct blktap_request *request; -+ -+ request = mempool_alloc(request_pool, GFP_NOWAIT); -+ if (request) -+ request->tap = tap; -+ -+ return request; -+} -+ -+void -+blktap_request_free(struct blktap *tap, -+ struct blktap_request *request) -+{ -+ blktap_request_put_pages(tap, request); -+ -+ mempool_free(request, request_pool); -+ -+ __page_pool_wake(tap->pool); -+} -+ -+void -+blktap_request_bounce(struct blktap *tap, -+ struct blktap_request 
*request, -+ int seg, int write) -+{ -+ struct scatterlist *sg = &request->sg_table[seg]; -+ void *s, *p; -+ -+ BUG_ON(seg >= request->nr_pages); -+ -+ s = sg_virt(sg); -+ p = page_address(request->pages[seg]) + sg->offset; -+ -+ if (write) -+ memcpy(p, s, sg->length); -+ else -+ memcpy(s, p, sg->length); -+} -+ -+static void -+blktap_request_ctor(void *obj) -+{ -+ struct blktap_request *request = obj; -+ -+ memset(request, 0, sizeof(*request)); -+ sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table)); -+} -+ -+static int -+blktap_page_pool_resize(struct blktap_page_pool *pool, int target) -+{ -+ mempool_t *bufs = pool->bufs; -+ int err; -+ -+ /* NB. mempool asserts min_nr >= 1 */ -+ target = max(1, target); -+ -+ err = mempool_resize(bufs, target, GFP_KERNEL); -+ if (err) -+ return err; -+ -+ __page_pool_wake(pool); -+ -+ return 0; -+} -+ -+struct pool_attribute { -+ struct attribute attr; -+ -+ ssize_t (*show)(struct blktap_page_pool *pool, -+ char *buf); -+ -+ ssize_t (*store)(struct blktap_page_pool *pool, -+ const char *buf, size_t count); -+}; -+ -+#define kattr_to_pool_attr(_kattr) \ -+ container_of(_kattr, struct pool_attribute, attr) -+ -+static ssize_t -+blktap_page_pool_show_size(struct blktap_page_pool *pool, -+ char *buf) -+{ -+ mempool_t *mem = pool->bufs; -+ return sprintf(buf, "%d", mem->min_nr); -+} -+ -+static ssize_t -+blktap_page_pool_store_size(struct blktap_page_pool *pool, -+ const char *buf, size_t size) -+{ -+ int target; -+ -+ /* -+ * NB. target fixup to avoid undesired results. less than a -+ * full segment set can wedge the disk. much more than a -+ * couple times the physical queue depth is rarely useful. -+ */ -+ -+ target = simple_strtoul(buf, NULL, 0); -+ target = max(POOL_MAX_REQUEST_PAGES, target); -+ target = min(target, POOL_MAX_PAGES); -+ -+ return blktap_page_pool_resize(pool, target) ? 
: size; -+} -+ -+static struct pool_attribute blktap_page_pool_attr_size = -+ __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, -+ blktap_page_pool_show_size, -+ blktap_page_pool_store_size); -+ -+static ssize_t -+blktap_page_pool_show_free(struct blktap_page_pool *pool, -+ char *buf) -+{ -+ mempool_t *mem = pool->bufs; -+ return sprintf(buf, "%d", mem->curr_nr); -+} -+ -+static struct pool_attribute blktap_page_pool_attr_free = -+ __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH, -+ blktap_page_pool_show_free, -+ NULL); -+ -+static struct attribute *blktap_page_pool_attrs[] = { -+ &blktap_page_pool_attr_size.attr, -+ &blktap_page_pool_attr_free.attr, -+ NULL, -+}; -+ -+static inline struct kobject* -+__blktap_kset_find_obj(struct kset *kset, const char *name) -+{ -+ struct kobject *k; -+ struct kobject *ret = NULL; -+ -+ spin_lock(&kset->list_lock); -+ list_for_each_entry(k, &kset->list, entry) { -+ if (kobject_name(k) && !strcmp(kobject_name(k), name)) { -+ ret = kobject_get(k); -+ break; -+ } -+ } -+ spin_unlock(&kset->list_lock); -+ return ret; -+} -+ -+static ssize_t -+blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr, -+ char *buf) -+{ -+ struct blktap_page_pool *pool = kobj_to_pool(kobj); -+ struct pool_attribute *attr = kattr_to_pool_attr(kattr); -+ -+ if (attr->show) -+ return attr->show(pool, buf); -+ -+ return -EIO; -+} -+ -+static ssize_t -+blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr, -+ const char *buf, size_t size) -+{ -+ struct blktap_page_pool *pool = kobj_to_pool(kobj); -+ struct pool_attribute *attr = kattr_to_pool_attr(kattr); -+ -+ if (attr->show) -+ return attr->store(pool, buf, size); -+ -+ return -EIO; -+} -+ -+static struct sysfs_ops blktap_page_pool_sysfs_ops = { -+ .show = blktap_page_pool_show_attr, -+ .store = blktap_page_pool_store_attr, -+}; -+ -+static void -+blktap_page_pool_release(struct kobject *kobj) -+{ -+ struct blktap_page_pool *pool = kobj_to_pool(kobj); -+ mempool_destroy(pool->bufs); -+ 
kfree(pool); -+} -+ -+struct kobj_type blktap_page_pool_ktype = { -+ .release = blktap_page_pool_release, -+ .sysfs_ops = &blktap_page_pool_sysfs_ops, -+ .default_attrs = blktap_page_pool_attrs, -+}; -+ -+static void* -+__mempool_page_alloc(gfp_t gfp_mask, void *pool_data) -+{ -+ struct page *page; -+ -+ if (!(gfp_mask & __GFP_WAIT)) -+ return NULL; -+ -+ page = alloc_page(gfp_mask); -+ if (page) -+ SetPageReserved(page); -+ -+ return page; -+} -+ -+static void -+__mempool_page_free(void *element, void *pool_data) -+{ -+ struct page *page = element; -+ -+ ClearPageReserved(page); -+ put_page(page); -+} -+ -+static struct kobject* -+blktap_page_pool_create(const char *name, int nr_pages) -+{ -+ struct blktap_page_pool *pool; -+ int err; -+ -+ pool = kzalloc(sizeof(*pool), GFP_KERNEL); -+ if (!pool) -+ goto fail; -+ -+ spin_lock_init(&pool->lock); -+ init_waitqueue_head(&pool->wait); -+ -+ pool->bufs = mempool_create(nr_pages, -+ __mempool_page_alloc, __mempool_page_free, -+ pool); -+ if (!pool->bufs) -+ goto fail_pool; -+ -+ kobject_init(&pool->kobj, &blktap_page_pool_ktype); -+ pool->kobj.kset = pool_set; -+ err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name); -+ if (err) -+ goto fail_bufs; -+ -+ return &pool->kobj; -+ -+ kobject_del(&pool->kobj); -+fail_bufs: -+ mempool_destroy(pool->bufs); -+fail_pool: -+ kfree(pool); -+fail: -+ return NULL; -+} -+ -+struct blktap_page_pool* -+blktap_page_pool_get(const char *name) -+{ -+ struct kobject *kobj; -+ -+ kobj = __blktap_kset_find_obj(pool_set, name); -+ if (!kobj) -+ kobj = blktap_page_pool_create(name, -+ POOL_DEFAULT_PAGES); -+ if (!kobj) -+ return ERR_PTR(-ENOMEM); -+ -+ return kobj_to_pool(kobj); -+} -+ -+int __init -+blktap_page_pool_init(struct kobject *parent) -+{ -+ request_cache = -+ kmem_cache_create("blktap-request", -+ sizeof(struct blktap_request), 0, -+ 0, blktap_request_ctor); -+ if (!request_cache) -+ return -ENOMEM; -+ -+ request_pool = -+ mempool_create_slab_pool(POOL_MIN_REQS, 
request_cache); -+ if (!request_pool) -+ return -ENOMEM; -+ -+ pool_set = kset_create_and_add("pools", NULL, parent); -+ if (!pool_set) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+blktap_page_pool_exit(void) -+{ -+ if (pool_set) { -+ BUG_ON(!list_empty(&pool_set->list)); -+ kset_unregister(pool_set); -+ pool_set = NULL; -+ } -+ -+ if (request_pool) { -+ mempool_destroy(request_pool); -+ request_pool = NULL; -+ } -+ -+ if (request_cache) { -+ kmem_cache_destroy(request_cache); -+ request_cache = NULL; -+ } -+} -diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c -new file mode 100644 -index 0000000..6b86be5 ---- /dev/null -+++ b/drivers/xen/blktap/ring.c -@@ -0,0 +1,550 @@ -+ -+#include <linux/device.h> -+#include <linux/signal.h> -+#include <linux/sched.h> -+#include <linux/poll.h> -+#include <linux/blkdev.h> -+ -+#include "blktap.h" -+ -+int blktap_ring_major; -+static struct cdev blktap_ring_cdev; -+ -+ /* -+ * BLKTAP - immediately before the mmap area, -+ * we have a bunch of pages reserved for shared memory rings. -+ */ -+#define RING_PAGES 1 -+ -+static void -+blktap_ring_read_response(struct blktap *tap, -+ const struct blkif_response *rsp) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ struct blktap_request *request; -+ int usr_idx, err; -+ -+ request = NULL; -+ -+ usr_idx = rsp->id; -+ if (usr_idx < 0 || usr_idx >= MAX_PENDING_REQS) { -+ err = -ERANGE; -+ goto invalid; -+ } -+ -+ request = ring->pending[usr_idx]; -+ -+ if (!request) { -+ err = -ESRCH; -+ goto invalid; -+ } -+ -+ if (rsp->operation != request->operation) { -+ err = -EINVAL; -+ goto invalid; -+ } -+ -+ dev_dbg(ring->dev, -+ "request %d [%p] response: %d\n", -+ request->usr_idx, request, rsp->status); -+ -+ err = rsp->status == BLKIF_RSP_OKAY ? 
0 : -EIO; -+end_request: -+ blktap_device_end_request(tap, request, err); -+ return; -+ -+invalid: -+ dev_warn(ring->dev, -+ "invalid response, idx:%d status:%d op:%d/%d: err %d\n", -+ usr_idx, rsp->status, -+ rsp->operation, request->operation, -+ err); -+ if (request) -+ goto end_request; -+} -+ -+static void -+blktap_read_ring(struct blktap *tap) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ struct blkif_response rsp; -+ RING_IDX rc, rp; -+ -+ down_read(¤t->mm->mmap_sem); -+ if (!ring->vma) { -+ up_read(¤t->mm->mmap_sem); -+ return; -+ } -+ -+ /* for each outstanding message on the ring */ -+ rp = ring->ring.sring->rsp_prod; -+ rmb(); -+ -+ for (rc = ring->ring.rsp_cons; rc != rp; rc++) { -+ memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp)); -+ blktap_ring_read_response(tap, &rsp); -+ } -+ -+ ring->ring.rsp_cons = rc; -+ -+ up_read(¤t->mm->mmap_sem); -+} -+ -+static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -+{ -+ return VM_FAULT_SIGBUS; -+} -+ -+static void -+blktap_ring_fail_pending(struct blktap *tap) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ struct blktap_request *request; -+ int usr_idx; -+ -+ for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { -+ request = ring->pending[usr_idx]; -+ if (!request) -+ continue; -+ -+ blktap_device_end_request(tap, request, -EIO); -+ } -+} -+ -+static void -+blktap_ring_vm_close(struct vm_area_struct *vma) -+{ -+ struct blktap *tap = vma->vm_private_data; -+ struct blktap_ring *ring = &tap->ring; -+ struct page *page = virt_to_page(ring->ring.sring); -+ -+ blktap_ring_fail_pending(tap); -+ -+ zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL); -+ ClearPageReserved(page); -+ __free_page(page); -+ -+ ring->vma = NULL; -+ -+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) -+ blktap_control_destroy_tap(tap); -+} -+ -+static struct vm_operations_struct blktap_ring_vm_operations = { -+ .close = blktap_ring_vm_close, -+ .fault = blktap_ring_fault, -+}; -+ -+int 
-+blktap_ring_map_segment(struct blktap *tap, -+ struct blktap_request *request, -+ int seg) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ unsigned long uaddr; -+ -+ uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg); -+ return vm_insert_page(ring->vma, uaddr, request->pages[seg]); -+} -+ -+int -+blktap_ring_map_request(struct blktap *tap, -+ struct blktap_request *request) -+{ -+ int seg, err = 0; -+ int write; -+ -+ write = request->operation == BLKIF_OP_WRITE; -+ -+ for (seg = 0; seg < request->nr_pages; seg++) { -+ if (write) -+ blktap_request_bounce(tap, request, seg, write); -+ -+ err = blktap_ring_map_segment(tap, request, seg); -+ if (err) -+ break; -+ } -+ -+ if (err) -+ blktap_ring_unmap_request(tap, request); -+ -+ return err; -+} -+ -+void -+blktap_ring_unmap_request(struct blktap *tap, -+ struct blktap_request *request) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ unsigned long uaddr; -+ unsigned size; -+ int seg, read; -+ -+ uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0); -+ size = request->nr_pages << PAGE_SHIFT; -+ read = request->operation == BLKIF_OP_READ; -+ -+ if (read) -+ for (seg = 0; seg < request->nr_pages; seg++) -+ blktap_request_bounce(tap, request, seg, !read); -+ -+ zap_page_range(ring->vma, uaddr, size, NULL); -+} -+ -+void -+blktap_ring_free_request(struct blktap *tap, -+ struct blktap_request *request) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ -+ ring->pending[request->usr_idx] = NULL; -+ ring->n_pending--; -+ -+ blktap_request_free(tap, request); -+} -+ -+struct blktap_request* -+blktap_ring_make_request(struct blktap *tap) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ struct blktap_request *request; -+ int usr_idx; -+ -+ if (RING_FULL(&ring->ring)) -+ return ERR_PTR(-ENOSPC); -+ -+ request = blktap_request_alloc(tap); -+ if (!request) -+ return ERR_PTR(-ENOMEM); -+ -+ for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++) -+ if (!ring->pending[usr_idx]) -+ break; -+ -+ BUG_ON(usr_idx >= 
BLK_RING_SIZE); -+ -+ request->tap = tap; -+ request->usr_idx = usr_idx; -+ -+ ring->pending[usr_idx] = request; -+ ring->n_pending++; -+ -+ return request; -+} -+ -+void -+blktap_ring_submit_request(struct blktap *tap, -+ struct blktap_request *request) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ struct blkif_request *breq; -+ struct scatterlist *sg; -+ int i, nsecs = 0; -+ -+ dev_dbg(ring->dev, -+ "request %d [%p] submit\n", request->usr_idx, request); -+ -+ breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt); -+ -+ breq->id = request->usr_idx; -+ breq->sector_number = blk_rq_pos(request->rq); -+ breq->handle = 0; -+ breq->operation = request->operation; -+ breq->nr_segments = request->nr_pages; -+ -+ blktap_for_each_sg(sg, request, i) { -+ struct blkif_request_segment *seg = &breq->seg[i]; -+ int first, count; -+ -+ count = sg->length >> 9; -+ first = sg->offset >> 9; -+ -+ seg->first_sect = first; -+ seg->last_sect = first + count - 1; -+ -+ nsecs += count; -+ } -+ -+ ring->ring.req_prod_pvt++; -+ -+ do_gettimeofday(&request->time); -+ -+ -+ if (request->operation == BLKIF_OP_WRITE) { -+ tap->stats.st_wr_sect += nsecs; -+ tap->stats.st_wr_req++; -+ } -+ -+ if (request->operation == BLKIF_OP_READ) { -+ tap->stats.st_rd_sect += nsecs; -+ tap->stats.st_rd_req++; -+ } -+} -+ -+static int -+blktap_ring_open(struct inode *inode, struct file *filp) -+{ -+ struct blktap *tap = NULL; -+ int minor; -+ -+ minor = iminor(inode); -+ -+ if (minor < blktap_max_minor) -+ tap = blktaps[minor]; -+ -+ if (!tap) -+ return -ENXIO; -+ -+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) -+ return -ENXIO; -+ -+ if (tap->ring.task) -+ return -EBUSY; -+ -+ filp->private_data = tap; -+ tap->ring.task = current; -+ -+ return 0; -+} -+ -+static int -+blktap_ring_release(struct inode *inode, struct file *filp) -+{ -+ struct blktap *tap = filp->private_data; -+ -+ blktap_device_destroy_sync(tap); -+ -+ tap->ring.task = NULL; -+ -+ if 
(test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) -+ blktap_control_destroy_tap(tap); -+ -+ return 0; -+} -+ -+static int -+blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma) -+{ -+ struct blktap *tap = filp->private_data; -+ struct blktap_ring *ring = &tap->ring; -+ struct blkif_sring *sring; -+ struct page *page = NULL; -+ int err; -+ -+ if (ring->vma) -+ return -EBUSY; -+ -+ page = alloc_page(GFP_KERNEL|__GFP_ZERO); -+ if (!page) -+ return -ENOMEM; -+ -+ SetPageReserved(page); -+ -+ err = vm_insert_page(vma, vma->vm_start, page); -+ if (err) -+ goto fail; -+ -+ sring = page_address(page); -+ SHARED_RING_INIT(sring); -+ FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE); -+ -+ ring->ring_vstart = vma->vm_start; -+ ring->user_vstart = ring->ring_vstart + PAGE_SIZE; -+ -+ vma->vm_private_data = tap; -+ -+ vma->vm_flags |= VM_DONTCOPY; -+ vma->vm_flags |= VM_RESERVED; -+ -+ vma->vm_ops = &blktap_ring_vm_operations; -+ -+ ring->vma = vma; -+ return 0; -+ -+fail: -+ if (page) { -+ zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL); -+ ClearPageReserved(page); -+ __free_page(page); -+ } -+ -+ return err; -+} -+ -+static int -+blktap_ring_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct blktap *tap = filp->private_data; -+ struct blktap_ring *ring = &tap->ring; -+ -+ BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg); -+ -+ if (!ring->vma || ring->vma->vm_mm != current->mm) -+ return -EACCES; -+ -+ switch(cmd) { -+ case BLKTAP2_IOCTL_KICK_FE: -+ -+ blktap_read_ring(tap); -+ return 0; -+ -+ case BLKTAP2_IOCTL_CREATE_DEVICE: { -+ struct blktap_params params; -+ void __user *ptr = (void *)arg; -+ -+ if (!arg) -+ return -EINVAL; -+ -+ if (copy_from_user(¶ms, ptr, sizeof(params))) -+ return -EFAULT; -+ -+ return blktap_device_create(tap, ¶ms); -+ } -+ -+ case BLKTAP2_IOCTL_REMOVE_DEVICE: -+ -+ return blktap_device_destroy(tap); -+ } -+ -+ return -ENOIOCTLCMD; -+} -+ -+static unsigned int 
blktap_ring_poll(struct file *filp, poll_table *wait) -+{ -+ struct blktap *tap = filp->private_data; -+ struct blktap_ring *ring = &tap->ring; -+ int work; -+ -+ poll_wait(filp, &tap->pool->wait, wait); -+ poll_wait(filp, &ring->poll_wait, wait); -+ -+ down_read(¤t->mm->mmap_sem); -+ if (ring->vma && tap->device.gd) -+ blktap_device_run_queue(tap); -+ up_read(¤t->mm->mmap_sem); -+ -+ work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod; -+ RING_PUSH_REQUESTS(&ring->ring); -+ -+ if (work || -+ ring->ring.sring->private.tapif_user.msg || -+ test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)) -+ return POLLIN | POLLRDNORM; -+ -+ return 0; -+} -+ -+static struct file_operations blktap_ring_file_operations = { -+ .owner = THIS_MODULE, -+ .open = blktap_ring_open, -+ .release = blktap_ring_release, -+ .ioctl = blktap_ring_ioctl, -+ .mmap = blktap_ring_mmap, -+ .poll = blktap_ring_poll, -+}; -+ -+void -+blktap_ring_kick_user(struct blktap *tap) -+{ -+ wake_up(&tap->ring.poll_wait); -+} -+ -+int -+blktap_ring_destroy(struct blktap *tap) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ -+ if (ring->task || ring->vma) -+ return -EBUSY; -+ -+ return 0; -+} -+ -+int -+blktap_ring_create(struct blktap *tap) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ -+ init_waitqueue_head(&ring->poll_wait); -+ ring->devno = MKDEV(blktap_ring_major, tap->minor); -+ -+ return 0; -+} -+ -+size_t -+blktap_ring_debug(struct blktap *tap, char *buf, size_t size) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ char *s = buf, *end = buf + size; -+ int usr_idx; -+ -+ s += snprintf(s, end - s, -+ "begin pending:%d\n", ring->n_pending); -+ -+ for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { -+ struct blktap_request *request; -+ struct timeval *time; -+ int write; -+ -+ request = ring->pending[usr_idx]; -+ if (!request) -+ continue; -+ -+ write = request->operation == BLKIF_OP_WRITE; -+ time = &request->time; -+ -+ s += snprintf(s, end - s, -+ "%02d: usr_idx:%02d " -+ "op:%c 
nr_pages:%02d time:%lu.%09lu\n", -+ usr_idx, request->usr_idx, -+ write ? 'W' : 'R', request->nr_pages, -+ time->tv_sec, time->tv_usec); -+ } -+ -+ s += snprintf(s, end - s, "end pending\n"); -+ -+ return s - buf; -+} -+ -+ -+int __init -+blktap_ring_init(void) -+{ -+ dev_t dev = 0; -+ int err; -+ -+ cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations); -+ blktap_ring_cdev.owner = THIS_MODULE; -+ -+ err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2"); -+ if (err < 0) { -+ BTERR("error registering ring devices: %d\n", err); -+ return err; -+ } -+ -+ err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE); -+ if (err) { -+ BTERR("error adding ring device: %d\n", err); -+ unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE); -+ return err; -+ } -+ -+ blktap_ring_major = MAJOR(dev); -+ BTINFO("blktap ring major: %d\n", blktap_ring_major); -+ -+ return 0; -+} -+ -+void -+blktap_ring_exit(void) -+{ -+ if (!blktap_ring_major) -+ return; -+ -+ cdev_del(&blktap_ring_cdev); -+ unregister_chrdev_region(MKDEV(blktap_ring_major, 0), -+ MAX_BLKTAP_DEVICE); -+ -+ blktap_ring_major = 0; -+} -diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c -new file mode 100644 -index 0000000..3c424af ---- /dev/null -+++ b/drivers/xen/blktap/sysfs.c -@@ -0,0 +1,288 @@ -+#include <linux/types.h> -+#include <linux/device.h> -+#include <linux/module.h> -+#include <linux/sched.h> -+#include <linux/genhd.h> -+#include <linux/blkdev.h> -+ -+#include "blktap.h" -+ -+int blktap_debug_level = 1; -+ -+static struct class *class; -+ -+static ssize_t -+blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) -+{ -+ struct blktap *tap; -+ -+ tap = dev_get_drvdata(dev); -+ if (!tap) -+ return 0; -+ -+ if (size >= BLKTAP2_MAX_MESSAGE_LEN) -+ return -ENAMETOOLONG; -+ -+ if (strnlen(buf, size) != size) -+ return -EINVAL; -+ -+ strcpy(tap->name, buf); -+ -+ return size; -+} -+ -+static ssize_t -+blktap_sysfs_get_name(struct device 
*dev, struct device_attribute *attr, char *buf) -+{ -+ struct blktap *tap; -+ ssize_t size; -+ -+ tap = dev_get_drvdata(dev); -+ if (!tap) -+ return 0; -+ -+ if (tap->name[0]) -+ size = sprintf(buf, "%s\n", tap->name); -+ else -+ size = sprintf(buf, "%d\n", tap->minor); -+ -+ return size; -+} -+static DEVICE_ATTR(name, S_IRUGO|S_IWUSR, -+ blktap_sysfs_get_name, blktap_sysfs_set_name); -+ -+static void -+blktap_sysfs_remove_work(struct work_struct *work) -+{ -+ struct blktap *tap -+ = container_of(work, struct blktap, remove_work); -+ blktap_control_destroy_tap(tap); -+} -+ -+static ssize_t -+blktap_sysfs_remove_device(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t size) -+{ -+ struct blktap *tap; -+ int err; -+ -+ tap = dev_get_drvdata(dev); -+ if (!tap) -+ return size; -+ -+ if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) -+ goto wait; -+ -+ if (tap->ring.vma) { -+ struct blkif_sring *sring = tap->ring.ring.sring; -+ sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE; -+ blktap_ring_kick_user(tap); -+ } else { -+ INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work); -+ schedule_work(&tap->remove_work); -+ } -+wait: -+ err = wait_event_interruptible(tap->remove_wait, -+ !dev_get_drvdata(dev)); -+ if (err) -+ return err; -+ -+ return size; -+} -+static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device); -+ -+static ssize_t -+blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf) -+{ -+ struct blktap *tap; -+ char *s = buf, *end = buf + PAGE_SIZE; -+ -+ tap = dev_get_drvdata(dev); -+ if (!tap) -+ return 0; -+ -+ s += blktap_control_debug(tap, s, end - s); -+ -+ s += blktap_request_debug(tap, s, end - s); -+ -+ s += blktap_device_debug(tap, s, end - s); -+ -+ s += blktap_ring_debug(tap, s, end - s); -+ -+ return s - buf; -+} -+static DEVICE_ATTR(debug, S_IRUGO, blktap_sysfs_debug_device, NULL); -+ -+static ssize_t -+blktap_sysfs_show_task(struct device *dev, 
struct device_attribute *attr, char *buf) -+{ -+ struct blktap *tap; -+ ssize_t rv = 0; -+ -+ tap = dev_get_drvdata(dev); -+ if (!tap) -+ return 0; -+ -+ if (tap->ring.task) -+ rv = sprintf(buf, "%d\n", tap->ring.task->pid); -+ -+ return rv; -+} -+static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL); -+ -+static ssize_t -+blktap_sysfs_show_pool(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct blktap *tap = dev_get_drvdata(dev); -+ return sprintf(buf, "%s", kobject_name(&tap->pool->kobj)); -+} -+ -+static ssize_t -+blktap_sysfs_store_pool(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t size) -+{ -+ struct blktap *tap = dev_get_drvdata(dev); -+ struct blktap_page_pool *pool, *tmp = tap->pool; -+ -+ if (tap->device.gd) -+ return -EBUSY; -+ -+ pool = blktap_page_pool_get(buf); -+ if (IS_ERR(pool)) -+ return PTR_ERR(pool); -+ -+ tap->pool = pool; -+ kobject_put(&tmp->kobj); -+ -+ return size; -+} -+DEVICE_ATTR(pool, S_IRUSR|S_IWUSR, -+ blktap_sysfs_show_pool, blktap_sysfs_store_pool); -+ -+int -+blktap_sysfs_create(struct blktap *tap) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ struct device *dev; -+ int err = 0; -+ -+ init_waitqueue_head(&tap->remove_wait); -+ -+ dev = device_create(class, NULL, ring->devno, -+ tap, "blktap%d", tap->minor); -+ if (IS_ERR(dev)) -+ err = PTR_ERR(dev); -+ if (!err) -+ err = device_create_file(dev, &dev_attr_name); -+ if (!err) -+ err = device_create_file(dev, &dev_attr_remove); -+ if (!err) -+ err = device_create_file(dev, &dev_attr_debug); -+ if (!err) -+ err = device_create_file(dev, &dev_attr_task); -+ if (!err) -+ err = device_create_file(dev, &dev_attr_pool); -+ if (!err) -+ ring->dev = dev; -+ else -+ device_unregister(dev); -+ -+ return err; -+} -+ -+void -+blktap_sysfs_destroy(struct blktap *tap) -+{ -+ struct blktap_ring *ring = &tap->ring; -+ struct device *dev; -+ -+ dev = ring->dev; -+ -+ if (!dev) -+ return; -+ -+ dev_set_drvdata(dev, NULL); -+ 
wake_up(&tap->remove_wait); -+ -+ device_unregister(dev); -+ ring->dev = NULL; -+} -+ -+static ssize_t -+blktap_sysfs_show_verbosity(struct class *class, char *buf) -+{ -+ return sprintf(buf, "%d\n", blktap_debug_level); -+} -+ -+static ssize_t -+blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size) -+{ -+ int level; -+ -+ if (sscanf(buf, "%d", &level) == 1) { -+ blktap_debug_level = level; -+ return size; -+ } -+ -+ return -EINVAL; -+} -+static CLASS_ATTR(verbosity, S_IRUGO|S_IWUSR, -+ blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity); -+ -+static ssize_t -+blktap_sysfs_show_devices(struct class *class, char *buf) -+{ -+ int i, ret; -+ struct blktap *tap; -+ -+ mutex_lock(&blktap_lock); -+ -+ ret = 0; -+ for (i = 0; i < blktap_max_minor; i++) { -+ tap = blktaps[i]; -+ if (!tap) -+ continue; -+ -+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) -+ continue; -+ -+ ret += sprintf(buf + ret, "%d %s\n", tap->minor, tap->name); -+ } -+ -+ mutex_unlock(&blktap_lock); -+ -+ return ret; -+} -+static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL); -+ -+void -+blktap_sysfs_exit(void) -+{ -+ if (class) -+ class_destroy(class); -+} -+ -+int __init -+blktap_sysfs_init(void) -+{ -+ struct class *cls; -+ int err = 0; -+ -+ cls = class_create(THIS_MODULE, "blktap2"); -+ if (IS_ERR(cls)) -+ err = PTR_ERR(cls); -+ if (!err) -+ err = class_create_file(cls, &class_attr_verbosity); -+ if (!err) -+ err = class_create_file(cls, &class_attr_devices); -+ if (!err) -+ class = cls; -+ else -+ class_destroy(cls); -+ -+ return err; -+} -diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c -index bdfd584..6625ffe 100644 ---- a/drivers/xen/cpu_hotplug.c -+++ b/drivers/xen/cpu_hotplug.c -@@ -1,5 +1,6 @@ - #include <linux/notifier.h> - -+#include <xen/xen.h> - #include <xen/xenbus.h> - - #include <asm/xen/hypervisor.h> -diff --git a/drivers/xen/events.c b/drivers/xen/events.c -index 1417015..ac7b42f 100644 ---- a/drivers/xen/events.c -+++ 
b/drivers/xen/events.c -@@ -16,7 +16,7 @@ - * (typically dom0). - * 2. VIRQs, typically used for timers. These are per-cpu events. - * 3. IPIs. -- * 4. Hardware interrupts. Not supported at present. -+ * 4. PIRQs - Hardware interrupts. - * - * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 - */ -@@ -27,18 +27,32 @@ - #include <linux/module.h> - #include <linux/string.h> - #include <linux/bootmem.h> -+#include <linux/irqnr.h> -+#include <linux/pci_regs.h> -+#include <linux/pci.h> -+#include <linux/msi.h> - -+#include <asm/desc.h> - #include <asm/ptrace.h> - #include <asm/irq.h> - #include <asm/idle.h> -+#include <asm/io_apic.h> - #include <asm/sync_bitops.h> - #include <asm/xen/hypercall.h> - #include <asm/xen/hypervisor.h> -+#include <asm/xen/pci.h> - -+#include <xen/xen.h> -+#include <xen/hvm.h> - #include <xen/xen-ops.h> - #include <xen/events.h> - #include <xen/interface/xen.h> - #include <xen/interface/event_channel.h> -+#include <xen/interface/hvm/hvm_op.h> -+#include <xen/interface/hvm/params.h> -+#include <xen/page.h> -+ -+#include "../pci/msi.h" - - /* - * This lock protects updates to the following mapping and reference-count -@@ -67,7 +81,7 @@ enum xen_irq_type { - * event channel - irq->event channel mapping - * cpu - cpu this event channel is bound to - * index - type-specific information: -- * PIRQ - vector, with MSB being "needs EIO" -+ * PIRQ - with MSB being "needs EIO" - * VIRQ - virq number - * IPI - IPI vector - * EVTCHN - -@@ -83,20 +97,30 @@ struct irq_info - enum ipi_vector ipi; - struct { - unsigned short gsi; -- unsigned short vector; -+ unsigned char vector; -+ unsigned char flags; -+ uint16_t domid; - } pirq; - } u; - }; -+#define PIRQ_SHAREABLE (1 << 1) - --static struct irq_info irq_info[NR_IRQS]; -+/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */ -+static bool pirq_eoi_does_unmask; -+static unsigned long *pirq_needs_eoi_bits; - --static int evtchn_to_irq[NR_EVENT_CHANNELS] = { -- [0 ... 
NR_EVENT_CHANNELS-1] = -1 --}; -+static struct irq_info *irq_info; -+ -+static int *evtchn_to_irq; - struct cpu_evtchn_s { - unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG]; - }; --static struct cpu_evtchn_s *cpu_evtchn_mask_p; -+ -+static __initdata struct cpu_evtchn_s init_evtchn_mask = { -+ .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul, -+}; -+static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask; -+ - static inline unsigned long *cpu_evtchn_mask(int cpu) - { - return cpu_evtchn_mask_p[cpu].bits; -@@ -107,6 +131,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) - - static struct irq_chip xen_dynamic_chip; - static struct irq_chip xen_percpu_chip; -+static struct irq_chip xen_pirq_chip; - - /* Constructor for packed IRQ information. */ - static struct irq_info mk_unbound_info(void) -@@ -136,7 +161,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn, - unsigned short gsi, unsigned short vector) - { - return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, -- .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } }; -+ .cpu = 0, .u.pirq = -+ { .gsi = gsi, .vector = vector, .domid = DOMID_SELF } }; - } - - /* -@@ -219,6 +245,15 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn) - return ret; - } - -+static bool pirq_needs_eoi(unsigned irq) -+{ -+ struct irq_info *info = info_for_irq(irq); -+ -+ BUG_ON(info->type != IRQT_PIRQ); -+ -+ return test_bit(info->u.pirq.gsi, pirq_needs_eoi_bits); -+} -+ - static inline unsigned long active_evtchns(unsigned int cpu, - struct shared_info *sh, - unsigned int idx) -@@ -237,17 +272,17 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) - cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); - #endif - -- __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); -- __set_bit(chn, cpu_evtchn_mask(cpu)); -+ clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); -+ set_bit(chn, cpu_evtchn_mask(cpu)); - - irq_info[irq].cpu = cpu; - } - - static void 
init_evtchn_cpu_bindings(void) - { -+ int i; - #ifdef CONFIG_SMP - struct irq_desc *desc; -- int i; - - /* By default all event channels notify CPU#0. */ - for_each_irq_desc(i, desc) { -@@ -255,7 +290,9 @@ static void init_evtchn_cpu_bindings(void) - } - #endif - -- memset(cpu_evtchn_mask(0), ~0, sizeof(struct cpu_evtchn_s)); -+ for_each_possible_cpu(i) -+ memset(cpu_evtchn_mask(i), -+ (i == 0) ? ~0 : 0, sizeof(struct cpu_evtchn_s)); - } - - static inline void clear_evtchn(int port) -@@ -300,6 +337,14 @@ static void mask_evtchn(int port) - sync_set_bit(port, &s->evtchn_mask[0]); - } - -+static void mask_irq(unsigned int irq) -+{ -+ int evtchn = evtchn_from_irq(irq); -+ -+ if (VALID_EVTCHN(evtchn)) -+ mask_evtchn(evtchn); -+} -+ - static void unmask_evtchn(int port) - { - struct shared_info *s = HYPERVISOR_shared_info; -@@ -330,26 +375,370 @@ static void unmask_evtchn(int port) - put_cpu(); - } - -+static void unmask_irq(unsigned int irq) -+{ -+ int evtchn = evtchn_from_irq(irq); -+ -+ if (VALID_EVTCHN(evtchn)) -+ unmask_evtchn(evtchn); -+} -+ -+static int get_nr_hw_irqs(void) -+{ -+ int ret = 1; -+ -+#ifdef CONFIG_X86_IO_APIC -+ ret = get_nr_irqs_gsi(); -+#endif -+ -+ return ret; -+} -+ - static int find_unbound_irq(void) - { - int irq; - struct irq_desc *desc; -+ int start = get_nr_hw_irqs(); - -- for (irq = 0; irq < nr_irqs; irq++) -+ if (start == nr_irqs) -+ goto no_irqs; -+ -+ /* nr_irqs is a magic value. 
Must not use it.*/ -+ for (irq = nr_irqs-1; irq > start; irq--) { -+ desc = irq_to_desc(irq); -+ /* only 0->15 have init'd desc; handle irq > 16 */ -+ if (desc == NULL) -+ break; -+ if (desc->chip == &no_irq_chip) -+ break; -+ if (desc->chip != &xen_dynamic_chip) -+ continue; - if (irq_info[irq].type == IRQT_UNBOUND) - break; -+ } - -- if (irq == nr_irqs) -- panic("No available IRQ to bind to: increase nr_irqs!\n"); -+ if (irq == start) -+ goto no_irqs; - -- desc = irq_to_desc_alloc_node(irq, 0); -+ desc = irq_to_desc_alloc_node(irq, -1); - if (WARN_ON(desc == NULL)) - return -1; - -- dynamic_irq_init(irq); -+ dynamic_irq_init_keep_chip_data(irq); -+ -+ return irq; -+ -+no_irqs: -+ panic("No available IRQ to bind to: increase nr_irqs!\n"); -+} -+ -+static bool identity_mapped_irq(unsigned irq) -+{ -+ /* identity map all the hardware irqs */ -+ return irq < get_nr_hw_irqs(); -+} -+ -+static void pirq_eoi(unsigned int irq) -+{ -+ struct irq_info *info = info_for_irq(irq); -+ struct physdev_eoi eoi = { .irq = info->u.pirq.gsi }; -+ bool need_eoi; -+ -+ need_eoi = pirq_needs_eoi(irq); -+ -+ if (!need_eoi || !pirq_eoi_does_unmask) -+ unmask_evtchn(info->evtchn); -+ -+ if (need_eoi) { -+ int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); -+ WARN_ON(rc); -+ } -+} -+ -+static void pirq_query_unmask(int irq) -+{ -+ struct physdev_irq_status_query irq_status; -+ struct irq_info *info = info_for_irq(irq); -+ -+ if (pirq_eoi_does_unmask) -+ return; -+ -+ BUG_ON(info->type != IRQT_PIRQ); -+ -+ irq_status.irq = info->u.pirq.gsi; -+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) -+ irq_status.flags = 0; -+ -+ clear_bit(info->u.pirq.gsi, pirq_needs_eoi_bits); -+ if (irq_status.flags & XENIRQSTAT_needs_eoi) -+ set_bit(info->u.pirq.gsi, pirq_needs_eoi_bits); -+} -+ -+static bool probing_irq(int irq) -+{ -+ struct irq_desc *desc = irq_to_desc(irq); -+ -+ return desc && desc->action == NULL; -+} -+ -+static unsigned int startup_pirq(unsigned int irq) -+{ -+ 
struct evtchn_bind_pirq bind_pirq; -+ struct irq_info *info = info_for_irq(irq); -+ int evtchn = evtchn_from_irq(irq); -+ int rc; -+ -+ BUG_ON(info->type != IRQT_PIRQ); -+ -+ if (VALID_EVTCHN(evtchn)) -+ goto out; -+ -+ bind_pirq.pirq = info->u.pirq.gsi; -+ /* NB. We are happy to share unless we are probing. */ -+ bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? -+ BIND_PIRQ__WILL_SHARE : 0; -+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); -+ if (rc != 0) { -+ if (!probing_irq(irq)) -+ printk(KERN_INFO "Failed to obtain physical IRQ %d" \ -+ " (GSI:%d)\n", irq, info->u.pirq.gsi); -+ return 0; -+ } -+ evtchn = bind_pirq.port; -+ -+ pirq_query_unmask(irq); -+ -+ evtchn_to_irq[evtchn] = irq; -+ bind_evtchn_to_cpu(evtchn, 0); -+ info->evtchn = evtchn; -+ -+ out: -+ pirq_eoi(irq); -+ -+ return 0; -+} -+ -+static void shutdown_pirq(unsigned int irq) -+{ -+ struct evtchn_close close; -+ struct irq_info *info = info_for_irq(irq); -+ int evtchn = evtchn_from_irq(irq); -+ -+ BUG_ON(info->type != IRQT_PIRQ); -+ -+ if (!VALID_EVTCHN(evtchn)) -+ return; -+ -+ mask_evtchn(evtchn); -+ -+ close.port = evtchn; -+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) -+ BUG(); -+ -+ bind_evtchn_to_cpu(evtchn, 0); -+ evtchn_to_irq[evtchn] = -1; -+ info->evtchn = 0; -+} -+ -+static void ack_pirq(unsigned int irq) -+{ -+ move_masked_irq(irq); -+ -+ pirq_eoi(irq); -+} -+ -+static void end_pirq(unsigned int irq) -+{ -+ int evtchn = evtchn_from_irq(irq); -+ struct irq_desc *desc = irq_to_desc(irq); -+ -+ if (WARN_ON(!desc)) -+ return; -+ -+ if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) == -+ (IRQ_DISABLED|IRQ_PENDING)) { -+ shutdown_pirq(irq); -+ } else if (VALID_EVTCHN(evtchn)) { -+ pirq_eoi(irq); -+ } -+} -+ -+static int find_irq_by_gsi(unsigned gsi) -+{ -+ int irq; -+ -+ for (irq = 0; irq < nr_irqs; irq++) { -+ struct irq_info *info = info_for_irq(irq); -+ -+ if (info == NULL || info->type != IRQT_PIRQ) -+ continue; -+ -+ if (gsi_from_irq(irq) == 
gsi) -+ return irq; -+ } -+ -+ return -1; -+} -+ -+/* -+ * Allocate a physical irq, along with a vector. We don't assign an -+ * event channel until the irq actually started up. Return an -+ * existing irq if we've already got one for the gsi. -+ */ -+int xen_allocate_pirq(unsigned gsi, int shareable, char *name) -+{ -+ int irq; -+ struct physdev_irq irq_op; -+ -+ spin_lock(&irq_mapping_update_lock); -+ -+ irq = find_irq_by_gsi(gsi); -+ if (irq != -1) { -+ printk(KERN_INFO "xen_allocate_pirq: returning irq %d for gsi %u\n", -+ irq, gsi); -+ goto out; /* XXX need refcount? */ -+ } -+ -+ /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore -+ * we are using the !xen_initial_domain() to drop in the function.*/ -+ if (identity_mapped_irq(gsi) || !xen_initial_domain()) { -+ irq = gsi; -+ irq_to_desc_alloc_node(irq, 0); -+ dynamic_irq_init(irq); -+ } else -+ irq = find_unbound_irq(); -+ -+ set_irq_chip_and_handler_name(irq, &xen_pirq_chip, -+ handle_fasteoi_irq, name); -+ -+ irq_op.irq = gsi; -+ irq_op.vector = 0; -+ -+ /* Only the privileged domain can do this. For non-priv, the pcifront -+ * driver provides a PCI bus that does the call to do exactly -+ * this in the priv domain. */ -+ if (xen_initial_domain() && -+ HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { -+ dynamic_irq_cleanup(irq); -+ irq = -ENOSPC; -+ goto out; -+ } -+ -+ irq_info[irq] = mk_pirq_info(0, gsi, irq_op.vector); -+ irq_info[irq].u.pirq.flags |= shareable ? 
PIRQ_SHAREABLE : 0; -+ -+out: -+ spin_unlock(&irq_mapping_update_lock); -+ -+ return irq; -+} -+ -+#ifdef CONFIG_PCI_MSI -+int xen_destroy_irq(int irq) -+{ -+ struct irq_desc *desc; -+ struct physdev_unmap_pirq unmap_irq; -+ struct irq_info *info = info_for_irq(irq); -+ int rc = -ENOENT; -+ -+ spin_lock(&irq_mapping_update_lock); -+ -+ desc = irq_to_desc(irq); -+ if (!desc) -+ goto out; -+ -+ if (xen_initial_domain()) { -+ unmap_irq.pirq = info->u.pirq.gsi; -+ unmap_irq.domid = info->u.pirq.domid; -+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); -+ if (rc) { -+ printk(KERN_WARNING "unmap irq failed %d\n", rc); -+ goto out; -+ } -+ } -+ irq_info[irq] = mk_unbound_info(); -+ -+ dynamic_irq_cleanup(irq); -+ -+out: -+ spin_unlock(&irq_mapping_update_lock); -+ return rc; -+} -+ -+#ifdef CONFIG_PCI_XEN -+int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) -+{ -+ int irq = 0; -+ struct physdev_map_pirq map_irq; -+ int rc; -+ domid_t domid; -+ int pos; -+ u32 table_offset, bir; -+ -+ domid = rc = xen_find_device_domain_owner(dev); -+ if (rc < 0) -+ domid = DOMID_SELF; -+ -+ memset(&map_irq, 0, sizeof(map_irq)); -+ map_irq.domid = domid; -+ map_irq.type = MAP_PIRQ_TYPE_MSI; -+ map_irq.index = -1; -+ map_irq.pirq = -1; -+ map_irq.bus = dev->bus->number; -+ map_irq.devfn = dev->devfn; -+ -+ if (type == PCI_CAP_ID_MSIX) { -+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); -+ -+ pci_read_config_dword(dev, msix_table_offset_reg(pos), -+ &table_offset); -+ bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); -+ -+ map_irq.table_base = pci_resource_start(dev, bir); -+ map_irq.entry_nr = msidesc->msi_attrib.entry_nr; -+ } -+ -+ spin_lock(&irq_mapping_update_lock); -+ -+ irq = find_unbound_irq(); -+ -+ if (irq == -1) -+ goto out; -+ -+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); -+ if (rc) { -+ printk(KERN_WARNING "xen map irq failed %d\n", rc); -+ -+ dynamic_irq_cleanup(irq); -+ -+ irq = -1; -+ goto out; -+ } -+ irq_info[irq] 
= mk_pirq_info(0, map_irq.pirq, map_irq.index); -+ if (domid) -+ irq_info[irq].u.pirq.domid = domid; -+ -+ set_irq_chip_and_handler_name(irq, &xen_pirq_chip, -+ handle_fasteoi_irq, -+ (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi"); - -+out: -+ spin_unlock(&irq_mapping_update_lock); - return irq; - } -+#endif -+#endif -+ -+int xen_vector_from_irq(unsigned irq) -+{ -+ return vector_from_irq(irq); -+} -+ -+int xen_gsi_from_irq(unsigned irq) -+{ -+ return gsi_from_irq(irq); -+} -+EXPORT_SYMBOL_GPL(xen_gsi_from_irq); - - int bind_evtchn_to_irq(unsigned int evtchn) - { -@@ -363,7 +752,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) - irq = find_unbound_irq(); - - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, -- handle_edge_irq, "event"); -+ handle_fasteoi_irq, "event"); - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_evtchn_info(evtchn); -@@ -410,8 +799,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) - return irq; - } - -+static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, -+ unsigned int remote_port) -+{ -+ struct evtchn_bind_interdomain bind_interdomain; -+ int err; -+ -+ bind_interdomain.remote_dom = remote_domain; -+ bind_interdomain.remote_port = remote_port; -+ -+ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, -+ &bind_interdomain); - --static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) -+ return err ? 
: bind_evtchn_to_irq(bind_interdomain.local_port); -+} -+ -+ -+int bind_virq_to_irq(unsigned int virq, unsigned int cpu) - { - struct evtchn_bind_virq bind_virq; - int evtchn, irq; -@@ -421,6 +825,11 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) - irq = per_cpu(virq_to_irq, cpu)[virq]; - - if (irq == -1) { -+ irq = find_unbound_irq(); -+ -+ set_irq_chip_and_handler_name(irq, &xen_percpu_chip, -+ handle_percpu_irq, "virq"); -+ - bind_virq.virq = virq; - bind_virq.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, -@@ -428,11 +837,6 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) - BUG(); - evtchn = bind_virq.port; - -- irq = find_unbound_irq(); -- -- set_irq_chip_and_handler_name(irq, &xen_percpu_chip, -- handle_percpu_irq, "virq"); -- - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_virq_info(evtchn, virq); - -@@ -505,6 +909,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, - } - EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); - -+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, -+ unsigned int remote_port, -+ irq_handler_t handler, -+ unsigned long irqflags, -+ const char *devname, -+ void *dev_id) -+{ -+ int irq, retval; -+ -+ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); -+ if (irq < 0) -+ return irq; -+ -+ retval = request_irq(irq, handler, irqflags, devname, dev_id); -+ if (retval != 0) { -+ unbind_from_irq(irq); -+ return retval; -+ } -+ -+ return irq; -+} -+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); -+ - int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -@@ -564,41 +991,75 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) - { - struct shared_info *sh = HYPERVISOR_shared_info; - int cpu = smp_processor_id(); -+ unsigned long *cpu_evtchn = cpu_evtchn_mask(cpu); - int i; - unsigned long flags; - static DEFINE_SPINLOCK(debug_lock); -+ 
struct vcpu_info *v; - - spin_lock_irqsave(&debug_lock, flags); - -- printk("vcpu %d\n ", cpu); -+ printk("\nvcpu %d\n ", cpu); - - for_each_online_cpu(i) { -- struct vcpu_info *v = per_cpu(xen_vcpu, i); -- printk("%d: masked=%d pending=%d event_sel %08lx\n ", i, -- (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask, -- v->evtchn_upcall_pending, -- v->evtchn_pending_sel); -- } -- printk("pending:\n "); -- for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) -- printk("%08lx%s", sh->evtchn_pending[i], -- i % 8 == 0 ? "\n " : " "); -- printk("\nmasks:\n "); -- for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) -- printk("%08lx%s", sh->evtchn_mask[i], -- i % 8 == 0 ? "\n " : " "); -- -- printk("\nunmasked:\n "); -- for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) -- printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i], -- i % 8 == 0 ? "\n " : " "); -+ int pending; -+ v = per_cpu(xen_vcpu, i); -+ pending = (get_irq_regs() && i == cpu) -+ ? xen_irqs_disabled(get_irq_regs()) -+ : v->evtchn_upcall_mask; -+ printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i, -+ pending, v->evtchn_upcall_pending, -+ (int)(sizeof(v->evtchn_pending_sel)*2), -+ v->evtchn_pending_sel); -+ } -+ v = per_cpu(xen_vcpu, cpu); -+ -+ printk("\npending:\n "); -+ for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) -+ printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2, -+ sh->evtchn_pending[i], -+ i % 8 == 0 ? "\n " : " "); -+ printk("\nglobal mask:\n "); -+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) -+ printk("%0*lx%s", -+ (int)(sizeof(sh->evtchn_mask[0])*2), -+ sh->evtchn_mask[i], -+ i % 8 == 0 ? "\n " : " "); -+ -+ printk("\nglobally unmasked:\n "); -+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) -+ printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2), -+ sh->evtchn_pending[i] & ~sh->evtchn_mask[i], -+ i % 8 == 0 ? 
"\n " : " "); -+ -+ printk("\nlocal cpu%d mask:\n ", cpu); -+ for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--) -+ printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2), -+ cpu_evtchn[i], -+ i % 8 == 0 ? "\n " : " "); -+ -+ printk("\nlocally unmasked:\n "); -+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) { -+ unsigned long pending = sh->evtchn_pending[i] -+ & ~sh->evtchn_mask[i] -+ & cpu_evtchn[i]; -+ printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2), -+ pending, i % 8 == 0 ? "\n " : " "); -+ } - - printk("\npending list:\n"); -- for(i = 0; i < NR_EVENT_CHANNELS; i++) { -+ for (i = 0; i < NR_EVENT_CHANNELS; i++) { - if (sync_test_bit(i, sh->evtchn_pending)) { -- printk(" %d: event %d -> irq %d\n", -+ int word_idx = i / BITS_PER_LONG; -+ printk(" %d: event %d -> irq %d%s%s%s\n", - cpu_from_evtchn(i), i, -- evtchn_to_irq[i]); -+ evtchn_to_irq[i], -+ sync_test_bit(word_idx, &v->evtchn_pending_sel) -+ ? "" : " l2-clear", -+ !sync_test_bit(i, sh->evtchn_mask) -+ ? "" : " globally-masked", -+ sync_test_bit(i, cpu_evtchn) -+ ? "" : " locally-masked"); - } - } - -@@ -618,17 +1079,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count); - * a bitset of words which contain pending event bits. The second - * level is a bitset of pending events themselves. 
- */ --void xen_evtchn_do_upcall(struct pt_regs *regs) -+static void __xen_evtchn_do_upcall(struct pt_regs *regs) - { - int cpu = get_cpu(); -- struct pt_regs *old_regs = set_irq_regs(regs); - struct shared_info *s = HYPERVISOR_shared_info; - struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); - unsigned count; - -- exit_idle(); -- irq_enter(); -- - do { - unsigned long pending_words; - -@@ -651,9 +1108,16 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) - int bit_idx = __ffs(pending_bits); - int port = (word_idx * BITS_PER_LONG) + bit_idx; - int irq = evtchn_to_irq[port]; -+ struct irq_desc *desc; - -- if (irq != -1) -- handle_irq(irq, regs); -+ mask_evtchn(port); -+ clear_evtchn(port); -+ -+ if (irq != -1) { -+ desc = irq_to_desc(irq); -+ if (desc) -+ generic_handle_irq_desc(irq, desc); -+ } - } - } - -@@ -661,14 +1125,32 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) - - count = __get_cpu_var(xed_nesting_count); - __get_cpu_var(xed_nesting_count) = 0; -- } while(count != 1); -+ } while (count != 1 || vcpu_info->evtchn_upcall_pending); - - out: -+ -+ put_cpu(); -+} -+ -+void xen_evtchn_do_upcall(struct pt_regs *regs) -+{ -+ struct pt_regs *old_regs = set_irq_regs(regs); -+ -+ exit_idle(); -+ irq_enter(); -+ -+ __xen_evtchn_do_upcall(regs); -+ - irq_exit(); - set_irq_regs(old_regs); -+} - -- put_cpu(); -+void xen_hvm_evtchn_do_upcall(void) -+{ -+ struct pt_regs *regs = get_irq_regs(); -+ __xen_evtchn_do_upcall(regs); - } -+EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); - - /* Rebind a new event channel to an existing irq. 
*/ - void rebind_evtchn_irq(int evtchn, int irq) -@@ -705,7 +1187,10 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) - struct evtchn_bind_vcpu bind_vcpu; - int evtchn = evtchn_from_irq(irq); - -- if (!VALID_EVTCHN(evtchn)) -+ /* events delivered via platform PCI interrupts are always -+ * routed to vcpu 0 */ -+ if (!VALID_EVTCHN(evtchn) || -+ (xen_hvm_domain() && !xen_have_vector_callback)) - return -1; - - /* Send future instances of this interrupt to other vcpu. */ -@@ -746,33 +1231,18 @@ int resend_irq_on_evtchn(unsigned int irq) - return 1; - } - --static void enable_dynirq(unsigned int irq) --{ -- int evtchn = evtchn_from_irq(irq); -- -- if (VALID_EVTCHN(evtchn)) -- unmask_evtchn(evtchn); --} -- --static void disable_dynirq(unsigned int irq) --{ -- int evtchn = evtchn_from_irq(irq); -- -- if (VALID_EVTCHN(evtchn)) -- mask_evtchn(evtchn); --} -- - static void ack_dynirq(unsigned int irq) - { - int evtchn = evtchn_from_irq(irq); -+ struct irq_desc *desc = irq_to_desc(irq); - -- move_native_irq(irq); -+ move_masked_irq(irq); - -- if (VALID_EVTCHN(evtchn)) -- clear_evtchn(evtchn); -+ if (VALID_EVTCHN(evtchn) && !(desc->status & IRQ_DISABLED)) -+ unmask_evtchn(evtchn); - } - --static int retrigger_dynirq(unsigned int irq) -+static int retrigger_irq(unsigned int irq) - { - int evtchn = evtchn_from_irq(irq); - struct shared_info *sh = HYPERVISOR_shared_info; -@@ -814,9 +1284,6 @@ static void restore_cpu_virqs(unsigned int cpu) - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_virq_info(evtchn, virq); - bind_evtchn_to_cpu(evtchn, cpu); -- -- /* Ready for use. */ -- unmask_evtchn(evtchn); - } - } - -@@ -842,10 +1309,6 @@ static void restore_cpu_ipis(unsigned int cpu) - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_ipi_info(evtchn, ipi); - bind_evtchn_to_cpu(evtchn, cpu); -- -- /* Ready for use. 
*/ -- unmask_evtchn(evtchn); -- - } - } - -@@ -857,7 +1320,7 @@ void xen_clear_irq_pending(int irq) - if (VALID_EVTCHN(evtchn)) - clear_evtchn(evtchn); - } -- -+EXPORT_SYMBOL(xen_clear_irq_pending); - void xen_set_irq_pending(int irq) - { - int evtchn = evtchn_from_irq(irq); -@@ -877,9 +1340,9 @@ bool xen_test_irq_pending(int irq) - return ret; - } - --/* Poll waiting for an irq to become pending. In the usual case, the -+/* Poll waiting for an irq to become pending with timeout. In the usual case, the - irq will be disabled so it won't deliver an interrupt. */ --void xen_poll_irq(int irq) -+void xen_poll_irq_timeout(int irq, u64 timeout) - { - evtchn_port_t evtchn = evtchn_from_irq(irq); - -@@ -887,17 +1350,38 @@ void xen_poll_irq(int irq) - struct sched_poll poll; - - poll.nr_ports = 1; -- poll.timeout = 0; -+ poll.timeout = timeout; - set_xen_guest_handle(poll.ports, &evtchn); - - if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) - BUG(); - } - } -+EXPORT_SYMBOL(xen_poll_irq_timeout); -+/* Poll waiting for an irq to become pending. In the usual case, the -+ irq will be disabled so it won't deliver an interrupt. */ -+void xen_poll_irq(int irq) -+{ -+ xen_poll_irq_timeout(irq, 0 /* no timeout */); -+} -+ -+/* Check whether the IRQ line is shared with other guests. */ -+int xen_ignore_irq(int irq) -+{ -+ struct irq_info *info = info_for_irq(irq); -+ struct physdev_irq_status_query irq_status = { .irq = -+ info->u.pirq.gsi }; -+ -+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) -+ return 0; -+ return !(irq_status.flags & XENIRQSTAT_shared); -+} -+EXPORT_SYMBOL_GPL(xen_ignore_irq); - - void xen_irq_resume(void) - { - unsigned int cpu, irq, evtchn; -+ struct irq_desc *desc; - - init_evtchn_cpu_bindings(); - -@@ -916,37 +1400,134 @@ void xen_irq_resume(void) - restore_cpu_virqs(cpu); - restore_cpu_ipis(cpu); - } -+ -+ /* -+ * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These -+ * are not handled by the IRQ core. 
-+ */ -+ for_each_irq_desc(irq, desc) { -+ if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND)) -+ continue; -+ if (desc->status & IRQ_DISABLED) -+ continue; -+ -+ evtchn = evtchn_from_irq(irq); -+ if (evtchn == -1) -+ continue; -+ -+ unmask_evtchn(evtchn); -+ } -+ -+ if (pirq_eoi_does_unmask) { -+ struct physdev_pirq_eoi_gmfn eoi_gmfn; -+ -+ eoi_gmfn.gmfn = virt_to_mfn(pirq_needs_eoi_bits); -+ if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) != 0) { -+ /* Could recover by reverting to old method...? */ -+ BUG(); -+ } -+ } - } - - static struct irq_chip xen_dynamic_chip __read_mostly = { - .name = "xen-dyn", - -- .disable = disable_dynirq, -- .mask = disable_dynirq, -- .unmask = enable_dynirq, -+ .disable = mask_irq, -+ .mask = mask_irq, -+ .unmask = unmask_irq, - -- .ack = ack_dynirq, -+ .eoi = ack_dynirq, - .set_affinity = set_affinity_irq, -- .retrigger = retrigger_dynirq, -+ .retrigger = retrigger_irq, - }; - - static struct irq_chip xen_percpu_chip __read_mostly = { - .name = "xen-percpu", - -- .disable = disable_dynirq, -- .mask = disable_dynirq, -- .unmask = enable_dynirq, -+ .disable = mask_irq, -+ .mask = mask_irq, -+ .unmask = unmask_irq, - - .ack = ack_dynirq, - }; - -+static struct irq_chip xen_pirq_chip __read_mostly = { -+ .name = "xen-pirq", -+ -+ .startup = startup_pirq, -+ .shutdown = shutdown_pirq, -+ -+ .enable = pirq_eoi, -+ .unmask = unmask_irq, -+ -+ .disable = mask_irq, -+ .mask = mask_irq, -+ -+ .eoi = ack_pirq, -+ .end = end_pirq, -+ -+ .set_affinity = set_affinity_irq, -+ -+ .retrigger = retrigger_irq, -+}; -+ -+int xen_set_callback_via(uint64_t via) -+{ -+ struct xen_hvm_param a; -+ a.domid = DOMID_SELF; -+ a.index = HVM_PARAM_CALLBACK_IRQ; -+ a.value = via; -+ return HYPERVISOR_hvm_op(HVMOP_set_param, &a); -+} -+EXPORT_SYMBOL_GPL(xen_set_callback_via); -+ -+#ifdef CONFIG_XEN_PVHVM -+/* Vector callbacks are better than PCI interrupts to receive event -+ * channel notifications because we can receive vector callbacks 
on any -+ * vcpu and we don't need PCI support or APIC interactions. */ -+void xen_callback_vector(void) -+{ -+ int rc; -+ uint64_t callback_via; -+ if (xen_have_vector_callback) { -+ callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK); -+ rc = xen_set_callback_via(callback_via); -+ if (rc) { -+ printk(KERN_ERR "Request for Xen HVM callback vector" -+ " failed.\n"); -+ xen_have_vector_callback = 0; -+ return; -+ } -+ printk(KERN_INFO "Xen HVM callback vector for event delivery is " -+ "enabled\n"); -+ alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector); -+ } -+} -+#else -+void xen_callback_vector(void) {} -+#endif -+ - void __init xen_init_IRQ(void) - { - int i; -+ struct physdev_pirq_eoi_gmfn eoi_gmfn; -+ int nr_pirqs = NR_IRQS; - - cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s), - GFP_KERNEL); -- BUG_ON(cpu_evtchn_mask_p == NULL); -+ irq_info = kcalloc(nr_irqs, sizeof(*irq_info), GFP_KERNEL); -+ -+ evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), -+ GFP_KERNEL); -+ for(i = 0; i < NR_EVENT_CHANNELS; i++) -+ evtchn_to_irq[i] = -1; -+ -+ i = get_order(sizeof(unsigned long) * BITS_TO_LONGS(nr_pirqs)); -+ pirq_needs_eoi_bits = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, i); -+ -+ eoi_gmfn.gmfn = virt_to_mfn(pirq_needs_eoi_bits); -+ if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0) -+ pirq_eoi_does_unmask = true; - - init_evtchn_cpu_bindings(); - -@@ -954,5 +1535,11 @@ void __init xen_init_IRQ(void) - for (i = 0; i < NR_EVENT_CHANNELS; i++) - mask_evtchn(i); - -- irq_ctx_init(smp_processor_id()); -+ if (xen_hvm_domain()) { -+ xen_callback_vector(); -+ native_init_IRQ(); -+ } else { -+ irq_ctx_init(smp_processor_id()); -+ xen_setup_pirqs(); -+ } - } -diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c -index 79bedba..b82666a 100644 ---- a/drivers/xen/evtchn.c -+++ b/drivers/xen/evtchn.c -@@ -48,6 +48,8 @@ - #include <linux/gfp.h> - #include <linux/mutex.h> - #include <linux/cpu.h> -+ 
-+#include <xen/xen.h> - #include <xen/events.h> - #include <xen/evtchn.h> - #include <asm/xen/hypervisor.h> -@@ -68,10 +70,36 @@ struct per_user_data { - const char *name; - }; - --/* Who's bound to each port? */ --static struct per_user_data *port_user[NR_EVENT_CHANNELS]; -+/* -+ * Who's bound to each port? This is logically an array of struct -+ * per_user_data *, but we encode the current enabled-state in bit 0. -+ */ -+static unsigned long *port_user; - static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ - -+static inline struct per_user_data *get_port_user(unsigned port) -+{ -+ return (struct per_user_data *)(port_user[port] & ~1); -+} -+ -+static inline void set_port_user(unsigned port, struct per_user_data *u) -+{ -+ port_user[port] = (unsigned long)u; -+} -+ -+static inline bool get_port_enabled(unsigned port) -+{ -+ return port_user[port] & 1; -+} -+ -+static inline void set_port_enabled(unsigned port, bool enabled) -+{ -+ if (enabled) -+ port_user[port] |= 1; -+ else -+ port_user[port] &= ~1; -+} -+ - irqreturn_t evtchn_interrupt(int irq, void *data) - { - unsigned int port = (unsigned long)data; -@@ -79,9 +107,14 @@ irqreturn_t evtchn_interrupt(int irq, void *data) - - spin_lock(&port_user_lock); - -- u = port_user[port]; -+ u = get_port_user(port); -+ -+ WARN(!get_port_enabled(port), -+ "Interrupt for port %d, but apparently not enabled; per-user %p\n", -+ port, u); - - disable_irq_nosync(irq); -+ set_port_enabled(port, false); - - if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { - u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; -@@ -91,9 +124,8 @@ irqreturn_t evtchn_interrupt(int irq, void *data) - kill_fasync(&u->evtchn_async_queue, - SIGIO, POLL_IN); - } -- } else { -+ } else - u->ring_overflow = 1; -- } - - spin_unlock(&port_user_lock); - -@@ -197,9 +229,18 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, - goto out; - - spin_lock_irq(&port_user_lock); -- for (i = 0; i < 
(count/sizeof(evtchn_port_t)); i++) -- if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) -- enable_irq(irq_from_evtchn(kbuf[i])); -+ -+ for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { -+ unsigned port = kbuf[i]; -+ -+ if (port < NR_EVENT_CHANNELS && -+ get_port_user(port) == u && -+ !get_port_enabled(port)) { -+ set_port_enabled(port, true); -+ enable_irq(irq_from_evtchn(port)); -+ } -+ } -+ - spin_unlock_irq(&port_user_lock); - - rc = count; -@@ -221,8 +262,9 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) - * interrupt handler yet, and our caller has already - * serialized bind operations.) - */ -- BUG_ON(port_user[port] != NULL); -- port_user[port] = u; -+ BUG_ON(get_port_user(port) != NULL); -+ set_port_user(port, u); -+ set_port_enabled(port, true); /* start enabled */ - - rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, - u->name, (void *)(unsigned long)port); -@@ -238,10 +280,7 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port) - - unbind_from_irqhandler(irq, (void *)(unsigned long)port); - -- /* make sure we unbind the irq handler before clearing the port */ -- barrier(); -- -- port_user[port] = NULL; -+ set_port_user(port, NULL); - } - - static long evtchn_ioctl(struct file *file, -@@ -332,15 +371,17 @@ static long evtchn_ioctl(struct file *file, - spin_lock_irq(&port_user_lock); - - rc = -ENOTCONN; -- if (port_user[unbind.port] != u) { -+ if (get_port_user(unbind.port) != u) { - spin_unlock_irq(&port_user_lock); - break; - } - -- evtchn_unbind_from_user(u, unbind.port); -+ disable_irq(irq_from_evtchn(unbind.port)); - - spin_unlock_irq(&port_user_lock); - -+ evtchn_unbind_from_user(u, unbind.port); -+ - rc = 0; - break; - } -@@ -354,7 +395,7 @@ static long evtchn_ioctl(struct file *file, - - if (notify.port >= NR_EVENT_CHANNELS) { - rc = -EINVAL; -- } else if (port_user[notify.port] != u) { -+ } else if (get_port_user(notify.port) != u) { - rc = -ENOTCONN; - } else { - 
notify_remote_via_evtchn(notify.port); -@@ -443,14 +484,21 @@ static int evtchn_release(struct inode *inode, struct file *filp) - free_page((unsigned long)u->ring); - - for (i = 0; i < NR_EVENT_CHANNELS; i++) { -- if (port_user[i] != u) -+ if (get_port_user(i) != u) - continue; - -- evtchn_unbind_from_user(port_user[i], i); -+ disable_irq(irq_from_evtchn(i)); - } - - spin_unlock_irq(&port_user_lock); - -+ for (i = 0; i < NR_EVENT_CHANNELS; i++) { -+ if (get_port_user(i) != u) -+ continue; -+ -+ evtchn_unbind_from_user(get_port_user(i), i); -+ } -+ - kfree(u->name); - kfree(u); - -@@ -470,7 +518,7 @@ static const struct file_operations evtchn_fops = { - - static struct miscdevice evtchn_miscdev = { - .minor = MISC_DYNAMIC_MINOR, -- .name = "evtchn", -+ .name = "xen/evtchn", - .fops = &evtchn_fops, - }; - static int __init evtchn_init(void) -@@ -480,8 +528,11 @@ static int __init evtchn_init(void) - if (!xen_domain()) - return -ENODEV; - -+ port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); -+ if (port_user == NULL) -+ return -ENOMEM; -+ - spin_lock_init(&port_user_lock); -- memset(port_user, 0, sizeof(port_user)); - - /* Create '/dev/misc/evtchn'. 
*/ - err = misc_register(&evtchn_miscdev); -@@ -497,6 +548,9 @@ static int __init evtchn_init(void) - - static void __exit evtchn_cleanup(void) - { -+ kfree(port_user); -+ port_user = NULL; -+ - misc_deregister(&evtchn_miscdev); - } - -diff --git a/drivers/xen/features.c b/drivers/xen/features.c -index 99eda16..9e2b64f 100644 ---- a/drivers/xen/features.c -+++ b/drivers/xen/features.c -@@ -18,7 +18,7 @@ - u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; - EXPORT_SYMBOL_GPL(xen_features); - --void xen_setup_features(void) -+void __init xen_setup_features(void) - { - struct xen_feature_info fi; - int i, j; -diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c -new file mode 100644 -index 0000000..a33e443 ---- /dev/null -+++ b/drivers/xen/gntdev.c -@@ -0,0 +1,645 @@ -+/****************************************************************************** -+ * gntdev.c -+ * -+ * Device for accessing (in user-space) pages that have been granted by other -+ * domains. -+ * -+ * Copyright (c) 2006-2007, D G Murray. -+ * (c) 2009 Gerd Hoffmann <kraxel@redhat.com> -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ */ -+ -+#include <linux/module.h> -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <linux/miscdevice.h> -+#include <linux/fs.h> -+#include <linux/mm.h> -+#include <linux/mman.h> -+#include <linux/mmu_notifier.h> -+#include <linux/types.h> -+#include <linux/uaccess.h> -+#include <linux/sched.h> -+#include <linux/spinlock.h> -+ -+#include <xen/xen.h> -+#include <xen/grant_table.h> -+#include <xen/gntdev.h> -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/hypercall.h> -+#include <asm/xen/page.h> -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, " -+ "Gerd Hoffmann <kraxel@redhat.com>"); -+MODULE_DESCRIPTION("User-space granted page access driver"); -+ -+static int debug = 0; -+module_param(debug, int, 0644); -+static int limit = 1024; -+module_param(limit, int, 0644); -+ -+struct gntdev_priv { -+ struct list_head maps; -+ uint32_t used; -+ uint32_t limit; -+ spinlock_t lock; -+ struct mm_struct *mm; -+ struct mmu_notifier mn; -+}; -+ -+struct grant_map { -+ struct list_head next; -+ struct gntdev_priv *priv; -+ struct vm_area_struct *vma; -+ int index; -+ int count; -+ int flags; -+ int is_mapped; -+ struct ioctl_gntdev_grant_ref *grants; -+ struct gnttab_map_grant_ref *map_ops; -+ struct gnttab_unmap_grant_ref *unmap_ops; -+}; -+ -+/* ------------------------------------------------------------------ */ -+ -+static void gntdev_print_maps(struct gntdev_priv *priv, -+ char *text, int text_index) -+{ -+ struct grant_map *map; -+ -+ printk("%s: maps list (priv %p, usage %d/%d)\n", -+ __FUNCTION__, priv, priv->used, priv->limit); -+ list_for_each_entry(map, &priv->maps, next) -+ printk(" index %2d, count %2d %s\n", -+ map->index, map->count, -+ map->index == text_index && text ? 
text : ""); -+} -+ -+static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) -+{ -+ struct grant_map *add; -+ -+ add = kzalloc(sizeof(struct grant_map), GFP_KERNEL); -+ if (NULL == add) -+ return NULL; -+ -+ add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); -+ add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); -+ add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); -+ if (NULL == add->grants || -+ NULL == add->map_ops || -+ NULL == add->unmap_ops) -+ goto err; -+ -+ add->index = 0; -+ add->count = count; -+ add->priv = priv; -+ -+ if (add->count + priv->used > priv->limit) -+ goto err; -+ -+ return add; -+ -+err: -+ kfree(add->grants); -+ kfree(add->map_ops); -+ kfree(add->unmap_ops); -+ kfree(add); -+ return NULL; -+} -+ -+static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add) -+{ -+ struct grant_map *map; -+ -+ list_for_each_entry(map, &priv->maps, next) { -+ if (add->index + add->count < map->index) { -+ list_add_tail(&add->next, &map->next); -+ goto done; -+ } -+ add->index = map->index + map->count; -+ } -+ list_add_tail(&add->next, &priv->maps); -+ -+done: -+ priv->used += add->count; -+ if (debug) -+ gntdev_print_maps(priv, "[new]", add->index); -+} -+ -+static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, int index, -+ int count) -+{ -+ struct grant_map *map; -+ -+ list_for_each_entry(map, &priv->maps, next) { -+ if (map->index != index) -+ continue; -+ if (map->count != count) -+ continue; -+ return map; -+ } -+ return NULL; -+} -+ -+static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv, -+ unsigned long vaddr) -+{ -+ struct grant_map *map; -+ -+ list_for_each_entry(map, &priv->maps, next) { -+ if (!map->vma) -+ continue; -+ if (vaddr < map->vma->vm_start) -+ continue; -+ if (vaddr >= map->vma->vm_end) -+ continue; -+ return map; -+ } -+ return NULL; -+} -+ -+static int gntdev_del_map(struct grant_map *map) -+{ -+ int i; 
-+ -+ if (map->vma) -+ return -EBUSY; -+ for (i = 0; i < map->count; i++) -+ if (map->unmap_ops[i].handle) -+ return -EBUSY; -+ -+ map->priv->used -= map->count; -+ list_del(&map->next); -+ return 0; -+} -+ -+static void gntdev_free_map(struct grant_map *map) -+{ -+ if (!map) -+ return; -+ kfree(map->grants); -+ kfree(map->map_ops); -+ kfree(map->unmap_ops); -+ kfree(map); -+} -+ -+/* ------------------------------------------------------------------ */ -+ -+static int find_grant_ptes(pte_t *pte, pgtable_t token, unsigned long addr, void *data) -+{ -+ struct grant_map *map = data; -+ unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; -+ u64 pte_maddr; -+ -+ BUG_ON(pgnr >= map->count); -+ pte_maddr = (u64)pfn_to_mfn(page_to_pfn(token)) << PAGE_SHIFT; -+ pte_maddr += (unsigned long)pte & ~PAGE_MASK; -+ gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, map->flags, -+ map->grants[pgnr].ref, -+ map->grants[pgnr].domid); -+ gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, map->flags, -+ 0 /* handle */); -+ return 0; -+} -+ -+static int map_grant_pages(struct grant_map *map) -+{ -+ int i, err = 0; -+ -+ if (debug) -+ printk("%s: map %d+%d\n", __FUNCTION__, map->index, map->count); -+ err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, -+ map->map_ops, map->count); -+ if (WARN_ON(err)) -+ return err; -+ -+ for (i = 0; i < map->count; i++) { -+ if (map->map_ops[i].status) -+ err = -EINVAL; -+ map->unmap_ops[i].handle = map->map_ops[i].handle; -+ } -+ return err; -+} -+ -+static int unmap_grant_pages(struct grant_map *map, int offset, int pages) -+{ -+ int i, err = 0; -+ -+ if (debug) -+ printk("%s: map %d+%d [%d+%d]\n", __FUNCTION__, -+ map->index, map->count, offset, pages); -+ err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, -+ map->unmap_ops + offset, pages); -+ if (WARN_ON(err)) -+ return err; -+ -+ for (i = 0; i < pages; i++) { -+ if (map->unmap_ops[offset+i].status) -+ err = -EINVAL; -+ map->unmap_ops[offset+i].handle = 0; -+ } -+ return 
err; -+} -+ -+/* ------------------------------------------------------------------ */ -+ -+static void gntdev_vma_close(struct vm_area_struct *vma) -+{ -+ struct grant_map *map = vma->vm_private_data; -+ -+ if (debug) -+ printk("%s\n", __FUNCTION__); -+ map->is_mapped = 0; -+ map->vma = NULL; -+ vma->vm_private_data = NULL; -+} -+ -+static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -+{ -+ if (debug) -+ printk("%s: vaddr %p, pgoff %ld (shouldn't happen)\n", -+ __FUNCTION__, vmf->virtual_address, vmf->pgoff); -+ vmf->flags = VM_FAULT_ERROR; -+ return 0; -+} -+ -+static struct vm_operations_struct gntdev_vmops = { -+ .close = gntdev_vma_close, -+ .fault = gntdev_vma_fault, -+}; -+ -+/* ------------------------------------------------------------------ */ -+ -+static void mn_invl_range_start(struct mmu_notifier *mn, -+ struct mm_struct *mm, -+ unsigned long start, unsigned long end) -+{ -+ struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); -+ struct grant_map *map; -+ unsigned long mstart, mend; -+ int err; -+ -+ spin_lock(&priv->lock); -+ list_for_each_entry(map, &priv->maps, next) { -+ if (!map->vma) -+ continue; -+ if (!map->is_mapped) -+ continue; -+ if (map->vma->vm_start >= end) -+ continue; -+ if (map->vma->vm_end <= start) -+ continue; -+ mstart = max(start, map->vma->vm_start); -+ mend = min(end, map->vma->vm_end); -+ if (debug) -+ printk("%s: map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", -+ __FUNCTION__, map->index, map->count, -+ map->vma->vm_start, map->vma->vm_end, -+ start, end, mstart, mend); -+ err = unmap_grant_pages(map, -+ (mstart - map->vma->vm_start) >> PAGE_SHIFT, -+ (mend - mstart) >> PAGE_SHIFT); -+ WARN_ON(err); -+ } -+ spin_unlock(&priv->lock); -+} -+ -+static void mn_invl_page(struct mmu_notifier *mn, -+ struct mm_struct *mm, -+ unsigned long address) -+{ -+ mn_invl_range_start(mn, mm, address, address + PAGE_SIZE); -+} -+ -+static void mn_release(struct mmu_notifier *mn, -+ struct 
mm_struct *mm) -+{ -+ struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); -+ struct grant_map *map; -+ int err; -+ -+ spin_lock(&priv->lock); -+ list_for_each_entry(map, &priv->maps, next) { -+ if (!map->vma) -+ continue; -+ if (debug) -+ printk("%s: map %d+%d (%lx %lx)\n", -+ __FUNCTION__, map->index, map->count, -+ map->vma->vm_start, map->vma->vm_end); -+ err = unmap_grant_pages(map, 0, map->count); -+ WARN_ON(err); -+ } -+ spin_unlock(&priv->lock); -+} -+ -+struct mmu_notifier_ops gntdev_mmu_ops = { -+ .release = mn_release, -+ .invalidate_page = mn_invl_page, -+ .invalidate_range_start = mn_invl_range_start, -+}; -+ -+/* ------------------------------------------------------------------ */ -+ -+static int gntdev_open(struct inode *inode, struct file *flip) -+{ -+ struct gntdev_priv *priv; -+ -+ priv = kzalloc(sizeof(*priv), GFP_KERNEL); -+ if (!priv) -+ return -ENOMEM; -+ -+ INIT_LIST_HEAD(&priv->maps); -+ spin_lock_init(&priv->lock); -+ priv->limit = limit; -+ -+ priv->mm = get_task_mm(current); -+ if (!priv->mm) { -+ kfree(priv); -+ return -ENOMEM; -+ } -+ priv->mn.ops = &gntdev_mmu_ops; -+ mmu_notifier_register(&priv->mn, priv->mm); -+ mmput(priv->mm); -+ -+ flip->private_data = priv; -+ if (debug) -+ printk("%s: priv %p\n", __FUNCTION__, priv); -+ -+ return 0; -+} -+ -+static int gntdev_release(struct inode *inode, struct file *flip) -+{ -+ struct gntdev_priv *priv = flip->private_data; -+ struct grant_map *map; -+ int err; -+ -+ if (debug) -+ printk("%s: priv %p\n", __FUNCTION__, priv); -+ -+ spin_lock(&priv->lock); -+ while (!list_empty(&priv->maps)) { -+ map = list_entry(priv->maps.next, struct grant_map, next); -+ err = gntdev_del_map(map); -+ if (WARN_ON(err)) -+ gntdev_free_map(map); -+ -+ } -+ spin_unlock(&priv->lock); -+ -+ mmu_notifier_unregister(&priv->mn, priv->mm); -+ kfree(priv); -+ return 0; -+} -+ -+static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, -+ struct ioctl_gntdev_map_grant_ref __user *u) -+{ -+ struct 
ioctl_gntdev_map_grant_ref op; -+ struct grant_map *map; -+ int err; -+ -+ if (copy_from_user(&op, u, sizeof(op)) != 0) -+ return -EFAULT; -+ if (debug) -+ printk("%s: priv %p, add %d\n", __FUNCTION__, priv, -+ op.count); -+ if (unlikely(op.count <= 0)) -+ return -EINVAL; -+ if (unlikely(op.count > priv->limit)) -+ return -EINVAL; -+ -+ err = -ENOMEM; -+ map = gntdev_alloc_map(priv, op.count); -+ if (!map) -+ return err; -+ if (copy_from_user(map->grants, &u->refs, -+ sizeof(map->grants[0]) * op.count) != 0) { -+ gntdev_free_map(map); -+ return err; -+ } -+ -+ spin_lock(&priv->lock); -+ gntdev_add_map(priv, map); -+ op.index = map->index << PAGE_SHIFT; -+ spin_unlock(&priv->lock); -+ -+ if (copy_to_user(u, &op, sizeof(op)) != 0) { -+ spin_lock(&priv->lock); -+ gntdev_del_map(map); -+ spin_unlock(&priv->lock); -+ gntdev_free_map(map); -+ return err; -+ } -+ return 0; -+} -+ -+static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, -+ struct ioctl_gntdev_unmap_grant_ref __user *u) -+{ -+ struct ioctl_gntdev_unmap_grant_ref op; -+ struct grant_map *map; -+ int err = -EINVAL; -+ -+ if (copy_from_user(&op, u, sizeof(op)) != 0) -+ return -EFAULT; -+ if (debug) -+ printk("%s: priv %p, del %d+%d\n", __FUNCTION__, priv, -+ (int)op.index, (int)op.count); -+ -+ spin_lock(&priv->lock); -+ map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); -+ if (map) -+ err = gntdev_del_map(map); -+ spin_unlock(&priv->lock); -+ if (!err) -+ gntdev_free_map(map); -+ return err; -+} -+ -+static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, -+ struct ioctl_gntdev_get_offset_for_vaddr __user *u) -+{ -+ struct ioctl_gntdev_get_offset_for_vaddr op; -+ struct grant_map *map; -+ -+ if (copy_from_user(&op, u, sizeof(op)) != 0) -+ return -EFAULT; -+ if (debug) -+ printk("%s: priv %p, offset for vaddr %lx\n", __FUNCTION__, priv, -+ (unsigned long)op.vaddr); -+ -+ spin_lock(&priv->lock); -+ map = gntdev_find_map_vaddr(priv, op.vaddr); -+ if (map == NULL || 
-+ map->vma->vm_start != op.vaddr) { -+ spin_unlock(&priv->lock); -+ return -EINVAL; -+ } -+ op.offset = map->index << PAGE_SHIFT; -+ op.count = map->count; -+ spin_unlock(&priv->lock); -+ -+ if (copy_to_user(u, &op, sizeof(op)) != 0) -+ return -EFAULT; -+ return 0; -+} -+ -+static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv, -+ struct ioctl_gntdev_set_max_grants __user *u) -+{ -+ struct ioctl_gntdev_set_max_grants op; -+ -+ if (copy_from_user(&op, u, sizeof(op)) != 0) -+ return -EFAULT; -+ if (debug) -+ printk("%s: priv %p, limit %d\n", __FUNCTION__, priv, op.count); -+ if (op.count > limit) -+ return -EINVAL; -+ -+ spin_lock(&priv->lock); -+ priv->limit = op.count; -+ spin_unlock(&priv->lock); -+ return 0; -+} -+ -+static long gntdev_ioctl(struct file *flip, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct gntdev_priv *priv = flip->private_data; -+ void __user *ptr = (void __user *)arg; -+ -+ switch (cmd) { -+ case IOCTL_GNTDEV_MAP_GRANT_REF: -+ return gntdev_ioctl_map_grant_ref(priv, ptr); -+ -+ case IOCTL_GNTDEV_UNMAP_GRANT_REF: -+ return gntdev_ioctl_unmap_grant_ref(priv, ptr); -+ -+ case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: -+ return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); -+ -+ case IOCTL_GNTDEV_SET_MAX_GRANTS: -+ return gntdev_ioctl_set_max_grants(priv, ptr); -+ -+ default: -+ if (debug) -+ printk("%s: priv %p, unknown cmd %x\n", -+ __FUNCTION__, priv, cmd); -+ return -ENOIOCTLCMD; -+ } -+ -+ return 0; -+} -+ -+static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) -+{ -+ struct gntdev_priv *priv = flip->private_data; -+ int index = vma->vm_pgoff; -+ int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; -+ struct grant_map *map; -+ int err = -EINVAL; -+ -+ if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) -+ return -EINVAL; -+ -+ if (debug) -+ printk("%s: map %d+%d at %lx (pgoff %lx)\n", __FUNCTION__, -+ index, count, vma->vm_start, vma->vm_pgoff); -+ -+ spin_lock(&priv->lock); -+ map = 
gntdev_find_map_index(priv, index, count); -+ if (!map) -+ goto unlock_out; -+ if (map->vma) -+ goto unlock_out; -+ if (priv->mm != vma->vm_mm) { -+ printk("%s: Huh? Other mm?\n", __FUNCTION__); -+ goto unlock_out; -+ } -+ -+ vma->vm_ops = &gntdev_vmops; -+ -+ vma->vm_flags |= VM_RESERVED; -+ vma->vm_flags |= VM_DONTCOPY; -+ vma->vm_flags |= VM_DONTEXPAND; -+ -+ vma->vm_private_data = map; -+ map->vma = vma; -+ -+ map->flags = GNTMAP_host_map | GNTMAP_application_map | GNTMAP_contains_pte; -+ if (!(vma->vm_flags & VM_WRITE)) -+ map->flags |= GNTMAP_readonly; -+ -+ err = apply_to_page_range(vma->vm_mm, vma->vm_start, -+ vma->vm_end - vma->vm_start, -+ find_grant_ptes, map); -+ if (err) { -+ goto unlock_out; -+ if (debug) -+ printk("%s: find_grant_ptes() failure.\n", __FUNCTION__); -+ } -+ -+ err = map_grant_pages(map); -+ if (err) { -+ goto unlock_out; -+ if (debug) -+ printk("%s: map_grant_pages() failure.\n", __FUNCTION__); -+ } -+ map->is_mapped = 1; -+ -+unlock_out: -+ spin_unlock(&priv->lock); -+ return err; -+} -+ -+static const struct file_operations gntdev_fops = { -+ .owner = THIS_MODULE, -+ .open = gntdev_open, -+ .release = gntdev_release, -+ .mmap = gntdev_mmap, -+ .unlocked_ioctl = gntdev_ioctl -+}; -+ -+static struct miscdevice gntdev_miscdev = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "xen/gntdev", -+ .fops = &gntdev_fops, -+}; -+ -+/* ------------------------------------------------------------------ */ -+ -+static int __init gntdev_init(void) -+{ -+ int err; -+ -+ if (!xen_domain()) -+ return -ENODEV; -+ -+ err = misc_register(&gntdev_miscdev); -+ if (err != 0) { -+ printk(KERN_ERR "Could not register gntdev device\n"); -+ return err; -+ } -+ return 0; -+} -+ -+static void __exit gntdev_exit(void) -+{ -+ misc_deregister(&gntdev_miscdev); -+} -+ -+module_init(gntdev_init); -+module_exit(gntdev_exit); -+ -+/* ------------------------------------------------------------------ */ -diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c 
-index 7d8f531..09bb742 100644 ---- a/drivers/xen/grant-table.c -+++ b/drivers/xen/grant-table.c -@@ -36,10 +36,13 @@ - #include <linux/mm.h> - #include <linux/vmalloc.h> - #include <linux/uaccess.h> -+#include <linux/io.h> - -+#include <xen/xen.h> - #include <xen/interface/xen.h> - #include <xen/page.h> - #include <xen/grant_table.h> -+#include <xen/interface/memory.h> - #include <asm/xen/hypercall.h> - - #include <asm/pgtable.h> -@@ -57,6 +60,8 @@ static unsigned int boot_max_nr_grant_frames; - static int gnttab_free_count; - static grant_ref_t gnttab_free_head; - static DEFINE_SPINLOCK(gnttab_list_lock); -+unsigned long xen_hvm_resume_frames; -+EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); - - static struct grant_entry *shared; - -@@ -431,7 +436,7 @@ static unsigned int __max_nr_grant_frames(void) - return query.max_nr_frames; - } - --static inline unsigned int max_nr_grant_frames(void) -+unsigned int gnttab_max_grant_frames(void) - { - unsigned int xen_max = __max_nr_grant_frames(); - -@@ -439,6 +444,7 @@ static inline unsigned int max_nr_grant_frames(void) - return boot_max_nr_grant_frames; - return xen_max; - } -+EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); - - static int gnttab_map(unsigned int start_idx, unsigned int end_idx) - { -@@ -447,6 +453,30 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) - unsigned int nr_gframes = end_idx + 1; - int rc; - -+ if (xen_hvm_domain()) { -+ struct xen_add_to_physmap xatp; -+ unsigned int i = end_idx; -+ rc = 0; -+ /* -+ * Loop backwards, so that the first hypercall has the largest -+ * index, ensuring that the table will grow only once. 
-+ */ -+ do { -+ xatp.domid = DOMID_SELF; -+ xatp.idx = i; -+ xatp.space = XENMAPSPACE_grant_table; -+ xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i; -+ rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp); -+ if (rc != 0) { -+ printk(KERN_WARNING -+ "grant table add_to_physmap failed, err=%d\n", rc); -+ break; -+ } -+ } while (i-- > start_idx); -+ -+ return rc; -+ } -+ - frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); - if (!frames) - return -ENOMEM; -@@ -463,7 +493,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) - - BUG_ON(rc || setup.status); - -- rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(), -+ rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(), - &shared); - BUG_ON(rc); - -@@ -472,11 +502,127 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) - return 0; - } - -+static void gnttab_page_free(struct page *page, unsigned int order) -+{ -+ BUG_ON(order); -+ ClearPageForeign(page); -+ gnttab_reset_grant_page(page); -+ put_page(page); -+} -+ -+/* -+ * Must not be called with IRQs off. This should only be used on the -+ * slow path. -+ * -+ * Copy a foreign granted page to local memory. -+ */ -+int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep) -+{ -+ struct gnttab_unmap_and_replace unmap; -+ struct mmu_update mmu; -+ struct page *page; -+ struct page *new_page; -+ void *new_addr; -+ void *addr; -+ unsigned long pfn; -+ unsigned long mfn; -+ unsigned long new_mfn; -+ int err; -+ -+ page = *pagep; -+ if (!get_page_unless_zero(page)) -+ return -ENOENT; -+ -+ err = -ENOMEM; -+ new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); -+ if (!new_page) -+ goto out; -+ -+ new_addr = page_address(new_page); -+ addr = page_address(page); -+ memcpy(new_addr, addr, PAGE_SIZE); -+ -+ pfn = page_to_pfn(page); -+ mfn = pfn_to_mfn(pfn); -+ new_mfn = virt_to_mfn(new_addr); -+ -+ /* Make seq visible before checking page_mapped. 
*/ -+ smp_mb(); -+ -+ /* Has the page been DMA-mapped? */ -+ if (unlikely(page_mapped(page))) { -+ put_page(new_page); -+ err = -EBUSY; -+ goto out; -+ } -+ -+ if (!xen_feature(XENFEAT_auto_translated_physmap)) -+ set_phys_to_machine(pfn, new_mfn); -+ -+ unmap.host_addr = (unsigned long)addr; -+ unmap.new_addr = (unsigned long)new_addr; -+ unmap.handle = ref; -+ -+ err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, -+ &unmap, 1); -+ BUG_ON(err); -+ BUG_ON(unmap.status); -+ -+ if (!xen_feature(XENFEAT_auto_translated_physmap)) { -+ set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY); -+ -+ mmu.ptr = PFN_PHYS(new_mfn) | MMU_MACHPHYS_UPDATE; -+ mmu.val = pfn; -+ err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF); -+ BUG_ON(err); -+ } -+ -+ new_page->mapping = page->mapping; -+ SetPageForeign(new_page, _PageForeignDestructor(page)); -+ if (PageReserved(page)) -+ SetPageReserved(new_page); -+ *pagep = new_page; -+ -+ SetPageForeign(page, gnttab_page_free); -+ ClearPageReserved(page); -+ page->mapping = NULL; -+ -+out: -+ put_page(page); -+ return err; -+} -+EXPORT_SYMBOL_GPL(gnttab_copy_grant_page); -+ -+void gnttab_reset_grant_page(struct page *page) -+{ -+ init_page_count(page); -+ reset_page_mapcount(page); -+} -+EXPORT_SYMBOL_GPL(gnttab_reset_grant_page); -+ - int gnttab_resume(void) - { -- if (max_nr_grant_frames() < nr_grant_frames) -+ unsigned int max_nr_gframes; -+ -+ max_nr_gframes = gnttab_max_grant_frames(); -+ if (max_nr_gframes < nr_grant_frames) - return -ENOSYS; -- return gnttab_map(0, nr_grant_frames - 1); -+ -+ if (xen_pv_domain()) -+ return gnttab_map(0, nr_grant_frames - 1); -+ -+ if (!shared) { -+ shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes); -+ if (shared == NULL) { -+ printk(KERN_WARNING -+ "Failed to ioremap gnttab share frames!"); -+ return -ENOMEM; -+ } -+ } -+ -+ gnttab_map(0, nr_grant_frames - 1); -+ -+ return 0; - } - - int gnttab_suspend(void) -@@ -493,7 +639,7 @@ static int 
gnttab_expand(unsigned int req_entries) - cur = nr_grant_frames; - extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / - GREFS_PER_GRANT_FRAME); -- if (cur + extra > max_nr_grant_frames()) -+ if (cur + extra > gnttab_max_grant_frames()) - return -ENOSPC; - - rc = gnttab_map(cur, cur + extra - 1); -@@ -503,15 +649,12 @@ static int gnttab_expand(unsigned int req_entries) - return rc; - } - --static int __devinit gnttab_init(void) -+int gnttab_init(void) - { - int i; - unsigned int max_nr_glist_frames, nr_glist_frames; - unsigned int nr_init_grefs; - -- if (!xen_domain()) -- return -ENODEV; -- - nr_grant_frames = 1; - boot_max_nr_grant_frames = __max_nr_grant_frames(); - -@@ -554,5 +697,18 @@ static int __devinit gnttab_init(void) - kfree(gnttab_list); - return -ENOMEM; - } -+EXPORT_SYMBOL_GPL(gnttab_init); -+ -+static int __devinit __gnttab_init(void) -+{ -+ /* Delay grant-table initialization in the PV on HVM case */ -+ if (xen_hvm_domain()) -+ return 0; -+ -+ if (!xen_pv_domain()) -+ return -ENODEV; -+ -+ return gnttab_init(); -+} - --core_initcall(gnttab_init); -+core_initcall(__gnttab_init); -diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c -index 5d42d55..0b50906 100644 ---- a/drivers/xen/manage.c -+++ b/drivers/xen/manage.c -@@ -8,6 +8,7 @@ - #include <linux/stop_machine.h> - #include <linux/freezer.h> - -+#include <xen/xen.h> - #include <xen/xenbus.h> - #include <xen/grant_table.h> - #include <xen/events.h> -@@ -32,10 +33,30 @@ enum shutdown_state { - static enum shutdown_state shutting_down = SHUTDOWN_INVALID; - - #ifdef CONFIG_PM_SLEEP --static int xen_suspend(void *data) -+static int xen_hvm_suspend(void *data) - { -+ struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; - int *cancelled = data; -+ -+ BUG_ON(!irqs_disabled()); -+ -+ *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); -+ -+ xen_hvm_post_suspend(*cancelled); -+ gnttab_resume(); -+ -+ if (!*cancelled) { -+ xen_irq_resume(); -+ xen_timer_resume(); -+ } -+ -+ return 0; -+} -+ 
-+static int xen_suspend(void *data) -+{ - int err; -+ int *cancelled = data; - - BUG_ON(!irqs_disabled()); - -@@ -111,7 +132,10 @@ static void do_suspend(void) - goto out_resume; - } - -- err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); -+ if (xen_hvm_domain()) -+ err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0)); -+ else -+ err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); - - dpm_resume_noirq(PMSG_RESUME); - -@@ -260,7 +284,19 @@ static int shutdown_event(struct notifier_block *notifier, - return NOTIFY_DONE; - } - --static int __init setup_shutdown_event(void) -+static int __init __setup_shutdown_event(void) -+{ -+ /* Delay initialization in the PV on HVM case */ -+ if (xen_hvm_domain()) -+ return 0; -+ -+ if (!xen_pv_domain()) -+ return -ENODEV; -+ -+ return xen_setup_shutdown_event(); -+} -+ -+int xen_setup_shutdown_event(void) - { - static struct notifier_block xenstore_notifier = { - .notifier_call = shutdown_event -@@ -269,5 +305,6 @@ static int __init setup_shutdown_event(void) - - return 0; - } -+EXPORT_SYMBOL_GPL(xen_setup_shutdown_event); - --subsys_initcall(setup_shutdown_event); -+subsys_initcall(__setup_shutdown_event); -diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c -new file mode 100644 -index 0000000..da566a5 ---- /dev/null -+++ b/drivers/xen/mce.c -@@ -0,0 +1,216 @@ -+/****************************************************************************** -+ * mce.c -+ * Add Machine Check event Logging support in DOM0 -+ * -+ * Driver for receiving and logging machine check event -+ * -+ * Copyright (c) 2008, 2009 Intel Corporation -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of 
charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. 
-+ */ -+ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <xen/interface/xen.h> -+#include <asm/xen/hypervisor.h> -+#include <xen/events.h> -+#include <xen/interface/vcpu.h> -+#include <asm/xen/hypercall.h> -+#include <asm/mce.h> -+#include <xen/xen.h> -+ -+static mc_info_t *g_mi; -+static mcinfo_logical_cpu_t *g_physinfo; -+static uint32_t ncpus; -+ -+static int convert_log(struct mc_info *mi) -+{ -+ struct mcinfo_common *mic = NULL; -+ struct mcinfo_global *mc_global; -+ struct mcinfo_bank *mc_bank; -+ struct mce m; -+ int i, found = 0; -+ -+ x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL); -+ WARN_ON(!mic); -+ -+ mce_setup(&m); -+ mc_global = (struct mcinfo_global *)mic; -+ m.mcgstatus = mc_global->mc_gstatus; -+ m.apicid = mc_global->mc_apicid; -+ for (i = 0; i < ncpus; i++) { -+ if (g_physinfo[i].mc_apicid == m.apicid) { -+ found = 1; -+ break; -+ } -+ } -+ WARN_ON(!found); -+ -+ m.socketid = g_physinfo[i].mc_chipid; -+ m.cpu = m.extcpu = g_physinfo[i].mc_cpunr; -+ m.cpuvendor = (__u8)g_physinfo[i].mc_vendor; -+ m.mcgcap = g_physinfo[i].mc_msrvalues[0].value; -+ x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK); -+ do { -+ if (mic == NULL || mic->size == 0) -+ break; -+ if (mic->type == MC_TYPE_BANK) { -+ mc_bank = (struct mcinfo_bank *)mic; -+ m.misc = mc_bank->mc_misc; -+ m.status = mc_bank->mc_status; -+ m.addr = mc_bank->mc_addr; -+ m.tsc = mc_bank->mc_tsc; -+ m.bank = mc_bank->mc_bank; -+ m.finished = 1; -+ /*log this record*/ -+ mce_log(&m); -+ } -+ mic = x86_mcinfo_next(mic); -+ } while (1); -+ -+ return 0; -+} -+ -+/*pv_ops domain mce virq handler, logging physical mce error info*/ -+static irqreturn_t mce_dom_interrupt(int irq, void *dev_id) -+{ -+ xen_mc_t mc_op; -+ int result = 0; -+ -+ mc_op.cmd = XEN_MC_fetch; -+ mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; -+ set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi); -+urgent: -+ mc_op.u.mc_fetch.flags = XEN_MC_URGENT; -+ result = 
HYPERVISOR_mca(&mc_op); -+ if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA || -+ mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) -+ goto nonurgent; -+ else { -+ result = convert_log(g_mi); -+ if (result) -+ goto end; -+ /* After fetching the error event log entry from DOM0, -+ * we need to dec the refcnt and release the entry. -+ * The entry is reserved and inc refcnt when filling -+ * the error log entry. -+ */ -+ mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK; -+ result = HYPERVISOR_mca(&mc_op); -+ goto urgent; -+ } -+nonurgent: -+ mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT; -+ result = HYPERVISOR_mca(&mc_op); -+ if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA || -+ mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) -+ goto end; -+ else { -+ result = convert_log(g_mi); -+ if (result) -+ goto end; -+ /* After fetching the error event log entry from DOM0, -+ * we need to dec the refcnt and release the entry. The -+ * entry is reserved and inc refcnt when filling the -+ * error log entry. 
-+ */ -+ mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK; -+ result = HYPERVISOR_mca(&mc_op); -+ goto nonurgent; -+ } -+end: -+ return IRQ_HANDLED; -+} -+ -+static int bind_virq_for_mce(void) -+{ -+ int ret; -+ xen_mc_t mc_op; -+ -+ g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL); -+ -+ if (!g_mi) -+ return -ENOMEM; -+ -+ /* Fetch physical CPU Numbers */ -+ mc_op.cmd = XEN_MC_physcpuinfo; -+ mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; -+ set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); -+ ret = HYPERVISOR_mca(&mc_op); -+ if (ret) { -+ printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU numbers\n"); -+ kfree(g_mi); -+ return ret; -+ } -+ -+ /* Fetch each CPU Physical Info for later reference*/ -+ ncpus = mc_op.u.mc_physcpuinfo.ncpus; -+ g_physinfo = kmalloc(sizeof(struct mcinfo_logical_cpu)*ncpus, -+ GFP_KERNEL); -+ if (!g_physinfo) { -+ kfree(g_mi); -+ return -ENOMEM; -+ } -+ set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); -+ ret = HYPERVISOR_mca(&mc_op); -+ if (ret) { -+ printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs info\n"); -+ kfree(g_mi); -+ kfree(g_physinfo); -+ return ret; -+ } -+ -+ ret = bind_virq_to_irqhandler(VIRQ_MCA, 0, -+ mce_dom_interrupt, 0, "mce", NULL); -+ -+ if (ret < 0) { -+ printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n"); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int __init mcelog_init(void) -+{ -+ /* Only DOM0 is responsible for MCE logging */ -+ if (xen_initial_domain()) -+ return bind_virq_for_mce(); -+ -+ return 0; -+} -+ -+ -+static void __exit mcelog_cleanup(void) -+{ -+ kfree(g_mi); -+ kfree(g_physinfo); -+} -+module_init(mcelog_init); -+module_exit(mcelog_cleanup); -+ -+MODULE_LICENSE("GPL"); -diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile -new file mode 100644 -index 0000000..e346e81 ---- /dev/null -+++ b/drivers/xen/netback/Makefile -@@ -0,0 +1,3 @@ -+obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o -+ -+xen-netback-y := 
netback.o xenbus.o interface.o -diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h -new file mode 100644 -index 0000000..feacf5f ---- /dev/null -+++ b/drivers/xen/netback/common.h -@@ -0,0 +1,329 @@ -+/****************************************************************************** -+ * arch/xen/drivers/netif/backend/common.h -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. 
-+ */ -+ -+#ifndef __NETIF__BACKEND__COMMON_H__ -+#define __NETIF__BACKEND__COMMON_H__ -+ -+#include <linux/version.h> -+#include <linux/module.h> -+#include <linux/interrupt.h> -+#include <linux/slab.h> -+#include <linux/ip.h> -+#include <linux/in.h> -+#include <linux/netdevice.h> -+#include <linux/etherdevice.h> -+#include <linux/wait.h> -+#include <linux/sched.h> -+ -+#include <xen/interface/io/netif.h> -+#include <asm/io.h> -+#include <asm/pgalloc.h> -+#include <xen/interface/grant_table.h> -+#include <xen/grant_table.h> -+#include <xen/xenbus.h> -+ -+#define DPRINTK(_f, _a...) \ -+ pr_debug("(file=%s, line=%d) " _f, \ -+ __FILE__ , __LINE__ , ## _a ) -+#define IPRINTK(fmt, args...) \ -+ printk(KERN_INFO "xen_net: " fmt, ##args) -+#define WPRINTK(fmt, args...) \ -+ printk(KERN_WARNING "xen_net: " fmt, ##args) -+ -+struct xen_netif { -+ /* Unique identifier for this interface. */ -+ domid_t domid; -+ int group; -+ unsigned int handle; -+ -+ u8 fe_dev_addr[6]; -+ -+ /* Physical parameters of the comms window. */ -+ grant_handle_t tx_shmem_handle; -+ grant_ref_t tx_shmem_ref; -+ grant_handle_t rx_shmem_handle; -+ grant_ref_t rx_shmem_ref; -+ unsigned int irq; -+ -+ /* The shared rings and indexes. */ -+ struct xen_netif_tx_back_ring tx; -+ struct xen_netif_rx_back_ring rx; -+ struct vm_struct *tx_comms_area; -+ struct vm_struct *rx_comms_area; -+ -+ /* Flags that must not be set in dev->features */ -+ int features_disabled; -+ -+ /* Frontend feature information. */ -+ u8 can_sg:1; -+ u8 gso:1; -+ u8 gso_prefix:1; -+ u8 csum:1; -+ u8 smart_poll:1; -+ -+ /* Internal feature information. */ -+ u8 can_queue:1; /* can queue packets for receiver? */ -+ -+ /* Allow netif_be_start_xmit() to peek ahead in the rx request -+ * ring. This is a prediction of what rx_req_cons will be once -+ * all queued skbs are put on the ring. */ -+ RING_IDX rx_req_cons_peek; -+ -+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. 
*/ -+ unsigned long credit_bytes; -+ unsigned long credit_usec; -+ unsigned long remaining_credit; -+ struct timer_list credit_timeout; -+ -+ /* Enforce draining of the transmit queue. */ -+ struct timer_list tx_queue_timeout; -+ -+ /* Statistics */ -+ int nr_copied_skbs; -+ -+ /* Miscellaneous private stuff. */ -+ struct list_head list; /* scheduling list */ -+ atomic_t refcnt; -+ struct net_device *dev; -+ struct net_device_stats stats; -+ -+ unsigned int carrier; -+ -+ wait_queue_head_t waiting_to_free; -+}; -+ -+/* -+ * Implement our own carrier flag: the network stack's version causes delays -+ * when the carrier is re-enabled (in particular, dev_activate() may not -+ * immediately be called, which can cause packet loss; also the etherbridge -+ * can be rather lazy in activating its port). -+ */ -+#define netback_carrier_on(netif) ((netif)->carrier = 1) -+#define netback_carrier_off(netif) ((netif)->carrier = 0) -+#define netback_carrier_ok(netif) ((netif)->carrier) -+ -+enum { -+ NETBK_DONT_COPY_SKB, -+ NETBK_DELAYED_COPY_SKB, -+ NETBK_ALWAYS_COPY_SKB, -+}; -+ -+extern int netbk_copy_skb_mode; -+ -+/* Function pointers into netback accelerator plugin modules */ -+struct netback_accel_hooks { -+ struct module *owner; -+ int (*probe)(struct xenbus_device *dev); -+ int (*remove)(struct xenbus_device *dev); -+}; -+ -+/* Structure to track the state of a netback accelerator plugin */ -+struct netback_accelerator { -+ struct list_head link; -+ int id; -+ char *eth_name; -+ atomic_t use_count; -+ struct netback_accel_hooks *hooks; -+}; -+ -+struct backend_info { -+ struct xenbus_device *dev; -+ struct xen_netif *netif; -+ enum xenbus_state frontend_state; -+ struct xenbus_watch hotplug_status_watch; -+ int have_hotplug_status_watch:1; -+ -+ /* State relating to the netback accelerator */ -+ void *netback_accel_priv; -+ /* The accelerator that this backend is currently using */ -+ struct netback_accelerator *accelerator; -+}; -+ -+#define NETBACK_ACCEL_VERSION 
0x00010001 -+ -+/* -+ * Connect an accelerator plugin module to netback. Returns zero on -+ * success, < 0 on error, > 0 (with highest version number supported) -+ * if version mismatch. -+ */ -+extern int netback_connect_accelerator(unsigned version, -+ int id, const char *eth_name, -+ struct netback_accel_hooks *hooks); -+/* Disconnect a previously connected accelerator plugin module */ -+extern void netback_disconnect_accelerator(int id, const char *eth_name); -+ -+ -+extern -+void netback_probe_accelerators(struct backend_info *be, -+ struct xenbus_device *dev); -+extern -+void netback_remove_accelerators(struct backend_info *be, -+ struct xenbus_device *dev); -+extern -+void netif_accel_init(void); -+ -+ -+#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) -+#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) -+ -+void netif_disconnect(struct xen_netif *netif); -+ -+void netif_set_features(struct xen_netif *netif); -+struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle); -+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, -+ unsigned long rx_ring_ref, unsigned int evtchn); -+ -+static inline void netif_get(struct xen_netif *netif) -+{ -+ atomic_inc(&netif->refcnt); -+} -+ -+static inline void netif_put(struct xen_netif *netif) -+{ -+ if (atomic_dec_and_test(&netif->refcnt)) -+ wake_up(&netif->waiting_to_free); -+} -+ -+int netif_xenbus_init(void); -+ -+#define netif_schedulable(netif) \ -+ (netif_running((netif)->dev) && netback_carrier_ok(netif)) -+ -+void netif_schedule_work(struct xen_netif *netif); -+void netif_deschedule_work(struct xen_netif *netif); -+ -+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); -+struct net_device_stats *netif_be_get_stats(struct net_device *dev); -+irqreturn_t netif_be_int(int irq, void *dev_id); -+ -+static inline int netbk_can_queue(struct net_device *dev) -+{ -+ struct xen_netif *netif = 
netdev_priv(dev); -+ return netif->can_queue; -+} -+ -+static inline int netbk_can_sg(struct net_device *dev) -+{ -+ struct xen_netif *netif = netdev_priv(dev); -+ return netif->can_sg; -+} -+ -+struct pending_tx_info { -+ struct xen_netif_tx_request req; -+ struct xen_netif *netif; -+}; -+typedef unsigned int pending_ring_idx_t; -+ -+struct netbk_rx_meta { -+ int id; -+ int size; -+ int gso_size; -+}; -+ -+struct netbk_tx_pending_inuse { -+ struct list_head list; -+ unsigned long alloc_time; -+}; -+ -+#define MAX_PENDING_REQS 256 -+ -+#define MAX_BUFFER_OFFSET PAGE_SIZE -+ -+/* extra field used in struct page */ -+union page_ext { -+ struct { -+#if BITS_PER_LONG < 64 -+#define IDX_WIDTH 8 -+#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH) -+ unsigned int group:GROUP_WIDTH; -+ unsigned int idx:IDX_WIDTH; -+#else -+ unsigned int group, idx; -+#endif -+ } e; -+ void *mapping; -+}; -+ -+struct xen_netbk { -+ union { -+ struct { -+ struct tasklet_struct net_tx_tasklet; -+ struct tasklet_struct net_rx_tasklet; -+ } tasklet; -+ -+ struct { -+ wait_queue_head_t netbk_action_wq; -+ struct task_struct *task; -+ } kthread; -+ }; -+ -+ struct sk_buff_head rx_queue; -+ struct sk_buff_head tx_queue; -+ -+ struct timer_list net_timer; -+ struct timer_list netbk_tx_pending_timer; -+ -+ struct page **mmap_pages; -+ -+ pending_ring_idx_t pending_prod; -+ pending_ring_idx_t pending_cons; -+ pending_ring_idx_t dealloc_prod; -+ pending_ring_idx_t dealloc_cons; -+ -+ struct list_head pending_inuse_head; -+ struct list_head net_schedule_list; -+ -+ /* Protect the net_schedule_list in netif. 
*/ -+ spinlock_t net_schedule_list_lock; -+ -+ atomic_t netfront_count; -+ -+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; -+ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS]; -+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS]; -+ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS]; -+ -+ grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; -+ u16 pending_ring[MAX_PENDING_REQS]; -+ u16 dealloc_ring[MAX_PENDING_REQS]; -+ -+ /* -+ * Each head or fragment can be up to 4096 bytes. Given -+ * MAX_BUFFER_OFFSET of 4096 the worst case is that each -+ * head/fragment uses 2 copy operation. -+ */ -+ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE]; -+ unsigned char rx_notify[NR_IRQS]; -+ u16 notify_list[NET_RX_RING_SIZE]; -+ struct netbk_rx_meta meta[2*NET_RX_RING_SIZE]; -+}; -+ -+extern struct xen_netbk *xen_netbk; -+extern int xen_netbk_group_nr; -+ -+#endif /* __NETIF__BACKEND__COMMON_H__ */ -diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c -new file mode 100644 -index 0000000..2e8508a ---- /dev/null -+++ b/drivers/xen/netback/interface.c -@@ -0,0 +1,475 @@ -+/****************************************************************************** -+ * arch/xen/drivers/netif/backend/interface.c -+ * -+ * Network-device interface management. 
-+ * -+ * Copyright (c) 2004-2005, Keir Fraser -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. -+ */ -+ -+#include "common.h" -+#include <linux/ethtool.h> -+#include <linux/rtnetlink.h> -+ -+#include <xen/events.h> -+#include <asm/xen/hypercall.h> -+ -+/* -+ * Module parameter 'queue_length': -+ * -+ * Enables queuing in the network stack when a client has run out of receive -+ * descriptors. Although this feature can improve receive bandwidth by avoiding -+ * packet loss, it can also result in packets sitting in the 'tx_queue' for -+ * unbounded time. This is bad if those packets hold onto foreign resources. 
-+ * For example, consider a packet that holds onto resources belonging to the -+ * guest for which it is queued (e.g., packet received on vif1.0, destined for -+ * vif1.1 which is not activated in the guest): in this situation the guest -+ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we -+ * run a timer (tx_queue_timeout) to drain the queue when the interface is -+ * blocked. -+ */ -+static unsigned long netbk_queue_length = 32; -+module_param_named(queue_length, netbk_queue_length, ulong, 0644); -+ -+static void netbk_add_netif(struct xen_netbk *netbk, int group_nr, -+ struct xen_netif *netif) -+{ -+ int i; -+ int min_netfront_count; -+ int min_group = 0; -+ min_netfront_count = atomic_read(&netbk[0].netfront_count); -+ for (i = 0; i < group_nr; i++) { -+ int netfront_count = atomic_read(&netbk[i].netfront_count); -+ if (netfront_count < min_netfront_count) { -+ min_group = i; -+ min_netfront_count = netfront_count; -+ } -+ } -+ -+ netif->group = min_group; -+ atomic_inc(&netbk[netif->group].netfront_count); -+} -+ -+static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif) -+{ -+ atomic_dec(&netbk[netif->group].netfront_count); -+} -+ -+static void __netif_up(struct xen_netif *netif) -+{ -+ netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif); -+ enable_irq(netif->irq); -+ netif_schedule_work(netif); -+} -+ -+static void __netif_down(struct xen_netif *netif) -+{ -+ disable_irq(netif->irq); -+ netif_deschedule_work(netif); -+ netbk_remove_netif(xen_netbk, netif); -+} -+ -+static int net_open(struct net_device *dev) -+{ -+ struct xen_netif *netif = netdev_priv(dev); -+ if (netback_carrier_ok(netif)) { -+ __netif_up(netif); -+ netif_start_queue(dev); -+ } -+ return 0; -+} -+ -+static int net_close(struct net_device *dev) -+{ -+ struct xen_netif *netif = netdev_priv(dev); -+ if (netback_carrier_ok(netif)) -+ __netif_down(netif); -+ netif_stop_queue(dev); -+ return 0; -+} -+ -+static int netbk_change_mtu(struct 
net_device *dev, int mtu) -+{ -+ int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; -+ -+ if (mtu > max) -+ return -EINVAL; -+ dev->mtu = mtu; -+ return 0; -+} -+ -+void netif_set_features(struct xen_netif *netif) -+{ -+ struct net_device *dev = netif->dev; -+ int features = dev->features; -+ -+ if (netif->can_sg) -+ features |= NETIF_F_SG; -+ if (netif->gso || netif->gso_prefix) -+ features |= NETIF_F_TSO; -+ if (netif->csum) -+ features |= NETIF_F_IP_CSUM; -+ -+ features &= ~(netif->features_disabled); -+ -+ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) -+ dev->mtu = ETH_DATA_LEN; -+ -+ dev->features = features; -+} -+ -+static int netbk_set_tx_csum(struct net_device *dev, u32 data) -+{ -+ struct xen_netif *netif = netdev_priv(dev); -+ if (data) { -+ if (!netif->csum) -+ return -ENOSYS; -+ netif->features_disabled &= ~NETIF_F_IP_CSUM; -+ } else { -+ netif->features_disabled |= NETIF_F_IP_CSUM; -+ } -+ -+ netif_set_features(netif); -+ return 0; -+} -+ -+static int netbk_set_sg(struct net_device *dev, u32 data) -+{ -+ struct xen_netif *netif = netdev_priv(dev); -+ if (data) { -+ if (!netif->can_sg) -+ return -ENOSYS; -+ netif->features_disabled &= ~NETIF_F_SG; -+ } else { -+ netif->features_disabled |= NETIF_F_SG; -+ } -+ -+ netif_set_features(netif); -+ return 0; -+} -+ -+static int netbk_set_tso(struct net_device *dev, u32 data) -+{ -+ struct xen_netif *netif = netdev_priv(dev); -+ if (data) { -+ if (!netif->gso && !netif->gso_prefix) -+ return -ENOSYS; -+ netif->features_disabled &= ~NETIF_F_TSO; -+ } else { -+ netif->features_disabled |= NETIF_F_TSO; -+ } -+ -+ netif_set_features(netif); -+ return 0; -+} -+ -+static void netbk_get_drvinfo(struct net_device *dev, -+ struct ethtool_drvinfo *info) -+{ -+ strcpy(info->driver, "netbk"); -+ strcpy(info->bus_info, dev_name(dev->dev.parent)); -+} -+ -+static const struct netif_stat { -+ char name[ETH_GSTRING_LEN]; -+ u16 offset; -+} netbk_stats[] = { -+ { "copied_skbs", offsetof(struct xen_netif, 
nr_copied_skbs) }, -+}; -+ -+static int netbk_get_sset_count(struct net_device *dev, int string_set) -+{ -+ switch (string_set) { -+ case ETH_SS_STATS: -+ return ARRAY_SIZE(netbk_stats); -+ default: -+ return -EINVAL; -+ } -+} -+ -+static void netbk_get_ethtool_stats(struct net_device *dev, -+ struct ethtool_stats *stats, u64 * data) -+{ -+ void *netif = netdev_priv(dev); -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) -+ data[i] = *(int *)(netif + netbk_stats[i].offset); -+} -+ -+static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data) -+{ -+ int i; -+ -+ switch (stringset) { -+ case ETH_SS_STATS: -+ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++) -+ memcpy(data + i * ETH_GSTRING_LEN, -+ netbk_stats[i].name, ETH_GSTRING_LEN); -+ break; -+ } -+} -+ -+static struct ethtool_ops network_ethtool_ops = -+{ -+ .get_drvinfo = netbk_get_drvinfo, -+ -+ .get_tx_csum = ethtool_op_get_tx_csum, -+ .set_tx_csum = netbk_set_tx_csum, -+ .get_sg = ethtool_op_get_sg, -+ .set_sg = netbk_set_sg, -+ .get_tso = ethtool_op_get_tso, -+ .set_tso = netbk_set_tso, -+ .get_link = ethtool_op_get_link, -+ -+ .get_sset_count = netbk_get_sset_count, -+ .get_ethtool_stats = netbk_get_ethtool_stats, -+ .get_strings = netbk_get_strings, -+}; -+ -+static struct net_device_ops netback_ops = -+{ -+ .ndo_start_xmit = netif_be_start_xmit, -+ .ndo_get_stats = netif_be_get_stats, -+ .ndo_open = net_open, -+ .ndo_stop = net_close, -+ .ndo_change_mtu = netbk_change_mtu, -+}; -+ -+struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle) -+{ -+ int err = 0; -+ struct net_device *dev; -+ struct xen_netif *netif; -+ char name[IFNAMSIZ] = {}; -+ -+ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); -+ dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup); -+ if (dev == NULL) { -+ DPRINTK("Could not create netif: out of memory\n"); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ SET_NETDEV_DEV(dev, parent); -+ -+ netif = netdev_priv(dev); -+ 
memset(netif, 0, sizeof(*netif)); -+ netif->domid = domid; -+ netif->group = -1; -+ netif->handle = handle; -+ netif->can_sg = 1; -+ netif->csum = 1; -+ atomic_set(&netif->refcnt, 1); -+ init_waitqueue_head(&netif->waiting_to_free); -+ netif->dev = dev; -+ INIT_LIST_HEAD(&netif->list); -+ -+ netback_carrier_off(netif); -+ -+ netif->credit_bytes = netif->remaining_credit = ~0UL; -+ netif->credit_usec = 0UL; -+ init_timer(&netif->credit_timeout); -+ /* Initialize 'expires' now: it's used to track the credit window. */ -+ netif->credit_timeout.expires = jiffies; -+ -+ init_timer(&netif->tx_queue_timeout); -+ -+ dev->netdev_ops = &netback_ops; -+ netif_set_features(netif); -+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops); -+ -+ dev->tx_queue_len = netbk_queue_length; -+ -+ /* -+ * Initialise a dummy MAC address. We choose the numerically -+ * largest non-broadcast address to prevent the address getting -+ * stolen by an Ethernet bridge for STP purposes. -+ * (FE:FF:FF:FF:FF:FF) -+ */ -+ memset(dev->dev_addr, 0xFF, ETH_ALEN); -+ dev->dev_addr[0] &= ~0x01; -+ -+ rtnl_lock(); -+ err = register_netdevice(dev); -+ rtnl_unlock(); -+ if (err) { -+ DPRINTK("Could not register new net device %s: err=%d\n", -+ dev->name, err); -+ free_netdev(dev); -+ return ERR_PTR(err); -+ } -+ -+ DPRINTK("Successfully created netif\n"); -+ return netif; -+} -+ -+static int map_frontend_pages( -+ struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref) -+{ -+ struct gnttab_map_grant_ref op; -+ -+ gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr, -+ GNTMAP_host_map, tx_ring_ref, netif->domid); -+ -+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) -+ BUG(); -+ -+ if (op.status) { -+ DPRINTK(" Gnttab failure mapping tx_ring_ref!\n"); -+ return op.status; -+ } -+ -+ netif->tx_shmem_ref = tx_ring_ref; -+ netif->tx_shmem_handle = op.handle; -+ -+ gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr, -+ GNTMAP_host_map, rx_ring_ref, 
netif->domid); -+ -+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) -+ BUG(); -+ -+ if (op.status) { -+ struct gnttab_unmap_grant_ref unop; -+ -+ gnttab_set_unmap_op(&unop, -+ (unsigned long)netif->tx_comms_area->addr, -+ GNTMAP_host_map, netif->tx_shmem_handle); -+ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1); -+ DPRINTK(" Gnttab failure mapping rx_ring_ref!\n"); -+ return op.status; -+ } -+ -+ netif->rx_shmem_ref = rx_ring_ref; -+ netif->rx_shmem_handle = op.handle; -+ -+ return 0; -+} -+ -+static void unmap_frontend_pages(struct xen_netif *netif) -+{ -+ struct gnttab_unmap_grant_ref op; -+ -+ gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr, -+ GNTMAP_host_map, netif->tx_shmem_handle); -+ -+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) -+ BUG(); -+ -+ gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr, -+ GNTMAP_host_map, netif->rx_shmem_handle); -+ -+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) -+ BUG(); -+} -+ -+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref, -+ unsigned long rx_ring_ref, unsigned int evtchn) -+{ -+ int err = -ENOMEM; -+ struct xen_netif_tx_sring *txs; -+ struct xen_netif_rx_sring *rxs; -+ -+ /* Already connected through? 
*/ -+ if (netif->irq) -+ return 0; -+ -+ netif->tx_comms_area = alloc_vm_area(PAGE_SIZE); -+ if (netif->tx_comms_area == NULL) -+ return -ENOMEM; -+ netif->rx_comms_area = alloc_vm_area(PAGE_SIZE); -+ if (netif->rx_comms_area == NULL) -+ goto err_rx; -+ -+ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref); -+ if (err) -+ goto err_map; -+ -+ err = bind_interdomain_evtchn_to_irqhandler( -+ netif->domid, evtchn, netif_be_int, 0, -+ netif->dev->name, netif); -+ if (err < 0) -+ goto err_hypervisor; -+ netif->irq = err; -+ disable_irq(netif->irq); -+ -+ txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr; -+ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); -+ -+ rxs = (struct xen_netif_rx_sring *) -+ ((char *)netif->rx_comms_area->addr); -+ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); -+ -+ netif->rx_req_cons_peek = 0; -+ -+ netif_get(netif); -+ -+ rtnl_lock(); -+ netback_carrier_on(netif); -+ if (netif_running(netif->dev)) -+ __netif_up(netif); -+ rtnl_unlock(); -+ -+ return 0; -+err_hypervisor: -+ unmap_frontend_pages(netif); -+err_map: -+ free_vm_area(netif->rx_comms_area); -+err_rx: -+ free_vm_area(netif->tx_comms_area); -+ return err; -+} -+ -+void netif_disconnect(struct xen_netif *netif) -+{ -+ if (netback_carrier_ok(netif)) { -+ rtnl_lock(); -+ netback_carrier_off(netif); -+ netif_carrier_off(netif->dev); /* discard queued packets */ -+ if (netif_running(netif->dev)) -+ __netif_down(netif); -+ rtnl_unlock(); -+ netif_put(netif); -+ } -+ -+ atomic_dec(&netif->refcnt); -+ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); -+ -+ del_timer_sync(&netif->credit_timeout); -+ del_timer_sync(&netif->tx_queue_timeout); -+ -+ if (netif->irq) -+ unbind_from_irqhandler(netif->irq, netif); -+ -+ unregister_netdev(netif->dev); -+ -+ if (netif->tx.sring) { -+ unmap_frontend_pages(netif); -+ free_vm_area(netif->tx_comms_area); -+ free_vm_area(netif->rx_comms_area); -+ } -+ -+ free_netdev(netif->dev); -+} -diff --git a/drivers/xen/netback/netback.c 
b/drivers/xen/netback/netback.c -new file mode 100644 -index 0000000..c448675 ---- /dev/null -+++ b/drivers/xen/netback/netback.c -@@ -0,0 +1,1902 @@ -+/****************************************************************************** -+ * drivers/xen/netback/netback.c -+ * -+ * Back-end of the driver for virtual network devices. This portion of the -+ * driver exports a 'unified' network-device interface that can be accessed -+ * by any operating system that implements a compatible front end. A -+ * reference front-end implementation can be found in: -+ * drivers/xen/netfront/netfront.c -+ * -+ * Copyright (c) 2002-2005, K A Fraser -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. -+ */ -+ -+#include "common.h" -+ -+#include <linux/tcp.h> -+#include <linux/udp.h> -+#include <linux/kthread.h> -+ -+#include <xen/balloon.h> -+#include <xen/events.h> -+#include <xen/interface/memory.h> -+ -+#include <asm/xen/hypercall.h> -+#include <asm/xen/page.h> -+ -+/*define NETBE_DEBUG_INTERRUPT*/ -+ -+struct xen_netbk *xen_netbk; -+int xen_netbk_group_nr; -+ -+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx); -+static void make_tx_response(struct xen_netif *netif, -+ struct xen_netif_tx_request *txp, -+ s8 st); -+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, -+ u16 id, -+ s8 st, -+ u16 offset, -+ u16 size, -+ u16 flags); -+ -+static void net_tx_action(unsigned long data); -+ -+static void net_rx_action(unsigned long data); -+ -+static inline unsigned long idx_to_pfn(struct xen_netbk *netbk, -+ unsigned int idx) -+{ -+ return page_to_pfn(netbk->mmap_pages[idx]); -+} -+ -+static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, -+ unsigned int idx) -+{ -+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx)); -+} -+ -+/* extra field used in struct page */ -+static inline void netif_set_page_ext(struct page *pg, unsigned int group, -+ unsigned int idx) -+{ -+ union page_ext ext = { .e = { .group = group + 1, .idx = idx } }; -+ -+ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping)); -+ pg->mapping = ext.mapping; -+} -+ -+static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx) -+{ -+ union page_ext ext = { .mapping = pg->mapping }; -+ struct xen_netbk *netbk; -+ unsigned int group, idx; -+ -+ if (!PageForeign(pg)) -+ return 0; -+ -+ group = ext.e.group - 1; -+ -+ if (group < 0 || 
group >= xen_netbk_group_nr) -+ return 0; -+ -+ netbk = &xen_netbk[group]; -+ -+ if (netbk->mmap_pages == NULL) -+ return 0; -+ -+ idx = ext.e.idx; -+ -+ if ((idx < 0) || (idx >= MAX_PENDING_REQS)) -+ return 0; -+ -+ if (netbk->mmap_pages[idx] != pg) -+ return 0; -+ -+ *_group = group; -+ *_idx = idx; -+ -+ return 1; -+} -+ -+/* -+ * This is the amount of packet we copy rather than map, so that the -+ * guest can't fiddle with the contents of the headers while we do -+ * packet processing on them (netfilter, routing, etc). 72 is enough -+ * to cover TCP+IP headers including options. -+ */ -+#define PKT_PROT_LEN 72 -+ -+static inline pending_ring_idx_t pending_index(unsigned i) -+{ -+ return i & (MAX_PENDING_REQS-1); -+} -+ -+static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk) -+{ -+ return MAX_PENDING_REQS - -+ netbk->pending_prod + netbk->pending_cons; -+} -+ -+/* Setting this allows the safe use of this driver without netloop. */ -+static int MODPARM_copy_skb = 1; -+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0); -+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop"); -+ -+int netbk_copy_skb_mode; -+ -+static int MODPARM_netback_kthread; -+module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0); -+MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet"); -+ -+/* -+ * Netback bottom half handler. -+ * dir indicates the data direction. -+ * rx: 1, tx: 0. 
-+ */ -+static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir) -+{ -+ if (MODPARM_netback_kthread) -+ wake_up(&netbk->kthread.netbk_action_wq); -+ else if (dir) -+ tasklet_schedule(&netbk->tasklet.net_rx_tasklet); -+ else -+ tasklet_schedule(&netbk->tasklet.net_tx_tasklet); -+} -+ -+static inline void maybe_schedule_tx_action(struct xen_netbk *netbk) -+{ -+ smp_mb(); -+ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) && -+ !list_empty(&netbk->net_schedule_list)) -+ xen_netbk_bh_handler(netbk, 0); -+} -+ -+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) -+{ -+ struct skb_shared_info *ninfo; -+ struct sk_buff *nskb; -+ unsigned long offset; -+ int ret; -+ int len; -+ int headlen; -+ -+ BUG_ON(skb_shinfo(skb)->frag_list != NULL); -+ -+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN); -+ if (unlikely(!nskb)) -+ goto err; -+ -+ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN); -+ headlen = skb_end_pointer(nskb) - nskb->data; -+ if (headlen > skb_headlen(skb)) -+ headlen = skb_headlen(skb); -+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); -+ BUG_ON(ret); -+ -+ ninfo = skb_shinfo(nskb); -+ ninfo->gso_size = skb_shinfo(skb)->gso_size; -+ ninfo->gso_type = skb_shinfo(skb)->gso_type; -+ -+ offset = headlen; -+ len = skb->len - headlen; -+ -+ nskb->len = skb->len; -+ nskb->data_len = len; -+ nskb->truesize += len; -+ -+ while (len) { -+ struct page *page; -+ int copy; -+ int zero; -+ -+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) { -+ dump_stack(); -+ goto err_free; -+ } -+ -+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len; -+ zero = len >= PAGE_SIZE ? 
0 : __GFP_ZERO; -+ -+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero); -+ if (unlikely(!page)) -+ goto err_free; -+ -+ ret = skb_copy_bits(skb, offset, page_address(page), copy); -+ BUG_ON(ret); -+ -+ ninfo->frags[ninfo->nr_frags].page = page; -+ ninfo->frags[ninfo->nr_frags].page_offset = 0; -+ ninfo->frags[ninfo->nr_frags].size = copy; -+ ninfo->nr_frags++; -+ -+ offset += copy; -+ len -= copy; -+ } -+ -+#ifdef NET_SKBUFF_DATA_USES_OFFSET -+ offset = 0; -+#else -+ offset = nskb->data - skb->data; -+#endif -+ -+ nskb->transport_header = skb->transport_header + offset; -+ nskb->network_header = skb->network_header + offset; -+ nskb->mac_header = skb->mac_header + offset; -+ -+ return nskb; -+ -+ err_free: -+ kfree_skb(nskb); -+ err: -+ return NULL; -+} -+ -+static inline int netbk_max_required_rx_slots(struct xen_netif *netif) -+{ -+ if (netif->can_sg || netif->gso || netif->gso_prefix) -+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */ -+ return 1; /* all in one */ -+} -+ -+static inline int netbk_queue_full(struct xen_netif *netif) -+{ -+ RING_IDX peek = netif->rx_req_cons_peek; -+ RING_IDX needed = netbk_max_required_rx_slots(netif); -+ -+ return ((netif->rx.sring->req_prod - peek) < needed) || -+ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed); -+} -+ -+static void tx_queue_callback(unsigned long data) -+{ -+ struct xen_netif *netif = (struct xen_netif *)data; -+ if (netif_schedulable(netif)) -+ netif_wake_queue(netif->dev); -+} -+ -+/* Figure out how many ring slots we're going to need to send @skb to -+ the guest. 
*/ -+static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif) -+{ -+ unsigned count; -+ unsigned copy_off; -+ unsigned i; -+ -+ copy_off = 0; -+ count = 1; -+ -+ BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET); -+ -+ copy_off = skb_headlen(skb); -+ -+ if (skb_shinfo(skb)->gso_size) -+ count++; -+ -+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { -+ unsigned long size = skb_shinfo(skb)->frags[i].size; -+ unsigned long bytes; -+ while (size > 0) { -+ BUG_ON(copy_off > MAX_BUFFER_OFFSET); -+ -+ /* These checks are the same as in netbk_gop_frag_copy */ -+ if (copy_off == MAX_BUFFER_OFFSET -+ || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) { -+ count++; -+ copy_off = 0; -+ } -+ -+ bytes = size; -+ if (copy_off + bytes > MAX_BUFFER_OFFSET) -+ bytes = MAX_BUFFER_OFFSET - copy_off; -+ -+ copy_off += bytes; -+ size -= bytes; -+ } -+ } -+ return count; -+} -+ -+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) -+{ -+ struct xen_netif *netif = netdev_priv(dev); -+ struct xen_netbk *netbk; -+ -+ BUG_ON(skb->dev != dev); -+ -+ if (netif->group == -1) -+ goto drop; -+ -+ netbk = &xen_netbk[netif->group]; -+ -+ /* Drop the packet if the target domain has no receive buffers. */ -+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif))) -+ goto drop; -+ -+ /* -+ * XXX For now we also copy skbuffs whose head crosses a page -+ * boundary, because netbk_gop_skb can't handle them. -+ */ -+ if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) { -+ struct sk_buff *nskb = netbk_copy_skb(skb); -+ if ( unlikely(nskb == NULL) ) -+ goto drop; -+ /* Copy only the header fields we use in this driver. */ -+ nskb->dev = skb->dev; -+ nskb->ip_summed = skb->ip_summed; -+ dev_kfree_skb(skb); -+ skb = nskb; -+ } -+ -+ /* Reserve ring slots for the worst-case number of -+ * fragments. 
*/ -+ netif->rx_req_cons_peek += count_skb_slots(skb, netif); -+ netif_get(netif); -+ -+ if (netbk_can_queue(dev) && netbk_queue_full(netif)) { -+ netif->rx.sring->req_event = netif->rx_req_cons_peek + -+ netbk_max_required_rx_slots(netif); -+ mb(); /* request notification /then/ check & stop the queue */ -+ if (netbk_queue_full(netif)) { -+ netif_stop_queue(dev); -+ /* -+ * Schedule 500ms timeout to restart the queue, thus -+ * ensuring that an inactive queue will be drained. -+ * Packets will be immediately be dropped until more -+ * receive buffers become available (see -+ * netbk_queue_full() check above). -+ */ -+ netif->tx_queue_timeout.data = (unsigned long)netif; -+ netif->tx_queue_timeout.function = tx_queue_callback; -+ mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2); -+ } -+ } -+ skb_queue_tail(&netbk->rx_queue, skb); -+ -+ xen_netbk_bh_handler(netbk, 1); -+ -+ return 0; -+ -+ drop: -+ netif->stats.tx_dropped++; -+ dev_kfree_skb(skb); -+ return 0; -+} -+ -+struct netrx_pending_operations { -+ unsigned copy_prod, copy_cons; -+ unsigned meta_prod, meta_cons; -+ struct gnttab_copy *copy; -+ struct netbk_rx_meta *meta; -+ int copy_off; -+ grant_ref_t copy_gref; -+}; -+ -+/* Set up the grant operations for this fragment. If it's a flipping -+ interface, we also set up the unmap request from here. */ -+ -+static void netbk_gop_frag_copy(struct xen_netif *netif, -+ struct netrx_pending_operations *npo, -+ struct page *page, unsigned long size, -+ unsigned long offset, int head) -+{ -+ struct gnttab_copy *copy_gop; -+ struct netbk_rx_meta *meta; -+ /* -+ * These variables a used iff netif_get_page_ext returns true, -+ * in which case they are guaranteed to be initialized. -+ */ -+ unsigned int uninitialized_var(group), uninitialized_var(idx); -+ int foreign = netif_get_page_ext(page, &group, &idx); -+ unsigned long bytes; -+ -+ /* Data must not cross a page boundary. 
*/ -+ BUG_ON(size + offset > PAGE_SIZE); -+ -+ meta = npo->meta + npo->meta_prod - 1; -+ -+ while (size > 0) { -+ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); -+ -+ /* -+ * Move to a new receive buffer if: -+ * -+ * simple case: we have completely filled the current buffer. -+ * -+ * complex case: the current frag would overflow -+ * the current buffer but only if: -+ * (i) this frag would fit completely in the next buffer -+ * and (ii) there is already some data in the current buffer -+ * and (iii) this is not the head buffer. -+ * -+ * Where: -+ * - (i) stops us splitting a frag into two copies -+ * unless the frag is too large for a single buffer. -+ * - (ii) stops us from leaving a buffer pointlessly empty. -+ * - (iii) stops us leaving the first buffer -+ * empty. Strictly speaking this is already covered -+ * by (ii) but is explicitly checked because -+ * netfront relies on the first buffer being -+ * non-empty and can crash otherwise. -+ * -+ * This means we will effectively linearise small -+ * frags but do not needlessly split large buffers -+ * into multiple copies tend to give large frags their -+ * own buffers as before. -+ */ -+ if (npo->copy_off == MAX_BUFFER_OFFSET -+ || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) { -+ struct xen_netif_rx_request *req; -+ -+ BUG_ON(head); /* Netfront requires there to be some data in the head buffer. 
*/ -+ /* Overflowed this request, go to the next one */ -+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); -+ meta = npo->meta + npo->meta_prod++; -+ meta->gso_size = 0; -+ meta->size = 0; -+ meta->id = req->id; -+ npo->copy_off = 0; -+ npo->copy_gref = req->gref; -+ } -+ -+ bytes = size; -+ if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) -+ bytes = MAX_BUFFER_OFFSET - npo->copy_off; -+ -+ copy_gop = npo->copy + npo->copy_prod++; -+ copy_gop->flags = GNTCOPY_dest_gref; -+ if (foreign) { -+ struct xen_netbk *netbk = &xen_netbk[group]; -+ struct pending_tx_info *src_pend; -+ -+ src_pend = &netbk->pending_tx_info[idx]; -+ -+ copy_gop->source.domid = src_pend->netif->domid; -+ copy_gop->source.u.ref = src_pend->req.gref; -+ copy_gop->flags |= GNTCOPY_source_gref; -+ } else { -+ copy_gop->source.domid = DOMID_SELF; -+ copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); -+ } -+ copy_gop->source.offset = offset; -+ copy_gop->dest.domid = netif->domid; -+ -+ copy_gop->dest.offset = npo->copy_off; -+ copy_gop->dest.u.ref = npo->copy_gref; -+ copy_gop->len = bytes; -+ -+ npo->copy_off += bytes; -+ meta->size += bytes; -+ -+ offset += bytes; -+ size -= bytes; -+ head = 0; /* Must be something in this buffer now */ -+ } -+} -+ -+/* Prepare an SKB to be transmitted to the frontend. This is -+ responsible for allocating grant operations, meta structures, etc. -+ It returns the number of meta structures consumed. The number of -+ ring slots used is always equal to the number of meta slots used -+ plus the number of GSO descriptors used. Currently, we use either -+ zero GSO descriptors (for non-GSO packets) or one descriptor (for -+ frontend-side LRO). 
*/ -+static int netbk_gop_skb(struct sk_buff *skb, -+ struct netrx_pending_operations *npo) -+{ -+ struct xen_netif *netif = netdev_priv(skb->dev); -+ int nr_frags = skb_shinfo(skb)->nr_frags; -+ int i; -+ struct xen_netif_rx_request *req; -+ struct netbk_rx_meta *meta; -+ int old_meta_prod; -+ -+ old_meta_prod = npo->meta_prod; -+ -+ /* Set up a GSO prefix descriptor, if necessary */ -+ if (skb_shinfo(skb)->gso_size && netif->gso_prefix) { -+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); -+ meta = npo->meta + npo->meta_prod++; -+ meta->gso_size = skb_shinfo(skb)->gso_size; -+ meta->size = 0; -+ meta->id = req->id; -+ } -+ -+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); -+ meta = npo->meta + npo->meta_prod++; -+ -+ if (!netif->gso_prefix) -+ meta->gso_size = skb_shinfo(skb)->gso_size; -+ else -+ meta->gso_size = 0; -+ -+ meta->size = 0; -+ meta->id = req->id; -+ npo->copy_off = 0; -+ npo->copy_gref = req->gref; -+ -+ netbk_gop_frag_copy(netif, -+ npo, virt_to_page(skb->data), -+ skb_headlen(skb), -+ offset_in_page(skb->data), 1); -+ -+ /* Leave a gap for the GSO descriptor. */ -+ if (skb_shinfo(skb)->gso_size && !netif->gso_prefix) -+ netif->rx.req_cons++; -+ -+ for (i = 0; i < nr_frags; i++) { -+ netbk_gop_frag_copy(netif, npo, -+ skb_shinfo(skb)->frags[i].page, -+ skb_shinfo(skb)->frags[i].size, -+ skb_shinfo(skb)->frags[i].page_offset, -+ 0); -+ } -+ -+ return npo->meta_prod - old_meta_prod; -+} -+ -+/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was -+ used to set up the operations on the top of -+ netrx_pending_operations, which have since been done. Check that -+ they didn't give any errors and advance over them. 
*/ -+static int netbk_check_gop(int nr_meta_slots, domid_t domid, -+ struct netrx_pending_operations *npo) -+{ -+ struct gnttab_copy *copy_op; -+ int status = NETIF_RSP_OKAY; -+ int i; -+ -+ for (i = 0; i < nr_meta_slots; i++) { -+ copy_op = npo->copy + npo->copy_cons++; -+ if (copy_op->status != GNTST_okay) { -+ DPRINTK("Bad status %d from copy to DOM%d.\n", -+ copy_op->status, domid); -+ status = NETIF_RSP_ERROR; -+ } -+ } -+ -+ return status; -+} -+ -+static void netbk_add_frag_responses(struct xen_netif *netif, int status, -+ struct netbk_rx_meta *meta, -+ int nr_meta_slots) -+{ -+ int i; -+ unsigned long offset; -+ -+ for (i = 0; i < nr_meta_slots; i++) { -+ int flags; -+ if (i == nr_meta_slots - 1) -+ flags = 0; -+ else -+ flags = NETRXF_more_data; -+ -+ offset = 0; -+ make_rx_response(netif, meta[i].id, status, offset, -+ meta[i].size, flags); -+ } -+} -+ -+struct skb_cb_overlay { -+ int meta_slots_used; -+}; -+ -+static void net_rx_action(unsigned long data) -+{ -+ struct xen_netif *netif = NULL; -+ struct xen_netbk *netbk = (struct xen_netbk *)data; -+ s8 status; -+ u16 irq, flags; -+ struct xen_netif_rx_response *resp; -+ struct sk_buff_head rxq; -+ struct sk_buff *skb; -+ int notify_nr = 0; -+ int ret; -+ int nr_frags; -+ int count; -+ unsigned long offset; -+ struct skb_cb_overlay *sco; -+ -+ struct netrx_pending_operations npo = { -+ .copy = netbk->grant_copy_op, -+ .meta = netbk->meta, -+ }; -+ -+ skb_queue_head_init(&rxq); -+ -+ count = 0; -+ -+ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) { -+ netif = netdev_priv(skb->dev); -+ nr_frags = skb_shinfo(skb)->nr_frags; -+ -+ sco = (struct skb_cb_overlay *)skb->cb; -+ sco->meta_slots_used = netbk_gop_skb(skb, &npo); -+ -+ count += nr_frags + 1; -+ -+ __skb_queue_tail(&rxq, skb); -+ -+ /* Filled the batch queue? 
*/ -+ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE) -+ break; -+ } -+ -+ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta)); -+ -+ if (!npo.copy_prod) -+ return; -+ -+ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); -+ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op, -+ npo.copy_prod); -+ BUG_ON(ret != 0); -+ -+ while ((skb = __skb_dequeue(&rxq)) != NULL) { -+ sco = (struct skb_cb_overlay *)skb->cb; -+ -+ netif = netdev_priv(skb->dev); -+ -+ if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) { -+ resp = RING_GET_RESPONSE(&netif->rx, -+ netif->rx.rsp_prod_pvt++); -+ -+ resp->flags = NETRXF_gso_prefix | NETRXF_more_data; -+ -+ resp->offset = netbk->meta[npo.meta_cons].gso_size; -+ resp->id = netbk->meta[npo.meta_cons].id; -+ resp->status = sco->meta_slots_used; -+ -+ npo.meta_cons++; -+ sco->meta_slots_used--; -+ } -+ -+ -+ netif->stats.tx_bytes += skb->len; -+ netif->stats.tx_packets++; -+ -+ status = netbk_check_gop(sco->meta_slots_used, -+ netif->domid, &npo); -+ -+ if (sco->meta_slots_used == 1) -+ flags = 0; -+ else -+ flags = NETRXF_more_data; -+ -+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ -+ flags |= NETRXF_csum_blank | NETRXF_data_validated; -+ else if (skb->ip_summed == CHECKSUM_UNNECESSARY) -+ /* remote but checksummed. 
*/ -+ flags |= NETRXF_data_validated; -+ -+ offset = 0; -+ resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id, -+ status, offset, -+ netbk->meta[npo.meta_cons].size, -+ flags); -+ -+ if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) { -+ struct xen_netif_extra_info *gso = -+ (struct xen_netif_extra_info *) -+ RING_GET_RESPONSE(&netif->rx, -+ netif->rx.rsp_prod_pvt++); -+ -+ resp->flags |= NETRXF_extra_info; -+ -+ gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size; -+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; -+ gso->u.gso.pad = 0; -+ gso->u.gso.features = 0; -+ -+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO; -+ gso->flags = 0; -+ } -+ -+ if (sco->meta_slots_used > 1) { -+ netbk_add_frag_responses(netif, status, -+ netbk->meta + npo.meta_cons + 1, -+ sco->meta_slots_used - 1); -+ } -+ -+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); -+ irq = netif->irq; -+ if (ret && !netbk->rx_notify[irq] && -+ (netif->smart_poll != 1)) { -+ netbk->rx_notify[irq] = 1; -+ netbk->notify_list[notify_nr++] = irq; -+ } -+ -+ if (netif_queue_stopped(netif->dev) && -+ netif_schedulable(netif) && -+ !netbk_queue_full(netif)) -+ netif_wake_queue(netif->dev); -+ -+ /* -+ * netfront_smartpoll_active indicates whether -+ * netfront timer is active. -+ */ -+ if ((netif->smart_poll == 1) && -+ !(netif->rx.sring->private.netif.smartpoll_active)) { -+ notify_remote_via_irq(irq); -+ netif->rx.sring->private.netif.smartpoll_active = 1; -+ } -+ -+ netif_put(netif); -+ npo.meta_cons += sco->meta_slots_used; -+ dev_kfree_skb(skb); -+ } -+ -+ while (notify_nr != 0) { -+ irq = netbk->notify_list[--notify_nr]; -+ netbk->rx_notify[irq] = 0; -+ notify_remote_via_irq(irq); -+ } -+ -+ /* More work to do? 
*/ -+ if (!skb_queue_empty(&netbk->rx_queue) && -+ !timer_pending(&netbk->net_timer)) -+ xen_netbk_bh_handler(netbk, 1); -+} -+ -+static void net_alarm(unsigned long data) -+{ -+ struct xen_netbk *netbk = (struct xen_netbk *)data; -+ xen_netbk_bh_handler(netbk, 1); -+} -+ -+static void netbk_tx_pending_timeout(unsigned long data) -+{ -+ struct xen_netbk *netbk = (struct xen_netbk *)data; -+ xen_netbk_bh_handler(netbk, 0); -+} -+ -+struct net_device_stats *netif_be_get_stats(struct net_device *dev) -+{ -+ struct xen_netif *netif = netdev_priv(dev); -+ return &netif->stats; -+} -+ -+static int __on_net_schedule_list(struct xen_netif *netif) -+{ -+ return !list_empty(&netif->list); -+} -+ -+/* Must be called with net_schedule_list_lock held */ -+static void remove_from_net_schedule_list(struct xen_netif *netif) -+{ -+ if (likely(__on_net_schedule_list(netif))) { -+ list_del_init(&netif->list); -+ netif_put(netif); -+ } -+} -+ -+static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk) -+{ -+ struct xen_netif *netif = NULL; -+ -+ spin_lock_irq(&netbk->net_schedule_list_lock); -+ if (list_empty(&netbk->net_schedule_list)) -+ goto out; -+ -+ netif = list_first_entry(&netbk->net_schedule_list, -+ struct xen_netif, list); -+ if (!netif) -+ goto out; -+ -+ netif_get(netif); -+ -+ remove_from_net_schedule_list(netif); -+out: -+ spin_unlock_irq(&netbk->net_schedule_list_lock); -+ return netif; -+} -+ -+static void add_to_net_schedule_list_tail(struct xen_netif *netif) -+{ -+ unsigned long flags; -+ -+ struct xen_netbk *netbk = &xen_netbk[netif->group]; -+ if (__on_net_schedule_list(netif)) -+ return; -+ -+ spin_lock_irqsave(&netbk->net_schedule_list_lock, flags); -+ if (!__on_net_schedule_list(netif) && -+ likely(netif_schedulable(netif))) { -+ list_add_tail(&netif->list, &netbk->net_schedule_list); -+ netif_get(netif); -+ } -+ spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags); -+} -+ -+void netif_schedule_work(struct xen_netif *netif) -+{ -+ struct 
xen_netbk *netbk = &xen_netbk[netif->group]; -+ int more_to_do; -+ -+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); -+ -+ if (more_to_do) { -+ add_to_net_schedule_list_tail(netif); -+ maybe_schedule_tx_action(netbk); -+ } -+} -+ -+void netif_deschedule_work(struct xen_netif *netif) -+{ -+ struct xen_netbk *netbk = &xen_netbk[netif->group]; -+ spin_lock_irq(&netbk->net_schedule_list_lock); -+ remove_from_net_schedule_list(netif); -+ spin_unlock_irq(&netbk->net_schedule_list_lock); -+} -+ -+ -+static void tx_add_credit(struct xen_netif *netif) -+{ -+ unsigned long max_burst, max_credit; -+ -+ /* -+ * Allow a burst big enough to transmit a jumbo packet of up to 128kB. -+ * Otherwise the interface can seize up due to insufficient credit. -+ */ -+ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size; -+ max_burst = min(max_burst, 131072UL); -+ max_burst = max(max_burst, netif->credit_bytes); -+ -+ /* Take care that adding a new chunk of credit doesn't wrap to zero. */ -+ max_credit = netif->remaining_credit + netif->credit_bytes; -+ if (max_credit < netif->remaining_credit) -+ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ -+ -+ netif->remaining_credit = min(max_credit, max_burst); -+} -+ -+static void tx_credit_callback(unsigned long data) -+{ -+ struct xen_netif *netif = (struct xen_netif *)data; -+ tx_add_credit(netif); -+ netif_schedule_work(netif); -+} -+ -+static inline int copy_pending_req(struct xen_netbk *netbk, -+ pending_ring_idx_t pending_idx) -+{ -+ return gnttab_copy_grant_page( -+ netbk->grant_tx_handle[pending_idx], -+ &netbk->mmap_pages[pending_idx]); -+} -+ -+static inline void net_tx_action_dealloc(struct xen_netbk *netbk) -+{ -+ struct netbk_tx_pending_inuse *inuse, *n; -+ struct gnttab_unmap_grant_ref *gop; -+ u16 pending_idx; -+ pending_ring_idx_t dc, dp; -+ struct xen_netif *netif; -+ int ret; -+ LIST_HEAD(list); -+ -+ dc = netbk->dealloc_cons; -+ gop = netbk->tx_unmap_ops; -+ -+ /* -+ * Free up any grants we have 
finished using -+ */ -+ do { -+ dp = netbk->dealloc_prod; -+ -+ /* Ensure we see all indices enqueued by netif_idx_release(). */ -+ smp_rmb(); -+ -+ while (dc != dp) { -+ unsigned long pfn; -+ struct netbk_tx_pending_inuse *pending_inuse = -+ netbk->pending_inuse; -+ -+ pending_idx = netbk->dealloc_ring[pending_index(dc++)]; -+ list_move_tail(&pending_inuse[pending_idx].list, &list); -+ -+ pfn = idx_to_pfn(netbk, pending_idx); -+ /* Already unmapped? */ -+ if (!phys_to_machine_mapping_valid(pfn)) -+ continue; -+ -+ gnttab_set_unmap_op(gop, -+ idx_to_kaddr(netbk, pending_idx), -+ GNTMAP_host_map, -+ netbk->grant_tx_handle[pending_idx]); -+ gop++; -+ } -+ -+ if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB || -+ list_empty(&netbk->pending_inuse_head)) -+ break; -+ -+ /* Copy any entries that have been pending for too long. */ -+ list_for_each_entry_safe(inuse, n, -+ &netbk->pending_inuse_head, list) { -+ struct pending_tx_info *pending_tx_info; -+ pending_tx_info = netbk->pending_tx_info; -+ -+ if (time_after(inuse->alloc_time + HZ / 2, jiffies)) -+ break; -+ -+ pending_idx = inuse - netbk->pending_inuse; -+ -+ pending_tx_info[pending_idx].netif->nr_copied_skbs++; -+ -+ switch (copy_pending_req(netbk, pending_idx)) { -+ case 0: -+ list_move_tail(&inuse->list, &list); -+ continue; -+ case -EBUSY: -+ list_del_init(&inuse->list); -+ continue; -+ case -ENOENT: -+ continue; -+ } -+ -+ break; -+ } -+ } while (dp != netbk->dealloc_prod); -+ -+ netbk->dealloc_cons = dc; -+ -+ ret = HYPERVISOR_grant_table_op( -+ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops, -+ gop - netbk->tx_unmap_ops); -+ BUG_ON(ret); -+ -+ list_for_each_entry_safe(inuse, n, &list, list) { -+ struct pending_tx_info *pending_tx_info; -+ pending_ring_idx_t index; -+ -+ pending_tx_info = netbk->pending_tx_info; -+ pending_idx = inuse - netbk->pending_inuse; -+ -+ netif = pending_tx_info[pending_idx].netif; -+ -+ make_tx_response(netif, &pending_tx_info[pending_idx].req, -+ NETIF_RSP_OKAY); -+ -+ /* Ready for 
next use. */ -+ gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]); -+ -+ index = pending_index(netbk->pending_prod++); -+ netbk->pending_ring[index] = pending_idx; -+ -+ netif_put(netif); -+ -+ list_del_init(&inuse->list); -+ } -+} -+ -+static void netbk_tx_err(struct xen_netif *netif, -+ struct xen_netif_tx_request *txp, RING_IDX end) -+{ -+ RING_IDX cons = netif->tx.req_cons; -+ -+ do { -+ make_tx_response(netif, txp, NETIF_RSP_ERROR); -+ if (cons >= end) -+ break; -+ txp = RING_GET_REQUEST(&netif->tx, cons++); -+ } while (1); -+ netif->tx.req_cons = cons; -+ netif_schedule_work(netif); -+ netif_put(netif); -+} -+ -+static int netbk_count_requests(struct xen_netif *netif, -+ struct xen_netif_tx_request *first, -+ struct xen_netif_tx_request *txp, int work_to_do) -+{ -+ RING_IDX cons = netif->tx.req_cons; -+ int frags = 0; -+ -+ if (!(first->flags & NETTXF_more_data)) -+ return 0; -+ -+ do { -+ if (frags >= work_to_do) { -+ DPRINTK("Need more frags\n"); -+ return -frags; -+ } -+ -+ if (unlikely(frags >= MAX_SKB_FRAGS)) { -+ DPRINTK("Too many frags\n"); -+ return -frags; -+ } -+ -+ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags), -+ sizeof(*txp)); -+ if (txp->size > first->size) { -+ DPRINTK("Frags galore\n"); -+ return -frags; -+ } -+ -+ first->size -= txp->size; -+ frags++; -+ -+ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { -+ DPRINTK("txp->offset: %x, size: %u\n", -+ txp->offset, txp->size); -+ return -frags; -+ } -+ } while ((txp++)->flags & NETTXF_more_data); -+ -+ return frags; -+} -+ -+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk, -+ struct xen_netif *netif, -+ struct sk_buff *skb, -+ struct xen_netif_tx_request *txp, -+ struct gnttab_map_grant_ref *mop) -+{ -+ struct skb_shared_info *shinfo = skb_shinfo(skb); -+ skb_frag_t *frags = shinfo->frags; -+ unsigned long pending_idx = *((u16 *)skb->data); -+ int i, start; -+ -+ /* Skip first skb fragment if it is on same page as header fragment. 
*/ -+ start = ((unsigned long)shinfo->frags[0].page == pending_idx); -+ -+ for (i = start; i < shinfo->nr_frags; i++, txp++) { -+ pending_ring_idx_t index; -+ struct pending_tx_info *pending_tx_info = -+ netbk->pending_tx_info; -+ -+ index = pending_index(netbk->pending_cons++); -+ pending_idx = netbk->pending_ring[index]; -+ -+ gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx), -+ GNTMAP_host_map | GNTMAP_readonly, -+ txp->gref, netif->domid); -+ -+ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); -+ netif_get(netif); -+ pending_tx_info[pending_idx].netif = netif; -+ frags[i].page = (void *)pending_idx; -+ } -+ -+ return mop; -+} -+ -+static int netbk_tx_check_mop(struct xen_netbk *netbk, -+ struct sk_buff *skb, -+ struct gnttab_map_grant_ref **mopp) -+{ -+ struct gnttab_map_grant_ref *mop = *mopp; -+ int pending_idx = *((u16 *)skb->data); -+ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; -+ struct xen_netif *netif = pending_tx_info[pending_idx].netif; -+ struct xen_netif_tx_request *txp; -+ struct skb_shared_info *shinfo = skb_shinfo(skb); -+ int nr_frags = shinfo->nr_frags; -+ int i, err, start; -+ -+ /* Check status of header. */ -+ err = mop->status; -+ if (unlikely(err)) { -+ pending_ring_idx_t index; -+ index = pending_index(netbk->pending_prod++); -+ txp = &pending_tx_info[pending_idx].req; -+ make_tx_response(netif, txp, NETIF_RSP_ERROR); -+ netbk->pending_ring[index] = pending_idx; -+ netif_put(netif); -+ } else { -+ set_phys_to_machine( -+ __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT, -+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); -+ netbk->grant_tx_handle[pending_idx] = mop->handle; -+ } -+ -+ /* Skip first skb fragment if it is on same page as header fragment. 
*/ -+ start = ((unsigned long)shinfo->frags[0].page == pending_idx); -+ -+ for (i = start; i < nr_frags; i++) { -+ int j, newerr; -+ pending_ring_idx_t index; -+ -+ pending_idx = (unsigned long)shinfo->frags[i].page; -+ -+ /* Check error status: if okay then remember grant handle. */ -+ newerr = (++mop)->status; -+ if (likely(!newerr)) { -+ unsigned long addr; -+ addr = idx_to_kaddr(netbk, pending_idx); -+ set_phys_to_machine( -+ __pa(addr)>>PAGE_SHIFT, -+ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); -+ netbk->grant_tx_handle[pending_idx] = mop->handle; -+ /* Had a previous error? Invalidate this fragment. */ -+ if (unlikely(err)) -+ netif_idx_release(netbk, pending_idx); -+ continue; -+ } -+ -+ /* Error on this fragment: respond to client with an error. */ -+ txp = &netbk->pending_tx_info[pending_idx].req; -+ make_tx_response(netif, txp, NETIF_RSP_ERROR); -+ index = pending_index(netbk->pending_prod++); -+ netbk->pending_ring[index] = pending_idx; -+ netif_put(netif); -+ -+ /* Not the first error? Preceding frags already invalidated. */ -+ if (err) -+ continue; -+ -+ /* First error: invalidate header and preceding fragments. */ -+ pending_idx = *((u16 *)skb->data); -+ netif_idx_release(netbk, pending_idx); -+ for (j = start; j < i; j++) { -+ pending_idx = (unsigned long)shinfo->frags[i].page; -+ netif_idx_release(netbk, pending_idx); -+ } -+ -+ /* Remember the error: invalidate all subsequent fragments. 
*/ -+ err = newerr; -+ } -+ -+ *mopp = mop + 1; -+ return err; -+} -+ -+static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) -+{ -+ struct skb_shared_info *shinfo = skb_shinfo(skb); -+ int nr_frags = shinfo->nr_frags; -+ int i; -+ -+ for (i = 0; i < nr_frags; i++) { -+ skb_frag_t *frag = shinfo->frags + i; -+ struct xen_netif_tx_request *txp; -+ unsigned long pending_idx; -+ -+ pending_idx = (unsigned long)frag->page; -+ -+ netbk->pending_inuse[pending_idx].alloc_time = jiffies; -+ list_add_tail(&netbk->pending_inuse[pending_idx].list, -+ &netbk->pending_inuse_head); -+ -+ txp = &netbk->pending_tx_info[pending_idx].req; -+ frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx)); -+ frag->size = txp->size; -+ frag->page_offset = txp->offset; -+ -+ skb->len += txp->size; -+ skb->data_len += txp->size; -+ skb->truesize += txp->size; -+ } -+} -+ -+int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras, -+ int work_to_do) -+{ -+ struct xen_netif_extra_info extra; -+ RING_IDX cons = netif->tx.req_cons; -+ -+ do { -+ if (unlikely(work_to_do-- <= 0)) { -+ DPRINTK("Missing extra info\n"); -+ return -EBADR; -+ } -+ -+ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons), -+ sizeof(extra)); -+ if (unlikely(!extra.type || -+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { -+ netif->tx.req_cons = ++cons; -+ DPRINTK("Invalid extra type: %d\n", extra.type); -+ return -EINVAL; -+ } -+ -+ memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); -+ netif->tx.req_cons = ++cons; -+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); -+ -+ return work_to_do; -+} -+ -+static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso) -+{ -+ if (!gso->u.gso.size) { -+ DPRINTK("GSO size must not be zero.\n"); -+ return -EINVAL; -+ } -+ -+ /* Currently only TCPv4 S.O. is supported. 
*/ -+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { -+ DPRINTK("Bad GSO type %d.\n", gso->u.gso.type); -+ return -EINVAL; -+ } -+ -+ skb_shinfo(skb)->gso_size = gso->u.gso.size; -+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; -+ -+ /* Header must be checked, and gso_segs computed. */ -+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; -+ skb_shinfo(skb)->gso_segs = 0; -+ -+ return 0; -+} -+ -+static int skb_checksum_setup(struct sk_buff *skb) -+{ -+ struct iphdr *iph; -+ unsigned char *th; -+ int err = -EPROTO; -+ -+ if (skb->protocol != htons(ETH_P_IP)) -+ goto out; -+ -+ iph = (void *)skb->data; -+ th = skb->data + 4 * iph->ihl; -+ if (th >= skb_tail_pointer(skb)) -+ goto out; -+ -+ skb->csum_start = th - skb->head; -+ switch (iph->protocol) { -+ case IPPROTO_TCP: -+ skb->csum_offset = offsetof(struct tcphdr, check); -+ break; -+ case IPPROTO_UDP: -+ skb->csum_offset = offsetof(struct udphdr, check); -+ break; -+ default: -+ if (net_ratelimit()) -+ printk(KERN_ERR "Attempting to checksum a non-" -+ "TCP/UDP packet, dropping a protocol" -+ " %d packet", iph->protocol); -+ goto out; -+ } -+ -+ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) -+ goto out; -+ -+ err = 0; -+ -+out: -+ return err; -+} -+ -+static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size) -+{ -+ unsigned long now = jiffies; -+ unsigned long next_credit = -+ netif->credit_timeout.expires + -+ msecs_to_jiffies(netif->credit_usec / 1000); -+ -+ /* Timer could already be pending in rare cases. */ -+ if (timer_pending(&netif->credit_timeout)) -+ return true; -+ -+ /* Passed the point where we can replenish credit? */ -+ if (time_after_eq(now, next_credit)) { -+ netif->credit_timeout.expires = now; -+ tx_add_credit(netif); -+ } -+ -+ /* Still too big to send right now? Set a callback. 
*/ -+ if (size > netif->remaining_credit) { -+ netif->credit_timeout.data = -+ (unsigned long)netif; -+ netif->credit_timeout.function = -+ tx_credit_callback; -+ mod_timer(&netif->credit_timeout, -+ next_credit); -+ -+ return true; -+ } -+ -+ return false; -+} -+ -+static unsigned net_tx_build_mops(struct xen_netbk *netbk) -+{ -+ struct gnttab_map_grant_ref *mop; -+ struct sk_buff *skb; -+ int ret; -+ -+ mop = netbk->tx_map_ops; -+ while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && -+ !list_empty(&netbk->net_schedule_list)) { -+ struct xen_netif *netif; -+ struct xen_netif_tx_request txreq; -+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; -+ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; -+ u16 pending_idx; -+ RING_IDX idx; -+ int work_to_do; -+ unsigned int data_len; -+ pending_ring_idx_t index; -+ -+ /* Get a netif from the list with work to do. */ -+ netif = poll_net_schedule_list(netbk); -+ if (!netif) -+ continue; -+ -+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); -+ if (!work_to_do) { -+ netif_put(netif); -+ continue; -+ } -+ -+ idx = netif->tx.req_cons; -+ rmb(); /* Ensure that we see the request before we copy it. */ -+ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq)); -+ -+ /* Credit-based scheduling. 
*/ -+ if (txreq.size > netif->remaining_credit && -+ tx_credit_exceeded(netif, txreq.size)) { -+ netif_put(netif); -+ continue; -+ } -+ -+ netif->remaining_credit -= txreq.size; -+ -+ work_to_do--; -+ netif->tx.req_cons = ++idx; -+ -+ memset(extras, 0, sizeof(extras)); -+ if (txreq.flags & NETTXF_extra_info) { -+ work_to_do = netbk_get_extras(netif, extras, -+ work_to_do); -+ idx = netif->tx.req_cons; -+ if (unlikely(work_to_do < 0)) { -+ netbk_tx_err(netif, &txreq, idx); -+ continue; -+ } -+ } -+ -+ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do); -+ if (unlikely(ret < 0)) { -+ netbk_tx_err(netif, &txreq, idx - ret); -+ continue; -+ } -+ idx += ret; -+ -+ if (unlikely(txreq.size < ETH_HLEN)) { -+ DPRINTK("Bad packet size: %d\n", txreq.size); -+ netbk_tx_err(netif, &txreq, idx); -+ continue; -+ } -+ -+ /* No crossing a page as the payload mustn't fragment. */ -+ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { -+ DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", -+ txreq.offset, txreq.size, -+ (txreq.offset &~PAGE_MASK) + txreq.size); -+ netbk_tx_err(netif, &txreq, idx); -+ continue; -+ } -+ -+ index = pending_index(netbk->pending_cons); -+ pending_idx = netbk->pending_ring[index]; -+ -+ data_len = (txreq.size > PKT_PROT_LEN && -+ ret < MAX_SKB_FRAGS) ? -+ PKT_PROT_LEN : txreq.size; -+ -+ skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, -+ GFP_ATOMIC | __GFP_NOWARN); -+ if (unlikely(skb == NULL)) { -+ DPRINTK("Can't allocate a skb in start_xmit.\n"); -+ netbk_tx_err(netif, &txreq, idx); -+ break; -+ } -+ -+ /* Packets passed to netif_rx() must have some headroom. 
*/ -+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); -+ -+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { -+ struct xen_netif_extra_info *gso; -+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; -+ -+ if (netbk_set_skb_gso(skb, gso)) { -+ kfree_skb(skb); -+ netbk_tx_err(netif, &txreq, idx); -+ continue; -+ } -+ } -+ -+ gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx), -+ GNTMAP_host_map | GNTMAP_readonly, -+ txreq.gref, netif->domid); -+ mop++; -+ -+ memcpy(&netbk->pending_tx_info[pending_idx].req, -+ &txreq, sizeof(txreq)); -+ netbk->pending_tx_info[pending_idx].netif = netif; -+ *((u16 *)skb->data) = pending_idx; -+ -+ __skb_put(skb, data_len); -+ -+ skb_shinfo(skb)->nr_frags = ret; -+ if (data_len < txreq.size) { -+ skb_shinfo(skb)->nr_frags++; -+ skb_shinfo(skb)->frags[0].page = -+ (void *)(unsigned long)pending_idx; -+ } else { -+ /* Discriminate from any valid pending_idx value. */ -+ skb_shinfo(skb)->frags[0].page = (void *)~0UL; -+ } -+ -+ __skb_queue_tail(&netbk->tx_queue, skb); -+ -+ netbk->pending_cons++; -+ -+ mop = netbk_get_requests(netbk, netif, skb, txfrags, mop); -+ -+ netif->tx.req_cons = idx; -+ netif_schedule_work(netif); -+ -+ if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops)) -+ break; -+ } -+ -+ return mop - netbk->tx_map_ops; -+} -+ -+static void net_tx_submit(struct xen_netbk *netbk) -+{ -+ struct gnttab_map_grant_ref *mop; -+ struct sk_buff *skb; -+ -+ mop = netbk->tx_map_ops; -+ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) { -+ struct xen_netif_tx_request *txp; -+ struct xen_netif *netif; -+ u16 pending_idx; -+ unsigned data_len; -+ -+ pending_idx = *((u16 *)skb->data); -+ netif = netbk->pending_tx_info[pending_idx].netif; -+ txp = &netbk->pending_tx_info[pending_idx].req; -+ -+ /* Check the remap error code. 
*/ -+ if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) { -+ DPRINTK("netback grant failed.\n"); -+ skb_shinfo(skb)->nr_frags = 0; -+ kfree_skb(skb); -+ continue; -+ } -+ -+ data_len = skb->len; -+ memcpy(skb->data, -+ (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset), -+ data_len); -+ if (data_len < txp->size) { -+ /* Append the packet payload as a fragment. */ -+ txp->offset += data_len; -+ txp->size -= data_len; -+ } else { -+ /* Schedule a response immediately. */ -+ netif_idx_release(netbk, pending_idx); -+ } -+ -+ if (txp->flags & NETTXF_csum_blank) -+ skb->ip_summed = CHECKSUM_PARTIAL; -+ else if (txp->flags & NETTXF_data_validated) -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ -+ netbk_fill_frags(netbk, skb); -+ -+ /* -+ * If the initial fragment was < PKT_PROT_LEN then -+ * pull through some bytes from the other fragments to -+ * increase the linear region to PKT_PROT_LEN bytes. -+ */ -+ if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { -+ int target = min_t(int, skb->len, PKT_PROT_LEN); -+ __pskb_pull_tail(skb, target - skb_headlen(skb)); -+ } -+ -+ skb->dev = netif->dev; -+ skb->protocol = eth_type_trans(skb, skb->dev); -+ -+ netif->stats.rx_bytes += skb->len; -+ netif->stats.rx_packets++; -+ -+ if (skb->ip_summed == CHECKSUM_PARTIAL) { -+ if (skb_checksum_setup(skb)) { -+ DPRINTK("Can't setup checksum in net_tx_action\n"); -+ kfree_skb(skb); -+ continue; -+ } -+ } -+ -+ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) && -+ unlikely(skb_linearize(skb))) { -+ DPRINTK("Can't linearize skb in net_tx_action.\n"); -+ kfree_skb(skb); -+ continue; -+ } -+ -+ netif_rx_ni(skb); -+ netif->dev->last_rx = jiffies; -+ } -+} -+ -+/* Called after netfront has transmitted */ -+static void net_tx_action(unsigned long data) -+{ -+ struct xen_netbk *netbk = (struct xen_netbk *)data; -+ unsigned nr_mops; -+ int ret; -+ -+ net_tx_action_dealloc(netbk); -+ -+ nr_mops = net_tx_build_mops(netbk); -+ -+ if (nr_mops == 0) -+ goto out; -+ -+ ret = 
HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, -+ netbk->tx_map_ops, nr_mops); -+ BUG_ON(ret); -+ -+ net_tx_submit(netbk); -+out: -+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB && -+ !list_empty(&netbk->pending_inuse_head)) { -+ struct netbk_tx_pending_inuse *oldest; -+ -+ oldest = list_entry(netbk->pending_inuse_head.next, -+ struct netbk_tx_pending_inuse, list); -+ mod_timer(&netbk->netbk_tx_pending_timer, -+ oldest->alloc_time + HZ); -+ } -+} -+ -+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx) -+{ -+ static DEFINE_SPINLOCK(_lock); -+ unsigned long flags; -+ pending_ring_idx_t index; -+ -+ spin_lock_irqsave(&_lock, flags); -+ index = pending_index(netbk->dealloc_prod); -+ netbk->dealloc_ring[index] = pending_idx; -+ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */ -+ smp_wmb(); -+ netbk->dealloc_prod++; -+ spin_unlock_irqrestore(&_lock, flags); -+ -+ xen_netbk_bh_handler(netbk, 0); -+} -+ -+static void netif_page_release(struct page *page, unsigned int order) -+{ -+ unsigned int group, idx; -+ int foreign = netif_get_page_ext(page, &group, &idx); -+ -+ BUG_ON(!foreign); -+ BUG_ON(order); -+ -+ netif_idx_release(&xen_netbk[group], idx); -+} -+ -+irqreturn_t netif_be_int(int irq, void *dev_id) -+{ -+ struct xen_netif *netif = dev_id; -+ struct xen_netbk *netbk; -+ -+ if (netif->group == -1) -+ return IRQ_NONE; -+ -+ netbk = &xen_netbk[netif->group]; -+ -+ add_to_net_schedule_list_tail(netif); -+ maybe_schedule_tx_action(netbk); -+ -+ if (netif_schedulable(netif) && !netbk_queue_full(netif)) -+ netif_wake_queue(netif->dev); -+ -+ return IRQ_HANDLED; -+} -+ -+static void make_tx_response(struct xen_netif *netif, -+ struct xen_netif_tx_request *txp, -+ s8 st) -+{ -+ RING_IDX i = netif->tx.rsp_prod_pvt; -+ struct xen_netif_tx_response *resp; -+ int notify; -+ -+ resp = RING_GET_RESPONSE(&netif->tx, i); -+ resp->id = txp->id; -+ resp->status = st; -+ -+ if (txp->flags & NETTXF_extra_info) -+ 
RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL; -+ -+ netif->tx.rsp_prod_pvt = ++i; -+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); -+ -+ /* -+ * netfront_smartpoll_active indicates whether netfront timer -+ * is active. -+ */ -+ if ((netif->smart_poll == 1)) { -+ if (!(netif->rx.sring->private.netif.smartpoll_active)) { -+ notify_remote_via_irq(netif->irq); -+ netif->rx.sring->private.netif.smartpoll_active = 1; -+ } -+ } else if (notify) -+ notify_remote_via_irq(netif->irq); -+} -+ -+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif, -+ u16 id, -+ s8 st, -+ u16 offset, -+ u16 size, -+ u16 flags) -+{ -+ RING_IDX i = netif->rx.rsp_prod_pvt; -+ struct xen_netif_rx_response *resp; -+ -+ resp = RING_GET_RESPONSE(&netif->rx, i); -+ resp->offset = offset; -+ resp->flags = flags; -+ resp->id = id; -+ resp->status = (s16)size; -+ if (st < 0) -+ resp->status = (s16)st; -+ -+ netif->rx.rsp_prod_pvt = ++i; -+ -+ return resp; -+} -+ -+#ifdef NETBE_DEBUG_INTERRUPT -+static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ struct list_head *ent; -+ struct xen_netif *netif; -+ int i = 0; -+ int group = 0; -+ -+ printk(KERN_ALERT "netif_schedule_list:\n"); -+ -+ for (group = 0; group < xen_netbk_group_nr; group++) { -+ struct xen_netbk *netbk = &xen_netbk[group]; -+ spin_lock_irq(&netbk->net_schedule_list_lock); -+ printk(KERN_ALERT "xen_netback group number: %d\n", group); -+ list_for_each(ent, &netbk->net_schedule_list) { -+ netif = list_entry(ent, struct xen_netif, list); -+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x " -+ "rx_resp_prod=%08x\n", -+ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); -+ printk(KERN_ALERT -+ " tx_req_cons=%08x, tx_resp_prod=%08x)\n", -+ netif->tx.req_cons, netif->tx.rsp_prod_pvt); -+ printk(KERN_ALERT -+ " shared(rx_req_prod=%08x " -+ "rx_resp_prod=%08x\n", -+ netif->rx.sring->req_prod, -+ netif->rx.sring->rsp_prod); -+ printk(KERN_ALERT -+ " rx_event=%08x, 
tx_req_prod=%08x\n", -+ netif->rx.sring->rsp_event, -+ netif->tx.sring->req_prod); -+ printk(KERN_ALERT -+ " tx_resp_prod=%08x, tx_event=%08x)\n", -+ netif->tx.sring->rsp_prod, -+ netif->tx.sring->rsp_event); -+ i++; -+ } -+ spin_unlock_irq(&netbk->net_schedule_list_lock); -+ } -+ -+ printk(KERN_ALERT " ** End of netif_schedule_list **\n"); -+ -+ return IRQ_HANDLED; -+} -+#endif -+ -+static inline int rx_work_todo(struct xen_netbk *netbk) -+{ -+ return !skb_queue_empty(&netbk->rx_queue); -+} -+ -+static inline int tx_work_todo(struct xen_netbk *netbk) -+{ -+ if (netbk->dealloc_cons != netbk->dealloc_prod) -+ return 1; -+ -+ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && -+ !list_empty(&netbk->net_schedule_list)) -+ return 1; -+ -+ return 0; -+} -+ -+static int netbk_action_thread(void *data) -+{ -+ struct xen_netbk *netbk = (struct xen_netbk *)data; -+ while (!kthread_should_stop()) { -+ wait_event_interruptible(netbk->kthread.netbk_action_wq, -+ rx_work_todo(netbk) -+ || tx_work_todo(netbk) -+ || kthread_should_stop()); -+ cond_resched(); -+ -+ if (kthread_should_stop()) -+ break; -+ -+ if (rx_work_todo(netbk)) -+ net_rx_action((unsigned long)netbk); -+ -+ if (tx_work_todo(netbk)) -+ net_tx_action((unsigned long)netbk); -+ } -+ -+ return 0; -+} -+ -+static int __init netback_init(void) -+{ -+ int i; -+ struct page *page; -+ int rc = 0; -+ int group; -+ -+ if (!xen_pv_domain()) -+ return -ENODEV; -+ -+ xen_netbk_group_nr = num_online_cpus(); -+ xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr); -+ if (!xen_netbk) { -+ printk(KERN_ALERT "%s: out of memory\n", __func__); -+ return -ENOMEM; -+ } -+ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr); -+ -+ /* We can increase reservation by this much in net_rx_action(). 
*/ -+// balloon_update_driver_allowance(NET_RX_RING_SIZE); -+ -+ for (group = 0; group < xen_netbk_group_nr; group++) { -+ struct xen_netbk *netbk = &xen_netbk[group]; -+ skb_queue_head_init(&netbk->rx_queue); -+ skb_queue_head_init(&netbk->tx_queue); -+ -+ init_timer(&netbk->net_timer); -+ netbk->net_timer.data = (unsigned long)netbk; -+ netbk->net_timer.function = net_alarm; -+ -+ init_timer(&netbk->netbk_tx_pending_timer); -+ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk; -+ netbk->netbk_tx_pending_timer.function = -+ netbk_tx_pending_timeout; -+ -+ netbk->mmap_pages = -+ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); -+ if (!netbk->mmap_pages) { -+ printk(KERN_ALERT "%s: out of memory\n", __func__); -+ del_timer(&netbk->netbk_tx_pending_timer); -+ del_timer(&netbk->net_timer); -+ rc = -ENOMEM; -+ goto failed_init; -+ } -+ -+ for (i = 0; i < MAX_PENDING_REQS; i++) { -+ page = netbk->mmap_pages[i]; -+ SetPageForeign(page, netif_page_release); -+ netif_set_page_ext(page, group, i); -+ INIT_LIST_HEAD(&netbk->pending_inuse[i].list); -+ } -+ -+ netbk->pending_cons = 0; -+ netbk->pending_prod = MAX_PENDING_REQS; -+ for (i = 0; i < MAX_PENDING_REQS; i++) -+ netbk->pending_ring[i] = i; -+ -+ if (MODPARM_netback_kthread) { -+ init_waitqueue_head(&netbk->kthread.netbk_action_wq); -+ netbk->kthread.task = -+ kthread_create(netbk_action_thread, -+ (void *)netbk, -+ "netback/%u", group); -+ -+ if (!IS_ERR(netbk->kthread.task)) { -+ kthread_bind(netbk->kthread.task, group); -+ } else { -+ printk(KERN_ALERT -+ "kthread_run() fails at netback\n"); -+ free_empty_pages_and_pagevec(netbk->mmap_pages, -+ MAX_PENDING_REQS); -+ del_timer(&netbk->netbk_tx_pending_timer); -+ del_timer(&netbk->net_timer); -+ rc = PTR_ERR(netbk->kthread.task); -+ goto failed_init; -+ } -+ } else { -+ tasklet_init(&netbk->tasklet.net_tx_tasklet, -+ net_tx_action, -+ (unsigned long)netbk); -+ tasklet_init(&netbk->tasklet.net_rx_tasklet, -+ net_rx_action, -+ (unsigned long)netbk); -+ } -+ -+ 
INIT_LIST_HEAD(&netbk->pending_inuse_head); -+ INIT_LIST_HEAD(&netbk->net_schedule_list); -+ -+ spin_lock_init(&netbk->net_schedule_list_lock); -+ -+ atomic_set(&netbk->netfront_count, 0); -+ -+ if (MODPARM_netback_kthread) -+ wake_up_process(netbk->kthread.task); -+ } -+ -+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB; -+ if (MODPARM_copy_skb) { -+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, -+ NULL, 0)) -+ netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB; -+ else -+ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB; -+ } -+ -+ //netif_accel_init(); -+ -+ rc = netif_xenbus_init(); -+ if (rc) -+ goto failed_init; -+ -+#ifdef NETBE_DEBUG_INTERRUPT -+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG, -+ 0, -+ netif_be_dbg, -+ IRQF_SHARED, -+ "net-be-dbg", -+ &netif_be_dbg); -+#endif -+ -+ return 0; -+ -+failed_init: -+ for (i = 0; i < group; i++) { -+ struct xen_netbk *netbk = &xen_netbk[i]; -+ free_empty_pages_and_pagevec(netbk->mmap_pages, -+ MAX_PENDING_REQS); -+ del_timer(&netbk->netbk_tx_pending_timer); -+ del_timer(&netbk->net_timer); -+ if (MODPARM_netback_kthread) -+ kthread_stop(netbk->kthread.task); -+ } -+ vfree(xen_netbk); -+ return rc; -+ -+} -+ -+module_init(netback_init); -+ -+MODULE_LICENSE("Dual BSD/GPL"); -diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c -new file mode 100644 -index 0000000..1930f64 ---- /dev/null -+++ b/drivers/xen/netback/xenbus.c -@@ -0,0 +1,518 @@ -+/* Xenbus code for netif backend -+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au> -+ Copyright (C) 2005 XenSource Ltd -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 2 of the License, or -+ (at your option) any later version. 
-+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+*/ -+ -+#include <stdarg.h> -+#include <linux/module.h> -+#include <xen/xenbus.h> -+#include "common.h" -+ -+#if 0 -+#undef DPRINTK -+#define DPRINTK(fmt, args...) \ -+ printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) -+#endif -+ -+ -+static int connect_rings(struct backend_info *); -+static void connect(struct backend_info *); -+static void backend_create_netif(struct backend_info *be); -+static void unregister_hotplug_status_watch(struct backend_info *be); -+ -+static int netback_remove(struct xenbus_device *dev) -+{ -+ struct backend_info *be = dev_get_drvdata(&dev->dev); -+ -+ //netback_remove_accelerators(be, dev); -+ -+ unregister_hotplug_status_watch(be); -+ if (be->netif) { -+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); -+ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); -+ netif_disconnect(be->netif); -+ be->netif = NULL; -+ } -+ kfree(be); -+ dev_set_drvdata(&dev->dev, NULL); -+ return 0; -+} -+ -+ -+/** -+ * Entry point to this code when a new device is created. Allocate the basic -+ * structures and switch to InitWait. 
-+ */ -+static int netback_probe(struct xenbus_device *dev, -+ const struct xenbus_device_id *id) -+{ -+ const char *message; -+ struct xenbus_transaction xbt; -+ int err; -+ int sg; -+ struct backend_info *be = kzalloc(sizeof(struct backend_info), -+ GFP_KERNEL); -+ if (!be) { -+ xenbus_dev_fatal(dev, -ENOMEM, -+ "allocating backend structure"); -+ return -ENOMEM; -+ } -+ -+ be->dev = dev; -+ dev_set_drvdata(&dev->dev, be); -+ -+ sg = 1; -+ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) -+ sg = 0; -+ -+ do { -+ err = xenbus_transaction_start(&xbt); -+ if (err) { -+ xenbus_dev_fatal(dev, err, "starting transaction"); -+ goto fail; -+ } -+ -+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg); -+ if (err) { -+ message = "writing feature-sg"; -+ goto abort_transaction; -+ } -+ -+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", -+ "%d", sg); -+ if (err) { -+ message = "writing feature-gso-tcpv4"; -+ goto abort_transaction; -+ } -+ -+ /* We support rx-copy path. */ -+ err = xenbus_printf(xbt, dev->nodename, -+ "feature-rx-copy", "%d", 1); -+ if (err) { -+ message = "writing feature-rx-copy"; -+ goto abort_transaction; -+ } -+ -+ /* -+ * We don't support rx-flip path (except old guests who don't -+ * grok this feature flag). 
-+ */ -+ err = xenbus_printf(xbt, dev->nodename, -+ "feature-rx-flip", "%d", 0); -+ if (err) { -+ message = "writing feature-rx-flip"; -+ goto abort_transaction; -+ } -+ -+ /* We support data smart poll mechanism */ -+ err = xenbus_printf(xbt, dev->nodename, -+ "feature-smart-poll", "%d", 1); -+ if (err) { -+ message = "writing feature-smart-poll"; -+ goto abort_transaction; -+ } -+ -+ err = xenbus_transaction_end(xbt, 0); -+ } while (err == -EAGAIN); -+ -+ if (err) { -+ xenbus_dev_fatal(dev, err, "completing transaction"); -+ goto fail; -+ } -+ -+ //netback_probe_accelerators(be, dev); -+ -+ err = xenbus_switch_state(dev, XenbusStateInitWait); -+ if (err) -+ goto fail; -+ -+ /* This kicks hotplug scripts, so do it immediately. */ -+ backend_create_netif(be); -+ -+ return 0; -+ -+abort_transaction: -+ xenbus_transaction_end(xbt, 1); -+ xenbus_dev_fatal(dev, err, "%s", message); -+fail: -+ DPRINTK("failed"); -+ netback_remove(dev); -+ return err; -+} -+ -+ -+/** -+ * Handle the creation of the hotplug script environment. We add the script -+ * and vif variables to the environment, for the benefit of the vif-* hotplug -+ * scripts. 
-+ */ -+static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) -+{ -+ struct backend_info *be = dev_get_drvdata(&xdev->dev); -+ char *val; -+ -+ DPRINTK("netback_uevent"); -+ -+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL); -+ if (IS_ERR(val)) { -+ int err = PTR_ERR(val); -+ xenbus_dev_fatal(xdev, err, "reading script"); -+ return err; -+ } -+ else { -+ if (add_uevent_var(env, "script=%s", val)) { -+ kfree(val); -+ return -ENOMEM; -+ } -+ kfree(val); -+ } -+ -+ if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name)) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+ -+static void backend_create_netif(struct backend_info *be) -+{ -+ int err; -+ long handle; -+ struct xenbus_device *dev = be->dev; -+ -+ if (be->netif != NULL) -+ return; -+ -+ err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle); -+ if (err != 1) { -+ xenbus_dev_fatal(dev, err, "reading handle"); -+ return; -+ } -+ -+ be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle); -+ if (IS_ERR(be->netif)) { -+ err = PTR_ERR(be->netif); -+ be->netif = NULL; -+ xenbus_dev_fatal(dev, err, "creating interface"); -+ return; -+ } -+ -+ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); -+} -+ -+ -+static void disconnect_backend(struct xenbus_device *dev) -+{ -+ struct backend_info *be = dev_get_drvdata(&dev->dev); -+ -+ if (be->netif) { -+ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); -+ netif_disconnect(be->netif); -+ be->netif = NULL; -+ } -+} -+ -+/** -+ * Callback received when the frontend's state changes. 
-+ */ -+static void frontend_changed(struct xenbus_device *dev, -+ enum xenbus_state frontend_state) -+{ -+ struct backend_info *be = dev_get_drvdata(&dev->dev); -+ -+ DPRINTK("%s", xenbus_strstate(frontend_state)); -+ -+ be->frontend_state = frontend_state; -+ -+ switch (frontend_state) { -+ case XenbusStateInitialising: -+ if (dev->state == XenbusStateClosed) { -+ printk(KERN_INFO "%s: %s: prepare for reconnect\n", -+ __FUNCTION__, dev->nodename); -+ xenbus_switch_state(dev, XenbusStateInitWait); -+ } -+ break; -+ -+ case XenbusStateInitialised: -+ break; -+ -+ case XenbusStateConnected: -+ if (dev->state == XenbusStateConnected) -+ break; -+ backend_create_netif(be); -+ if (be->netif) -+ connect(be); -+ break; -+ -+ case XenbusStateClosing: -+ if (be->netif) -+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); -+ disconnect_backend(dev); -+ xenbus_switch_state(dev, XenbusStateClosing); -+ break; -+ -+ case XenbusStateClosed: -+ xenbus_switch_state(dev, XenbusStateClosed); -+ if (xenbus_dev_is_online(dev)) -+ break; -+ /* fall through if not online */ -+ case XenbusStateUnknown: -+ device_unregister(&dev->dev); -+ break; -+ -+ default: -+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", -+ frontend_state); -+ break; -+ } -+} -+ -+ -+static void xen_net_read_rate(struct xenbus_device *dev, -+ unsigned long *bytes, unsigned long *usec) -+{ -+ char *s, *e; -+ unsigned long b, u; -+ char *ratestr; -+ -+ /* Default to unlimited bandwidth. */ -+ *bytes = ~0UL; -+ *usec = 0; -+ -+ ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL); -+ if (IS_ERR(ratestr)) -+ return; -+ -+ s = ratestr; -+ b = simple_strtoul(s, &e, 10); -+ if ((s == e) || (*e != ',')) -+ goto fail; -+ -+ s = e + 1; -+ u = simple_strtoul(s, &e, 10); -+ if ((s == e) || (*e != '\0')) -+ goto fail; -+ -+ *bytes = b; -+ *usec = u; -+ -+ kfree(ratestr); -+ return; -+ -+ fail: -+ WPRINTK("Failed to parse network rate limit. 
Traffic unlimited.\n"); -+ kfree(ratestr); -+} -+ -+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) -+{ -+ char *s, *e, *macstr; -+ int i; -+ -+ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); -+ if (IS_ERR(macstr)) -+ return PTR_ERR(macstr); -+ -+ for (i = 0; i < ETH_ALEN; i++) { -+ mac[i] = simple_strtoul(s, &e, 16); -+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { -+ kfree(macstr); -+ return -ENOENT; -+ } -+ s = e+1; -+ } -+ -+ kfree(macstr); -+ return 0; -+} -+ -+static void unregister_hotplug_status_watch(struct backend_info *be) -+{ -+ if (be->have_hotplug_status_watch) { -+ unregister_xenbus_watch(&be->hotplug_status_watch); -+ kfree(be->hotplug_status_watch.node); -+ } -+ be->have_hotplug_status_watch = 0; -+} -+ -+static void hotplug_status_changed(struct xenbus_watch *watch, -+ const char **vec, -+ unsigned int vec_size) -+{ -+ struct backend_info *be = container_of(watch, -+ struct backend_info, -+ hotplug_status_watch); -+ char *str; -+ unsigned int len; -+ -+ str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len); -+ if (IS_ERR(str)) -+ return; -+ if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) { -+ xenbus_switch_state(be->dev, XenbusStateConnected); -+ /* Not interested in this watch anymore. 
*/ -+ unregister_hotplug_status_watch(be); -+ } -+ kfree(str); -+} -+ -+static void connect(struct backend_info *be) -+{ -+ int err; -+ struct xenbus_device *dev = be->dev; -+ -+ err = connect_rings(be); -+ if (err) -+ return; -+ -+ err = xen_net_read_mac(dev, be->netif->fe_dev_addr); -+ if (err) { -+ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); -+ return; -+ } -+ -+ xen_net_read_rate(dev, &be->netif->credit_bytes, -+ &be->netif->credit_usec); -+ be->netif->remaining_credit = be->netif->credit_bytes; -+ -+ unregister_hotplug_status_watch(be); -+ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, -+ hotplug_status_changed, -+ "%s/%s", dev->nodename, "hotplug-status"); -+ if (err) { -+ /* Switch now, since we can't do a watch. */ -+ xenbus_switch_state(dev, XenbusStateConnected); -+ } else { -+ be->have_hotplug_status_watch = 1; -+ } -+ -+ netif_wake_queue(be->netif->dev); -+} -+ -+ -+static int connect_rings(struct backend_info *be) -+{ -+ struct xen_netif *netif = be->netif; -+ struct xenbus_device *dev = be->dev; -+ unsigned long tx_ring_ref, rx_ring_ref; -+ unsigned int evtchn, rx_copy; -+ int err; -+ int val; -+ -+ DPRINTK(""); -+ -+ err = xenbus_gather(XBT_NIL, dev->otherend, -+ "tx-ring-ref", "%lu", &tx_ring_ref, -+ "rx-ring-ref", "%lu", &rx_ring_ref, -+ "event-channel", "%u", &evtchn, NULL); -+ if (err) { -+ xenbus_dev_fatal(dev, err, -+ "reading %s/ring-ref and event-channel", -+ dev->otherend); -+ return err; -+ } -+ -+ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u", -+ &rx_copy); -+ if (err == -ENOENT) { -+ err = 0; -+ rx_copy = 0; -+ } -+ if (err < 0) { -+ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy", -+ dev->otherend); -+ return err; -+ } -+ if (!rx_copy) -+ return -EOPNOTSUPP; -+ -+ if (netif->dev->tx_queue_len != 0) { -+ if (xenbus_scanf(XBT_NIL, dev->otherend, -+ "feature-rx-notify", "%d", &val) < 0) -+ val = 0; -+ if (val) -+ netif->can_queue = 1; -+ else -+ /* Must be non-zero for pfifo_fast to 
work. */ -+ netif->dev->tx_queue_len = 1; -+ } -+ -+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", -+ "%d", &val) < 0) -+ val = 0; -+ netif->can_sg = !!val; -+ -+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", -+ "%d", &val) < 0) -+ val = 0; -+ netif->gso = !!val; -+ -+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix", -+ "%d", &val) < 0) -+ val = 0; -+ netif->gso_prefix = !!val; -+ -+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", -+ "%d", &val) < 0) -+ val = 0; -+ netif->csum = !val; -+ -+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-smart-poll", -+ "%d", &val) < 0) -+ val = 0; -+ netif->smart_poll = !!val; -+ -+ /* Set dev->features */ -+ netif_set_features(netif); -+ -+ /* Map the shared frame, irq etc. */ -+ err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn); -+ if (err) { -+ xenbus_dev_fatal(dev, err, -+ "mapping shared-frames %lu/%lu port %u", -+ tx_ring_ref, rx_ring_ref, evtchn); -+ return err; -+ } -+ return 0; -+} -+ -+ -+/* ** Driver Registration ** */ -+ -+ -+static const struct xenbus_device_id netback_ids[] = { -+ { "vif" }, -+ { "" } -+}; -+ -+ -+static struct xenbus_driver netback = { -+ .name = "vif", -+ .owner = THIS_MODULE, -+ .ids = netback_ids, -+ .probe = netback_probe, -+ .remove = netback_remove, -+ .uevent = netback_uevent, -+ .otherend_changed = frontend_changed, -+}; -+ -+ -+int netif_xenbus_init(void) -+{ -+ printk(KERN_CRIT "registering netback\n"); -+ return xenbus_register_backend(&netback); -+} -diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c -new file mode 100644 -index 0000000..ae693e7 ---- /dev/null -+++ b/drivers/xen/pci.c -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (c) 2009, Intel Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms and conditions of the GNU General Public License, -+ * version 2, as published by the Free Software Foundation. 
-+ * -+ * This program is distributed in the hope it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple -+ * Place - Suite 330, Boston, MA 02111-1307 USA. -+ * -+ * Author: Weidong Han <weidong.han@intel.com> -+ */ -+ -+#include <linux/pci.h> -+ -+#include <xen/interface/xen.h> -+#include <xen/interface/physdev.h> -+ -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/hypercall.h> -+ -+#include "../pci/pci.h" -+ -+ -+#ifdef CONFIG_PCI_IOV -+#define HANDLE_PCI_IOV 1 -+#else -+#define HANDLE_PCI_IOV 0 -+#endif -+ -+static int xen_add_device(struct device *dev) -+{ -+ int r; -+ struct pci_dev *pci_dev = to_pci_dev(dev); -+ -+ if (HANDLE_PCI_IOV && pci_dev->is_virtfn) { -+ struct physdev_manage_pci_ext manage_pci_ext = { -+ .bus = pci_dev->bus->number, -+ .devfn = pci_dev->devfn, -+ .is_virtfn = 1, -+#ifdef CONFIG_PCI_IOV -+ .physfn.bus = pci_dev->physfn->bus->number, -+ .physfn.devfn = pci_dev->physfn->devfn, -+#endif -+ }; -+ -+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, -+ &manage_pci_ext); -+ } else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { -+ struct physdev_manage_pci_ext manage_pci_ext = { -+ .bus = pci_dev->bus->number, -+ .devfn = pci_dev->devfn, -+ .is_extfn = 1, -+ }; -+ -+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, -+ &manage_pci_ext); -+ } else { -+ struct physdev_manage_pci manage_pci = { -+ .bus = pci_dev->bus->number, -+ .devfn = pci_dev->devfn, -+ }; -+ -+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, -+ &manage_pci); -+ } -+ -+ return r; -+} -+ -+static int xen_remove_device(struct device *dev) -+{ -+ int r; -+ struct pci_dev *pci_dev = to_pci_dev(dev); -+ struct physdev_manage_pci 
manage_pci; -+ -+ manage_pci.bus = pci_dev->bus->number; -+ manage_pci.devfn = pci_dev->devfn; -+ -+ r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, -+ &manage_pci); -+ -+ return r; -+} -+ -+static int xen_pci_notifier(struct notifier_block *nb, -+ unsigned long action, void *data) -+{ -+ struct device *dev = data; -+ int r = 0; -+ -+ switch (action) { -+ case BUS_NOTIFY_ADD_DEVICE: -+ r = xen_add_device(dev); -+ break; -+ case BUS_NOTIFY_DEL_DEVICE: -+ r = xen_remove_device(dev); -+ break; -+ default: -+ break; -+ } -+ -+ return r; -+} -+ -+struct notifier_block device_nb = { -+ .notifier_call = xen_pci_notifier, -+}; -+ -+static int __init register_xen_pci_notifier(void) -+{ -+ if (!xen_pv_domain()) -+ return 0; -+ -+ return bus_register_notifier(&pci_bus_type, &device_nb); -+} -+ -+arch_initcall(register_xen_pci_notifier); -diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile -new file mode 100644 -index 0000000..38bc123 ---- /dev/null -+++ b/drivers/xen/pciback/Makefile -@@ -0,0 +1,17 @@ -+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o -+ -+xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o -+xen-pciback-y += conf_space.o conf_space_header.o \ -+ conf_space_capability.o \ -+ conf_space_capability_vpd.o \ -+ conf_space_capability_pm.o \ -+ conf_space_quirks.o -+xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o -+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o -+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o -+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o -+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o -+ -+ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) -+EXTRA_CFLAGS += -DDEBUG -+endif -diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c -new file mode 100644 -index 0000000..370c18e ---- /dev/null -+++ b/drivers/xen/pciback/conf_space.c -@@ -0,0 +1,435 @@ -+/* -+ * PCI Backend - Functions for creating a virtual configuration space for -+ * 
exported PCI Devices. -+ * It's dangerous to allow PCI Driver Domains to change their -+ * device's resources (memory, i/o ports, interrupts). We need to -+ * restrict changes to certain PCI Configuration registers: -+ * BARs, INTERRUPT_PIN, most registers in the header... -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/pci.h> -+#include "pciback.h" -+#include "conf_space.h" -+#include "conf_space_quirks.h" -+ -+static int permissive; -+module_param(permissive, bool, 0644); -+ -+#define DEFINE_PCI_CONFIG(op, size, type) \ -+int pciback_##op##_config_##size \ -+(struct pci_dev *dev, int offset, type value, void *data) \ -+{ \ -+ return pci_##op##_config_##size(dev, offset, value); \ -+} -+ -+DEFINE_PCI_CONFIG(read, byte, u8 *) -+DEFINE_PCI_CONFIG(read, word, u16 *) -+DEFINE_PCI_CONFIG(read, dword, u32 *) -+ -+DEFINE_PCI_CONFIG(write, byte, u8) -+DEFINE_PCI_CONFIG(write, word, u16) -+DEFINE_PCI_CONFIG(write, dword, u32) -+ -+static int conf_space_read(struct pci_dev *dev, -+ const struct config_field_entry *entry, -+ int offset, u32 *value) -+{ -+ int ret = 0; -+ const struct config_field *field = entry->field; -+ -+ *value = 0; -+ -+ switch (field->size) { -+ case 1: -+ if (field->u.b.read) -+ ret = field->u.b.read(dev, offset, (u8 *) value, -+ entry->data); -+ break; -+ case 2: -+ if (field->u.w.read) -+ ret = field->u.w.read(dev, offset, (u16 *) value, -+ entry->data); -+ break; -+ case 4: -+ if (field->u.dw.read) -+ ret = field->u.dw.read(dev, offset, value, entry->data); -+ break; -+ } -+ return ret; -+} -+ -+static int conf_space_write(struct pci_dev *dev, -+ const struct config_field_entry *entry, -+ int offset, u32 value) -+{ -+ int ret = 0; -+ const struct config_field *field = entry->field; -+ -+ switch (field->size) { -+ case 1: -+ if (field->u.b.write) -+ ret = field->u.b.write(dev, offset, (u8) value, -+ entry->data); -+ break; -+ case 2: -+ if (field->u.w.write) -+ ret = field->u.w.write(dev, 
offset, (u16) value, -+ entry->data); -+ break; -+ case 4: -+ if (field->u.dw.write) -+ ret = field->u.dw.write(dev, offset, value, -+ entry->data); -+ break; -+ } -+ return ret; -+} -+ -+static inline u32 get_mask(int size) -+{ -+ if (size == 1) -+ return 0xff; -+ else if (size == 2) -+ return 0xffff; -+ else -+ return 0xffffffff; -+} -+ -+static inline int valid_request(int offset, int size) -+{ -+ /* Validate request (no un-aligned requests) */ -+ if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0) -+ return 1; -+ return 0; -+} -+ -+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask, -+ int offset) -+{ -+ if (offset >= 0) { -+ new_val_mask <<= (offset * 8); -+ new_val <<= (offset * 8); -+ } else { -+ new_val_mask >>= (offset * -8); -+ new_val >>= (offset * -8); -+ } -+ val = (val & ~new_val_mask) | (new_val & new_val_mask); -+ -+ return val; -+} -+ -+static int pcibios_err_to_errno(int err) -+{ -+ switch (err) { -+ case PCIBIOS_SUCCESSFUL: -+ return XEN_PCI_ERR_success; -+ case PCIBIOS_DEVICE_NOT_FOUND: -+ return XEN_PCI_ERR_dev_not_found; -+ case PCIBIOS_BAD_REGISTER_NUMBER: -+ return XEN_PCI_ERR_invalid_offset; -+ case PCIBIOS_FUNC_NOT_SUPPORTED: -+ return XEN_PCI_ERR_not_implemented; -+ case PCIBIOS_SET_FAILED: -+ return XEN_PCI_ERR_access_denied; -+ } -+ return err; -+} -+ -+int pciback_config_read(struct pci_dev *dev, int offset, int size, -+ u32 *ret_val) -+{ -+ int err = 0; -+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); -+ const struct config_field_entry *cfg_entry; -+ const struct config_field *field; -+ int req_start, req_end, field_start, field_end; -+ /* if read fails for any reason, return 0 -+ * (as if device didn't respond) */ -+ u32 value = 0, tmp_val; -+ -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n", -+ pci_name(dev), size, offset); -+ -+ if (!valid_request(offset, size)) { -+ err = XEN_PCI_ERR_invalid_offset; -+ goto out; -+ } -+ -+ /* Get the real 
value first, then modify as appropriate */ -+ switch (size) { -+ case 1: -+ err = pci_read_config_byte(dev, offset, (u8 *) &value); -+ break; -+ case 2: -+ err = pci_read_config_word(dev, offset, (u16 *) &value); -+ break; -+ case 4: -+ err = pci_read_config_dword(dev, offset, &value); -+ break; -+ } -+ -+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { -+ field = cfg_entry->field; -+ -+ req_start = offset; -+ req_end = offset + size; -+ field_start = OFFSET(cfg_entry); -+ field_end = OFFSET(cfg_entry) + field->size; -+ -+ if ((req_start >= field_start && req_start < field_end) -+ || (req_end > field_start && req_end <= field_end)) { -+ err = conf_space_read(dev, cfg_entry, field_start, -+ &tmp_val); -+ if (err) -+ goto out; -+ -+ value = merge_value(value, tmp_val, -+ get_mask(field->size), -+ field_start - req_start); -+ } -+ } -+ -+out: -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n", -+ pci_name(dev), size, offset, value); -+ -+ *ret_val = value; -+ return pcibios_err_to_errno(err); -+} -+ -+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) -+{ -+ int err = 0, handled = 0; -+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); -+ const struct config_field_entry *cfg_entry; -+ const struct config_field *field; -+ u32 tmp_val; -+ int req_start, req_end, field_start, field_end; -+ -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG -+ "pciback: %s: write request %d bytes at 0x%x = %x\n", -+ pci_name(dev), size, offset, value); -+ -+ if (!valid_request(offset, size)) -+ return XEN_PCI_ERR_invalid_offset; -+ -+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { -+ field = cfg_entry->field; -+ -+ req_start = offset; -+ req_end = offset + size; -+ field_start = OFFSET(cfg_entry); -+ field_end = OFFSET(cfg_entry) + field->size; -+ -+ if ((req_start >= field_start && req_start < field_end) -+ || (req_end > field_start && req_end <= field_end)) { -+ 
tmp_val = 0; -+ -+ err = pciback_config_read(dev, field_start, -+ field->size, &tmp_val); -+ if (err) -+ break; -+ -+ tmp_val = merge_value(tmp_val, value, get_mask(size), -+ req_start - field_start); -+ -+ err = conf_space_write(dev, cfg_entry, field_start, -+ tmp_val); -+ -+ /* handled is set true here, but not every byte -+ * may have been written! Properly detecting if -+ * every byte is handled is unnecessary as the -+ * flag is used to detect devices that need -+ * special helpers to work correctly. -+ */ -+ handled = 1; -+ } -+ } -+ -+ if (!handled && !err) { -+ /* By default, anything not specificially handled above is -+ * read-only. The permissive flag changes this behavior so -+ * that anything not specifically handled above is writable. -+ * This means that some fields may still be read-only because -+ * they have entries in the config_field list that intercept -+ * the write and do nothing. */ -+ if (dev_data->permissive || permissive) { -+ switch (size) { -+ case 1: -+ err = pci_write_config_byte(dev, offset, -+ (u8) value); -+ break; -+ case 2: -+ err = pci_write_config_word(dev, offset, -+ (u16) value); -+ break; -+ case 4: -+ err = pci_write_config_dword(dev, offset, -+ (u32) value); -+ break; -+ } -+ } else if (!dev_data->warned_on_write) { -+ dev_data->warned_on_write = 1; -+ dev_warn(&dev->dev, "Driver tried to write to a " -+ "read-only configuration space field at offset" -+ " 0x%x, size %d. 
This may be harmless, but if " -+ "you have problems with your device:\n" -+ "1) see permissive attribute in sysfs\n" -+ "2) report problems to the xen-devel " -+ "mailing list along with details of your " -+ "device obtained from lspci.\n", offset, size); -+ } -+ } -+ -+ return pcibios_err_to_errno(err); -+} -+ -+void pciback_config_free_dyn_fields(struct pci_dev *dev) -+{ -+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); -+ struct config_field_entry *cfg_entry, *t; -+ const struct config_field *field; -+ -+ dev_dbg(&dev->dev, "free-ing dynamically allocated virtual " -+ "configuration space fields\n"); -+ if (!dev_data) -+ return; -+ -+ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { -+ field = cfg_entry->field; -+ -+ if (field->clean) { -+ field->clean((struct config_field *)field); -+ -+ kfree(cfg_entry->data); -+ -+ list_del(&cfg_entry->list); -+ kfree(cfg_entry); -+ } -+ -+ } -+} -+ -+void pciback_config_reset_dev(struct pci_dev *dev) -+{ -+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); -+ const struct config_field_entry *cfg_entry; -+ const struct config_field *field; -+ -+ dev_dbg(&dev->dev, "resetting virtual configuration space\n"); -+ if (!dev_data) -+ return; -+ -+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { -+ field = cfg_entry->field; -+ -+ if (field->reset) -+ field->reset(dev, OFFSET(cfg_entry), cfg_entry->data); -+ } -+} -+ -+void pciback_config_free_dev(struct pci_dev *dev) -+{ -+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); -+ struct config_field_entry *cfg_entry, *t; -+ const struct config_field *field; -+ -+ dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n"); -+ if (!dev_data) -+ return; -+ -+ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { -+ list_del(&cfg_entry->list); -+ -+ field = cfg_entry->field; -+ -+ if (field->release) -+ field->release(dev, OFFSET(cfg_entry), cfg_entry->data); -+ -+ kfree(cfg_entry); -+ } -+} -+ 
-+int pciback_config_add_field_offset(struct pci_dev *dev, -+ const struct config_field *field, -+ unsigned int base_offset) -+{ -+ int err = 0; -+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); -+ struct config_field_entry *cfg_entry; -+ void *tmp; -+ -+ cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL); -+ if (!cfg_entry) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ cfg_entry->data = NULL; -+ cfg_entry->field = field; -+ cfg_entry->base_offset = base_offset; -+ -+ /* silently ignore duplicate fields */ -+ err = pciback_field_is_dup(dev, OFFSET(cfg_entry)); -+ if (err) -+ goto out; -+ -+ if (field->init) { -+ tmp = field->init(dev, OFFSET(cfg_entry)); -+ -+ if (IS_ERR(tmp)) { -+ err = PTR_ERR(tmp); -+ goto out; -+ } -+ -+ cfg_entry->data = tmp; -+ } -+ -+ dev_dbg(&dev->dev, "added config field at offset 0x%02x\n", -+ OFFSET(cfg_entry)); -+ list_add_tail(&cfg_entry->list, &dev_data->config_fields); -+ -+out: -+ if (err) -+ kfree(cfg_entry); -+ -+ return err; -+} -+ -+/* This sets up the device's virtual configuration space to keep track of -+ * certain registers (like the base address registers (BARs) so that we can -+ * keep the client from manipulating them directly. 
-+ */ -+int pciback_config_init_dev(struct pci_dev *dev) -+{ -+ int err = 0; -+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); -+ -+ dev_dbg(&dev->dev, "initializing virtual configuration space\n"); -+ -+ INIT_LIST_HEAD(&dev_data->config_fields); -+ -+ err = pciback_config_header_add_fields(dev); -+ if (err) -+ goto out; -+ -+ err = pciback_config_capability_add_fields(dev); -+ if (err) -+ goto out; -+ -+ err = pciback_config_quirks_init(dev); -+ -+out: -+ return err; -+} -+ -+int pciback_config_init(void) -+{ -+ return pciback_config_capability_init(); -+} -diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h -new file mode 100644 -index 0000000..50ebef2 ---- /dev/null -+++ b/drivers/xen/pciback/conf_space.h -@@ -0,0 +1,126 @@ -+/* -+ * PCI Backend - Common data structures for overriding the configuration space -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+ -+#ifndef __XEN_PCIBACK_CONF_SPACE_H__ -+#define __XEN_PCIBACK_CONF_SPACE_H__ -+ -+#include <linux/list.h> -+#include <linux/err.h> -+ -+/* conf_field_init can return an errno in a ptr with ERR_PTR() */ -+typedef void *(*conf_field_init) (struct pci_dev *dev, int offset); -+typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data); -+typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data); -+ -+typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value, -+ void *data); -+typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value, -+ void *data); -+typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value, -+ void *data); -+typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value, -+ void *data); -+typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value, -+ void *data); -+typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value, -+ void *data); -+ -+/* These are the fields within the configuration space which we -+ 
* are interested in intercepting reads/writes to and changing their -+ * values. -+ */ -+struct config_field { -+ unsigned int offset; -+ unsigned int size; -+ unsigned int mask; -+ conf_field_init init; -+ conf_field_reset reset; -+ conf_field_free release; -+ void (*clean) (struct config_field *field); -+ union { -+ struct { -+ conf_dword_write write; -+ conf_dword_read read; -+ } dw; -+ struct { -+ conf_word_write write; -+ conf_word_read read; -+ } w; -+ struct { -+ conf_byte_write write; -+ conf_byte_read read; -+ } b; -+ } u; -+ struct list_head list; -+}; -+ -+struct config_field_entry { -+ struct list_head list; -+ const struct config_field *field; -+ unsigned int base_offset; -+ void *data; -+}; -+ -+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) -+ -+/* Add fields to a device - the add_fields macro expects to get a pointer to -+ * the first entry in an array (of which the ending is marked by size==0) -+ */ -+int pciback_config_add_field_offset(struct pci_dev *dev, -+ const struct config_field *field, -+ unsigned int offset); -+ -+static inline int pciback_config_add_field(struct pci_dev *dev, -+ const struct config_field *field) -+{ -+ return pciback_config_add_field_offset(dev, field, 0); -+} -+ -+static inline int pciback_config_add_fields(struct pci_dev *dev, -+ const struct config_field *field) -+{ -+ int i, err = 0; -+ for (i = 0; field[i].size != 0; i++) { -+ err = pciback_config_add_field(dev, &field[i]); -+ if (err) -+ break; -+ } -+ return err; -+} -+ -+static inline int pciback_config_add_fields_offset(struct pci_dev *dev, -+ const struct config_field *field, -+ unsigned int offset) -+{ -+ int i, err = 0; -+ for (i = 0; field[i].size != 0; i++) { -+ err = pciback_config_add_field_offset(dev, &field[i], offset); -+ if (err) -+ break; -+ } -+ return err; -+} -+ -+/* Read/Write the real configuration space */ -+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value, -+ void *data); -+int 
pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value, -+ void *data); -+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value, -+ void *data); -+int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value, -+ void *data); -+int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value, -+ void *data); -+int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value, -+ void *data); -+ -+int pciback_config_capability_init(void); -+ -+int pciback_config_header_add_fields(struct pci_dev *dev); -+int pciback_config_capability_add_fields(struct pci_dev *dev); -+ -+#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */ -diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c -new file mode 100644 -index 0000000..0ea84d6 ---- /dev/null -+++ b/drivers/xen/pciback/conf_space_capability.c -@@ -0,0 +1,66 @@ -+/* -+ * PCI Backend - Handles the virtual fields found on the capability lists -+ * in the configuration space. 
-+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/pci.h> -+#include "pciback.h" -+#include "conf_space.h" -+#include "conf_space_capability.h" -+ -+static LIST_HEAD(capabilities); -+ -+static const struct config_field caplist_header[] = { -+ { -+ .offset = PCI_CAP_LIST_ID, -+ .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */ -+ .u.w.read = pciback_read_config_word, -+ .u.w.write = NULL, -+ }, -+ {} -+}; -+ -+static inline void register_capability(struct pciback_config_capability *cap) -+{ -+ list_add_tail(&cap->cap_list, &capabilities); -+} -+ -+int pciback_config_capability_add_fields(struct pci_dev *dev) -+{ -+ int err = 0; -+ struct pciback_config_capability *cap; -+ int cap_offset; -+ -+ list_for_each_entry(cap, &capabilities, cap_list) { -+ cap_offset = pci_find_capability(dev, cap->capability); -+ if (cap_offset) { -+ dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n", -+ cap->capability, cap_offset); -+ -+ err = pciback_config_add_fields_offset(dev, -+ caplist_header, -+ cap_offset); -+ if (err) -+ goto out; -+ err = pciback_config_add_fields_offset(dev, -+ cap->fields, -+ cap_offset); -+ if (err) -+ goto out; -+ } -+ } -+ -+out: -+ return err; -+} -+ -+int pciback_config_capability_init(void) -+{ -+ register_capability(&pciback_config_capability_vpd); -+ register_capability(&pciback_config_capability_pm); -+ -+ return 0; -+} -diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h -new file mode 100644 -index 0000000..8da3ac4 ---- /dev/null -+++ b/drivers/xen/pciback/conf_space_capability.h -@@ -0,0 +1,26 @@ -+/* -+ * PCI Backend - Data structures for special overlays for structures on -+ * the capability list. 
-+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+ -+#ifndef __PCIBACK_CONFIG_CAPABILITY_H__ -+#define __PCIBACK_CONFIG_CAPABILITY_H__ -+ -+#include <linux/pci.h> -+#include <linux/list.h> -+ -+struct pciback_config_capability { -+ struct list_head cap_list; -+ -+ int capability; -+ -+ /* If the device has the capability found above, add these fields */ -+ const struct config_field *fields; -+}; -+ -+extern struct pciback_config_capability pciback_config_capability_vpd; -+extern struct pciback_config_capability pciback_config_capability_pm; -+ -+#endif -diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c -new file mode 100644 -index 0000000..b15131e ---- /dev/null -+++ b/drivers/xen/pciback/conf_space_capability_msi.c -@@ -0,0 +1,110 @@ -+/* -+ * PCI Backend -- Configuration overlay for MSI capability -+ */ -+#include <linux/pci.h> -+#include <linux/slab.h> -+#include "conf_space.h" -+#include "conf_space_capability.h" -+#include <xen/interface/io/pciif.h> -+#include <xen/events.h> -+#include "pciback.h" -+ -+int pciback_enable_msi(struct pciback_device *pdev, -+ struct pci_dev *dev, struct xen_pci_op *op) -+{ -+ struct pciback_dev_data *dev_data; -+ int otherend = pdev->xdev->otherend_id; -+ int status; -+ -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev)); -+ -+ status = pci_enable_msi(dev); -+ -+ if (status) { -+ printk(KERN_ERR "error enable msi for guest %x status %x\n", -+ otherend, status); -+ op->value = 0; -+ return XEN_PCI_ERR_op_failed; -+ } -+ -+ /* The value the guest needs is actually the IDT vector, not the -+ * the local domain's IRQ number. 
*/ -+ op->value = xen_gsi_from_irq(dev->irq); -+ dev_data = pci_get_drvdata(dev); -+ if (dev_data) -+ dev_data->ack_intr = 0; -+ -+ return 0; -+} -+ -+int pciback_disable_msi(struct pciback_device *pdev, -+ struct pci_dev *dev, struct xen_pci_op *op) -+{ -+ struct pciback_dev_data *dev_data; -+ -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev)); -+ pci_disable_msi(dev); -+ -+ op->value = xen_gsi_from_irq(dev->irq); -+ dev_data = pci_get_drvdata(dev); -+ if (dev_data) -+ dev_data->ack_intr = 1; -+ return 0; -+} -+ -+int pciback_enable_msix(struct pciback_device *pdev, -+ struct pci_dev *dev, struct xen_pci_op *op) -+{ -+ struct pciback_dev_data *dev_data; -+ int i, result; -+ struct msix_entry *entries; -+ -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev)); -+ if (op->value > SH_INFO_MAX_VEC) -+ return -EINVAL; -+ -+ entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); -+ if (entries == NULL) -+ return -ENOMEM; -+ -+ for (i = 0; i < op->value; i++) { -+ entries[i].entry = op->msix_entries[i].entry; -+ entries[i].vector = op->msix_entries[i].vector; -+ } -+ -+ result = pci_enable_msix(dev, entries, op->value); -+ -+ for (i = 0; i < op->value; i++) { -+ op->msix_entries[i].entry = entries[i].entry; -+ op->msix_entries[i].vector = -+ xen_gsi_from_irq(entries[i].vector); -+ } -+ -+ kfree(entries); -+ -+ op->value = result; -+ dev_data = pci_get_drvdata(dev); -+ if (dev_data) -+ dev_data->ack_intr = 0; -+ -+ return result; -+} -+ -+int pciback_disable_msix(struct pciback_device *pdev, -+ struct pci_dev *dev, struct xen_pci_op *op) -+{ -+ struct pciback_dev_data *dev_data; -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev)); -+ pci_disable_msix(dev); -+ -+ op->value = xen_gsi_from_irq(dev->irq); -+ dev_data = pci_get_drvdata(dev); -+ if (dev_data) -+ dev_data->ack_intr = 1; -+ return 0; -+} -+ -diff --git 
a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c -new file mode 100644 -index 0000000..0442616 ---- /dev/null -+++ b/drivers/xen/pciback/conf_space_capability_pm.c -@@ -0,0 +1,113 @@ -+/* -+ * PCI Backend - Configuration space overlay for power management -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+ -+#include <linux/pci.h> -+#include "conf_space.h" -+#include "conf_space_capability.h" -+ -+static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value, -+ void *data) -+{ -+ int err; -+ u16 real_value; -+ -+ err = pci_read_config_word(dev, offset, &real_value); -+ if (err) -+ goto out; -+ -+ *value = real_value & ~PCI_PM_CAP_PME_MASK; -+ -+out: -+ return err; -+} -+ -+/* PM_OK_BITS specifies the bits that the driver domain is allowed to change. -+ * Can't allow driver domain to enable PMEs - they're shared */ -+#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK) -+ -+static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, -+ void *data) -+{ -+ int err; -+ u16 old_value; -+ pci_power_t new_state, old_state; -+ -+ err = pci_read_config_word(dev, offset, &old_value); -+ if (err) -+ goto out; -+ -+ old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); -+ new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); -+ -+ new_value &= PM_OK_BITS; -+ if ((old_value & PM_OK_BITS) != new_value) { -+ new_value = (old_value & ~PM_OK_BITS) | new_value; -+ err = pci_write_config_word(dev, offset, new_value); -+ if (err) -+ goto out; -+ } -+ -+ /* Let pci core handle the power management change */ -+ dev_dbg(&dev->dev, "set power state to %x\n", new_state); -+ err = pci_set_power_state(dev, new_state); -+ if (err) { -+ err = PCIBIOS_SET_FAILED; -+ goto out; -+ } -+ -+ out: -+ return err; -+} -+ -+/* Ensure PMEs are disabled */ -+static void *pm_ctrl_init(struct pci_dev *dev, int offset) -+{ -+ int err; -+ u16 value; -+ -+ err = pci_read_config_word(dev, offset, 
&value); -+ if (err) -+ goto out; -+ -+ if (value & PCI_PM_CTRL_PME_ENABLE) { -+ value &= ~PCI_PM_CTRL_PME_ENABLE; -+ err = pci_write_config_word(dev, offset, value); -+ } -+ -+out: -+ return ERR_PTR(err); -+} -+ -+static const struct config_field caplist_pm[] = { -+ { -+ .offset = PCI_PM_PMC, -+ .size = 2, -+ .u.w.read = pm_caps_read, -+ }, -+ { -+ .offset = PCI_PM_CTRL, -+ .size = 2, -+ .init = pm_ctrl_init, -+ .u.w.read = pciback_read_config_word, -+ .u.w.write = pm_ctrl_write, -+ }, -+ { -+ .offset = PCI_PM_PPB_EXTENSIONS, -+ .size = 1, -+ .u.b.read = pciback_read_config_byte, -+ }, -+ { -+ .offset = PCI_PM_DATA_REGISTER, -+ .size = 1, -+ .u.b.read = pciback_read_config_byte, -+ }, -+ {} -+}; -+ -+struct pciback_config_capability pciback_config_capability_pm = { -+ .capability = PCI_CAP_ID_PM, -+ .fields = caplist_pm, -+}; -diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c -new file mode 100644 -index 0000000..e7b4d66 ---- /dev/null -+++ b/drivers/xen/pciback/conf_space_capability_vpd.c -@@ -0,0 +1,40 @@ -+/* -+ * PCI Backend - Configuration space overlay for Vital Product Data -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+ -+#include <linux/pci.h> -+#include "conf_space.h" -+#include "conf_space_capability.h" -+ -+static int vpd_address_write(struct pci_dev *dev, int offset, u16 value, -+ void *data) -+{ -+ /* Disallow writes to the vital product data */ -+ if (value & PCI_VPD_ADDR_F) -+ return PCIBIOS_SET_FAILED; -+ else -+ return pci_write_config_word(dev, offset, value); -+} -+ -+static const struct config_field caplist_vpd[] = { -+ { -+ .offset = PCI_VPD_ADDR, -+ .size = 2, -+ .u.w.read = pciback_read_config_word, -+ .u.w.write = vpd_address_write, -+ }, -+ { -+ .offset = PCI_VPD_DATA, -+ .size = 4, -+ .u.dw.read = pciback_read_config_dword, -+ .u.dw.write = NULL, -+ }, -+ {} -+}; -+ -+struct pciback_config_capability pciback_config_capability_vpd = { -+ .capability = 
PCI_CAP_ID_VPD, -+ .fields = caplist_vpd, -+}; -diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c -new file mode 100644 -index 0000000..cb450f4 ---- /dev/null -+++ b/drivers/xen/pciback/conf_space_header.c -@@ -0,0 +1,385 @@ -+/* -+ * PCI Backend - Handles the virtual fields in the configuration space headers. -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/pci.h> -+#include "pciback.h" -+#include "conf_space.h" -+ -+struct pci_bar_info { -+ u32 val; -+ u32 len_val; -+ int which; -+}; -+ -+#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) -+#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) -+ -+static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) -+{ -+ int i; -+ int ret; -+ -+ ret = pciback_read_config_word(dev, offset, value, data); -+ if (!atomic_read(&dev->enable_cnt)) -+ return ret; -+ -+ for (i = 0; i < PCI_ROM_RESOURCE; i++) { -+ if (dev->resource[i].flags & IORESOURCE_IO) -+ *value |= PCI_COMMAND_IO; -+ if (dev->resource[i].flags & IORESOURCE_MEM) -+ *value |= PCI_COMMAND_MEMORY; -+ } -+ -+ return ret; -+} -+ -+static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) -+{ -+ struct pciback_dev_data *dev_data; -+ int err; -+ -+ dev_data = pci_get_drvdata(dev); -+ if (!pci_is_enabled(dev) && is_enable_cmd(value)) { -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG "pciback: %s: enable\n", -+ pci_name(dev)); -+ err = pci_enable_device(dev); -+ if (err) -+ return err; -+ if (dev_data) -+ dev_data->enable_intx = 1; -+ } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) { -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG "pciback: %s: disable\n", -+ pci_name(dev)); -+ pci_disable_device(dev); -+ if (dev_data) -+ dev_data->enable_intx = 0; -+ } -+ -+ if (!dev->is_busmaster && is_master_cmd(value)) { -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG "pciback: 
%s: set bus master\n", -+ pci_name(dev)); -+ pci_set_master(dev); -+ } -+ -+ if (value & PCI_COMMAND_INVALIDATE) { -+ if (unlikely(verbose_request)) -+ printk(KERN_DEBUG -+ "pciback: %s: enable memory-write-invalidate\n", -+ pci_name(dev)); -+ err = pci_set_mwi(dev); -+ if (err) { -+ printk(KERN_WARNING -+ "pciback: %s: cannot enable " -+ "memory-write-invalidate (%d)\n", -+ pci_name(dev), err); -+ value &= ~PCI_COMMAND_INVALIDATE; -+ } -+ } -+ -+ return pci_write_config_word(dev, offset, value); -+} -+ -+static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data) -+{ -+ struct pci_bar_info *bar = data; -+ -+ if (unlikely(!bar)) { -+ printk(KERN_WARNING "pciback: driver data not found for %s\n", -+ pci_name(dev)); -+ return XEN_PCI_ERR_op_failed; -+ } -+ -+ /* A write to obtain the length must happen as a 32-bit write. -+ * This does not (yet) support writing individual bytes -+ */ -+ if (value == ~PCI_ROM_ADDRESS_ENABLE) -+ bar->which = 1; -+ else { -+ u32 tmpval; -+ pci_read_config_dword(dev, offset, &tmpval); -+ if (tmpval != bar->val && value == bar->val) { -+ /* Allow restoration of bar value. */ -+ pci_write_config_dword(dev, offset, bar->val); -+ } -+ bar->which = 0; -+ } -+ -+ /* Do we need to support enabling/disabling the rom address here? */ -+ -+ return 0; -+} -+ -+/* For the BARs, only allow writes which write ~0 or -+ * the correct resource information -+ * (Needed for when the driver probes the resource usage) -+ */ -+static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data) -+{ -+ struct pci_bar_info *bar = data; -+ -+ if (unlikely(!bar)) { -+ printk(KERN_WARNING "pciback: driver data not found for %s\n", -+ pci_name(dev)); -+ return XEN_PCI_ERR_op_failed; -+ } -+ -+ /* A write to obtain the length must happen as a 32-bit write. 
-+ * This does not (yet) support writing individual bytes -+ */ -+ if (value == ~0) -+ bar->which = 1; -+ else { -+ u32 tmpval; -+ pci_read_config_dword(dev, offset, &tmpval); -+ if (tmpval != bar->val && value == bar->val) { -+ /* Allow restoration of bar value. */ -+ pci_write_config_dword(dev, offset, bar->val); -+ } -+ bar->which = 0; -+ } -+ -+ return 0; -+} -+ -+static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) -+{ -+ struct pci_bar_info *bar = data; -+ -+ if (unlikely(!bar)) { -+ printk(KERN_WARNING "pciback: driver data not found for %s\n", -+ pci_name(dev)); -+ return XEN_PCI_ERR_op_failed; -+ } -+ -+ *value = bar->which ? bar->len_val : bar->val; -+ -+ return 0; -+} -+ -+static inline void read_dev_bar(struct pci_dev *dev, -+ struct pci_bar_info *bar_info, int offset, -+ u32 len_mask) -+{ -+ int pos; -+ struct resource *res = dev->resource; -+ -+ if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1) -+ pos = PCI_ROM_RESOURCE; -+ else { -+ pos = (offset - PCI_BASE_ADDRESS_0) / 4; -+ if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE | -+ PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == -+ (PCI_BASE_ADDRESS_SPACE_MEMORY | -+ PCI_BASE_ADDRESS_MEM_TYPE_64))) { -+ bar_info->val = res[pos - 1].start >> 32; -+ bar_info->len_val = res[pos - 1].end >> 32; -+ return; -+ } -+ } -+ -+ bar_info->val = res[pos].start | -+ (res[pos].flags & PCI_REGION_FLAG_MASK); -+ bar_info->len_val = res[pos].end - res[pos].start + 1; -+} -+ -+static void *bar_init(struct pci_dev *dev, int offset) -+{ -+ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); -+ -+ if (!bar) -+ return ERR_PTR(-ENOMEM); -+ -+ read_dev_bar(dev, bar, offset, ~0); -+ bar->which = 0; -+ -+ return bar; -+} -+ -+static void *rom_init(struct pci_dev *dev, int offset) -+{ -+ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); -+ -+ if (!bar) -+ return ERR_PTR(-ENOMEM); -+ -+ read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE); -+ bar->which = 0; -+ -+ return 
bar; -+} -+ -+static void bar_reset(struct pci_dev *dev, int offset, void *data) -+{ -+ struct pci_bar_info *bar = data; -+ -+ bar->which = 0; -+} -+ -+static void bar_release(struct pci_dev *dev, int offset, void *data) -+{ -+ kfree(data); -+} -+ -+static int pciback_read_vendor(struct pci_dev *dev, int offset, -+ u16 *value, void *data) -+{ -+ *value = dev->vendor; -+ -+ return 0; -+} -+ -+static int pciback_read_device(struct pci_dev *dev, int offset, -+ u16 *value, void *data) -+{ -+ *value = dev->device; -+ -+ return 0; -+} -+ -+static int interrupt_read(struct pci_dev *dev, int offset, u8 * value, -+ void *data) -+{ -+ *value = (u8) dev->irq; -+ -+ return 0; -+} -+ -+static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data) -+{ -+ u8 cur_value; -+ int err; -+ -+ err = pci_read_config_byte(dev, offset, &cur_value); -+ if (err) -+ goto out; -+ -+ if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START) -+ || value == PCI_BIST_START) -+ err = pci_write_config_byte(dev, offset, value); -+ -+out: -+ return err; -+} -+ -+static const struct config_field header_common[] = { -+ { -+ .offset = PCI_VENDOR_ID, -+ .size = 2, -+ .u.w.read = pciback_read_vendor, -+ }, -+ { -+ .offset = PCI_DEVICE_ID, -+ .size = 2, -+ .u.w.read = pciback_read_device, -+ }, -+ { -+ .offset = PCI_COMMAND, -+ .size = 2, -+ .u.w.read = command_read, -+ .u.w.write = command_write, -+ }, -+ { -+ .offset = PCI_INTERRUPT_LINE, -+ .size = 1, -+ .u.b.read = interrupt_read, -+ }, -+ { -+ .offset = PCI_INTERRUPT_PIN, -+ .size = 1, -+ .u.b.read = pciback_read_config_byte, -+ }, -+ { -+ /* Any side effects of letting driver domain control cache line? 
*/ -+ .offset = PCI_CACHE_LINE_SIZE, -+ .size = 1, -+ .u.b.read = pciback_read_config_byte, -+ .u.b.write = pciback_write_config_byte, -+ }, -+ { -+ .offset = PCI_LATENCY_TIMER, -+ .size = 1, -+ .u.b.read = pciback_read_config_byte, -+ }, -+ { -+ .offset = PCI_BIST, -+ .size = 1, -+ .u.b.read = pciback_read_config_byte, -+ .u.b.write = bist_write, -+ }, -+ {} -+}; -+ -+#define CFG_FIELD_BAR(reg_offset) \ -+ { \ -+ .offset = reg_offset, \ -+ .size = 4, \ -+ .init = bar_init, \ -+ .reset = bar_reset, \ -+ .release = bar_release, \ -+ .u.dw.read = bar_read, \ -+ .u.dw.write = bar_write, \ -+ } -+ -+#define CFG_FIELD_ROM(reg_offset) \ -+ { \ -+ .offset = reg_offset, \ -+ .size = 4, \ -+ .init = rom_init, \ -+ .reset = bar_reset, \ -+ .release = bar_release, \ -+ .u.dw.read = bar_read, \ -+ .u.dw.write = rom_write, \ -+ } -+ -+static const struct config_field header_0[] = { -+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), -+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), -+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_2), -+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_3), -+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_4), -+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_5), -+ CFG_FIELD_ROM(PCI_ROM_ADDRESS), -+ {} -+}; -+ -+static const struct config_field header_1[] = { -+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), -+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), -+ CFG_FIELD_ROM(PCI_ROM_ADDRESS1), -+ {} -+}; -+ -+int pciback_config_header_add_fields(struct pci_dev *dev) -+{ -+ int err; -+ -+ err = pciback_config_add_fields(dev, header_common); -+ if (err) -+ goto out; -+ -+ switch (dev->hdr_type) { -+ case PCI_HEADER_TYPE_NORMAL: -+ err = pciback_config_add_fields(dev, header_0); -+ break; -+ -+ case PCI_HEADER_TYPE_BRIDGE: -+ err = pciback_config_add_fields(dev, header_1); -+ break; -+ -+ default: -+ err = -EINVAL; -+ printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n", -+ pci_name(dev), dev->hdr_type); -+ break; -+ } -+ -+out: -+ return err; -+} -diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c -new file 
mode 100644 -index 0000000..45c31fb ---- /dev/null -+++ b/drivers/xen/pciback/conf_space_quirks.c -@@ -0,0 +1,140 @@ -+/* -+ * PCI Backend - Handle special overlays for broken devices. -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ * Author: Chris Bookholt <hap10@epoch.ncsc.mil> -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/pci.h> -+#include "pciback.h" -+#include "conf_space.h" -+#include "conf_space_quirks.h" -+ -+LIST_HEAD(pciback_quirks); -+ -+static inline const struct pci_device_id * -+match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) -+{ -+ if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && -+ (id->device == PCI_ANY_ID || id->device == dev->device) && -+ (id->subvendor == PCI_ANY_ID || -+ id->subvendor == dev->subsystem_vendor) && -+ (id->subdevice == PCI_ANY_ID || -+ id->subdevice == dev->subsystem_device) && -+ !((id->class ^ dev->class) & id->class_mask)) -+ return id; -+ return NULL; -+} -+ -+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev) -+{ -+ struct pciback_config_quirk *tmp_quirk; -+ -+ list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list) -+ if (match_one_device(&tmp_quirk->devid, dev) != NULL) -+ goto out; -+ tmp_quirk = NULL; -+ printk(KERN_DEBUG -+ "quirk didn't match any device pciback knows about\n"); -+out: -+ return tmp_quirk; -+} -+ -+static inline void register_quirk(struct pciback_config_quirk *quirk) -+{ -+ list_add_tail(&quirk->quirks_list, &pciback_quirks); -+} -+ -+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg) -+{ -+ int ret = 0; -+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); -+ struct config_field_entry *cfg_entry; -+ -+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { -+ if (OFFSET(cfg_entry) == reg) { -+ ret = 1; -+ break; -+ } -+ } -+ return ret; -+} -+ -+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field -+ *field) -+{ -+ int err = 0; -+ -+ switch (field->size) { -+ case 1: 
-+ field->u.b.read = pciback_read_config_byte; -+ field->u.b.write = pciback_write_config_byte; -+ break; -+ case 2: -+ field->u.w.read = pciback_read_config_word; -+ field->u.w.write = pciback_write_config_word; -+ break; -+ case 4: -+ field->u.dw.read = pciback_read_config_dword; -+ field->u.dw.write = pciback_write_config_dword; -+ break; -+ default: -+ err = -EINVAL; -+ goto out; -+ } -+ -+ pciback_config_add_field(dev, field); -+ -+out: -+ return err; -+} -+ -+int pciback_config_quirks_init(struct pci_dev *dev) -+{ -+ struct pciback_config_quirk *quirk; -+ int ret = 0; -+ -+ quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC); -+ if (!quirk) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ quirk->devid.vendor = dev->vendor; -+ quirk->devid.device = dev->device; -+ quirk->devid.subvendor = dev->subsystem_vendor; -+ quirk->devid.subdevice = dev->subsystem_device; -+ quirk->devid.class = 0; -+ quirk->devid.class_mask = 0; -+ quirk->devid.driver_data = 0UL; -+ -+ quirk->pdev = dev; -+ -+ register_quirk(quirk); -+out: -+ return ret; -+} -+ -+void pciback_config_field_free(struct config_field *field) -+{ -+ kfree(field); -+} -+ -+int pciback_config_quirk_release(struct pci_dev *dev) -+{ -+ struct pciback_config_quirk *quirk; -+ int ret = 0; -+ -+ quirk = pciback_find_quirk(dev); -+ if (!quirk) { -+ ret = -ENXIO; -+ goto out; -+ } -+ -+ list_del(&quirk->quirks_list); -+ kfree(quirk); -+ -+out: -+ return ret; -+} -diff --git a/drivers/xen/pciback/conf_space_quirks.h b/drivers/xen/pciback/conf_space_quirks.h -new file mode 100644 -index 0000000..acd0e1a ---- /dev/null -+++ b/drivers/xen/pciback/conf_space_quirks.h -@@ -0,0 +1,35 @@ -+/* -+ * PCI Backend - Data structures for special overlays for broken devices. 
-+ * -+ * Ryan Wilson <hap9@epoch.ncsc.mil> -+ * Chris Bookholt <hap10@epoch.ncsc.mil> -+ */ -+ -+#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ -+#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ -+ -+#include <linux/pci.h> -+#include <linux/list.h> -+ -+struct pciback_config_quirk { -+ struct list_head quirks_list; -+ struct pci_device_id devid; -+ struct pci_dev *pdev; -+}; -+ -+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev); -+ -+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field -+ *field); -+ -+int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg); -+ -+int pciback_config_quirks_init(struct pci_dev *dev); -+ -+void pciback_config_field_free(struct config_field *field); -+ -+int pciback_config_quirk_release(struct pci_dev *dev); -+ -+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg); -+ -+#endif -diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c -new file mode 100644 -index 0000000..7f04f11 ---- /dev/null -+++ b/drivers/xen/pciback/controller.c -@@ -0,0 +1,442 @@ -+/* -+ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. -+ * Alex Williamson <alex.williamson@hp.com> -+ * -+ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI -+ * controllers. Devices under the same PCI controller are exposed on the -+ * same virtual domain:bus. Within a bus, device slots are virtualized -+ * to compact the bus. -+ * -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ */ -+ -+#include <linux/acpi.h> -+#include <linux/list.h> -+#include <linux/pci.h> -+#include <linux/spinlock.h> -+#include "pciback.h" -+ -+#define PCI_MAX_BUSSES 255 -+#define PCI_MAX_SLOTS 32 -+ -+struct controller_dev_entry { -+ struct list_head list; -+ struct pci_dev *dev; -+ unsigned int devfn; -+}; -+ -+struct controller_list_entry { -+ struct list_head list; -+ struct pci_controller *controller; -+ unsigned int domain; -+ unsigned int bus; -+ unsigned int next_devfn; -+ struct list_head dev_list; -+}; -+ -+struct controller_dev_data { -+ struct list_head list; -+ unsigned int next_domain; -+ unsigned int next_bus; -+ spinlock_t lock; -+}; -+ -+struct walk_info { -+ struct pciback_device *pdev; -+ int resource_count; -+ int root_num; -+}; -+ -+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, -+ unsigned int domain, unsigned int bus, -+ unsigned int devfn) -+{ -+ struct controller_dev_data *dev_data = pdev->pci_dev_data; -+ struct controller_dev_entry *dev_entry; -+ struct controller_list_entry *cntrl_entry; -+ struct pci_dev *dev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&dev_data->lock, flags); -+ -+ list_for_each_entry(cntrl_entry, &dev_data->list, list) { -+ if (cntrl_entry->domain != domain || -+ cntrl_entry->bus != bus) -+ continue; -+ -+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { -+ if (devfn == dev_entry->devfn) { -+ dev = dev_entry->dev; -+ goto found; -+ } -+ } -+ } -+found: -+ spin_unlock_irqrestore(&dev_data->lock, flags); -+ -+ return dev; -+} -+ -+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, -+ int devid, 
publish_pci_dev_cb publish_cb) -+{ -+ struct controller_dev_data *dev_data = pdev->pci_dev_data; -+ struct controller_dev_entry *dev_entry; -+ struct controller_list_entry *cntrl_entry; -+ struct pci_controller *dev_controller = PCI_CONTROLLER(dev); -+ unsigned long flags; -+ int ret = 0, found = 0; -+ -+ spin_lock_irqsave(&dev_data->lock, flags); -+ -+ /* Look to see if we already have a domain:bus for this controller */ -+ list_for_each_entry(cntrl_entry, &dev_data->list, list) { -+ if (cntrl_entry->controller == dev_controller) { -+ found = 1; -+ break; -+ } -+ } -+ -+ if (!found) { -+ cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC); -+ if (!cntrl_entry) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ cntrl_entry->controller = dev_controller; -+ cntrl_entry->next_devfn = PCI_DEVFN(0, 0); -+ -+ cntrl_entry->domain = dev_data->next_domain; -+ cntrl_entry->bus = dev_data->next_bus++; -+ if (dev_data->next_bus > PCI_MAX_BUSSES) { -+ dev_data->next_domain++; -+ dev_data->next_bus = 0; -+ } -+ -+ INIT_LIST_HEAD(&cntrl_entry->dev_list); -+ -+ list_add_tail(&cntrl_entry->list, &dev_data->list); -+ } -+ -+ if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) { -+ /* -+ * While it seems unlikely, this can actually happen if -+ * a controller has P2P bridges under it. 
-+ */ -+ xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x " -+ "is full, no room to export %04x:%02x:%02x.%x", -+ cntrl_entry->domain, cntrl_entry->bus, -+ pci_domain_nr(dev->bus), dev->bus->number, -+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); -+ ret = -ENOSPC; -+ goto out; -+ } -+ -+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC); -+ if (!dev_entry) { -+ if (list_empty(&cntrl_entry->dev_list)) { -+ list_del(&cntrl_entry->list); -+ kfree(cntrl_entry); -+ } -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ dev_entry->dev = dev; -+ dev_entry->devfn = cntrl_entry->next_devfn; -+ -+ list_add_tail(&dev_entry->list, &cntrl_entry->dev_list); -+ -+ cntrl_entry->next_devfn += PCI_DEVFN(1, 0); -+ -+out: -+ spin_unlock_irqrestore(&dev_data->lock, flags); -+ -+ /* TODO: Publish virtual domain:bus:slot.func here. */ -+ -+ return ret; -+} -+ -+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) -+{ -+ struct controller_dev_data *dev_data = pdev->pci_dev_data; -+ struct controller_list_entry *cntrl_entry; -+ struct controller_dev_entry *dev_entry = NULL; -+ struct pci_dev *found_dev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&dev_data->lock, flags); -+ -+ list_for_each_entry(cntrl_entry, &dev_data->list, list) { -+ if (cntrl_entry->controller != PCI_CONTROLLER(dev)) -+ continue; -+ -+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { -+ if (dev_entry->dev == dev) { -+ found_dev = dev_entry->dev; -+ break; -+ } -+ } -+ } -+ -+ if (!found_dev) { -+ spin_unlock_irqrestore(&dev_data->lock, flags); -+ return; -+ } -+ -+ list_del(&dev_entry->list); -+ kfree(dev_entry); -+ -+ if (list_empty(&cntrl_entry->dev_list)) { -+ list_del(&cntrl_entry->list); -+ kfree(cntrl_entry); -+ } -+ -+ spin_unlock_irqrestore(&dev_data->lock, flags); -+ pcistub_put_pci_dev(found_dev); -+} -+ -+int pciback_init_devices(struct pciback_device *pdev) -+{ -+ struct controller_dev_data *dev_data; -+ -+ dev_data = kmalloc(sizeof(*dev_data), 
GFP_KERNEL); -+ if (!dev_data) -+ return -ENOMEM; -+ -+ spin_lock_init(&dev_data->lock); -+ -+ INIT_LIST_HEAD(&dev_data->list); -+ -+ /* Starting domain:bus numbers */ -+ dev_data->next_domain = 0; -+ dev_data->next_bus = 0; -+ -+ pdev->pci_dev_data = dev_data; -+ -+ return 0; -+} -+ -+static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data) -+{ -+ struct walk_info *info = data; -+ struct acpi_resource_address64 addr; -+ acpi_status status; -+ int i, len, err; -+ char str[32], tmp[3]; -+ unsigned char *ptr, *buf; -+ -+ status = acpi_resource_to_address64(res, &addr); -+ -+ /* Do we care about this range? Let's check. */ -+ if (!ACPI_SUCCESS(status) || -+ !(addr.resource_type == ACPI_MEMORY_RANGE || -+ addr.resource_type == ACPI_IO_RANGE) || -+ !addr.address_length || addr.producer_consumer != ACPI_PRODUCER) -+ return AE_OK; -+ -+ /* -+ * Furthermore, we really only care to tell the guest about -+ * address ranges that require address translation of some sort. -+ */ -+ if (!(addr.resource_type == ACPI_MEMORY_RANGE && -+ addr.info.mem.translation) && -+ !(addr.resource_type == ACPI_IO_RANGE && -+ addr.info.io.translation)) -+ return AE_OK; -+ -+ /* Store the resource in xenbus for the guest */ -+ len = snprintf(str, sizeof(str), "root-%d-resource-%d", -+ info->root_num, info->resource_count); -+ if (unlikely(len >= (sizeof(str) - 1))) -+ return AE_OK; -+ -+ buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL); -+ if (!buf) -+ return AE_OK; -+ -+ /* Clean out resource_source */ -+ res->data.address64.resource_source.index = 0xFF; -+ res->data.address64.resource_source.string_length = 0; -+ res->data.address64.resource_source.string_ptr = NULL; -+ -+ ptr = (unsigned char *)res; -+ -+ /* Turn the acpi_resource into an ASCII byte stream */ -+ for (i = 0; i < sizeof(*res); i++) { -+ snprintf(tmp, sizeof(tmp), "%02x", ptr[i]); -+ strncat(buf, tmp, 2); -+ } -+ -+ err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename, -+ str, "%s", buf); -+ -+ if (!err) 
-+ info->resource_count++; -+ -+ kfree(buf); -+ -+ return AE_OK; -+} -+ -+int pciback_publish_pci_roots(struct pciback_device *pdev, -+ publish_pci_root_cb publish_root_cb) -+{ -+ struct controller_dev_data *dev_data = pdev->pci_dev_data; -+ struct controller_list_entry *cntrl_entry; -+ int i, root_num, len, err = 0; -+ unsigned int domain, bus; -+ char str[64]; -+ struct walk_info info; -+ -+ spin_lock(&dev_data->lock); -+ -+ list_for_each_entry(cntrl_entry, &dev_data->list, list) { -+ /* First publish all the domain:bus info */ -+ err = publish_root_cb(pdev, cntrl_entry->domain, -+ cntrl_entry->bus); -+ if (err) -+ goto out; -+ -+ /* -+ * Now figure out which root-%d this belongs to -+ * so we can associate resources with it. -+ */ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, -+ "root_num", "%d", &root_num); -+ -+ if (err != 1) -+ goto out; -+ -+ for (i = 0; i < root_num; i++) { -+ len = snprintf(str, sizeof(str), "root-%d", i); -+ if (unlikely(len >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, -+ str, "%x:%x", &domain, &bus); -+ if (err != 2) -+ goto out; -+ -+ /* Is this the one we just published? */ -+ if (domain == cntrl_entry->domain && -+ bus == cntrl_entry->bus) -+ break; -+ } -+ -+ if (i == root_num) -+ goto out; -+ -+ info.pdev = pdev; -+ info.resource_count = 0; -+ info.root_num = i; -+ -+ /* Let ACPI do the heavy lifting on decoding resources */ -+ acpi_walk_resources(cntrl_entry->controller->acpi_handle, -+ METHOD_NAME__CRS, write_xenbus_resource, -+ &info); -+ -+ /* No resouces. OK. 
On to the next one */ -+ if (!info.resource_count) -+ continue; -+ -+ /* Store the number of resources we wrote for this root-%d */ -+ len = snprintf(str, sizeof(str), "root-%d-resources", i); -+ if (unlikely(len >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, -+ "%d", info.resource_count); -+ if (err) -+ goto out; -+ } -+ -+ /* Finally, write some magic to synchronize with the guest. */ -+ len = snprintf(str, sizeof(str), "root-resource-magic"); -+ if (unlikely(len >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, -+ "%lx", (sizeof(struct acpi_resource) * 2) + 1); -+ -+out: -+ spin_unlock(&dev_data->lock); -+ -+ return err; -+} -+ -+void pciback_release_devices(struct pciback_device *pdev) -+{ -+ struct controller_dev_data *dev_data = pdev->pci_dev_data; -+ struct controller_list_entry *cntrl_entry, *c; -+ struct controller_dev_entry *dev_entry, *d; -+ -+ list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) { -+ list_for_each_entry_safe(dev_entry, d, -+ &cntrl_entry->dev_list, list) { -+ list_del(&dev_entry->list); -+ pcistub_put_pci_dev(dev_entry->dev); -+ kfree(dev_entry); -+ } -+ list_del(&cntrl_entry->list); -+ kfree(cntrl_entry); -+ } -+ -+ kfree(dev_data); -+ pdev->pci_dev_data = NULL; -+} -+ -+int pciback_get_pcifront_dev(struct pci_dev *pcidev, -+ struct pciback_device *pdev, -+ unsigned int *domain, unsigned int *bus, unsigned int *devfn) -+{ -+ struct controller_dev_data *dev_data = pdev->pci_dev_data; -+ struct controller_dev_entry *dev_entry; -+ struct controller_list_entry *cntrl_entry; -+ unsigned long flags; -+ int found = 0; -+ spin_lock_irqsave(&dev_data->lock, flags); -+ -+ list_for_each_entry(cntrl_entry, &dev_data->list, list) { -+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) { -+ if ((dev_entry->dev->bus->number == -+ pcidev->bus->number) && -+ (dev_entry->dev->devfn == 
-+ pcidev->devfn) && -+ (pci_domain_nr(dev_entry->dev->bus) == -+ pci_domain_nr(pcidev->bus))) { -+ found = 1; -+ *domain = cntrl_entry->domain; -+ *bus = cntrl_entry->bus; -+ *devfn = dev_entry->devfn; -+ goto out; -+ } -+ } -+ } -+out: -+ spin_unlock_irqrestore(&dev_data->lock, flags); -+ return found; -+ -+} -+ -diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c -new file mode 100644 -index 0000000..5386bebf ---- /dev/null -+++ b/drivers/xen/pciback/passthrough.c -@@ -0,0 +1,178 @@ -+/* -+ * PCI Backend - Provides restricted access to the real PCI bus topology -+ * to the frontend -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+ -+#include <linux/list.h> -+#include <linux/pci.h> -+#include <linux/spinlock.h> -+#include "pciback.h" -+ -+struct passthrough_dev_data { -+ /* Access to dev_list must be protected by lock */ -+ struct list_head dev_list; -+ spinlock_t lock; -+}; -+ -+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, -+ unsigned int domain, unsigned int bus, -+ unsigned int devfn) -+{ -+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; -+ struct pci_dev_entry *dev_entry; -+ struct pci_dev *dev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&dev_data->lock, flags); -+ -+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) { -+ if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) -+ && bus == (unsigned int)dev_entry->dev->bus->number -+ && devfn == dev_entry->dev->devfn) { -+ dev = dev_entry->dev; -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore(&dev_data->lock, flags); -+ -+ return dev; -+} -+ -+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, -+ int devid, publish_pci_dev_cb publish_cb) -+{ -+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; -+ struct pci_dev_entry *dev_entry; -+ unsigned long flags; -+ unsigned int domain, bus, devfn; -+ int err; -+ -+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); -+ if (!dev_entry) 
-+ return -ENOMEM; -+ dev_entry->dev = dev; -+ -+ spin_lock_irqsave(&dev_data->lock, flags); -+ list_add_tail(&dev_entry->list, &dev_data->dev_list); -+ spin_unlock_irqrestore(&dev_data->lock, flags); -+ -+ /* Publish this device. */ -+ domain = (unsigned int)pci_domain_nr(dev->bus); -+ bus = (unsigned int)dev->bus->number; -+ devfn = dev->devfn; -+ err = publish_cb(pdev, domain, bus, devfn, devid); -+ -+ return err; -+} -+ -+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) -+{ -+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; -+ struct pci_dev_entry *dev_entry, *t; -+ struct pci_dev *found_dev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&dev_data->lock, flags); -+ -+ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { -+ if (dev_entry->dev == dev) { -+ list_del(&dev_entry->list); -+ found_dev = dev_entry->dev; -+ kfree(dev_entry); -+ } -+ } -+ -+ spin_unlock_irqrestore(&dev_data->lock, flags); -+ -+ if (found_dev) -+ pcistub_put_pci_dev(found_dev); -+} -+ -+int pciback_init_devices(struct pciback_device *pdev) -+{ -+ struct passthrough_dev_data *dev_data; -+ -+ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); -+ if (!dev_data) -+ return -ENOMEM; -+ -+ spin_lock_init(&dev_data->lock); -+ -+ INIT_LIST_HEAD(&dev_data->dev_list); -+ -+ pdev->pci_dev_data = dev_data; -+ -+ return 0; -+} -+ -+int pciback_publish_pci_roots(struct pciback_device *pdev, -+ publish_pci_root_cb publish_root_cb) -+{ -+ int err = 0; -+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; -+ struct pci_dev_entry *dev_entry, *e; -+ struct pci_dev *dev; -+ int found; -+ unsigned int domain, bus; -+ -+ spin_lock(&dev_data->lock); -+ -+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) { -+ /* Only publish this device as a root if none of its -+ * parent bridges are exported -+ */ -+ found = 0; -+ dev = dev_entry->dev->bus->self; -+ for (; !found && dev != NULL; dev = dev->bus->self) { -+ list_for_each_entry(e, 
&dev_data->dev_list, list) { -+ if (dev == e->dev) { -+ found = 1; -+ break; -+ } -+ } -+ } -+ -+ domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus); -+ bus = (unsigned int)dev_entry->dev->bus->number; -+ -+ if (!found) { -+ err = publish_root_cb(pdev, domain, bus); -+ if (err) -+ break; -+ } -+ } -+ -+ spin_unlock(&dev_data->lock); -+ -+ return err; -+} -+ -+void pciback_release_devices(struct pciback_device *pdev) -+{ -+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data; -+ struct pci_dev_entry *dev_entry, *t; -+ -+ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { -+ list_del(&dev_entry->list); -+ pcistub_put_pci_dev(dev_entry->dev); -+ kfree(dev_entry); -+ } -+ -+ kfree(dev_data); -+ pdev->pci_dev_data = NULL; -+} -+ -+int pciback_get_pcifront_dev(struct pci_dev *pcidev, -+ struct pciback_device *pdev, -+ unsigned int *domain, unsigned int *bus, -+ unsigned int *devfn) -+ -+{ -+ *domain = pci_domain_nr(pcidev->bus); -+ *bus = pcidev->bus->number; -+ *devfn = pcidev->devfn; -+ return 1; -+} -diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c -new file mode 100644 -index 0000000..88c7ca1 ---- /dev/null -+++ b/drivers/xen/pciback/pci_stub.c -@@ -0,0 +1,1370 @@ -+/* -+ * PCI Stub Driver - Grabs devices in backend to be exported later -+ * -+ * Ryan Wilson <hap9@epoch.ncsc.mil> -+ * Chris Bookholt <hap10@epoch.ncsc.mil> -+ */ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/rwsem.h> -+#include <linux/list.h> -+#include <linux/spinlock.h> -+#include <linux/kref.h> -+#include <linux/pci.h> -+#include <linux/wait.h> -+#include <linux/sched.h> -+#include <asm/atomic.h> -+#include <xen/events.h> -+#include <asm/xen/pci.h> -+#include <asm/xen/hypervisor.h> -+#include "pciback.h" -+#include "conf_space.h" -+#include "conf_space_quirks.h" -+ -+#define DRV_NAME "pciback" -+ -+static char *pci_devs_to_hide; -+wait_queue_head_t aer_wait_queue; -+/*Add sem for sync AER handling and pciback 
remove/reconfigue ops, -+* We want to avoid in middle of AER ops, pciback devices is being removed -+*/ -+static DECLARE_RWSEM(pcistub_sem); -+module_param_named(hide, pci_devs_to_hide, charp, 0444); -+ -+struct pcistub_device_id { -+ struct list_head slot_list; -+ int domain; -+ unsigned char bus; -+ unsigned int devfn; -+}; -+static LIST_HEAD(pcistub_device_ids); -+static DEFINE_SPINLOCK(device_ids_lock); -+ -+struct pcistub_device { -+ struct kref kref; -+ struct list_head dev_list; -+ spinlock_t lock; -+ -+ struct pci_dev *dev; -+ struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */ -+}; -+ -+/* Access to pcistub_devices & seized_devices lists and the initialize_devices -+ * flag must be locked with pcistub_devices_lock -+ */ -+static DEFINE_SPINLOCK(pcistub_devices_lock); -+static LIST_HEAD(pcistub_devices); -+ -+/* wait for device_initcall before initializing our devices -+ * (see pcistub_init_devices_late) -+ */ -+static int initialize_devices; -+static LIST_HEAD(seized_devices); -+ -+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) -+{ -+ struct pcistub_device *psdev; -+ -+ dev_dbg(&dev->dev, "pcistub_device_alloc\n"); -+ -+ psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC); -+ if (!psdev) -+ return NULL; -+ -+ psdev->dev = pci_dev_get(dev); -+ if (!psdev->dev) { -+ kfree(psdev); -+ return NULL; -+ } -+ -+ kref_init(&psdev->kref); -+ spin_lock_init(&psdev->lock); -+ -+ return psdev; -+} -+ -+/* Don't call this directly as it's called by pcistub_device_put */ -+static void pcistub_device_release(struct kref *kref) -+{ -+ struct pcistub_device *psdev; -+ -+ psdev = container_of(kref, struct pcistub_device, kref); -+ -+ dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); -+ -+ xen_unregister_device_domain_owner(psdev->dev); -+ -+ /* Clean-up the device */ -+ pciback_reset_device(psdev->dev); -+ pciback_config_free_dyn_fields(psdev->dev); -+ pciback_config_free_dev(psdev->dev); -+ kfree(pci_get_drvdata(psdev->dev)); -+ 
pci_set_drvdata(psdev->dev, NULL); -+ -+ pci_dev_put(psdev->dev); -+ -+ kfree(psdev); -+} -+ -+static inline void pcistub_device_get(struct pcistub_device *psdev) -+{ -+ kref_get(&psdev->kref); -+} -+ -+static inline void pcistub_device_put(struct pcistub_device *psdev) -+{ -+ kref_put(&psdev->kref, pcistub_device_release); -+} -+ -+static struct pcistub_device *pcistub_device_find(int domain, int bus, -+ int slot, int func) -+{ -+ struct pcistub_device *psdev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ -+ list_for_each_entry(psdev, &pcistub_devices, dev_list) { -+ if (psdev->dev != NULL -+ && domain == pci_domain_nr(psdev->dev->bus) -+ && bus == psdev->dev->bus->number -+ && PCI_DEVFN(slot, func) == psdev->dev->devfn) { -+ pcistub_device_get(psdev); -+ goto out; -+ } -+ } -+ -+ /* didn't find it */ -+ psdev = NULL; -+ -+out: -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ return psdev; -+} -+ -+static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev, -+ struct pcistub_device *psdev) -+{ -+ struct pci_dev *pci_dev = NULL; -+ unsigned long flags; -+ -+ pcistub_device_get(psdev); -+ -+ spin_lock_irqsave(&psdev->lock, flags); -+ if (!psdev->pdev) { -+ psdev->pdev = pdev; -+ pci_dev = psdev->dev; -+ } -+ spin_unlock_irqrestore(&psdev->lock, flags); -+ -+ if (!pci_dev) -+ pcistub_device_put(psdev); -+ -+ return pci_dev; -+} -+ -+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, -+ int domain, int bus, -+ int slot, int func) -+{ -+ struct pcistub_device *psdev; -+ struct pci_dev *found_dev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ -+ list_for_each_entry(psdev, &pcistub_devices, dev_list) { -+ if (psdev->dev != NULL -+ && domain == pci_domain_nr(psdev->dev->bus) -+ && bus == psdev->dev->bus->number -+ && PCI_DEVFN(slot, func) == psdev->dev->devfn) { -+ found_dev = pcistub_device_get_pci_dev(pdev, psdev); -+ break; -+ } -+ } 
-+ -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ return found_dev; -+} -+ -+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev, -+ struct pci_dev *dev) -+{ -+ struct pcistub_device *psdev; -+ struct pci_dev *found_dev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ -+ list_for_each_entry(psdev, &pcistub_devices, dev_list) { -+ if (psdev->dev == dev) { -+ found_dev = pcistub_device_get_pci_dev(pdev, psdev); -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ return found_dev; -+} -+ -+void pcistub_put_pci_dev(struct pci_dev *dev) -+{ -+ struct pcistub_device *psdev, *found_psdev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ -+ list_for_each_entry(psdev, &pcistub_devices, dev_list) { -+ if (psdev->dev == dev) { -+ found_psdev = psdev; -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ -+ /*hold this lock for avoiding breaking link between -+ * pcistub and pciback when AER is in processing -+ */ -+ down_write(&pcistub_sem); -+ /* Cleanup our device -+ * (so it's ready for the next domain) -+ */ -+ pciback_reset_device(found_psdev->dev); -+ pciback_config_free_dyn_fields(found_psdev->dev); -+ pciback_config_reset_dev(found_psdev->dev); -+ -+ spin_lock_irqsave(&found_psdev->lock, flags); -+ found_psdev->pdev = NULL; -+ spin_unlock_irqrestore(&found_psdev->lock, flags); -+ -+ pcistub_device_put(found_psdev); -+ up_write(&pcistub_sem); -+} -+ -+static int __devinit pcistub_match_one(struct pci_dev *dev, -+ struct pcistub_device_id *pdev_id) -+{ -+ /* Match the specified device by domain, bus, slot, func and also if -+ * any of the device's parent bridges match. -+ */ -+ for (; dev != NULL; dev = dev->bus->self) { -+ if (pci_domain_nr(dev->bus) == pdev_id->domain -+ && dev->bus->number == pdev_id->bus -+ && dev->devfn == pdev_id->devfn) -+ return 1; -+ -+ /* Sometimes topmost bridge links to itself. 
*/ -+ if (dev == dev->bus->self) -+ break; -+ } -+ -+ return 0; -+} -+ -+static int __devinit pcistub_match(struct pci_dev *dev) -+{ -+ struct pcistub_device_id *pdev_id; -+ unsigned long flags; -+ int found = 0; -+ -+ spin_lock_irqsave(&device_ids_lock, flags); -+ list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) { -+ if (pcistub_match_one(dev, pdev_id)) { -+ found = 1; -+ break; -+ } -+ } -+ spin_unlock_irqrestore(&device_ids_lock, flags); -+ -+ return found; -+} -+ -+static int __devinit pcistub_init_device(struct pci_dev *dev) -+{ -+ struct pciback_dev_data *dev_data; -+ int err = 0; -+ -+ dev_dbg(&dev->dev, "initializing...\n"); -+ -+ /* The PCI backend is not intended to be a module (or to work with -+ * removable PCI devices (yet). If it were, pciback_config_free() -+ * would need to be called somewhere to free the memory allocated -+ * here and then to call kfree(pci_get_drvdata(psdev->dev)). -+ */ -+ dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]") -+ + strlen(pci_name(dev)) + 1, GFP_ATOMIC); -+ if (!dev_data) { -+ err = -ENOMEM; -+ goto out; -+ } -+ pci_set_drvdata(dev, dev_data); -+ -+ /* -+ * Setup name for fake IRQ handler. It will only be enabled -+ * once the device is turned on by the guest. -+ */ -+ sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev)); -+ -+ dev_dbg(&dev->dev, "initializing config\n"); -+ -+ init_waitqueue_head(&aer_wait_queue); -+ err = pciback_config_init_dev(dev); -+ if (err) -+ goto out; -+ -+ /* HACK: Force device (& ACPI) to determine what IRQ it's on - we -+ * must do this here because pcibios_enable_device may specify -+ * the pci device's true irq (and possibly its other resources) -+ * if they differ from what's in the configuration space. -+ * This makes the assumption that the device's resources won't -+ * change after this point (otherwise this code may break!) 
-+ */ -+ dev_dbg(&dev->dev, "enabling device\n"); -+ err = pci_enable_device(dev); -+ if (err) -+ goto config_release; -+ -+ /* Now disable the device (this also ensures some private device -+ * data is setup before we export) -+ */ -+ dev_dbg(&dev->dev, "reset device\n"); -+ pciback_reset_device(dev); -+ -+ return 0; -+ -+config_release: -+ pciback_config_free_dev(dev); -+ -+out: -+ pci_set_drvdata(dev, NULL); -+ kfree(dev_data); -+ return err; -+} -+ -+/* -+ * Because some initialization still happens on -+ * devices during fs_initcall, we need to defer -+ * full initialization of our devices until -+ * device_initcall. -+ */ -+static int __init pcistub_init_devices_late(void) -+{ -+ struct pcistub_device *psdev; -+ unsigned long flags; -+ int err = 0; -+ -+ pr_debug("pciback: pcistub_init_devices_late\n"); -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ -+ while (!list_empty(&seized_devices)) { -+ psdev = container_of(seized_devices.next, -+ struct pcistub_device, dev_list); -+ list_del(&psdev->dev_list); -+ -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ -+ err = pcistub_init_device(psdev->dev); -+ if (err) { -+ dev_err(&psdev->dev->dev, -+ "error %d initializing device\n", err); -+ kfree(psdev); -+ psdev = NULL; -+ } -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ -+ if (psdev) -+ list_add_tail(&psdev->dev_list, &pcistub_devices); -+ } -+ -+ initialize_devices = 1; -+ -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ -+ return 0; -+} -+ -+static int __devinit pcistub_seize(struct pci_dev *dev) -+{ -+ struct pcistub_device *psdev; -+ unsigned long flags; -+ int err = 0; -+ -+ psdev = pcistub_device_alloc(dev); -+ if (!psdev) -+ return -ENOMEM; -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ -+ if (initialize_devices) { -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ -+ /* don't want irqs disabled when calling pcistub_init_device */ -+ err = pcistub_init_device(psdev->dev); -+ -+ 
spin_lock_irqsave(&pcistub_devices_lock, flags); -+ -+ if (!err) -+ list_add(&psdev->dev_list, &pcistub_devices); -+ } else { -+ dev_dbg(&dev->dev, "deferring initialization\n"); -+ list_add(&psdev->dev_list, &seized_devices); -+ } -+ -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ -+ if (err) -+ pcistub_device_put(psdev); -+ -+ return err; -+} -+ -+static int __devinit pcistub_probe(struct pci_dev *dev, -+ const struct pci_device_id *id) -+{ -+ int err = 0; -+ -+ dev_dbg(&dev->dev, "probing...\n"); -+ -+ if (pcistub_match(dev)) { -+ -+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL -+ && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { -+ dev_err(&dev->dev, "can't export pci devices that " -+ "don't have a normal (0) or bridge (1) " -+ "header type!\n"); -+ err = -ENODEV; -+ goto out; -+ } -+ -+ dev_info(&dev->dev, "seizing device\n"); -+ err = pcistub_seize(dev); -+ } else -+ /* Didn't find the device */ -+ err = -ENODEV; -+ -+out: -+ return err; -+} -+ -+static void pcistub_remove(struct pci_dev *dev) -+{ -+ struct pcistub_device *psdev, *found_psdev = NULL; -+ unsigned long flags; -+ -+ dev_dbg(&dev->dev, "removing\n"); -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ -+ pciback_config_quirk_release(dev); -+ -+ list_for_each_entry(psdev, &pcistub_devices, dev_list) { -+ if (psdev->dev == dev) { -+ found_psdev = psdev; -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ -+ if (found_psdev) { -+ dev_dbg(&dev->dev, "found device to remove - in use? %p\n", -+ found_psdev->pdev); -+ -+ if (found_psdev->pdev) { -+ printk(KERN_WARNING "pciback: ****** removing device " -+ "%s while still in-use! 
******\n", -+ pci_name(found_psdev->dev)); -+ printk(KERN_WARNING "pciback: ****** driver domain may " -+ "still access this device's i/o resources!\n"); -+ printk(KERN_WARNING "pciback: ****** shutdown driver " -+ "domain before binding device\n"); -+ printk(KERN_WARNING "pciback: ****** to other drivers " -+ "or domains\n"); -+ -+ pciback_release_pci_dev(found_psdev->pdev, -+ found_psdev->dev); -+ } -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ list_del(&found_psdev->dev_list); -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ -+ /* the final put for releasing from the list */ -+ pcistub_device_put(found_psdev); -+ } -+} -+ -+static const struct pci_device_id pcistub_ids[] = { -+ { -+ .vendor = PCI_ANY_ID, -+ .device = PCI_ANY_ID, -+ .subvendor = PCI_ANY_ID, -+ .subdevice = PCI_ANY_ID, -+ }, -+ {0,}, -+}; -+ -+#define PCI_NODENAME_MAX 40 -+static void kill_domain_by_device(struct pcistub_device *psdev) -+{ -+ struct xenbus_transaction xbt; -+ int err; -+ char nodename[PCI_NODENAME_MAX]; -+ -+ if (!psdev) -+ dev_err(&psdev->dev->dev, -+ "device is NULL when do AER recovery/kill_domain\n"); -+ snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0", -+ psdev->pdev->xdev->otherend_id); -+ nodename[strlen(nodename)] = '\0'; -+ -+again: -+ err = xenbus_transaction_start(&xbt); -+ if (err) { -+ dev_err(&psdev->dev->dev, -+ "error %d when start xenbus transaction\n", err); -+ return; -+ } -+ /*PV AER handlers will set this flag*/ -+ xenbus_printf(xbt, nodename, "aerState" , "aerfail"); -+ err = xenbus_transaction_end(xbt, 0); -+ if (err) { -+ if (err == -EAGAIN) -+ goto again; -+ dev_err(&psdev->dev->dev, -+ "error %d when end xenbus transaction\n", err); -+ return; -+ } -+} -+ -+/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and -+ * backend need to have cooperation. In pciback, those steps will do similar -+ * jobs: send service request and waiting for front_end response. 
-+*/ -+static pci_ers_result_t common_process(struct pcistub_device *psdev, -+ pci_channel_state_t state, int aer_cmd, pci_ers_result_t result) -+{ -+ pci_ers_result_t res = result; -+ struct xen_pcie_aer_op *aer_op; -+ int ret; -+ -+ /*with PV AER drivers*/ -+ aer_op = &(psdev->pdev->sh_info->aer_op); -+ aer_op->cmd = aer_cmd ; -+ /*useful for error_detected callback*/ -+ aer_op->err = state; -+ /*pcifront_end BDF*/ -+ ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev, -+ &aer_op->domain, &aer_op->bus, &aer_op->devfn); -+ if (!ret) { -+ dev_err(&psdev->dev->dev, -+ "pciback: failed to get pcifront device\n"); -+ return PCI_ERS_RESULT_NONE; -+ } -+ wmb(); -+ -+ dev_dbg(&psdev->dev->dev, -+ "pciback: aer_op %x dom %x bus %x devfn %x\n", -+ aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn); -+ /*local flag to mark there's aer request, pciback callback will use this -+ * flag to judge whether we need to check pci-front give aer service -+ * ack signal -+ */ -+ set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); -+ -+ /*It is possible that a pcifront conf_read_write ops request invokes -+ * the callback which cause the spurious execution of wake_up. 
-+ * Yet it is harmless and better than a spinlock here -+ */ -+ set_bit(_XEN_PCIB_active, -+ (unsigned long *)&psdev->pdev->sh_info->flags); -+ wmb(); -+ notify_remote_via_irq(psdev->pdev->evtchn_irq); -+ -+ ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active, -+ (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ); -+ -+ if (!ret) { -+ if (test_bit(_XEN_PCIB_active, -+ (unsigned long *)&psdev->pdev->sh_info->flags)) { -+ dev_err(&psdev->dev->dev, -+ "pcifront aer process not responding!\n"); -+ clear_bit(_XEN_PCIB_active, -+ (unsigned long *)&psdev->pdev->sh_info->flags); -+ aer_op->err = PCI_ERS_RESULT_NONE; -+ return res; -+ } -+ } -+ clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); -+ -+ if (test_bit(_XEN_PCIF_active, -+ (unsigned long *)&psdev->pdev->sh_info->flags)) { -+ dev_dbg(&psdev->dev->dev, -+ "schedule pci_conf service in pciback \n"); -+ test_and_schedule_op(psdev->pdev); -+ } -+ -+ res = (pci_ers_result_t)aer_op->err; -+ return res; -+} -+ -+/* -+* pciback_slot_reset: it will send the slot_reset request to pcifront in case -+* of the device driver could provide this service, and then wait for pcifront -+* ack. 
-+* @dev: pointer to PCI devices -+* return value is used by aer_core do_recovery policy -+*/ -+static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev) -+{ -+ struct pcistub_device *psdev; -+ pci_ers_result_t result; -+ -+ result = PCI_ERS_RESULT_RECOVERED; -+ dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n", -+ dev->bus->number, dev->devfn); -+ -+ down_write(&pcistub_sem); -+ psdev = pcistub_device_find(pci_domain_nr(dev->bus), -+ dev->bus->number, -+ PCI_SLOT(dev->devfn), -+ PCI_FUNC(dev->devfn)); -+ -+ if (!psdev || !psdev->pdev) { -+ dev_err(&dev->dev, -+ "pciback device is not found/assigned\n"); -+ goto end; -+ } -+ -+ if (!psdev->pdev->sh_info) { -+ dev_err(&dev->dev, "pciback device is not connected or owned" -+ " by HVM, kill it\n"); -+ kill_domain_by_device(psdev); -+ goto release; -+ } -+ -+ if (!test_bit(_XEN_PCIB_AERHANDLER, -+ (unsigned long *)&psdev->pdev->sh_info->flags)) { -+ dev_err(&dev->dev, -+ "guest with no AER driver should have been killed\n"); -+ goto release; -+ } -+ result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result); -+ -+ if (result == PCI_ERS_RESULT_NONE || -+ result == PCI_ERS_RESULT_DISCONNECT) { -+ dev_dbg(&dev->dev, -+ "No AER slot_reset service or disconnected!\n"); -+ kill_domain_by_device(psdev); -+ } -+release: -+ pcistub_device_put(psdev); -+end: -+ up_write(&pcistub_sem); -+ return result; -+ -+} -+ -+ -+/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront -+* in case of the device driver could provide this service, and then wait -+* for pcifront ack -+* @dev: pointer to PCI devices -+* return value is used by aer_core do_recovery policy -+*/ -+ -+static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev) -+{ -+ struct pcistub_device *psdev; -+ pci_ers_result_t result; -+ -+ result = PCI_ERS_RESULT_RECOVERED; -+ dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n", -+ dev->bus->number, dev->devfn); -+ -+ down_write(&pcistub_sem); -+ psdev = 
pcistub_device_find(pci_domain_nr(dev->bus), -+ dev->bus->number, -+ PCI_SLOT(dev->devfn), -+ PCI_FUNC(dev->devfn)); -+ -+ if (!psdev || !psdev->pdev) { -+ dev_err(&dev->dev, -+ "pciback device is not found/assigned\n"); -+ goto end; -+ } -+ -+ if (!psdev->pdev->sh_info) { -+ dev_err(&dev->dev, "pciback device is not connected or owned" -+ " by HVM, kill it\n"); -+ kill_domain_by_device(psdev); -+ goto release; -+ } -+ -+ if (!test_bit(_XEN_PCIB_AERHANDLER, -+ (unsigned long *)&psdev->pdev->sh_info->flags)) { -+ dev_err(&dev->dev, -+ "guest with no AER driver should have been killed\n"); -+ goto release; -+ } -+ result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result); -+ -+ if (result == PCI_ERS_RESULT_NONE || -+ result == PCI_ERS_RESULT_DISCONNECT) { -+ dev_dbg(&dev->dev, -+ "No AER mmio_enabled service or disconnected!\n"); -+ kill_domain_by_device(psdev); -+ } -+release: -+ pcistub_device_put(psdev); -+end: -+ up_write(&pcistub_sem); -+ return result; -+} -+ -+/*pciback_error_detected: it will send the error_detected request to pcifront -+* in case of the device driver could provide this service, and then wait -+* for pcifront ack. 
-+* @dev: pointer to PCI devices -+* @error: the current PCI connection state -+* return value is used by aer_core do_recovery policy -+*/ -+ -+static pci_ers_result_t pciback_error_detected(struct pci_dev *dev, -+ pci_channel_state_t error) -+{ -+ struct pcistub_device *psdev; -+ pci_ers_result_t result; -+ -+ result = PCI_ERS_RESULT_CAN_RECOVER; -+ dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n", -+ dev->bus->number, dev->devfn); -+ -+ down_write(&pcistub_sem); -+ psdev = pcistub_device_find(pci_domain_nr(dev->bus), -+ dev->bus->number, -+ PCI_SLOT(dev->devfn), -+ PCI_FUNC(dev->devfn)); -+ -+ if (!psdev || !psdev->pdev) { -+ dev_err(&dev->dev, -+ "pciback device is not found/assigned\n"); -+ goto end; -+ } -+ -+ if (!psdev->pdev->sh_info) { -+ dev_err(&dev->dev, "pciback device is not connected or owned" -+ " by HVM, kill it\n"); -+ kill_domain_by_device(psdev); -+ goto release; -+ } -+ -+ /*Guest owns the device yet no aer handler regiested, kill guest*/ -+ if (!test_bit(_XEN_PCIB_AERHANDLER, -+ (unsigned long *)&psdev->pdev->sh_info->flags)) { -+ dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n"); -+ kill_domain_by_device(psdev); -+ goto release; -+ } -+ result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result); -+ -+ if (result == PCI_ERS_RESULT_NONE || -+ result == PCI_ERS_RESULT_DISCONNECT) { -+ dev_dbg(&dev->dev, -+ "No AER error_detected service or disconnected!\n"); -+ kill_domain_by_device(psdev); -+ } -+release: -+ pcistub_device_put(psdev); -+end: -+ up_write(&pcistub_sem); -+ return result; -+} -+ -+/*pciback_error_resume: it will send the error_resume request to pcifront -+* in case of the device driver could provide this service, and then wait -+* for pcifront ack. 
-+* @dev: pointer to PCI devices -+*/ -+ -+static void pciback_error_resume(struct pci_dev *dev) -+{ -+ struct pcistub_device *psdev; -+ -+ dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n", -+ dev->bus->number, dev->devfn); -+ -+ down_write(&pcistub_sem); -+ psdev = pcistub_device_find(pci_domain_nr(dev->bus), -+ dev->bus->number, -+ PCI_SLOT(dev->devfn), -+ PCI_FUNC(dev->devfn)); -+ -+ if (!psdev || !psdev->pdev) { -+ dev_err(&dev->dev, -+ "pciback device is not found/assigned\n"); -+ goto end; -+ } -+ -+ if (!psdev->pdev->sh_info) { -+ dev_err(&dev->dev, "pciback device is not connected or owned" -+ " by HVM, kill it\n"); -+ kill_domain_by_device(psdev); -+ goto release; -+ } -+ -+ if (!test_bit(_XEN_PCIB_AERHANDLER, -+ (unsigned long *)&psdev->pdev->sh_info->flags)) { -+ dev_err(&dev->dev, -+ "guest with no AER driver should have been killed\n"); -+ kill_domain_by_device(psdev); -+ goto release; -+ } -+ common_process(psdev, 1, XEN_PCI_OP_aer_resume, -+ PCI_ERS_RESULT_RECOVERED); -+release: -+ pcistub_device_put(psdev); -+end: -+ up_write(&pcistub_sem); -+ return; -+} -+ -+/*add pciback AER handling*/ -+static struct pci_error_handlers pciback_error_handler = { -+ .error_detected = pciback_error_detected, -+ .mmio_enabled = pciback_mmio_enabled, -+ .slot_reset = pciback_slot_reset, -+ .resume = pciback_error_resume, -+}; -+ -+/* -+ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't -+ * for a normal device. I don't want it to be loaded automatically. 
-+ */ -+ -+static struct pci_driver pciback_pci_driver = { -+ .name = DRV_NAME, -+ .id_table = pcistub_ids, -+ .probe = pcistub_probe, -+ .remove = pcistub_remove, -+ .err_handler = &pciback_error_handler, -+}; -+ -+static inline int str_to_slot(const char *buf, int *domain, int *bus, -+ int *slot, int *func) -+{ -+ int err; -+ -+ err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); -+ if (err == 4) -+ return 0; -+ else if (err < 0) -+ return -EINVAL; -+ -+ /* try again without domain */ -+ *domain = 0; -+ err = sscanf(buf, " %x:%x.%x", bus, slot, func); -+ if (err == 3) -+ return 0; -+ -+ return -EINVAL; -+} -+ -+static inline int str_to_quirk(const char *buf, int *domain, int *bus, int -+ *slot, int *func, int *reg, int *size, int *mask) -+{ -+ int err; -+ -+ err = -+ sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot, -+ func, reg, size, mask); -+ if (err == 7) -+ return 0; -+ return -EINVAL; -+} -+ -+static int pcistub_device_id_add(int domain, int bus, int slot, int func) -+{ -+ struct pcistub_device_id *pci_dev_id; -+ unsigned long flags; -+ -+ pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); -+ if (!pci_dev_id) -+ return -ENOMEM; -+ -+ pci_dev_id->domain = domain; -+ pci_dev_id->bus = bus; -+ pci_dev_id->devfn = PCI_DEVFN(slot, func); -+ -+ pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n", -+ domain, bus, slot, func); -+ -+ spin_lock_irqsave(&device_ids_lock, flags); -+ list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids); -+ spin_unlock_irqrestore(&device_ids_lock, flags); -+ -+ return 0; -+} -+ -+static int pcistub_device_id_remove(int domain, int bus, int slot, int func) -+{ -+ struct pcistub_device_id *pci_dev_id, *t; -+ int devfn = PCI_DEVFN(slot, func); -+ int err = -ENOENT; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&device_ids_lock, flags); -+ list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, -+ slot_list) { -+ if (pci_dev_id->domain == domain -+ && pci_dev_id->bus == bus && pci_dev_id->devfn 
== devfn) { -+ /* Don't break; here because it's possible the same -+ * slot could be in the list more than once -+ */ -+ list_del(&pci_dev_id->slot_list); -+ kfree(pci_dev_id); -+ -+ err = 0; -+ -+ pr_debug("pciback: removed %04x:%02x:%02x.%01x from " -+ "seize list\n", domain, bus, slot, func); -+ } -+ } -+ spin_unlock_irqrestore(&device_ids_lock, flags); -+ -+ return err; -+} -+ -+static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, -+ int size, int mask) -+{ -+ int err = 0; -+ struct pcistub_device *psdev; -+ struct pci_dev *dev; -+ struct config_field *field; -+ -+ psdev = pcistub_device_find(domain, bus, slot, func); -+ if (!psdev || !psdev->dev) { -+ err = -ENODEV; -+ goto out; -+ } -+ dev = psdev->dev; -+ -+ field = kzalloc(sizeof(*field), GFP_ATOMIC); -+ if (!field) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ field->offset = reg; -+ field->size = size; -+ field->mask = mask; -+ field->init = NULL; -+ field->reset = NULL; -+ field->release = NULL; -+ field->clean = pciback_config_field_free; -+ -+ err = pciback_config_quirks_add_field(dev, field); -+ if (err) -+ kfree(field); -+out: -+ return err; -+} -+ -+static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf, -+ size_t count) -+{ -+ int domain, bus, slot, func; -+ int err; -+ -+ err = str_to_slot(buf, &domain, &bus, &slot, &func); -+ if (err) -+ goto out; -+ -+ err = pcistub_device_id_add(domain, bus, slot, func); -+ -+out: -+ if (!err) -+ err = count; -+ return err; -+} -+ -+DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); -+ -+static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, -+ size_t count) -+{ -+ int domain, bus, slot, func; -+ int err; -+ -+ err = str_to_slot(buf, &domain, &bus, &slot, &func); -+ if (err) -+ goto out; -+ -+ err = pcistub_device_id_remove(domain, bus, slot, func); -+ -+out: -+ if (!err) -+ err = count; -+ return err; -+} -+ -+DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); -+ -+static 
ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) -+{ -+ struct pcistub_device_id *pci_dev_id; -+ size_t count = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&device_ids_lock, flags); -+ list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) { -+ if (count >= PAGE_SIZE) -+ break; -+ -+ count += scnprintf(buf + count, PAGE_SIZE - count, -+ "%04x:%02x:%02x.%01x\n", -+ pci_dev_id->domain, pci_dev_id->bus, -+ PCI_SLOT(pci_dev_id->devfn), -+ PCI_FUNC(pci_dev_id->devfn)); -+ } -+ spin_unlock_irqrestore(&device_ids_lock, flags); -+ -+ return count; -+} -+ -+DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); -+ -+static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) -+{ -+ struct pcistub_device *psdev; -+ struct pciback_dev_data *dev_data; -+ size_t count = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&pcistub_devices_lock, flags); -+ list_for_each_entry(psdev, &pcistub_devices, dev_list) { -+ if (count >= PAGE_SIZE) -+ break; -+ if (!psdev->dev) -+ continue; -+ dev_data = pci_get_drvdata(psdev->dev); -+ if (!dev_data) -+ continue; -+ count += -+ scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n", -+ pci_name(psdev->dev), -+ dev_data->isr_on ? "on" : "off", -+ dev_data->ack_intr ? 
"ack" : "not ack", -+ dev_data->handled); -+ } -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ return count; -+} -+ -+DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); -+ -+static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, -+ const char *buf, -+ size_t count) -+{ -+ struct pcistub_device *psdev; -+ struct pciback_dev_data *dev_data; -+ int domain, bus, slot, func; -+ int err = -ENOENT; -+ -+ err = str_to_slot(buf, &domain, &bus, &slot, &func); -+ if (err) -+ goto out; -+ -+ psdev = pcistub_device_find(domain, bus, slot, func); -+ -+ if (!psdev) -+ goto out; -+ -+ dev_data = pci_get_drvdata(psdev->dev); -+ if (!dev_data) -+ goto out; -+ -+ dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n", -+ dev_data->irq_name, dev_data->isr_on, -+ !dev_data->isr_on); -+ -+ dev_data->isr_on = !(dev_data->isr_on); -+ if (dev_data->isr_on) -+ dev_data->ack_intr = 1; -+out: -+ if (!err) -+ err = count; -+ return err; -+} -+DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch); -+ -+static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, -+ size_t count) -+{ -+ int domain, bus, slot, func, reg, size, mask; -+ int err; -+ -+ err = str_to_quirk(buf, &domain, &bus, &slot, &func, ®, &size, -+ &mask); -+ if (err) -+ goto out; -+ -+ err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask); -+ -+out: -+ if (!err) -+ err = count; -+ return err; -+} -+ -+static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf) -+{ -+ int count = 0; -+ unsigned long flags; -+ struct pciback_config_quirk *quirk; -+ struct pciback_dev_data *dev_data; -+ const struct config_field *field; -+ const struct config_field_entry *cfg_entry; -+ -+ spin_lock_irqsave(&device_ids_lock, flags); -+ list_for_each_entry(quirk, &pciback_quirks, quirks_list) { -+ if (count >= PAGE_SIZE) -+ goto out; -+ -+ count += scnprintf(buf + count, PAGE_SIZE - count, -+ "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n", -+ 
quirk->pdev->bus->number, -+ PCI_SLOT(quirk->pdev->devfn), -+ PCI_FUNC(quirk->pdev->devfn), -+ quirk->devid.vendor, quirk->devid.device, -+ quirk->devid.subvendor, -+ quirk->devid.subdevice); -+ -+ dev_data = pci_get_drvdata(quirk->pdev); -+ -+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { -+ field = cfg_entry->field; -+ if (count >= PAGE_SIZE) -+ goto out; -+ -+ count += scnprintf(buf + count, PAGE_SIZE - count, -+ "\t\t%08x:%01x:%08x\n", -+ cfg_entry->base_offset + -+ field->offset, field->size, -+ field->mask); -+ } -+ } -+ -+out: -+ spin_unlock_irqrestore(&device_ids_lock, flags); -+ -+ return count; -+} -+ -+DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add); -+ -+static ssize_t permissive_add(struct device_driver *drv, const char *buf, -+ size_t count) -+{ -+ int domain, bus, slot, func; -+ int err; -+ struct pcistub_device *psdev; -+ struct pciback_dev_data *dev_data; -+ err = str_to_slot(buf, &domain, &bus, &slot, &func); -+ if (err) -+ goto out; -+ psdev = pcistub_device_find(domain, bus, slot, func); -+ if (!psdev) { -+ err = -ENODEV; -+ goto out; -+ } -+ if (!psdev->dev) { -+ err = -ENODEV; -+ goto release; -+ } -+ dev_data = pci_get_drvdata(psdev->dev); -+ /* the driver data for a device should never be null at this point */ -+ if (!dev_data) { -+ err = -ENXIO; -+ goto release; -+ } -+ if (!dev_data->permissive) { -+ dev_data->permissive = 1; -+ /* Let user know that what they're doing could be unsafe */ -+ dev_warn(&psdev->dev->dev, "enabling permissive mode " -+ "configuration space accesses!\n"); -+ dev_warn(&psdev->dev->dev, -+ "permissive mode is potentially unsafe!\n"); -+ } -+release: -+ pcistub_device_put(psdev); -+out: -+ if (!err) -+ err = count; -+ return err; -+} -+ -+static ssize_t permissive_show(struct device_driver *drv, char *buf) -+{ -+ struct pcistub_device *psdev; -+ struct pciback_dev_data *dev_data; -+ size_t count = 0; -+ unsigned long flags; -+ spin_lock_irqsave(&pcistub_devices_lock, 
flags); -+ list_for_each_entry(psdev, &pcistub_devices, dev_list) { -+ if (count >= PAGE_SIZE) -+ break; -+ if (!psdev->dev) -+ continue; -+ dev_data = pci_get_drvdata(psdev->dev); -+ if (!dev_data || !dev_data->permissive) -+ continue; -+ count += -+ scnprintf(buf + count, PAGE_SIZE - count, "%s\n", -+ pci_name(psdev->dev)); -+ } -+ spin_unlock_irqrestore(&pcistub_devices_lock, flags); -+ return count; -+} -+ -+DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); -+ -+static void pcistub_exit(void) -+{ -+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot); -+ driver_remove_file(&pciback_pci_driver.driver, -+ &driver_attr_remove_slot); -+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots); -+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks); -+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive); -+ driver_remove_file(&pciback_pci_driver.driver, -+ &driver_attr_irq_handlers); -+ driver_remove_file(&pciback_pci_driver.driver, -+ &driver_attr_irq_handler_state); -+ pci_unregister_driver(&pciback_pci_driver); -+} -+ -+static int __init pcistub_init(void) -+{ -+ int pos = 0; -+ int err = 0; -+ int domain, bus, slot, func; -+ int parsed; -+ -+ if (pci_devs_to_hide && *pci_devs_to_hide) { -+ do { -+ parsed = 0; -+ -+ err = sscanf(pci_devs_to_hide + pos, -+ " (%x:%x:%x.%x) %n", -+ &domain, &bus, &slot, &func, &parsed); -+ if (err != 4) { -+ domain = 0; -+ err = sscanf(pci_devs_to_hide + pos, -+ " (%x:%x.%x) %n", -+ &bus, &slot, &func, &parsed); -+ if (err != 3) -+ goto parse_error; -+ } -+ -+ err = pcistub_device_id_add(domain, bus, slot, func); -+ if (err) -+ goto out; -+ -+ /* if parsed<=0, we've reached the end of the string */ -+ pos += parsed; -+ } while (parsed > 0 && pci_devs_to_hide[pos]); -+ } -+ -+ /* If we're the first PCI Device Driver to register, we're the -+ * first one to get offered PCI devices as they become -+ * available (and thus we can be the first to 
grab them) -+ */ -+ err = pci_register_driver(&pciback_pci_driver); -+ if (err < 0) -+ goto out; -+ -+ err = driver_create_file(&pciback_pci_driver.driver, -+ &driver_attr_new_slot); -+ if (!err) -+ err = driver_create_file(&pciback_pci_driver.driver, -+ &driver_attr_remove_slot); -+ if (!err) -+ err = driver_create_file(&pciback_pci_driver.driver, -+ &driver_attr_slots); -+ if (!err) -+ err = driver_create_file(&pciback_pci_driver.driver, -+ &driver_attr_quirks); -+ if (!err) -+ err = driver_create_file(&pciback_pci_driver.driver, -+ &driver_attr_permissive); -+ -+ if (!err) -+ err = driver_create_file(&pciback_pci_driver.driver, -+ &driver_attr_irq_handlers); -+ if (!err) -+ err = driver_create_file(&pciback_pci_driver.driver, -+ &driver_attr_irq_handler_state); -+ if (err) -+ pcistub_exit(); -+ -+out: -+ return err; -+ -+parse_error: -+ printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n", -+ pci_devs_to_hide + pos); -+ return -EINVAL; -+} -+ -+#ifndef MODULE -+/* -+ * fs_initcall happens before device_initcall -+ * so pciback *should* get called first (b/c we -+ * want to suck up any device before other drivers -+ * get a chance by being the first pci device -+ * driver to register) -+ */ -+fs_initcall(pcistub_init); -+#endif -+ -+static int __init pciback_init(void) -+{ -+ int err; -+ -+ if (!xen_initial_domain()) -+ return -ENODEV; -+ -+ err = pciback_config_init(); -+ if (err) -+ return err; -+ -+#ifdef MODULE -+ err = pcistub_init(); -+ if (err < 0) -+ return err; -+#endif -+ -+ pcistub_init_devices_late(); -+ err = pciback_xenbus_register(); -+ if (err) -+ pcistub_exit(); -+ -+ return err; -+} -+ -+static void __exit pciback_cleanup(void) -+{ -+ pciback_xenbus_unregister(); -+ pcistub_exit(); -+} -+ -+module_init(pciback_init); -+module_exit(pciback_cleanup); -+ -+MODULE_LICENSE("Dual BSD/GPL"); -diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h -new file mode 100644 -index 0000000..fc31052 ---- /dev/null -+++ 
b/drivers/xen/pciback/pciback.h -@@ -0,0 +1,142 @@ -+/* -+ * PCI Backend Common Data Structures & Function Declarations -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+#ifndef __XEN_PCIBACK_H__ -+#define __XEN_PCIBACK_H__ -+ -+#include <linux/pci.h> -+#include <linux/interrupt.h> -+#include <xen/xenbus.h> -+#include <linux/list.h> -+#include <linux/spinlock.h> -+#include <linux/workqueue.h> -+#include <asm/atomic.h> -+#include <xen/interface/io/pciif.h> -+ -+struct pci_dev_entry { -+ struct list_head list; -+ struct pci_dev *dev; -+}; -+ -+#define _PDEVF_op_active (0) -+#define PDEVF_op_active (1<<(_PDEVF_op_active)) -+#define _PCIB_op_pending (1) -+#define PCIB_op_pending (1<<(_PCIB_op_pending)) -+ -+struct pciback_device { -+ void *pci_dev_data; -+ spinlock_t dev_lock; -+ -+ struct xenbus_device *xdev; -+ -+ struct xenbus_watch be_watch; -+ u8 be_watching; -+ -+ int evtchn_irq; -+ -+ struct xen_pci_sharedinfo *sh_info; -+ -+ unsigned long flags; -+ -+ struct work_struct op_work; -+}; -+ -+struct pciback_dev_data { -+ struct list_head config_fields; -+ unsigned int permissive : 1; -+ unsigned int warned_on_write : 1; -+ unsigned int enable_intx : 1; -+ unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */ -+ unsigned int ack_intr : 1; /* .. 
and ACK-ing */ -+ unsigned long handled; -+ unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */ -+ char irq_name[0]; /* pciback[000:04:00.0] */ -+}; -+ -+/* Used by XenBus and pciback_ops.c */ -+extern wait_queue_head_t aer_wait_queue; -+extern struct workqueue_struct *pciback_wq; -+/* Used by pcistub.c and conf_space_quirks.c */ -+extern struct list_head pciback_quirks; -+ -+/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */ -+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev, -+ int domain, int bus, -+ int slot, int func); -+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev, -+ struct pci_dev *dev); -+void pcistub_put_pci_dev(struct pci_dev *dev); -+ -+/* Ensure a device is turned off or reset */ -+void pciback_reset_device(struct pci_dev *pdev); -+ -+/* Access a virtual configuration space for a PCI device */ -+int pciback_config_init(void); -+int pciback_config_init_dev(struct pci_dev *dev); -+void pciback_config_free_dyn_fields(struct pci_dev *dev); -+void pciback_config_reset_dev(struct pci_dev *dev); -+void pciback_config_free_dev(struct pci_dev *dev); -+int pciback_config_read(struct pci_dev *dev, int offset, int size, -+ u32 *ret_val); -+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value); -+ -+/* Handle requests for specific devices from the frontend */ -+typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev, -+ unsigned int domain, unsigned int bus, -+ unsigned int devfn, unsigned int devid); -+typedef int (*publish_pci_root_cb) (struct pciback_device *pdev, -+ unsigned int domain, unsigned int bus); -+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, -+ int devid, publish_pci_dev_cb publish_cb); -+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev); -+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, -+ unsigned int domain, unsigned int bus, -+ unsigned int devfn); -+ -+/** -+* Add 
for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback -+* before sending aer request to pcifront, so that guest could identify -+* device, coopearte with pciback to finish aer recovery job if device driver -+* has the capability -+*/ -+ -+int pciback_get_pcifront_dev(struct pci_dev *pcidev, -+ struct pciback_device *pdev, -+ unsigned int *domain, unsigned int *bus, -+ unsigned int *devfn); -+int pciback_init_devices(struct pciback_device *pdev); -+int pciback_publish_pci_roots(struct pciback_device *pdev, -+ publish_pci_root_cb cb); -+void pciback_release_devices(struct pciback_device *pdev); -+ -+/* Handles events from front-end */ -+irqreturn_t pciback_handle_event(int irq, void *dev_id); -+void pciback_do_op(struct work_struct *data); -+ -+int pciback_xenbus_register(void); -+void pciback_xenbus_unregister(void); -+ -+#ifdef CONFIG_PCI_MSI -+int pciback_enable_msi(struct pciback_device *pdev, -+ struct pci_dev *dev, struct xen_pci_op *op); -+ -+int pciback_disable_msi(struct pciback_device *pdev, -+ struct pci_dev *dev, struct xen_pci_op *op); -+ -+ -+int pciback_enable_msix(struct pciback_device *pdev, -+ struct pci_dev *dev, struct xen_pci_op *op); -+ -+int pciback_disable_msix(struct pciback_device *pdev, -+ struct pci_dev *dev, struct xen_pci_op *op); -+#endif -+extern int verbose_request; -+ -+void test_and_schedule_op(struct pciback_device *pdev); -+#endif -+ -+/* Handles shared IRQs that can to device domain and control domain. 
*/ -+void pciback_irq_handler(struct pci_dev *dev, int reset); -+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id); -diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c -new file mode 100644 -index 0000000..5543881 ---- /dev/null -+++ b/drivers/xen/pciback/pciback_ops.c -@@ -0,0 +1,242 @@ -+/* -+ * PCI Backend Operations - respond to PCI requests from Frontend -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+#include <linux/module.h> -+#include <linux/wait.h> -+#include <linux/bitops.h> -+#include <xen/events.h> -+#include <linux/sched.h> -+#include "pciback.h" -+ -+int verbose_request; -+module_param(verbose_request, int, 0644); -+ -+/* Ensure a device is has the fake IRQ handler "turned on/off" and is -+ * ready to be exported. This MUST be run after pciback_reset_device -+ * which does the actual PCI device enable/disable. -+ */ -+void pciback_control_isr(struct pci_dev *dev, int reset) -+{ -+ struct pciback_dev_data *dev_data; -+ int rc; -+ int enable = 0; -+ -+ dev_data = pci_get_drvdata(dev); -+ if (!dev_data) -+ return; -+ -+ /* We don't deal with bridges */ -+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) -+ return; -+ -+ if (reset) { -+ dev_data->enable_intx = 0; -+ dev_data->ack_intr = 0; -+ } -+ enable = dev_data->enable_intx; -+ -+ /* Asked to disable, but ISR isn't runnig */ -+ if (!enable && !dev_data->isr_on) -+ return; -+ -+ /* Squirrel away the IRQs in the dev_data. We need this -+ * b/c when device transitions to MSI, the dev->irq is -+ * overwritten with the MSI vector. -+ */ -+ if (enable) -+ dev_data->irq = dev->irq; -+ -+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", -+ dev_data->irq_name, -+ dev_data->irq, -+ pci_is_enabled(dev) ? "on" : "off", -+ dev->msi_enabled ? "MSI" : "", -+ dev->msix_enabled ? "MSI/X" : "", -+ dev_data->isr_on ? "enable" : "disable", -+ enable ? 
"enable" : "disable"); -+ -+ if (enable) { -+ rc = request_irq(dev_data->irq, -+ pciback_guest_interrupt, IRQF_SHARED, -+ dev_data->irq_name, dev); -+ if (rc) { -+ dev_err(&dev->dev, "%s: failed to install fake IRQ " \ -+ "handler for IRQ %d! (rc:%d)\n", dev_data->irq_name, -+ dev_data->irq, rc); -+ goto out; -+ } -+ } -+ else { -+ free_irq(dev_data->irq, dev); -+ dev_data->irq = 0; -+ } -+ dev_data->isr_on = enable; -+ dev_data->ack_intr = enable; -+out: -+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", -+ dev_data->irq_name, -+ dev_data->irq, -+ pci_is_enabled(dev) ? "on" : "off", -+ dev->msi_enabled ? "MSI" : "", -+ dev->msix_enabled ? "MSI/X" : "", -+ enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : -+ (dev_data->isr_on ? "failed to disable" : "disabled")); -+} -+ -+/* Ensure a device is "turned off" and ready to be exported. -+ * (Also see pciback_config_reset to ensure virtual configuration space is -+ * ready to be re-exported) -+ */ -+void pciback_reset_device(struct pci_dev *dev) -+{ -+ u16 cmd; -+ -+ pciback_control_isr(dev, 1 /* reset device */); -+ -+ /* Disable devices (but not bridges) */ -+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { -+#ifdef CONFIG_PCI_MSI -+ /* The guest could have been abruptly killed without -+ * disabling MSI/MSI-X interrupts.*/ -+ if (dev->msix_enabled) -+ pci_disable_msix(dev); -+ if (dev->msi_enabled) -+ pci_disable_msi(dev); -+#endif -+ pci_disable_device(dev); -+ -+ pci_write_config_word(dev, PCI_COMMAND, 0); -+ -+ dev->is_busmaster = 0; -+ } else { -+ pci_read_config_word(dev, PCI_COMMAND, &cmd); -+ if (cmd & (PCI_COMMAND_INVALIDATE)) { -+ cmd &= ~(PCI_COMMAND_INVALIDATE); -+ pci_write_config_word(dev, PCI_COMMAND, cmd); -+ -+ dev->is_busmaster = 0; -+ } -+ } -+} -+/* -+* Now the same evtchn is used for both pcifront conf_read_write request -+* as well as pcie aer front end ack. 
We use a new work_queue to schedule -+* pciback conf_read_write service for avoiding confict with aer_core -+* do_recovery job which also use the system default work_queue -+*/ -+void test_and_schedule_op(struct pciback_device *pdev) -+{ -+ /* Check that frontend is requesting an operation and that we are not -+ * already processing a request */ -+ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) -+ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { -+ queue_work(pciback_wq, &pdev->op_work); -+ } -+ /*_XEN_PCIB_active should have been cleared by pcifront. And also make -+ sure pciback is waiting for ack by checking _PCIB_op_pending*/ -+ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) -+ && test_bit(_PCIB_op_pending, &pdev->flags)) { -+ wake_up(&aer_wait_queue); -+ } -+} -+ -+/* Performing the configuration space reads/writes must not be done in atomic -+ * context because some of the pci_* functions can sleep (mostly due to ACPI -+ * use of semaphores). 
This function is intended to be called from a work -+ * queue in process context taking a struct pciback_device as a parameter */ -+ -+void pciback_do_op(struct work_struct *data) -+{ -+ struct pciback_device *pdev = -+ container_of(data, struct pciback_device, op_work); -+ struct pci_dev *dev; -+ struct pciback_dev_data *dev_data = NULL; -+ struct xen_pci_op *op = &pdev->sh_info->op; -+ int test_intx = 0; -+ -+ dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn); -+ -+ if (dev == NULL) -+ op->err = XEN_PCI_ERR_dev_not_found; -+ else { -+ dev_data = pci_get_drvdata(dev); -+ if (dev_data) -+ test_intx = dev_data->enable_intx; -+ switch (op->cmd) { -+ case XEN_PCI_OP_conf_read: -+ op->err = pciback_config_read(dev, -+ op->offset, op->size, &op->value); -+ break; -+ case XEN_PCI_OP_conf_write: -+ op->err = pciback_config_write(dev, -+ op->offset, op->size, op->value); -+ break; -+#ifdef CONFIG_PCI_MSI -+ case XEN_PCI_OP_enable_msi: -+ op->err = pciback_enable_msi(pdev, dev, op); -+ break; -+ case XEN_PCI_OP_disable_msi: -+ op->err = pciback_disable_msi(pdev, dev, op); -+ break; -+ case XEN_PCI_OP_enable_msix: -+ op->err = pciback_enable_msix(pdev, dev, op); -+ break; -+ case XEN_PCI_OP_disable_msix: -+ op->err = pciback_disable_msix(pdev, dev, op); -+ break; -+#endif -+ default: -+ op->err = XEN_PCI_ERR_not_implemented; -+ break; -+ } -+ } -+ if (!op->err && dev && dev_data) { -+ /* Transition detected */ -+ if ((dev_data->enable_intx != test_intx)) -+ pciback_control_isr(dev, 0 /* no reset */); -+ } -+ /* Tell the driver domain that we're done. */ -+ wmb(); -+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); -+ notify_remote_via_irq(pdev->evtchn_irq); -+ -+ /* Mark that we're done. 
*/ -+ smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */ -+ clear_bit(_PDEVF_op_active, &pdev->flags); -+ smp_mb__after_clear_bit(); /* /before/ final check for work */ -+ -+ /* Check to see if the driver domain tried to start another request in -+ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. -+ */ -+ test_and_schedule_op(pdev); -+} -+ -+irqreturn_t pciback_handle_event(int irq, void *dev_id) -+{ -+ struct pciback_device *pdev = dev_id; -+ -+ test_and_schedule_op(pdev); -+ -+ return IRQ_HANDLED; -+} -+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id) -+{ -+ struct pci_dev *dev = (struct pci_dev *)dev_id; -+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev); -+ -+ if (dev_data->isr_on && dev_data->ack_intr) { -+ dev_data->handled++; -+ if ((dev_data->handled % 1000) == 0) { -+ if (xen_ignore_irq(irq)) { -+ printk(KERN_INFO "%s IRQ line is not shared " -+ "with other domains. Turning ISR off\n", -+ dev_data->irq_name); -+ dev_data->ack_intr = 0; -+ } -+ } -+ return IRQ_HANDLED; -+ } -+ return IRQ_NONE; -+} -diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c -new file mode 100644 -index 0000000..efb922d ---- /dev/null -+++ b/drivers/xen/pciback/slot.c -@@ -0,0 +1,191 @@ -+/* -+ * PCI Backend - Provides a Virtual PCI bus (with real devices) -+ * to the frontend -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> (vpci.c) -+ * Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c -+ */ -+ -+#include <linux/list.h> -+#include <linux/slab.h> -+#include <linux/pci.h> -+#include <linux/spinlock.h> -+#include "pciback.h" -+ -+/* There are at most 32 slots in a pci bus. 
*/ -+#define PCI_SLOT_MAX 32 -+ -+#define PCI_BUS_NBR 2 -+ -+struct slot_dev_data { -+ /* Access to dev_list must be protected by lock */ -+ struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX]; -+ spinlock_t lock; -+}; -+ -+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, -+ unsigned int domain, unsigned int bus, -+ unsigned int devfn) -+{ -+ struct pci_dev *dev = NULL; -+ struct slot_dev_data *slot_dev = pdev->pci_dev_data; -+ unsigned long flags; -+ -+ if (domain != 0 || PCI_FUNC(devfn) != 0) -+ return NULL; -+ -+ if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR) -+ return NULL; -+ -+ spin_lock_irqsave(&slot_dev->lock, flags); -+ dev = slot_dev->slots[bus][PCI_SLOT(devfn)]; -+ spin_unlock_irqrestore(&slot_dev->lock, flags); -+ -+ return dev; -+} -+ -+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, -+ int devid, publish_pci_dev_cb publish_cb) -+{ -+ int err = 0, slot, bus; -+ struct slot_dev_data *slot_dev = pdev->pci_dev_data; -+ unsigned long flags; -+ -+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { -+ err = -EFAULT; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Can't export bridges on the virtual PCI bus"); -+ goto out; -+ } -+ -+ spin_lock_irqsave(&slot_dev->lock, flags); -+ -+ /* Assign to a new slot on the virtual PCI bus */ -+ for (bus = 0; bus < PCI_BUS_NBR; bus++) -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { -+ if (slot_dev->slots[bus][slot] == NULL) { -+ printk(KERN_INFO -+ "pciback: slot: %s: assign to virtual " -+ "slot %d, bus %d\n", -+ pci_name(dev), slot, bus); -+ slot_dev->slots[bus][slot] = dev; -+ goto unlock; -+ } -+ } -+ -+ err = -ENOMEM; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "No more space on root virtual PCI bus"); -+ -+unlock: -+ spin_unlock_irqrestore(&slot_dev->lock, flags); -+ -+ /* Publish this device. 
*/ -+ if (!err) -+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid); -+ -+out: -+ return err; -+} -+ -+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) -+{ -+ int slot, bus; -+ struct slot_dev_data *slot_dev = pdev->pci_dev_data; -+ struct pci_dev *found_dev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&slot_dev->lock, flags); -+ -+ for (bus = 0; bus < PCI_BUS_NBR; bus++) -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { -+ if (slot_dev->slots[bus][slot] == dev) { -+ slot_dev->slots[bus][slot] = NULL; -+ found_dev = dev; -+ goto out; -+ } -+ } -+ -+out: -+ spin_unlock_irqrestore(&slot_dev->lock, flags); -+ -+ if (found_dev) -+ pcistub_put_pci_dev(found_dev); -+} -+ -+int pciback_init_devices(struct pciback_device *pdev) -+{ -+ int slot, bus; -+ struct slot_dev_data *slot_dev; -+ -+ slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL); -+ if (!slot_dev) -+ return -ENOMEM; -+ -+ spin_lock_init(&slot_dev->lock); -+ -+ for (bus = 0; bus < PCI_BUS_NBR; bus++) -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) -+ slot_dev->slots[bus][slot] = NULL; -+ -+ pdev->pci_dev_data = slot_dev; -+ -+ return 0; -+} -+ -+int pciback_publish_pci_roots(struct pciback_device *pdev, -+ publish_pci_root_cb publish_cb) -+{ -+ /* The Virtual PCI bus has only one root */ -+ return publish_cb(pdev, 0, 0); -+} -+ -+void pciback_release_devices(struct pciback_device *pdev) -+{ -+ int slot, bus; -+ struct slot_dev_data *slot_dev = pdev->pci_dev_data; -+ struct pci_dev *dev; -+ -+ for (bus = 0; bus < PCI_BUS_NBR; bus++) -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { -+ dev = slot_dev->slots[bus][slot]; -+ if (dev != NULL) -+ pcistub_put_pci_dev(dev); -+ } -+ -+ kfree(slot_dev); -+ pdev->pci_dev_data = NULL; -+} -+ -+int pciback_get_pcifront_dev(struct pci_dev *pcidev, -+ struct pciback_device *pdev, -+ unsigned int *domain, unsigned int *bus, -+ unsigned int *devfn) -+{ -+ int slot, busnr; -+ struct slot_dev_data *slot_dev = pdev->pci_dev_data; -+ struct 
pci_dev *dev; -+ int found = 0; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&slot_dev->lock, flags); -+ -+ for (busnr = 0; busnr < PCI_BUS_NBR; bus++) -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { -+ dev = slot_dev->slots[busnr][slot]; -+ if (dev && dev->bus->number == pcidev->bus->number -+ && dev->devfn == pcidev->devfn -+ && pci_domain_nr(dev->bus) == -+ pci_domain_nr(pcidev->bus)) { -+ found = 1; -+ *domain = 0; -+ *bus = busnr; -+ *devfn = PCI_DEVFN(slot, 0); -+ goto out; -+ } -+ } -+out: -+ spin_unlock_irqrestore(&slot_dev->lock, flags); -+ return found; -+ -+} -diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c -new file mode 100644 -index 0000000..2857ab8 ---- /dev/null -+++ b/drivers/xen/pciback/vpci.c -@@ -0,0 +1,244 @@ -+/* -+ * PCI Backend - Provides a Virtual PCI bus (with real devices) -+ * to the frontend -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+ -+#include <linux/list.h> -+#include <linux/slab.h> -+#include <linux/pci.h> -+#include <linux/spinlock.h> -+#include "pciback.h" -+ -+#define PCI_SLOT_MAX 32 -+ -+struct vpci_dev_data { -+ /* Access to dev_list must be protected by lock */ -+ struct list_head dev_list[PCI_SLOT_MAX]; -+ spinlock_t lock; -+}; -+ -+static inline struct list_head *list_first(struct list_head *head) -+{ -+ return head->next; -+} -+ -+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev, -+ unsigned int domain, unsigned int bus, -+ unsigned int devfn) -+{ -+ struct pci_dev_entry *entry; -+ struct pci_dev *dev = NULL; -+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; -+ unsigned long flags; -+ -+ if (domain != 0 || bus != 0) -+ return NULL; -+ -+ if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { -+ spin_lock_irqsave(&vpci_dev->lock, flags); -+ -+ list_for_each_entry(entry, -+ &vpci_dev->dev_list[PCI_SLOT(devfn)], -+ list) { -+ if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) { -+ dev = entry->dev; -+ break; -+ } -+ } -+ -+ spin_unlock_irqrestore(&vpci_dev->lock, flags); -+ } -+ 
return dev; -+} -+ -+static inline int match_slot(struct pci_dev *l, struct pci_dev *r) -+{ -+ if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus) -+ && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn)) -+ return 1; -+ -+ return 0; -+} -+ -+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev, -+ int devid, publish_pci_dev_cb publish_cb) -+{ -+ int err = 0, slot, func = -1; -+ struct pci_dev_entry *t, *dev_entry; -+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; -+ unsigned long flags; -+ -+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { -+ err = -EFAULT; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Can't export bridges on the virtual PCI bus"); -+ goto out; -+ } -+ -+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); -+ if (!dev_entry) { -+ err = -ENOMEM; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error adding entry to virtual PCI bus"); -+ goto out; -+ } -+ -+ dev_entry->dev = dev; -+ -+ spin_lock_irqsave(&vpci_dev->lock, flags); -+ -+ /* Keep multi-function devices together on the virtual PCI bus */ -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { -+ if (!list_empty(&vpci_dev->dev_list[slot])) { -+ t = list_entry(list_first(&vpci_dev->dev_list[slot]), -+ struct pci_dev_entry, list); -+ -+ if (match_slot(dev, t->dev)) { -+ pr_info("pciback: vpci: %s: " -+ "assign to virtual slot %d func %d\n", -+ pci_name(dev), slot, -+ PCI_FUNC(dev->devfn)); -+ list_add_tail(&dev_entry->list, -+ &vpci_dev->dev_list[slot]); -+ func = PCI_FUNC(dev->devfn); -+ goto unlock; -+ } -+ } -+ } -+ -+ /* Assign to a new slot on the virtual PCI bus */ -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { -+ if (list_empty(&vpci_dev->dev_list[slot])) { -+ printk(KERN_INFO -+ "pciback: vpci: %s: assign to virtual slot %d\n", -+ pci_name(dev), slot); -+ list_add_tail(&dev_entry->list, -+ &vpci_dev->dev_list[slot]); -+ func = PCI_FUNC(dev->devfn); -+ goto unlock; -+ } -+ } -+ -+ err = -ENOMEM; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "No more space on root 
virtual PCI bus"); -+ -+unlock: -+ spin_unlock_irqrestore(&vpci_dev->lock, flags); -+ -+ /* Publish this device. */ -+ if (!err) -+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid); -+ -+out: -+ return err; -+} -+ -+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev) -+{ -+ int slot; -+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; -+ struct pci_dev *found_dev = NULL; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&vpci_dev->lock, flags); -+ -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { -+ struct pci_dev_entry *e, *tmp; -+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], -+ list) { -+ if (e->dev == dev) { -+ list_del(&e->list); -+ found_dev = e->dev; -+ kfree(e); -+ goto out; -+ } -+ } -+ } -+ -+out: -+ spin_unlock_irqrestore(&vpci_dev->lock, flags); -+ -+ if (found_dev) -+ pcistub_put_pci_dev(found_dev); -+} -+ -+int pciback_init_devices(struct pciback_device *pdev) -+{ -+ int slot; -+ struct vpci_dev_data *vpci_dev; -+ -+ vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL); -+ if (!vpci_dev) -+ return -ENOMEM; -+ -+ spin_lock_init(&vpci_dev->lock); -+ -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) -+ INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); -+ -+ pdev->pci_dev_data = vpci_dev; -+ -+ return 0; -+} -+ -+int pciback_publish_pci_roots(struct pciback_device *pdev, -+ publish_pci_root_cb publish_cb) -+{ -+ /* The Virtual PCI bus has only one root */ -+ return publish_cb(pdev, 0, 0); -+} -+ -+void pciback_release_devices(struct pciback_device *pdev) -+{ -+ int slot; -+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; -+ -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { -+ struct pci_dev_entry *e, *tmp; -+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], -+ list) { -+ list_del(&e->list); -+ pcistub_put_pci_dev(e->dev); -+ kfree(e); -+ } -+ } -+ -+ kfree(vpci_dev); -+ pdev->pci_dev_data = NULL; -+} -+ -+int pciback_get_pcifront_dev(struct pci_dev *pcidev, -+ struct pciback_device *pdev, -+ unsigned 
int *domain, unsigned int *bus, -+ unsigned int *devfn) -+{ -+ struct pci_dev_entry *entry; -+ struct pci_dev *dev = NULL; -+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; -+ unsigned long flags; -+ int found = 0, slot; -+ -+ spin_lock_irqsave(&vpci_dev->lock, flags); -+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { -+ list_for_each_entry(entry, -+ &vpci_dev->dev_list[slot], -+ list) { -+ dev = entry->dev; -+ if (dev && dev->bus->number == pcidev->bus->number -+ && pci_domain_nr(dev->bus) == -+ pci_domain_nr(pcidev->bus) -+ && dev->devfn == pcidev->devfn) { -+ found = 1; -+ *domain = 0; -+ *bus = 0; -+ *devfn = PCI_DEVFN(slot, -+ PCI_FUNC(pcidev->devfn)); -+ } -+ } -+ } -+ spin_unlock_irqrestore(&vpci_dev->lock, flags); -+ return found; -+} -diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c -new file mode 100644 -index 0000000..f0d5426 ---- /dev/null -+++ b/drivers/xen/pciback/xenbus.c -@@ -0,0 +1,730 @@ -+/* -+ * PCI Backend Xenbus Setup - handles setup with frontend and xend -+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/list.h> -+#include <linux/vmalloc.h> -+#include <linux/workqueue.h> -+#include <xen/xenbus.h> -+#include <xen/events.h> -+#include <asm/xen/pci.h> -+#include <linux/workqueue.h> -+#include "pciback.h" -+ -+#define INVALID_EVTCHN_IRQ (-1) -+struct workqueue_struct *pciback_wq; -+ -+static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) -+{ -+ struct pciback_device *pdev; -+ -+ pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL); -+ if (pdev == NULL) -+ goto out; -+ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); -+ -+ pdev->xdev = xdev; -+ dev_set_drvdata(&xdev->dev, pdev); -+ -+ spin_lock_init(&pdev->dev_lock); -+ -+ pdev->sh_info = NULL; -+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ; -+ pdev->be_watching = 0; -+ -+ INIT_WORK(&pdev->op_work, pciback_do_op); -+ -+ if (pciback_init_devices(pdev)) { -+ kfree(pdev); -+ 
pdev = NULL; -+ } -+out: -+ return pdev; -+} -+ -+static void pciback_disconnect(struct pciback_device *pdev) -+{ -+ spin_lock(&pdev->dev_lock); -+ -+ /* Ensure the guest can't trigger our handler before removing devices */ -+ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { -+ unbind_from_irqhandler(pdev->evtchn_irq, pdev); -+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ; -+ } -+ spin_unlock(&pdev->dev_lock); -+ -+ /* If the driver domain started an op, make sure we complete it -+ * before releasing the shared memory */ -+ -+ /* Note, the workqueue does not use spinlocks at all.*/ -+ flush_workqueue(pciback_wq); -+ -+ spin_lock(&pdev->dev_lock); -+ if (pdev->sh_info != NULL) { -+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); -+ pdev->sh_info = NULL; -+ } -+ spin_unlock(&pdev->dev_lock); -+ -+} -+ -+static void free_pdev(struct pciback_device *pdev) -+{ -+ spin_lock(&pdev->dev_lock); -+ if (pdev->be_watching) { -+ unregister_xenbus_watch(&pdev->be_watch); -+ pdev->be_watching = 0; -+ } -+ spin_unlock(&pdev->dev_lock); -+ -+ pciback_disconnect(pdev); -+ -+ pciback_release_devices(pdev); -+ -+ dev_set_drvdata(&pdev->xdev->dev, NULL); -+ pdev->xdev = NULL; -+ -+ kfree(pdev); -+} -+ -+static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref, -+ int remote_evtchn) -+{ -+ int err = 0; -+ void *vaddr; -+ -+ dev_dbg(&pdev->xdev->dev, -+ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", -+ gnt_ref, remote_evtchn); -+ -+ err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); -+ if (err < 0) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error mapping other domain page in ours."); -+ goto out; -+ } -+ -+ spin_lock(&pdev->dev_lock); -+ pdev->sh_info = vaddr; -+ spin_unlock(&pdev->dev_lock); -+ -+ err = bind_interdomain_evtchn_to_irqhandler( -+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, -+ 0, "pciback", pdev); -+ if (err < 0) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error binding event channel to IRQ"); -+ goto out; -+ } -+ -+ 
spin_lock(&pdev->dev_lock); -+ pdev->evtchn_irq = err; -+ spin_unlock(&pdev->dev_lock); -+ err = 0; -+ -+ dev_dbg(&pdev->xdev->dev, "Attached!\n"); -+out: -+ return err; -+} -+ -+static int pciback_attach(struct pciback_device *pdev) -+{ -+ int err = 0; -+ int gnt_ref, remote_evtchn; -+ char *magic = NULL; -+ -+ -+ /* Make sure we only do this setup once */ -+ if (xenbus_read_driver_state(pdev->xdev->nodename) != -+ XenbusStateInitialised) -+ goto out; -+ -+ /* Wait for frontend to state that it has published the configuration */ -+ if (xenbus_read_driver_state(pdev->xdev->otherend) != -+ XenbusStateInitialised) -+ goto out; -+ -+ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n"); -+ -+ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend, -+ "pci-op-ref", "%u", &gnt_ref, -+ "event-channel", "%u", &remote_evtchn, -+ "magic", NULL, &magic, NULL); -+ if (err) { -+ /* If configuration didn't get read correctly, wait longer */ -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading configuration from frontend"); -+ goto out; -+ } -+ -+ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { -+ xenbus_dev_fatal(pdev->xdev, -EFAULT, -+ "version mismatch (%s/%s) with pcifront - " -+ "halting pciback", -+ magic, XEN_PCI_MAGIC); -+ goto out; -+ } -+ -+ err = pciback_do_attach(pdev, gnt_ref, remote_evtchn); -+ if (err) -+ goto out; -+ -+ dev_dbg(&pdev->xdev->dev, "Connecting...\n"); -+ -+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); -+ if (err) -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error switching to connected state!"); -+ -+ dev_dbg(&pdev->xdev->dev, "Connected? 
%d\n", err); -+out: -+ -+ kfree(magic); -+ -+ return err; -+} -+ -+static int pciback_publish_pci_dev(struct pciback_device *pdev, -+ unsigned int domain, unsigned int bus, -+ unsigned int devfn, unsigned int devid) -+{ -+ int err; -+ int len; -+ char str[64]; -+ -+ len = snprintf(str, sizeof(str), "vdev-%d", devid); -+ if (unlikely(len >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, -+ "%04x:%02x:%02x.%02x", domain, bus, -+ PCI_SLOT(devfn), PCI_FUNC(devfn)); -+ -+out: -+ return err; -+} -+ -+static int pciback_export_device(struct pciback_device *pdev, -+ int domain, int bus, int slot, int func, -+ int devid) -+{ -+ struct pci_dev *dev; -+ int err = 0; -+ -+ dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n", -+ domain, bus, slot, func); -+ -+ dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func); -+ if (!dev) { -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Couldn't locate PCI device " -+ "(%04x:%02x:%02x.%01x)! " -+ "perhaps already in-use?", -+ domain, bus, slot, func); -+ goto out; -+ } -+ -+ err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev); -+ if (err) -+ goto out; -+ -+ dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id); -+ if (xen_register_device_domain_owner(dev, -+ pdev->xdev->otherend_id) != 0) { -+ dev_err(&dev->dev, "device has been assigned to another " \ -+ "domain! Over-writting the ownership, but beware.\n"); -+ xen_unregister_device_domain_owner(dev); -+ xen_register_device_domain_owner(dev, pdev->xdev->otherend_id); -+ } -+ -+ /* TODO: It'd be nice to export a bridge and have all of its children -+ * get exported with it. 
This may be best done in xend (which will -+ * have to calculate resource usage anyway) but we probably want to -+ * put something in here to ensure that if a bridge gets given to a -+ * driver domain, that all devices under that bridge are not given -+ * to other driver domains (as he who controls the bridge can disable -+ * it and stop the other devices from working). -+ */ -+out: -+ return err; -+} -+ -+static int pciback_remove_device(struct pciback_device *pdev, -+ int domain, int bus, int slot, int func) -+{ -+ int err = 0; -+ struct pci_dev *dev; -+ -+ dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n", -+ domain, bus, slot, func); -+ -+ dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); -+ if (!dev) { -+ err = -EINVAL; -+ dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " -+ "(%04x:%02x:%02x.%01x)! not owned by this domain\n", -+ domain, bus, slot, func); -+ goto out; -+ } -+ -+ dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); -+ xen_unregister_device_domain_owner(dev); -+ -+ pciback_release_pci_dev(pdev, dev); -+ -+out: -+ return err; -+} -+ -+static int pciback_publish_pci_root(struct pciback_device *pdev, -+ unsigned int domain, unsigned int bus) -+{ -+ unsigned int d, b; -+ int i, root_num, len, err; -+ char str[64]; -+ -+ dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n"); -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, -+ "root_num", "%d", &root_num); -+ if (err == 0 || err == -ENOENT) -+ root_num = 0; -+ else if (err < 0) -+ goto out; -+ -+ /* Verify that we haven't already published this pci root */ -+ for (i = 0; i < root_num; i++) { -+ len = snprintf(str, sizeof(str), "root-%d", i); -+ if (unlikely(len >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, -+ str, "%x:%x", &d, &b); -+ if (err < 0) -+ goto out; -+ if (err != 2) { -+ err = -EINVAL; -+ goto out; -+ } -+ -+ if (d == domain && b == bus) { -+ err = 0; -+ 
goto out; -+ } -+ } -+ -+ len = snprintf(str, sizeof(str), "root-%d", root_num); -+ if (unlikely(len >= (sizeof(str) - 1))) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n", -+ root_num, domain, bus); -+ -+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, -+ "%04x:%02x", domain, bus); -+ if (err) -+ goto out; -+ -+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, -+ "root_num", "%d", (root_num + 1)); -+ -+out: -+ return err; -+} -+ -+static int pciback_reconfigure(struct pciback_device *pdev) -+{ -+ int err = 0; -+ int num_devs; -+ int domain, bus, slot, func; -+ int substate; -+ int i, len; -+ char state_str[64]; -+ char dev_str[64]; -+ -+ -+ dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); -+ -+ /* Make sure we only reconfigure once */ -+ if (xenbus_read_driver_state(pdev->xdev->nodename) != -+ XenbusStateReconfiguring) -+ goto out; -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", -+ &num_devs); -+ if (err != 1) { -+ if (err >= 0) -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading number of devices"); -+ goto out; -+ } -+ -+ for (i = 0; i < num_devs; i++) { -+ len = snprintf(state_str, sizeof(state_str), "state-%d", i); -+ if (unlikely(len >= (sizeof(state_str) - 1))) { -+ err = -ENOMEM; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "String overflow while reading " -+ "configuration"); -+ goto out; -+ } -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str, -+ "%d", &substate); -+ if (err != 1) -+ substate = XenbusStateUnknown; -+ -+ switch (substate) { -+ case XenbusStateInitialising: -+ dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i); -+ -+ len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); -+ if (unlikely(len >= (sizeof(dev_str) - 1))) { -+ err = -ENOMEM; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "String overflow while " -+ "reading configuration"); -+ goto out; -+ } -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, -+ dev_str, 
"%x:%x:%x.%x", -+ &domain, &bus, &slot, &func); -+ if (err < 0) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading device " -+ "configuration"); -+ goto out; -+ } -+ if (err != 4) { -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error parsing pci device " -+ "configuration"); -+ goto out; -+ } -+ -+ err = pciback_export_device(pdev, domain, bus, slot, -+ func, i); -+ if (err) -+ goto out; -+ -+ /* Publish pci roots. */ -+ err = pciback_publish_pci_roots(pdev, -+ pciback_publish_pci_root); -+ if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error while publish PCI root" -+ "buses for frontend"); -+ goto out; -+ } -+ -+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, -+ state_str, "%d", -+ XenbusStateInitialised); -+ if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error switching substate of " -+ "dev-%d\n", i); -+ goto out; -+ } -+ break; -+ -+ case XenbusStateClosing: -+ dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i); -+ -+ len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i); -+ if (unlikely(len >= (sizeof(dev_str) - 1))) { -+ err = -ENOMEM; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "String overflow while " -+ "reading configuration"); -+ goto out; -+ } -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, -+ dev_str, "%x:%x:%x.%x", -+ &domain, &bus, &slot, &func); -+ if (err < 0) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading device " -+ "configuration"); -+ goto out; -+ } -+ if (err != 4) { -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error parsing pci device " -+ "configuration"); -+ goto out; -+ } -+ -+ err = pciback_remove_device(pdev, domain, bus, slot, -+ func); -+ if (err) -+ goto out; -+ -+ /* TODO: If at some point we implement support for pci -+ * root hot-remove on pcifront side, we'll need to -+ * remove unnecessary xenstore nodes of pci roots here. 
-+ */ -+ -+ break; -+ -+ default: -+ break; -+ } -+ } -+ -+ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured); -+ if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error switching to reconfigured state!"); -+ goto out; -+ } -+ -+out: -+ return 0; -+} -+ -+static void pciback_frontend_changed(struct xenbus_device *xdev, -+ enum xenbus_state fe_state) -+{ -+ struct pciback_device *pdev = dev_get_drvdata(&xdev->dev); -+ -+ dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state); -+ -+ switch (fe_state) { -+ case XenbusStateInitialised: -+ pciback_attach(pdev); -+ break; -+ -+ case XenbusStateReconfiguring: -+ pciback_reconfigure(pdev); -+ break; -+ -+ case XenbusStateConnected: -+ /* pcifront switched its state from reconfiguring to connected. -+ * Then switch to connected state. -+ */ -+ xenbus_switch_state(xdev, XenbusStateConnected); -+ break; -+ -+ case XenbusStateClosing: -+ pciback_disconnect(pdev); -+ xenbus_switch_state(xdev, XenbusStateClosing); -+ break; -+ -+ case XenbusStateClosed: -+ pciback_disconnect(pdev); -+ xenbus_switch_state(xdev, XenbusStateClosed); -+ if (xenbus_dev_is_online(xdev)) -+ break; -+ /* fall through if not online */ -+ case XenbusStateUnknown: -+ dev_dbg(&xdev->dev, "frontend is gone! unregister device\n"); -+ device_unregister(&xdev->dev); -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+static int pciback_setup_backend(struct pciback_device *pdev) -+{ -+ /* Get configuration from xend (if available now) */ -+ int domain, bus, slot, func; -+ int err = 0; -+ int i, num_devs; -+ char dev_str[64]; -+ char state_str[64]; -+ -+ /* It's possible we could get the call to setup twice, so make sure -+ * we're not already connected. 
-+ */ -+ if (xenbus_read_driver_state(pdev->xdev->nodename) != -+ XenbusStateInitWait) -+ goto out; -+ -+ dev_dbg(&pdev->xdev->dev, "getting be setup\n"); -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", -+ &num_devs); -+ if (err != 1) { -+ if (err >= 0) -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading number of devices"); -+ goto out; -+ } -+ -+ for (i = 0; i < num_devs; i++) { -+ int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); -+ if (unlikely(l >= (sizeof(dev_str) - 1))) { -+ err = -ENOMEM; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "String overflow while reading " -+ "configuration"); -+ goto out; -+ } -+ -+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str, -+ "%x:%x:%x.%x", &domain, &bus, &slot, &func); -+ if (err < 0) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error reading device configuration"); -+ goto out; -+ } -+ if (err != 4) { -+ err = -EINVAL; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error parsing pci device " -+ "configuration"); -+ goto out; -+ } -+ -+ err = pciback_export_device(pdev, domain, bus, slot, func, i); -+ if (err) -+ goto out; -+ -+ /* Switch substate of this device. 
*/ -+ l = snprintf(state_str, sizeof(state_str), "state-%d", i); -+ if (unlikely(l >= (sizeof(state_str) - 1))) { -+ err = -ENOMEM; -+ xenbus_dev_fatal(pdev->xdev, err, -+ "String overflow while reading " -+ "configuration"); -+ goto out; -+ } -+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str, -+ "%d", XenbusStateInitialised); -+ if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, "Error switching " -+ "substate of dev-%d\n", i); -+ goto out; -+ } -+ } -+ -+ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root); -+ if (err) { -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error while publish PCI root buses " -+ "for frontend"); -+ goto out; -+ } -+ -+ err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised); -+ if (err) -+ xenbus_dev_fatal(pdev->xdev, err, -+ "Error switching to initialised state!"); -+ -+out: -+ if (!err) -+ /* see if pcifront is already configured (if not, we'll wait) */ -+ pciback_attach(pdev); -+ -+ return err; -+} -+ -+static void pciback_be_watch(struct xenbus_watch *watch, -+ const char **vec, unsigned int len) -+{ -+ struct pciback_device *pdev = -+ container_of(watch, struct pciback_device, be_watch); -+ -+ switch (xenbus_read_driver_state(pdev->xdev->nodename)) { -+ case XenbusStateInitWait: -+ pciback_setup_backend(pdev); -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+static int pciback_xenbus_probe(struct xenbus_device *dev, -+ const struct xenbus_device_id *id) -+{ -+ int err = 0; -+ struct pciback_device *pdev = alloc_pdev(dev); -+ -+ if (pdev == NULL) { -+ err = -ENOMEM; -+ xenbus_dev_fatal(dev, err, -+ "Error allocating pciback_device struct"); -+ goto out; -+ } -+ -+ /* wait for xend to configure us */ -+ err = xenbus_switch_state(dev, XenbusStateInitWait); -+ if (err) -+ goto out; -+ -+ /* watch the backend node for backend configuration information */ -+ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, -+ pciback_be_watch); -+ if (err) -+ goto out; -+ -+ spin_lock(&pdev->dev_lock); -+ 
pdev->be_watching = 1; -+ spin_unlock(&pdev->dev_lock); -+ -+ /* We need to force a call to our callback here in case -+ * xend already configured us! -+ */ -+ pciback_be_watch(&pdev->be_watch, NULL, 0); -+ -+out: -+ return err; -+} -+ -+static int pciback_xenbus_remove(struct xenbus_device *dev) -+{ -+ struct pciback_device *pdev = dev_get_drvdata(&dev->dev); -+ -+ if (pdev != NULL) -+ free_pdev(pdev); -+ -+ return 0; -+} -+ -+static const struct xenbus_device_id xenpci_ids[] = { -+ {"pci"}, -+ {""}, -+}; -+ -+static struct xenbus_driver xenbus_pciback_driver = { -+ .name = "pciback", -+ .owner = THIS_MODULE, -+ .ids = xenpci_ids, -+ .probe = pciback_xenbus_probe, -+ .remove = pciback_xenbus_remove, -+ .otherend_changed = pciback_frontend_changed, -+}; -+ -+int __init pciback_xenbus_register(void) -+{ -+ pciback_wq = create_workqueue("pciback_workqueue"); -+ if (!pciback_wq) { -+ printk(KERN_ERR "%s: create" -+ "pciback_workqueue failed\n",__FUNCTION__); -+ return -EFAULT; -+ } -+ return xenbus_register_backend(&xenbus_pciback_driver); -+} -+ -+void __exit pciback_xenbus_unregister(void) -+{ -+ destroy_workqueue(pciback_wq); -+ xenbus_unregister_driver(&xenbus_pciback_driver); -+} -diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c -new file mode 100644 -index 0000000..6d1a770 ---- /dev/null -+++ b/drivers/xen/pcpu.c -@@ -0,0 +1,452 @@ -+/* -+ * pcpu.c - management physical cpu in dom0 environment -+ */ -+#include <linux/interrupt.h> -+#include <linux/spinlock.h> -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/hypercall.h> -+#include <linux/cpu.h> -+#include <xen/xenbus.h> -+#include <xen/pcpu.h> -+#include <xen/events.h> -+#include <xen/acpi.h> -+ -+static struct sysdev_class xen_pcpu_sysdev_class = { -+ .name = "xen_pcpu", -+}; -+ -+static DEFINE_MUTEX(xen_pcpu_lock); -+static RAW_NOTIFIER_HEAD(xen_pcpu_chain); -+ -+/* No need for irq disable since hotplug notify is in workqueue context */ -+#define get_pcpu_lock() mutex_lock(&xen_pcpu_lock); -+#define 
put_pcpu_lock() mutex_unlock(&xen_pcpu_lock); -+ -+struct xen_pcpus { -+ struct list_head list; -+ int present; -+}; -+static struct xen_pcpus xen_pcpus; -+ -+int register_xen_pcpu_notifier(struct notifier_block *nb) -+{ -+ int ret; -+ -+ /* All refer to the chain notifier is protected by the pcpu_lock */ -+ get_pcpu_lock(); -+ ret = raw_notifier_chain_register(&xen_pcpu_chain, nb); -+ put_pcpu_lock(); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(register_xen_pcpu_notifier); -+ -+void unregister_xen_pcpu_notifier(struct notifier_block *nb) -+{ -+ get_pcpu_lock(); -+ raw_notifier_chain_unregister(&xen_pcpu_chain, nb); -+ put_pcpu_lock(); -+} -+EXPORT_SYMBOL_GPL(unregister_xen_pcpu_notifier); -+ -+static int xen_pcpu_down(uint32_t xen_id) -+{ -+ int ret; -+ xen_platform_op_t op = { -+ .cmd = XENPF_cpu_offline, -+ .interface_version = XENPF_INTERFACE_VERSION, -+ .u.cpu_ol.cpuid = xen_id, -+ }; -+ -+ ret = HYPERVISOR_dom0_op(&op); -+ return ret; -+} -+ -+static int xen_pcpu_up(uint32_t xen_id) -+{ -+ int ret; -+ xen_platform_op_t op = { -+ .cmd = XENPF_cpu_online, -+ .interface_version = XENPF_INTERFACE_VERSION, -+ .u.cpu_ol.cpuid = xen_id, -+ }; -+ -+ ret = HYPERVISOR_dom0_op(&op); -+ return ret; -+} -+ -+static ssize_t show_online(struct sys_device *dev, -+ struct sysdev_attribute *attr, -+ char *buf) -+{ -+ struct pcpu *cpu = container_of(dev, struct pcpu, sysdev); -+ -+ return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE)); -+} -+ -+static ssize_t __ref store_online(struct sys_device *dev, -+ struct sysdev_attribute *attr, -+ const char *buf, size_t count) -+{ -+ struct pcpu *cpu = container_of(dev, struct pcpu, sysdev); -+ ssize_t ret; -+ -+ switch (buf[0]) { -+ case '0': -+ ret = xen_pcpu_down(cpu->xen_id); -+ break; -+ case '1': -+ ret = xen_pcpu_up(cpu->xen_id); -+ break; -+ default: -+ ret = -EINVAL; -+ } -+ -+ if (ret >= 0) -+ ret = count; -+ return ret; -+} -+ -+static SYSDEV_ATTR(online, 0644, show_online, store_online); -+ -+static ssize_t 
show_apicid(struct sys_device *dev, -+ struct sysdev_attribute *attr, -+ char *buf) -+{ -+ struct pcpu *cpu = container_of(dev, struct pcpu, sysdev); -+ -+ return sprintf(buf, "%u\n", cpu->apic_id); -+} -+ -+static ssize_t show_acpiid(struct sys_device *dev, -+ struct sysdev_attribute *attr, -+ char *buf) -+{ -+ struct pcpu *cpu = container_of(dev, struct pcpu, sysdev); -+ -+ return sprintf(buf, "%u\n", cpu->acpi_id); -+} -+static SYSDEV_ATTR(apic_id, 0444, show_apicid, NULL); -+static SYSDEV_ATTR(acpi_id, 0444, show_acpiid, NULL); -+ -+static int xen_pcpu_free(struct pcpu *pcpu) -+{ -+ if (!pcpu) -+ return 0; -+ -+ sysdev_remove_file(&pcpu->sysdev, &attr_online); -+ sysdev_unregister(&pcpu->sysdev); -+ list_del(&pcpu->pcpu_list); -+ kfree(pcpu); -+ -+ return 0; -+} -+ -+static inline int same_pcpu(struct xenpf_pcpuinfo *info, -+ struct pcpu *pcpu) -+{ -+ return (pcpu->apic_id == info->apic_id) && -+ (pcpu->xen_id == info->xen_cpuid); -+} -+ -+/* -+ * Return 1 if online status changed -+ */ -+static int xen_pcpu_online_check(struct xenpf_pcpuinfo *info, -+ struct pcpu *pcpu) -+{ -+ int result = 0; -+ -+ if (info->xen_cpuid != pcpu->xen_id) -+ return 0; -+ -+ if (xen_pcpu_online(info->flags) && !xen_pcpu_online(pcpu->flags)) { -+ /* the pcpu is onlined */ -+ pcpu->flags |= XEN_PCPU_FLAGS_ONLINE; -+ kobject_uevent(&pcpu->sysdev.kobj, KOBJ_ONLINE); -+ raw_notifier_call_chain(&xen_pcpu_chain, -+ XEN_PCPU_ONLINE, (void *)(long)pcpu->xen_id); -+ result = 1; -+ } else if (!xen_pcpu_online(info->flags) && -+ xen_pcpu_online(pcpu->flags)) { -+ /* The pcpu is offlined now */ -+ pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE; -+ kobject_uevent(&pcpu->sysdev.kobj, KOBJ_OFFLINE); -+ raw_notifier_call_chain(&xen_pcpu_chain, -+ XEN_PCPU_OFFLINE, (void *)(long)pcpu->xen_id); -+ result = 1; -+ } -+ -+ return result; -+} -+ -+static int pcpu_sysdev_init(struct pcpu *cpu) -+{ -+ int error; -+ -+ error = sysdev_register(&cpu->sysdev); -+ if (error) { -+ printk(KERN_WARNING "xen_pcpu_add: 
Failed to register pcpu\n"); -+ kfree(cpu); -+ return -1; -+ } -+ sysdev_create_file(&cpu->sysdev, &attr_online); -+ sysdev_create_file(&cpu->sysdev, &attr_apic_id); -+ sysdev_create_file(&cpu->sysdev, &attr_acpi_id); -+ return 0; -+} -+ -+static struct pcpu *get_pcpu(int xen_id) -+{ -+ struct pcpu *pcpu = NULL; -+ -+ list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list) { -+ if (pcpu->xen_id == xen_id) -+ return pcpu; -+ } -+ return NULL; -+} -+ -+static struct pcpu *init_pcpu(struct xenpf_pcpuinfo *info) -+{ -+ struct pcpu *pcpu; -+ -+ if (info->flags & XEN_PCPU_FLAGS_INVALID) -+ return NULL; -+ -+ /* The PCPU is just added */ -+ pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL); -+ if (!pcpu) -+ return NULL; -+ -+ INIT_LIST_HEAD(&pcpu->pcpu_list); -+ pcpu->xen_id = info->xen_cpuid; -+ pcpu->apic_id = info->apic_id; -+ pcpu->acpi_id = info->acpi_id; -+ pcpu->flags = info->flags; -+ -+ pcpu->sysdev.cls = &xen_pcpu_sysdev_class; -+ pcpu->sysdev.id = info->xen_cpuid; -+ -+ if (pcpu_sysdev_init(pcpu)) { -+ kfree(pcpu); -+ return NULL; -+ } -+ -+ list_add_tail(&pcpu->pcpu_list, &xen_pcpus.list); -+ raw_notifier_call_chain(&xen_pcpu_chain, -+ XEN_PCPU_ADD, -+ (void *)(long)pcpu->xen_id); -+ return pcpu; -+} -+ -+#define PCPU_NO_CHANGE 0 -+#define PCPU_ADDED 1 -+#define PCPU_ONLINE_OFFLINE 2 -+#define PCPU_REMOVED 3 -+/* -+ * Caller should hold the pcpu lock -+ * < 0: Something wrong -+ * 0: No changes -+ * > 0: State changed -+ */ -+static struct pcpu *_sync_pcpu(int cpu_num, int *max_id, int *result) -+{ -+ struct pcpu *pcpu = NULL; -+ struct xenpf_pcpuinfo *info; -+ xen_platform_op_t op = { -+ .cmd = XENPF_get_cpuinfo, -+ .interface_version = XENPF_INTERFACE_VERSION, -+ }; -+ int ret; -+ -+ *result = -1; -+ -+ info = &op.u.pcpu_info; -+ info->xen_cpuid = cpu_num; -+ -+ ret = HYPERVISOR_dom0_op(&op); -+ if (ret) -+ return NULL; -+ -+ if (max_id) -+ *max_id = op.u.pcpu_info.max_present; -+ -+ pcpu = get_pcpu(cpu_num); -+ -+ if (info->flags & XEN_PCPU_FLAGS_INVALID) { -+ 
/* The pcpu has been removed */ -+ *result = PCPU_NO_CHANGE; -+ if (pcpu) { -+ raw_notifier_call_chain(&xen_pcpu_chain, -+ XEN_PCPU_REMOVE, -+ (void *)(long)pcpu->xen_id); -+ xen_pcpu_free(pcpu); -+ *result = PCPU_REMOVED; -+ } -+ return NULL; -+ } -+ -+ -+ if (!pcpu) { -+ *result = PCPU_ADDED; -+ pcpu = init_pcpu(info); -+ if (pcpu == NULL) { -+ printk(KERN_WARNING "Failed to init pcpu %x\n", -+ info->xen_cpuid); -+ *result = -1; -+ } -+ } else { -+ *result = PCPU_NO_CHANGE; -+ /* -+ * Old PCPU is replaced with a new pcpu, this means -+ * several virq is missed, will it happen? -+ */ -+ if (!same_pcpu(info, pcpu)) { -+ printk(KERN_WARNING "Pcpu %x changed!\n", -+ pcpu->xen_id); -+ pcpu->apic_id = info->apic_id; -+ pcpu->acpi_id = info->acpi_id; -+ } -+ if (xen_pcpu_online_check(info, pcpu)) -+ *result = PCPU_ONLINE_OFFLINE; -+ } -+ return pcpu; -+} -+ -+int xen_pcpu_index(uint32_t id, int is_acpiid) -+{ -+ int cpu_num = 0, max_id = 0, ret; -+ xen_platform_op_t op = { -+ .cmd = XENPF_get_cpuinfo, -+ .interface_version = XENPF_INTERFACE_VERSION, -+ }; -+ struct xenpf_pcpuinfo *info = &op.u.pcpu_info; -+ -+ info->xen_cpuid = 0; -+ ret = HYPERVISOR_dom0_op(&op); -+ if (ret) -+ return -1; -+ max_id = op.u.pcpu_info.max_present; -+ -+ while ((cpu_num <= max_id)) { -+ info->xen_cpuid = cpu_num; -+ ret = HYPERVISOR_dom0_op(&op); -+ if (ret) -+ continue; -+ -+ if (op.u.pcpu_info.max_present > max_id) -+ max_id = op.u.pcpu_info.max_present; -+ if (id == (is_acpiid ? 
info->acpi_id : info->apic_id)) -+ return cpu_num; -+ cpu_num++; -+ } -+ -+ return -1; -+} -+EXPORT_SYMBOL(xen_pcpu_index); -+ -+/* -+ * Sync dom0's pcpu information with xen hypervisor's -+ */ -+static int xen_sync_pcpus(void) -+{ -+ /* -+ * Boot cpu always have cpu_id 0 in xen -+ */ -+ int cpu_num = 0, max_id = 0, result = 0, present = 0; -+ struct list_head *elem, *tmp; -+ struct pcpu *pcpu; -+ -+ get_pcpu_lock(); -+ -+ while ((result >= 0) && (cpu_num <= max_id)) { -+ pcpu = _sync_pcpu(cpu_num, &max_id, &result); -+ -+ printk(KERN_DEBUG "sync cpu %x get result %x max_id %x\n", -+ cpu_num, result, max_id); -+ -+ switch (result) { -+ case PCPU_NO_CHANGE: -+ if (pcpu) -+ present++; -+ break; -+ case PCPU_ADDED: -+ case PCPU_ONLINE_OFFLINE: -+ present++; -+ case PCPU_REMOVED: -+ break; -+ default: -+ printk(KERN_WARNING "Failed to sync pcpu %x\n", -+ cpu_num); -+ break; -+ -+ } -+ cpu_num++; -+ } -+ -+ if (result < 0) { -+ list_for_each_safe(elem, tmp, &xen_pcpus.list) { -+ pcpu = list_entry(elem, struct pcpu, pcpu_list); -+ xen_pcpu_free(pcpu); -+ } -+ present = 0; -+ } -+ -+ xen_pcpus.present = present; -+ -+ put_pcpu_lock(); -+ -+ return 0; -+} -+ -+static void xen_pcpu_dpc(struct work_struct *work) -+{ -+ if (xen_sync_pcpus() < 0) -+ printk(KERN_WARNING -+ "xen_pcpu_dpc: Failed to sync pcpu information\n"); -+} -+static DECLARE_WORK(xen_pcpu_work, xen_pcpu_dpc); -+ -+int xen_pcpu_hotplug(int type, uint32_t apic_id) -+{ -+ schedule_work(&xen_pcpu_work); -+ -+ return 0; -+} -+EXPORT_SYMBOL(xen_pcpu_hotplug); -+ -+static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id) -+{ -+ schedule_work(&xen_pcpu_work); -+ return IRQ_HANDLED; -+} -+ -+static int __init xen_pcpu_init(void) -+{ -+ int err; -+ -+ if (!xen_initial_domain()) -+ return 0; -+ -+ err = sysdev_class_register(&xen_pcpu_sysdev_class); -+ if (err) { -+ printk(KERN_WARNING -+ "xen_pcpu_init: register xen_pcpu sysdev Failed!\n"); -+ return err; -+ } -+ -+ INIT_LIST_HEAD(&xen_pcpus.list); -+ 
xen_pcpus.present = 0; -+ -+ xen_sync_pcpus(); -+ if (xen_pcpus.present > 0) -+ err = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, -+ 0, xen_pcpu_interrupt, 0, "pcpu", NULL); -+ if (err < 0) -+ printk(KERN_WARNING "xen_pcpu_init: " -+ "Failed to bind pcpu_state virq\n" -+ "You will lost latest information! \n"); -+ return err; -+} -+ -+arch_initcall(xen_pcpu_init); -diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c -new file mode 100644 -index 0000000..c01b5dd ---- /dev/null -+++ b/drivers/xen/platform-pci.c -@@ -0,0 +1,207 @@ -+/****************************************************************************** -+ * platform-pci.c -+ * -+ * Xen platform PCI device driver -+ * Copyright (c) 2005, Intel Corporation. -+ * Copyright (c) 2007, XenSource Inc. -+ * Copyright (c) 2010, Citrix -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms and conditions of the GNU General Public License, -+ * version 2, as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple -+ * Place - Suite 330, Boston, MA 02111-1307 USA. 
-+ * -+ */ -+ -+ -+#include <linux/interrupt.h> -+#include <linux/io.h> -+#include <linux/module.h> -+#include <linux/pci.h> -+ -+#include <xen/platform_pci.h> -+#include <xen/grant_table.h> -+#include <xen/xenbus.h> -+#include <xen/events.h> -+#include <xen/hvm.h> -+#include <xen/xen-ops.h> -+ -+#define DRV_NAME "xen-platform-pci" -+ -+MODULE_AUTHOR("ssmith@xensource.com and stefano.stabellini@eu.citrix.com"); -+MODULE_DESCRIPTION("Xen platform PCI device"); -+MODULE_LICENSE("GPL"); -+ -+static unsigned long platform_mmio; -+static unsigned long platform_mmio_alloc; -+static unsigned long platform_mmiolen; -+static uint64_t callback_via; -+ -+unsigned long alloc_xen_mmio(unsigned long len) -+{ -+ unsigned long addr; -+ -+ addr = platform_mmio + platform_mmio_alloc; -+ platform_mmio_alloc += len; -+ BUG_ON(platform_mmio_alloc > platform_mmiolen); -+ -+ return addr; -+} -+ -+static uint64_t get_callback_via(struct pci_dev *pdev) -+{ -+ u8 pin; -+ int irq; -+ -+ irq = pdev->irq; -+ if (irq < 16) -+ return irq; /* ISA IRQ */ -+ -+ pin = pdev->pin; -+ -+ /* We don't know the GSI. Specify the PCI INTx line instead. 
*/ -+ return ((uint64_t)0x01 << 56) | /* PCI INTx identifier */ -+ ((uint64_t)pci_domain_nr(pdev->bus) << 32) | -+ ((uint64_t)pdev->bus->number << 16) | -+ ((uint64_t)(pdev->devfn & 0xff) << 8) | -+ ((uint64_t)(pin - 1) & 3); -+} -+ -+static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id) -+{ -+ xen_hvm_evtchn_do_upcall(); -+ return IRQ_HANDLED; -+} -+ -+static int xen_allocate_irq(struct pci_dev *pdev) -+{ -+ return request_irq(pdev->irq, do_hvm_evtchn_intr, -+ IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TRIGGER_RISING, -+ "xen-platform-pci", pdev); -+} -+ -+static int platform_pci_resume(struct pci_dev *pdev) -+{ -+ int err; -+ if (xen_have_vector_callback) -+ return 0; -+ err = xen_set_callback_via(callback_via); -+ if (err) { -+ dev_err(&pdev->dev, "platform_pci_resume failure!\n"); -+ return err; -+ } -+ return 0; -+} -+ -+static int __devinit platform_pci_init(struct pci_dev *pdev, -+ const struct pci_device_id *ent) -+{ -+ int i, ret; -+ long ioaddr, iolen; -+ long mmio_addr, mmio_len; -+ unsigned int max_nr_gframes; -+ -+ i = pci_enable_device(pdev); -+ if (i) -+ return i; -+ -+ ioaddr = pci_resource_start(pdev, 0); -+ iolen = pci_resource_len(pdev, 0); -+ -+ mmio_addr = pci_resource_start(pdev, 1); -+ mmio_len = pci_resource_len(pdev, 1); -+ -+ if (mmio_addr == 0 || ioaddr == 0) { -+ dev_err(&pdev->dev, "no resources found\n"); -+ ret = -ENOENT; -+ goto pci_out; -+ } -+ -+ if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) { -+ dev_err(&pdev->dev, "MEM I/O resource 0x%lx @ 0x%lx busy\n", -+ mmio_addr, mmio_len); -+ ret = -EBUSY; -+ goto pci_out; -+ } -+ -+ if (request_region(ioaddr, iolen, DRV_NAME) == NULL) { -+ dev_err(&pdev->dev, "I/O resource 0x%lx @ 0x%lx busy\n", -+ iolen, ioaddr); -+ ret = -EBUSY; -+ goto mem_out; -+ } -+ -+ platform_mmio = mmio_addr; -+ platform_mmiolen = mmio_len; -+ -+ if (!xen_have_vector_callback) { -+ ret = xen_allocate_irq(pdev); -+ if (ret) { -+ dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret); -+ goto 
out; -+ } -+ callback_via = get_callback_via(pdev); -+ ret = xen_set_callback_via(callback_via); -+ if (ret) { -+ dev_warn(&pdev->dev, "Unable to set the evtchn callback " -+ "err=%d\n", ret); -+ goto out; -+ } -+ } -+ -+ max_nr_gframes = gnttab_max_grant_frames(); -+ xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); -+ ret = gnttab_init(); -+ if (ret) -+ goto out; -+ xenbus_probe(NULL); -+ ret = xen_setup_shutdown_event(); -+ if (ret) -+ goto out; -+ return 0; -+ -+out: -+ release_region(ioaddr, iolen); -+mem_out: -+ release_mem_region(mmio_addr, mmio_len); -+pci_out: -+ pci_disable_device(pdev); -+ return ret; -+} -+ -+static struct pci_device_id platform_pci_tbl[] __devinitdata = { -+ {PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM, -+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, -+ {0,} -+}; -+ -+MODULE_DEVICE_TABLE(pci, platform_pci_tbl); -+ -+static struct pci_driver platform_driver = { -+ .name = DRV_NAME, -+ .probe = platform_pci_init, -+ .id_table = platform_pci_tbl, -+#ifdef CONFIG_PM -+ .resume_early = platform_pci_resume, -+#endif -+}; -+ -+static int __init platform_pci_module_init(void) -+{ -+ /* no unplug has been done, IGNORE hasn't been specified: just -+ * return now */ -+ if (!xen_platform_pci_unplug) -+ return -ENODEV; -+ -+ return pci_register_driver(&platform_driver); -+} -+ -+module_init(platform_pci_module_init); -diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c -index 88a60e0..ae5cb05 100644 ---- a/drivers/xen/sys-hypervisor.c -+++ b/drivers/xen/sys-hypervisor.c -@@ -14,6 +14,7 @@ - #include <asm/xen/hypervisor.h> - #include <asm/xen/hypercall.h> - -+#include <xen/xen.h> - #include <xen/xenbus.h> - #include <xen/interface/xen.h> - #include <xen/interface/version.h> -diff --git a/drivers/xen/xen_acpi_memhotplug.c b/drivers/xen/xen_acpi_memhotplug.c -new file mode 100644 -index 0000000..0c4af99 ---- /dev/null -+++ b/drivers/xen/xen_acpi_memhotplug.c -@@ -0,0 +1,209 @@ -+/* -+ * xen_acpi_memhotplug.c - interface 
to notify Xen on memory device hotadd -+ * -+ * Copyright (C) 2008, Intel corporation -+ * -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or (at -+ * your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -+ * -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/types.h> -+#include <linux/memory_hotplug.h> -+#include <acpi/acpi_drivers.h> -+#include <xen/interface/platform.h> -+#include <linux/interrupt.h> -+#include <linux/spinlock.h> -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/hypercall.h> -+#include <xen/acpi.h> -+ -+struct xen_hotmem_entry { -+ struct list_head hotmem_list; -+ uint64_t start; -+ uint64_t end; -+ uint32_t flags; -+ uint32_t pxm; -+}; -+ -+struct xen_hotmem_list { -+ struct list_head list; -+ int entry_nr; -+} xen_hotmem; -+ -+DEFINE_SPINLOCK(xen_hotmem_lock); -+ -+static int xen_hyper_addmem(struct xen_hotmem_entry *entry) -+{ -+ int ret; -+ -+ xen_platform_op_t op = { -+ .cmd = XENPF_mem_hotadd, -+ .interface_version = XENPF_INTERFACE_VERSION, -+ }; -+ op.u.mem_add.spfn = entry->start >> PAGE_SHIFT; -+ op.u.mem_add.epfn = entry->end >> PAGE_SHIFT; -+ op.u.mem_add.flags = entry->flags; -+ op.u.mem_add.pxm = entry->pxm; -+ -+ ret = HYPERVISOR_dom0_op(&op); -+ return ret; -+} -+ -+static 
int add_hotmem_entry(int pxm, uint64_t start, -+ uint64_t length, uint32_t flags) -+{ -+ struct xen_hotmem_entry *entry; -+ -+ if (pxm < 0 || !length) -+ return -EINVAL; -+ -+ entry = kzalloc(sizeof(struct xen_hotmem_entry), GFP_ATOMIC); -+ if (!entry) -+ return -ENOMEM; -+ -+ INIT_LIST_HEAD(&entry->hotmem_list); -+ entry->start = start; -+ entry->end = start + length; -+ entry->flags = flags; -+ entry->pxm = pxm; -+ -+ spin_lock(&xen_hotmem_lock); -+ -+ list_add_tail(&entry->hotmem_list, &xen_hotmem.list); -+ xen_hotmem.entry_nr++; -+ -+ spin_unlock(&xen_hotmem_lock); -+ -+ return 0; -+} -+ -+static int free_hotmem_entry(struct xen_hotmem_entry *entry) -+{ -+ list_del(&entry->hotmem_list); -+ kfree(entry); -+ -+ return 0; -+} -+ -+static void xen_hotadd_mem_dpc(struct work_struct *work) -+{ -+ struct list_head *elem, *tmp; -+ struct xen_hotmem_entry *entry; -+ unsigned long flags; -+ int ret; -+ -+ spin_lock_irqsave(&xen_hotmem_lock, flags); -+ list_for_each_safe(elem, tmp, &xen_hotmem.list) { -+ entry = list_entry(elem, struct xen_hotmem_entry, hotmem_list); -+ ret = xen_hyper_addmem(entry); -+ if (ret) -+ printk(KERN_WARNING "xen addmem failed with %x\n", ret); -+ free_hotmem_entry(entry); -+ xen_hotmem.entry_nr--; -+ } -+ spin_unlock_irqrestore(&xen_hotmem_lock, flags); -+} -+ -+static DECLARE_WORK(xen_hotadd_mem_work, xen_hotadd_mem_dpc); -+ -+static int xen_acpi_get_pxm(acpi_handle h) -+{ -+ unsigned long long pxm; -+ acpi_status status; -+ acpi_handle handle; -+ acpi_handle phandle = h; -+ -+ do { -+ handle = phandle; -+ status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm); -+ if (ACPI_SUCCESS(status)) -+ return pxm; -+ status = acpi_get_parent(handle, &phandle); -+ } while (ACPI_SUCCESS(status)); -+ -+ return -1; -+} -+ -+int xen_hotadd_memory(struct acpi_memory_device *mem_device) -+{ -+ int pxm, result; -+ int num_enabled = 0; -+ struct acpi_memory_info *info; -+ -+ if (!mem_device) -+ return -EINVAL; -+ -+ pxm = 
xen_acpi_get_pxm(mem_device->device->handle); -+ -+ if (pxm < 0) -+ return -EINVAL; -+ -+ /* -+ * Always return success to ACPI driver, and notify hypervisor later -+ * because hypervisor will utilize the memory in memory hotadd hypercall -+ */ -+ list_for_each_entry(info, &mem_device->res_list, list) { -+ if (info->enabled) { /* just sanity check...*/ -+ num_enabled++; -+ continue; -+ } -+ /* -+ * If the memory block size is zero, please ignore it. -+ * Don't try to do the following memory hotplug flowchart. -+ */ -+ if (!info->length) -+ continue; -+ -+ result = add_hotmem_entry(pxm, info->start_addr, -+ info->length, 0); -+ if (result) -+ continue; -+ info->enabled = 1; -+ num_enabled++; -+ } -+ -+ if (!num_enabled) -+ return -EINVAL; -+ -+ schedule_work(&xen_hotadd_mem_work); -+ -+ return 0; -+} -+EXPORT_SYMBOL(xen_hotadd_memory); -+ -+static int xen_hotadd_mem_init(void) -+{ -+ if (!xen_initial_domain()) -+ return -ENODEV; -+ -+ INIT_LIST_HEAD(&xen_hotmem.list); -+ xen_hotmem.entry_nr = 0; -+ -+ return 0; -+} -+ -+static void xen_hotadd_mem_exit(void) -+{ -+ flush_scheduled_work(); -+} -+ -+module_init(xen_hotadd_mem_init); -+module_exit(xen_hotadd_mem_exit); -+MODULE_LICENSE("GPL"); -diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile -index 5571f5b..8dca685 100644 ---- a/drivers/xen/xenbus/Makefile -+++ b/drivers/xen/xenbus/Makefile -@@ -5,3 +5,8 @@ xenbus-objs += xenbus_client.o - xenbus-objs += xenbus_comms.o - xenbus-objs += xenbus_xs.o - xenbus-objs += xenbus_probe.o -+ -+xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o -+xenbus-objs += $(xenbus-be-objs-y) -+ -+obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o -diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c -index 92a1ef8..89f2e42 100644 ---- a/drivers/xen/xenbus/xenbus_client.c -+++ b/drivers/xen/xenbus/xenbus_client.c -@@ -49,6 +49,8 @@ const char *xenbus_strstate(enum xenbus_state state) - [ XenbusStateConnected ] = 
"Connected", - [ XenbusStateClosing ] = "Closing", - [ XenbusStateClosed ] = "Closed", -+ [ XenbusStateReconfiguring ] = "Reconfiguring", -+ [ XenbusStateReconfigured ] = "Reconfigured", - }; - return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; - } -@@ -132,17 +134,12 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, - } - EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); - -+static void xenbus_switch_fatal(struct xenbus_device *, int, int, -+ const char *, ...); - --/** -- * xenbus_switch_state -- * @dev: xenbus device -- * @state: new state -- * -- * Advertise in the store a change of the given driver to the given new_state. -- * Return 0 on success, or -errno on error. On error, the device will switch -- * to XenbusStateClosing, and the error will be saved in the store. -- */ --int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) -+static int -+__xenbus_switch_state(struct xenbus_device *dev, -+ enum xenbus_state state, int depth) - { - /* We check whether the state is currently set to the given value, and - if not, then the state is set. We don't want to unconditionally -@@ -151,35 +148,65 @@ int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) - to it, as the device will be tearing down, and we don't want to - resurrect that directory. - -- Note that, because of this cached value of our state, this function -- will not work inside a Xenstore transaction (something it was -- trying to in the past) because dev->state would not get reset if -- the transaction was aborted. -- -+ Note that, because of this cached value of our state, this -+ function will not take a caller's Xenstore transaction -+ (something it was trying to in the past) because dev->state -+ would not get reset if the transaction was aborted. 
- */ - -+ struct xenbus_transaction xbt; - int current_state; -- int err; -+ int err, abort; - - if (state == dev->state) - return 0; - -- err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d", -- ¤t_state); -- if (err != 1) -+again: -+ abort = 1; -+ -+ err = xenbus_transaction_start(&xbt); -+ if (err) { -+ xenbus_switch_fatal(dev, depth, err, "starting transaction"); - return 0; -+ } -+ -+ err = xenbus_scanf(xbt, dev->nodename, "state", "%d", ¤t_state); -+ if (err != 1) -+ goto abort; - -- err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state); -+ err = xenbus_printf(xbt, dev->nodename, "state", "%d", state); - if (err) { -- if (state != XenbusStateClosing) /* Avoid looping */ -- xenbus_dev_fatal(dev, err, "writing new state"); -- return err; -+ xenbus_switch_fatal(dev, depth, err, "writing new state"); -+ goto abort; - } - -- dev->state = state; -+ abort = 0; -+abort: -+ err = xenbus_transaction_end(xbt, abort); -+ if (err) { -+ if (err == -EAGAIN && !abort) -+ goto again; -+ xenbus_switch_fatal(dev, depth, err, "ending transaction"); -+ } else -+ dev->state = state; - - return 0; - } -+ -+/** -+ * xenbus_switch_state -+ * @dev: xenbus device -+ * @state: new state -+ * -+ * Advertise in the store a change of the given driver to the given new_state. -+ * Return 0 on success, or -errno on error. On error, the device will switch -+ * to XenbusStateClosing, and the error will be saved in the store. -+ */ -+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) -+{ -+ return __xenbus_switch_state(dev, state, 0); -+} -+ - EXPORT_SYMBOL_GPL(xenbus_switch_state); - - int xenbus_frontend_closed(struct xenbus_device *dev) -@@ -283,6 +310,23 @@ void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...) - EXPORT_SYMBOL_GPL(xenbus_dev_fatal); - - /** -+ * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps -+ * avoiding recursion within xenbus_switch_state. 
-+ */ -+static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, -+ const char *fmt, ...) -+{ -+ va_list ap; -+ -+ va_start(ap, fmt); -+ xenbus_va_dev_error(dev, err, fmt, ap); -+ va_end(ap); -+ -+ if (!depth) -+ __xenbus_switch_state(dev, XenbusStateClosing, 1); -+} -+ -+/** - * xenbus_grant_ring - * @dev: xenbus device - * @ring_mfn: mfn of ring to grant -diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c -index 649fcdf..3a83ba2 100644 ---- a/drivers/xen/xenbus/xenbus_probe.c -+++ b/drivers/xen/xenbus/xenbus_probe.c -@@ -49,31 +49,29 @@ - #include <asm/page.h> - #include <asm/pgtable.h> - #include <asm/xen/hypervisor.h> -+ -+#include <xen/xen.h> - #include <xen/xenbus.h> - #include <xen/events.h> - #include <xen/page.h> - -+#include <xen/platform_pci.h> -+#include <xen/hvm.h> -+ - #include "xenbus_comms.h" - #include "xenbus_probe.h" - - - int xen_store_evtchn; --EXPORT_SYMBOL(xen_store_evtchn); -+EXPORT_SYMBOL_GPL(xen_store_evtchn); - - struct xenstore_domain_interface *xen_store_interface; -+EXPORT_SYMBOL_GPL(xen_store_interface); -+ - static unsigned long xen_store_mfn; - - static BLOCKING_NOTIFIER_HEAD(xenstore_chain); - --static void wait_for_devices(struct xenbus_driver *xendrv); -- --static int xenbus_probe_frontend(const char *type, const char *name); -- --static void xenbus_dev_shutdown(struct device *_dev); -- --static int xenbus_dev_suspend(struct device *dev, pm_message_t state); --static int xenbus_dev_resume(struct device *dev); -- - /* If something in array of ids matches this device, return it. 
*/ - static const struct xenbus_device_id * - match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) -@@ -94,34 +92,7 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv) - - return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; - } -- --static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env) --{ -- struct xenbus_device *dev = to_xenbus_device(_dev); -- -- if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) -- return -ENOMEM; -- -- return 0; --} -- --/* device/<type>/<id> => <type>-<id> */ --static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) --{ -- nodename = strchr(nodename, '/'); -- if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { -- printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); -- return -EINVAL; -- } -- -- strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); -- if (!strchr(bus_id, '/')) { -- printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); -- return -EINVAL; -- } -- *strchr(bus_id, '/') = '-'; -- return 0; --} -+EXPORT_SYMBOL_GPL(xenbus_match); - - - static void free_otherend_details(struct xenbus_device *dev) -@@ -141,7 +112,28 @@ static void free_otherend_watch(struct xenbus_device *dev) - } - - --int read_otherend_details(struct xenbus_device *xendev, -+static int talk_to_otherend(struct xenbus_device *dev) -+{ -+ struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); -+ -+ free_otherend_watch(dev); -+ free_otherend_details(dev); -+ -+ return drv->read_otherend_details(dev); -+} -+ -+ -+ -+static int watch_otherend(struct xenbus_device *dev) -+{ -+ struct xen_bus_type *bus = container_of(dev->dev.bus, struct xen_bus_type, bus); -+ -+ return xenbus_watch_pathfmt(dev, &dev->otherend_watch, bus->otherend_changed, -+ "%s/%s", dev->otherend, "state"); -+} -+ -+ -+int xenbus_read_otherend_details(struct xenbus_device *xendev, - char *id_node, char *path_node) - { - int err = xenbus_gather(XBT_NIL, xendev->nodename, -@@ 
-166,39 +158,11 @@ int read_otherend_details(struct xenbus_device *xendev, - - return 0; - } -+EXPORT_SYMBOL_GPL(xenbus_read_otherend_details); - -- --static int read_backend_details(struct xenbus_device *xendev) --{ -- return read_otherend_details(xendev, "backend-id", "backend"); --} -- --static struct device_attribute xenbus_dev_attrs[] = { -- __ATTR_NULL --}; -- --/* Bus type for frontend drivers. */ --static struct xen_bus_type xenbus_frontend = { -- .root = "device", -- .levels = 2, /* device/type/<id> */ -- .get_bus_id = frontend_bus_id, -- .probe = xenbus_probe_frontend, -- .bus = { -- .name = "xen", -- .match = xenbus_match, -- .uevent = xenbus_uevent, -- .probe = xenbus_dev_probe, -- .remove = xenbus_dev_remove, -- .shutdown = xenbus_dev_shutdown, -- .dev_attrs = xenbus_dev_attrs, -- -- .suspend = xenbus_dev_suspend, -- .resume = xenbus_dev_resume, -- }, --}; -- --static void otherend_changed(struct xenbus_watch *watch, -- const char **vec, unsigned int len) -+void xenbus_otherend_changed(struct xenbus_watch *watch, -+ const char **vec, unsigned int len, -+ int ignore_on_shutdown) - { - struct xenbus_device *dev = - container_of(watch, struct xenbus_device, otherend_watch); -@@ -226,11 +190,7 @@ static void otherend_changed(struct xenbus_watch *watch, - * work that can fail e.g., when the rootfs is gone. - */ - if (system_state > SYSTEM_RUNNING) { -- struct xen_bus_type *bus = bus; -- bus = container_of(dev->dev.bus, struct xen_bus_type, bus); -- /* If we're frontend, drive the state machine to Closed. */ -- /* This should cause the backend to release our resources. 
*/ -- if ((bus == &xenbus_frontend) && (state == XenbusStateClosing)) -+ if (ignore_on_shutdown && (state == XenbusStateClosing)) - xenbus_frontend_closed(dev); - return; - } -@@ -238,25 +198,7 @@ static void otherend_changed(struct xenbus_watch *watch, - if (drv->otherend_changed) - drv->otherend_changed(dev, state); - } -- -- --static int talk_to_otherend(struct xenbus_device *dev) --{ -- struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); -- -- free_otherend_watch(dev); -- free_otherend_details(dev); -- -- return drv->read_otherend_details(dev); --} -- -- --static int watch_otherend(struct xenbus_device *dev) --{ -- return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, -- "%s/%s", dev->otherend, "state"); --} -- -+EXPORT_SYMBOL_GPL(xenbus_otherend_changed); - - int xenbus_dev_probe(struct device *_dev) - { -@@ -300,8 +242,9 @@ int xenbus_dev_probe(struct device *_dev) - fail: - xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); - xenbus_switch_state(dev, XenbusStateClosed); -- return -ENODEV; -+ return err; - } -+EXPORT_SYMBOL_GPL(xenbus_dev_probe); - - int xenbus_dev_remove(struct device *_dev) - { -@@ -319,8 +262,9 @@ int xenbus_dev_remove(struct device *_dev) - xenbus_switch_state(dev, XenbusStateClosed); - return 0; - } -+EXPORT_SYMBOL_GPL(xenbus_dev_remove); - --static void xenbus_dev_shutdown(struct device *_dev) -+void xenbus_dev_shutdown(struct device *_dev) - { - struct xenbus_device *dev = to_xenbus_device(_dev); - unsigned long timeout = 5*HZ; -@@ -341,6 +285,7 @@ static void xenbus_dev_shutdown(struct device *_dev) - out: - put_device(&dev->dev); - } -+EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); - - int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, -@@ -354,25 +299,7 @@ int xenbus_register_driver_common(struct xenbus_driver *drv, - - return driver_register(&drv->driver); - } -- --int __xenbus_register_frontend(struct xenbus_driver *drv, -- struct module *owner, const 
char *mod_name) --{ -- int ret; -- -- drv->read_otherend_details = read_backend_details; -- -- ret = xenbus_register_driver_common(drv, &xenbus_frontend, -- owner, mod_name); -- if (ret) -- return ret; -- -- /* If this driver is loaded as a module wait for devices to attach. */ -- wait_for_devices(drv); -- -- return 0; --} --EXPORT_SYMBOL_GPL(__xenbus_register_frontend); -+EXPORT_SYMBOL_GPL(xenbus_register_driver_common); - - void xenbus_unregister_driver(struct xenbus_driver *drv) - { -@@ -543,24 +470,7 @@ fail: - kfree(xendev); - return err; - } -- --/* device/<typename>/<name> */ --static int xenbus_probe_frontend(const char *type, const char *name) --{ -- char *nodename; -- int err; -- -- nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", -- xenbus_frontend.root, type, name); -- if (!nodename) -- return -ENOMEM; -- -- DPRINTK("%s", nodename); -- -- err = xenbus_probe_node(&xenbus_frontend, type, nodename); -- kfree(nodename); -- return err; --} -+EXPORT_SYMBOL_GPL(xenbus_probe_node); - - static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) - { -@@ -574,10 +484,11 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) - return PTR_ERR(dir); - - for (i = 0; i < dir_n; i++) { -- err = bus->probe(type, dir[i]); -+ err = bus->probe(bus, type, dir[i]); - if (err) - break; - } -+ - kfree(dir); - return err; - } -@@ -597,9 +508,11 @@ int xenbus_probe_devices(struct xen_bus_type *bus) - if (err) - break; - } -+ - kfree(dir); - return err; - } -+EXPORT_SYMBOL_GPL(xenbus_probe_devices); - - static unsigned int char_count(const char *str, char c) - { -@@ -662,32 +575,17 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) - } - EXPORT_SYMBOL_GPL(xenbus_dev_changed); - --static void frontend_changed(struct xenbus_watch *watch, -- const char **vec, unsigned int len) --{ -- DPRINTK(""); -- -- xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); --} -- --/* We watch for devices appearing and vanishing. 
*/ --static struct xenbus_watch fe_watch = { -- .node = "device", -- .callback = frontend_changed, --}; -- --static int xenbus_dev_suspend(struct device *dev, pm_message_t state) -+int xenbus_dev_suspend(struct device *dev, pm_message_t state) - { - int err = 0; - struct xenbus_driver *drv; -- struct xenbus_device *xdev; -+ struct xenbus_device *xdev = container_of(dev, struct xenbus_device, dev); - -- DPRINTK(""); -+ DPRINTK("%s", xdev->nodename); - - if (dev->driver == NULL) - return 0; - drv = to_xenbus_driver(dev->driver); -- xdev = container_of(dev, struct xenbus_device, dev); - if (drv->suspend) - err = drv->suspend(xdev, state); - if (err) -@@ -695,21 +593,19 @@ static int xenbus_dev_suspend(struct device *dev, pm_message_t state) - "xenbus: suspend %s failed: %i\n", dev_name(dev), err); - return 0; - } -+EXPORT_SYMBOL_GPL(xenbus_dev_suspend); - --static int xenbus_dev_resume(struct device *dev) -+int xenbus_dev_resume(struct device *dev) - { - int err; - struct xenbus_driver *drv; -- struct xenbus_device *xdev; -+ struct xenbus_device *xdev = container_of(dev, struct xenbus_device, dev); - -- DPRINTK(""); -+ DPRINTK("%s", xdev->nodename); - - if (dev->driver == NULL) - return 0; -- - drv = to_xenbus_driver(dev->driver); -- xdev = container_of(dev, struct xenbus_device, dev); -- - err = talk_to_otherend(xdev); - if (err) { - printk(KERN_WARNING -@@ -740,6 +636,7 @@ static int xenbus_dev_resume(struct device *dev) - - return 0; - } -+EXPORT_SYMBOL_GPL(xenbus_dev_resume); - - /* A flag to determine if xenstored is 'ready' (i.e. 
has started) */ - int xenstored_ready = 0; -@@ -749,10 +646,7 @@ int register_xenstore_notifier(struct notifier_block *nb) - { - int ret = 0; - -- if (xenstored_ready > 0) -- ret = nb->notifier_call(nb, 0, NULL); -- else -- blocking_notifier_chain_register(&xenstore_chain, nb); -+ blocking_notifier_chain_register(&xenstore_chain, nb); - - return ret; - } -@@ -768,57 +662,93 @@ void xenbus_probe(struct work_struct *unused) - { - BUG_ON((xenstored_ready <= 0)); - -- /* Enumerate devices in xenstore and watch for changes. */ -- xenbus_probe_devices(&xenbus_frontend); -- register_xenbus_watch(&fe_watch); -- xenbus_backend_probe_and_watch(); -- - /* Notify others that xenstore is up */ - blocking_notifier_call_chain(&xenstore_chain, 0, NULL); - } -+EXPORT_SYMBOL_GPL(xenbus_probe); -+ -+static int __init xenbus_probe_initcall(void) -+{ -+ if (!xen_domain()) -+ return -ENODEV; -+ -+ if (xen_initial_domain() || xen_hvm_domain()) -+ return 0; -+ -+ xenbus_probe(NULL); -+ return 0; -+} -+ -+device_initcall(xenbus_probe_initcall); - --static int __init xenbus_probe_init(void) -+static int __init xenbus_init(void) - { - int err = 0; -+ unsigned long page = 0; - - DPRINTK(""); - - err = -ENODEV; - if (!xen_domain()) -- goto out_error; -- -- /* Register ourselves with the kernel bus subsystem */ -- err = bus_register(&xenbus_frontend.bus); -- if (err) -- goto out_error; -- -- err = xenbus_backend_bus_register(); -- if (err) -- goto out_unreg_front; -+ return err; - - /* - * Domain0 doesn't have a store_evtchn or store_mfn yet. 
- */ - if (xen_initial_domain()) { -- /* dom0 not yet supported */ -+ struct evtchn_alloc_unbound alloc_unbound; -+ -+ /* Allocate Xenstore page */ -+ page = get_zeroed_page(GFP_KERNEL); -+ if (!page) -+ goto out_error; -+ -+ xen_store_mfn = xen_start_info->store_mfn = -+ pfn_to_mfn(virt_to_phys((void *)page) >> -+ PAGE_SHIFT); -+ -+ /* Next allocate a local port which xenstored can bind to */ -+ alloc_unbound.dom = DOMID_SELF; -+ alloc_unbound.remote_dom = 0; -+ -+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, -+ &alloc_unbound); -+ if (err == -ENOSYS) -+ goto out_error; -+ -+ BUG_ON(err); -+ xen_store_evtchn = xen_start_info->store_evtchn = -+ alloc_unbound.port; -+ -+ xen_store_interface = mfn_to_virt(xen_store_mfn); - } else { - xenstored_ready = 1; -- xen_store_evtchn = xen_start_info->store_evtchn; -- xen_store_mfn = xen_start_info->store_mfn; -+ if (xen_hvm_domain()) { -+ uint64_t v = 0; -+ err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); -+ if (err) -+ goto out_error; -+ xen_store_evtchn = (int)v; -+ err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); -+ if (err) -+ goto out_error; -+ xen_store_mfn = (unsigned long)v; -+ xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); -+ } else { -+ xen_store_evtchn = xen_start_info->store_evtchn; -+ xen_store_mfn = xen_start_info->store_mfn; -+ xen_store_interface = mfn_to_virt(xen_store_mfn); -+ } - } -- xen_store_interface = mfn_to_virt(xen_store_mfn); - - /* Initialize the interface to xenstore. 
*/ - err = xs_init(); - if (err) { - printk(KERN_WARNING - "XENBUS: Error initializing xenstore comms: %i\n", err); -- goto out_unreg_back; -+ goto out_error; - } - -- if (!xen_initial_domain()) -- xenbus_probe(NULL); -- - #ifdef CONFIG_XEN_COMPAT_XENFS - /* - * Create xenfs mountpoint in /proc for compatibility with -@@ -829,128 +759,13 @@ static int __init xenbus_probe_init(void) - - return 0; - -- out_unreg_back: -- xenbus_backend_bus_unregister(); -- -- out_unreg_front: -- bus_unregister(&xenbus_frontend.bus); -- - out_error: -+ if (page != 0) -+ free_page(page); -+ - return err; - } - --postcore_initcall(xenbus_probe_init); -+postcore_initcall(xenbus_init); - - MODULE_LICENSE("GPL"); -- --static int is_device_connecting(struct device *dev, void *data) --{ -- struct xenbus_device *xendev = to_xenbus_device(dev); -- struct device_driver *drv = data; -- struct xenbus_driver *xendrv; -- -- /* -- * A device with no driver will never connect. We care only about -- * devices which should currently be in the process of connecting. -- */ -- if (!dev->driver) -- return 0; -- -- /* Is this search limited to a particular driver? */ -- if (drv && (dev->driver != drv)) -- return 0; -- -- xendrv = to_xenbus_driver(dev->driver); -- return (xendev->state < XenbusStateConnected || -- (xendev->state == XenbusStateConnected && -- xendrv->is_ready && !xendrv->is_ready(xendev))); --} -- --static int exists_connecting_device(struct device_driver *drv) --{ -- return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, -- is_device_connecting); --} -- --static int print_device_status(struct device *dev, void *data) --{ -- struct xenbus_device *xendev = to_xenbus_device(dev); -- struct device_driver *drv = data; -- -- /* Is this operation limited to a particular driver? */ -- if (drv && (dev->driver != drv)) -- return 0; -- -- if (!dev->driver) { -- /* Information only: is this too noisy? 
*/ -- printk(KERN_INFO "XENBUS: Device with no driver: %s\n", -- xendev->nodename); -- } else if (xendev->state < XenbusStateConnected) { -- enum xenbus_state rstate = XenbusStateUnknown; -- if (xendev->otherend) -- rstate = xenbus_read_driver_state(xendev->otherend); -- printk(KERN_WARNING "XENBUS: Timeout connecting " -- "to device: %s (local state %d, remote state %d)\n", -- xendev->nodename, xendev->state, rstate); -- } -- -- return 0; --} -- --/* We only wait for device setup after most initcalls have run. */ --static int ready_to_wait_for_devices; -- --/* -- * On a 5-minute timeout, wait for all devices currently configured. We need -- * to do this to guarantee that the filesystems and / or network devices -- * needed for boot are available, before we can allow the boot to proceed. -- * -- * This needs to be on a late_initcall, to happen after the frontend device -- * drivers have been initialised, but before the root fs is mounted. -- * -- * A possible improvement here would be to have the tools add a per-device -- * flag to the store entry, indicating whether it is needed at boot time. -- * This would allow people who knew what they were doing to accelerate their -- * boot slightly, but of course needs tools or manual intervention to set up -- * those flags correctly. -- */ --static void wait_for_devices(struct xenbus_driver *xendrv) --{ -- unsigned long start = jiffies; -- struct device_driver *drv = xendrv ? 
&xendrv->driver : NULL; -- unsigned int seconds_waited = 0; -- -- if (!ready_to_wait_for_devices || !xen_domain()) -- return; -- -- while (exists_connecting_device(drv)) { -- if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { -- if (!seconds_waited) -- printk(KERN_WARNING "XENBUS: Waiting for " -- "devices to initialise: "); -- seconds_waited += 5; -- printk("%us...", 300 - seconds_waited); -- if (seconds_waited == 300) -- break; -- } -- -- schedule_timeout_interruptible(HZ/10); -- } -- -- if (seconds_waited) -- printk("\n"); -- -- bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, -- print_device_status); --} -- --#ifndef MODULE --static int __init boot_wait_for_devices(void) --{ -- ready_to_wait_for_devices = 1; -- wait_for_devices(NULL); -- return 0; --} -- --late_initcall(boot_wait_for_devices); --#endif -diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h -index 6c5e318..0e5fc4c 100644 ---- a/drivers/xen/xenbus/xenbus_probe.h -+++ b/drivers/xen/xenbus/xenbus_probe.h -@@ -36,26 +36,13 @@ - - #define XEN_BUS_ID_SIZE 20 - --#ifdef CONFIG_XEN_BACKEND --extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); --extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); --extern void xenbus_backend_probe_and_watch(void); --extern int xenbus_backend_bus_register(void); --extern void xenbus_backend_bus_unregister(void); --#else --static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} --static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} --static inline void xenbus_backend_probe_and_watch(void) {} --static inline int xenbus_backend_bus_register(void) { return 0; } --static inline void xenbus_backend_bus_unregister(void) {} --#endif -- - struct xen_bus_type - { - char *root; - unsigned int levels; - int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); -- int (*probe)(const char *type, const char *dir); -+ int (*probe)(struct 
xen_bus_type *bus, const char *type, const char *dir); -+ void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, unsigned int len); - struct bus_type bus; - }; - -@@ -73,4 +60,16 @@ extern int xenbus_probe_devices(struct xen_bus_type *bus); - - extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); - -+extern void xenbus_dev_shutdown(struct device *_dev); -+ -+extern int xenbus_dev_suspend(struct device *dev, pm_message_t state); -+extern int xenbus_dev_resume(struct device *dev); -+ -+extern void xenbus_otherend_changed(struct xenbus_watch *watch, -+ const char **vec, unsigned int len, -+ int ignore_on_shutdown); -+ -+extern int xenbus_read_otherend_details(struct xenbus_device *xendev, -+ char *id_node, char *path_node); -+ - #endif -diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c -new file mode 100644 -index 0000000..9b9dd36 ---- /dev/null -+++ b/drivers/xen/xenbus/xenbus_probe_backend.c -@@ -0,0 +1,293 @@ -+/****************************************************************************** -+ * Talks to Xen Store to figure out what devices we have (backend half). -+ * -+ * Copyright (C) 2005 Rusty Russell, IBM Corporation -+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard -+ * Copyright (C) 2005, 2006 XenSource Ltd -+ * Copyright (C) 2007 Solarflare Communications, Inc. 
-+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. -+ */ -+ -+#define DPRINTK(fmt, args...) 
\ -+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ -+ __func__, __LINE__, ##args) -+ -+#include <linux/kernel.h> -+#include <linux/err.h> -+#include <linux/string.h> -+#include <linux/ctype.h> -+#include <linux/fcntl.h> -+#include <linux/mm.h> -+#include <linux/notifier.h> -+ -+#include <asm/page.h> -+#include <asm/pgtable.h> -+#include <asm/xen/hypervisor.h> -+#include <asm/hypervisor.h> -+#include <xen/xenbus.h> -+#include <xen/features.h> -+ -+#include "xenbus_comms.h" -+#include "xenbus_probe.h" -+ -+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ -+static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) -+{ -+ int domid, err; -+ const char *devid, *type, *frontend; -+ unsigned int typelen; -+ -+ type = strchr(nodename, '/'); -+ if (!type) -+ return -EINVAL; -+ type++; -+ typelen = strcspn(type, "/"); -+ if (!typelen || type[typelen] != '/') -+ return -EINVAL; -+ -+ devid = strrchr(nodename, '/') + 1; -+ -+ err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, -+ "frontend", NULL, &frontend, -+ NULL); -+ if (err) -+ return err; -+ if (strlen(frontend) == 0) -+ err = -ERANGE; -+ if (!err && !xenbus_exists(XBT_NIL, frontend, "")) -+ err = -ENOENT; -+ kfree(frontend); -+ -+ if (err) -+ return err; -+ -+ if (snprintf(bus_id, XEN_BUS_ID_SIZE, -+ "%.*s-%i-%s", typelen, type, domid, devid) >= XEN_BUS_ID_SIZE) -+ return -ENOSPC; -+ return 0; -+} -+ -+static int xenbus_uevent_backend(struct device *dev, -+ struct kobj_uevent_env *env) -+{ -+ struct xenbus_device *xdev; -+ struct xenbus_driver *drv; -+ struct xen_bus_type *bus; -+ -+ DPRINTK(""); -+ -+ if (dev == NULL) -+ return -ENODEV; -+ -+ xdev = to_xenbus_device(dev); -+ bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); -+ if (xdev == NULL) -+ return -ENODEV; -+ -+ /* stuff we want to pass to /sbin/hotplug */ -+ if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) -+ return -ENOMEM; -+ -+ if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename)) 
-+ return -ENOMEM; -+ -+ if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root)) -+ return -ENOMEM; -+ -+ if (dev->driver) { -+ drv = to_xenbus_driver(dev->driver); -+ if (drv && drv->uevent) -+ return drv->uevent(xdev, env); -+ } -+ -+ return 0; -+} -+ -+/* backend/<typename>/<frontend-uuid>/<name> */ -+static int xenbus_probe_backend_unit(struct xen_bus_type *bus, -+ const char *dir, -+ const char *type, -+ const char *name) -+{ -+ char *nodename; -+ int err; -+ -+ nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name); -+ if (!nodename) -+ return -ENOMEM; -+ -+ DPRINTK("%s\n", nodename); -+ -+ err = xenbus_probe_node(bus, type, nodename); -+ kfree(nodename); -+ return err; -+} -+ -+/* backend/<typename>/<frontend-domid> */ -+static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, const char *domid) -+{ -+ char *nodename; -+ int err = 0; -+ char **dir; -+ unsigned int i, dir_n = 0; -+ -+ DPRINTK(""); -+ -+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid); -+ if (!nodename) -+ return -ENOMEM; -+ -+ dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); -+ if (IS_ERR(dir)) { -+ kfree(nodename); -+ return PTR_ERR(dir); -+ } -+ -+ for (i = 0; i < dir_n; i++) { -+ err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]); -+ if (err) -+ break; -+ } -+ kfree(dir); -+ kfree(nodename); -+ return err; -+} -+ -+static void frontend_changed(struct xenbus_watch *watch, -+ const char **vec, unsigned int len) -+{ -+ xenbus_otherend_changed(watch, vec, len, 0); -+} -+ -+static struct device_attribute xenbus_backend_dev_attrs[] = { -+ __ATTR_NULL -+}; -+ -+static struct xen_bus_type xenbus_backend = { -+ .root = "backend", -+ .levels = 3, /* backend/type/<frontend>/<id> */ -+ .get_bus_id = backend_bus_id, -+ .probe = xenbus_probe_backend, -+ .otherend_changed = frontend_changed, -+ .bus = { -+ .name = "xen-backend", -+ .match = xenbus_match, -+ .uevent = xenbus_uevent_backend, -+ .probe = xenbus_dev_probe, -+ .remove = 
xenbus_dev_remove, -+ .shutdown = xenbus_dev_shutdown, -+ .dev_attrs = xenbus_backend_dev_attrs, -+ }, -+}; -+ -+static void backend_changed(struct xenbus_watch *watch, -+ const char **vec, unsigned int len) -+{ -+ DPRINTK(""); -+ -+ xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); -+} -+ -+static struct xenbus_watch be_watch = { -+ .node = "backend", -+ .callback = backend_changed, -+}; -+ -+static int read_frontend_details(struct xenbus_device *xendev) -+{ -+ return xenbus_read_otherend_details(xendev, "frontend-id", "frontend"); -+} -+ -+//void xenbus_backend_suspend(int (*fn)(struct device *, void *)) -+//{ -+// DPRINTK(""); -+// bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); -+//} -+ -+//void xenbus_backend_resume(int (*fn)(struct device *, void *)) -+//{ -+// DPRINTK(""); -+// bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); -+//} -+ -+//int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *)) -+//{ -+// return bus_for_each_dev(&xenbus_backend.bus, NULL, arg, fn); -+//} -+//EXPORT_SYMBOL_GPL(xenbus_for_each_backend); -+ -+int xenbus_dev_is_online(struct xenbus_device *dev) -+{ -+ int rc, val; -+ -+ rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); -+ if (rc != 1) -+ val = 0; /* no online node present */ -+ -+ return val; -+} -+EXPORT_SYMBOL_GPL(xenbus_dev_is_online); -+ -+int __xenbus_register_backend(struct xenbus_driver *drv, -+ struct module *owner, const char *mod_name) -+{ -+ drv->read_otherend_details = read_frontend_details; -+ -+ return xenbus_register_driver_common(drv, &xenbus_backend, -+ owner, mod_name); -+} -+EXPORT_SYMBOL_GPL(__xenbus_register_backend); -+ -+static int backend_probe_and_watch(struct notifier_block *notifier, -+ unsigned long event, -+ void *data) -+{ -+ /* Enumerate devices in xenstore and watch for changes. 
*/ -+ xenbus_probe_devices(&xenbus_backend); -+ register_xenbus_watch(&be_watch); -+ -+ return NOTIFY_DONE; -+} -+ -+static int __init xenbus_probe_backend_init(void) -+{ -+ static struct notifier_block xenstore_notifier = { -+ .notifier_call = backend_probe_and_watch -+ }; -+ int err; -+ -+ DPRINTK(""); -+ -+ /* Register ourselves with the kernel bus subsystem */ -+ err = bus_register(&xenbus_backend.bus); -+ if (err) -+ return err; -+ -+ register_xenstore_notifier(&xenstore_notifier); -+ -+ return 0; -+} -+subsys_initcall(xenbus_probe_backend_init); -diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c -new file mode 100644 -index 0000000..5413248 ---- /dev/null -+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c -@@ -0,0 +1,292 @@ -+#define DPRINTK(fmt, args...) \ -+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ -+ __func__, __LINE__, ##args) -+ -+#include <linux/kernel.h> -+#include <linux/err.h> -+#include <linux/string.h> -+#include <linux/ctype.h> -+#include <linux/fcntl.h> -+#include <linux/mm.h> -+#include <linux/proc_fs.h> -+#include <linux/notifier.h> -+#include <linux/kthread.h> -+#include <linux/mutex.h> -+#include <linux/io.h> -+ -+#include <asm/page.h> -+#include <asm/pgtable.h> -+#include <asm/xen/hypervisor.h> -+#include <xen/xenbus.h> -+#include <xen/events.h> -+#include <xen/page.h> -+#include <xen/xen.h> -+#include <xen/platform_pci.h> -+ -+#include "xenbus_comms.h" -+#include "xenbus_probe.h" -+ -+/* device/<type>/<id> => <type>-<id> */ -+static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) -+{ -+ nodename = strchr(nodename, '/'); -+ if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { -+ printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); -+ return -EINVAL; -+ } -+ -+ strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); -+ if (!strchr(bus_id, '/')) { -+ printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); -+ return -EINVAL; -+ } -+ *strchr(bus_id, '/') = 
'-'; -+ return 0; -+} -+ -+/* device/<typename>/<name> */ -+static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type, const char *name) -+{ -+ char *nodename; -+ int err; -+ -+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name); -+ if (!nodename) -+ return -ENOMEM; -+ -+ DPRINTK("%s", nodename); -+ -+ err = xenbus_probe_node(bus, type, nodename); -+ kfree(nodename); -+ return err; -+} -+ -+static int xenbus_uevent_frontend(struct device *_dev, struct kobj_uevent_env *env) -+{ -+ struct xenbus_device *dev = to_xenbus_device(_dev); -+ -+ if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+ -+static void backend_changed(struct xenbus_watch *watch, -+ const char **vec, unsigned int len) -+{ -+ xenbus_otherend_changed(watch, vec, len, 1); -+} -+ -+static struct device_attribute xenbus_frontend_dev_attrs[] = { -+ __ATTR_NULL -+}; -+ -+ -+static struct xen_bus_type xenbus_frontend = { -+ .root = "device", -+ .levels = 2, /* device/type/<id> */ -+ .get_bus_id = frontend_bus_id, -+ .probe = xenbus_probe_frontend, -+ .otherend_changed = backend_changed, -+ .bus = { -+ .name = "xen", -+ .match = xenbus_match, -+ .uevent = xenbus_uevent_frontend, -+ .probe = xenbus_dev_probe, -+ .remove = xenbus_dev_remove, -+ .shutdown = xenbus_dev_shutdown, -+ .dev_attrs= xenbus_frontend_dev_attrs, -+ -+ .suspend = xenbus_dev_suspend, -+ .resume = xenbus_dev_resume, -+ }, -+}; -+ -+static void frontend_changed(struct xenbus_watch *watch, -+ const char **vec, unsigned int len) -+{ -+ DPRINTK(""); -+ -+ xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); -+} -+ -+ -+/* We watch for devices appearing and vanishing. 
*/ -+static struct xenbus_watch fe_watch = { -+ .node = "device", -+ .callback = frontend_changed, -+}; -+ -+static int read_backend_details(struct xenbus_device *xendev) -+{ -+ return xenbus_read_otherend_details(xendev, "backend-id", "backend"); -+} -+ -+static int is_device_connecting(struct device *dev, void *data) -+{ -+ struct xenbus_device *xendev = to_xenbus_device(dev); -+ struct device_driver *drv = data; -+ struct xenbus_driver *xendrv; -+ -+ /* -+ * A device with no driver will never connect. We care only about -+ * devices which should currently be in the process of connecting. -+ */ -+ if (!dev->driver) -+ return 0; -+ -+ /* Is this search limited to a particular driver? */ -+ if (drv && (dev->driver != drv)) -+ return 0; -+ -+ xendrv = to_xenbus_driver(dev->driver); -+ return (xendev->state < XenbusStateConnected || -+ (xendev->state == XenbusStateConnected && -+ xendrv->is_ready && !xendrv->is_ready(xendev))); -+} -+ -+static int exists_connecting_device(struct device_driver *drv) -+{ -+ return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, -+ is_device_connecting); -+} -+ -+static int print_device_status(struct device *dev, void *data) -+{ -+ struct xenbus_device *xendev = to_xenbus_device(dev); -+ struct device_driver *drv = data; -+ -+ /* Is this operation limited to a particular driver? */ -+ if (drv && (dev->driver != drv)) -+ return 0; -+ -+ if (!dev->driver) { -+ /* Information only: is this too noisy? */ -+ printk(KERN_INFO "XENBUS: Device with no driver: %s\n", -+ xendev->nodename); -+ } else if (xendev->state < XenbusStateConnected) { -+ enum xenbus_state rstate = XenbusStateUnknown; -+ if (xendev->otherend) -+ rstate = xenbus_read_driver_state(xendev->otherend); -+ printk(KERN_WARNING "XENBUS: Timeout connecting " -+ "to device: %s (local state %d, remote state %d)\n", -+ xendev->nodename, xendev->state, rstate); -+ } -+ -+ return 0; -+} -+ -+/* We only wait for device setup after most initcalls have run. 
*/ -+static int ready_to_wait_for_devices; -+ -+/* -+ * On a 5-minute timeout, wait for all devices currently configured. We need -+ * to do this to guarantee that the filesystems and / or network devices -+ * needed for boot are available, before we can allow the boot to proceed. -+ * -+ * This needs to be on a late_initcall, to happen after the frontend device -+ * drivers have been initialised, but before the root fs is mounted. -+ * -+ * A possible improvement here would be to have the tools add a per-device -+ * flag to the store entry, indicating whether it is needed at boot time. -+ * This would allow people who knew what they were doing to accelerate their -+ * boot slightly, but of course needs tools or manual intervention to set up -+ * those flags correctly. -+ */ -+static void wait_for_devices(struct xenbus_driver *xendrv) -+{ -+ unsigned long start = jiffies; -+ struct device_driver *drv = xendrv ? &xendrv->driver : NULL; -+ unsigned int seconds_waited = 0; -+ -+ if (!ready_to_wait_for_devices || !xen_domain()) -+ return; -+ -+ while (exists_connecting_device(drv)) { -+ if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { -+ if (!seconds_waited) -+ printk(KERN_WARNING "XENBUS: Waiting for " -+ "devices to initialise: "); -+ seconds_waited += 5; -+ printk("%us...", 300 - seconds_waited); -+ if (seconds_waited == 300) -+ break; -+ } -+ -+ schedule_timeout_interruptible(HZ/10); -+ } -+ -+ if (seconds_waited) -+ printk("\n"); -+ -+ bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, -+ print_device_status); -+} -+ -+int __xenbus_register_frontend(struct xenbus_driver *drv, -+ struct module *owner, const char *mod_name) -+{ -+ int ret; -+ -+ drv->read_otherend_details = read_backend_details; -+ -+ ret = xenbus_register_driver_common(drv, &xenbus_frontend, -+ owner, mod_name); -+ if (ret) -+ return ret; -+ -+ /* If this driver is loaded as a module wait for devices to attach. 
*/ -+ wait_for_devices(drv); -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(__xenbus_register_frontend); -+ -+static int frontend_probe_and_watch(struct notifier_block *notifier, -+ unsigned long event, -+ void *data) -+{ -+ /* Enumerate devices in xenstore and watch for changes. */ -+ xenbus_probe_devices(&xenbus_frontend); -+ register_xenbus_watch(&fe_watch); -+ -+ return NOTIFY_DONE; -+} -+ -+ -+static int __init xenbus_probe_frontend_init(void) -+{ -+ static struct notifier_block xenstore_notifier = { -+ .notifier_call = frontend_probe_and_watch -+ }; -+ int err; -+ -+ DPRINTK(""); -+ -+ /* Register ourselves with the kernel bus subsystem */ -+ err = bus_register(&xenbus_frontend.bus); -+ if (err) -+ return err; -+ -+ register_xenstore_notifier(&xenstore_notifier); -+ -+ return 0; -+} -+subsys_initcall(xenbus_probe_frontend_init); -+ -+#ifndef MODULE -+static int __init boot_wait_for_devices(void) -+{ -+ if (xen_hvm_domain() && !xen_platform_pci_unplug) -+ return -ENODEV; -+ -+ ready_to_wait_for_devices = 1; -+ wait_for_devices(NULL); -+ return 0; -+} -+ -+late_initcall(boot_wait_for_devices); -+#endif -+ -+MODULE_LICENSE("GPL"); -diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c -index 7b547f5..5534690 100644 ---- a/drivers/xen/xenbus/xenbus_xs.c -+++ b/drivers/xen/xenbus/xenbus_xs.c -@@ -76,6 +76,14 @@ struct xs_handle { - /* - * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex. - * response_mutex is never taken simultaneously with the other three. -+ * -+ * transaction_mutex must be held before incrementing -+ * transaction_count. The mutex is held when a suspend is in -+ * progress to prevent new transactions starting. -+ * -+ * When decrementing transaction_count to zero the wait queue -+ * should be woken up, the suspend code waits for count to -+ * reach zero. - */ - - /* One request at a time. */ -@@ -85,7 +93,9 @@ struct xs_handle { - struct mutex response_mutex; - - /* Protect transactions against save/restore. 
*/ -- struct rw_semaphore transaction_mutex; -+ struct mutex transaction_mutex; -+ atomic_t transaction_count; -+ wait_queue_head_t transaction_wq; - - /* Protect watch (de)register against save/restore. */ - struct rw_semaphore watch_mutex; -@@ -157,6 +167,31 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) - return body; - } - -+static void transaction_start(void) -+{ -+ mutex_lock(&xs_state.transaction_mutex); -+ atomic_inc(&xs_state.transaction_count); -+ mutex_unlock(&xs_state.transaction_mutex); -+} -+ -+static void transaction_end(void) -+{ -+ if (atomic_dec_and_test(&xs_state.transaction_count)) -+ wake_up(&xs_state.transaction_wq); -+} -+ -+static void transaction_suspend(void) -+{ -+ mutex_lock(&xs_state.transaction_mutex); -+ wait_event(xs_state.transaction_wq, -+ atomic_read(&xs_state.transaction_count) == 0); -+} -+ -+static void transaction_resume(void) -+{ -+ mutex_unlock(&xs_state.transaction_mutex); -+} -+ - void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) - { - void *ret; -@@ -164,7 +199,7 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) - int err; - - if (req_msg.type == XS_TRANSACTION_START) -- down_read(&xs_state.transaction_mutex); -+ transaction_start(); - - mutex_lock(&xs_state.request_mutex); - -@@ -180,7 +215,7 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) - if ((msg->type == XS_TRANSACTION_END) || - ((req_msg.type == XS_TRANSACTION_START) && - (msg->type == XS_ERROR))) -- up_read(&xs_state.transaction_mutex); -+ transaction_end(); - - return ret; - } -@@ -432,11 +467,11 @@ int xenbus_transaction_start(struct xenbus_transaction *t) - { - char *id_str; - -- down_read(&xs_state.transaction_mutex); -+ transaction_start(); - - id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); - if (IS_ERR(id_str)) { -- up_read(&xs_state.transaction_mutex); -+ transaction_end(); - return PTR_ERR(id_str); - } - -@@ -461,7 +496,7 @@ int xenbus_transaction_end(struct xenbus_transaction 
t, int abort) - - err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); - -- up_read(&xs_state.transaction_mutex); -+ transaction_end(); - - return err; - } -@@ -662,7 +697,7 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watch); - - void xs_suspend(void) - { -- down_write(&xs_state.transaction_mutex); -+ transaction_suspend(); - down_write(&xs_state.watch_mutex); - mutex_lock(&xs_state.request_mutex); - mutex_lock(&xs_state.response_mutex); -@@ -677,7 +712,7 @@ void xs_resume(void) - - mutex_unlock(&xs_state.response_mutex); - mutex_unlock(&xs_state.request_mutex); -- up_write(&xs_state.transaction_mutex); -+ transaction_resume(); - - /* No need for watches_lock: the watch_mutex is sufficient. */ - list_for_each_entry(watch, &watches, list) { -@@ -693,7 +728,7 @@ void xs_suspend_cancel(void) - mutex_unlock(&xs_state.response_mutex); - mutex_unlock(&xs_state.request_mutex); - up_write(&xs_state.watch_mutex); -- up_write(&xs_state.transaction_mutex); -+ mutex_unlock(&xs_state.transaction_mutex); - } - - static int xenwatch_thread(void *unused) -@@ -843,8 +878,10 @@ int xs_init(void) - - mutex_init(&xs_state.request_mutex); - mutex_init(&xs_state.response_mutex); -- init_rwsem(&xs_state.transaction_mutex); -+ mutex_init(&xs_state.transaction_mutex); - init_rwsem(&xs_state.watch_mutex); -+ atomic_set(&xs_state.transaction_count, 0); -+ init_waitqueue_head(&xs_state.transaction_wq); - - /* Initialize the shared memory rings to talk to xenstored */ - err = xb_init_comms(); -diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile -index 25275c3..4fde944 100644 ---- a/drivers/xen/xenfs/Makefile -+++ b/drivers/xen/xenfs/Makefile -@@ -1,3 +1,4 @@ - obj-$(CONFIG_XENFS) += xenfs.o - --xenfs-objs = super.o xenbus.o -\ No newline at end of file -+xenfs-y = super.o xenbus.o privcmd.o -+xenfs-$(CONFIG_XEN_DOM0) += xenstored.o -diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c -new file mode 100644 -index 0000000..f80be7f ---- /dev/null -+++ 
b/drivers/xen/xenfs/privcmd.c -@@ -0,0 +1,404 @@ -+/****************************************************************************** -+ * privcmd.c -+ * -+ * Interface to privileged domain-0 commands. -+ * -+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/sched.h> -+#include <linux/slab.h> -+#include <linux/string.h> -+#include <linux/errno.h> -+#include <linux/mm.h> -+#include <linux/mman.h> -+#include <linux/uaccess.h> -+#include <linux/swap.h> -+#include <linux/smp_lock.h> -+#include <linux/highmem.h> -+#include <linux/pagemap.h> -+#include <linux/seq_file.h> -+ -+#include <asm/pgalloc.h> -+#include <asm/pgtable.h> -+#include <asm/tlb.h> -+#include <asm/xen/hypervisor.h> -+#include <asm/xen/hypercall.h> -+ -+#include <xen/xen.h> -+#include <xen/privcmd.h> -+#include <xen/interface/xen.h> -+#include <xen/features.h> -+#include <xen/page.h> -+#include <xen/xen-ops.h> -+ -+#ifndef HAVE_ARCH_PRIVCMD_MMAP -+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); -+#endif -+ -+static long privcmd_ioctl_hypercall(void __user *udata) -+{ -+ struct privcmd_hypercall hypercall; -+ long ret; -+ -+ if (copy_from_user(&hypercall, udata, sizeof(hypercall))) -+ return -EFAULT; -+ -+ ret = privcmd_call(hypercall.op, -+ hypercall.arg[0], hypercall.arg[1], -+ hypercall.arg[2], hypercall.arg[3], -+ hypercall.arg[4]); -+ -+ return ret; -+} -+ -+static void free_page_list(struct list_head *pages) -+{ -+ struct page *p, *n; -+ -+ list_for_each_entry_safe(p, n, pages, lru) -+ __free_page(p); -+ -+ INIT_LIST_HEAD(pages); -+} -+ -+/* -+ * Given an array of items in userspace, return a list of pages -+ * containing the data. If copying fails, either because of memory -+ * allocation failure or a problem reading user memory, return an -+ * error code; its up to the caller to dispose of any partial list. 
-+ */ -+static int gather_array(struct list_head *pagelist, -+ unsigned nelem, size_t size, -+ void __user *data) -+{ -+ unsigned pageidx; -+ void *pagedata; -+ int ret; -+ -+ if (size > PAGE_SIZE) -+ return 0; -+ -+ pageidx = PAGE_SIZE; -+ pagedata = NULL; /* quiet, gcc */ -+ while (nelem--) { -+ if (pageidx > PAGE_SIZE-size) { -+ struct page *page = alloc_page(GFP_KERNEL); -+ -+ ret = -ENOMEM; -+ if (page == NULL) -+ goto fail; -+ -+ pagedata = page_address(page); -+ -+ list_add_tail(&page->lru, pagelist); -+ pageidx = 0; -+ } -+ -+ ret = -EFAULT; -+ if (copy_from_user(pagedata + pageidx, data, size)) -+ goto fail; -+ -+ data += size; -+ pageidx += size; -+ } -+ -+ ret = 0; -+ -+fail: -+ return ret; -+} -+ -+/* -+ * Call function "fn" on each element of the array fragmented -+ * over a list of pages. -+ */ -+static int traverse_pages(unsigned nelem, size_t size, -+ struct list_head *pos, -+ int (*fn)(void *data, void *state), -+ void *state) -+{ -+ void *pagedata; -+ unsigned pageidx; -+ int ret = 0; -+ -+ BUG_ON(size > PAGE_SIZE); -+ -+ pageidx = PAGE_SIZE; -+ pagedata = NULL; /* hush, gcc */ -+ -+ while (nelem--) { -+ if (pageidx > PAGE_SIZE-size) { -+ struct page *page; -+ pos = pos->next; -+ page = list_entry(pos, struct page, lru); -+ pagedata = page_address(page); -+ pageidx = 0; -+ } -+ -+ ret = (*fn)(pagedata + pageidx, state); -+ if (ret) -+ break; -+ pageidx += size; -+ } -+ -+ return ret; -+} -+ -+struct mmap_mfn_state { -+ unsigned long va; -+ struct vm_area_struct *vma; -+ domid_t domain; -+}; -+ -+static int mmap_mfn_range(void *data, void *state) -+{ -+ struct privcmd_mmap_entry *msg = data; -+ struct mmap_mfn_state *st = state; -+ struct vm_area_struct *vma = st->vma; -+ int rc; -+ -+ /* Do not allow range to wrap the address space. */ -+ if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || -+ ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) -+ return -EINVAL; -+ -+ /* Range chunks must be contiguous in va space. 
*/ -+ if ((msg->va != st->va) || -+ ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) -+ return -EINVAL; -+ -+ rc = xen_remap_domain_mfn_range(vma, -+ msg->va & PAGE_MASK, -+ msg->mfn, msg->npages, -+ vma->vm_page_prot, -+ st->domain); -+ if (rc < 0) -+ return rc; -+ -+ st->va += msg->npages << PAGE_SHIFT; -+ -+ return 0; -+} -+ -+static long privcmd_ioctl_mmap(void __user *udata) -+{ -+ struct privcmd_mmap mmapcmd; -+ struct mm_struct *mm = current->mm; -+ struct vm_area_struct *vma; -+ int rc; -+ LIST_HEAD(pagelist); -+ struct mmap_mfn_state state; -+ -+ if (!xen_initial_domain()) -+ return -EPERM; -+ -+ if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) -+ return -EFAULT; -+ -+ rc = gather_array(&pagelist, -+ mmapcmd.num, sizeof(struct privcmd_mmap_entry), -+ mmapcmd.entry); -+ -+ if (rc || list_empty(&pagelist)) -+ goto out; -+ -+ down_write(&mm->mmap_sem); -+ -+ { -+ struct page *page = list_first_entry(&pagelist, -+ struct page, lru); -+ struct privcmd_mmap_entry *msg = page_address(page); -+ -+ vma = find_vma(mm, msg->va); -+ rc = -EINVAL; -+ -+ if (!vma || (msg->va != vma->vm_start) || -+ !privcmd_enforce_singleshot_mapping(vma)) -+ goto out_up; -+ } -+ -+ state.va = vma->vm_start; -+ state.vma = vma; -+ state.domain = mmapcmd.dom; -+ -+ rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), -+ &pagelist, -+ mmap_mfn_range, &state); -+ -+ -+out_up: -+ up_write(&mm->mmap_sem); -+ -+out: -+ free_page_list(&pagelist); -+ -+ return rc; -+} -+ -+struct mmap_batch_state { -+ domid_t domain; -+ unsigned long va; -+ struct vm_area_struct *vma; -+ int err; -+ -+ xen_pfn_t __user *user; -+}; -+ -+static int mmap_batch_fn(void *data, void *state) -+{ -+ xen_pfn_t *mfnp = data; -+ struct mmap_batch_state *st = state; -+ -+ if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, -+ st->vma->vm_page_prot, st->domain) < 0) { -+ *mfnp |= 0xf0000000U; -+ st->err++; -+ } -+ st->va += PAGE_SIZE; -+ -+ return 0; -+} -+ -+static int 
mmap_return_errors(void *data, void *state) -+{ -+ xen_pfn_t *mfnp = data; -+ struct mmap_batch_state *st = state; -+ -+ put_user(*mfnp, st->user++); -+ -+ return 0; -+} -+ -+static struct vm_operations_struct privcmd_vm_ops; -+ -+static long privcmd_ioctl_mmap_batch(void __user *udata) -+{ -+ int ret; -+ struct privcmd_mmapbatch m; -+ struct mm_struct *mm = current->mm; -+ struct vm_area_struct *vma; -+ unsigned long nr_pages; -+ LIST_HEAD(pagelist); -+ struct mmap_batch_state state; -+ -+ if (!xen_initial_domain()) -+ return -EPERM; -+ -+ if (copy_from_user(&m, udata, sizeof(m))) -+ return -EFAULT; -+ -+ nr_pages = m.num; -+ if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) -+ return -EINVAL; -+ -+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), -+ m.arr); -+ -+ if (ret || list_empty(&pagelist)) -+ goto out; -+ -+ down_write(&mm->mmap_sem); -+ -+ vma = find_vma(mm, m.addr); -+ ret = -EINVAL; -+ if (!vma || -+ vma->vm_ops != &privcmd_vm_ops || -+ (m.addr != vma->vm_start) || -+ ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || -+ !privcmd_enforce_singleshot_mapping(vma)) { -+ up_write(&mm->mmap_sem); -+ goto out; -+ } -+ -+ state.domain = m.dom; -+ state.vma = vma; -+ state.va = m.addr; -+ state.err = 0; -+ -+ ret = traverse_pages(m.num, sizeof(xen_pfn_t), -+ &pagelist, mmap_batch_fn, &state); -+ -+ up_write(&mm->mmap_sem); -+ -+ if (state.err > 0) { -+ ret = 0; -+ -+ state.user = m.arr; -+ traverse_pages(m.num, sizeof(xen_pfn_t), -+ &pagelist, -+ mmap_return_errors, &state); -+ } -+ -+out: -+ free_page_list(&pagelist); -+ -+ return ret; -+} -+ -+static long privcmd_ioctl(struct file *file, -+ unsigned int cmd, unsigned long data) -+{ -+ int ret = -ENOSYS; -+ void __user *udata = (void __user *) data; -+ -+ switch (cmd) { -+ case IOCTL_PRIVCMD_HYPERCALL: -+ ret = privcmd_ioctl_hypercall(udata); -+ break; -+ -+ case IOCTL_PRIVCMD_MMAP: -+ ret = privcmd_ioctl_mmap(udata); -+ break; -+ -+ case IOCTL_PRIVCMD_MMAPBATCH: -+ ret = 
privcmd_ioctl_mmap_batch(udata); -+ break; -+ -+ default: -+ ret = -EINVAL; -+ break; -+ } -+ -+ return ret; -+} -+ -+#ifndef HAVE_ARCH_PRIVCMD_MMAP -+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -+{ -+ printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", -+ vma, vma->vm_start, vma->vm_end, -+ vmf->pgoff, vmf->virtual_address); -+ -+ return VM_FAULT_SIGBUS; -+} -+ -+static struct vm_operations_struct privcmd_vm_ops = { -+ .fault = privcmd_fault -+}; -+ -+static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ /* Unsupported for auto-translate guests. */ -+ if (xen_feature(XENFEAT_auto_translated_physmap)) -+ return -ENOSYS; -+ -+ /* DONTCOPY is essential for Xen as copy_page_range is broken. */ -+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; -+ vma->vm_ops = &privcmd_vm_ops; -+ vma->vm_private_data = NULL; -+ -+ return 0; -+} -+ -+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) -+{ -+ return (xchg(&vma->vm_private_data, (void *)1) == NULL); -+} -+#endif -+ -+const struct file_operations privcmd_file_ops = { -+ .unlocked_ioctl = privcmd_ioctl, -+ .mmap = privcmd_mmap, -+}; -diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c -index 6559e0c..984891e 100644 ---- a/drivers/xen/xenfs/super.c -+++ b/drivers/xen/xenfs/super.c -@@ -12,6 +12,10 @@ - #include <linux/module.h> - #include <linux/fs.h> - #include <linux/magic.h> -+#include <linux/mm.h> -+#include <linux/backing-dev.h> -+ -+#include <xen/xen.h> - - #include "xenfs.h" - -@@ -20,6 +24,62 @@ - MODULE_DESCRIPTION("Xen filesystem"); - MODULE_LICENSE("GPL"); - -+static int xenfs_set_page_dirty(struct page *page) -+{ -+ return !TestSetPageDirty(page); -+} -+ -+static const struct address_space_operations xenfs_aops = { -+ .set_page_dirty = xenfs_set_page_dirty, -+}; -+ -+static struct backing_dev_info xenfs_backing_dev_info = { -+ .ra_pages = 0, /* No readahead */ -+ .capabilities = 
BDI_CAP_NO_ACCT_AND_WRITEBACK, -+}; -+ -+static struct inode *xenfs_make_inode(struct super_block *sb, int mode) -+{ -+ struct inode *ret = new_inode(sb); -+ -+ if (ret) { -+ ret->i_mode = mode; -+ ret->i_mapping->a_ops = &xenfs_aops; -+ ret->i_mapping->backing_dev_info = &xenfs_backing_dev_info; -+ ret->i_uid = ret->i_gid = 0; -+ ret->i_blocks = 0; -+ ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; -+ } -+ return ret; -+} -+ -+static struct dentry *xenfs_create_file(struct super_block *sb, -+ struct dentry *parent, -+ const char *name, -+ const struct file_operations *fops, -+ void *data, -+ int mode) -+{ -+ struct dentry *dentry; -+ struct inode *inode; -+ -+ dentry = d_alloc_name(parent, name); -+ if (!dentry) -+ return NULL; -+ -+ inode = xenfs_make_inode(sb, S_IFREG | mode); -+ if (!inode) { -+ dput(dentry); -+ return NULL; -+ } -+ -+ inode->i_fop = fops; -+ inode->i_private = data; -+ -+ d_add(dentry, inode); -+ return dentry; -+} -+ - static ssize_t capabilities_read(struct file *file, char __user *buf, - size_t size, loff_t *off) - { -@@ -41,10 +101,23 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) - [1] = {}, - { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, - { "capabilities", &capabilities_file_ops, S_IRUGO }, -+ { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR }, - {""}, - }; -+ int rc; -+ -+ rc = simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files); -+ if (rc < 0) -+ return rc; -+ -+ if (xen_initial_domain()) { -+ xenfs_create_file(sb, sb->s_root, "xsd_kva", -+ &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR); -+ xenfs_create_file(sb, sb->s_root, "xsd_port", -+ &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR); -+ } - -- return simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files); -+ return rc; - } - - static int xenfs_get_sb(struct file_system_type *fs_type, -@@ -63,16 +136,30 @@ static struct file_system_type xenfs_type = { - - static int __init xenfs_init(void) - { -- if (xen_pv_domain()) -- return 
register_filesystem(&xenfs_type); -+ int err; -+ if (!xen_domain()) { -+ printk(KERN_INFO "xenfs: not registering filesystem on non-xen platform\n"); -+ return 0; -+ } -+ -+ err = register_filesystem(&xenfs_type); -+ if (err) { -+ printk(KERN_ERR "xenfs: Unable to register filesystem!\n"); -+ goto out; -+ } -+ -+ err = bdi_init(&xenfs_backing_dev_info); -+ if (err) -+ unregister_filesystem(&xenfs_type); -+ -+ out: - -- printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n"); -- return 0; -+ return err; - } - - static void __exit xenfs_exit(void) - { -- if (xen_pv_domain()) -+ if (xen_domain()) - unregister_filesystem(&xenfs_type); - } - -diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenfs/xenbus.c -index 6c4269b..0ddef43 100644 ---- a/drivers/xen/xenfs/xenbus.c -+++ b/drivers/xen/xenfs/xenbus.c -@@ -121,8 +121,12 @@ static ssize_t xenbus_file_read(struct file *filp, - int ret; - - mutex_lock(&u->reply_mutex); -+again: - while (list_empty(&u->read_buffers)) { - mutex_unlock(&u->reply_mutex); -+ if (filp->f_flags & O_NONBLOCK) -+ return -EAGAIN; -+ - ret = wait_event_interruptible(u->read_waitq, - !list_empty(&u->read_buffers)); - if (ret) -@@ -140,7 +144,7 @@ static ssize_t xenbus_file_read(struct file *filp, - i += sz - ret; - rb->cons += sz - ret; - -- if (ret != sz) { -+ if (ret != 0) { - if (i == 0) - i = -EFAULT; - goto out; -@@ -156,6 +160,8 @@ static ssize_t xenbus_file_read(struct file *filp, - struct read_buffer, list); - } - } -+ if (i == 0) -+ goto again; - - out: - mutex_unlock(&u->reply_mutex); -@@ -403,6 +409,7 @@ static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) - - mutex_lock(&u->reply_mutex); - rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); -+ wake_up(&u->read_waitq); - mutex_unlock(&u->reply_mutex); - } - -@@ -451,7 +458,7 @@ static ssize_t xenbus_file_write(struct file *filp, - - ret = copy_from_user(u->u.buffer + u->len, ubuf, len); - -- if (ret == len) { -+ if (ret != 0) { - rc = 
-EFAULT; - goto out; - } -@@ -484,21 +491,6 @@ static ssize_t xenbus_file_write(struct file *filp, - msg_type = u->u.msg.type; - - switch (msg_type) { -- case XS_TRANSACTION_START: -- case XS_TRANSACTION_END: -- case XS_DIRECTORY: -- case XS_READ: -- case XS_GET_PERMS: -- case XS_RELEASE: -- case XS_GET_DOMAIN_PATH: -- case XS_WRITE: -- case XS_MKDIR: -- case XS_RM: -- case XS_SET_PERMS: -- /* Send out a transaction */ -- ret = xenbus_write_transaction(msg_type, u); -- break; -- - case XS_WATCH: - case XS_UNWATCH: - /* (Un)Ask for some path to be watched for changes */ -@@ -506,7 +498,8 @@ static ssize_t xenbus_file_write(struct file *filp, - break; - - default: -- ret = -EINVAL; -+ /* Send out a transaction */ -+ ret = xenbus_write_transaction(msg_type, u); - break; - } - if (ret != 0) -diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h -index 51f08b2..b68aa62 100644 ---- a/drivers/xen/xenfs/xenfs.h -+++ b/drivers/xen/xenfs/xenfs.h -@@ -2,5 +2,8 @@ - #define _XENFS_XENBUS_H - - extern const struct file_operations xenbus_file_ops; -+extern const struct file_operations privcmd_file_ops; -+extern const struct file_operations xsd_kva_file_ops; -+extern const struct file_operations xsd_port_file_ops; - - #endif /* _XENFS_XENBUS_H */ -diff --git a/drivers/xen/xenfs/xenstored.c b/drivers/xen/xenfs/xenstored.c -new file mode 100644 -index 0000000..af10804 ---- /dev/null -+++ b/drivers/xen/xenfs/xenstored.c -@@ -0,0 +1,67 @@ -+#include <linux/types.h> -+#include <linux/mm.h> -+#include <linux/fs.h> -+ -+#include <xen/page.h> -+ -+#include "xenfs.h" -+#include "../xenbus/xenbus_comms.h" -+ -+static ssize_t xsd_read(struct file *file, char __user *buf, -+ size_t size, loff_t *off) -+{ -+ const char *str = (const char *)file->private_data; -+ return simple_read_from_buffer(buf, size, off, str, strlen(str)); -+} -+ -+static int xsd_release(struct inode *inode, struct file *file) -+{ -+ kfree(file->private_data); -+ return 0; -+} -+ -+static int 
xsd_kva_open(struct inode *inode, struct file *file) -+{ -+ file->private_data = (void *)kasprintf(GFP_KERNEL, "0x%p", -+ xen_store_interface); -+ if (!file->private_data) -+ return -ENOMEM; -+ return 0; -+} -+ -+static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ size_t size = vma->vm_end - vma->vm_start; -+ -+ if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) -+ return -EINVAL; -+ -+ if (remap_pfn_range(vma, vma->vm_start, -+ virt_to_pfn(xen_store_interface), -+ size, vma->vm_page_prot)) -+ return -EAGAIN; -+ -+ return 0; -+} -+ -+const struct file_operations xsd_kva_file_ops = { -+ .open = xsd_kva_open, -+ .mmap = xsd_kva_mmap, -+ .read = xsd_read, -+ .release = xsd_release, -+}; -+ -+static int xsd_port_open(struct inode *inode, struct file *file) -+{ -+ file->private_data = (void *)kasprintf(GFP_KERNEL, "%d", -+ xen_store_evtchn); -+ if (!file->private_data) -+ return -ENOMEM; -+ return 0; -+} -+ -+const struct file_operations xsd_port_file_ops = { -+ .open = xsd_port_open, -+ .read = xsd_read, -+ .release = xsd_release, -+}; -diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h -index f4906f6..e7233e8 100644 ---- a/include/acpi/acpi_drivers.h -+++ b/include/acpi/acpi_drivers.h -@@ -154,4 +154,25 @@ static inline void unregister_hotplug_dock_device(acpi_handle handle) - } - #endif - -+/*-------------------------------------------------------------------------- -+ Memory -+ -------------------------------------------------------------------------- */ -+#if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \ -+ defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE) -+struct acpi_memory_info { -+ struct list_head list; -+ u64 start_addr; /* Memory Range start physical addr */ -+ u64 length; /* Memory Range length */ -+ unsigned short caching; /* memory cache attribute */ -+ unsigned short write_protect; /* memory read/write attribute */ -+ unsigned int enabled:1; -+}; -+ -+struct acpi_memory_device { -+ struct acpi_device *device; -+ unsigned 
int state; /* State of the memory device */ -+ struct list_head res_list; -+}; -+#endif -+ - #endif /*__ACPI_DRIVERS_H__*/ -diff --git a/include/acpi/processor.h b/include/acpi/processor.h -index e7bdaaf..6aa3111 100644 ---- a/include/acpi/processor.h -+++ b/include/acpi/processor.h -@@ -239,6 +239,25 @@ struct acpi_processor_errata { - } piix4; - }; - -+extern int acpi_processor_errata(struct acpi_processor *pr); -+#ifdef CONFIG_ACPI_PROCFS -+extern int acpi_processor_add_fs(struct acpi_device *device); -+extern int acpi_processor_remove_fs(struct acpi_device *device); -+#else -+static inline int acpi_processor_add_fs(struct acpi_device *device) -+{ -+ return 0; -+} -+ -+static inline int acpi_processor_remove_fs(struct acpi_device *device) -+{ -+ return 0; -+} -+#endif -+extern int acpi_processor_set_pdc(struct acpi_processor *pr); -+extern int acpi_processor_remove(struct acpi_device *device, int type); -+extern void acpi_processor_notify(struct acpi_device *device, u32 event); -+ - extern int acpi_processor_preregister_performance(struct - acpi_processor_performance - *performance); -@@ -296,6 +315,8 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx - void acpi_processor_ppc_init(void); - void acpi_processor_ppc_exit(void); - int acpi_processor_ppc_has_changed(struct acpi_processor *pr); -+int acpi_processor_get_performance_info(struct acpi_processor *pr); -+int acpi_processor_get_psd(struct acpi_processor *pr); - #else - static inline void acpi_processor_ppc_init(void) - { -@@ -332,6 +353,7 @@ int acpi_processor_power_init(struct acpi_processor *pr, - int acpi_processor_cst_has_changed(struct acpi_processor *pr); - int acpi_processor_power_exit(struct acpi_processor *pr, - struct acpi_device *device); -+int acpi_processor_get_power_info(struct acpi_processor *pr); - int acpi_processor_suspend(struct acpi_device * device, pm_message_t state); - int acpi_processor_resume(struct acpi_device * device); - extern struct cpuidle_driver 
acpi_idle_driver; -diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h -index 26373cf..9fb4270 100644 ---- a/include/asm-generic/pci.h -+++ b/include/asm-generic/pci.h -@@ -43,6 +43,8 @@ pcibios_select_root(struct pci_dev *pdev, struct resource *res) - return root; - } - -+#ifndef HAVE_ARCH_PCIBIOS_SCAN_ALL_FNS -+#endif - #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ - static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) - { -diff --git a/include/drm/drmP.h b/include/drm/drmP.h -index 7ad3faa..cf9ddce 100644 ---- a/include/drm/drmP.h -+++ b/include/drm/drmP.h -@@ -1388,7 +1388,7 @@ extern int drm_vma_info(struct seq_file *m, void *data); - #endif - - /* Scatter Gather Support (drm_scatter.h) */ --extern void drm_sg_cleanup(struct drm_sg_mem * entry); -+extern void drm_sg_cleanup(struct drm_device *dev, struct drm_sg_mem * entry); - extern int drm_sg_alloc_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); - extern int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request); -diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h -index dd97fb8..b10ec49 100644 ---- a/include/linux/bootmem.h -+++ b/include/linux/bootmem.h -@@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat, - unsigned long addr, - unsigned long size); - extern void free_bootmem(unsigned long addr, unsigned long size); -+extern void free_bootmem_late(unsigned long addr, unsigned long size); - - /* - * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, -diff --git a/include/linux/dmar.h b/include/linux/dmar.h -index 4a2b162..5de4c9e 100644 ---- a/include/linux/dmar.h -+++ b/include/linux/dmar.h -@@ -208,16 +208,9 @@ struct dmar_atsr_unit { - u8 include_all:1; /* include all ports */ - }; - --/* Intel DMAR initialization functions */ - extern int intel_iommu_init(void); --#else --static inline int intel_iommu_init(void) --{ --#ifdef CONFIG_INTR_REMAP -- return dmar_dev_scope_init(); --#else -- 
return -ENODEV; --#endif --} --#endif /* !CONFIG_DMAR */ -+#else /* !CONFIG_DMAR: */ -+static inline int intel_iommu_init(void) { return -ENODEV; } -+#endif /* CONFIG_DMAR */ -+ - #endif /* __DMAR_H__ */ -diff --git a/include/linux/fb.h b/include/linux/fb.h -index 862e7d4..74d67ca 100644 ---- a/include/linux/fb.h -+++ b/include/linux/fb.h -@@ -763,6 +763,7 @@ struct fb_tile_ops { - * takes over; acceleration engine should be in a quiescent state */ - - /* hints */ -+#define FBINFO_VIRTFB 0x0004 /* FB is System RAM, not device. */ - #define FBINFO_PARTIAL_PAN_OK 0x0040 /* otw use pan only for double-buffering */ - #define FBINFO_READS_FAST 0x0080 /* soft-copy faster than rendering */ - -diff --git a/include/linux/if_link.h b/include/linux/if_link.h -index 176c518..d681cc9 100644 ---- a/include/linux/if_link.h -+++ b/include/linux/if_link.h -@@ -81,6 +81,8 @@ enum - #define IFLA_LINKINFO IFLA_LINKINFO - IFLA_NET_NS_PID, - IFLA_IFALIAS, -+ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ -+ IFLA_VFINFO_LIST, - __IFLA_MAX - }; - -@@ -190,4 +192,47 @@ struct ifla_vlan_qos_mapping - __u32 to; - }; - -+/* SR-IOV virtual function managment section */ -+ -+enum { -+ IFLA_VF_INFO_UNSPEC, -+ IFLA_VF_INFO, -+ __IFLA_VF_INFO_MAX, -+}; -+ -+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) -+ -+enum { -+ IFLA_VF_UNSPEC, -+ IFLA_VF_MAC, /* Hardware queue specific attributes */ -+ IFLA_VF_VLAN, -+ IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ -+ __IFLA_VF_MAX, -+}; -+ -+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) -+ -+struct ifla_vf_mac { -+ __u32 vf; -+ __u8 mac[32]; /* MAX_ADDR_LEN */ -+}; -+ -+struct ifla_vf_vlan { -+ __u32 vf; -+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ -+ __u32 qos; -+}; -+ -+struct ifla_vf_tx_rate { -+ __u32 vf; -+ __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ -+}; -+ -+struct ifla_vf_info { -+ __u32 vf; -+ __u8 mac[32]; -+ __u32 vlan; -+ __u32 qos; -+ __u32 tx_rate; -+}; - #endif /* _LINUX_IF_LINK_H */ -diff --git 
a/include/linux/mm.h b/include/linux/mm.h -index 11e5be6..4c98621 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -109,6 +109,12 @@ extern unsigned int kobjsize(const void *objp); - #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ - #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ - #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ -+#ifdef CONFIG_XEN -+#define VM_FOREIGN 0x20000000 /* Has pages belonging to another VM */ -+struct vm_foreign_map { -+ struct page **map; -+}; -+#endif - - #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ - #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS -@@ -199,6 +205,11 @@ struct vm_operations_struct { - */ - int (*access)(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write); -+ -+ /* Area-specific function for clearing the PTE at @ptep. Returns the -+ * original value of @ptep. */ -+ pte_t (*zap_pte)(struct vm_area_struct *vma, -+ unsigned long addr, pte_t *ptep, int is_fullmm); - #ifdef CONFIG_NUMA - /* - * set_policy() op must add a reference to any non-NULL @new mempolicy -diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h -index ec12f8c..3f4991c 100644 ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -28,6 +28,7 @@ - #include <linux/if.h> - #include <linux/if_ether.h> - #include <linux/if_packet.h> -+#include <linux/if_link.h> - - #ifdef __KERNEL__ - #include <linux/timer.h> -@@ -577,6 +578,13 @@ struct netdev_queue { - * this function is called when a VLAN id is unregistered. - * - * void (*ndo_poll_controller)(struct net_device *dev); -+ * -+ * SR-IOV management functions. 
-+ * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); -+ * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); -+ * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate); -+ * int (*ndo_get_vf_config)(struct net_device *dev, -+ * int vf, struct ifla_vf_info *ivf); - */ - #define HAVE_NET_DEVICE_OPS - struct net_device_ops { -@@ -626,6 +634,15 @@ struct net_device_ops { - #define HAVE_NETDEV_POLL - void (*ndo_poll_controller)(struct net_device *dev); - #endif -+ int (*ndo_set_vf_mac)(struct net_device *dev, -+ int queue, u8 *mac); -+ int (*ndo_set_vf_vlan)(struct net_device *dev, -+ int queue, u16 vlan, u8 qos); -+ int (*ndo_set_vf_tx_rate)(struct net_device *dev, -+ int vf, int rate); -+ int (*ndo_get_vf_config)(struct net_device *dev, -+ int vf, -+ struct ifla_vf_info *ivf); - #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) - int (*ndo_fcoe_enable)(struct net_device *dev); - int (*ndo_fcoe_disable)(struct net_device *dev); -diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h -index 6b202b1..b03950e 100644 ---- a/include/linux/page-flags.h -+++ b/include/linux/page-flags.h -@@ -105,6 +105,9 @@ enum pageflags { - #ifdef CONFIG_ARCH_USES_PG_UNCACHED - PG_uncached, /* Page has been mapped as uncached */ - #endif -+#ifdef CONFIG_XEN -+ PG_foreign, -+#endif - #ifdef CONFIG_MEMORY_FAILURE - PG_hwpoison, /* hardware poisoned page. 
Don't touch */ - #endif -@@ -275,6 +278,23 @@ PAGEFLAG(Uncached, uncached) - PAGEFLAG_FALSE(Uncached) - #endif - -+#ifdef CONFIG_XEN -+TESTPAGEFLAG(Foreign, foreign) -+__SETPAGEFLAG(Foreign, foreign) -+CLEARPAGEFLAG(Foreign, foreign) -+#define SetPageForeign(_page, dtor) do { \ -+ __SetPageForeign(_page); \ -+ BUG_ON((dtor) == (void (*)(struct page *, unsigned int))0); \ -+ (_page)->index = (long)(dtor); \ -+} while (0) -+#define _PageForeignDestructor(_page) \ -+ ((void (*)(struct page *, unsigned int))(_page)->index) -+#define PageForeignDestructor(_page, order) \ -+ _PageForeignDestructor(_page)(_page, order) -+#else -+PAGEFLAG_FALSE(Foreign) -+#endif -+ - #ifdef CONFIG_MEMORY_FAILURE - PAGEFLAG(HWPoison, hwpoison) - TESTSETFLAG(HWPoison, hwpoison) -diff --git a/include/linux/pci.h b/include/linux/pci.h -index e07d194..ca28e46 100644 ---- a/include/linux/pci.h -+++ b/include/linux/pci.h -@@ -609,6 +609,9 @@ extern void pci_remove_bus_device(struct pci_dev *dev); - extern void pci_stop_bus_device(struct pci_dev *dev); - void pci_setup_cardbus(struct pci_bus *bus); - extern void pci_sort_breadthfirst(void); -+#define dev_is_pci(d) ((d)->bus == &pci_bus_type) -+#define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_physfn : false)) -+#define dev_num_vf(d) ((dev_is_pci(d) ? 
pci_num_vf(to_pci_dev(d)) : 0)) - - /* Generic PCI functions exported to card drivers */ - -@@ -1124,6 +1127,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, - unsigned int devfn) - { return NULL; } - -+#define dev_is_pci(d) (false) -+#define dev_is_pf(d) (false) -+#define dev_num_vf(d) (0) - #endif /* CONFIG_PCI */ - - /* Include architecture-dependent settings and functions */ -@@ -1279,6 +1285,7 @@ void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); - extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); - extern void pci_disable_sriov(struct pci_dev *dev); - extern irqreturn_t pci_sriov_migration(struct pci_dev *dev); -+extern int pci_num_vf(struct pci_dev *dev); - #else - static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) - { -@@ -1291,6 +1298,10 @@ static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev) - { - return IRQ_NONE; - } -+static inline int pci_num_vf(struct pci_dev *dev) -+{ -+ return 0; -+} - #endif - - #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) -diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h -index fe2f4ee..b72b9e6 100644 ---- a/include/linux/pci_ids.h -+++ b/include/linux/pci_ids.h -@@ -2717,3 +2717,6 @@ - #define PCI_DEVICE_ID_RME_DIGI32 0x9896 - #define PCI_DEVICE_ID_RME_DIGI32_PRO 0x9897 - #define PCI_DEVICE_ID_RME_DIGI32_8 0x9898 -+ -+#define PCI_VENDOR_ID_XEN 0x5853 -+#define PCI_DEVICE_ID_XEN_PLATFORM 0x0001 -diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h -index 73b1f1c..113585a 100644 ---- a/include/linux/swiotlb.h -+++ b/include/linux/swiotlb.h -@@ -7,6 +7,8 @@ struct device; - struct dma_attrs; - struct scatterlist; - -+extern int swiotlb_force; -+ - /* - * Maximum allowable number of contiguous slabs to map, - * must be a power of 2. What is the appropriate value ? 
-@@ -20,9 +22,46 @@ struct scatterlist; - */ - #define IO_TLB_SHIFT 11 - --extern void --swiotlb_init(void); -- -+/* swiotlb-core.c */ -+extern void swiotlb_init(int verbose); -+#ifdef CONFIG_SWIOTLB -+extern void __init swiotlb_free(void); -+#else -+static inline void swiotlb_free(void) { } -+#endif -+extern void swiotlb_print_info(void); -+ -+/* swiotlb-core.c: Internal book-keeping functions. -+ * Must be linked against the library to take advantage of them.*/ -+#ifdef CONFIG_SWIOTLB -+/* -+ * Enumeration for sync targets -+ */ -+enum dma_sync_target { -+ SYNC_FOR_CPU = 0, -+ SYNC_FOR_DEVICE = 1, -+}; -+extern char *io_tlb_start; -+extern char *io_tlb_end; -+extern unsigned long io_tlb_nslabs; -+extern void *io_tlb_overflow_buffer; -+extern unsigned long io_tlb_overflow; -+extern int is_swiotlb_buffer(phys_addr_t paddr); -+extern void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, -+ enum dma_data_direction dir); -+extern void *do_map_single(struct device *hwdev, phys_addr_t phys, -+ unsigned long start_dma_addr, size_t size, int dir); -+ -+extern void do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, -+ int dir); -+ -+extern void do_sync_single(struct device *hwdev, char *dma_addr, size_t size, -+ int dir, int target); -+extern void swiotlb_full(struct device *dev, size_t size, int dir, int do_panic); -+extern void __init swiotlb_init_early(size_t default_size, int verbose); -+#endif -+ -+/* swiotlb.c: dma_ops functions. */ - extern void - *swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags); -@@ -88,4 +127,74 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); - extern int - swiotlb_dma_supported(struct device *hwdev, u64 mask); - -+/* swiotlb-xen.c: dma_ops functions. 
*/ -+extern void xen_swiotlb_init(int verbose); -+extern void -+*xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, -+ dma_addr_t *dma_handle, gfp_t flags); -+ -+extern void -+xen_swiotlb_free_coherent(struct device *hwdev, size_t size, -+ void *vaddr, dma_addr_t dma_handle); -+ -+extern dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, -+ unsigned long offset, size_t size, -+ enum dma_data_direction dir, -+ struct dma_attrs *attrs); -+extern void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, -+ size_t size, enum dma_data_direction dir, -+ struct dma_attrs *attrs); -+ -+extern int -+xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, -+ int direction); -+ -+extern void -+xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, -+ int direction); -+ -+extern int -+xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, -+ int nelems, enum dma_data_direction dir, -+ struct dma_attrs *attrs); -+ -+extern void -+xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, -+ int nelems, enum dma_data_direction dir, -+ struct dma_attrs *attrs); -+ -+extern void -+xen_swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, -+ size_t size, enum dma_data_direction dir); -+ -+extern void -+xen_swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, -+ int nelems, enum dma_data_direction dir); -+ -+extern void -+xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, -+ size_t size, enum dma_data_direction dir); -+ -+extern void -+xen_swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, -+ int nelems, enum dma_data_direction dir); -+ -+extern void -+xen_swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, -+ unsigned long offset, size_t size, -+ enum dma_data_direction dir); -+ -+extern void -+xen_swiotlb_sync_single_range_for_device(struct device *hwdev, -+ 
dma_addr_t dev_addr, -+ unsigned long offset, size_t size, -+ enum dma_data_direction dir); -+ -+extern int -+xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); -+ -+extern int -+xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); -+ -+ - #endif /* __LINUX_SWIOTLB_H */ -diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h -index 3c123c3..1a2ba21 100644 ---- a/include/linux/vmalloc.h -+++ b/include/linux/vmalloc.h -@@ -7,6 +7,8 @@ - - struct vm_area_struct; /* vma defining user mapping in mm_types.h */ - -+extern bool vmap_lazy_unmap; -+ - /* bits in flags of vmalloc's vm_struct below */ - #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ - #define VM_ALLOC 0x00000002 /* vmalloc() */ -diff --git a/include/xen/Kbuild b/include/xen/Kbuild -index 4e65c16..84ad8f0 100644 ---- a/include/xen/Kbuild -+++ b/include/xen/Kbuild -@@ -1 +1,2 @@ - header-y += evtchn.h -+header-y += privcmd.h -diff --git a/include/xen/acpi.h b/include/xen/acpi.h -new file mode 100644 -index 0000000..279142d ---- /dev/null -+++ b/include/xen/acpi.h -@@ -0,0 +1,106 @@ -+#ifndef _XEN_ACPI_H -+#define _XEN_ACPI_H -+ -+#include <linux/types.h> -+#include <acpi/acpi_drivers.h> -+#include <acpi/processor.h> -+#include <xen/xen.h> -+ -+#ifdef CONFIG_XEN_S3 -+#include <asm/xen/hypervisor.h> -+ -+static inline bool xen_pv_acpi(void) -+{ -+ return xen_pv_domain(); -+} -+#else -+static inline bool xen_pv_acpi(void) -+{ -+ return false; -+} -+#endif -+ -+int acpi_notify_hypervisor_state(u8 sleep_state, -+ u32 pm1a_cnt, u32 pm1b_cnd); -+ -+/* -+ * Following are interfaces for xen acpi processor control -+ */ -+ -+/* Events notified to xen */ -+#define PROCESSOR_PM_INIT 1 -+#define PROCESSOR_PM_CHANGE 2 -+#define PROCESSOR_HOTPLUG 3 -+ -+/* Objects for the PM events */ -+#define PM_TYPE_IDLE 0 -+#define PM_TYPE_PERF 1 -+#define PM_TYPE_THR 2 -+#define PM_TYPE_MAX 3 -+ -+#define XEN_MAX_ACPI_ID 255 -+ -+/* Processor hotplug events */ -+#define HOTPLUG_TYPE_ADD 0 
-+#define HOTPLUG_TYPE_REMOVE 1 -+ -+int xen_acpi_processor_init(void); -+void xen_acpi_processor_exit(void); -+ -+int xen_acpi_processor_power_init(struct acpi_processor *pr, -+ struct acpi_device *device); -+int xen_acpi_processor_cst_has_changed(struct acpi_processor *pr); -+ -+void xen_arch_acpi_processor_init_pdc(struct acpi_processor *pr); -+ -+#ifdef CONFIG_CPU_FREQ -+int xen_acpi_processor_ppc_has_changed(struct acpi_processor *pr); -+int xen_acpi_processor_get_performance(struct acpi_processor *pr); -+#else -+static inline int xen_acpi_processor_ppc_has_changed(struct acpi_processor *pr) -+{ -+ return acpi_processor_ppc_has_changed(pr); -+} -+static inline int xen_acpi_processor_get_performance(struct acpi_processor *pr) -+{ -+ printk(KERN_WARNING -+ "Warning: xen_acpi_processor_get_performance not supported\n" -+ "Consider compiling CPUfreq support into your kernel.\n"); -+ return 0; -+} -+#endif -+ -+#if defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \ -+ defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE) -+int xen_hotadd_memory(struct acpi_memory_device *mem_device); -+#endif -+ -+#if defined(CONFIG_ACPI_PROCESSOR_XEN) || \ -+defined(CONFIG_ACPI_PROCESSOR_XEN_MODULE) -+ -+struct processor_cntl_xen_ops { -+ /* Transfer processor PM events to xen */ -+int (*pm_ops[PM_TYPE_MAX])(struct acpi_processor *pr, int event); -+ /* Notify physical processor status to xen */ -+ int (*hotplug)(struct acpi_processor *pr, int type); -+}; -+ -+extern int processor_cntl_xen_notify(struct acpi_processor *pr, -+ int event, int type); -+extern int processor_cntl_xen_power_cache(int cpu, int cx, -+ struct acpi_power_register *reg); -+#else -+ -+static inline int processor_cntl_xen_notify(struct acpi_processor *pr, -+ int event, int type) -+{ -+ return 0; -+} -+static inline int processor_cntl_xen_power_cache(int cpu, int cx, -+ struct acpi_power_register *reg) -+{ -+ return 0; -+} -+#endif /* CONFIG_ACPI_PROCESSOR_XEN */ -+ -+#endif /* _XEN_ACPI_H */ -diff --git a/include/xen/balloon.h 
b/include/xen/balloon.h -new file mode 100644 -index 0000000..e751514 ---- /dev/null -+++ b/include/xen/balloon.h -@@ -0,0 +1,8 @@ -+#ifndef _XEN_BALLOON_H -+#define _XEN_BALLOON_H -+ -+/* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */ -+struct page **alloc_empty_pages_and_pagevec(int nr_pages); -+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages); -+ -+#endif -diff --git a/include/xen/blkif.h b/include/xen/blkif.h -new file mode 100644 -index 0000000..7172081 ---- /dev/null -+++ b/include/xen/blkif.h -@@ -0,0 +1,123 @@ -+/* -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to -+ * deal in the Software without restriction, including without limitation the -+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -+ * sell copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. -+ */ -+ -+#ifndef __XEN_BLKIF_H__ -+#define __XEN_BLKIF_H__ -+ -+#include <xen/interface/xen.h> -+#include <xen/interface/io/ring.h> -+#include <xen/interface/io/blkif.h> -+#include <xen/interface/io/protocols.h> -+ -+/* Not a real protocol. 
Used to generate ring structs which contain -+ * the elements common to all protocols only. This way we get a -+ * compiler-checkable way to use common struct elements, so we can -+ * avoid using switch(protocol) in a number of places. */ -+struct blkif_common_request { -+ char dummy; -+}; -+struct blkif_common_response { -+ char dummy; -+}; -+ -+/* i386 protocol version */ -+#pragma pack(push, 4) -+struct blkif_x86_32_request { -+ uint8_t operation; /* BLKIF_OP_??? */ -+ uint8_t nr_segments; /* number of segments */ -+ blkif_vdev_t handle; /* only for read/write requests */ -+ uint64_t id; /* private guest value, echoed in resp */ -+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ -+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -+}; -+struct blkif_x86_32_response { -+ uint64_t id; /* copied from request */ -+ uint8_t operation; /* copied from request */ -+ int16_t status; /* BLKIF_RSP_??? */ -+}; -+typedef struct blkif_x86_32_request blkif_x86_32_request_t; -+typedef struct blkif_x86_32_response blkif_x86_32_response_t; -+#pragma pack(pop) -+ -+/* x86_64 protocol version */ -+struct blkif_x86_64_request { -+ uint8_t operation; /* BLKIF_OP_??? */ -+ uint8_t nr_segments; /* number of segments */ -+ blkif_vdev_t handle; /* only for read/write requests */ -+ uint64_t __attribute__((__aligned__(8))) id; -+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ -+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -+}; -+struct blkif_x86_64_response { -+ uint64_t __attribute__((__aligned__(8))) id; -+ uint8_t operation; /* copied from request */ -+ int16_t status; /* BLKIF_RSP_??? 
*/ -+}; -+typedef struct blkif_x86_64_request blkif_x86_64_request_t; -+typedef struct blkif_x86_64_response blkif_x86_64_response_t; -+ -+DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); -+DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response); -+DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); -+ -+union blkif_back_rings { -+ struct blkif_back_ring native; -+ struct blkif_common_back_ring common; -+ struct blkif_x86_32_back_ring x86_32; -+ struct blkif_x86_64_back_ring x86_64; -+}; -+ -+enum blkif_protocol { -+ BLKIF_PROTOCOL_NATIVE = 1, -+ BLKIF_PROTOCOL_X86_32 = 2, -+ BLKIF_PROTOCOL_X86_64 = 3, -+}; -+ -+static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) -+{ -+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; -+ dst->operation = src->operation; -+ dst->nr_segments = src->nr_segments; -+ dst->handle = src->handle; -+ dst->id = src->id; -+ dst->sector_number = src->sector_number; -+ barrier(); -+ if (n > dst->nr_segments) -+ n = dst->nr_segments; -+ for (i = 0; i < n; i++) -+ dst->seg[i] = src->seg[i]; -+} -+ -+static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) -+{ -+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; -+ dst->operation = src->operation; -+ dst->nr_segments = src->nr_segments; -+ dst->handle = src->handle; -+ dst->id = src->id; -+ dst->sector_number = src->sector_number; -+ barrier(); -+ if (n > dst->nr_segments) -+ n = dst->nr_segments; -+ for (i = 0; i < n; i++) -+ dst->seg[i] = src->seg[i]; -+} -+ -+#endif /* __XEN_BLKIF_H__ */ -diff --git a/include/xen/events.h b/include/xen/events.h -index e68d59a..7e17e2a 100644 ---- a/include/xen/events.h -+++ b/include/xen/events.h -@@ -12,6 +12,8 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, - irq_handler_t handler, - unsigned long irqflags, const char *devname, - void *dev_id); -+int 
bind_virq_to_irq(unsigned int virq, unsigned int cpu); -+ - int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - irq_handler_t handler, - unsigned long irqflags, const char *devname, -@@ -22,6 +24,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, - unsigned long irqflags, - const char *devname, - void *dev_id); -+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, -+ unsigned int remote_port, -+ irq_handler_t handler, -+ unsigned long irqflags, -+ const char *devname, -+ void *dev_id); - - /* - * Common unbind function for all event sources. Takes IRQ to unbind from. -@@ -53,7 +61,42 @@ bool xen_test_irq_pending(int irq); - irq will be disabled so it won't deliver an interrupt. */ - void xen_poll_irq(int irq); - -+/* Poll waiting for an irq to become pending with a timeout. In the usual case, the -+ irq will be disabled so it won't deliver an interrupt. */ -+void xen_poll_irq_timeout(int irq, u64 timeout); -+ - /* Determine the IRQ which is bound to an event channel */ - unsigned irq_from_evtchn(unsigned int evtchn); - -+/* Allocate an irq for a physical interrupt, given a gsi. "Legacy" -+ GSIs are identity mapped; others are dynamically allocated as -+ usual. */ -+int xen_allocate_pirq(unsigned gsi, int shareable, char *name); -+ -+/* De-allocates the above mentioned physical interrupt. */ -+int xen_destroy_irq(int irq); -+ -+/* Return vector allocated to pirq */ -+int xen_vector_from_irq(unsigned pirq); -+ -+/* Return gsi allocated to pirq */ -+int xen_gsi_from_irq(unsigned pirq); -+ -+#ifdef CONFIG_XEN_DOM0_PCI -+void xen_setup_pirqs(void); -+#else -+static inline void xen_setup_pirqs(void) -+{ -+} -+#endif -+ -+/* Determine whether to ignore this IRQ if passed to a guest. 
*/ -+int xen_ignore_irq(int irq); -+/* Xen HVM evtchn vector callback */ -+extern void xen_hvm_callback_vector(void); -+extern int xen_have_vector_callback; -+int xen_set_callback_via(uint64_t via); -+void xen_evtchn_do_upcall(struct pt_regs *regs); -+void xen_hvm_evtchn_do_upcall(void); -+ - #endif /* _XEN_EVENTS_H */ -diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h -new file mode 100644 -index 0000000..8bd1467 ---- /dev/null -+++ b/include/xen/gntdev.h -@@ -0,0 +1,119 @@ -+/****************************************************************************** -+ * gntdev.h -+ * -+ * Interface to /dev/xen/gntdev. -+ * -+ * Copyright (c) 2007, D G Murray -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. -+ */ -+ -+#ifndef __LINUX_PUBLIC_GNTDEV_H__ -+#define __LINUX_PUBLIC_GNTDEV_H__ -+ -+struct ioctl_gntdev_grant_ref { -+ /* The domain ID of the grant to be mapped. */ -+ uint32_t domid; -+ /* The grant reference of the grant to be mapped. */ -+ uint32_t ref; -+}; -+ -+/* -+ * Inserts the grant references into the mapping table of an instance -+ * of gntdev. N.B. This does not perform the mapping, which is deferred -+ * until mmap() is called with @index as the offset. -+ */ -+#define IOCTL_GNTDEV_MAP_GRANT_REF \ -+_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref)) -+struct ioctl_gntdev_map_grant_ref { -+ /* IN parameters */ -+ /* The number of grants to be mapped. */ -+ uint32_t count; -+ uint32_t pad; -+ /* OUT parameters */ -+ /* The offset to be used on a subsequent call to mmap(). */ -+ uint64_t index; -+ /* Variable IN parameter. */ -+ /* Array of grant references, of size @count. */ -+ struct ioctl_gntdev_grant_ref refs[1]; -+}; -+ -+/* -+ * Removes the grant references from the mapping table of an instance of -+ * of gntdev. N.B. munmap() must be called on the relevant virtual address(es) -+ * before this ioctl is called, or an error will result. -+ */ -+#define IOCTL_GNTDEV_UNMAP_GRANT_REF \ -+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref)) -+struct ioctl_gntdev_unmap_grant_ref { -+ /* IN parameters */ -+ /* The offset was returned by the corresponding map operation. */ -+ uint64_t index; -+ /* The number of pages to be unmapped. */ -+ uint32_t count; -+ uint32_t pad; -+}; -+ -+/* -+ * Returns the offset in the driver's address space that corresponds -+ * to @vaddr. 
This can be used to perform a munmap(), followed by an -+ * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by -+ * the caller. The number of pages that were allocated at the same time as -+ * @vaddr is returned in @count. -+ * -+ * N.B. Where more than one page has been mapped into a contiguous range, the -+ * supplied @vaddr must correspond to the start of the range; otherwise -+ * an error will result. It is only possible to munmap() the entire -+ * contiguously-allocated range at once, and not any subrange thereof. -+ */ -+#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \ -+_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr)) -+struct ioctl_gntdev_get_offset_for_vaddr { -+ /* IN parameters */ -+ /* The virtual address of the first mapped page in a range. */ -+ uint64_t vaddr; -+ /* OUT parameters */ -+ /* The offset that was used in the initial mmap() operation. */ -+ uint64_t offset; -+ /* The number of pages mapped in the VM area that begins at @vaddr. */ -+ uint32_t count; -+ uint32_t pad; -+}; -+ -+/* -+ * Sets the maximum number of grants that may mapped at once by this gntdev -+ * instance. -+ * -+ * N.B. This must be called before any other ioctl is performed on the device. -+ */ -+#define IOCTL_GNTDEV_SET_MAX_GRANTS \ -+_IOC(_IOC_NONE, 'G', 3, sizeof(struct ioctl_gntdev_set_max_grants)) -+struct ioctl_gntdev_set_max_grants { -+ /* IN parameter */ -+ /* The maximum number of grants that may be mapped at once. 
*/ -+ uint32_t count; -+}; -+ -+#endif /* __LINUX_PUBLIC_GNTDEV_H__ */ -diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h -index a40f1cd..871b553 100644 ---- a/include/xen/grant_table.h -+++ b/include/xen/grant_table.h -@@ -37,10 +37,16 @@ - #ifndef __ASM_GNTTAB_H__ - #define __ASM_GNTTAB_H__ - --#include <asm/xen/hypervisor.h> -+#include <asm/page.h> -+ -+#include <xen/interface/xen.h> - #include <xen/interface/grant_table.h> -+ -+#include <asm/xen/hypervisor.h> - #include <asm/xen/grant_table.h> - -+#include <xen/features.h> -+ - /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ - #define NR_GRANT_FRAMES 4 - -@@ -51,6 +57,9 @@ struct gnttab_free_callback { - u16 count; - }; - -+void gnttab_reset_grant_page(struct page *page); -+ -+int gnttab_init(void); - int gnttab_suspend(void); - int gnttab_resume(void); - -@@ -80,6 +89,8 @@ unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); - - int gnttab_query_foreign_access(grant_ref_t ref); - -+int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep); -+ - /* - * operations on reserved batches of grant references - */ -@@ -106,12 +117,46 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, - void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, - unsigned long pfn); - -+static inline void -+gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, -+ uint32_t flags, grant_ref_t ref, domid_t domid) -+{ -+ if (flags & GNTMAP_contains_pte) -+ map->host_addr = addr; -+ else if (xen_feature(XENFEAT_auto_translated_physmap)) -+ map->host_addr = __pa(addr); -+ else -+ map->host_addr = addr; -+ -+ map->flags = flags; -+ map->ref = ref; -+ map->dom = domid; -+} -+ -+static inline void -+gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr, -+ uint32_t flags, grant_handle_t handle) -+{ -+ if (flags & GNTMAP_contains_pte) -+ unmap->host_addr = addr; -+ else if (xen_feature(XENFEAT_auto_translated_physmap)) -+ 
unmap->host_addr = __pa(addr); -+ else -+ unmap->host_addr = addr; -+ -+ unmap->handle = handle; -+ unmap->dev_bus_addr = 0; -+} -+ - int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - struct grant_entry **__shared); - void arch_gnttab_unmap_shared(struct grant_entry *shared, - unsigned long nr_gframes); - -+extern unsigned long xen_hvm_resume_frames; -+unsigned int gnttab_max_grant_frames(void); -+ - #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) - - #endif /* __ASM_GNTTAB_H__ */ -diff --git a/include/xen/hvm.h b/include/xen/hvm.h -new file mode 100644 -index 0000000..b193fa2 ---- /dev/null -+++ b/include/xen/hvm.h -@@ -0,0 +1,30 @@ -+/* Simple wrappers around HVM functions */ -+#ifndef XEN_HVM_H__ -+#define XEN_HVM_H__ -+ -+#include <xen/interface/hvm/params.h> -+#include <asm/xen/hypercall.h> -+ -+static inline int hvm_get_parameter(int idx, uint64_t *value) -+{ -+ struct xen_hvm_param xhv; -+ int r; -+ -+ xhv.domid = DOMID_SELF; -+ xhv.index = idx; -+ r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); -+ if (r < 0) { -+ printk(KERN_ERR "Cannot get hvm parameter %d: %d!\n", -+ idx, r); -+ return r; -+ } -+ *value = xhv.value; -+ return r; -+} -+ -+#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2 -+#define HVM_CALLBACK_VIA_TYPE_SHIFT 56 -+#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\ -+ HVM_CALLBACK_VIA_TYPE_SHIFT | (x)) -+ -+#endif /* XEN_HVM_H__ */ -diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h -index f51b641..70d2563 100644 ---- a/include/xen/interface/features.h -+++ b/include/xen/interface/features.h -@@ -41,6 +41,12 @@ - /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ - #define XENFEAT_mmu_pt_update_preserve_ad 5 - -+/* x86: Does this Xen host support the HVM callback vector type? 
*/ -+#define XENFEAT_hvm_callback_vector 8 -+ -+/* x86: pvclock algorithm is safe to use on HVM */ -+#define XENFEAT_hvm_safe_pvclock 9 -+ - #define XENFEAT_NR_SUBMAPS 1 - - #endif /* __XEN_PUBLIC_FEATURES_H__ */ -diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h -index 39da93c..c704fe5 100644 ---- a/include/xen/interface/grant_table.h -+++ b/include/xen/interface/grant_table.h -@@ -28,6 +28,7 @@ - #ifndef __XEN_PUBLIC_GRANT_TABLE_H__ - #define __XEN_PUBLIC_GRANT_TABLE_H__ - -+#include <xen/interface/xen.h> - - /*********************************** - * GRANT TABLE REPRESENTATION -@@ -321,6 +322,28 @@ struct gnttab_query_size { - DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); - - /* -+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings -+ * tracked by <handle> but atomically replace the page table entry with one -+ * pointing to the machine address under <new_addr>. <new_addr> will be -+ * redirected to the null entry. -+ * NOTES: -+ * 1. The call may fail in an undefined manner if either mapping is not -+ * tracked by <handle>. -+ * 2. After executing a batch of unmaps, it is guaranteed that no stale -+ * mappings will remain in the device or host TLBs. -+ */ -+#define GNTTABOP_unmap_and_replace 7 -+struct gnttab_unmap_and_replace { -+ /* IN parameters. */ -+ uint64_t host_addr; -+ uint64_t new_addr; -+ grant_handle_t handle; -+ /* OUT parameters. */ -+ int16_t status; /* GNTST_* */ -+}; -+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace); -+ -+/* - * Bitfield values for update_pin_status.flags. - */ - /* Map the grant entry for access by I/O devices. 
*/ -diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h -new file mode 100644 -index 0000000..a4827f4 ---- /dev/null -+++ b/include/xen/interface/hvm/hvm_op.h -@@ -0,0 +1,46 @@ -+/* -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to -+ * deal in the Software without restriction, including without limitation the -+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -+ * sell copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. -+ */ -+ -+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ -+#define __XEN_PUBLIC_HVM_HVM_OP_H__ -+ -+/* Get/set subcommands: the second argument of the hypercall is a -+ * pointer to a xen_hvm_param struct. */ -+#define HVMOP_set_param 0 -+#define HVMOP_get_param 1 -+struct xen_hvm_param { -+ domid_t domid; /* IN */ -+ uint32_t index; /* IN */ -+ uint64_t value; /* IN/OUT */ -+}; -+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param); -+ -+/* Hint from PV drivers for pagetable destruction. */ -+#define HVMOP_pagetable_dying 9 -+struct xen_hvm_pagetable_dying { -+ /* Domain with a pagetable about to be destroyed. 
*/ -+ domid_t domid; -+ /* guest physical address of the toplevel pagetable dying */ -+ aligned_u64 gpa; -+}; -+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; -+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t); -+ -+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ -diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h -new file mode 100644 -index 0000000..1888d8c ---- /dev/null -+++ b/include/xen/interface/hvm/params.h -@@ -0,0 +1,95 @@ -+/* -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to -+ * deal in the Software without restriction, including without limitation the -+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -+ * sell copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. -+ */ -+ -+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ -+#define __XEN_PUBLIC_HVM_PARAMS_H__ -+ -+#include "hvm_op.h" -+ -+/* -+ * Parameter space for HVMOP_{set,get}_param. -+ */ -+ -+/* -+ * How should CPU0 event-channel notifications be delivered? -+ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). 
-+ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: -+ * Domain = val[47:32], Bus = val[31:16], -+ * DevFn = val[15: 8], IntX = val[ 1: 0] -+ * val[63:56] == 2: val[7:0] is a vector number. -+ * If val == 0 then CPU0 event-channel notifications are not delivered. -+ */ -+#define HVM_PARAM_CALLBACK_IRQ 0 -+ -+#define HVM_PARAM_STORE_PFN 1 -+#define HVM_PARAM_STORE_EVTCHN 2 -+ -+#define HVM_PARAM_PAE_ENABLED 4 -+ -+#define HVM_PARAM_IOREQ_PFN 5 -+ -+#define HVM_PARAM_BUFIOREQ_PFN 6 -+ -+/* -+ * Set mode for virtual timers (currently x86 only): -+ * delay_for_missed_ticks (default): -+ * Do not advance a vcpu's time beyond the correct delivery time for -+ * interrupts that have been missed due to preemption. Deliver missed -+ * interrupts when the vcpu is rescheduled and advance the vcpu's virtual -+ * time stepwise for each one. -+ * no_delay_for_missed_ticks: -+ * As above, missed interrupts are delivered, but guest time always tracks -+ * wallclock (i.e., real) time while doing so. -+ * no_missed_ticks_pending: -+ * No missed interrupts are held pending. Instead, to ensure ticks are -+ * delivered at some non-zero rate, if we detect missed ticks then the -+ * internal tick alarm is not disabled if the VCPU is preempted during the -+ * next tick period. -+ * one_missed_tick_pending: -+ * Missed interrupts are collapsed together and delivered as one 'late tick'. -+ * Guest time always tracks wallclock (i.e., real) time. -+ */ -+#define HVM_PARAM_TIMER_MODE 10 -+#define HVMPTM_delay_for_missed_ticks 0 -+#define HVMPTM_no_delay_for_missed_ticks 1 -+#define HVMPTM_no_missed_ticks_pending 2 -+#define HVMPTM_one_missed_tick_pending 3 -+ -+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ -+#define HVM_PARAM_HPET_ENABLED 11 -+ -+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ -+#define HVM_PARAM_IDENT_PT 12 -+ -+/* Device Model domain, defaults to 0. 
*/ -+#define HVM_PARAM_DM_DOMAIN 13 -+ -+/* ACPI S state: currently support S0 and S3 on x86. */ -+#define HVM_PARAM_ACPI_S_STATE 14 -+ -+/* TSS used on Intel when CR0.PE=0. */ -+#define HVM_PARAM_VM86_TSS 15 -+ -+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ -+#define HVM_PARAM_VPT_ALIGN 16 -+ -+#define HVM_NR_PARAMS 17 -+ -+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ -diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h -index c2d1fa4..68dd2b4 100644 ---- a/include/xen/interface/io/blkif.h -+++ b/include/xen/interface/io/blkif.h -@@ -91,4 +91,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); - #define VDISK_REMOVABLE 0x2 - #define VDISK_READONLY 0x4 - -+/* Xen-defined major numbers for virtual disks, they look strangely -+ * familiar */ -+#define XEN_IDE0_MAJOR 3 -+#define XEN_IDE1_MAJOR 22 -+#define XEN_SCSI_DISK0_MAJOR 8 -+#define XEN_SCSI_DISK1_MAJOR 65 -+#define XEN_SCSI_DISK2_MAJOR 66 -+#define XEN_SCSI_DISK3_MAJOR 67 -+#define XEN_SCSI_DISK4_MAJOR 68 -+#define XEN_SCSI_DISK5_MAJOR 69 -+#define XEN_SCSI_DISK6_MAJOR 70 -+#define XEN_SCSI_DISK7_MAJOR 71 -+#define XEN_SCSI_DISK8_MAJOR 128 -+#define XEN_SCSI_DISK9_MAJOR 129 -+#define XEN_SCSI_DISK10_MAJOR 130 -+#define XEN_SCSI_DISK11_MAJOR 131 -+#define XEN_SCSI_DISK12_MAJOR 132 -+#define XEN_SCSI_DISK13_MAJOR 133 -+#define XEN_SCSI_DISK14_MAJOR 134 -+#define XEN_SCSI_DISK15_MAJOR 135 -+ - #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ -diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h -index 518481c..8309344 100644 ---- a/include/xen/interface/io/netif.h -+++ b/include/xen/interface/io/netif.h -@@ -131,6 +131,10 @@ struct xen_netif_rx_request { - #define _NETRXF_extra_info (3) - #define NETRXF_extra_info (1U<<_NETRXF_extra_info) - -+/* GSO Prefix descriptor. 
*/ -+#define _NETRXF_gso_prefix (4) -+#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix) -+ - struct xen_netif_rx_response { - uint16_t id; - uint16_t offset; /* Offset in page of start of received packet */ -diff --git a/include/xen/interface/io/pciif.h b/include/xen/interface/io/pciif.h -new file mode 100644 -index 0000000..c4177f3 ---- /dev/null -+++ b/include/xen/interface/io/pciif.h -@@ -0,0 +1,124 @@ -+/* -+ * PCI Backend/Frontend Common Data Structures & Macros -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to -+ * deal in the Software without restriction, including without limitation the -+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -+ * sell copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. 
-+ * -+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> -+ */ -+#ifndef __XEN_PCI_COMMON_H__ -+#define __XEN_PCI_COMMON_H__ -+ -+/* Be sure to bump this number if you change this file */ -+#define XEN_PCI_MAGIC "7" -+ -+/* xen_pci_sharedinfo flags */ -+#define _XEN_PCIF_active (0) -+#define XEN_PCIF_active (1<<_XEN_PCIF_active) -+#define _XEN_PCIB_AERHANDLER (1) -+#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) -+#define _XEN_PCIB_active (2) -+#define XEN_PCIB_active (1<<_XEN_PCIB_active) -+ -+/* xen_pci_op commands */ -+#define XEN_PCI_OP_conf_read (0) -+#define XEN_PCI_OP_conf_write (1) -+#define XEN_PCI_OP_enable_msi (2) -+#define XEN_PCI_OP_disable_msi (3) -+#define XEN_PCI_OP_enable_msix (4) -+#define XEN_PCI_OP_disable_msix (5) -+#define XEN_PCI_OP_aer_detected (6) -+#define XEN_PCI_OP_aer_resume (7) -+#define XEN_PCI_OP_aer_mmio (8) -+#define XEN_PCI_OP_aer_slotreset (9) -+ -+/* xen_pci_op error numbers */ -+#define XEN_PCI_ERR_success (0) -+#define XEN_PCI_ERR_dev_not_found (-1) -+#define XEN_PCI_ERR_invalid_offset (-2) -+#define XEN_PCI_ERR_access_denied (-3) -+#define XEN_PCI_ERR_not_implemented (-4) -+/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ -+#define XEN_PCI_ERR_op_failed (-5) -+ -+/* -+ * it should be PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry)) -+ * Should not exceed 128 -+ */ -+#define SH_INFO_MAX_VEC 128 -+ -+struct xen_msix_entry { -+ uint16_t vector; -+ uint16_t entry; -+}; -+struct xen_pci_op { -+ /* IN: what action to perform: XEN_PCI_OP_* */ -+ uint32_t cmd; -+ -+ /* OUT: will contain an error number (if any) from errno.h */ -+ int32_t err; -+ -+ /* IN: which device to touch */ -+ uint32_t domain; /* PCI Domain/Segment */ -+ uint32_t bus; -+ uint32_t devfn; -+ -+ /* IN: which configuration registers to touch */ -+ int32_t offset; -+ int32_t size; -+ -+ /* IN/OUT: Contains the result after a READ or the value to WRITE */ -+ uint32_t value; -+ /* IN: Contains extra infor for this operation */ -+ 
uint32_t info; -+ /*IN: param for msi-x */ -+ struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; -+}; -+ -+/*used for pcie aer handling*/ -+struct xen_pcie_aer_op -+{ -+ -+ /* IN: what action to perform: XEN_PCI_OP_* */ -+ uint32_t cmd; -+ /*IN/OUT: return aer_op result or carry error_detected state as input*/ -+ int32_t err; -+ -+ /* IN: which device to touch */ -+ uint32_t domain; /* PCI Domain/Segment*/ -+ uint32_t bus; -+ uint32_t devfn; -+}; -+struct xen_pci_sharedinfo { -+ /* flags - XEN_PCIF_* */ -+ uint32_t flags; -+ struct xen_pci_op op; -+ struct xen_pcie_aer_op aer_op; -+}; -+ -+#endif /* __XEN_PCI_COMMON_H__ */ -+ -+/* -+ * Local variables: -+ * mode: C -+ * c-set-style: "BSD" -+ * c-basic-offset: 4 -+ * tab-width: 4 -+ * indent-tabs-mode: nil -+ * End: -+ */ -diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h -index e8cbf43..c9ba846 100644 ---- a/include/xen/interface/io/ring.h -+++ b/include/xen/interface/io/ring.h -@@ -24,8 +24,15 @@ typedef unsigned int RING_IDX; - * A ring contains as many entries as will fit, rounded down to the nearest - * power of two (so we can mask with (size-1) to loop around). - */ --#define __RING_SIZE(_s, _sz) \ -- (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) -+#define __CONST_RING_SIZE(_s, _sz) \ -+ (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ -+ sizeof(((struct _s##_sring *)0)->ring[0]))) -+ -+/* -+ * The same for passing in an actual pointer instead of a name tag. -+ */ -+#define __RING_SIZE(_s, _sz) \ -+ (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) - - /* - * Macros to make the correct C datatypes for a new kind of ring. 
-@@ -73,7 +80,16 @@ union __name##_sring_entry { \ - struct __name##_sring { \ - RING_IDX req_prod, req_event; \ - RING_IDX rsp_prod, rsp_event; \ -- uint8_t pad[48]; \ -+ union { \ -+ struct { \ -+ uint8_t smartpoll_active; \ -+ } netif; \ -+ struct { \ -+ uint8_t msg; \ -+ } tapif_user; \ -+ uint8_t pvt_pad[4]; \ -+ } private; \ -+ uint8_t pad[44]; \ - union __name##_sring_entry ring[1]; /* variable-length */ \ - }; \ - \ -diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h -index 46508c7..9fda532 100644 ---- a/include/xen/interface/io/xenbus.h -+++ b/include/xen/interface/io/xenbus.h -@@ -27,8 +27,14 @@ enum xenbus_state - XenbusStateClosing = 5, /* The device is being closed - due to an error or an unplug - event. */ -- XenbusStateClosed = 6 -+ XenbusStateClosed = 6, - -+ /* -+ * Reconfiguring: The device is being reconfigured. -+ */ -+ XenbusStateReconfiguring = 7, -+ -+ XenbusStateReconfigured = 8 - }; - - #endif /* _XEN_PUBLIC_IO_XENBUS_H */ -diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h -index af36ead..aa4e368 100644 ---- a/include/xen/interface/memory.h -+++ b/include/xen/interface/memory.h -@@ -9,6 +9,8 @@ - #ifndef __XEN_PUBLIC_MEMORY_H__ - #define __XEN_PUBLIC_MEMORY_H__ - -+#include <linux/spinlock.h> -+ - /* - * Increase or decrease the specified domain's memory reservation. Returns a - * -ve errcode on failure, or the # extents successfully allocated or freed. -@@ -53,6 +55,48 @@ struct xen_memory_reservation { - DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); - - /* -+ * An atomic exchange of memory pages. If return code is zero then -+ * @out.extent_list provides GMFNs of the newly-allocated memory. -+ * Returns zero on complete success, otherwise a negative error code. -+ * On complete success then always @nr_exchanged == @in.nr_extents. -+ * On partial success @nr_exchanged indicates how much work was done. 
-+ */ -+#define XENMEM_exchange 11 -+struct xen_memory_exchange { -+ /* -+ * [IN] Details of memory extents to be exchanged (GMFN bases). -+ * Note that @in.address_bits is ignored and unused. -+ */ -+ struct xen_memory_reservation in; -+ -+ /* -+ * [IN/OUT] Details of new memory extents. -+ * We require that: -+ * 1. @in.domid == @out.domid -+ * 2. @in.nr_extents << @in.extent_order == -+ * @out.nr_extents << @out.extent_order -+ * 3. @in.extent_start and @out.extent_start lists must not overlap -+ * 4. @out.extent_start lists GPFN bases to be populated -+ * 5. @out.extent_start is overwritten with allocated GMFN bases -+ */ -+ struct xen_memory_reservation out; -+ -+ /* -+ * [OUT] Number of input extents that were successfully exchanged: -+ * 1. The first @nr_exchanged input extents were successfully -+ * deallocated. -+ * 2. The corresponding first entries in the output extent list correctly -+ * indicate the GMFNs that were successfully exchanged. -+ * 3. All other input and output extents are untouched. -+ * 4. If not all input exents are exchanged then the return code of this -+ * command will be non-zero. -+ * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! -+ */ -+ unsigned long nr_exchanged; -+}; -+ -+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange); -+/* - * Returns the maximum machine frame number of mapped RAM in this system. - * This command always succeeds (it never returns an error code). - * arg == NULL. -@@ -97,6 +141,19 @@ struct xen_machphys_mfn_list { - DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); - - /* -+ * Returns the location in virtual address space of the machine_to_phys -+ * mapping table. Architectures which do not have a m2p table, or which do not -+ * map it by default into guest address space, do not implement this command. -+ * arg == addr of xen_machphys_mapping_t. -+ */ -+#define XENMEM_machphys_mapping 12 -+struct xen_machphys_mapping { -+ unsigned long v_start, v_end; /* Start and end virtual addresses. 
*/ -+ unsigned long max_mfn; /* Maximum MFN that can be looked up. */ -+}; -+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); -+ -+/* - * Sets the GPFN at which a particular page appears in the specified guest's - * pseudophysical address space. - * arg == addr of xen_add_to_physmap_t. -@@ -142,4 +199,38 @@ struct xen_translate_gpfn_list { - }; - DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); - -+/* -+ * Returns the pseudo-physical memory map as it was when the domain -+ * was started (specified by XENMEM_set_memory_map). -+ * arg == addr of struct xen_memory_map. -+ */ -+#define XENMEM_memory_map 9 -+struct xen_memory_map { -+ /* -+ * On call the number of entries which can be stored in buffer. On -+ * return the number of entries which have been stored in -+ * buffer. -+ */ -+ unsigned int nr_entries; -+ -+ /* -+ * Entries in the buffer are in the same format as returned by the -+ * BIOS INT 0x15 EAX=0xE820 call. -+ */ -+ GUEST_HANDLE(void) buffer; -+}; -+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); -+ -+/* -+ * Returns the real physical memory map. Passes the same structure as -+ * XENMEM_memory_map. -+ * arg == addr of struct xen_memory_map. -+ */ -+#define XENMEM_machine_memory_map 10 -+ -+/* -+ * Prevent the balloon driver from changing the memory reservation -+ * during a driver critical region. -+ */ -+extern spinlock_t xen_reservation_lock; - #endif /* __XEN_PUBLIC_MEMORY_H__ */ -diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h -index cd69391..0703ef6 100644 ---- a/include/xen/interface/physdev.h -+++ b/include/xen/interface/physdev.h -@@ -39,6 +39,19 @@ struct physdev_eoi { - }; - - /* -+ * Register a shared page for the hypervisor to indicate whether the guest -+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly -+ * once the guest used this function in that the associated event channel -+ * will automatically get unmasked. 
The page registered is used as a bit -+ * array indexed by Xen's PIRQ value. -+ */ -+#define PHYSDEVOP_pirq_eoi_gmfn 17 -+struct physdev_pirq_eoi_gmfn { -+ /* IN */ -+ unsigned long gmfn; -+}; -+ -+/* - * Query the status of an IRQ line. - * @arg == pointer to physdev_irq_status_query structure. - */ -@@ -106,6 +119,64 @@ struct physdev_irq { - uint32_t vector; - }; - -+#define MAP_PIRQ_TYPE_MSI 0x0 -+#define MAP_PIRQ_TYPE_GSI 0x1 -+#define MAP_PIRQ_TYPE_UNKNOWN 0x2 -+ -+#define PHYSDEVOP_map_pirq 13 -+struct physdev_map_pirq { -+ domid_t domid; -+ /* IN */ -+ int type; -+ /* IN */ -+ int index; -+ /* IN or OUT */ -+ int pirq; -+ /* IN */ -+ int bus; -+ /* IN */ -+ int devfn; -+ /* IN */ -+ int entry_nr; -+ /* IN */ -+ uint64_t table_base; -+}; -+ -+#define PHYSDEVOP_unmap_pirq 14 -+struct physdev_unmap_pirq { -+ domid_t domid; -+ /* IN */ -+ int pirq; -+}; -+ -+#define PHYSDEVOP_manage_pci_add 15 -+#define PHYSDEVOP_manage_pci_remove 16 -+struct physdev_manage_pci { -+ /* IN */ -+ uint8_t bus; -+ uint8_t devfn; -+}; -+ -+#define PHYSDEVOP_restore_msi 19 -+struct physdev_restore_msi { -+ /* IN */ -+ uint8_t bus; -+ uint8_t devfn; -+}; -+ -+#define PHYSDEVOP_manage_pci_add_ext 20 -+struct physdev_manage_pci_ext { -+ /* IN */ -+ uint8_t bus; -+ uint8_t devfn; -+ unsigned is_extfn; -+ unsigned is_virtfn; -+ struct { -+ uint8_t bus; -+ uint8_t devfn; -+ } physfn; -+}; -+ - /* - * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() - * hypercall since 0x00030202. -@@ -121,6 +192,16 @@ struct physdev_op { - } u; - }; - -+#define PHYSDEVOP_setup_gsi 21 -+struct physdev_setup_gsi { -+ int gsi; -+ /* IN */ -+ uint8_t triggering; -+ /* IN */ -+ uint8_t polarity; -+ /* IN */ -+}; -+ - /* - * Notify that some PIRQ-bound event channels have been unmasked. 
- * ** This command is obsolete since interface version 0x00030202 and is ** -diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h -new file mode 100644 -index 0000000..17ae622 ---- /dev/null -+++ b/include/xen/interface/platform.h -@@ -0,0 +1,381 @@ -+/****************************************************************************** -+ * platform.h -+ * -+ * Hardware platform operations. Intended for use by domain-0 kernel. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to -+ * deal in the Software without restriction, including without limitation the -+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -+ * sell copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. -+ * -+ * Copyright (c) 2002-2006, K Fraser -+ */ -+ -+#ifndef __XEN_PUBLIC_PLATFORM_H__ -+#define __XEN_PUBLIC_PLATFORM_H__ -+ -+#include "xen.h" -+ -+#define XENPF_INTERFACE_VERSION 0x03000001 -+ -+/* -+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC, -+ * 1 January, 1970 if the current system time was <system_time>. -+ */ -+#define XENPF_settime 17 -+struct xenpf_settime { -+ /* IN variables. 
*/ -+ uint32_t secs; -+ uint32_t nsecs; -+ uint64_t system_time; -+}; -+typedef struct xenpf_settime xenpf_settime_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t); -+ -+/* -+ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. -+ * On x86, @type is an architecture-defined MTRR memory type. -+ * On success, returns the MTRR that was used (@reg) and a handle that can -+ * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting. -+ * (x86-specific). -+ */ -+#define XENPF_add_memtype 31 -+struct xenpf_add_memtype { -+ /* IN variables. */ -+ unsigned long mfn; -+ uint64_t nr_mfns; -+ uint32_t type; -+ /* OUT variables. */ -+ uint32_t handle; -+ uint32_t reg; -+}; -+typedef struct xenpf_add_memtype xenpf_add_memtype_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_add_memtype_t); -+ -+/* -+ * Tear down an existing memory-range type. If @handle is remembered then it -+ * should be passed in to accurately tear down the correct setting (in case -+ * of overlapping memory regions with differing types). If it is not known -+ * then @handle should be set to zero. In all cases @reg must be set. -+ * (x86-specific). -+ */ -+#define XENPF_del_memtype 32 -+struct xenpf_del_memtype { -+ /* IN variables. */ -+ uint32_t handle; -+ uint32_t reg; -+}; -+typedef struct xenpf_del_memtype xenpf_del_memtype_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_del_memtype_t); -+ -+/* Read current type of an MTRR (x86-specific). */ -+#define XENPF_read_memtype 33 -+struct xenpf_read_memtype { -+ /* IN variables. */ -+ uint32_t reg; -+ /* OUT variables. */ -+ unsigned long mfn; -+ uint64_t nr_mfns; -+ uint32_t type; -+}; -+typedef struct xenpf_read_memtype xenpf_read_memtype_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_read_memtype_t); -+ -+#define XENPF_microcode_update 35 -+struct xenpf_microcode_update { -+ /* IN variables. */ -+ GUEST_HANDLE(void) data; /* Pointer to microcode data */ -+ uint32_t length; /* Length of microcode data. 
*/ -+}; -+typedef struct xenpf_microcode_update xenpf_microcode_update_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_microcode_update_t); -+ -+#define XENPF_platform_quirk 39 -+#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ -+#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ -+#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ -+struct xenpf_platform_quirk { -+ /* IN variables. */ -+ uint32_t quirk_id; -+}; -+typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t); -+ -+#define XENPF_firmware_info 50 -+#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ -+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ -+#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ -+struct xenpf_firmware_info { -+ /* IN variables. */ -+ uint32_t type; -+ uint32_t index; -+ /* OUT variables. */ -+ union { -+ struct { -+ /* Int13, Fn48: Check Extensions Present. */ -+ uint8_t device; /* %dl: bios device number */ -+ uint8_t version; /* %ah: major version */ -+ uint16_t interface_support; /* %cx: support bitmap */ -+ /* Int13, Fn08: Legacy Get Device Parameters. */ -+ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ -+ uint8_t legacy_max_head; /* %dh: max head # */ -+ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ -+ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ -+ /* NB. First uint16_t of buffer must be set to buffer size. */ -+ GUEST_HANDLE(void) edd_params; -+ } disk_info; /* XEN_FW_DISK_INFO */ -+ struct { -+ uint8_t device; /* bios device number */ -+ uint32_t mbr_signature; /* offset 0x1b8 in mbr */ -+ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ -+ struct { -+ /* Int10, AX=4F15: Get EDID info. 
*/ -+ uint8_t capabilities; -+ uint8_t edid_transfer_time; -+ /* must refer to 128-byte buffer */ -+ GUEST_HANDLE(uchar) edid; -+ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ -+ } u; -+}; -+typedef struct xenpf_firmware_info xenpf_firmware_info_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t); -+ -+#define XENPF_enter_acpi_sleep 51 -+struct xenpf_enter_acpi_sleep { -+ /* IN variables */ -+ uint16_t pm1a_cnt_val; /* PM1a control value. */ -+ uint16_t pm1b_cnt_val; /* PM1b control value. */ -+ uint32_t sleep_state; /* Which state to enter (Sn). */ -+ uint32_t flags; /* Must be zero. */ -+}; -+typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_enter_acpi_sleep_t); -+ -+#define XENPF_change_freq 52 -+struct xenpf_change_freq { -+ /* IN variables */ -+ uint32_t flags; /* Must be zero. */ -+ uint32_t cpu; /* Physical cpu. */ -+ uint64_t freq; /* New frequency (Hz). */ -+}; -+typedef struct xenpf_change_freq xenpf_change_freq_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_change_freq_t); -+ -+/* -+ * Get idle times (nanoseconds since boot) for physical CPUs specified in the -+ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is -+ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap -+ * bit set are written to. On return, @cpumap_bitmap is modified so that any -+ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry -+ * cleared. -+ */ -+#define XENPF_getidletime 53 -+struct xenpf_getidletime { -+ /* IN/OUT variables */ -+ /* IN: CPUs to interrogate; OUT: subset of IN which are present */ -+ GUEST_HANDLE(uchar) cpumap_bitmap; -+ /* IN variables */ -+ /* Size of cpumap bitmap. */ -+ uint32_t cpumap_nr_cpus; -+ /* Must be indexable for every cpu in cpumap_bitmap. */ -+ GUEST_HANDLE(uint64_t) idletime; -+ /* OUT variables */ -+ /* System time when the idletime snapshots were taken. 
*/ -+ uint64_t now; -+}; -+typedef struct xenpf_getidletime xenpf_getidletime_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t); -+ -+#define XENPF_set_processor_pminfo 54 -+ -+/* ability bits */ -+#define XEN_PROCESSOR_PM_CX 1 -+#define XEN_PROCESSOR_PM_PX 2 -+#define XEN_PROCESSOR_PM_TX 4 -+ -+/* cmd type */ -+#define XEN_PM_CX 0 -+#define XEN_PM_PX 1 -+#define XEN_PM_TX 2 -+ -+/* Px sub info type */ -+#define XEN_PX_PCT 1 -+#define XEN_PX_PSS 2 -+#define XEN_PX_PPC 4 -+#define XEN_PX_PSD 8 -+ -+struct xen_power_register { -+ uint32_t space_id; -+ uint32_t bit_width; -+ uint32_t bit_offset; -+ uint32_t access_size; -+ uint64_t address; -+}; -+ -+struct xen_processor_csd { -+ uint32_t domain; /* domain number of one dependent group */ -+ uint32_t coord_type; /* coordination type */ -+ uint32_t num; /* number of processors in same domain */ -+}; -+typedef struct xen_processor_csd xen_processor_csd_t; -+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_csd); -+ -+struct xen_processor_cx { -+ struct xen_power_register reg; /* GAS for Cx trigger register */ -+ uint8_t type; /* cstate value, c0: 0, c1: 1, ... 
*/ -+ uint32_t latency; /* worst latency (ms) to enter/exit this cstate */ -+ uint32_t power; /* average power consumption(mW) */ -+ uint32_t dpcnt; /* number of dependency entries */ -+ GUEST_HANDLE(xen_processor_csd) dp; /* NULL if no dependency */ -+}; -+typedef struct xen_processor_cx xen_processor_cx_t; -+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_cx); -+ -+struct xen_processor_flags { -+ uint32_t bm_control:1; -+ uint32_t bm_check:1; -+ uint32_t has_cst:1; -+ uint32_t power_setup_done:1; -+ uint32_t bm_rld_set:1; -+}; -+ -+struct xen_processor_power { -+ uint32_t count; /* number of C state entries in array below */ -+ struct xen_processor_flags flags; /* global flags of this processor */ -+ GUEST_HANDLE(xen_processor_cx) states; /* supported c states */ -+}; -+ -+struct xen_pct_register { -+ uint8_t descriptor; -+ uint16_t length; -+ uint8_t space_id; -+ uint8_t bit_width; -+ uint8_t bit_offset; -+ uint8_t reserved; -+ uint64_t address; -+}; -+ -+struct xen_processor_px { -+ uint64_t core_frequency; /* megahertz */ -+ uint64_t power; /* milliWatts */ -+ uint64_t transition_latency; /* microseconds */ -+ uint64_t bus_master_latency; /* microseconds */ -+ uint64_t control; /* control value */ -+ uint64_t status; /* success indicator */ -+}; -+typedef struct xen_processor_px xen_processor_px_t; -+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_px); -+ -+struct xen_psd_package { -+ uint64_t num_entries; -+ uint64_t revision; -+ uint64_t domain; -+ uint64_t coord_type; -+ uint64_t num_processors; -+}; -+ -+struct xen_processor_performance { -+ uint32_t flags; /* flag for Px sub info type */ -+ uint32_t platform_limit; /* Platform limitation on freq usage */ -+ struct xen_pct_register control_register; -+ struct xen_pct_register status_register; -+ uint32_t state_count; /* total available performance states */ -+ GUEST_HANDLE(xen_processor_px) states; -+ struct xen_psd_package domain_info; -+ uint32_t shared_type; /* coordination type of this processor */ -+}; -+typedef 
struct xen_processor_performance xen_processor_performance_t; -+DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance); -+ -+struct xenpf_set_processor_pminfo { -+ /* IN variables */ -+ uint32_t id; /* ACPI CPU ID */ -+ uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */ -+ union { -+ struct xen_processor_power power;/* Cx: _CST/_CSD */ -+ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ -+ }; -+}; -+typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo); -+ -+#define XENPF_get_cpuinfo 55 -+struct xenpf_pcpuinfo { -+ /* IN */ -+ uint32_t xen_cpuid; -+ /* OUT */ -+ /* The maxium cpu_id that is present */ -+ uint32_t max_present; -+#define XEN_PCPU_FLAGS_ONLINE 1 -+ /* Correponding xen_cpuid is not present*/ -+#define XEN_PCPU_FLAGS_INVALID 2 -+ uint32_t flags; -+ uint32_t apic_id; -+ uint32_t acpi_id; -+}; -+typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo_t); -+ -+#define XENPF_cpu_online 56 -+#define XENPF_cpu_offline 57 -+struct xenpf_cpu_ol { -+ uint32_t cpuid; -+}; -+typedef struct xenpf_cpu_ol xenpf_cpu_ol_t; -+DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol_t); -+ -+#define XENPF_cpu_hotadd 58 -+struct xenpf_cpu_hotadd { -+ uint32_t apic_id; -+ uint32_t acpi_id; -+ uint32_t pxm; -+}; -+ -+ -+#define XENPF_mem_hotadd 59 -+struct xenpf_mem_hotadd { -+ uint64_t spfn; -+ uint64_t epfn; -+ uint32_t pxm; -+ uint32_t flags; -+}; -+ -+struct xen_platform_op { -+ uint32_t cmd; -+ uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ -+ union { -+ struct xenpf_settime settime; -+ struct xenpf_add_memtype add_memtype; -+ struct xenpf_del_memtype del_memtype; -+ struct xenpf_read_memtype read_memtype; -+ struct xenpf_microcode_update microcode; -+ struct xenpf_platform_quirk platform_quirk; -+ struct xenpf_firmware_info firmware_info; -+ struct xenpf_enter_acpi_sleep enter_acpi_sleep; -+ struct xenpf_change_freq change_freq; -+ struct 
xenpf_getidletime getidletime; -+ struct xenpf_set_processor_pminfo set_pminfo; -+ struct xenpf_pcpuinfo pcpu_info; -+ struct xenpf_cpu_ol cpu_ol; -+ struct xenpf_cpu_hotadd cpu_add; -+ struct xenpf_mem_hotadd mem_add; -+ uint8_t pad[128]; -+ } u; -+}; -+typedef struct xen_platform_op xen_platform_op_t; -+DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t); -+ -+#endif /* __XEN_PUBLIC_PLATFORM_H__ */ -diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h -index 5fec575..dd55dac 100644 ---- a/include/xen/interface/sched.h -+++ b/include/xen/interface/sched.h -@@ -65,6 +65,39 @@ struct sched_poll { - DEFINE_GUEST_HANDLE_STRUCT(sched_poll); - - /* -+ * Declare a shutdown for another domain. The main use of this function is -+ * in interpreting shutdown requests and reasons for fully-virtualized -+ * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. -+ * @arg == pointer to sched_remote_shutdown structure. -+ */ -+#define SCHEDOP_remote_shutdown 4 -+struct sched_remote_shutdown { -+ domid_t domain_id; /* Remote domain ID */ -+ unsigned int reason; /* SHUTDOWN_xxx reason */ -+}; -+ -+/* -+ * Latch a shutdown code, so that when the domain later shuts down it -+ * reports this code to the control tools. -+ * @arg == as for SCHEDOP_shutdown. -+ */ -+#define SCHEDOP_shutdown_code 5 -+ -+/* -+ * Setup, poke and destroy a domain watchdog timer. -+ * @arg == pointer to sched_watchdog structure. -+ * With id == 0, setup a domain watchdog timer to cause domain shutdown -+ * after timeout, returns watchdog id. -+ * With id != 0 and timeout == 0, destroy domain watchdog timer. -+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. -+ */ -+#define SCHEDOP_watchdog 6 -+struct sched_watchdog { -+ uint32_t id; /* watchdog ID */ -+ uint32_t timeout; /* timeout */ -+}; -+ -+/* - * Reason codes for SCHEDOP_shutdown. These may be interpreted by control - * software to determine the appropriate action. 
For the most part, Xen does - * not care about the shutdown code. -@@ -73,5 +106,6 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_poll); - #define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ - #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ - #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ -+#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ - - #endif /* __XEN_PUBLIC_SCHED_H__ */ -diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h -new file mode 100644 -index 0000000..f31fdab ---- /dev/null -+++ b/include/xen/interface/xen-mca.h -@@ -0,0 +1,429 @@ -+/****************************************************************************** -+ * arch-x86/mca.h -+ * -+ * Contributed by Advanced Micro Devices, Inc. -+ * Author: Christoph Egger <Christoph.Egger@amd.com> -+ * -+ * Guest OS machine check interface to x86 Xen. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to -+ * deal in the Software without restriction, including without limitation the -+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -+ * sell copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -+ * DEALINGS IN THE SOFTWARE. -+ */ -+ -+/* Full MCA functionality has the following Usecases from the guest side: -+ * -+ * Must have's: -+ * 1. Dom0 and DomU register machine check trap callback handlers -+ * (already done via "set_trap_table" hypercall) -+ * 2. Dom0 registers machine check event callback handler -+ * (doable via EVTCHNOP_bind_virq) -+ * 3. Dom0 and DomU fetches machine check data -+ * 4. Dom0 wants Xen to notify a DomU -+ * 5. Dom0 gets DomU ID from physical address -+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy") -+ * -+ * Nice to have's: -+ * 7. Dom0 wants Xen to deactivate a physical CPU -+ * This is better done as separate task, physical CPU hotplugging, -+ * and hypercall(s) should be sysctl's -+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to -+ * move a DomU (or Dom0 itself) away from a malicious page -+ * producing correctable errors. -+ * 9. offlining physical page: -+ * Xen free's and never re-uses a certain physical page. -+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's -+ * and tell Xen to trigger a machine check -+ */ -+ -+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__ -+#define __XEN_PUBLIC_ARCH_X86_MCA_H__ -+ -+/* Hypercall */ -+#define __HYPERVISOR_mca __HYPERVISOR_arch_0 -+ -+/* -+ * The xen-unstable repo has interface version 0x03000001; out interface -+ * is incompatible with that and any future minor revisions, so we -+ * choose a different version number range that is numerically less -+ * than that used in xen-unstable. 
-+ */ -+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003 -+ -+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */ -+#define XEN_MC_NONURGENT 0x0001 -+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */ -+#define XEN_MC_URGENT 0x0002 -+/* IN: Dom0 acknowledges previosly-fetched error log entry */ -+#define XEN_MC_ACK 0x0004 -+ -+/* OUT: All is ok */ -+#define XEN_MC_OK 0x0 -+/* OUT: Domain could not fetch data. */ -+#define XEN_MC_FETCHFAILED 0x1 -+/* OUT: There was no machine check data to fetch. */ -+#define XEN_MC_NODATA 0x2 -+/* OUT: Between notification time and this hypercall an other -+ * (most likely) correctable error happened. The fetched data, -+ * does not match the original machine check data. */ -+#define XEN_MC_NOMATCH 0x4 -+ -+/* OUT: DomU did not register MC NMI handler. Try something else. */ -+#define XEN_MC_CANNOTHANDLE 0x8 -+/* OUT: Notifying DomU failed. Retry later or try something else. */ -+#define XEN_MC_NOTDELIVERED 0x10 -+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */ -+ -+ -+#ifndef __ASSEMBLY__ -+ -+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */ -+ -+/* -+ * Machine Check Architecure: -+ * structs are read-only and used to report all kinds of -+ * correctable and uncorrectable errors detected by the HW. -+ * Dom0 and DomU: register a handler to get notified. 
-+ * Dom0 only: Correctable errors are reported via VIRQ_MCA -+ */ -+#define MC_TYPE_GLOBAL 0 -+#define MC_TYPE_BANK 1 -+#define MC_TYPE_EXTENDED 2 -+#define MC_TYPE_RECOVERY 3 -+ -+struct mcinfo_common { -+ uint16_t type; /* structure type */ -+ uint16_t size; /* size of this struct in bytes */ -+}; -+ -+ -+#define MC_FLAG_CORRECTABLE (1 << 0) -+#define MC_FLAG_UNCORRECTABLE (1 << 1) -+#define MC_FLAG_RECOVERABLE (1 << 2) -+#define MC_FLAG_POLLED (1 << 3) -+#define MC_FLAG_RESET (1 << 4) -+#define MC_FLAG_CMCI (1 << 5) -+#define MC_FLAG_MCE (1 << 6) -+/* contains global x86 mc information */ -+struct mcinfo_global { -+ struct mcinfo_common common; -+ -+ /* running domain at the time in error (most likely -+ * the impacted one) */ -+ uint16_t mc_domid; -+ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ -+ uint32_t mc_socketid; /* physical socket of the physical core */ -+ uint16_t mc_coreid; /* physical impacted core */ -+ uint16_t mc_core_threadid; /* core thread of physical core */ -+ uint32_t mc_apicid; -+ uint32_t mc_flags; -+ uint64_t mc_gstatus; /* global status */ -+}; -+ -+/* contains bank local x86 mc information */ -+struct mcinfo_bank { -+ struct mcinfo_common common; -+ -+ uint16_t mc_bank; /* bank nr */ -+ uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on -+ * privileged pv-ops dom and if mc_addr is valid. -+ * Never valid on DomU. */ -+ uint64_t mc_status; /* bank status */ -+ uint64_t mc_addr; /* bank address, only valid -+ * if addr bit is set in mc_status */ -+ uint64_t mc_misc; -+ uint64_t mc_ctrl2; -+ uint64_t mc_tsc; -+}; -+ -+ -+struct mcinfo_msr { -+ uint64_t reg; /* MSR */ -+ uint64_t value; /* MSR value */ -+}; -+ -+/* contains mc information from other -+ * or additional mc MSRs */ -+struct mcinfo_extended { -+ struct mcinfo_common common; -+ -+ /* You can fill up to five registers. -+ * If you need more, then use this structure -+ * multiple times. */ -+ -+ uint32_t mc_msrs; /* Number of msr with valid values. 
*/ -+ /* -+ * Currently Intel extended MSR (32/64) include all gp registers -+ * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be -+ * useful at present. So expand this array to 16/32 to leave room. -+ */ -+ struct mcinfo_msr mc_msr[sizeof(void *) * 4]; -+}; -+ -+/* Recovery Action flags. Giving recovery result information to DOM0 */ -+ -+/* Xen takes successful recovery action, the error is recovered */ -+#define REC_ACTION_RECOVERED (0x1 << 0) -+/* No action is performed by XEN */ -+#define REC_ACTION_NONE (0x1 << 1) -+/* It's possible DOM0 might take action ownership in some case */ -+#define REC_ACTION_NEED_RESET (0x1 << 2) -+ -+/* Different Recovery Action types, if the action is performed successfully, -+ * REC_ACTION_RECOVERED flag will be returned. -+ */ -+ -+/* Page Offline Action */ -+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0) -+/* CPU offline Action */ -+#define MC_ACTION_CPU_OFFLINE (0x1 << 1) -+/* L3 cache disable Action */ -+#define MC_ACTION_CACHE_SHRINK (0x1 << 2) -+ -+/* Below interface used between XEN/DOM0 for passing XEN's recovery action -+ * information to DOM0. -+ * usage Senario: After offlining broken page, XEN might pass its page offline -+ * recovery action result to DOM0. DOM0 will save the information in -+ * non-volatile memory for further proactive actions, such as offlining the -+ * easy broken page earlier when doing next reboot. 
-+*/ -+struct page_offline_action { -+ /* Params for passing the offlined page number to DOM0 */ -+ uint64_t mfn; -+ uint64_t status; -+}; -+ -+struct cpu_offline_action { -+ /* Params for passing the identity of the offlined CPU to DOM0 */ -+ uint32_t mc_socketid; -+ uint16_t mc_coreid; -+ uint16_t mc_core_threadid; -+}; -+ -+#define MAX_UNION_SIZE 16 -+struct mcinfo_recovery { -+ struct mcinfo_common common; -+ uint16_t mc_bank; /* bank nr */ -+ /* Recovery Action Flags defined above such as REC_ACTION_DONE */ -+ uint8_t action_flags; -+ /* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */ -+ uint8_t action_types; -+ /* In future if more than one recovery action permitted per error bank, -+ * a mcinfo_recovery data array will be returned -+ */ -+ union { -+ struct page_offline_action page_retire; -+ struct cpu_offline_action cpu_offline; -+ uint8_t pad[MAX_UNION_SIZE]; -+ } action_info; -+}; -+ -+ -+#define MCINFO_HYPERCALLSIZE 1024 -+#define MCINFO_MAXSIZE 768 -+ -+struct mc_info { -+ /* Number of mcinfo_* entries in mi_data */ -+ uint32_t mi_nentries; -+ uint32_t _pad0; -+ uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; -+}; -+typedef struct mc_info mc_info_t; -+DEFINE_GUEST_HANDLE_STRUCT(mc_info); -+ -+#define __MC_MSR_ARRAYSIZE 8 -+#define __MC_NMSRS 1 -+#define MC_NCAPS 7 /* 7 CPU feature flag words */ -+#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ -+#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ -+#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ -+#define MC_CAPS_LINUX 3 /* Linux-defined */ -+#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ -+#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ -+#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ -+ -+struct mcinfo_logical_cpu { -+ uint32_t mc_cpunr; -+ uint32_t mc_chipid; -+ uint16_t mc_coreid; -+ uint16_t mc_threadid; -+ uint32_t mc_apicid; -+ uint32_t mc_clusterid; -+ uint32_t mc_ncores; -+ uint32_t mc_ncores_active; -+ 
uint32_t mc_nthreads; -+ int32_t mc_cpuid_level; -+ uint32_t mc_family; -+ uint32_t mc_vendor; -+ uint32_t mc_model; -+ uint32_t mc_step; -+ char mc_vendorid[16]; -+ char mc_brandid[64]; -+ uint32_t mc_cpu_caps[MC_NCAPS]; -+ uint32_t mc_cache_size; -+ uint32_t mc_cache_alignment; -+ int32_t mc_nmsrvals; -+ struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; -+}; -+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t; -+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu); -+ -+ -+/* -+ * OS's should use these instead of writing their own lookup function -+ * each with its own bugs and drawbacks. -+ * We use macros instead of static inline functions to allow guests -+ * to include this header in assembly files (*.S). -+ */ -+/* Prototype: -+ * uint32_t x86_mcinfo_nentries(struct mc_info *mi); -+ */ -+#define x86_mcinfo_nentries(_mi) \ -+ ((_mi)->mi_nentries) -+/* Prototype: -+ * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); -+ */ -+#define x86_mcinfo_first(_mi) \ -+ ((struct mcinfo_common *)(_mi)->mi_data) -+/* Prototype: -+ * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); -+ */ -+#define x86_mcinfo_next(_mic) \ -+ ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) -+ -+/* Prototype: -+ * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); -+ */ -+ -+static inline void x86_mcinfo_lookup -+ (struct mcinfo_common **ret, struct mc_info *mi, uint16_t type) -+{ -+ uint32_t found = 0, i; -+ struct mcinfo_common *mic; -+ -+ *ret = NULL; -+ if (!mi) -+ return; -+ mic = x86_mcinfo_first(mi); -+ -+ for (i = 0; i < x86_mcinfo_nentries(mi); i++) { -+ if (mic->type == type) { -+ found = 1; -+ break; -+ } -+ mic = x86_mcinfo_next(mic); -+ } -+ -+ *ret = found ? 
mic : NULL; -+} -+/* Usecase 1 -+ * Register machine check trap callback handler -+ * (already done via "set_trap_table" hypercall) -+ */ -+ -+/* Usecase 2 -+ * Dom0 registers machine check event callback handler -+ * done by EVTCHNOP_bind_virq -+ */ -+ -+/* Usecase 3 -+ * Fetch machine check data from hypervisor. -+ * Note, this hypercall is special, because both Dom0 and DomU must use this. -+ */ -+#define XEN_MC_fetch 1 -+struct xen_mc_fetch { -+ /* IN/OUT variables. -+ * IN: XEN_MC_NONURGENT, XEN_MC_URGENT, -+ * XEN_MC_ACK if ack'king an earlier fetch -+ * OUT: XEN_MC_OK, XEN_MC_FETCHAILED, -+ * XEN_MC_NODATA, XEN_MC_NOMATCH -+ */ -+ uint32_t flags; -+ uint32_t _pad0; -+ /* OUT: id for ack, IN: id we are ack'ing */ -+ uint64_t fetch_id; -+ -+ /* OUT variables. */ -+ GUEST_HANDLE(mc_info) data; -+}; -+typedef struct xen_mc_fetch xen_mc_fetch_t; -+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch); -+ -+ -+/* Usecase 4 -+ * This tells the hypervisor to notify a DomU about the machine check error -+ */ -+#define XEN_MC_notifydomain 2 -+struct xen_mc_notifydomain { -+ /* IN variables. */ -+ uint16_t mc_domid;/* The unprivileged domain to notify. */ -+ uint16_t mc_vcpuid;/* The vcpu in mc_domid to notify. -+ * Usually echo'd value from the fetch hypercall. */ -+ -+ /* IN/OUT variables. */ -+ uint32_t flags; -+ -+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */ -+}; -+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; -+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain); -+ -+#define XEN_MC_physcpuinfo 3 -+struct xen_mc_physcpuinfo { -+ /* IN/OUT */ -+ uint32_t ncpus; -+ uint32_t _pad0; -+ /* OUT */ -+ GUEST_HANDLE(mcinfo_logical_cpu) info; -+}; -+ -+#define XEN_MC_msrinject 4 -+#define MC_MSRINJ_MAXMSRS 8 -+struct xen_mc_msrinject { -+ /* IN */ -+ uint32_t mcinj_cpunr;/* target processor id */ -+ uint32_t mcinj_flags;/* see MC_MSRINJ_F_* below */ -+ uint32_t mcinj_count;/* 0 .. 
count-1 in array are valid */ -+ uint32_t _pad0; -+ struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; -+}; -+ -+/* Flags for mcinj_flags above; bits 16-31 are reserved */ -+#define MC_MSRINJ_F_INTERPOSE 0x1 -+ -+#define XEN_MC_mceinject 5 -+struct xen_mc_mceinject { -+ unsigned int mceinj_cpunr; /* target processor id */ -+}; -+ -+struct xen_mc { -+ uint32_t cmd; -+ uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ -+ union { -+ struct xen_mc_fetch mc_fetch; -+ struct xen_mc_notifydomain mc_notifydomain; -+ struct xen_mc_physcpuinfo mc_physcpuinfo; -+ struct xen_mc_msrinject mc_msrinject; -+ struct xen_mc_mceinject mc_mceinject; -+ } u; -+}; -+typedef struct xen_mc xen_mc_t; -+DEFINE_GUEST_HANDLE_STRUCT(xen_mc); -+ -+#endif /* __ASSEMBLY__ */ -+ -+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ -diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h -index 2befa3e..9ffaee0 100644 ---- a/include/xen/interface/xen.h -+++ b/include/xen/interface/xen.h -@@ -79,6 +79,7 @@ - #define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ - #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ - #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ -+#define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */ - - /* Architecture-specific VIRQ definitions. */ - #define VIRQ_ARCH_0 16 -@@ -184,6 +185,8 @@ - #define MMUEXT_NEW_USER_BASEPTR 15 - - #ifndef __ASSEMBLY__ -+#include <linux/types.h> -+ - struct mmuext_op { - unsigned int cmd; - union { -@@ -449,9 +452,49 @@ struct start_info { - int8_t cmd_line[MAX_GUEST_CMDLINE]; - }; - -+struct dom0_vga_console_info { -+ uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ -+#define XEN_VGATYPE_TEXT_MODE_3 0x03 -+#define XEN_VGATYPE_VESA_LFB 0x23 -+ -+ union { -+ struct { -+ /* Font height, in pixels. */ -+ uint16_t font_height; -+ /* Cursor location (column, row). */ -+ uint16_t cursor_x, cursor_y; -+ /* Number of rows and columns (dimensions in characters). 
*/ -+ uint16_t rows, columns; -+ } text_mode_3; -+ -+ struct { -+ /* Width and height, in pixels. */ -+ uint16_t width, height; -+ /* Bytes per scan line. */ -+ uint16_t bytes_per_line; -+ /* Bits per pixel. */ -+ uint16_t bits_per_pixel; -+ /* LFB physical address, and size (in units of 64kB). */ -+ uint32_t lfb_base; -+ uint32_t lfb_size; -+ /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ -+ uint8_t red_pos, red_size; -+ uint8_t green_pos, green_size; -+ uint8_t blue_pos, blue_size; -+ uint8_t rsvd_pos, rsvd_size; -+ -+ /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ -+ uint32_t gbl_caps; -+ /* Mode attributes (offset 0x0, VESA command 0x4f01). */ -+ uint16_t mode_attrs; -+ } vesa_lfb; -+ } u; -+}; -+ - /* These flags are passed in the 'flags' field of start_info_t. */ - #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ - #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ -+#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ - - typedef uint64_t cpumap_t; - -@@ -461,6 +504,8 @@ typedef uint8_t xen_domain_handle_t[16]; - #define __mk_unsigned_long(x) x ## UL - #define mk_unsigned_long(x) __mk_unsigned_long(x) - -+DEFINE_GUEST_HANDLE(uint64_t); -+ - #else /* __ASSEMBLY__ */ - - /* In assembly code we cannot use C numeric constant suffixes. 
*/ -diff --git a/include/xen/page.h b/include/xen/page.h -index eaf85fa..0be36b9 100644 ---- a/include/xen/page.h -+++ b/include/xen/page.h -@@ -1 +1,8 @@ -+#ifndef _XEN_PAGE_H -+#define _XEN_PAGE_H -+ - #include <asm/xen/page.h> -+ -+extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size; -+ -+#endif /* _XEN_PAGE_H */ -diff --git a/include/xen/pcpu.h b/include/xen/pcpu.h -new file mode 100644 -index 0000000..7e8f9d1 ---- /dev/null -+++ b/include/xen/pcpu.h -@@ -0,0 +1,32 @@ -+#ifndef _XEN_PCPU_H -+#define _XEN_PCPU_H -+ -+#include <xen/interface/platform.h> -+#include <linux/sysdev.h> -+ -+extern int xen_pcpu_hotplug(int type, uint32_t apic_id); -+#define XEN_PCPU_ONLINE 0x01 -+#define XEN_PCPU_OFFLINE 0x02 -+#define XEN_PCPU_ADD 0x04 -+#define XEN_PCPU_REMOVE 0x08 -+ -+struct pcpu { -+ struct list_head pcpu_list; -+ struct sys_device sysdev; -+ uint32_t xen_id; -+ uint32_t apic_id; -+ uint32_t acpi_id; -+ uint32_t flags; -+}; -+ -+static inline int xen_pcpu_online(uint32_t flags) -+{ -+ return !!(flags & XEN_PCPU_FLAGS_ONLINE); -+} -+ -+extern int register_xen_pcpu_notifier(struct notifier_block *nb); -+ -+extern void unregister_xen_pcpu_notifier(struct notifier_block *nb); -+ -+extern int xen_pcpu_index(uint32_t acpi_id, int is_acpiid); -+#endif -diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h -new file mode 100644 -index 0000000..a785a3b ---- /dev/null -+++ b/include/xen/platform_pci.h -@@ -0,0 +1,53 @@ -+#ifndef _XEN_PLATFORM_PCI_H -+#define _XEN_PLATFORM_PCI_H -+ -+#define XEN_IOPORT_MAGIC_VAL 0x49d2 -+#define XEN_IOPORT_LINUX_PRODNUM 0x0003 -+#define XEN_IOPORT_LINUX_DRVVER 0x0001 -+ -+#define XEN_IOPORT_BASE 0x10 -+ -+#define XEN_IOPORT_PLATFLAGS (XEN_IOPORT_BASE + 0) /* 1 byte access (R/W) */ -+#define XEN_IOPORT_MAGIC (XEN_IOPORT_BASE + 0) /* 2 byte access (R) */ -+#define XEN_IOPORT_UNPLUG (XEN_IOPORT_BASE + 0) /* 2 byte access (W) */ -+#define XEN_IOPORT_DRVVER (XEN_IOPORT_BASE + 0) /* 4 byte access (W) */ -+ -+#define 
XEN_IOPORT_SYSLOG (XEN_IOPORT_BASE + 2) /* 1 byte access (W) */ -+#define XEN_IOPORT_PROTOVER (XEN_IOPORT_BASE + 2) /* 1 byte access (R) */ -+#define XEN_IOPORT_PRODNUM (XEN_IOPORT_BASE + 2) /* 2 byte access (W) */ -+ -+#define XEN_UNPLUG_ALL_IDE_DISKS (1<<0) -+#define XEN_UNPLUG_ALL_NICS (1<<1) -+#define XEN_UNPLUG_AUX_IDE_DISKS (1<<2) -+#define XEN_UNPLUG_ALL (XEN_UNPLUG_ALL_IDE_DISKS|\ -+ XEN_UNPLUG_ALL_NICS|\ -+ XEN_UNPLUG_AUX_IDE_DISKS) -+ -+#define XEN_UNPLUG_UNNECESSARY (1<<16) -+#define XEN_UNPLUG_NEVER (1<<17) -+ -+static inline int xen_must_unplug_nics(void) { -+#if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -+ defined(CONFIG_XEN_NETDEV_FRONTEND_MODULE)) && \ -+ (defined(CONFIG_XEN_PLATFORM_PCI) || \ -+ defined(CONFIG_XEN_PLATFORM_PCI_MODULE)) -+ return 1; -+#else -+ return 0; -+#endif -+} -+ -+static inline int xen_must_unplug_disks(void) { -+#if (defined(CONFIG_XEN_BLKDEV_FRONTEND) || \ -+ defined(CONFIG_XEN_BLKDEV_FRONTEND_MODULE)) && \ -+ (defined(CONFIG_XEN_PLATFORM_PCI) || \ -+ defined(CONFIG_XEN_PLATFORM_PCI_MODULE)) -+ return 1; -+#else -+ return 0; -+#endif -+} -+ -+extern int xen_platform_pci_unplug; -+ -+#endif /* _XEN_PLATFORM_PCI_H */ -diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h -new file mode 100644 -index 0000000..b42cdfd ---- /dev/null -+++ b/include/xen/privcmd.h -@@ -0,0 +1,80 @@ -+/****************************************************************************** -+ * privcmd.h -+ * -+ * Interface to /proc/xen/privcmd. 
-+ * -+ * Copyright (c) 2003-2005, K A Fraser -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation; or, when distributed -+ * separately from the Linux kernel or incorporated into other -+ * software packages, subject to the following license: -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this source file (the "Software"), to deal in the Software without -+ * restriction, including without limitation the rights to use, copy, modify, -+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, -+ * and to permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -+ * IN THE SOFTWARE. 
-+ */ -+ -+#ifndef __LINUX_PUBLIC_PRIVCMD_H__ -+#define __LINUX_PUBLIC_PRIVCMD_H__ -+ -+#include <linux/types.h> -+ -+typedef unsigned long xen_pfn_t; -+ -+#ifndef __user -+#define __user -+#endif -+ -+struct privcmd_hypercall { -+ __u64 op; -+ __u64 arg[5]; -+}; -+ -+struct privcmd_mmap_entry { -+ __u64 va; -+ __u64 mfn; -+ __u64 npages; -+}; -+ -+struct privcmd_mmap { -+ int num; -+ domid_t dom; /* target domain */ -+ struct privcmd_mmap_entry __user *entry; -+}; -+ -+struct privcmd_mmapbatch { -+ int num; /* number of pages to populate */ -+ domid_t dom; /* target domain */ -+ __u64 addr; /* virtual address */ -+ xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ -+}; -+ -+/* -+ * @cmd: IOCTL_PRIVCMD_HYPERCALL -+ * @arg: &privcmd_hypercall_t -+ * Return: Value returned from execution of the specified hypercall. -+ */ -+#define IOCTL_PRIVCMD_HYPERCALL \ -+ _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) -+#define IOCTL_PRIVCMD_MMAP \ -+ _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) -+#define IOCTL_PRIVCMD_MMAPBATCH \ -+ _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) -+ -+#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ -diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h -index 883a21b..7058f8a 100644 ---- a/include/xen/xen-ops.h -+++ b/include/xen/xen-ops.h -@@ -7,6 +7,7 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); - - void xen_pre_suspend(void); - void xen_post_suspend(int suspend_cancelled); -+void xen_hvm_post_suspend(int suspend_cancelled); - - void xen_mm_pin_all(void); - void xen_mm_unpin_all(void); -@@ -14,4 +15,16 @@ void xen_mm_unpin_all(void); - void xen_timer_resume(void); - void xen_arch_resume(void); - -+int xen_remap_domain_mfn_range(struct vm_area_struct *vma, -+ unsigned long addr, -+ unsigned long mfn, int nr, -+ pgprot_t prot, unsigned domid); -+ -+extern unsigned long *xen_contiguous_bitmap; -+int xen_create_contiguous_region(unsigned long vstart, unsigned int order, -+ unsigned int address_bits); 
-+ -+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); -+int xen_setup_shutdown_event(void); -+ - #endif /* INCLUDE_XEN_OPS_H */ -diff --git a/include/xen/xen.h b/include/xen/xen.h -new file mode 100644 -index 0000000..77604ed ---- /dev/null -+++ b/include/xen/xen.h -@@ -0,0 +1,34 @@ -+#ifndef _XEN_XEN_H -+#define _XEN_XEN_H -+ -+enum xen_domain_type { -+ XEN_NATIVE, /* running on bare hardware */ -+ XEN_PV_DOMAIN, /* running in a PV domain */ -+ XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ -+}; -+ -+#ifdef CONFIG_XEN -+extern enum xen_domain_type xen_domain_type; -+extern void xen_hvm_guest_init(void); -+#else -+#define xen_domain_type XEN_NATIVE -+#define xen_hvm_guest_init() do { } while (0) -+#endif -+ -+#define xen_domain() (xen_domain_type != XEN_NATIVE) -+#define xen_pv_domain() (xen_domain() && \ -+ xen_domain_type == XEN_PV_DOMAIN) -+#define xen_hvm_domain() (xen_domain() && \ -+ xen_domain_type == XEN_HVM_DOMAIN) -+ -+#ifdef CONFIG_XEN_DOM0 -+#include <xen/interface/xen.h> -+#include <asm/xen/hypervisor.h> -+ -+#define xen_initial_domain() (xen_pv_domain() && \ -+ xen_start_info->flags & SIF_INITDOMAIN) -+#else /* !CONFIG_XEN_DOM0 */ -+#define xen_initial_domain() (0) -+#endif /* CONFIG_XEN_DOM0 */ -+ -+#endif /* _XEN_XEN_H */ -diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h -index b9763ba..542ca7c 100644 ---- a/include/xen/xenbus.h -+++ b/include/xen/xenbus.h -@@ -93,7 +93,7 @@ struct xenbus_driver { - int (*remove)(struct xenbus_device *dev); - int (*suspend)(struct xenbus_device *dev, pm_message_t state); - int (*resume)(struct xenbus_device *dev); -- int (*uevent)(struct xenbus_device *, char **, int, char *, int); -+ int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *); - struct device_driver driver; - int (*read_otherend_details)(struct xenbus_device *dev); - int (*is_ready)(struct xenbus_device *dev); -diff --git a/lib/Makefile b/lib/Makefile -index 452f188..001e918 100644 ---- a/lib/Makefile 
-+++ b/lib/Makefile -@@ -77,7 +77,8 @@ obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o - obj-$(CONFIG_SMP) += percpu_counter.o - obj-$(CONFIG_AUDIT_GENERIC) += audit.o - --obj-$(CONFIG_SWIOTLB) += swiotlb.o -+obj-$(CONFIG_SWIOTLB) += swiotlb-core.o swiotlb.o -+obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o - obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o - obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o - -diff --git a/lib/swiotlb-core.c b/lib/swiotlb-core.c -new file mode 100644 -index 0000000..a17c89e ---- /dev/null -+++ b/lib/swiotlb-core.c -@@ -0,0 +1,572 @@ -+/* -+ * Dynamic DMA mapping support. -+ * -+ * This implementation is a fallback for platforms that do not support -+ * I/O TLBs (aka DMA address translation hardware). -+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> -+ * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> -+ * Copyright (C) 2000, 2003 Hewlett-Packard Co -+ * David Mosberger-Tang <davidm@hpl.hp.com> -+ * -+ * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. -+ * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid -+ * unnecessary i-cache flushing. -+ * 04/07/.. ak Better overflow handling. Assorted fixes. -+ * 05/09/10 linville Add support for syncing ranges, support syncing for -+ * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. 
-+ * 08/12/11 beckyb Add highmem support -+ */ -+ -+#include <linux/cache.h> -+#include <linux/dma-mapping.h> -+#include <linux/mm.h> -+#include <linux/module.h> -+#include <linux/spinlock.h> -+#include <linux/string.h> -+#include <linux/swiotlb.h> -+#include <linux/pfn.h> -+#include <linux/types.h> -+#include <linux/ctype.h> -+#include <linux/highmem.h> -+ -+#include <linux/io.h> -+#include <asm/dma.h> -+#include <linux/scatterlist.h> -+ -+#include <linux/init.h> -+#include <linux/bootmem.h> -+#include <linux/iommu-helper.h> -+ -+#define OFFSET(val, align) ((unsigned long) ((val) & ((align) - 1))) -+ -+#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) -+ -+/* -+ * Minimum IO TLB size to bother booting with. Systems with mainly -+ * 64bit capable cards will only lightly use the swiotlb. If we can't -+ * allocate a contiguous 1MB, we're probably in trouble anyway. -+ */ -+#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -+ -+int swiotlb_force; -+ -+/* -+ * Used to do a quick range check in do_unmap_single and -+ * do_sync_single_*, to see if the memory was in fact allocated by this -+ * API. -+ */ -+char *io_tlb_start, *io_tlb_end; -+ -+/* -+ * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and -+ * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. -+ */ -+unsigned long io_tlb_nslabs; -+ -+/* -+ * When the IOMMU overflows we return a fallback buffer. This sets the size. -+ */ -+unsigned long io_tlb_overflow = 32*1024; -+ -+void *io_tlb_overflow_buffer; -+ -+/* -+ * This is a free list describing the number of free entries available from -+ * each index -+ */ -+static unsigned int *io_tlb_list; -+static unsigned int io_tlb_index; -+ -+/* -+ * We need to save away the original address corresponding to a mapped entry -+ * for the sync operations. 
-+ */ -+static phys_addr_t *io_tlb_orig_addr; -+ -+/* -+ * Protect the above data structures in the map and unmap calls -+ */ -+static DEFINE_SPINLOCK(io_tlb_lock); -+ -+static int late_alloc; -+ -+static int __init -+setup_io_tlb_npages(char *str) -+{ -+ int get_value(const char *token, char *str, char **endp) -+ { -+ ssize_t len; -+ int val = 0; -+ -+ len = strlen(token); -+ if (!strncmp(str, token, len)) { -+ str += len; -+ if (*str == '=') -+ ++str; -+ if (*str != '\0') -+ val = simple_strtoul(str, endp, 0); -+ } -+ *endp = str; -+ return val; -+ } -+ -+ int val; -+ -+ while (*str) { -+ /* The old syntax */ -+ if (isdigit(*str)) { -+ io_tlb_nslabs = simple_strtoul(str, &str, 0); -+ /* avoid tail segment of size < IO_TLB_SEGSIZE */ -+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); -+ } -+ if (!strncmp(str, "force", 5)) -+ swiotlb_force = 1; -+ /* The new syntax: swiotlb=nslabs=16384,overflow=32768,force */ -+ val = get_value("nslabs", str, &str); -+ if (val) -+ io_tlb_nslabs = ALIGN(val, IO_TLB_SEGSIZE); -+ -+ val = get_value("overflow", str, &str); -+ if (val) -+ io_tlb_overflow = val; -+ str = strpbrk(str, ","); -+ if (!str) -+ break; -+ str++; /* skip ',' */ -+ } -+ return 1; -+} -+__setup("swiotlb=", setup_io_tlb_npages); -+ -+void swiotlb_print_info(void) -+{ -+ unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; -+ phys_addr_t pstart, pend; -+ -+ pstart = virt_to_phys(io_tlb_start); -+ pend = virt_to_phys(io_tlb_end); -+ -+ printk(KERN_INFO "DMA: Placing %luMB software IO TLB between %p - %p\n", -+ bytes >> 20, io_tlb_start, io_tlb_end); -+ printk(KERN_INFO "DMA: software IO TLB at phys %#llx - %#llx\n", -+ (unsigned long long)pstart, -+ (unsigned long long)pend); -+} -+ -+/* -+ * Statically reserve bounce buffer space and initialize bounce buffer data -+ * structures for the software IO TLB used to implement the DMA API. 
-+ */ -+void __init -+swiotlb_init_early(size_t default_size, int verbose) -+{ -+ unsigned long i, bytes; -+ -+ if (!io_tlb_nslabs) { -+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); -+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); -+ } -+ -+ bytes = io_tlb_nslabs << IO_TLB_SHIFT; -+ -+ /* -+ * Get IO TLB memory from the low pages -+ */ -+ io_tlb_start = alloc_bootmem_low_pages(bytes); -+ if (!io_tlb_start) -+ panic("DMA: Cannot allocate SWIOTLB buffer"); -+ io_tlb_end = io_tlb_start + bytes; -+ -+ /* -+ * Allocate and initialize the free list array. This array is used -+ * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE -+ * between io_tlb_start and io_tlb_end. -+ */ -+ io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); -+ for (i = 0; i < io_tlb_nslabs; i++) -+ io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); -+ io_tlb_index = 0; -+ io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t)); -+ -+ /* -+ * Get the overflow emergency buffer -+ */ -+ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); -+ if (!io_tlb_overflow_buffer) -+ panic("DMA: Cannot allocate SWIOTLB overflow buffer!\n"); -+ if (verbose) -+ swiotlb_print_info(); -+} -+ -+void __init -+swiotlb_init(int verbose) -+{ -+ swiotlb_init_early(64 * (1<<20), verbose); /* default to 64MB */ -+} -+ -+/* -+ * Systems with larger DMA zones (those that don't support ISA) can -+ * initialize the swiotlb later using the slab allocator if needed. -+ * This should be just like above, but with some error catching. 
-+ */ -+int -+swiotlb_init_late(size_t default_size) -+{ -+ unsigned long i, bytes, req_nslabs = io_tlb_nslabs; -+ unsigned int order; -+ -+ if (!io_tlb_nslabs) { -+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); -+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); -+ } -+ -+ /* -+ * Get IO TLB memory from the low pages -+ */ -+ order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); -+ io_tlb_nslabs = SLABS_PER_PAGE << order; -+ bytes = io_tlb_nslabs << IO_TLB_SHIFT; -+ -+ while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { -+ io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, -+ order); -+ if (io_tlb_start) -+ break; -+ order--; -+ } -+ -+ if (!io_tlb_start) -+ goto cleanup1; -+ -+ if (order != get_order(bytes)) { -+ printk(KERN_WARNING "DMA: Warning: only able to allocate %ld MB" -+ " for software IO TLB\n", (PAGE_SIZE << order) >> 20); -+ io_tlb_nslabs = SLABS_PER_PAGE << order; -+ bytes = io_tlb_nslabs << IO_TLB_SHIFT; -+ } -+ io_tlb_end = io_tlb_start + bytes; -+ memset(io_tlb_start, 0, bytes); -+ -+ /* -+ * Allocate and initialize the free list array. This array is used -+ * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE -+ * between io_tlb_start and io_tlb_end. 
-+ */ -+ io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, -+ get_order(io_tlb_nslabs * sizeof(int))); -+ if (!io_tlb_list) -+ goto cleanup2; -+ -+ for (i = 0; i < io_tlb_nslabs; i++) -+ io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); -+ io_tlb_index = 0; -+ -+ io_tlb_orig_addr = (phys_addr_t *) __get_free_pages(GFP_KERNEL, -+ get_order(io_tlb_nslabs * sizeof(phys_addr_t))); -+ if (!io_tlb_orig_addr) -+ goto cleanup3; -+ -+ memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t)); -+ -+ /* -+ * Get the overflow emergency buffer -+ */ -+ io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA, -+ get_order(io_tlb_overflow)); -+ if (!io_tlb_overflow_buffer) -+ goto cleanup4; -+ -+ swiotlb_print_info(); -+ -+ late_alloc = 1; -+ -+ return 0; -+ -+cleanup4: -+ free_pages((unsigned long)io_tlb_orig_addr, -+ get_order(io_tlb_nslabs * sizeof(phys_addr_t))); -+ io_tlb_orig_addr = NULL; -+cleanup3: -+ free_pages((unsigned long)io_tlb_list, -+ get_order(io_tlb_nslabs * sizeof(int))); -+ io_tlb_list = NULL; -+cleanup2: -+ io_tlb_end = NULL; -+ free_pages((unsigned long)io_tlb_start, order); -+ io_tlb_start = NULL; -+cleanup1: -+ io_tlb_nslabs = req_nslabs; -+ return -ENOMEM; -+} -+ -+void __init swiotlb_free(void) -+{ -+ if (!io_tlb_overflow_buffer) -+ return; -+ -+ if (late_alloc) { -+ free_pages((unsigned long)io_tlb_overflow_buffer, -+ get_order(io_tlb_overflow)); -+ free_pages((unsigned long)io_tlb_orig_addr, -+ get_order(io_tlb_nslabs * sizeof(phys_addr_t))); -+ free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * -+ sizeof(int))); -+ free_pages((unsigned long)io_tlb_start, -+ get_order(io_tlb_nslabs << IO_TLB_SHIFT)); -+ } else { -+ free_bootmem_late(__pa(io_tlb_overflow_buffer), -+ io_tlb_overflow); -+ free_bootmem_late(__pa(io_tlb_orig_addr), -+ io_tlb_nslabs * sizeof(phys_addr_t)); -+ free_bootmem_late(__pa(io_tlb_list), -+ io_tlb_nslabs * sizeof(int)); -+ free_bootmem_late(__pa(io_tlb_start), -+ io_tlb_nslabs << 
IO_TLB_SHIFT); -+ } -+} -+ -+int is_swiotlb_buffer(phys_addr_t paddr) -+{ -+ return paddr >= virt_to_phys(io_tlb_start) && -+ paddr < virt_to_phys(io_tlb_end); -+} -+ -+/* -+ * Bounce: copy the swiotlb buffer back to the original dma location -+ */ -+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, -+ enum dma_data_direction dir) -+{ -+ unsigned long pfn = PFN_DOWN(phys); -+ -+ if (PageHighMem(pfn_to_page(pfn))) { -+ /* The buffer does not have a mapping. Map it in and copy */ -+ unsigned int offset = phys & ~PAGE_MASK; -+ char *buffer; -+ unsigned int sz = 0; -+ unsigned long flags; -+ -+ while (size) { -+ sz = min_t(size_t, PAGE_SIZE - offset, size); -+ -+ local_irq_save(flags); -+ buffer = kmap_atomic(pfn_to_page(pfn), -+ KM_BOUNCE_READ); -+ if (dir == DMA_TO_DEVICE) -+ memcpy(dma_addr, buffer + offset, sz); -+ else -+ memcpy(buffer + offset, dma_addr, sz); -+ kunmap_atomic(buffer, KM_BOUNCE_READ); -+ local_irq_restore(flags); -+ -+ size -= sz; -+ pfn++; -+ dma_addr += sz; -+ offset = 0; -+ } -+ } else { -+ if (dir == DMA_TO_DEVICE) -+ memcpy(dma_addr, phys_to_virt(phys), size); -+ else -+ memcpy(phys_to_virt(phys), dma_addr, size); -+ } -+} -+ -+/* -+ * Allocates bounce buffer and returns its kernel virtual address. -+ */ -+void * -+do_map_single(struct device *hwdev, phys_addr_t phys, -+ unsigned long start_dma_addr, size_t size, int dir) -+{ -+ unsigned long flags; -+ char *dma_addr; -+ unsigned int nslots, stride, index, wrap; -+ int i; -+ unsigned long mask; -+ unsigned long offset_slots; -+ unsigned long max_slots; -+ -+ mask = dma_get_seg_boundary(hwdev); -+ start_dma_addr = start_dma_addr & mask; -+ offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; -+ -+ /* -+ * Carefully handle integer overflow which can occur when mask == ~0UL. -+ */ -+ max_slots = mask + 1 -+ ? 
ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT -+ : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); -+ -+ /* -+ * For mappings greater than a page, we limit the stride (and -+ * hence alignment) to a page size. -+ */ -+ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; -+ if (size > PAGE_SIZE) -+ stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); -+ else -+ stride = 1; -+ -+ BUG_ON(!nslots); -+ -+ /* -+ * Find suitable number of IO TLB entries size that will fit this -+ * request and allocate a buffer from that IO TLB pool. -+ */ -+ spin_lock_irqsave(&io_tlb_lock, flags); -+ index = ALIGN(io_tlb_index, stride); -+ if (index >= io_tlb_nslabs) -+ index = 0; -+ wrap = index; -+ -+ do { -+ while (iommu_is_span_boundary(index, nslots, offset_slots, -+ max_slots)) { -+ index += stride; -+ if (index >= io_tlb_nslabs) -+ index = 0; -+ if (index == wrap) -+ goto not_found; -+ } -+ -+ /* -+ * If we find a slot that indicates we have 'nslots' number of -+ * contiguous buffers, we allocate the buffers from that slot -+ * and mark the entries as '0' indicating unavailable. -+ */ -+ if (io_tlb_list[index] >= nslots) { -+ int count = 0; -+ -+ for (i = index; i < (int) (index + nslots); i++) -+ io_tlb_list[i] = 0; -+ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) -+ != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) -+ io_tlb_list[i] = ++count; -+ dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); -+ -+ /* -+ * Update the indices to avoid searching in the next -+ * round. -+ */ -+ io_tlb_index = ((index + nslots) < io_tlb_nslabs -+ ? (index + nslots) : 0); -+ -+ goto found; -+ } -+ index += stride; -+ if (index >= io_tlb_nslabs) -+ index = 0; -+ } while (index != wrap); -+ -+not_found: -+ spin_unlock_irqrestore(&io_tlb_lock, flags); -+ return NULL; -+found: -+ spin_unlock_irqrestore(&io_tlb_lock, flags); -+ -+ /* -+ * Save away the mapping from the original address to the DMA address. -+ * This is needed when we sync the memory. Then we sync the buffer if -+ * needed. 
-+ */ -+ for (i = 0; i < nslots; i++) -+ io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT); -+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) -+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); -+ -+ return dma_addr; -+} -+ -+/* -+ * dma_addr is the kernel virtual address of the bounce buffer to unmap. -+ */ -+void -+do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) -+{ -+ unsigned long flags; -+ int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; -+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; -+ phys_addr_t phys = io_tlb_orig_addr[index]; -+ -+ /* -+ * First, sync the memory before unmapping the entry -+ */ -+ if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) -+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); -+ -+ /* -+ * Return the buffer to the free list by setting the corresponding -+ * entries to indicate the number of contigous entries available. -+ * While returning the entries to the free list, we merge the entries -+ * with slots below and above the pool being returned. -+ */ -+ spin_lock_irqsave(&io_tlb_lock, flags); -+ { -+ count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? 
-+ io_tlb_list[index + nslots] : 0); -+ /* -+ * Step 1: return the slots to the free list, merging the -+ * slots with superceeding slots -+ */ -+ for (i = index + nslots - 1; i >= index; i--) -+ io_tlb_list[i] = ++count; -+ /* -+ * Step 2: merge the returned slots with the preceding slots, -+ * if available (non zero) -+ */ -+ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != -+ IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) -+ io_tlb_list[i] = ++count; -+ } -+ spin_unlock_irqrestore(&io_tlb_lock, flags); -+} -+ -+void -+do_sync_single(struct device *hwdev, char *dma_addr, size_t size, -+ int dir, int target) -+{ -+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; -+ phys_addr_t phys = io_tlb_orig_addr[index]; -+ -+ phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1)); -+ -+ switch (target) { -+ case SYNC_FOR_CPU: -+ if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) -+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); -+ else -+ BUG_ON(dir != DMA_TO_DEVICE); -+ break; -+ case SYNC_FOR_DEVICE: -+ if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) -+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); -+ else -+ BUG_ON(dir != DMA_FROM_DEVICE); -+ break; -+ default: -+ BUG(); -+ } -+} -+void -+swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) -+{ -+ /* -+ * Ran out of IOMMU space for this operation. This is very bad. -+ * Unfortunately the drivers cannot handle this operation properly. -+ * unless they check for dma_mapping_error (most don't) -+ * When the mapping is small enough return a static buffer to limit -+ * the damage, or panic when the transfer is too big. 
-+ */ -+ dev_err(dev, "DMA: Out of SW-IOMMU space for %zu bytes.", size); -+ -+ if (size <= io_tlb_overflow || !do_panic) -+ return; -+ -+ if (dir == DMA_BIDIRECTIONAL) -+ panic("DMA: Random memory could be DMA accessed\n"); -+ if (dir == DMA_FROM_DEVICE) -+ panic("DMA: Random memory could be DMA written\n"); -+ if (dir == DMA_TO_DEVICE) -+ panic("DMA: Random memory could be DMA read\n"); -+} -diff --git a/lib/swiotlb-xen.c b/lib/swiotlb-xen.c -new file mode 100644 -index 0000000..bee577f ---- /dev/null -+++ b/lib/swiotlb-xen.c -@@ -0,0 +1,504 @@ -+/* An software based IOMMU that utilizes the swiotlb-core fuctionality. -+ * It can function on Xen when there are PCI devices present.*/ -+ -+ -+#include <linux/dma-mapping.h> -+#include <linux/io.h> -+#include <asm/dma.h> -+#include <linux/scatterlist.h> -+#include <xen/interface/xen.h> -+#include <xen/grant_table.h> -+ -+#include <asm/xen/page.h> -+#include <xen/page.h> -+#include <xen/xen-ops.h> -+ -+static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) -+{ -+ return phys_to_machine(XPADDR(paddr)).maddr;; -+} -+ -+static phys_addr_t xen_bus_to_phys(dma_addr_t baddr) -+{ -+ return machine_to_phys(XMADDR(baddr)).paddr; -+} -+ -+static dma_addr_t xen_virt_to_bus(void *address) -+{ -+ return xen_phys_to_bus(virt_to_phys(address)); -+} -+ -+static int check_pages_physically_contiguous(unsigned long pfn, -+ unsigned int offset, -+ size_t length) -+{ -+ unsigned long next_mfn; -+ int i; -+ int nr_pages; -+ -+ next_mfn = pfn_to_mfn(pfn); -+ nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; -+ -+ for (i = 1; i < nr_pages; i++) { -+ if (pfn_to_mfn(++pfn) != ++next_mfn) -+ return 0; -+ } -+ return 1; -+} -+ -+static int range_straddles_page_boundary(phys_addr_t p, size_t size) -+{ -+ unsigned long pfn = PFN_DOWN(p); -+ unsigned int offset = p & ~PAGE_MASK; -+ -+ if (offset + size <= PAGE_SIZE) -+ return 0; -+ if (check_pages_physically_contiguous(pfn, offset, size)) -+ return 0; -+ return 1; -+} -+ -+ -+bool 
xen_dma_capable(struct device *dev, dma_addr_t dev_addr, -+ phys_addr_t phys, size_t size) -+{ -+ int rc = 0; -+ -+ rc = dma_capable(dev, dev_addr, size) && -+ !range_straddles_page_boundary(phys, size); -+ return rc; -+} -+ -+static int is_xen_swiotlb_buffer(dma_addr_t dma_addr) -+{ -+ unsigned long mfn = PFN_DOWN(dma_addr); -+ unsigned long pfn = mfn_to_local_pfn(mfn); -+ -+ /* If the address is outside our domain, it CAN have the same virtual -+ * address as another address in our domain. Hence only check address -+ * within our domain. */ -+ if (pfn_valid(pfn)) -+ return is_swiotlb_buffer(PFN_PHYS(pfn)); -+ -+ return 0; -+} -+void * -+xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, -+ dma_addr_t *dma_handle, gfp_t flags) -+{ -+ void *ret; -+ int order = get_order(size); -+ u64 dma_mask = DMA_BIT_MASK(32); -+ unsigned long vstart; -+ -+ /* -+ * Ignore region specifiers - the kernel's ideas of -+ * pseudo-phys memory layout has nothing to do with the -+ * machine physical layout. We can't allocate highmem -+ * because we can't return a pointer to it. 
-+ */ -+ flags &= ~(__GFP_DMA | __GFP_HIGHMEM); -+ -+ if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret)) -+ return ret; -+ -+ vstart = __get_free_pages(flags, order); -+ ret = (void *)vstart; -+ -+ if (hwdev && hwdev->coherent_dma_mask) -+ dma_mask = dma_alloc_coherent_mask(hwdev, flags); -+ -+ if (ret) { -+ if (xen_create_contiguous_region(vstart, order, -+ fls64(dma_mask)) != 0) { -+ free_pages(vstart, order); -+ return NULL; -+ } -+ memset(ret, 0, size); -+ *dma_handle = virt_to_machine(ret).maddr; -+ } -+ return ret; -+} -+EXPORT_SYMBOL(xen_swiotlb_alloc_coherent); -+ -+void -+xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, -+ dma_addr_t dev_addr) -+{ -+ int order = get_order(size); -+ -+ if (dma_release_from_coherent(hwdev, order, vaddr)) -+ return; -+ -+ xen_destroy_contiguous_region((unsigned long)vaddr, order); -+ free_pages((unsigned long)vaddr, order); -+} -+EXPORT_SYMBOL(xen_swiotlb_free_coherent); -+ -+ -+static int max_dma_bits = 32; -+ -+static int -+xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) -+{ -+ int i, rc; -+ int dma_bits; -+ -+ printk(KERN_INFO "xen_swiotlb_fixup: buf=%p size=%zu\n", -+ buf, size); -+ -+ dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; -+ -+ i = 0; -+ do { -+ int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE); -+ -+ do { -+ rc = xen_create_contiguous_region( -+ (unsigned long)buf + (i << IO_TLB_SHIFT), -+ get_order(slabs << IO_TLB_SHIFT), -+ dma_bits); -+ } while (rc && dma_bits++ < max_dma_bits); -+ if (rc) -+ return rc; -+ -+ i += slabs; -+ } while(i < nslabs); -+ return 0; -+} -+ -+void __init xen_swiotlb_init(int verbose) -+{ -+ int rc = 0; -+ -+ swiotlb_init_early(64 * (1<<20), verbose); -+ -+ if ((rc = xen_swiotlb_fixup(io_tlb_start, -+ io_tlb_nslabs << IO_TLB_SHIFT, -+ io_tlb_nslabs))) -+ goto error; -+ -+ if ((rc = xen_swiotlb_fixup(io_tlb_overflow_buffer, -+ io_tlb_overflow, -+ io_tlb_overflow >> IO_TLB_SHIFT))) -+ goto error; -+ -+ return; 
-+error: -+ panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\ -+ "We either don't have the permission or you do not have enough"\ -+ "free memory under 4GB!\n", rc); -+} -+ -+/* -+ * Map a single buffer of the indicated size for DMA in streaming mode. The -+ * physical address to use is returned. -+ * -+ * Once the device is given the dma address, the device owns this memory until -+ * either xen_swiotlb_unmap_page or xen_swiotlb_dma_sync_single is performed. -+ */ -+dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, -+ unsigned long offset, size_t size, -+ enum dma_data_direction dir, -+ struct dma_attrs *attrs) -+{ -+ unsigned long start_dma_addr; -+ phys_addr_t phys = page_to_phys(page) + offset; -+ dma_addr_t dev_addr = xen_phys_to_bus(phys); -+ void *map; -+ -+ BUG_ON(dir == DMA_NONE); -+ /* -+ * If the address happens to be in the device's DMA window, -+ * we can safely return the device addr and not worry about bounce -+ * buffering it. -+ */ -+ if (dma_capable(dev, dev_addr, size) && -+ !range_straddles_page_boundary(phys, size) && !swiotlb_force) -+ return dev_addr; -+ -+ /* -+ * Oh well, have to allocate and map a bounce buffer. -+ */ -+ start_dma_addr = xen_virt_to_bus(io_tlb_start); -+ map = do_map_single(dev, phys, start_dma_addr, size, dir); -+ if (!map) { -+ swiotlb_full(dev, size, dir, 1); -+ map = io_tlb_overflow_buffer; -+ } -+ -+ dev_addr = xen_virt_to_bus(map); -+ -+ /* -+ * Ensure that the address returned is DMA'ble -+ */ -+ if (!dma_capable(dev, dev_addr, size)) -+ panic("DMA: xen_swiotlb_map_single: bounce buffer is not " \ -+ "DMA'ble\n"); -+ return dev_addr; -+} -+EXPORT_SYMBOL_GPL(xen_swiotlb_map_page); -+ -+/* -+ * Unmap a single streaming mode DMA translation. The dma_addr and size must -+ * match what was provided for in a previous xen_swiotlb_map_page call. All -+ * other usages are undefined. 
-+ * -+ * After this call, reads by the cpu to the buffer are guaranteed to see -+ * whatever the device wrote there. -+ */ -+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, -+ size_t size, int dir) -+{ -+ phys_addr_t paddr = xen_bus_to_phys(dev_addr); -+ -+ BUG_ON(dir == DMA_NONE); -+ -+ /* NOTE: We use dev_addr here, not paddr! */ -+ if (is_xen_swiotlb_buffer(dev_addr)) { -+ do_unmap_single(hwdev, phys_to_virt(paddr), size, dir); -+ return; -+ } -+ -+ if (dir != DMA_FROM_DEVICE) -+ return; -+ -+ /* -+ * phys_to_virt doesn't work with hihgmem page but we could -+ * call dma_mark_clean() with hihgmem page here. However, we -+ * are fine since dma_mark_clean() is null on POWERPC. We can -+ * make dma_mark_clean() take a physical address if necessary. -+ */ -+ dma_mark_clean(phys_to_virt(paddr), size); -+} -+ -+void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, -+ size_t size, enum dma_data_direction dir, -+ struct dma_attrs *attrs) -+{ -+ unmap_single(hwdev, dev_addr, size, dir); -+} -+EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_page); -+ -+/* -+ * Make physical memory consistent for a single streaming mode DMA translation -+ * after a transfer. -+ * -+ * If you perform a xen_swiotlb_map_page() but wish to interrogate the buffer -+ * using the cpu, yet do not wish to teardown the dma mapping, you must -+ * call this function before doing so. 
At the next point you give the dma -+ * address back to the card, you must first perform a -+ * xen_swiotlb_dma_sync_for_device, and then the device again owns the buffer -+ */ -+static void -+xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, -+ size_t size, int dir, int target) -+{ -+ phys_addr_t paddr = xen_bus_to_phys(dev_addr); -+ -+ BUG_ON(dir == DMA_NONE); -+ -+ if (is_xen_swiotlb_buffer(dev_addr)) { -+ do_sync_single(hwdev, phys_to_virt(paddr), size, dir, target); -+ return; -+ } -+ -+ if (dir != DMA_FROM_DEVICE) -+ return; -+ -+ dma_mark_clean(phys_to_virt(paddr), size); -+} -+ -+void -+xen_swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, -+ size_t size, enum dma_data_direction dir) -+{ -+ xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); -+} -+EXPORT_SYMBOL(xen_swiotlb_sync_single_for_cpu); -+ -+void -+xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, -+ size_t size, enum dma_data_direction dir) -+{ -+ xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); -+} -+EXPORT_SYMBOL(xen_swiotlb_sync_single_for_device); -+ -+/* -+ * Same as above, but for a sub-range of the mapping. 
-+ */ -+static void -+xen_swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, -+ unsigned long offset, size_t size, -+ int dir, int target) -+{ -+ xen_swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target); -+} -+ -+void -+xen_swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, -+ unsigned long offset, size_t size, -+ enum dma_data_direction dir) -+{ -+ xen_swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, -+ SYNC_FOR_CPU); -+} -+EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_range_for_cpu); -+ -+void -+xen_swiotlb_sync_single_range_for_device(struct device *hwdev, -+ dma_addr_t dev_addr, -+ unsigned long offset, size_t size, -+ enum dma_data_direction dir) -+{ -+ xen_swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, -+ SYNC_FOR_DEVICE); -+} -+EXPORT_SYMBOL_GPL(xen_swiotlb_sync_single_range_for_device); -+ -+/* -+ * Map a set of buffers described by scatterlist in streaming mode for DMA. -+ * This is the scatter-gather version of the above xen_swiotlb_map_page -+ * interface. Here the scatter gather list elements are each tagged with the -+ * appropriate dma address and length. They are obtained via -+ * sg_dma_{address,length}(SG). -+ * -+ * NOTE: An implementation may be able to use a smaller number of -+ * DMA address/length pairs than there are SG table elements. -+ * (for example via virtual mapping capabilities) -+ * The routine returns the number of addr/length pairs actually -+ * used, at most nents. -+ * -+ * Device ownership issues as mentioned above for xen_swiotlb_map_page are the -+ * same here. 
-+ */ -+int -+xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, -+ int nelems, enum dma_data_direction dir, -+ struct dma_attrs *attrs) -+{ -+ unsigned long start_dma_addr; -+ struct scatterlist *sg; -+ int i; -+ BUG_ON(dir == DMA_NONE); -+ -+ start_dma_addr = xen_virt_to_bus(io_tlb_start); -+ for_each_sg(sgl, sg, nelems, i) { -+ phys_addr_t paddr = sg_phys(sg); -+ dma_addr_t dev_addr = xen_phys_to_bus(paddr); -+ -+ if (swiotlb_force || -+ !dma_capable(hwdev, dev_addr, sg->length) || -+ range_straddles_page_boundary(paddr, sg->length)) { -+ void *map = do_map_single(hwdev, sg_phys(sg), -+ start_dma_addr, -+ sg->length, dir); -+ if (!map) { -+ /* Don't panic here, we expect map_sg users -+ to do proper error handling. */ -+ swiotlb_full(hwdev, sg->length, dir, 0); -+ xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, -+ attrs); -+ sgl[0].dma_length = 0; -+ return 0; -+ } -+ sg->dma_address = xen_virt_to_bus(map); -+ } else -+ sg->dma_address = dev_addr; -+ sg->dma_length = sg->length; -+ } -+ return nelems; -+} -+EXPORT_SYMBOL(xen_swiotlb_map_sg_attrs); -+ -+int -+xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, -+ int dir) -+{ -+ return xen_swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); -+} -+EXPORT_SYMBOL(xen_swiotlb_map_sg); -+ -+/* -+ * Unmap a set of streaming mode DMA translations. Again, cpu read rules -+ * concerning calls here are the same as for xen_swiotlb_unmap_page() above. 
-+ */ -+void -+xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, -+ int nelems, enum dma_data_direction dir, -+ struct dma_attrs *attrs) -+{ -+ struct scatterlist *sg; -+ int i; -+ -+ BUG_ON(dir == DMA_NONE); -+ -+ for_each_sg(sgl, sg, nelems, i) -+ unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); -+ -+} -+EXPORT_SYMBOL(xen_swiotlb_unmap_sg_attrs); -+ -+void -+xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, -+ int dir) -+{ -+ return xen_swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); -+} -+EXPORT_SYMBOL(xen_swiotlb_unmap_sg); -+ -+/* -+ * Make physical memory consistent for a set of streaming mode DMA translations -+ * after a transfer. -+ * -+ * The same as xen_swiotlb_sync_single_* but for a scatter-gather list, -+ * same rules and usage. -+ */ -+static void -+xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, -+ int nelems, int dir, int target) -+{ -+ struct scatterlist *sg; -+ int i; -+ -+ for_each_sg(sgl, sg, nelems, i) -+ xen_swiotlb_sync_single(hwdev, sg->dma_address, -+ sg->dma_length, dir, target); -+} -+ -+void -+xen_swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, -+ int nelems, enum dma_data_direction dir) -+{ -+ xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); -+} -+EXPORT_SYMBOL(xen_swiotlb_sync_sg_for_cpu); -+ -+void -+xen_swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, -+ int nelems, enum dma_data_direction dir) -+{ -+ xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); -+} -+EXPORT_SYMBOL(xen_swiotlb_sync_sg_for_device); -+ -+int -+xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) -+{ -+ return (dma_addr == xen_virt_to_bus(io_tlb_overflow_buffer)); -+} -+EXPORT_SYMBOL(xen_swiotlb_dma_mapping_error); -+ -+/* -+ * Return whether the given device DMA address mask can be supported -+ * properly. 
For example, if your device can only drive the low 24-bits -+ * during bus mastering, then you would pass 0x00ffffff as the mask to -+ * this function. -+ */ -+int -+xen_swiotlb_dma_supported(struct device *hwdev, u64 mask) -+{ -+ return xen_virt_to_bus(io_tlb_end - 1) <= mask; -+} -+EXPORT_SYMBOL(xen_swiotlb_dma_supported); -diff --git a/lib/swiotlb.c b/lib/swiotlb.c -index ac25cd2..f6bbcd1 100644 ---- a/lib/swiotlb.c -+++ b/lib/swiotlb.c -@@ -1,118 +1,11 @@ --/* -- * Dynamic DMA mapping support. -- * -- * This implementation is a fallback for platforms that do not support -- * I/O TLBs (aka DMA address translation hardware). -- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> -- * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> -- * Copyright (C) 2000, 2003 Hewlett-Packard Co -- * David Mosberger-Tang <davidm@hpl.hp.com> -- * -- * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. -- * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid -- * unnecessary i-cache flushing. -- * 04/07/.. ak Better overflow handling. Assorted fixes. -- * 05/09/10 linville Add support for syncing ranges, support syncing for -- * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. -- * 08/12/11 beckyb Add highmem support -- */ - --#include <linux/cache.h> - #include <linux/dma-mapping.h> --#include <linux/mm.h> - #include <linux/module.h> --#include <linux/spinlock.h> --#include <linux/string.h> - #include <linux/swiotlb.h> --#include <linux/pfn.h> --#include <linux/types.h> --#include <linux/ctype.h> --#include <linux/highmem.h> - --#include <asm/io.h> --#include <asm/dma.h> - #include <asm/scatterlist.h> -- --#include <linux/init.h> --#include <linux/bootmem.h> - #include <linux/iommu-helper.h> - --#define OFFSET(val,align) ((unsigned long) \ -- ( (val) & ( (align) - 1))) -- --#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) -- --/* -- * Minimum IO TLB size to bother booting with. 
Systems with mainly -- * 64bit capable cards will only lightly use the swiotlb. If we can't -- * allocate a contiguous 1MB, we're probably in trouble anyway. -- */ --#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -- --/* -- * Enumeration for sync targets -- */ --enum dma_sync_target { -- SYNC_FOR_CPU = 0, -- SYNC_FOR_DEVICE = 1, --}; -- --int swiotlb_force; -- --/* -- * Used to do a quick range check in unmap_single and -- * sync_single_*, to see if the memory was in fact allocated by this -- * API. -- */ --static char *io_tlb_start, *io_tlb_end; -- --/* -- * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and -- * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. -- */ --static unsigned long io_tlb_nslabs; -- --/* -- * When the IOMMU overflows we return a fallback buffer. This sets the size. -- */ --static unsigned long io_tlb_overflow = 32*1024; -- --void *io_tlb_overflow_buffer; -- --/* -- * This is a free list describing the number of free entries available from -- * each index -- */ --static unsigned int *io_tlb_list; --static unsigned int io_tlb_index; -- --/* -- * We need to save away the original address corresponding to a mapped entry -- * for the sync operations. -- */ --static phys_addr_t *io_tlb_orig_addr; -- --/* -- * Protect the above data structures in the map and unmap calls -- */ --static DEFINE_SPINLOCK(io_tlb_lock); -- --static int __init --setup_io_tlb_npages(char *str) --{ -- if (isdigit(*str)) { -- io_tlb_nslabs = simple_strtoul(str, &str, 0); -- /* avoid tail segment of size < IO_TLB_SEGSIZE */ -- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); -- } -- if (*str == ',') -- ++str; -- if (!strcmp(str, "force")) -- swiotlb_force = 1; -- return 1; --} --__setup("swiotlb=", setup_io_tlb_npages); --/* make io_tlb_overflow tunable too? 
*/ - - /* Note that this doesn't work with highmem page */ - static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, -@@ -120,390 +13,6 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, - { - return phys_to_dma(hwdev, virt_to_phys(address)); - } -- --static void swiotlb_print_info(unsigned long bytes) --{ -- phys_addr_t pstart, pend; -- -- pstart = virt_to_phys(io_tlb_start); -- pend = virt_to_phys(io_tlb_end); -- -- printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n", -- bytes >> 20, io_tlb_start, io_tlb_end); -- printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n", -- (unsigned long long)pstart, -- (unsigned long long)pend); --} -- --/* -- * Statically reserve bounce buffer space and initialize bounce buffer data -- * structures for the software IO TLB used to implement the DMA API. -- */ --void __init --swiotlb_init_with_default_size(size_t default_size) --{ -- unsigned long i, bytes; -- -- if (!io_tlb_nslabs) { -- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); -- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); -- } -- -- bytes = io_tlb_nslabs << IO_TLB_SHIFT; -- -- /* -- * Get IO TLB memory from the low pages -- */ -- io_tlb_start = alloc_bootmem_low_pages(bytes); -- if (!io_tlb_start) -- panic("Cannot allocate SWIOTLB buffer"); -- io_tlb_end = io_tlb_start + bytes; -- -- /* -- * Allocate and initialize the free list array. This array is used -- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE -- * between io_tlb_start and io_tlb_end. 
-- */ -- io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); -- for (i = 0; i < io_tlb_nslabs; i++) -- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); -- io_tlb_index = 0; -- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t)); -- -- /* -- * Get the overflow emergency buffer -- */ -- io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); -- if (!io_tlb_overflow_buffer) -- panic("Cannot allocate SWIOTLB overflow buffer!\n"); -- -- swiotlb_print_info(bytes); --} -- --void __init --swiotlb_init(void) --{ -- swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ --} -- --/* -- * Systems with larger DMA zones (those that don't support ISA) can -- * initialize the swiotlb later using the slab allocator if needed. -- * This should be just like above, but with some error catching. -- */ --int --swiotlb_late_init_with_default_size(size_t default_size) --{ -- unsigned long i, bytes, req_nslabs = io_tlb_nslabs; -- unsigned int order; -- -- if (!io_tlb_nslabs) { -- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); -- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); -- } -- -- /* -- * Get IO TLB memory from the low pages -- */ -- order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); -- io_tlb_nslabs = SLABS_PER_PAGE << order; -- bytes = io_tlb_nslabs << IO_TLB_SHIFT; -- -- while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { -- io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, -- order); -- if (io_tlb_start) -- break; -- order--; -- } -- -- if (!io_tlb_start) -- goto cleanup1; -- -- if (order != get_order(bytes)) { -- printk(KERN_WARNING "Warning: only able to allocate %ld MB " -- "for software IO TLB\n", (PAGE_SIZE << order) >> 20); -- io_tlb_nslabs = SLABS_PER_PAGE << order; -- bytes = io_tlb_nslabs << IO_TLB_SHIFT; -- } -- io_tlb_end = io_tlb_start + bytes; -- memset(io_tlb_start, 0, bytes); -- -- /* -- * Allocate and initialize the free list array. 
This array is used -- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE -- * between io_tlb_start and io_tlb_end. -- */ -- io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, -- get_order(io_tlb_nslabs * sizeof(int))); -- if (!io_tlb_list) -- goto cleanup2; -- -- for (i = 0; i < io_tlb_nslabs; i++) -- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); -- io_tlb_index = 0; -- -- io_tlb_orig_addr = (phys_addr_t *) -- __get_free_pages(GFP_KERNEL, -- get_order(io_tlb_nslabs * -- sizeof(phys_addr_t))); -- if (!io_tlb_orig_addr) -- goto cleanup3; -- -- memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t)); -- -- /* -- * Get the overflow emergency buffer -- */ -- io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA, -- get_order(io_tlb_overflow)); -- if (!io_tlb_overflow_buffer) -- goto cleanup4; -- -- swiotlb_print_info(bytes); -- -- return 0; -- --cleanup4: -- free_pages((unsigned long)io_tlb_orig_addr, -- get_order(io_tlb_nslabs * sizeof(phys_addr_t))); -- io_tlb_orig_addr = NULL; --cleanup3: -- free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * -- sizeof(int))); -- io_tlb_list = NULL; --cleanup2: -- io_tlb_end = NULL; -- free_pages((unsigned long)io_tlb_start, order); -- io_tlb_start = NULL; --cleanup1: -- io_tlb_nslabs = req_nslabs; -- return -ENOMEM; --} -- --static int is_swiotlb_buffer(phys_addr_t paddr) --{ -- return paddr >= virt_to_phys(io_tlb_start) && -- paddr < virt_to_phys(io_tlb_end); --} -- --/* -- * Bounce: copy the swiotlb buffer back to the original dma location -- */ --static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, -- enum dma_data_direction dir) --{ -- unsigned long pfn = PFN_DOWN(phys); -- -- if (PageHighMem(pfn_to_page(pfn))) { -- /* The buffer does not have a mapping. 
Map it in and copy */ -- unsigned int offset = phys & ~PAGE_MASK; -- char *buffer; -- unsigned int sz = 0; -- unsigned long flags; -- -- while (size) { -- sz = min_t(size_t, PAGE_SIZE - offset, size); -- -- local_irq_save(flags); -- buffer = kmap_atomic(pfn_to_page(pfn), -- KM_BOUNCE_READ); -- if (dir == DMA_TO_DEVICE) -- memcpy(dma_addr, buffer + offset, sz); -- else -- memcpy(buffer + offset, dma_addr, sz); -- kunmap_atomic(buffer, KM_BOUNCE_READ); -- local_irq_restore(flags); -- -- size -= sz; -- pfn++; -- dma_addr += sz; -- offset = 0; -- } -- } else { -- if (dir == DMA_TO_DEVICE) -- memcpy(dma_addr, phys_to_virt(phys), size); -- else -- memcpy(phys_to_virt(phys), dma_addr, size); -- } --} -- --/* -- * Allocates bounce buffer and returns its kernel virtual address. -- */ --static void * --map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir) --{ -- unsigned long flags; -- char *dma_addr; -- unsigned int nslots, stride, index, wrap; -- int i; -- unsigned long start_dma_addr; -- unsigned long mask; -- unsigned long offset_slots; -- unsigned long max_slots; -- -- mask = dma_get_seg_boundary(hwdev); -- start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask; -- -- offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; -- -- /* -- * Carefully handle integer overflow which can occur when mask == ~0UL. -- */ -- max_slots = mask + 1 -- ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT -- : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); -- -- /* -- * For mappings greater than a page, we limit the stride (and -- * hence alignment) to a page size. -- */ -- nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; -- if (size > PAGE_SIZE) -- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); -- else -- stride = 1; -- -- BUG_ON(!nslots); -- -- /* -- * Find suitable number of IO TLB entries size that will fit this -- * request and allocate a buffer from that IO TLB pool. 
-- */ -- spin_lock_irqsave(&io_tlb_lock, flags); -- index = ALIGN(io_tlb_index, stride); -- if (index >= io_tlb_nslabs) -- index = 0; -- wrap = index; -- -- do { -- while (iommu_is_span_boundary(index, nslots, offset_slots, -- max_slots)) { -- index += stride; -- if (index >= io_tlb_nslabs) -- index = 0; -- if (index == wrap) -- goto not_found; -- } -- -- /* -- * If we find a slot that indicates we have 'nslots' number of -- * contiguous buffers, we allocate the buffers from that slot -- * and mark the entries as '0' indicating unavailable. -- */ -- if (io_tlb_list[index] >= nslots) { -- int count = 0; -- -- for (i = index; i < (int) (index + nslots); i++) -- io_tlb_list[i] = 0; -- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) -- io_tlb_list[i] = ++count; -- dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); -- -- /* -- * Update the indices to avoid searching in the next -- * round. -- */ -- io_tlb_index = ((index + nslots) < io_tlb_nslabs -- ? (index + nslots) : 0); -- -- goto found; -- } -- index += stride; -- if (index >= io_tlb_nslabs) -- index = 0; -- } while (index != wrap); -- --not_found: -- spin_unlock_irqrestore(&io_tlb_lock, flags); -- return NULL; --found: -- spin_unlock_irqrestore(&io_tlb_lock, flags); -- -- /* -- * Save away the mapping from the original address to the DMA address. -- * This is needed when we sync the memory. Then we sync the buffer if -- * needed. -- */ -- for (i = 0; i < nslots; i++) -- io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT); -- if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) -- swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); -- -- return dma_addr; --} -- --/* -- * dma_addr is the kernel virtual address of the bounce buffer to unmap. 
-- */ --static void --do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) --{ -- unsigned long flags; -- int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; -- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; -- phys_addr_t phys = io_tlb_orig_addr[index]; -- -- /* -- * First, sync the memory before unmapping the entry -- */ -- if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) -- swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); -- -- /* -- * Return the buffer to the free list by setting the corresponding -- * entries to indicate the number of contigous entries available. -- * While returning the entries to the free list, we merge the entries -- * with slots below and above the pool being returned. -- */ -- spin_lock_irqsave(&io_tlb_lock, flags); -- { -- count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? -- io_tlb_list[index + nslots] : 0); -- /* -- * Step 1: return the slots to the free list, merging the -- * slots with superceeding slots -- */ -- for (i = index + nslots - 1; i >= index; i--) -- io_tlb_list[i] = ++count; -- /* -- * Step 2: merge the returned slots with the preceding slots, -- * if available (non zero) -- */ -- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) -- io_tlb_list[i] = ++count; -- } -- spin_unlock_irqrestore(&io_tlb_lock, flags); --} -- --static void --sync_single(struct device *hwdev, char *dma_addr, size_t size, -- int dir, int target) --{ -- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; -- phys_addr_t phys = io_tlb_orig_addr[index]; -- -- phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1)); -- -- switch (target) { -- case SYNC_FOR_CPU: -- if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) -- swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); -- else -- BUG_ON(dir != DMA_TO_DEVICE); -- break; -- case SYNC_FOR_DEVICE: -- if (likely(dir == DMA_TO_DEVICE || dir == 
DMA_BIDIRECTIONAL)) -- swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); -- else -- BUG_ON(dir != DMA_FROM_DEVICE); -- break; -- default: -- BUG(); -- } --} -- - void * - swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) -@@ -512,12 +21,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, - void *ret; - int order = get_order(size); - u64 dma_mask = DMA_BIT_MASK(32); -+ unsigned long start_dma_addr; - - if (hwdev && hwdev->coherent_dma_mask) - dma_mask = hwdev->coherent_dma_mask; - - ret = (void *)__get_free_pages(flags, order); -- if (ret && swiotlb_virt_to_bus(hwdev, ret) + size > dma_mask) { -+ if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) { - /* - * The allocated memory isn't reachable by the device. - */ -@@ -527,10 +37,12 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, - if (!ret) { - /* - * We are either out of memory or the device can't DMA -- * to GFP_DMA memory; fall back on map_single(), which -+ * to GFP_DMA memory; fall back on do_map_single(), which - * will grab memory from the lowest available address range. 
- */ -- ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); -+ start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); -+ ret = do_map_single(hwdev, 0, start_dma_addr, size, -+ DMA_FROM_DEVICE); - if (!ret) - return NULL; - } -@@ -539,12 +51,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dev_addr = swiotlb_virt_to_bus(hwdev, ret); - - /* Confirm address can be DMA'd by device */ -- if (dev_addr + size > dma_mask) { -- printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", -+ if (dev_addr + size - 1 > dma_mask) { -+ dev_err(hwdev, "DMA: hwdev DMA mask = 0x%016Lx, " \ -+ "dev_addr = 0x%016Lx\n", - (unsigned long long)dma_mask, - (unsigned long long)dev_addr); - -- /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ -+ /* DMA_TO_DEVICE to avoid memcpy in do_unmap_single */ - do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); - return NULL; - } -@@ -563,35 +76,11 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - if (!is_swiotlb_buffer(paddr)) - free_pages((unsigned long)vaddr, get_order(size)); - else -- /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ -+ /* DMA_TO_DEVICE to avoid memcpy in do_unmap_single */ - do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); - } - EXPORT_SYMBOL(swiotlb_free_coherent); - --static void --swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) --{ -- /* -- * Ran out of IOMMU space for this operation. This is very bad. -- * Unfortunately the drivers cannot handle this operation properly. -- * unless they check for dma_mapping_error (most don't) -- * When the mapping is small enough return a static buffer to limit -- * the damage, or panic when the transfer is too big. -- */ -- printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at " -- "device %s\n", size, dev ? 
dev_name(dev) : "?"); -- -- if (size <= io_tlb_overflow || !do_panic) -- return; -- -- if (dir == DMA_BIDIRECTIONAL) -- panic("DMA: Random memory could be DMA accessed\n"); -- if (dir == DMA_FROM_DEVICE) -- panic("DMA: Random memory could be DMA written\n"); -- if (dir == DMA_TO_DEVICE) -- panic("DMA: Random memory could be DMA read\n"); --} -- - /* - * Map a single buffer of the indicated size for DMA in streaming mode. The - * physical address to use is returned. -@@ -604,6 +93,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - enum dma_data_direction dir, - struct dma_attrs *attrs) - { -+ unsigned long start_dma_addr; - phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = phys_to_dma(dev, phys); - void *map; -@@ -620,7 +110,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - /* - * Oh well, have to allocate and map a bounce buffer. - */ -- map = map_single(dev, phys, size, dir); -+ start_dma_addr = swiotlb_virt_to_bus(dev, io_tlb_start); -+ map = do_map_single(dev, phys, start_dma_addr, size, dir); - if (!map) { - swiotlb_full(dev, size, dir, 1); - map = io_tlb_overflow_buffer; -@@ -632,7 +123,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - * Ensure that the address returned is DMA'ble - */ - if (!dma_capable(dev, dev_addr, size)) -- panic("map_single: bounce buffer is not DMA'ble"); -+ panic("DMA: swiotlb_map_single: bounce buffer is not DMA'ble"); - - return dev_addr; - } -@@ -697,7 +188,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - BUG_ON(dir == DMA_NONE); - - if (is_swiotlb_buffer(paddr)) { -- sync_single(hwdev, phys_to_virt(paddr), size, dir, target); -+ do_sync_single(hwdev, phys_to_virt(paddr), size, dir, target); - return; - } - -@@ -774,19 +265,22 @@ int - swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, struct dma_attrs *attrs) - { -+ unsigned long start_dma_addr; - struct 
scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - -+ start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); - for_each_sg(sgl, sg, nelems, i) { - phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - - if (swiotlb_force || - !dma_capable(hwdev, dev_addr, sg->length)) { -- void *map = map_single(hwdev, sg_phys(sg), -- sg->length, dir); -+ void *map = do_map_single(hwdev, sg_phys(sg), -+ start_dma_addr, -+ sg->length, dir); - if (!map) { - /* Don't panic here, we expect map_sg users - to do proper error handling. */ -@@ -819,7 +313,8 @@ EXPORT_SYMBOL(swiotlb_map_sg); - */ - void - swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, -- int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) -+ int nelems, enum dma_data_direction dir, -+ struct dma_attrs *attrs) - { - struct scatterlist *sg; - int i; -diff --git a/mm/bootmem.c b/mm/bootmem.c -index 555d5d2..d1dc23c 100644 ---- a/mm/bootmem.c -+++ b/mm/bootmem.c -@@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) - return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); - } - -+/* -+ * free_bootmem_late - free bootmem pages directly to page allocator -+ * @addr: starting address of the range -+ * @size: size of the range in bytes -+ * -+ * This is only useful when the bootmem allocator has already been torn -+ * down, but we are still initializing the system. Pages are given directly -+ * to the page allocator, no bootmem metadata is updated because it is gone. 
-+ */ -+void __init free_bootmem_late(unsigned long addr, unsigned long size) -+{ -+ unsigned long cursor, end; -+ -+ kmemleak_free_part(__va(addr), size); -+ -+ cursor = PFN_UP(addr); -+ end = PFN_DOWN(addr + size); -+ -+ for (; cursor < end; cursor++) { -+ __free_pages_bootmem(pfn_to_page(cursor), 0); -+ totalram_pages++; -+ } -+} -+ - static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) - { - int aligned; -diff --git a/mm/memory.c b/mm/memory.c -index 53c1da0..c8741df 100644 ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -553,6 +553,13 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, - if (is_zero_pfn(pfn)) - return NULL; - check_pfn: -+ -+#if defined(CONFIG_XEN) && defined(CONFIG_X86) -+ /* XEN: Covers user-space grant mappings (even of local pages). */ -+ if (unlikely(vma->vm_flags & VM_FOREIGN)) -+ return NULL; -+#endif -+ - if (unlikely(pfn > highest_memmap_pfn)) { - print_bad_pte(vma, addr, pte, NULL); - return NULL; -@@ -839,8 +846,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, - page->index > details->last_index)) - continue; - } -- ptent = ptep_get_and_clear_full(mm, addr, pte, -- tlb->fullmm); -+ if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte)) -+ ptent = vma->vm_ops->zap_pte(vma, addr, pte, -+ tlb->fullmm); -+ else -+ ptent = ptep_get_and_clear_full(mm, addr, pte, -+ tlb->fullmm); - tlb_remove_tlb_entry(tlb, pte, addr); - if (unlikely(!page)) - continue; -@@ -1100,6 +1111,7 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, - tlb_finish_mmu(tlb, address, end); - return end; - } -+EXPORT_SYMBOL_GPL(zap_page_range); - - /** - * zap_vma_ptes - remove ptes mapping the vma -@@ -1306,6 +1318,29 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - continue; - } - -+#ifdef CONFIG_XEN -+ if (vma && (vma->vm_flags & VM_FOREIGN)) { -+ struct vm_foreign_map *foreign_map = -+ vma->vm_private_data; -+ struct page **map = foreign_map->map; -+ int 
offset = (start - vma->vm_start) >> PAGE_SHIFT; -+ if (map[offset] != NULL) { -+ if (pages) { -+ struct page *page = map[offset]; -+ -+ pages[i] = page; -+ get_page(page); -+ } -+ if (vmas) -+ vmas[i] = vma; -+ i++; -+ start += PAGE_SIZE; -+ nr_pages--; -+ continue; -+ } -+ } -+#endif -+ - if (!vma || - (vma->vm_flags & (VM_IO | VM_PFNMAP)) || - !(vm_flags & vma->vm_flags)) -@@ -1781,6 +1816,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, - - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; - -+#ifdef CONFIG_XEN -+ vma->vm_mm->context.has_foreign_mappings = 1; -+#endif -+ - err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size)); - if (err) { - /* -@@ -1896,11 +1935,10 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - { - pgd_t *pgd; - unsigned long next; -- unsigned long start = addr, end = addr + size; -+ unsigned long end = addr + size; - int err; - - BUG_ON(addr >= end); -- mmu_notifier_invalidate_range_start(mm, start, end); - pgd = pgd_offset(mm, addr); - do { - next = pgd_addr_end(addr, end); -@@ -1908,7 +1946,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - if (err) - break; - } while (pgd++, addr = next, addr != end); -- mmu_notifier_invalidate_range_end(mm, start, end); -+ - return err; - } - EXPORT_SYMBOL_GPL(apply_to_page_range); -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 902e5fc..101715c 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -594,6 +594,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) - if (bad) - return; - -+#ifdef CONFIG_XEN -+ if (PageForeign(page)) { -+ PageForeignDestructor(page, order); -+ return; -+ } -+#endif -+ - if (!PageHighMem(page)) { - debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order); - debug_check_no_obj_freed(page_address(page), -@@ -1088,6 +1095,13 @@ static void free_hot_cold_page(struct page *page, int cold) - - kmemcheck_free_shadow(page, 0); - -+#ifdef CONFIG_XEN -+ if (PageForeign(page)) 
{ -+ PageForeignDestructor(page, 0); -+ return; -+ } -+#endif -+ - if (PageAnon(page)) - page->mapping = NULL; - if (free_pages_check(page)) -diff --git a/mm/vmalloc.c b/mm/vmalloc.c -index 680dcbb..4f701c2 100644 ---- a/mm/vmalloc.c -+++ b/mm/vmalloc.c -@@ -31,6 +31,7 @@ - #include <asm/tlbflush.h> - #include <asm/shmparam.h> - -+bool vmap_lazy_unmap __read_mostly = true; - - /*** Page table manipulation functions ***/ - -@@ -502,6 +503,9 @@ static unsigned long lazy_max_pages(void) - { - unsigned int log; - -+ if (!vmap_lazy_unmap) -+ return 0; -+ - log = fls(num_online_cpus()); - - return log * (32UL * 1024 * 1024 / PAGE_SIZE); -@@ -570,8 +574,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, - } - rcu_read_unlock(); - -- if (nr) -+ if (nr) { - atomic_sub(nr, &vmap_lazy_nr); -+ } - - if (nr || force_flush) - flush_tlb_kernel_range(*start, *end); -diff --git a/net/core/ethtool.c b/net/core/ethtool.c -index abbe8fa..e661dd7 100644 ---- a/net/core/ethtool.c -+++ b/net/core/ethtool.c -@@ -179,14 +179,24 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) - struct ethtool_drvinfo info; - const struct ethtool_ops *ops = dev->ethtool_ops; - -- if (!ops->get_drvinfo) -- return -EOPNOTSUPP; -- - memset(&info, 0, sizeof(info)); - info.cmd = ETHTOOL_GDRVINFO; -- ops->get_drvinfo(dev, &info); -+ if (ops && ops->get_drvinfo) { -+ ops->get_drvinfo(dev, &info); -+ } else if (dev->dev.parent && dev->dev.parent->driver) { -+ strlcpy(info.bus_info, dev_name(dev->dev.parent), -+ sizeof(info.bus_info)); -+ strlcpy(info.driver, dev->dev.parent->driver->name, -+ sizeof(info.driver)); -+ } else { -+ return -EOPNOTSUPP; -+ } - -- if (ops->get_sset_count) { -+ /* -+ * this method of obtaining string set info is deprecated; -+ * Use ETHTOOL_GSSET_INFO instead. 
-+ */ -+ if (ops && ops->get_sset_count) { - int rc; - - rc = ops->get_sset_count(dev, ETH_SS_TEST); -@@ -201,14 +211,14 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) - } else { - /* code path for obsolete hooks */ - -- if (ops->self_test_count) -+ if (ops && ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); -- if (ops->get_stats_count) -+ if (ops && ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); - } -- if (ops->get_regs_len) -+ if (ops && ops->get_regs_len) - info.regdump_len = ops->get_regs_len(dev); -- if (ops->get_eeprom_len) -+ if (ops && ops->get_eeprom_len) - info.eedump_len = ops->get_eeprom_len(dev); - - if (copy_to_user(useraddr, &info, sizeof(info))) -@@ -945,12 +955,19 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) - if (!dev || !netif_device_present(dev)) - return -ENODEV; - -- if (!dev->ethtool_ops) -- return -EOPNOTSUPP; -- -- if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) -+ if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) - return -EFAULT; - -+ if (!dev->ethtool_ops) { -+ /* ETHTOOL_GDRVINFO does not require any driver support. -+ * It is also unprivileged and does not change anything, -+ * so we can take a shortcut to it.
*/ -+ if (ethcmd == ETHTOOL_GDRVINFO) -+ return ethtool_get_drvinfo(dev, useraddr); -+ else -+ return -EOPNOTSUPP; -+ } -+ - /* Allow some commands to be done by anyone */ - switch(ethcmd) { - case ETHTOOL_GDRVINFO: -diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c -index d4fd895..4ab8c97 100644 ---- a/net/core/rtnetlink.c -+++ b/net/core/rtnetlink.c -@@ -35,6 +35,7 @@ - #include <linux/security.h> - #include <linux/mutex.h> - #include <linux/if_addr.h> -+#include <linux/pci.h> - - #include <asm/uaccess.h> - #include <asm/system.h> -@@ -582,6 +583,22 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - a->tx_compressed = b->tx_compressed; - }; - -+/* All VF info */ -+static inline int rtnl_vfinfo_size(const struct net_device *dev) -+{ -+ if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { -+ -+ int num_vfs = dev_num_vf(dev->dev.parent); -+ size_t size = nlmsg_total_size(sizeof(struct nlattr)); -+ size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); -+ size += num_vfs * (sizeof(struct ifla_vf_mac) + -+ sizeof(struct ifla_vf_vlan) + -+ sizeof(struct ifla_vf_tx_rate)); -+ return size; -+ } else -+ return 0; -+} -+ - static inline size_t if_nlmsg_size(const struct net_device *dev) - { - return NLMSG_ALIGN(sizeof(struct ifinfomsg)) -@@ -599,6 +616,8 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) - + nla_total_size(4) /* IFLA_MASTER */ - + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1) /* IFLA_LINKMODE */ -+ + nla_total_size(4) /* IFLA_NUM_VF */ -+ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ - + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ - } - -@@ -667,6 +686,40 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, - stats = dev_get_stats(dev); - copy_rtnl_link_stats(nla_data(attr), stats); - -+ if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { -+ int i; -+ -+ struct nlattr *vfinfo, *vf; -+ int num_vfs = dev_num_vf(dev->dev.parent); -+ -+ NLA_PUT_U32(skb, IFLA_NUM_VF, 
num_vfs); -+ vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); -+ if (!vfinfo) -+ goto nla_put_failure; -+ for (i = 0; i < num_vfs; i++) { -+ struct ifla_vf_info ivi; -+ struct ifla_vf_mac vf_mac; -+ struct ifla_vf_vlan vf_vlan; -+ struct ifla_vf_tx_rate vf_tx_rate; -+ if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) -+ break; -+ vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; -+ memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); -+ vf_vlan.vlan = ivi.vlan; -+ vf_vlan.qos = ivi.qos; -+ vf_tx_rate.rate = ivi.tx_rate; -+ vf = nla_nest_start(skb, IFLA_VF_INFO); -+ if (!vf) { -+ nla_nest_cancel(skb, vfinfo); -+ goto nla_put_failure; -+ } -+ NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); -+ NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); -+ NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); -+ nla_nest_end(skb, vf); -+ } -+ nla_nest_end(skb, vfinfo); -+ } - if (dev->rtnl_link_ops) { - if (rtnl_link_fill(skb, dev) < 0) - goto nla_put_failure; -@@ -716,6 +769,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { - [IFLA_LINKINFO] = { .type = NLA_NESTED }, - [IFLA_NET_NS_PID] = { .type = NLA_U32 }, - [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, -+ [IFLA_VFINFO_LIST] = {. 
type = NLA_NESTED }, - }; - - static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { -@@ -723,6 +777,33 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { - [IFLA_INFO_DATA] = { .type = NLA_NESTED }, - }; - -+static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { -+ [IFLA_VF_INFO] = { .type = NLA_NESTED }, -+}; -+ -+static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { -+ [IFLA_VF_MAC] = { .type = NLA_BINARY, -+ .len = sizeof(struct ifla_vf_mac) }, -+ [IFLA_VF_VLAN] = { .type = NLA_BINARY, -+ .len = sizeof(struct ifla_vf_vlan) }, -+ [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, -+ .len = sizeof(struct ifla_vf_tx_rate) }, -+}; -+ -+struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) -+{ -+ struct net *net; -+ /* Examine the link attributes and figure out which -+ * network namespace we are talking about. -+ */ -+ if (tb[IFLA_NET_NS_PID]) -+ net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); -+ else -+ net = get_net(src_net); -+ return net; -+} -+EXPORT_SYMBOL(rtnl_link_get_net); -+ - static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) - { - if (dev) { -@@ -738,6 +819,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) - return 0; - } - -+static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) -+{ -+ int rem, err = -EINVAL; -+ struct nlattr *vf; -+ const struct net_device_ops *ops = dev->netdev_ops; -+ -+ nla_for_each_nested(vf, attr, rem) { -+ switch (nla_type(vf)) { -+ case IFLA_VF_MAC: { -+ struct ifla_vf_mac *ivm; -+ ivm = nla_data(vf); -+ err = -EOPNOTSUPP; -+ if (ops->ndo_set_vf_mac) -+ err = ops->ndo_set_vf_mac(dev, ivm->vf, -+ ivm->mac); -+ break; -+ } -+ case IFLA_VF_VLAN: { -+ struct ifla_vf_vlan *ivv; -+ ivv = nla_data(vf); -+ err = -EOPNOTSUPP; -+ if (ops->ndo_set_vf_vlan) -+ err = ops->ndo_set_vf_vlan(dev, ivv->vf, -+ ivv->vlan, -+ ivv->qos); -+ break; -+ } -+ case IFLA_VF_TX_RATE: { -+ struct 
ifla_vf_tx_rate *ivt; -+ ivt = nla_data(vf); -+ err = -EOPNOTSUPP; -+ if (ops->ndo_set_vf_tx_rate) -+ err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, -+ ivt->rate); -+ break; -+ } -+ default: -+ err = -EINVAL; -+ break; -+ } -+ if (err) -+ break; -+ } -+ return err; -+} -+ - static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, - struct nlattr **tb, char *ifname, int modified) - { -@@ -875,6 +1002,18 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, - write_unlock_bh(&dev_base_lock); - } - -+ if (tb[IFLA_VFINFO_LIST]) { -+ struct nlattr *attr; -+ int rem; -+ nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { -+ if (nla_type(attr) != IFLA_VF_INFO) -+ goto errout; -+ err = do_setvfinfo(dev, attr); -+ if (err < 0) -+ goto errout; -+ modified = 1; -+ } -+ } - err = 0; - - errout: -diff --git a/net/sched/Kconfig b/net/sched/Kconfig -index 929218a..956cd0a 100644 ---- a/net/sched/Kconfig -+++ b/net/sched/Kconfig -@@ -215,6 +215,26 @@ config NET_SCH_INGRESS - To compile this code as a module, choose M here: the - module will be called sch_ingress. - -+config NET_SCH_PLUG -+ tristate "Plug network traffic until release" -+ ---help--- -+ Say Y here if you are using this kernel for Xen dom0 and -+ want to protect Xen guests with Remus. -+ -+ This queueing discipline is controlled by netlink. When it receives an -+ enqueue command it inserts a plug into the outbound queue that causes -+ following packets to enqueue until a dequeue command arrives over -+ netlink, releasing packets up to the plug for delivery. -+ -+ Its intention is to support speculative execution by allowing generated -+ network traffic to be rolled back. It is used to provide network -+ protection for the Remus high availability project. -+ -+ If unsure, say N. -+ -+ To compile this code as a module, choose M here: the -+ module will be called sch_plug. 
-+ - comment "Classification" - - config NET_CLS -diff --git a/net/sched/Makefile b/net/sched/Makefile -index f14e71b..61ef5f7 100644 ---- a/net/sched/Makefile -+++ b/net/sched/Makefile -@@ -31,6 +31,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o - obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o - obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o - obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o -+obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o - obj-$(CONFIG_NET_CLS_U32) += cls_u32.o - obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o - obj-$(CONFIG_NET_CLS_FW) += cls_fw.o -diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c -new file mode 100644 -index 0000000..86c3ee1 ---- /dev/null -+++ b/net/sched/sch_plug.c -@@ -0,0 +1,156 @@ -+/* -+ * sch_plug.c Queue traffic until an explicit release command -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ * The operation of the buffer is as follows: -+ * When a checkpoint begins, a plug is inserted into the -+ * network queue by a netlink request (it operates by storing -+ * a pointer to the next packet which arrives and blocking dequeue -+ * when that packet is at the head of the queue). -+ * When a checkpoint completes (the backup acknowledges receipt), -+ * currently-queued packets are released. -+ * So it supports two operations, plug and unplug. -+ */ -+ -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/errno.h> -+#include <linux/netdevice.h> -+#include <linux/skbuff.h> -+#include <net/pkt_sched.h> -+ -+#define FIFO_BUF (10*1024*1024) -+ -+#define TCQ_PLUG 0 -+#define TCQ_UNPLUG 1 -+ -+struct plug_sched_data { -+ /* -+ * This packet is the first packet which should not be -+ * delivered. If it is NULL, plug_enqueue will set it to the -+ * next packet it sees. 
-+ */ -+ struct sk_buff *stop; -+}; -+ -+struct tc_plug_qopt { -+ /* 0: reset stop packet pointer -+ * 1: dequeue to stop pointer */ -+ int action; -+}; -+ -+static int skb_remove_foreign_references(struct sk_buff *skb) -+{ -+ return !skb_linearize(skb); -+} -+ -+static int plug_enqueue(struct sk_buff *skb, struct Qdisc* sch) -+{ -+ struct plug_sched_data *q = qdisc_priv(sch); -+ -+ if (likely(sch->qstats.backlog + skb->len <= FIFO_BUF)) { -+ if (!q->stop) -+ q->stop = skb; -+ -+ if (!skb_remove_foreign_references(skb)) { -+ printk(KERN_DEBUG "error removing foreign ref\n"); -+ return qdisc_reshape_fail(skb, sch); -+ } -+ -+ return qdisc_enqueue_tail(skb, sch); -+ } -+ printk(KERN_WARNING "queue reported full: %d,%d\n", -+ sch->qstats.backlog, skb->len); -+ -+ return qdisc_reshape_fail(skb, sch); -+} -+ -+/* dequeue doesn't actually dequeue until the release command is -+ * received. */ -+static struct sk_buff *plug_dequeue(struct Qdisc* sch) -+{ -+ struct plug_sched_data *q = qdisc_priv(sch); -+ struct sk_buff *peek; -+ -+ if (sch->flags & TCQ_F_THROTTLED) -+ return NULL; -+ -+ peek = (struct sk_buff *)((sch->q).next); -+ -+ /* this pointer comparison may be shady */ -+ if (peek == q->stop) { -+ /* -+ * This is the tail of the last round. 
Release it and -+ * block the queue -+ */ -+ sch->flags |= TCQ_F_THROTTLED; -+ return NULL; -+ } -+ -+ return qdisc_dequeue_head(sch); -+} -+ -+static int plug_init(struct Qdisc *sch, struct nlattr *opt) -+{ -+ sch->flags |= TCQ_F_THROTTLED; -+ -+ return 0; -+} -+ -+/* -+ * receives two messages: -+ * 0: checkpoint queue (set stop to next packet) -+ * 1: dequeue until stop -+ */ -+static int plug_change(struct Qdisc *sch, struct nlattr *opt) -+{ -+ struct plug_sched_data *q = qdisc_priv(sch); -+ struct tc_plug_qopt *msg; -+ -+ if (!opt || nla_len(opt) < sizeof(*msg)) -+ return -EINVAL; -+ -+ msg = nla_data(opt); -+ -+ if (msg->action == TCQ_PLUG) { -+ /* reset stop */ -+ q->stop = NULL; -+ } else if (msg->action == TCQ_UNPLUG) { -+ /* dequeue */ -+ sch->flags &= ~TCQ_F_THROTTLED; -+ netif_schedule_queue(sch->dev_queue); -+ } else { -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+struct Qdisc_ops plug_qdisc_ops = { -+ .id = "plug", -+ .priv_size = sizeof(struct plug_sched_data), -+ .enqueue = plug_enqueue, -+ .dequeue = plug_dequeue, -+ .peek = qdisc_peek_head, -+ .init = plug_init, -+ .change = plug_change, -+ .owner = THIS_MODULE, -+}; -+ -+static int __init plug_module_init(void) -+{ -+ return register_qdisc(&plug_qdisc_ops); -+} -+ -+static void __exit plug_module_exit(void) -+{ -+ unregister_qdisc(&plug_qdisc_ops); -+} -+module_init(plug_module_init) -+module_exit(plug_module_exit) -+MODULE_LICENSE("GPL"); |