From f369602f6bfd7345808a67c2427e770e97f44ca5 Mon Sep 17 00:00:00 2001 From: William Pitcock Date: Thu, 17 Feb 2011 19:05:56 -0600 Subject: testing/linux-xen0: dropped, deprecated by upstream dom0 support --- testing/linux-xen0/pvops.patch | 37837 --------------------------------------- 1 file changed, 37837 deletions(-) delete mode 100644 testing/linux-xen0/pvops.patch (limited to 'testing/linux-xen0/pvops.patch') diff --git a/testing/linux-xen0/pvops.patch b/testing/linux-xen0/pvops.patch deleted file mode 100644 index 49969705be..0000000000 --- a/testing/linux-xen0/pvops.patch +++ /dev/null @@ -1,37837 +0,0 @@ -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 5f6aa11..9ec8558 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -113,6 +113,7 @@ parameter is applicable: - More X86-64 boot options can be found in - Documentation/x86/x86_64/boot-options.txt . - X86 Either 32bit or 64bit x86 (same as X86-32+X86-64) -+ XEN Xen support is enabled - - In addition, the following text indicates that the option: - -@@ -2760,6 +2761,18 @@ and is between 256 and 4096 characters. It is defined in the file - xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. - xd_geo= See header of drivers/block/xd.c. 
- -+ xen_emul_unplug= [HW,X86,XEN] -+ Unplug Xen emulated devices -+ Format: [unplug0,][unplug1] -+ ide-disks -- unplug primary master IDE devices -+ aux-ide-disks -- unplug non-primary-master IDE devices -+ nics -- unplug network devices -+ all -- unplug all emulated devices (NICs and IDE disks) -+ unnecessary -- unplugging emulated devices is -+ unnecessary even if the host did not respond to -+ the unplug protocol -+ never -- do not unplug even if version check succeeds -+ - xirc2ps_cs= [NET,PCMCIA] - Format: - ,,,,,[,[,[,]]] -diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt -index 29a6ff8..81f9b94 100644 ---- a/Documentation/x86/x86_64/boot-options.txt -+++ b/Documentation/x86/x86_64/boot-options.txt -@@ -267,10 +267,14 @@ IOMMU (input/output memory management unit) - - iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU - implementation: -- swiotlb=[,force] -+ swiotlb=[npages=] -+ swiotlb=[force] -+ swiotlb=[overflow=] -+ - Prereserve that many 128K pages for the software IO - bounce buffering. - force Force all IO through the software TLB. -+ Size in bytes of the overflow buffer. 
- - Settings for the IBM Calgary hardware IOMMU currently found in IBM - pSeries and xSeries machines: -diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h -index 8d3c79c..7d09a09 100644 ---- a/arch/ia64/include/asm/dma-mapping.h -+++ b/arch/ia64/include/asm/dma-mapping.h -@@ -73,7 +73,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) - if (!dev->dma_mask) - return 0; - -- return addr + size <= *dev->dma_mask; -+ return addr + size - 1 <= *dev->dma_mask; - } - - static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h -index dcbaea7..f0acde6 100644 ---- a/arch/ia64/include/asm/swiotlb.h -+++ b/arch/ia64/include/asm/swiotlb.h -@@ -4,8 +4,6 @@ - #include - #include - --extern int swiotlb_force; -- - #ifdef CONFIG_SWIOTLB - extern int swiotlb; - extern void pci_swiotlb_init(void); -diff --git a/arch/ia64/include/asm/xen/events.h b/arch/ia64/include/asm/xen/events.h -index b8370c8..baa74c8 100644 ---- a/arch/ia64/include/asm/xen/events.h -+++ b/arch/ia64/include/asm/xen/events.h -@@ -36,10 +36,6 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) - return !(ia64_psr(regs)->i); - } - --static inline void handle_irq(int irq, struct pt_regs *regs) --{ -- __do_IRQ(irq); --} - #define irq_ctx_init(cpu) do { } while (0) - - #endif /* _ASM_IA64_XEN_EVENTS_H */ -diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c -index 285aae8..53292ab 100644 ---- a/arch/ia64/kernel/pci-swiotlb.c -+++ b/arch/ia64/kernel/pci-swiotlb.c -@@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = { - void __init swiotlb_dma_init(void) - { - dma_ops = &swiotlb_dma_ops; -- swiotlb_init(); -+ swiotlb_init(1); - } - - void __init pci_swiotlb_init(void) -@@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void) - swiotlb = 1; - printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); - 
machvec_init("dig"); -- swiotlb_init(); -+ swiotlb_init(1); - dma_ops = &swiotlb_dma_ops; - #else - panic("Unable to find Intel IOMMU"); -diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h -index e281dae..80a973b 100644 ---- a/arch/powerpc/include/asm/dma-mapping.h -+++ b/arch/powerpc/include/asm/dma-mapping.h -@@ -197,7 +197,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) - if (!dev->dma_mask) - return 0; - -- return addr + size <= *dev->dma_mask; -+ return addr + size - 1 <= *dev->dma_mask; - } - - static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c -index 53bcf3d..b152de3 100644 ---- a/arch/powerpc/kernel/setup_32.c -+++ b/arch/powerpc/kernel/setup_32.c -@@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p) - - #ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) -- swiotlb_init(); -+ swiotlb_init(1); - #endif - - paging_init(); -diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c -index 04f638d..df2c9e9 100644 ---- a/arch/powerpc/kernel/setup_64.c -+++ b/arch/powerpc/kernel/setup_64.c -@@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p) - - #ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) -- swiotlb_init(); -+ swiotlb_init(1); - #endif - - paging_init(); -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index cb5a57c..a3b7475 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1885,6 +1885,10 @@ config PCI_OLPC - def_bool y - depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) - -+config PCI_XEN -+ bool -+ select SWIOTLB -+ - config PCI_DOMAINS - def_bool y - depends on PCI -diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h -index 18aa3f8..4413ba4 100644 ---- a/arch/x86/include/asm/amd_iommu.h -+++ b/arch/x86/include/asm/amd_iommu.h -@@ -23,20 +23,16 @@ - #include - - #ifdef CONFIG_AMD_IOMMU --extern int 
amd_iommu_init(void); - extern int amd_iommu_init_dma_ops(void); - extern int amd_iommu_init_passthrough(void); - extern void amd_iommu_detect(void); - extern irqreturn_t amd_iommu_int_handler(int irq, void *data); - extern void amd_iommu_flush_all_domains(void); - extern void amd_iommu_flush_all_devices(void); --extern void amd_iommu_shutdown(void); - extern void amd_iommu_apply_erratum_63(u16 devid); - extern void amd_iommu_init_api(void); - #else --static inline int amd_iommu_init(void) { return -ENODEV; } - static inline void amd_iommu_detect(void) { } --static inline void amd_iommu_shutdown(void) { } - #endif - - #endif /* _ASM_X86_AMD_IOMMU_H */ -diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h -index b03bedb..0918654 100644 ---- a/arch/x86/include/asm/calgary.h -+++ b/arch/x86/include/asm/calgary.h -@@ -62,10 +62,8 @@ struct cal_chipset_ops { - extern int use_calgary; - - #ifdef CONFIG_CALGARY_IOMMU --extern int calgary_iommu_init(void); - extern void detect_calgary(void); - #else --static inline int calgary_iommu_init(void) { return 1; } - static inline void detect_calgary(void) { return; } - #endif - -diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h -index 6a25d5d..ac91eed 100644 ---- a/arch/x86/include/asm/dma-mapping.h -+++ b/arch/x86/include/asm/dma-mapping.h -@@ -20,7 +20,8 @@ - # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) - #endif - --extern dma_addr_t bad_dma_address; -+#define DMA_ERROR_CODE 0 -+ - extern int iommu_merge; - extern struct device x86_dma_fallback_dev; - extern int panic_on_overflow; -@@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) - if (ops->mapping_error) - return ops->mapping_error(dev, dma_addr); - -- return (dma_addr == bad_dma_address); -+ return (dma_addr == DMA_ERROR_CODE); - } - - #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -@@ -66,7 +67,7 @@ static inline bool dma_capable(struct device 
*dev, dma_addr_t addr, size_t size) - if (!dev->dma_mask) - return 0; - -- return addr + size <= *dev->dma_mask; -+ return addr + size - 1 <= *dev->dma_mask; - } - - static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h -index 40b4e61..fa3fd43 100644 ---- a/arch/x86/include/asm/e820.h -+++ b/arch/x86/include/asm/e820.h -@@ -109,6 +109,8 @@ extern void reserve_early(u64 start, u64 end, char *name); - extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); - extern void free_early(u64 start, u64 end); - extern void early_res_to_bootmem(u64 start, u64 end); -+extern u64 early_res_next_free(u64 start); -+extern u64 early_res_next_reserved(u64 addr, u64 max); - extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); - - extern unsigned long e820_end_of_ram_pfn(void); -diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h -index 6cfdafa..4ac5b0f 100644 ---- a/arch/x86/include/asm/gart.h -+++ b/arch/x86/include/asm/gart.h -@@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed; - extern int gart_iommu_aperture_disabled; - - extern void early_gart_iommu_check(void); --extern void gart_iommu_init(void); --extern void gart_iommu_shutdown(void); -+extern int gart_iommu_init(void); - extern void __init gart_parse_options(char *); - extern void gart_iommu_hole_init(void); - -@@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void); - static inline void early_gart_iommu_check(void) - { - } --static inline void gart_iommu_init(void) --{ --} --static inline void gart_iommu_shutdown(void) --{ --} - static inline void gart_parse_options(char *options) - { - } -diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h -index 3251e23..fa152cb 100644 ---- a/arch/x86/include/asm/hpet.h -+++ b/arch/x86/include/asm/hpet.h -@@ -68,6 +68,7 @@ extern unsigned long force_hpet_address; - extern int hpet_force_user; - extern u8 hpet_msi_disable; - 
extern int is_hpet_enabled(void); -+extern int disable_hpet(char *); - extern int hpet_enable(void); - extern void hpet_disable(void); - extern unsigned long hpet_readl(unsigned long a); -@@ -108,6 +109,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler); - #else /* CONFIG_HPET_TIMER */ - - static inline int hpet_enable(void) { return 0; } -+static inline int disable_hpet(char *s) { return 0; } - static inline int is_hpet_enabled(void) { return 0; } - #define hpet_readl(a) 0 - -diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h -index 439a9ac..bf88684 100644 ---- a/arch/x86/include/asm/hugetlb.h -+++ b/arch/x86/include/asm/hugetlb.h -@@ -36,16 +36,28 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - free_pgd_range(tlb, addr, end, floor, ceiling); - } - -+static inline pte_t huge_ptep_get(pte_t *ptep) -+{ -+ return *ptep; -+} -+ - static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) - { -- set_pte_at(mm, addr, ptep, pte); -+#if PAGETABLE_LEVELS >= 3 -+ set_pmd((pmd_t *)ptep, native_make_pmd(native_pte_val(pte))); -+#else -+ set_pgd((pgd_t *)ptep, native_make_pgd(native_pte_val(pte))); -+#endif - } - - static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) - { -- return ptep_get_and_clear(mm, addr, ptep); -+ pte_t pte = huge_ptep_get(ptep); -+ -+ set_huge_pte_at(mm, addr, ptep, __pte(0)); -+ return pte; - } - - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, -@@ -66,19 +78,25 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) - static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) - { -- ptep_set_wrprotect(mm, addr, ptep); -+ pte_t pte = huge_ptep_get(ptep); -+ -+ pte = pte_wrprotect(pte); -+ set_huge_pte_at(mm, addr, ptep, pte); - } - - static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t 
*ptep, - pte_t pte, int dirty) - { -- return ptep_set_access_flags(vma, addr, ptep, pte, dirty); --} -+ pte_t oldpte = huge_ptep_get(ptep); -+ int changed = !pte_same(oldpte, pte); - --static inline pte_t huge_ptep_get(pte_t *ptep) --{ -- return *ptep; -+ if (changed && dirty) { -+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte); -+ flush_tlb_page(vma, addr); -+ } -+ -+ return changed; - } - - static inline int arch_prepare_hugepage(struct page *page) -diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h -index 6a63b86..9ad387e 100644 ---- a/arch/x86/include/asm/io.h -+++ b/arch/x86/include/asm/io.h -@@ -7,6 +7,10 @@ - #include - #include - -+#include -+ -+extern int isapnp_disable; -+ - #define build_mmio_read(name, size, type, reg, barrier) \ - static inline type name(const volatile void __iomem *addr) \ - { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ -@@ -199,6 +203,18 @@ extern void __iomem *early_ioremap(resource_size_t phys_addr, - extern void __iomem *early_memremap(resource_size_t phys_addr, - unsigned long size); - extern void early_iounmap(void __iomem *addr, unsigned long size); -+extern bool is_early_ioremap_ptep(pte_t *ptep); -+ -+#ifdef CONFIG_XEN -+struct bio_vec; -+ -+extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, -+ const struct bio_vec *vec2); -+ -+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ -+ (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ -+ (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) -+#endif /* CONFIG_XEN */ - - #define IO_SPACE_LIMIT 0xffff - -diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h -index 5f61f6e..b852da9 100644 ---- a/arch/x86/include/asm/io_apic.h -+++ b/arch/x86/include/asm/io_apic.h -@@ -172,6 +172,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); - extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); - - extern void probe_nr_irqs_gsi(void); -+extern int get_nr_irqs_gsi(void); - - extern int 
setup_ioapic_entry(int apic, int irq, - struct IO_APIC_route_entry *entry, -@@ -201,4 +202,6 @@ static inline void probe_nr_irqs_gsi(void) { } - - #endif - -+void xen_io_apic_init(void); -+ - #endif /* _ASM_X86_IO_APIC_H */ -diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h -index fd6d21b..345c99c 100644 ---- a/arch/x86/include/asm/iommu.h -+++ b/arch/x86/include/asm/iommu.h -@@ -1,8 +1,6 @@ - #ifndef _ASM_X86_IOMMU_H - #define _ASM_X86_IOMMU_H - --extern void pci_iommu_shutdown(void); --extern void no_iommu_init(void); - extern struct dma_map_ops nommu_dma_ops; - extern int force_iommu, no_iommu; - extern int iommu_detected; -diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h -index 6e90a04..ba4dc7b 100644 ---- a/arch/x86/include/asm/irq_vectors.h -+++ b/arch/x86/include/asm/irq_vectors.h -@@ -120,6 +120,12 @@ - */ - #define MCE_SELF_VECTOR 0xeb - -+#ifdef CONFIG_XEN -+/* Xen vector callback to receive events in a HVM domain */ -+#define XEN_HVM_EVTCHN_CALLBACK 0xe9 -+#endif -+ -+ - /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we - * start at 0x31(0x41) to spread out vectors evenly between priority -@@ -157,6 +163,14 @@ static inline int invalid_vm86_irq(int irq) - #define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) - #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) - -+#ifndef __ASSEMBLY__ -+# if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SPARSE_IRQ) -+extern int nr_dynamic_irqs; -+# else -+# define NR_DYNAMIC_IRQS 256 -+# endif -+#endif -+ - #ifdef CONFIG_X86_IO_APIC - # ifdef CONFIG_SPARSE_IRQ - # define NR_IRQS \ -@@ -165,13 +179,13 @@ static inline int invalid_vm86_irq(int irq) - (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) - # else - # if NR_CPUS < MAX_IO_APICS --# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) -+# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) + NR_DYNAMIC_IRQS - # else --# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) -+# define NR_IRQS (NR_VECTORS + 
IO_APIC_VECTOR_LIMIT) + NR_DYNAMIC_IRQS - # endif - # endif - #else /* !CONFIG_X86_IO_APIC: */ --# define NR_IRQS NR_IRQS_LEGACY -+# define NR_IRQS NR_IRQS_LEGACY + NR_DYNAMIC_IRQS - #endif - - #endif /* _ASM_X86_IRQ_VECTORS_H */ -diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h -index ef51b50..e15fca1 100644 ---- a/arch/x86/include/asm/microcode.h -+++ b/arch/x86/include/asm/microcode.h -@@ -55,4 +55,13 @@ static inline struct microcode_ops * __init init_amd_microcode(void) - } - #endif - -+#ifdef CONFIG_MICROCODE_XEN -+extern struct microcode_ops * __init init_xen_microcode(void); -+#else -+static inline struct microcode_ops * __init init_xen_microcode(void) -+{ -+ return NULL; -+} -+#endif -+ - #endif /* _ASM_X86_MICROCODE_H */ -diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h -index 80a1dee..67eaa91 100644 ---- a/arch/x86/include/asm/mmu.h -+++ b/arch/x86/include/asm/mmu.h -@@ -13,6 +13,9 @@ typedef struct { - int size; - struct mutex lock; - void *vdso; -+#ifdef CONFIG_XEN -+ int has_foreign_mappings; -+#endif - } mm_context_t; - - #ifdef CONFIG_SMP -diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h -index efb3899..e571db4 100644 ---- a/arch/x86/include/asm/paravirt.h -+++ b/arch/x86/include/asm/paravirt.h -@@ -330,11 +330,18 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) - { - PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); - } -+ - static inline void set_iopl_mask(unsigned mask) - { - PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); - } - -+static inline void set_io_bitmap(struct thread_struct *thread, -+ unsigned long bytes_updated) -+{ -+ PVOP_VCALL2(pv_cpu_ops.set_io_bitmap, thread, bytes_updated); -+} -+ - /* The paravirtualized I/O functions */ - static inline void slow_down_io(void) - { -@@ -770,15 +777,28 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) - #define PV_RESTORE_REGS "popl %edx; popl %ecx;" - - 
/* save and restore all caller-save registers, except return value */ --#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" --#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" -+#define __PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" -+#define __PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" -+ -+#ifdef CONFIG_FRAME_POINTER -+#define PV_SAVE_ALL_CALLER_REGS \ -+ "push %ebp;" \ -+ "mov %esp, %ebp;" \ -+ __PV_SAVE_ALL_CALLER_REGS -+#define PV_RESTORE_ALL_CALLER_REGS \ -+ __PV_RESTORE_ALL_CALLER_REGS \ -+ "leave;" -+#else -+#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS -+#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS -+#endif - - #define PV_FLAGS_ARG "0" - #define PV_EXTRA_CLOBBERS - #define PV_VEXTRA_CLOBBERS - #else - /* save and restore all caller-save registers, except return value */ --#define PV_SAVE_ALL_CALLER_REGS \ -+#define __PV_SAVE_ALL_CALLER_REGS \ - "push %rcx;" \ - "push %rdx;" \ - "push %rsi;" \ -@@ -787,7 +807,7 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) - "push %r9;" \ - "push %r10;" \ - "push %r11;" --#define PV_RESTORE_ALL_CALLER_REGS \ -+#define __PV_RESTORE_ALL_CALLER_REGS \ - "pop %r11;" \ - "pop %r10;" \ - "pop %r9;" \ -@@ -797,6 +817,19 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) - "pop %rdx;" \ - "pop %rcx;" - -+#ifdef CONFIG_FRAME_POINTER -+#define PV_SAVE_ALL_CALLER_REGS \ -+ "push %rbp;" \ -+ "mov %rsp, %rbp;" \ -+ __PV_SAVE_ALL_CALLER_REGS -+#define PV_RESTORE_ALL_CALLER_REGS \ -+ __PV_RESTORE_ALL_CALLER_REGS \ -+ "leaveq;" -+#else -+#define PV_SAVE_ALL_CALLER_REGS __PV_SAVE_ALL_CALLER_REGS -+#define PV_RESTORE_ALL_CALLER_REGS __PV_RESTORE_ALL_CALLER_REGS -+#endif -+ - /* We save some registers, but all of them, that's too much. 
We clobber all - * caller saved registers but the argument parameter */ - #define PV_SAVE_REGS "pushq %%rdi;" -diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h -index 9357473..3202dcc 100644 ---- a/arch/x86/include/asm/paravirt_types.h -+++ b/arch/x86/include/asm/paravirt_types.h -@@ -135,6 +135,8 @@ struct pv_cpu_ops { - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); - - void (*set_iopl_mask)(unsigned mask); -+ void (*set_io_bitmap)(struct thread_struct *thread, -+ unsigned long bytes_updated); - - void (*wbinvd)(void); - void (*io_delay)(void); -diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h -index ada8c20..faa0af1 100644 ---- a/arch/x86/include/asm/pci.h -+++ b/arch/x86/include/asm/pci.h -@@ -21,6 +21,7 @@ struct pci_sysdata { - extern int pci_routeirq; - extern int noioapicquirk; - extern int noioapicreroute; -+extern int pci_scan_all_fns; - - /* scan a bus after allocating a pci_sysdata for it */ - extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, -@@ -49,6 +50,11 @@ extern unsigned int pcibios_assign_all_busses(void); - #define pcibios_assign_all_busses() 0 - #endif - -+static inline int pcibios_scan_all_fns(struct pci_bus *bus, int devfn) -+{ -+ return pci_scan_all_fns; -+} -+ - extern unsigned long pci_mem_start; - #define PCIBIOS_MIN_IO 0x1000 - #define PCIBIOS_MIN_MEM (pci_mem_start) -@@ -87,6 +93,7 @@ extern void pci_iommu_alloc(void); - - /* MSI arch hook */ - #define arch_setup_msi_irqs arch_setup_msi_irqs -+#define arch_teardown_msi_irqs arch_teardown_msi_irqs - - #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) - -@@ -128,6 +135,7 @@ extern void pci_iommu_alloc(void); - #include - - /* generic pci stuff */ -+#define HAVE_ARCH_PCIBIOS_SCAN_ALL_FNS - #include - #define PCIBIOS_MAX_MEM_32 0xffffffff - -diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h -index b399988..30cbf49 100644 ---- a/arch/x86/include/asm/pci_x86.h 
-+++ b/arch/x86/include/asm/pci_x86.h -@@ -45,6 +45,7 @@ enum pci_bf_sort_state { - extern unsigned int pcibios_max_latency; - - void pcibios_resource_survey(void); -+void pcibios_set_cache_line_size(void); - - /* pci-pc.c */ - -@@ -106,6 +107,7 @@ extern int pci_direct_probe(void); - extern void pci_direct_init(int type); - extern void pci_pcbios_init(void); - extern int pci_olpc_init(void); -+extern int pci_xen_init(void); - extern void __init dmi_check_pciprobe(void); - extern void __init dmi_check_skip_isa_align(void); - -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index af6fd36..430e3cc 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -15,7 +15,6 @@ - : (prot)) - - #ifndef __ASSEMBLY__ -- - /* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. -@@ -26,6 +25,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; - extern spinlock_t pgd_lock; - extern struct list_head pgd_list; - -+extern struct mm_struct *pgd_page_get_mm(struct page *page); -+ - #ifdef CONFIG_PARAVIRT - #include - #else /* !CONFIG_PARAVIRT */ -@@ -76,6 +77,11 @@ extern struct list_head pgd_list; - - #endif /* CONFIG_PARAVIRT */ - -+static inline pteval_t pte_flags(pte_t pte) -+{ -+ return pte_val(pte) & PTE_FLAGS_MASK; -+} -+ - /* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. 
-@@ -397,6 +403,9 @@ static inline unsigned long pages_to_mb(unsigned long npg) - #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - -+#define arch_vm_get_page_prot arch_vm_get_page_prot -+extern pgprot_t arch_vm_get_page_prot(unsigned vm_flags); -+ - #if PAGETABLE_LEVELS > 2 - static inline int pud_none(pud_t pud) - { -@@ -616,6 +625,9 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) - memcpy(dst, src, count * sizeof(pgd_t)); - } - -+int create_lookup_pte_addr(struct mm_struct *mm, -+ unsigned long address, -+ uint64_t *ptep); - - #include - #endif /* __ASSEMBLY__ */ -diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h -index c57a301..4e46931 100644 ---- a/arch/x86/include/asm/pgtable_64.h -+++ b/arch/x86/include/asm/pgtable_64.h -@@ -160,7 +160,7 @@ extern void cleanup_highmap(void); - #define pgtable_cache_init() do { } while (0) - #define check_pgt_cache() do { } while (0) - --#define PAGE_AGP PAGE_KERNEL_NOCACHE -+#define PAGE_AGP PAGE_KERNEL_IO_NOCACHE - #define HAVE_PAGE_AGP 1 - - /* fs/proc/kcore.c */ -diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h -index d1f4a76..a81b0ed 100644 ---- a/arch/x86/include/asm/pgtable_types.h -+++ b/arch/x86/include/asm/pgtable_types.h -@@ -265,11 +265,6 @@ static inline pteval_t native_pte_val(pte_t pte) - return pte.pte; - } - --static inline pteval_t pte_flags(pte_t pte) --{ -- return native_pte_val(pte) & PTE_FLAGS_MASK; --} -- - #define pgprot_val(x) ((x).pgprot) - #define __pgprot(x) ((pgprot_t) { (x) } ) - -diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h -index 78bb4d7..2232bd2 100644 ---- a/arch/x86/include/asm/processor.h -+++ b/arch/x86/include/asm/processor.h -@@ -551,6 +551,9 @@ static inline void native_set_iopl_mask(unsigned mask) - #endif - } - -+extern void native_set_io_bitmap(struct thread_struct *thread, -+ unsigned long 
updated_bytes); -+ - static inline void - native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) - { -@@ -592,6 +595,7 @@ static inline void load_sp0(struct tss_struct *tss, - } - - #define set_iopl_mask native_set_iopl_mask -+#define set_io_bitmap native_set_io_bitmap - #endif /* CONFIG_PARAVIRT */ - - /* -diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h -index 53235fd..daaacab 100644 ---- a/arch/x86/include/asm/pvclock.h -+++ b/arch/x86/include/asm/pvclock.h -@@ -10,5 +10,6 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); - void pvclock_read_wallclock(struct pvclock_wall_clock *wall, - struct pvclock_vcpu_time_info *vcpu, - struct timespec *ts); -+void pvclock_resume(void); - - #endif /* _ASM_X86_PVCLOCK_H */ -diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h -index 18e496c..154a5f1 100644 ---- a/arch/x86/include/asm/setup.h -+++ b/arch/x86/include/asm/setup.h -@@ -95,6 +95,11 @@ void *extend_brk(size_t size, size_t align); - : : "i" (sz)); \ - } - -+/* Helper for reserving space for arrays of things */ -+#define RESERVE_BRK_ARRAY(type, name, entries) \ -+ type *name; \ -+ RESERVE_BRK(name, sizeof(type) * entries) -+ - #ifdef __i386__ - - void __init i386_start_kernel(void); -diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h -index b9e4e20..8085277 100644 ---- a/arch/x86/include/asm/swiotlb.h -+++ b/arch/x86/include/asm/swiotlb.h -@@ -3,15 +3,16 @@ - - #include - --/* SWIOTLB interface */ -- --extern int swiotlb_force; -- - #ifdef CONFIG_SWIOTLB - extern int swiotlb; --extern void pci_swiotlb_init(void); -+extern int __init pci_swiotlb_detect(void); -+extern void __init pci_swiotlb_init(void); - #else - #define swiotlb 0 -+static inline int pci_swiotlb_detect(void) -+{ -+ return 0; -+} - static inline void pci_swiotlb_init(void) - { - } -diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h -index 1bb6e39..ef0fa4d 100644 ---- 
a/arch/x86/include/asm/syscalls.h -+++ b/arch/x86/include/asm/syscalls.h -@@ -33,11 +33,11 @@ long sys_rt_sigreturn(struct pt_regs *); - asmlinkage int sys_set_thread_area(struct user_desc __user *); - asmlinkage int sys_get_thread_area(struct user_desc __user *); - --/* X86_32 only */ --#ifdef CONFIG_X86_32 - /* kernel/ioport.c */ --long sys_iopl(struct pt_regs *); -+asmlinkage long sys_iopl(unsigned int); - -+/* X86_32 only */ -+#ifdef CONFIG_X86_32 - /* kernel/process_32.c */ - int sys_clone(struct pt_regs *); - int sys_execve(struct pt_regs *); -@@ -68,8 +68,6 @@ int sys_vm86(struct pt_regs *); - #else /* CONFIG_X86_32 */ - - /* X86_64 only */ --/* kernel/ioport.c */ --asmlinkage long sys_iopl(unsigned int, struct pt_regs *); - - /* kernel/process_64.c */ - asmlinkage long sys_clone(unsigned long, unsigned long, -diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h -index 7f3eba0..e4fc8ea 100644 ---- a/arch/x86/include/asm/tlbflush.h -+++ b/arch/x86/include/asm/tlbflush.h -@@ -89,6 +89,10 @@ static inline void __flush_tlb_one(unsigned long addr) - - #ifndef CONFIG_SMP - -+static inline void __init init_smp_flush(void) -+{ -+} -+ - #define flush_tlb() __flush_tlb() - #define flush_tlb_all() __flush_tlb_all() - #define local_flush_tlb() __flush_tlb() -@@ -129,6 +133,8 @@ static inline void reset_lazy_tlbstate(void) - - #define local_flush_tlb() __flush_tlb() - -+extern void init_smp_flush(void); -+ - extern void flush_tlb_all(void); - extern void flush_tlb_current_task(void); - extern void flush_tlb_mm(struct mm_struct *); -diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h -index 2c756fd..d8e7145 100644 ---- a/arch/x86/include/asm/x86_init.h -+++ b/arch/x86/include/asm/x86_init.h -@@ -91,6 +91,14 @@ struct x86_init_timers { - }; - - /** -+ * struct x86_init_iommu - platform specific iommu setup -+ * @iommu_init: platform specific iommu setup -+ */ -+struct x86_init_iommu { -+ int (*iommu_init)(void); -+}; -+ 
-+/** - * struct x86_init_ops - functions for platform specific setup - * - */ -@@ -101,6 +109,7 @@ struct x86_init_ops { - struct x86_init_oem oem; - struct x86_init_paging paging; - struct x86_init_timers timers; -+ struct x86_init_iommu iommu; - }; - - /** -@@ -121,6 +130,7 @@ struct x86_platform_ops { - unsigned long (*calibrate_tsc)(void); - unsigned long (*get_wallclock)(void); - int (*set_wallclock)(unsigned long nowtime); -+ void (*iommu_shutdown)(void); - }; - - extern struct x86_init_ops x86_init; -diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h -index 9c371e4..41c4be0 100644 ---- a/arch/x86/include/asm/xen/hypercall.h -+++ b/arch/x86/include/asm/xen/hypercall.h -@@ -45,6 +45,8 @@ - #include - #include - #include -+#include -+#include - - /* - * The hypercall asms have to meet several constraints: -@@ -200,6 +202,23 @@ extern struct { char _entry[32]; } hypercall_page[]; - (type)__res; \ - }) - -+static inline long -+privcmd_call(unsigned call, -+ unsigned long a1, unsigned long a2, -+ unsigned long a3, unsigned long a4, -+ unsigned long a5) -+{ -+ __HYPERCALL_DECLS; -+ __HYPERCALL_5ARG(a1, a2, a3, a4, a5); -+ -+ asm volatile("call *%[call]" -+ : __HYPERCALL_5PARAM -+ : [call] "a" (&hypercall_page[call]) -+ : __HYPERCALL_CLOBBER5); -+ -+ return (long)__res; -+} -+ - static inline int - HYPERVISOR_set_trap_table(struct trap_info *table) - { -@@ -282,6 +301,20 @@ HYPERVISOR_set_timer_op(u64 timeout) - } - - static inline int -+HYPERVISOR_mca(struct xen_mc *mc_op) -+{ -+ mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; -+ return _hypercall1(int, mca, mc_op); -+} -+ -+static inline int -+HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) -+{ -+ platform_op->interface_version = XENPF_INTERFACE_VERSION; -+ return _hypercall1(int, dom0_op, platform_op); -+} -+ -+static inline int - HYPERVISOR_set_debugreg(int reg, unsigned long value) - { - return _hypercall2(int, set_debugreg, reg, value); -@@ -417,6 +450,12 @@ 
HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) - return _hypercall2(int, nmi_op, op, arg); - } - -+static inline unsigned long __must_check -+HYPERVISOR_hvm_op(int op, void *arg) -+{ -+ return _hypercall2(unsigned long, hvm_op, op, arg); -+} -+ - static inline void - MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) - { -@@ -424,6 +463,14 @@ MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) - mcl->args[0] = set; - } - -+#if defined(CONFIG_X86_64) -+#define MULTI_UVMFLAGS_INDEX 2 -+#define MULTI_UVMDOMID_INDEX 3 -+#else -+#define MULTI_UVMFLAGS_INDEX 3 -+#define MULTI_UVMDOMID_INDEX 4 -+#endif -+ - static inline void - MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, - pte_t new_val, unsigned long flags) -@@ -432,12 +479,11 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, - mcl->args[0] = va; - if (sizeof(new_val) == sizeof(long)) { - mcl->args[1] = new_val.pte; -- mcl->args[2] = flags; - } else { - mcl->args[1] = new_val.pte; - mcl->args[2] = new_val.pte >> 32; -- mcl->args[3] = flags; - } -+ mcl->args[MULTI_UVMFLAGS_INDEX] = flags; - } - - static inline void -diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h -index d5b7e90..396ff4c 100644 ---- a/arch/x86/include/asm/xen/hypervisor.h -+++ b/arch/x86/include/asm/xen/hypervisor.h -@@ -37,31 +37,4 @@ - extern struct shared_info *HYPERVISOR_shared_info; - extern struct start_info *xen_start_info; - --enum xen_domain_type { -- XEN_NATIVE, /* running on bare hardware */ -- XEN_PV_DOMAIN, /* running in a PV domain */ -- XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ --}; -- --#ifdef CONFIG_XEN --extern enum xen_domain_type xen_domain_type; --#else --#define xen_domain_type XEN_NATIVE --#endif -- --#define xen_domain() (xen_domain_type != XEN_NATIVE) --#define xen_pv_domain() (xen_domain() && \ -- xen_domain_type == XEN_PV_DOMAIN) --#define xen_hvm_domain() (xen_domain() && \ -- xen_domain_type == 
XEN_HVM_DOMAIN) -- --#ifdef CONFIG_XEN_DOM0 --#include -- --#define xen_initial_domain() (xen_pv_domain() && \ -- xen_start_info->flags & SIF_INITDOMAIN) --#else /* !CONFIG_XEN_DOM0 */ --#define xen_initial_domain() (0) --#endif /* CONFIG_XEN_DOM0 */ -- - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ -diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h -index e8506c1..9539998 100644 ---- a/arch/x86/include/asm/xen/interface.h -+++ b/arch/x86/include/asm/xen/interface.h -@@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); - #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) - #endif - --#ifndef machine_to_phys_mapping --#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) --#endif -+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) -+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) - - /* Maximum number of virtual CPUs in multi-processor guests. */ - #define MAX_VIRT_CPUS 32 -@@ -97,6 +97,8 @@ DEFINE_GUEST_HANDLE(void); - #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) - - #ifndef __ASSEMBLY__ -+#include -+ - struct trap_info { - uint8_t vector; /* exception vector */ - uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ -diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h -index 42a7e00..8413688 100644 ---- a/arch/x86/include/asm/xen/interface_32.h -+++ b/arch/x86/include/asm/xen/interface_32.h -@@ -32,6 +32,11 @@ - /* And the trap vector is... */ - #define TRAP_INSTR "int $0x82" - -+#define __MACH2PHYS_VIRT_START 0xF5800000 -+#define __MACH2PHYS_VIRT_END 0xF6800000 -+ -+#define __MACH2PHYS_SHIFT 2 -+ - /* - * Virtual addresses beyond this are not modifiable by guest OSes. The - * machine->physical mapping table starts at this address, read-only. 
-diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h -index 100d266..839a481 100644 ---- a/arch/x86/include/asm/xen/interface_64.h -+++ b/arch/x86/include/asm/xen/interface_64.h -@@ -39,18 +39,7 @@ - #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 - #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 - #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 -- --#ifndef HYPERVISOR_VIRT_START --#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) --#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) --#endif -- --#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) --#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) --#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) --#ifndef machine_to_phys_mapping --#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) --#endif -+#define __MACH2PHYS_SHIFT 3 - - /* - * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) -diff --git a/arch/x86/include/asm/xen/iommu.h b/arch/x86/include/asm/xen/iommu.h -new file mode 100644 -index 0000000..75df312 ---- /dev/null -+++ b/arch/x86/include/asm/xen/iommu.h -@@ -0,0 +1,12 @@ -+#ifndef ASM_X86__XEN_IOMMU_H -+ -+#ifdef CONFIG_PCI_XEN -+extern void xen_iommu_init(void); -+#else -+static inline void xen_iommu_init(void) -+{ -+} -+#endif -+ -+#endif -+ -diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h -index 018a0a4..05c5cf5 100644 ---- a/arch/x86/include/asm/xen/page.h -+++ b/arch/x86/include/asm/xen/page.h -@@ -5,6 +5,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -28,23 +29,32 @@ typedef struct xpaddr { - - /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ - #define INVALID_P2M_ENTRY (~0UL) --#define FOREIGN_FRAME_BIT (1UL<<31) -+#define FOREIGN_FRAME_BIT (1UL << (sizeof(unsigned long) * 8 - 1)) - #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) - - /* Maximum 
amount of memory we can handle in a domain in pages */ - #define MAX_DOMAIN_PAGES \ - ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) - -+extern unsigned long *machine_to_phys_mapping; -+extern unsigned int machine_to_phys_order; - - extern unsigned long get_phys_to_machine(unsigned long pfn); --extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); -+extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); - - static inline unsigned long pfn_to_mfn(unsigned long pfn) - { -+ unsigned long mfn; -+ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return pfn; - -- return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT; -+ mfn = get_phys_to_machine(pfn); -+ -+ if (mfn != INVALID_P2M_ENTRY) -+ mfn &= ~FOREIGN_FRAME_BIT; -+ -+ return mfn; - } - - static inline int phys_to_machine_mapping_valid(unsigned long pfn) -@@ -62,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) - if (xen_feature(XENFEAT_auto_translated_physmap)) - return mfn; - --#if 0 - if (unlikely((mfn >> machine_to_phys_order) != 0)) -- return max_mapnr; --#endif -+ return ~0; - - pfn = 0; - /* -@@ -112,13 +120,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) - */ - static inline unsigned long mfn_to_local_pfn(unsigned long mfn) - { -- extern unsigned long max_mapnr; - unsigned long pfn = mfn_to_pfn(mfn); -- if ((pfn < max_mapnr) -- && !xen_feature(XENFEAT_auto_translated_physmap) -- && (get_phys_to_machine(pfn) != mfn)) -- return max_mapnr; /* force !pfn_valid() */ -- /* XXX fixme; not true with sparsemem */ -+ if (get_phys_to_machine(pfn) != mfn) -+ return -1; /* force !pfn_valid() */ - return pfn; - } - -@@ -163,6 +167,7 @@ static inline pte_t __pte_ma(pteval_t x) - - #define pgd_val_ma(x) ((x).pgd) - -+void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid); - - xmaddr_t arbitrary_virt_to_machine(void *address); - unsigned long arbitrary_virt_to_mfn(void *vaddr); -diff --git 
a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h -new file mode 100644 -index 0000000..6683196 ---- /dev/null -+++ b/arch/x86/include/asm/xen/pci.h -@@ -0,0 +1,104 @@ -+#ifndef _ASM_X86_XEN_PCI_H -+#define _ASM_X86_XEN_PCI_H -+ -+#if defined(CONFIG_PCI_MSI) -+#if defined(CONFIG_PCI_XEN) -+int xen_register_pirq(u32 gsi, int triggering); -+int xen_register_gsi(u32 gsi, int triggering, int polarity); -+int xen_create_msi_irq(struct pci_dev *dev, -+ struct msi_desc *msidesc, -+ int type); -+void xen_pci_teardown_msi_dev(struct pci_dev *dev); -+void xen_pci_teardown_msi_irq(int irq); -+int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -+ -+/* The drivers/pci/xen-pcifront.c sets this structure to -+ * its own functions. -+ */ -+struct xen_pci_frontend_ops { -+ int (*enable_msi)(struct pci_dev *dev, int **vectors); -+ void (*disable_msi)(struct pci_dev *dev); -+ int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); -+ void (*disable_msix)(struct pci_dev *dev); -+}; -+ -+extern struct xen_pci_frontend_ops *xen_pci_frontend; -+ -+static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, -+ int **vectors) -+{ -+ if (xen_pci_frontend && xen_pci_frontend->enable_msi) -+ return xen_pci_frontend->enable_msi(dev, vectors); -+ return -ENODEV; -+} -+static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) -+{ -+ if (xen_pci_frontend && xen_pci_frontend->disable_msi) -+ xen_pci_frontend->disable_msi(dev); -+} -+static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, -+ int **vectors, int nvec) -+{ -+ if (xen_pci_frontend && xen_pci_frontend->enable_msix) -+ return xen_pci_frontend->enable_msix(dev, vectors, nvec); -+ return -ENODEV; -+} -+static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev) -+{ -+ if (xen_pci_frontend && xen_pci_frontend->disable_msix) -+ xen_pci_frontend->disable_msix(dev); -+} -+#else -+static inline int xen_create_msi_irq(struct pci_dev *dev, -+ struct msi_desc 
*msidesc, -+ int type) -+{ -+ return -1; -+} -+static inline void xen_pci_teardown_msi_dev(struct pci_dev *dev) { } -+static inline void xen_pci_teardown_msi_irq(int irq) { } -+static inline int xen_pci_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -+{ -+ return -ENODEV; -+} -+#endif /* CONFIG_PCI_XEN */ -+ -+#endif /* CONFIG_PCI_MSI */ -+ -+#ifdef CONFIG_XEN_DOM0_PCI -+int xen_register_gsi(u32 gsi, int triggering, int polarity); -+int xen_find_device_domain_owner(struct pci_dev *dev); -+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); -+int xen_unregister_device_domain_owner(struct pci_dev *dev); -+ -+#else -+static inline int xen_register_gsi(u32 gsi, int triggering, int polarity) -+{ -+ return -1; -+} -+ -+static inline int xen_find_device_domain_owner(struct pci_dev *dev) -+{ -+ return -1; -+} -+static inline int xen_register_device_domain_owner(struct pci_dev *dev, -+ uint16_t domain) -+{ -+ return -1; -+} -+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev) -+{ -+ return -1; -+} -+#endif -+ -+#if defined(CONFIG_PCI_MSI) && defined(CONFIG_XEN_DOM0_PCI) -+int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -+#else -+static inline int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -+{ -+ return -1; -+} -+#endif -+ -+#endif /* _ASM_X86_XEN_PCI_H */ -diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h -new file mode 100644 -index 0000000..e4fe299 ---- /dev/null -+++ b/arch/x86/include/asm/xen/swiotlb-xen.h -@@ -0,0 +1,14 @@ -+#ifndef _ASM_X86_SWIOTLB_XEN_H -+#define _ASM_X86_SWIOTLB_XEN_H -+ -+#ifdef CONFIG_PCI_XEN -+extern int xen_swiotlb; -+extern int __init pci_xen_swiotlb_detect(void); -+extern void __init pci_xen_swiotlb_init(void); -+#else -+#define xen_swiotlb 0 -+static inline int __init pci_xen_swiotlb_detect(void) { return 0; } -+static inline void __init pci_xen_swiotlb_init(void) { } -+#endif -+ -+#endif /* 
_ASM_X86_SWIOTLB_XEN_H */ -diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile -index d1911ab..cfe00bc 100644 ---- a/arch/x86/kernel/Makefile -+++ b/arch/x86/kernel/Makefile -@@ -113,6 +113,7 @@ obj-$(CONFIG_X86_MRST) += mrst.o - microcode-y := microcode_core.o - microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o - microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o -+microcode-$(CONFIG_MICROCODE_XEN) += microcode_xen.o - obj-$(CONFIG_MICROCODE) += microcode.o - - obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o -diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c -index 23c2da8..a2a5125 100644 ---- a/arch/x86/kernel/acpi/boot.c -+++ b/arch/x86/kernel/acpi/boot.c -@@ -42,6 +42,10 @@ - #include - #include - -+#include -+ -+#include -+ - static int __initdata acpi_force = 0; - u32 acpi_rsdt_forced; - int acpi_disabled; -@@ -149,6 +153,10 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) - { - unsigned int ver = 0; - -+ /* We don't want to register lapics when in Xen dom0 */ -+ if (xen_initial_domain()) -+ return; -+ - if (!enabled) { - ++disabled_cpus; - return; -@@ -461,9 +469,13 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) - */ - int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) - { -- unsigned int irq; -+ int irq; - unsigned int plat_gsi = gsi; - -+ irq = xen_register_gsi(gsi, trigger, polarity); -+ if (irq >= 0) -+ return irq; -+ - #ifdef CONFIG_PCI - /* - * Make sure all (legacy) PCI IRQs are set as level-triggered. 
-@@ -740,6 +752,10 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) - - static void __init acpi_register_lapic_address(unsigned long address) - { -+ /* Xen dom0 doesn't have usable lapics */ -+ if (xen_initial_domain()) -+ return; -+ - mp_lapic_addr = address; - - set_fixmap_nocache(FIX_APIC_BASE, address); -@@ -860,6 +876,9 @@ int __init acpi_probe_gsi(void) - max_gsi = gsi; - } - -+ if (xen_initial_domain()) -+ max_gsi += 255; /* Plus maximum entries of an ioapic. */ -+ - return max_gsi + 1; - } - -diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c -index d85d1b2..8aabedd 100644 ---- a/arch/x86/kernel/acpi/processor.c -+++ b/arch/x86/kernel/acpi/processor.c -@@ -12,6 +12,8 @@ - #include - #include - -+#include -+ - static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) - { - struct acpi_object_list *obj_list; -@@ -59,7 +61,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) - /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled - */ -- if (!cpu_has(c, X86_FEATURE_MWAIT)) -+ if (!cpu_has(c, X86_FEATURE_MWAIT) && !xen_initial_domain()) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); - - obj->type = ACPI_TYPE_BUFFER; -@@ -88,6 +90,19 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) - - EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - -+/* Initialize _PDC data based on the CPU vendor */ -+void xen_arch_acpi_processor_init_pdc(struct acpi_processor *pr) -+{ -+ struct cpuinfo_x86 *c = &cpu_data(0); -+ -+ pr->pdc = NULL; -+ if (c->x86_vendor == X86_VENDOR_INTEL) -+ init_intel_pdc(pr, c); -+ -+ return; -+} -+EXPORT_SYMBOL(xen_arch_acpi_processor_init_pdc); -+ - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) - { - if (pr->pdc) { -diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c -index ca93638..9eff23c 100644 ---- a/arch/x86/kernel/acpi/sleep.c -+++ b/arch/x86/kernel/acpi/sleep.c -@@ -12,6 +12,8 @@ - #include - 
#include - -+#include -+ - #include "realmode/wakeup.h" - #include "sleep.h" - -diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c -index 7cd33f7..b8497c6 100644 ---- a/arch/x86/kernel/amd_iommu.c -+++ b/arch/x86/kernel/amd_iommu.c -@@ -928,7 +928,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, - } - - if (unlikely(address == -1)) -- address = bad_dma_address; -+ address = DMA_ERROR_CODE; - - WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); - -@@ -1545,7 +1545,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, - - pte = dma_ops_get_pte(dom, address); - if (!pte) -- return bad_dma_address; -+ return DMA_ERROR_CODE; - - __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; - -@@ -1626,7 +1626,7 @@ static dma_addr_t __map_single(struct device *dev, - retry: - address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, - dma_mask); -- if (unlikely(address == bad_dma_address)) { -+ if (unlikely(address == DMA_ERROR_CODE)) { - /* - * setting next_address here will let the address - * allocator only scan the new allocated range in the -@@ -1647,7 +1647,7 @@ retry: - start = address; - for (i = 0; i < pages; ++i) { - ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); -- if (ret == bad_dma_address) -+ if (ret == DMA_ERROR_CODE) - goto out_unmap; - - paddr += PAGE_SIZE; -@@ -1675,7 +1675,7 @@ out_unmap: - - dma_ops_free_addresses(dma_dom, address, pages); - -- return bad_dma_address; -+ return DMA_ERROR_CODE; - } - - /* -@@ -1692,7 +1692,7 @@ static void __unmap_single(struct amd_iommu *iommu, - dma_addr_t i, start; - unsigned int pages; - -- if ((dma_addr == bad_dma_address) || -+ if ((dma_addr == DMA_ERROR_CODE) || - (dma_addr + size > dma_dom->aperture_size)) - return; - -@@ -1735,7 +1735,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, - INC_STATS_COUNTER(cnt_map_single); - - if (!check_device(dev)) -- return bad_dma_address; -+ return DMA_ERROR_CODE; - - dma_mask = 
*dev->dma_mask; - -@@ -1746,12 +1746,12 @@ static dma_addr_t map_page(struct device *dev, struct page *page, - return (dma_addr_t)paddr; - - if (!dma_ops_domain(domain)) -- return bad_dma_address; -+ return DMA_ERROR_CODE; - - spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, - dma_mask); -- if (addr == bad_dma_address) -+ if (addr == DMA_ERROR_CODE) - goto out; - - iommu_completion_wait(iommu); -@@ -1960,7 +1960,7 @@ static void *alloc_coherent(struct device *dev, size_t size, - *dma_addr = __map_single(dev, iommu, domain->priv, paddr, - size, DMA_BIDIRECTIONAL, true, dma_mask); - -- if (*dma_addr == bad_dma_address) { -+ if (*dma_addr == DMA_ERROR_CODE) { - spin_unlock_irqrestore(&domain->lock, flags); - goto out_free; - } -@@ -2122,8 +2122,7 @@ int __init amd_iommu_init_dma_ops(void) - prealloc_protection_domains(); - - iommu_detected = 1; -- force_iommu = 1; -- bad_dma_address = 0; -+ swiotlb = 0; - #ifdef CONFIG_GART_IOMMU - gart_iommu_aperture_disabled = 1; - gart_iommu_aperture = 0; -diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c -index 400be99..0069df5 100644 ---- a/arch/x86/kernel/amd_iommu_init.c -+++ b/arch/x86/kernel/amd_iommu_init.c -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - - /* - * definitions for the ACPI scanning code -@@ -1206,19 +1207,10 @@ static struct sys_device device_amd_iommu = { - * functions. Finally it prints some information about AMD IOMMUs and - * the driver state and enables the hardware. - */ --int __init amd_iommu_init(void) -+static int __init amd_iommu_init(void) - { - int i, ret = 0; - -- -- if (no_iommu) { -- printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); -- return 0; -- } -- -- if (!amd_iommu_detected) -- return -ENODEV; -- - /* - * First parse ACPI tables to find the largest Bus/Dev/Func - * we need to handle. 
Upon this information the shared data -@@ -1333,6 +1325,7 @@ int __init amd_iommu_init(void) - else - printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); - -+ x86_platform.iommu_shutdown = disable_iommus; - out: - return ret; - -@@ -1361,11 +1354,6 @@ free: - goto out; - } - --void amd_iommu_shutdown(void) --{ -- disable_iommus(); --} -- - /**************************************************************************** - * - * Early detect code. This code runs at IOMMU detection time in the DMA -@@ -1380,16 +1368,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) - - void __init amd_iommu_detect(void) - { -- if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) -+ if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; - - if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { - iommu_detected = 1; - amd_iommu_detected = 1; --#ifdef CONFIG_GART_IOMMU -- gart_iommu_aperture_disabled = 1; -- gart_iommu_aperture = 0; --#endif -+ x86_init.iommu.iommu_init = amd_iommu_init; - } - } - -diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c -index 082089e..8d34362 100644 ---- a/arch/x86/kernel/aperture_64.c -+++ b/arch/x86/kernel/aperture_64.c -@@ -28,6 +28,7 @@ - #include - #include - #include -+#include - - int gart_iommu_aperture; - int gart_iommu_aperture_disabled __initdata; -@@ -401,6 +402,7 @@ void __init gart_iommu_hole_init(void) - - iommu_detected = 1; - gart_iommu_aperture = 1; -+ x86_init.iommu.iommu_init = gart_iommu_init; - - ctl = read_pci_config(bus, slot, 3, - AMD64_GARTAPERTURECTL); -@@ -469,7 +471,7 @@ out: - - if (aper_alloc) { - /* Got the aperture from the AGP bridge */ -- } else if (swiotlb && !valid_agp) { -+ } else if (!valid_agp) { - /* Do nothing */ - } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || - force_iommu || -diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c -index 8928d97..4848d5d 100644 ---- 
a/arch/x86/kernel/apic/io_apic.c -+++ b/arch/x86/kernel/apic/io_apic.c -@@ -63,7 +63,12 @@ - #include - #include - -+#include - #include -+#include -+#include -+ -+#include - - #define __apicdebuginit(type) static type __init - #define for_each_irq_pin(entry, head) \ -@@ -395,14 +400,18 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector) - - static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) - { -- struct io_apic __iomem *io_apic = io_apic_base(apic); -+ struct io_apic __iomem *io_apic; -+ -+ io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); - } - - static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) - { -- struct io_apic __iomem *io_apic = io_apic_base(apic); -+ struct io_apic __iomem *io_apic; -+ -+ io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - writel(value, &io_apic->data); - } -@@ -415,7 +424,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i - */ - static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) - { -- struct io_apic __iomem *io_apic = io_apic_base(apic); -+ struct io_apic __iomem *io_apic; -+ -+ io_apic = io_apic_base(apic); - - if (sis_apic_bug) - writel(reg, &io_apic->index); -@@ -3494,6 +3505,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - -+ if (xen_pv_domain()) -+ return xen_pci_setup_msi_irqs(dev, nvec, type); -+ - node = dev_to_node(&dev->dev); - irq_want = nr_irqs_gsi; - sub_handle = 0; -@@ -3543,7 +3557,29 @@ error: - - void arch_teardown_msi_irq(unsigned int irq) - { -- destroy_irq(irq); -+ if (xen_domain()) -+ xen_pci_teardown_msi_irq(irq); -+ else -+ destroy_irq(irq); -+} -+ -+void arch_teardown_msi_irqs(struct pci_dev *dev) -+{ -+ struct msi_desc *entry; -+ -+ /* If we are non-privileged PV domain, we have to -+ * to call xen_teardown_msi_dev 
first. */ -+ if (xen_domain()) -+ xen_pci_teardown_msi_dev(dev); -+ -+ list_for_each_entry(entry, &dev->msi_list, list) { -+ int i, nvec; -+ if (entry->irq == 0) -+ continue; -+ nvec = 1 << entry->msi_attrib.multiple; -+ for (i = 0; i < nvec; i++) -+ arch_teardown_msi_irq(entry->irq + i); -+ } - } - - #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) -@@ -3860,7 +3896,14 @@ void __init probe_nr_irqs_gsi(void) - printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); - } - -+int get_nr_irqs_gsi(void) -+{ -+ return nr_irqs_gsi; -+} -+ - #ifdef CONFIG_SPARSE_IRQ -+int nr_dynamic_irqs; -+ - int __init arch_probe_nr_irqs(void) - { - int nr; -@@ -3878,6 +3921,8 @@ int __init arch_probe_nr_irqs(void) - if (nr < nr_irqs) - nr_irqs = nr; - -+ nr_irqs += nr_dynamic_irqs; -+ - return 0; - } - #endif -diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c -index 7ff61d6..d1e6e60 100644 ---- a/arch/x86/kernel/apic/nmi.c -+++ b/arch/x86/kernel/apic/nmi.c -@@ -558,6 +558,9 @@ void arch_trigger_all_cpu_backtrace(void) - { - int i; - -+ if (!cpu_has_apic) -+ return; -+ - cpumask_copy(&backtrace_mask, cpu_online_mask); - - printk(KERN_INFO "sending NMI to all CPUs:\n"); -diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile -index f4361b5..404e458 100644 ---- a/arch/x86/kernel/cpu/mtrr/Makefile -+++ b/arch/x86/kernel/cpu/mtrr/Makefile -@@ -1,3 +1,4 @@ - obj-y := main.o if.o generic.o state.o cleanup.o - obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o -+obj-$(CONFIG_XEN_DOM0) += xen.o - -diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c -index 33af141..378f8dc 100644 ---- a/arch/x86/kernel/cpu/mtrr/amd.c -+++ b/arch/x86/kernel/cpu/mtrr/amd.c -@@ -108,6 +108,11 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) - return 0; - } - -+static int amd_num_var_ranges(void) -+{ -+ return 2; -+} -+ - static struct mtrr_ops amd_mtrr_ops = { - .vendor = X86_VENDOR_AMD, - .set = 
amd_set_mtrr, -@@ -115,6 +120,7 @@ static struct mtrr_ops amd_mtrr_ops = { - .get_free_region = generic_get_free_region, - .validate_add_page = amd_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -+ .num_var_ranges = amd_num_var_ranges, - }; - - int __init amd_init_mtrr(void) -diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c -index de89f14..7c686a0 100644 ---- a/arch/x86/kernel/cpu/mtrr/centaur.c -+++ b/arch/x86/kernel/cpu/mtrr/centaur.c -@@ -110,6 +110,11 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t - return 0; - } - -+static int centaur_num_var_ranges(void) -+{ -+ return 8; -+} -+ - static struct mtrr_ops centaur_mtrr_ops = { - .vendor = X86_VENDOR_CENTAUR, - .set = centaur_set_mcr, -@@ -117,6 +122,7 @@ static struct mtrr_ops centaur_mtrr_ops = { - .get_free_region = centaur_get_free_region, - .validate_add_page = centaur_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -+ .num_var_ranges = centaur_num_var_ranges, - }; - - int __init centaur_init_mtrr(void) -diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c -index 228d982..fd6edcc 100644 ---- a/arch/x86/kernel/cpu/mtrr/cyrix.c -+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c -@@ -265,6 +265,11 @@ static void cyrix_set_all(void) - post_set(); - } - -+static int cyrix_num_var_ranges(void) -+{ -+ return 8; -+} -+ - static struct mtrr_ops cyrix_mtrr_ops = { - .vendor = X86_VENDOR_CYRIX, - .set_all = cyrix_set_all, -@@ -273,6 +278,7 @@ static struct mtrr_ops cyrix_mtrr_ops = { - .get_free_region = cyrix_get_free_region, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -+ .num_var_ranges = cyrix_num_var_ranges, - }; - - int __init cyrix_init_mtrr(void) -diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c -index 55da0c5..42f30cd 100644 ---- a/arch/x86/kernel/cpu/mtrr/generic.c -+++ b/arch/x86/kernel/cpu/mtrr/generic.c -@@ -749,8 
+749,16 @@ int positive_have_wrcomb(void) - return 1; - } - --/* -- * Generic structure... -+static int generic_num_var_ranges(void) -+{ -+ unsigned long config = 0, dummy; -+ -+ rdmsr(MSR_MTRRcap, config, dummy); -+ -+ return config & 0xff; -+} -+ -+/* generic structure... - */ - struct mtrr_ops generic_mtrr_ops = { - .use_intel_if = 1, -@@ -760,4 +768,5 @@ struct mtrr_ops generic_mtrr_ops = { - .set = generic_set_mtrr, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = generic_have_wrcomb, -+ .num_var_ranges = generic_num_var_ranges, - }; -diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c -index 84e83de..c8cb9ed 100644 ---- a/arch/x86/kernel/cpu/mtrr/main.c -+++ b/arch/x86/kernel/cpu/mtrr/main.c -@@ -110,21 +110,6 @@ static int have_wrcomb(void) - return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; - } - --/* This function returns the number of variable MTRRs */ --static void __init set_num_var_ranges(void) --{ -- unsigned long config = 0, dummy; -- -- if (use_intel()) -- rdmsr(MSR_MTRRcap, config, dummy); -- else if (is_cpu(AMD)) -- config = 2; -- else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) -- config = 8; -- -- num_var_ranges = config & 0xff; --} -- - static void __init init_table(void) - { - int i, max; -@@ -711,8 +696,11 @@ void __init mtrr_bp_init(void) - } - } - -+ /* Let Xen code override the above if it wants */ -+ xen_init_mtrr(); -+ - if (mtrr_if) { -- set_num_var_ranges(); -+ num_var_ranges = mtrr_if->num_var_ranges(); - init_table(); - if (use_intel()) { - get_mtrr_state(); -diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h -index a501dee..98569c3 100644 ---- a/arch/x86/kernel/cpu/mtrr/mtrr.h -+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h -@@ -5,6 +5,8 @@ - #include - #include - -+#include -+ - #define MTRR_CHANGE_MASK_FIXED 0x01 - #define MTRR_CHANGE_MASK_VARIABLE 0x02 - #define MTRR_CHANGE_MASK_DEFTYPE 0x04 -@@ -25,6 +27,8 @@ struct mtrr_ops { - int (*validate_add_page)(unsigned 
long base, unsigned long size, - unsigned int type); - int (*have_wrcomb)(void); -+ -+ int (*num_var_ranges)(void); - }; - - extern int generic_get_free_region(unsigned long base, unsigned long size, -@@ -73,6 +77,13 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned); - int amd_init_mtrr(void); - int cyrix_init_mtrr(void); - int centaur_init_mtrr(void); -+#ifdef CONFIG_XEN_DOM0 -+void xen_init_mtrr(void); -+#else -+static inline void xen_init_mtrr(void) -+{ -+} -+#endif - - extern int changed_by_mtrr_cleanup; - extern int mtrr_cleanup(unsigned address_bits); -diff --git a/arch/x86/kernel/cpu/mtrr/xen.c b/arch/x86/kernel/cpu/mtrr/xen.c -new file mode 100644 -index 0000000..852018b ---- /dev/null -+++ b/arch/x86/kernel/cpu/mtrr/xen.c -@@ -0,0 +1,109 @@ -+#include -+#include -+ -+#include -+ -+#include "mtrr.h" -+ -+#include -+#include -+#include -+#include -+ -+static void xen_set_mtrr(unsigned int reg, unsigned long base, -+ unsigned long size, mtrr_type type) -+{ -+ struct xen_platform_op op; -+ int error; -+ -+ /* mtrr_ops->set() is called once per CPU, -+ * but Xen's ops apply to all CPUs. 
-+ */ -+ if (smp_processor_id()) -+ return; -+ -+ if (size == 0) { -+ op.cmd = XENPF_del_memtype; -+ op.u.del_memtype.handle = 0; -+ op.u.del_memtype.reg = reg; -+ } else { -+ op.cmd = XENPF_add_memtype; -+ op.u.add_memtype.mfn = base; -+ op.u.add_memtype.nr_mfns = size; -+ op.u.add_memtype.type = type; -+ } -+ -+ error = HYPERVISOR_dom0_op(&op); -+ BUG_ON(error != 0); -+} -+ -+static void xen_get_mtrr(unsigned int reg, unsigned long *base, -+ unsigned long *size, mtrr_type *type) -+{ -+ struct xen_platform_op op; -+ -+ op.cmd = XENPF_read_memtype; -+ op.u.read_memtype.reg = reg; -+ if (HYPERVISOR_dom0_op(&op) != 0) { -+ *base = 0; -+ *size = 0; -+ *type = 0; -+ return; -+ } -+ -+ *size = op.u.read_memtype.nr_mfns; -+ *base = op.u.read_memtype.mfn; -+ *type = op.u.read_memtype.type; -+} -+ -+static int __init xen_num_var_ranges(void) -+{ -+ int ranges; -+ struct xen_platform_op op; -+ -+ op.cmd = XENPF_read_memtype; -+ -+ for (ranges = 0; ; ranges++) { -+ op.u.read_memtype.reg = ranges; -+ if (HYPERVISOR_dom0_op(&op) != 0) -+ break; -+ } -+ return ranges; -+} -+ -+/* -+ * DOM0 TODO: Need to fill in the remaining mtrr methods to have full -+ * working userland mtrr support. -+ */ -+static struct mtrr_ops xen_mtrr_ops = { -+ .vendor = X86_VENDOR_UNKNOWN, -+ .get_free_region = generic_get_free_region, -+ .set = xen_set_mtrr, -+ .get = xen_get_mtrr, -+ .have_wrcomb = positive_have_wrcomb, -+ .validate_add_page = generic_validate_add_page, -+ .use_intel_if = 0, -+ .num_var_ranges = xen_num_var_ranges, -+}; -+ -+void __init xen_init_mtrr(void) -+{ -+ /* -+ * Check that we're running under Xen, and privileged enough -+ * to play with MTRRs. -+ */ -+ if (!xen_initial_domain()) -+ return; -+ -+ /* -+ * Check that the CPU has an MTRR implementation we can -+ * support. 
-+ */ -+ if (cpu_has_mtrr || -+ cpu_has_k6_mtrr || -+ cpu_has_cyrix_arr || -+ cpu_has_centaur_mcr) { -+ mtrr_if = &xen_mtrr_ops; -+ pat_init(); -+ } -+} -diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c -index ff95824..ebd4c51 100644 ---- a/arch/x86/kernel/crash.c -+++ b/arch/x86/kernel/crash.c -@@ -28,7 +28,6 @@ - #include - #include - -- - #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) - - static void kdump_nmi_callback(int cpu, struct die_args *args) -diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c -index d17d482..4d0aded 100644 ---- a/arch/x86/kernel/e820.c -+++ b/arch/x86/kernel/e820.c -@@ -750,6 +750,36 @@ static int __init find_overlapped_early(u64 start, u64 end) - return i; - } - -+u64 __init early_res_next_free(u64 addr) -+{ -+ int i; -+ u64 end = addr; -+ struct early_res *r; -+ -+ for (i = 0; i < MAX_EARLY_RES; i++) { -+ r = &early_res[i]; -+ if (addr >= r->start && addr < r->end) { -+ end = r->end; -+ break; -+ } -+ } -+ return end; -+} -+ -+u64 __init early_res_next_reserved(u64 addr, u64 max) -+{ -+ int i; -+ struct early_res *r; -+ u64 next_res = max; -+ -+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { -+ r = &early_res[i]; -+ if ((r->start >= addr) && (r->start < next_res)) -+ next_res = r->start; -+ } -+ return next_res; -+} -+ - /* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and -diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S -index c097e7d..7764118 100644 ---- a/arch/x86/kernel/entry_32.S -+++ b/arch/x86/kernel/entry_32.S -@@ -1088,6 +1088,9 @@ ENTRY(xen_failsafe_callback) - .previous - ENDPROC(xen_failsafe_callback) - -+BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, -+ xen_evtchn_do_upcall) -+ - #endif /* CONFIG_XEN */ - - #ifdef CONFIG_FUNCTION_TRACER -diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S -index b5c061f..a626344 100644 ---- a/arch/x86/kernel/entry_64.S -+++ 
b/arch/x86/kernel/entry_64.S -@@ -1364,6 +1364,9 @@ ENTRY(xen_failsafe_callback) - CFI_ENDPROC - END(xen_failsafe_callback) - -+apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ -+ xen_hvm_callback_vector xen_evtchn_do_upcall -+ - #endif /* CONFIG_XEN */ - - /* -diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c -index 0b06cd7..f59b07a 100644 ---- a/arch/x86/kernel/head64.c -+++ b/arch/x86/kernel/head64.c -@@ -79,6 +79,8 @@ void __init x86_64_start_kernel(char * real_mode_data) - /* Cleanup the over mapped high alias */ - cleanup_highmap(); - -+ max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; -+ - for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { - #ifdef CONFIG_EARLY_PRINTK - set_intr_gate(i, &early_idt_handlers[i]); -diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c -index c771e1a..8b970b8 100644 ---- a/arch/x86/kernel/hpet.c -+++ b/arch/x86/kernel/hpet.c -@@ -98,7 +98,7 @@ static int __init hpet_setup(char *str) - } - __setup("hpet=", hpet_setup); - --static int __init disable_hpet(char *str) -+int __init disable_hpet(char *str) - { - boot_hpet_disable = 1; - return 1; -diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c -index 99c4d30..919c1a8 100644 ---- a/arch/x86/kernel/ioport.c -+++ b/arch/x86/kernel/ioport.c -@@ -30,13 +30,29 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base, - } - } - -+void native_set_io_bitmap(struct thread_struct *t, -+ unsigned long bytes_updated) -+{ -+ struct tss_struct *tss; -+ -+ if (!bytes_updated) -+ return; -+ -+ tss = &__get_cpu_var(init_tss); -+ -+ /* Update the TSS: */ -+ if (t->io_bitmap_ptr) -+ memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); -+ else -+ memset(tss->io_bitmap, 0xff, bytes_updated); -+} -+ - /* - * this changes the io permissions bitmap in the current task. 
- */ - asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) - { - struct thread_struct *t = &current->thread; -- struct tss_struct *tss; - unsigned int i, max_long, bytes, bytes_updated; - - if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) -@@ -61,13 +77,13 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) - } - - /* -- * do it in the per-thread copy and in the TSS ... -+ * do it in the per-thread copy - * -- * Disable preemption via get_cpu() - we must not switch away -+ * Disable preemption - we must not switch away - * because the ->io_bitmap_max value must match the bitmap - * contents: - */ -- tss = &per_cpu(init_tss, get_cpu()); -+ preempt_disable(); - - set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); - -@@ -85,10 +101,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) - - t->io_bitmap_max = bytes; - -- /* Update the TSS: */ -- memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); -+ set_io_bitmap(t, bytes_updated); - -- put_cpu(); -+ preempt_enable(); - - return 0; - } -@@ -119,11 +134,10 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) - return 0; - } - --#ifdef CONFIG_X86_32 --long sys_iopl(struct pt_regs *regs) -+asmlinkage long sys_iopl(unsigned int level) - { -- unsigned int level = regs->bx; - struct thread_struct *t = &current->thread; -+ struct pt_regs *regs = task_pt_regs(current); - int rc; - - rc = do_iopl(level, regs); -@@ -135,9 +149,3 @@ long sys_iopl(struct pt_regs *regs) - out: - return rc; - } --#else --asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) --{ -- return do_iopl(level, regs); --} --#endif -diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c -index ec6ef60..fa5b061 100644 ---- a/arch/x86/kernel/ldt.c -+++ b/arch/x86/kernel/ldt.c -@@ -109,6 +109,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) - - mutex_init(&mm->context.lock); - mm->context.size = 0; -+#ifdef CONFIG_XEN -+
mm->context.has_foreign_mappings = 0; -+#endif - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); -diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c -index 378e9a8..86ca771 100644 ---- a/arch/x86/kernel/microcode_core.c -+++ b/arch/x86/kernel/microcode_core.c -@@ -81,6 +81,8 @@ - #include - #include - -+#include -+#include - #include - #include - -@@ -503,7 +505,9 @@ static int __init microcode_init(void) - struct cpuinfo_x86 *c = &cpu_data(0); - int error; - -- if (c->x86_vendor == X86_VENDOR_INTEL) -+ if (xen_pv_domain()) -+ microcode_ops = init_xen_microcode(); -+ else if (c->x86_vendor == X86_VENDOR_INTEL) - microcode_ops = init_intel_microcode(); - else if (c->x86_vendor == X86_VENDOR_AMD) - microcode_ops = init_amd_microcode(); -diff --git a/arch/x86/kernel/microcode_xen.c b/arch/x86/kernel/microcode_xen.c -new file mode 100644 -index 0000000..16c742e ---- /dev/null -+++ b/arch/x86/kernel/microcode_xen.c -@@ -0,0 +1,201 @@ -+/* -+ * Xen microcode update driver -+ * -+ * Xen does most of the work here. We just pass the whole blob into -+ * Xen, and it will apply it to all CPUs as appropriate. Xen will -+ * worry about how different CPU models are actually updated. -+ */ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+#include -+#include -+ -+#include -+#include -+ -+MODULE_DESCRIPTION("Xen microcode update driver"); -+MODULE_LICENSE("GPL"); -+ -+struct xen_microcode { -+ size_t len; -+ char data[0]; -+}; -+ -+static int xen_microcode_update(int cpu) -+{ -+ int err; -+ struct xen_platform_op op; -+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu; -+ struct xen_microcode *uc = uci->mc; -+ -+ if (uc == NULL || uc->len == 0) { -+ /* -+ * We do all cpus at once, so we don't need to do -+ * other cpus explicitly (besides, these vcpu numbers -+ * have no relationship to underlying physical cpus). 
-+ */ -+ return 0; -+ } -+ -+ op.cmd = XENPF_microcode_update; -+ set_xen_guest_handle(op.u.microcode.data, uc->data); -+ op.u.microcode.length = uc->len; -+ -+ err = HYPERVISOR_dom0_op(&op); -+ -+ if (err != 0) -+ printk(KERN_WARNING "microcode_xen: microcode update failed: %d\n", err); -+ -+ return err; -+} -+ -+static enum ucode_state xen_request_microcode_fw(int cpu, struct device *device) -+{ -+ char name[30]; -+ struct cpuinfo_x86 *c = &cpu_data(cpu); -+ const struct firmware *firmware; -+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu; -+ enum ucode_state ret; -+ struct xen_microcode *uc; -+ size_t size; -+ int err; -+ -+ switch (c->x86_vendor) { -+ case X86_VENDOR_INTEL: -+ snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x", -+ c->x86, c->x86_model, c->x86_mask); -+ break; -+ -+ case X86_VENDOR_AMD: -+ snprintf(name, sizeof(name), "amd-ucode/microcode_amd.bin"); -+ break; -+ -+ default: -+ return UCODE_NFOUND; -+ } -+ -+ err = request_firmware(&firmware, name, device); -+ if (err) { -+ pr_debug("microcode: data file %s load failed\n", name); -+ return UCODE_NFOUND; -+ } -+ -+ /* -+ * Only bother getting real firmware for cpu 0; the others get -+ * dummy placeholders. 
-+ */ -+ if (cpu == 0) -+ size = firmware->size; -+ else -+ size = 0; -+ -+ if (uci->mc != NULL) { -+ vfree(uci->mc); -+ uci->mc = NULL; -+ } -+ -+ ret = UCODE_ERROR; -+ uc = vmalloc(sizeof(*uc) + size); -+ if (uc == NULL) -+ goto out; -+ -+ ret = UCODE_OK; -+ uc->len = size; -+ memcpy(uc->data, firmware->data, uc->len); -+ -+ uci->mc = uc; -+ -+out: -+ release_firmware(firmware); -+ -+ return ret; -+} -+ -+static enum ucode_state xen_request_microcode_user(int cpu, -+ const void __user *buf, size_t size) -+{ -+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu; -+ struct xen_microcode *uc; -+ enum ucode_state ret; -+ size_t unread; -+ -+ if (cpu != 0) { -+ /* No real firmware for non-zero cpus; just store a -+ placeholder */ -+ size = 0; -+ } -+ -+ if (uci->mc != NULL) { -+ vfree(uci->mc); -+ uci->mc = NULL; -+ } -+ -+ ret = UCODE_ERROR; -+ uc = vmalloc(sizeof(*uc) + size); -+ if (uc == NULL) -+ goto out; -+ -+ uc->len = size; -+ -+ ret = UCODE_NFOUND; -+ -+ /* XXX This sporadically returns uncopied bytes, so we return -+ EFAULT. As far as I can see, the usermode code -+ (microcode_ctl) isn't doing anything wrong... 
*/ -+ unread = copy_from_user(uc->data, buf, size); -+ -+ if (unread != 0) { -+ printk(KERN_WARNING "failed to read %zd of %zd bytes at %p -> %p\n", -+ unread, size, buf, uc->data); -+ goto out; -+ } -+ -+ ret = UCODE_OK; -+ -+out: -+ if (ret == 0) -+ uci->mc = uc; -+ else -+ vfree(uc); -+ -+ return ret; -+} -+ -+static void xen_microcode_fini_cpu(int cpu) -+{ -+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu; -+ -+ vfree(uci->mc); -+ uci->mc = NULL; -+} -+ -+static int xen_collect_cpu_info(int cpu, struct cpu_signature *sig) -+{ -+ sig->sig = 0; -+ sig->pf = 0; -+ sig->rev = 0; -+ -+ return 0; -+} -+ -+static struct microcode_ops microcode_xen_ops = { -+ .request_microcode_user = xen_request_microcode_user, -+ .request_microcode_fw = xen_request_microcode_fw, -+ .collect_cpu_info = xen_collect_cpu_info, -+ .apply_microcode = xen_microcode_update, -+ .microcode_fini_cpu = xen_microcode_fini_cpu, -+}; -+ -+struct microcode_ops * __init init_xen_microcode(void) -+{ -+ if (!xen_initial_domain()) -+ return NULL; -+ return &microcode_xen_ops; -+} -diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c -index 1b1739d..f7e115c 100644 ---- a/arch/x86/kernel/paravirt.c -+++ b/arch/x86/kernel/paravirt.c -@@ -376,6 +376,7 @@ struct pv_cpu_ops pv_cpu_ops = { - .swapgs = native_swapgs, - - .set_iopl_mask = native_set_iopl_mask, -+ .set_io_bitmap = native_set_io_bitmap, - .io_delay = native_io_delay, - - .start_context_switch = paravirt_nop, -diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c -index 1a2d4b1..2f158a5 100644 ---- a/arch/x86/kernel/pci-calgary_64.c -+++ b/arch/x86/kernel/pci-calgary_64.c -@@ -46,6 +46,7 @@ - #include - #include - #include -+#include - - #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT - int use_calgary __read_mostly = 1; -@@ -249,7 +250,7 @@ static unsigned long iommu_range_alloc(struct device *dev, - if (panic_on_overflow) - panic("Calgary: fix the allocator.\n"); - else -- return bad_dma_address; -+ return
DMA_ERROR_CODE; - } - } - -@@ -265,11 +266,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, - void *vaddr, unsigned int npages, int direction) - { - unsigned long entry; -- dma_addr_t ret = bad_dma_address; -+ dma_addr_t ret = DMA_ERROR_CODE; - - entry = iommu_range_alloc(dev, tbl, npages); - -- if (unlikely(entry == bad_dma_address)) -+ if (unlikely(entry == DMA_ERROR_CODE)) - goto error; - - /* set the return dma address */ -@@ -284,7 +285,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, - error: - printk(KERN_WARNING "Calgary: failed to allocate %u pages in " - "iommu %p\n", npages, tbl); -- return bad_dma_address; -+ return DMA_ERROR_CODE; - } - - static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, -@@ -295,8 +296,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, - unsigned long flags; - - /* were we called with bad_dma_address? */ -- badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); -- if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { -+ badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); -+ if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { - WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " - "address 0x%Lx\n", dma_addr); - return; -@@ -380,7 +381,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, - npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); - - entry = iommu_range_alloc(dev, tbl, npages); -- if (entry == bad_dma_address) { -+ if (entry == DMA_ERROR_CODE) { - /* makes sure unmap knows to stop */ - s->dma_length = 0; - goto error; -@@ -398,7 +399,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, - error: - calgary_unmap_sg(dev, sg, nelems, dir, NULL); - for_each_sg(sg, s, nelems, i) { -- sg->dma_address = bad_dma_address; -+ sg->dma_address = DMA_ERROR_CODE; - sg->dma_length = 0; - } - return 0; -@@ -453,7 +454,7 @@ static void* 
calgary_alloc_coherent(struct device *dev, size_t size, - - /* set up tces to cover the allocated range */ - mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); -- if (mapping == bad_dma_address) -+ if (mapping == DMA_ERROR_CODE) - goto free; - *dma_handle = mapping; - return ret; -@@ -734,7 +735,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) - struct iommu_table *tbl = pci_iommu(dev->bus); - - /* reserve EMERGENCY_PAGES from bad_dma_address and up */ -- iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); -+ iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); - - /* avoid the BIOS/VGA first 640KB-1MB region */ - /* for CalIOC2 - avoid the entire first MB */ -@@ -1349,6 +1350,23 @@ static void __init get_tce_space_from_tar(void) - return; - } - -+static int __init calgary_iommu_init(void) -+{ -+ int ret; -+ -+ /* ok, we're trying to use Calgary - let's roll */ -+ printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); -+ -+ ret = calgary_init(); -+ if (ret) { -+ printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " -+ "falling back to no_iommu\n", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ - void __init detect_calgary(void) - { - int bus; -@@ -1362,7 +1380,7 @@ void __init detect_calgary(void) - * if the user specified iommu=off or iommu=soft or we found - * another HW IOMMU already, bail out. - */ -- if (swiotlb || no_iommu || iommu_detected) -+ if (no_iommu || iommu_detected) - return; - - if (!use_calgary) -@@ -1447,9 +1465,7 @@ void __init detect_calgary(void) - printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", - specified_table_size); - -- /* swiotlb for devices that aren't behind the Calgary. 
*/ -- if (max_pfn > MAX_DMA32_PFN) -- swiotlb = 1; -+ x86_init.iommu.iommu_init = calgary_iommu_init; - } - return; - -@@ -1462,35 +1478,6 @@ cleanup: - } - } - --int __init calgary_iommu_init(void) --{ -- int ret; -- -- if (no_iommu || (swiotlb && !calgary_detected)) -- return -ENODEV; -- -- if (!calgary_detected) -- return -ENODEV; -- -- /* ok, we're trying to use Calgary - let's roll */ -- printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); -- -- ret = calgary_init(); -- if (ret) { -- printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " -- "falling back to no_iommu\n", ret); -- return ret; -- } -- -- force_iommu = 1; -- bad_dma_address = 0x0; -- /* dma_ops is set to swiotlb or nommu */ -- if (!dma_ops) -- dma_ops = &nommu_dma_ops; -- -- return 0; --} -- - static int __init calgary_parse_options(char *p) - { - unsigned int bridge; -diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c -index 6ac3931..3e57c58 100644 ---- a/arch/x86/kernel/pci-dma.c -+++ b/arch/x86/kernel/pci-dma.c -@@ -11,10 +11,12 @@ - #include - #include - #include -+#include -+#include - - static int forbid_dac __read_mostly; - --struct dma_map_ops *dma_ops; -+struct dma_map_ops *dma_ops = &nommu_dma_ops; - EXPORT_SYMBOL(dma_ops); - - static int iommu_sac_force __read_mostly; -@@ -42,9 +44,6 @@ int iommu_detected __read_mostly = 0; - */ - int iommu_pass_through __read_mostly; - --dma_addr_t bad_dma_address __read_mostly = 0; --EXPORT_SYMBOL(bad_dma_address); -- - /* Dummy device used for NULL arguments (normally ISA). 
*/ - struct device x86_dma_fallback_dev = { - .init_name = "fallback device", -@@ -126,18 +125,19 @@ void __init pci_iommu_alloc(void) - /* free the range so iommu could get some range less than 4G */ - dma32_free_bootmem(); - #endif -+ if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) -+ goto out; - -- /* -- * The order of these functions is important for -- * fall-back/fail-over reasons -- */ - gart_iommu_hole_init(); - - detect_calgary(); - - detect_intel_iommu(); - -+ /* needs to be called after gart_iommu_hole_init */ - amd_iommu_detect(); -+out: -+ pci_xen_swiotlb_init(); - - pci_swiotlb_init(); - } -@@ -289,25 +289,17 @@ static int __init pci_iommu_init(void) - #ifdef CONFIG_PCI - dma_debug_add_bus(&pci_bus_type); - #endif -+ x86_init.iommu.iommu_init(); - -- calgary_iommu_init(); -- -- intel_iommu_init(); -- -- amd_iommu_init(); -+ if (swiotlb || xen_swiotlb) { -+ printk(KERN_INFO "PCI-DMA: " -+ "Using software bounce buffering for IO (SWIOTLB)\n"); -+ swiotlb_print_info(); -+ } else -+ swiotlb_free(); - -- gart_iommu_init(); -- -- no_iommu_init(); - return 0; - } -- --void pci_iommu_shutdown(void) --{ -- gart_iommu_shutdown(); -- -- amd_iommu_shutdown(); --} - /* Must execute after PCI subsystem */ - rootfs_initcall(pci_iommu_init); - -diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c -index 1c76691..8c9dd05 100644 ---- a/arch/x86/kernel/pci-gart_64.c -+++ b/arch/x86/kernel/pci-gart_64.c -@@ -39,6 +39,7 @@ - #include - #include - #include -+#include - - static unsigned long iommu_bus_base; /* GART remapping area (physical) */ - static unsigned long iommu_size; /* size of remapping area bytes */ -@@ -46,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */ - - static u32 *iommu_gatt_base; /* Remapping table */ - -+static dma_addr_t bad_dma_addr; -+ - /* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. 
With it true the GART is -@@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - if (panic_on_overflow) - panic("dma_map_area overflow %lu bytes\n", size); - iommu_full(dev, size, dir); -- return bad_dma_address; -+ return bad_dma_addr; - } - - for (i = 0; i < npages; i++) { -@@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, - - if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0); -- if (addr == bad_dma_address) { -+ if (addr == bad_dma_addr) { - if (i > 0) - gart_unmap_sg(dev, sg, i, dir, NULL); - nents = 0; -@@ -455,7 +458,7 @@ error: - - iommu_full(dev, pages << PAGE_SHIFT, dir); - for_each_sg(sg, s, nents, i) -- s->dma_address = bad_dma_address; -+ s->dma_address = bad_dma_addr; - return 0; - } - -@@ -479,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, - DMA_BIDIRECTIONAL, align_mask); - - flush_gart(); -- if (paddr != bad_dma_address) { -+ if (paddr != bad_dma_addr) { - *dma_addr = paddr; - return page_address(page); - } -@@ -499,6 +502,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, - free_pages((unsigned long)vaddr, get_order(size)); - } - -+static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) -+{ -+ return (dma_addr == bad_dma_addr); -+} -+ - static int no_agp; - - static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) -@@ -689,14 +697,15 @@ static struct dma_map_ops gart_dma_ops = { - .unmap_page = gart_unmap_page, - .alloc_coherent = gart_alloc_coherent, - .free_coherent = gart_free_coherent, -+ .mapping_error = gart_mapping_error, - }; - --void gart_iommu_shutdown(void) -+static void gart_iommu_shutdown(void) - { - struct pci_dev *dev; - int i; - -- if (no_agp && (dma_ops != &gart_dma_ops)) -+ if (no_agp) - return; - - for (i = 0; i < num_k8_northbridges; i++) { -@@ -711,7 +720,7 @@ void gart_iommu_shutdown(void) - } - } - 
--void __init gart_iommu_init(void) -+int __init gart_iommu_init(void) - { - struct agp_kern_info info; - unsigned long iommu_start; -@@ -721,7 +730,7 @@ void __init gart_iommu_init(void) - long i; - - if (num_k8_northbridges == 0) -- return; -+ return 0; - - #ifndef CONFIG_AGP_AMD64 - no_agp = 1; -@@ -733,13 +742,6 @@ void __init gart_iommu_init(void) - (agp_copy_info(agp_bridge, &info) < 0); - #endif - -- if (swiotlb) -- return; -- -- /* Did we detect a different HW IOMMU? */ -- if (iommu_detected && !gart_iommu_aperture) -- return; -- - if (no_iommu || - (!force_iommu && max_pfn <= MAX_DMA32_PFN) || - !gart_iommu_aperture || -@@ -749,7 +751,7 @@ void __init gart_iommu_init(void) - "but GART IOMMU not available.\n"); - printk(KERN_WARNING "falling back to iommu=soft.\n"); - } -- return; -+ return 0; - } - - /* need to map that range */ -@@ -794,7 +796,7 @@ void __init gart_iommu_init(void) - - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; -- bad_dma_address = iommu_bus_base; -+ bad_dma_addr = iommu_bus_base; - iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); - - /* -@@ -841,6 +843,10 @@ void __init gart_iommu_init(void) - - flush_gart(); - dma_ops = &gart_dma_ops; -+ x86_platform.iommu_shutdown = gart_iommu_shutdown; -+ swiotlb = 0; -+ -+ return 0; - } - - void __init gart_parse_options(char *p) -diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c -index a3933d4..22be12b 100644 ---- a/arch/x86/kernel/pci-nommu.c -+++ b/arch/x86/kernel/pci-nommu.c -@@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, - dma_addr_t bus = page_to_phys(page) + offset; - WARN_ON(size == 0); - if (!check_addr("map_single", dev, bus, size)) -- return bad_dma_address; -+ return DMA_ERROR_CODE; - flush_write_buffers(); - return bus; - } -@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = { - .sync_sg_for_device = nommu_sync_sg_for_device, - .is_phys = 1, - }; -- --void __init 
no_iommu_init(void) --{ -- if (dma_ops) -- return; -- -- force_iommu = 0; /* no HW IOMMU */ -- dma_ops = &nommu_dma_ops; --} -diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c -index aaa6b78..7d2829d 100644 ---- a/arch/x86/kernel/pci-swiotlb.c -+++ b/arch/x86/kernel/pci-swiotlb.c -@@ -42,18 +42,31 @@ static struct dma_map_ops swiotlb_dma_ops = { - .dma_supported = NULL, - }; - --void __init pci_swiotlb_init(void) -+/* -+ * pci_swiotlb_detect - set swiotlb to 1 if necessary -+ * -+ * This returns non-zero if we are forced to use swiotlb (by the boot -+ * option). -+ */ -+int __init pci_swiotlb_detect(void) - { -+ int use_swiotlb = swiotlb | swiotlb_force; -+ - /* don't initialize swiotlb if iommu=off (no_iommu=1) */ - #ifdef CONFIG_X86_64 -- if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) -+ if (!no_iommu && max_pfn > MAX_DMA32_PFN) - swiotlb = 1; - #endif - if (swiotlb_force) - swiotlb = 1; -+ -+ return use_swiotlb; -+} -+ -+void __init pci_swiotlb_init(void) -+{ - if (swiotlb) { -- printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); -- swiotlb_init(); -+ swiotlb_init(0); - dma_ops = &swiotlb_dma_ops; - } - } -diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 5fd5b07..11d8667 100644 ---- a/arch/x86/kernel/process.c -+++ b/arch/x86/kernel/process.c -@@ -73,16 +73,12 @@ void exit_thread(void) - unsigned long *bp = t->io_bitmap_ptr; - - if (bp) { -- struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); -- -+ preempt_disable(); - t->io_bitmap_ptr = NULL; - clear_thread_flag(TIF_IO_BITMAP); -- /* -- * Careful, clear this in the TSS too: -- */ -- memset(tss->io_bitmap, 0xff, t->io_bitmap_max); -+ set_io_bitmap(t, t->io_bitmap_max); - t->io_bitmap_max = 0; -- put_cpu(); -+ preempt_enable(); - kfree(bp); - } - } -@@ -199,19 +195,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - hard_enable_TSC(); - } - -- if (test_tsk_thread_flag(next_p, 
TIF_IO_BITMAP)) { -- /* -- * Copy the relevant range of the IO bitmap. -- * Normally this is 128 bytes or less: -- */ -- memcpy(tss->io_bitmap, next->io_bitmap_ptr, -- max(prev->io_bitmap_max, next->io_bitmap_max)); -- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { -- /* -- * Clear any possible leftover bits: -- */ -- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); -- } -+ if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP) || -+ test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) -+ set_io_bitmap(next, -+ max(prev->io_bitmap_max, next->io_bitmap_max)); - } - - int sys_fork(struct pt_regs *regs) -diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c -index dfdfe46..b12fe8d 100644 ---- a/arch/x86/kernel/pvclock.c -+++ b/arch/x86/kernel/pvclock.c -@@ -111,6 +111,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) - - static atomic64_t last_value = ATOMIC64_INIT(0); - -+void pvclock_resume(void) -+{ -+ atomic64_set(&last_value, 0); -+} -+ - cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) - { - struct pvclock_shadow_time shadow; -diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c -index 200fcde..ff8cc40 100644 ---- a/arch/x86/kernel/reboot.c -+++ b/arch/x86/kernel/reboot.c -@@ -23,7 +23,7 @@ - # include - # include - #else --# include -+# include - #endif - - /* -@@ -647,7 +647,7 @@ void native_machine_shutdown(void) - #endif - - #ifdef CONFIG_X86_64 -- pci_iommu_shutdown(); -+ x86_platform.iommu_shutdown(); - #endif - } - -diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c -index 5449a26..56b4707 100644 ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -70,6 +70,7 @@ - #include - - #include