From: Roger Pau Monné <roger.pau@citrix.com>
Subject: amd/iommu: fix flush checks

Flush checking for AMD IOMMU didn't check whether the previous entry
was present, or whether the flags (writable/readable) changed in order
to decide whether a flush should be executed.

Fix this by taking the writable/readable/next-level fields into account,
together with the present bit.

Along these lines the flushing in amd_iommu_map_page() must not be
omitted for PV domains. The comment there was simply wrong: Mappings may
very well change, both their addresses and their permissions. Ultimately
this should honor iommu_dont_flush_iotlb, but to achieve this
amd_iommu_ops first needs to gain an .iotlb_flush hook.

Also make clear_iommu_pte_present() static, to demonstrate there's no
caller omitting the (subsequent) flush.

This is part of XSA-275.

Reported-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
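
Editor's note: the following is a minimal standalone sketch, not part of the
patch, illustrating the flush decision the reworked set_iommu_pde_present()
logic performs. The struct and function names here are hypothetical; only the
decision rule (flush only when a previously present entry's address,
permissions, or next-level field changed) mirrors the patch.

/* Illustrative sketch only -- names are made up, not Xen's API. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct pte_fields {
    bool present, ir, iw;        /* present, read permission, write permission */
    unsigned int next_level;
    uint64_t maddr;              /* machine address the entry points at */
};

/* A non-present entry cannot have been cached in the IOTLB, so no flush is
 * required; a present entry needs a flush only if something changed. */
static bool flush_needed(const struct pte_fields *old,
                         const struct pte_fields *new)
{
    if ( !old->present )
        return false;

    return old->maddr != new->maddr ||
           old->ir != new->ir || old->iw != new->iw ||
           old->next_level != new->next_level;
}

int main(void)
{
    struct pte_fields old = { true, true, false, 0, 0x1000 };
    struct pte_fields new = { true, true, true,  0, 0x1000 };

    /* Same address, but write permission was added: a flush is needed. */
    printf("flush needed: %d\n", flush_needed(&old, &new));
    return 0;
}
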
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -35,7 +35,7 @@ static unsigned int pfn_to_pde_idx(unsig
     return idx;
 }
 
-void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
+static void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
 {
     u64 *table, *pte;
 
@@ -49,23 +49,42 @@ static bool_t set_iommu_pde_present(u32
                                     unsigned int next_level,
                                     bool_t iw, bool_t ir)
 {
-    u64 addr_lo, addr_hi, maddr_old, maddr_next;
+    uint64_t addr_lo, addr_hi, maddr_next;
     u32 entry;
-    bool_t need_flush = 0;
+    bool need_flush = false, old_present;
 
     maddr_next = (u64)next_mfn << PAGE_SHIFT;
 
-    addr_hi = get_field_from_reg_u32(pde[1],
-                                     IOMMU_PTE_ADDR_HIGH_MASK,
-                                     IOMMU_PTE_ADDR_HIGH_SHIFT);
-    addr_lo = get_field_from_reg_u32(pde[0],
-                                     IOMMU_PTE_ADDR_LOW_MASK,
-                                     IOMMU_PTE_ADDR_LOW_SHIFT);
-
-    maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
-
-    if ( maddr_old != maddr_next )
-        need_flush = 1;
+    old_present = get_field_from_reg_u32(pde[0], IOMMU_PTE_PRESENT_MASK,
+                                         IOMMU_PTE_PRESENT_SHIFT);
+    if ( old_present )
+    {
+        bool old_r, old_w;
+        unsigned int old_level;
+        uint64_t maddr_old;
+
+        addr_hi = get_field_from_reg_u32(pde[1],
+                                         IOMMU_PTE_ADDR_HIGH_MASK,
+                                         IOMMU_PTE_ADDR_HIGH_SHIFT);
+        addr_lo = get_field_from_reg_u32(pde[0],
+                                         IOMMU_PTE_ADDR_LOW_MASK,
+                                         IOMMU_PTE_ADDR_LOW_SHIFT);
+        old_level = get_field_from_reg_u32(pde[0],
+                                           IOMMU_PDE_NEXT_LEVEL_MASK,
+                                           IOMMU_PDE_NEXT_LEVEL_SHIFT);
+        old_w = get_field_from_reg_u32(pde[1],
+                                       IOMMU_PTE_IO_WRITE_PERMISSION_MASK,
+                                       IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT);
+        old_r = get_field_from_reg_u32(pde[1],
+                                       IOMMU_PTE_IO_READ_PERMISSION_MASK,
+                                       IOMMU_PTE_IO_READ_PERMISSION_SHIFT);
+
+        maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
+
+        if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
+             old_level != next_level )
+            need_flush = true;
+    }
 
     addr_lo = maddr_next & DMA_32BIT_MASK;
     addr_hi = maddr_next >> 32;
@@ -687,10 +706,7 @@ int amd_iommu_map_page(struct domain *d,
     if ( !need_flush )
         goto out;
 
-    /* 4K mapping for PV guests never changes,
-     * no need to flush if we trust non-present bits */
-    if ( is_hvm_domain(d) )
-        amd_iommu_flush_pages(d, gfn, 0);
+    amd_iommu_flush_pages(d, gfn, 0);
 
     for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
           merge_level <= hd->arch.paging_mode; merge_level++ )