From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/vvmx: Disallow the use of VT-x instructions when nested virt is disabled

c/s ac6a4500b "vvmx: set vmxon_region_pa of vcpu out of VMX operation to an
invalid address" was a real bugfix as described, but has a very subtle bug
which results in all VT-x instructions being usable by a guest.

The toolstack constructs a guest by issuing:

  XEN_DOMCTL_createdomain
  XEN_DOMCTL_max_vcpus

and optionally later, HVMOP_set_param to enable nested virt.

As a result, the call to nvmx_vcpu_initialise() in hvm_vcpu_initialise()
(which is what makes the above patch look correct during review) is actually
dead code.  In practice, nvmx_vcpu_initialise() first gets called when nested
virt is enabled, which is typically never.

As a result, the zeroed memory of struct vcpu causes nvmx_vcpu_in_vmx() to
return true before nested virt is enabled for the guest.
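
(Illustrative sketch only, not verbatim code: assuming the predicate added
by the changeset above compares vmxon_region_pa against an invalid-address
sentinel, it behaves like

  static bool_t nvmx_vcpu_in_vmx(const struct vcpu *v)
  {
      /* Any value other than the sentinel reads as "in VMX operation". */
      return vcpu_2_nvmx(v).vmxon_region_pa != INVALID_PADDR;
  }

and because nvmx_vcpu_initialise() has never run, vmxon_region_pa is still
the 0 from the zeroed allocation, which is not the sentinel.)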

Fixing the order of initialisation is a work in progress for other reasons,
but not viable for security backports.

A compounding factor is that the vmexit handlers for all instructions, other
than VMXON, pass 0 into vmx_inst_check_privilege()'s vmxop_check parameter,
which skips the CR4.VMXE check.  (This is one of many reasons why nested virt
isn't a supported feature yet.)
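
(Sketching that check, hedged rather than verbatim: a zero vmxop_check
routes around the CR4.VMXE test and leaves only the broken in-VMX
predicate, along the lines of

  static int vmx_inst_check_privilege(struct cpu_user_regs *regs,
                                      int vmxop_check)
  {
      struct vcpu *v = current;

      if ( vmxop_check )
      {
          /* VMXON: requires protected mode with CR4.VMXE set. */
          if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
               !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) )
              goto invalid_op;
      }
      else if ( !nvmx_vcpu_in_vmx(v) )
          /* Everything else: only the (wrongly true) in-VMX test. */
          goto invalid_op;

      /* CPL and segment checks elided from this sketch. */
      return X86EMUL_OKAY;

  invalid_op:
      hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
      return X86EMUL_EXCEPTION;
  }

so every instruction other than VMXON sails through.)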

However, the overall result is that when nested virt is not enabled by the
toolstack (i.e. the default configuration for all production guests), the
VT-x instructions (other than VMXON) are actually usable, and Xen very
quickly falls over the fact that the nvmx structure is uninitialised.

In order to fail safe in the supported case, re-implement all the VT-x
instruction handling using a single function with a common prologue, covering
all the checks which should cause #UD or #GP faults.  This deliberately
doesn't use any state from the nvmx structure, in case there are other
lurking issues.

This is XSA-278

Reported-by: Sergey Dyasli <sergey.dyasli@citrix.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Sergey Dyasli <sergey.dyasli@citrix.com>

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index a6415f0..a4d2829 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3982,57 +3982,17 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
         break;
 
     case EXIT_REASON_VMXOFF:
-        if ( nvmx_handle_vmxoff(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_VMXON:
-        if ( nvmx_handle_vmxon(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_VMCLEAR:
-        if ( nvmx_handle_vmclear(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_VMPTRLD:
-        if ( nvmx_handle_vmptrld(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_VMPTRST:
-        if ( nvmx_handle_vmptrst(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_VMREAD:
-        if ( nvmx_handle_vmread(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_VMWRITE:
-        if ( nvmx_handle_vmwrite(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_VMLAUNCH:
-        if ( nvmx_handle_vmlaunch(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_VMRESUME:
-        if ( nvmx_handle_vmresume(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_INVEPT:
-        if ( nvmx_handle_invept(regs) == X86EMUL_OKAY )
-            update_guest_eip();
-        break;
-
     case EXIT_REASON_INVVPID:
-        if ( nvmx_handle_invvpid(regs) == X86EMUL_OKAY )
+        if ( nvmx_handle_vmx_insn(regs, exit_reason) == X86EMUL_OKAY )
             update_guest_eip();
         break;
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index e97db33..88cb58c 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1470,7 +1470,7 @@ void nvmx_switch_guest(void)
  * VMX instructions handling
  */
 
-int nvmx_handle_vmxon(struct cpu_user_regs *regs)
+static int nvmx_handle_vmxon(struct cpu_user_regs *regs)
 {
     struct vcpu *v=current;
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
@@ -1522,7 +1522,7 @@ int nvmx_handle_vmxon(struct cpu_user_regs *regs)
     return X86EMUL_OKAY;
 }
 
-int nvmx_handle_vmxoff(struct cpu_user_regs *regs)
+static int nvmx_handle_vmxoff(struct cpu_user_regs *regs)
 {
     struct vcpu *v=current;
     struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
@@ -1611,7 +1611,7 @@ static int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs)
     return X86EMUL_OKAY;
 }
 
-int nvmx_handle_vmresume(struct cpu_user_regs *regs)
+static int nvmx_handle_vmresume(struct cpu_user_regs *regs)
 {
     bool_t launched;
     struct vcpu *v = current;
@@ -1645,7 +1645,7 @@ int nvmx_handle_vmresume(struct cpu_user_regs *regs)
     return nvmx_vmresume(v,regs);
 }
 
-int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
+static int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
 {
     bool_t launched;
     struct vcpu *v = current;
@@ -1688,7 +1688,7 @@ int nvmx_handle_vmlaunch(struct cpu_user_regs *regs)
     return rc;
 }
 
-int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
+static int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
@@ -1759,7 +1759,7 @@ int nvmx_handle_vmptrld(struct cpu_user_regs *regs)
     return X86EMUL_OKAY;
 }
 
-int nvmx_handle_vmptrst(struct cpu_user_regs *regs)
+static int nvmx_handle_vmptrst(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
@@ -1784,7 +1784,7 @@ int nvmx_handle_vmptrst(struct cpu_user_regs *regs)
     return X86EMUL_OKAY;
 }
 
-int nvmx_handle_vmclear(struct cpu_user_regs *regs)
+static int nvmx_handle_vmclear(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
@@ -1836,7 +1836,7 @@ int nvmx_handle_vmclear(struct cpu_user_regs *regs)
     return X86EMUL_OKAY;
 }
 
-int nvmx_handle_vmread(struct cpu_user_regs *regs)
+static int nvmx_handle_vmread(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
@@ -1878,7 +1878,7 @@ int nvmx_handle_vmread(struct cpu_user_regs *regs)
     return X86EMUL_OKAY;
 }
 
-int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
+static int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     struct vmx_inst_decoded decode;
@@ -1926,7 +1926,7 @@ int nvmx_handle_vmwrite(struct cpu_user_regs *regs)
     return X86EMUL_OKAY;
 }
 
-int nvmx_handle_invept(struct cpu_user_regs *regs)
+static int nvmx_handle_invept(struct cpu_user_regs *regs)
 {
     struct vmx_inst_decoded decode;
     unsigned long eptp;
@@ -1954,7 +1954,7 @@ int nvmx_handle_invept(struct cpu_user_regs *regs)
     return X86EMUL_OKAY;
 }
 
-int nvmx_handle_invvpid(struct cpu_user_regs *regs)
+static int nvmx_handle_invvpid(struct cpu_user_regs *regs)
 {
     struct vmx_inst_decoded decode;
     unsigned long vpid;
@@ -1980,6 +1980,81 @@ int nvmx_handle_invvpid(struct cpu_user_regs *regs)
     return X86EMUL_OKAY;
 }
 
+int nvmx_handle_vmx_insn(struct cpu_user_regs *regs, unsigned int exit_reason)
+{
+    struct vcpu *curr = current;
+    int ret;
+
+    if ( !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) ||
+         !nestedhvm_enabled(curr->domain) ||
+         (vmx_guest_x86_mode(curr) < (hvm_long_mode_active(curr) ? 8 : 2)) )
+    {
+        hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
+        return X86EMUL_EXCEPTION;
+    }
+
+    if ( vmx_get_cpl() > 0 )
+    {
+        hvm_inject_hw_exception(TRAP_gp_fault, 0);
+        return X86EMUL_EXCEPTION;
+    }
+
+    switch ( exit_reason )
+    {
+    case EXIT_REASON_VMXOFF:
+        ret = nvmx_handle_vmxoff(regs);
+        break;
+
+    case EXIT_REASON_VMXON:
+        ret = nvmx_handle_vmxon(regs);
+        break;
+
+    case EXIT_REASON_VMCLEAR:
+        ret = nvmx_handle_vmclear(regs);
+        break;
+
+    case EXIT_REASON_VMPTRLD:
+        ret = nvmx_handle_vmptrld(regs);
+        break;
+
+    case EXIT_REASON_VMPTRST:
+        ret = nvmx_handle_vmptrst(regs);
+        break;
+
+    case EXIT_REASON_VMREAD:
+        ret = nvmx_handle_vmread(regs);
+        break;
+
+    case EXIT_REASON_VMWRITE:
+        ret = nvmx_handle_vmwrite(regs);
+        break;
+
+    case EXIT_REASON_VMLAUNCH:
+        ret = nvmx_handle_vmlaunch(regs);
+        break;
+
+    case EXIT_REASON_VMRESUME:
+        ret = nvmx_handle_vmresume(regs);
+        break;
+
+    case EXIT_REASON_INVEPT:
+        ret = nvmx_handle_invept(regs);
+        break;
+
+    case EXIT_REASON_INVVPID:
+        ret = nvmx_handle_invvpid(regs);
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        domain_crash(curr->domain);
+        ret = X86EMUL_UNHANDLEABLE;
+        break;
+    }
+
+    return ret;
+}
+
 #define __emul_value(enable1, default1) \
     ((enable1 | default1) << 32 | (default1))
diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h b/xen/include/asm-x86/hvm/vmx/vvmx.h
index 9ea35eb..fc4a8d1 100644
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -94,9 +94,6 @@ void nvmx_domain_relinquish_resources(struct domain *d);
 
 bool_t nvmx_ept_enabled(struct vcpu *v);
 
-int nvmx_handle_vmxon(struct cpu_user_regs *regs);
-int nvmx_handle_vmxoff(struct cpu_user_regs *regs);
-
 #define EPT_TRANSLATE_SUCCEED 0
 #define EPT_TRANSLATE_VIOLATION 1
 #define EPT_TRANSLATE_MISCONFIG 2
@@ -191,15 +188,7 @@ enum vmx_insn_errno set_vvmcs_real_safe(const struct vcpu *, u32 encoding,
 uint64_t get_shadow_eptp(struct vcpu *v);
 void nvmx_destroy_vmcs(struct vcpu *v);
 
-int nvmx_handle_vmptrld(struct cpu_user_regs *regs);
-int nvmx_handle_vmptrst(struct cpu_user_regs *regs);
-int nvmx_handle_vmclear(struct cpu_user_regs *regs);
-int nvmx_handle_vmread(struct cpu_user_regs *regs);
-int nvmx_handle_vmwrite(struct cpu_user_regs *regs);
-int nvmx_handle_vmresume(struct cpu_user_regs *regs);
-int nvmx_handle_vmlaunch(struct cpu_user_regs *regs);
-int nvmx_handle_invept(struct cpu_user_regs *regs);
-int nvmx_handle_invvpid(struct cpu_user_regs *regs);
+int nvmx_handle_vmx_insn(struct cpu_user_regs *regs, unsigned int exit_reason);
 
 int nvmx_msr_read_intercept(unsigned int msr,
                             u64 *msr_content);