From: Jan Beulich
Subject: x86+Arm32: make find_next_{,zero_}bit() have well defined behavior

Using these functions with the 2nd and 3rd arguments equal wasn't well
defined: Arm64 reliably returns the value of the 2nd argument in this
case, while on x86, for bitmaps up to 64 bits wide, the return value was
undefined when the incoming value was 64 (due to the undefined behavior
of shifting a value by the number of bits it is wide). On Arm32 an
actual out-of-bounds access would happen when the size/offset value is a
multiple of 32; if this access doesn't fault, the return value would
have been sufficiently correct, afaict.

Make the functions consistently tolerate the last two arguments being
equal (and in fact the 3rd argument being greater than or equal to the
2nd), rather than finding and fixing all the use sites that violate the
original, stricter assumption.

This is XSA-307.

Signed-off-by: Jan Beulich
Acked-by: Julien Grall
---
The most obvious (albeit still indirect) exposure to guests is
evtchn_check_pollers(), which imo makes this a security issue at least
for Arm32.

This was previously discussed between (at least) Andrew and me, and I
don't really recall who brought up the issue first.

Note that the Arm code's Linux origin may call for syncing publication
with them. Then again I don't want to tell them just to see them go
public ahead of us.

--- a/xen/arch/arm/arm32/lib/findbit.S
+++ b/xen/arch/arm/arm32/lib/findbit.S
@@ -42,8 +42,8 @@ ENDPROC(_find_first_zero_bit_le)
  * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
  */
 ENTRY(_find_next_zero_bit_le)
-		teq	r1, #0
-		beq	3b
+		cmp	r1, r2
+		bls	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
  ARM(		ldrb	r3, [r0, r2, lsr #3]	)
@@ -83,8 +83,8 @@ ENDPROC(_find_first_bit_le)
  * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
  */
 ENTRY(_find_next_bit_le)
-		teq	r1, #0
-		beq	3b
+		cmp	r1, r2
+		bls	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
  ARM(		ldrb	r3, [r0, r2, lsr #3]	)
@@ -117,8 +117,8 @@ ENTRY(_find_first_zero_bit_be)
 ENDPROC(_find_first_zero_bit_be)
 
 ENTRY(_find_next_zero_bit_be)
-		teq	r1, #0
-		beq	3b
+		cmp	r1, r2
+		bls	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
 		eor	r3, r2, #0x18		@ big endian byte ordering
@@ -151,8 +151,8 @@ ENTRY(_find_first_bit_be)
 ENDPROC(_find_first_bit_be)
 
 ENTRY(_find_next_bit_be)
-		teq	r1, #0
-		beq	3b
+		cmp	r1, r2
+		bls	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
 		eor	r3, r2, #0x18		@ big endian byte ordering
--- a/xen/include/asm-x86/bitops.h
+++ b/xen/include/asm-x86/bitops.h
@@ -358,7 +358,7 @@ static always_inline unsigned int __scan
     const unsigned long *a__ = (addr);                                      \
     unsigned int s__ = (size);                                              \
     unsigned int o__ = (off);                                               \
-    if ( __builtin_constant_p(size) && !s__ )                               \
+    if ( o__ >= s__ )                                                       \
         r__ = s__;                                                          \
     else if ( __builtin_constant_p(size) && s__ <= BITS_PER_LONG )          \
         r__ = o__ + __scanbit(*(const unsigned long *)(a__) >> o__, s__);   \
@@ -390,7 +390,7 @@ static always_inline unsigned int __scan
     const unsigned long *a__ = (addr);                                      \
     unsigned int s__ = (size);                                              \
     unsigned int o__ = (off);                                               \
-    if ( __builtin_constant_p(size) && !s__ )                               \
+    if ( o__ >= s__ )                                                       \
         r__ = s__;                                                          \
     else if ( __builtin_constant_p(size) && s__ <= BITS_PER_LONG )          \
         r__ = o__ + __scanbit(~*(const unsigned long *)(a__) >> o__, s__);  \
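
For illustration only (not part of the patch): a minimal standalone C
sketch of the new semantics of the x86 constant-size path; the function
name is made up here and GCC's __builtin_ctzl() stands in for the bsf
fast path. With off == 64 the old code would shift a 64-bit value by its
full width -- undefined behavior in C (and, given x86's shift-count
masking, effectively "bits >> 0") -- whereas the guarded version returns
the size, matching the Arm64 behavior cited above as the reference:

    #include <stdio.h>

    /* Reduced model of the x86 path, bitmap size fixed at 64 bits. */
    static unsigned int find_next_bit_fixed(unsigned long bits,
                                            unsigned int size,
                                            unsigned int off)
    {
        unsigned long masked;

        if ( off >= size )      /* new guard: degenerate request -> size */
            return size;
        masked = bits >> off;   /* off < 64 here, so the shift is defined */
        return masked ? off + (unsigned int)__builtin_ctzl(masked) : size;
    }

    int main(void)
    {
        unsigned long map = 1UL << 4;

        printf("%u\n", find_next_bit_fixed(map, 64, 0));  /* 4: bit 4 set */
        printf("%u\n", find_next_bit_fixed(map, 64, 64)); /* 64: off == size */
        return 0;
    }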