summaryrefslogtreecommitdiffstats
path: root/main/linux-virt-grsec/0005-ipv4-use-separate-genid-for-next-hop-exceptions.patch
blob: 40cd4fc6c1fec574cdc6dd717731e4b130b8d99c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
From 4a7dbb238480fac2b1ad02a74240efcf225c1f45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Mon, 27 May 2013 10:55:52 +0300
Subject: [PATCH 5/6] ipv4: use separate genid for next hop exceptions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 13d82bf5 (ipv4: Fix flushing of cached routing informations)
added the support to flush learned pmtu information.

However, using rt_genid is quite heavy as it is bumped on route
add/change and multicast events amongst other places. These can
happen quote often, especially if using dynamic routing protocols.

While this is ok with routes (as they are just recreated locally),
the pmtu information is learned from remote systems and the icmp
notification can come with long delays. It is worthy to have separate
genid to avoid excessive pmtu resets.

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
---
 include/net/ip_fib.h        |  1 +
 include/net/net_namespace.h | 11 +++++++++++
 net/ipv4/route.c            | 12 ++++++++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e49db91..44424e9 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -51,6 +51,7 @@ struct rtable;
 
 struct fib_nh_exception {
 	struct fib_nh_exception __rcu	*fnhe_next;
+	int				fnhe_genid;
 	__be32				fnhe_daddr;
 	u32				fnhe_pmtu;
 	__be32				fnhe_gw;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index de644bc..b5a5baf 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -116,6 +116,7 @@ struct net {
 	struct netns_ipvs	*ipvs;
 	struct sock		*diag_nlsk;
 	atomic_unchecked_t	rt_genid;
+	atomic_unchecked_t	fnhe_genid;
 };
 
 /*
@@ -338,4 +339,14 @@ static inline void rt_genid_bump(struct net *net)
 	atomic_inc_unchecked(&net->rt_genid);
 }
 
+static inline int fnhe_genid(struct net *net)
+{
+	return atomic_read_unchecked(&net->fnhe_genid);
+}
+
+static inline void fnhe_genid_bump(struct net *net)
+{
+	atomic_inc_unchecked(&net->fnhe_genid);
+}
+
 #endif /* __NET_NET_NAMESPACE_H */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 85b9c07..f44a4bb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -658,6 +658,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 			fnhe->fnhe_next = hash->chain;
 			rcu_assign_pointer(hash->chain, fnhe);
 		}
+		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
 		fnhe->fnhe_daddr = daddr;
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
@@ -1236,8 +1237,11 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
 	spin_lock_bh(&fnhe_lock);
 
 	if (daddr == fnhe->fnhe_daddr) {
+		int genid = fnhe_genid(dev_net(rt->dst.dev));
 		struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
-		if (orig && rt_is_expired(orig)) {
+
+		if (fnhe->fnhe_genid != genid) {
+			fnhe->fnhe_genid = genid;
 			fnhe->fnhe_gw = 0;
 			fnhe->fnhe_pmtu = 0;
 			fnhe->fnhe_expires = 0;
@@ -2443,8 +2447,11 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 					void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
+	struct net *net = (struct net *)__ctl->extra1;
+
 	if (write) {
-		rt_cache_flush((struct net *)__ctl->extra1);
+		rt_cache_flush(net);
+		fnhe_genid_bump(net);
 		return 0;
 	}
 
@@ -2619,6 +2626,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 static __net_init int rt_genid_init(struct net *net)
 {
 	atomic_set_unchecked(&net->rt_genid, 0);
+	atomic_set_unchecked(&net->fnhe_genid, 0);
 	get_random_bytes(&net->ipv4.dev_addr_genid,
 			 sizeof(net->ipv4.dev_addr_genid));
 	return 0;
-- 
1.8.2.3