path: root/main/linux-grsec/xfrm-flow-cache-grsec.patch
author:    Michael Mason <ms13sp@gmail.com>  2010-03-17 17:43:41 +0000
committer: Michael Mason <ms13sp@gmail.com>  2010-03-17 17:43:41 +0000
commit:    4b23d4dfa33b09440ffbdfccbd46571649ccecaa (patch)
tree:      02c3b507f3e038c1e191fa97d59562b468e64b65 /main/linux-grsec/xfrm-flow-cache-grsec.patch
parent:    e59a787cc0775b8f7f97d492674a257baef45f6d (diff)
parent:    192b8fad759488bbe2fea1b43acda638eb6ebe85 (diff)
download:  aports-4b23d4dfa33b09440ffbdfccbd46571649ccecaa.tar.bz2
           aports-4b23d4dfa33b09440ffbdfccbd46571649ccecaa.tar.xz
Merge branch 'master' of git://git.alpinelinux.org/aports
Diffstat (limited to 'main/linux-grsec/xfrm-flow-cache-grsec.patch')
-rw-r--r--  main/linux-grsec/xfrm-flow-cache-grsec.patch  1154
1 file changed, 1154 insertions(+), 0 deletions(-)
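
The patch below reworks the xfrm flow cache: the single global cache (flow_cache_genid and the per-CPU flow_tables) becomes an embeddable struct flow_cache instance held in struct netns_xfrm, flow_cache_lookup() now returns the cache entry itself with bottom halves disabled, and callers release it with flow_cache_entry_put(). A minimal caller-side sketch of that pattern follows, modeled on the __xfrm_lookup()/__xfrm_policy_check() hunks; the helper name example_cached_policy_lookup is hypothetical, and since struct xfrm_flow_cache_entry, XFRM_CACHE_NO_POLICY and xfrm_flow_cache_entry_validate() are introduced by this patch inside net/xfrm/xfrm_policy.c, the sketch assumes it would live in that file.

/*
 * Illustrative sketch only, not part of the patch: caller-side use of
 * the embedded flow cache API added below.
 */
static struct xfrm_policy *example_cached_policy_lookup(struct net *net,
							 struct flowi *fl,
							 u16 family, u8 dir)
{
	struct flow_cache_entry *fce;
	struct xfrm_flow_cache_entry *xf;
	struct xfrm_policy *pol = NULL;

	/* Returns the (possibly freshly allocated) entry with BH disabled
	 * on this CPU, or NULL if the per-CPU table is not ready or the
	 * atomic allocation failed. */
	fce = flow_cache_lookup(&net->xfrm.flow_cache, fl, family, dir);
	if (fce == NULL)
		return NULL;

	/* struct flow_cache_entry is embedded as the first member of the
	 * xfrm-specific entry, so container_of() recovers the wrapper. */
	xf = container_of(fce, struct xfrm_flow_cache_entry, fce);

	/* Clears ->policy/->dst if the per-netns policy_genid or the
	 * policy's bundles_genid moved on since the entry was filled. */
	xfrm_flow_cache_entry_validate(&net->xfrm.flow_cache, fce);

	if (xf->policy != NULL && xf->policy != XFRM_CACHE_NO_POLICY) {
		pol = xf->policy;
		xfrm_pol_hold(pol);
	}

	/* Only re-enables BH; the entry stays in the per-CPU hash table. */
	flow_cache_entry_put(fce);
	return pol;
}
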
diff --git a/main/linux-grsec/xfrm-flow-cache-grsec.patch b/main/linux-grsec/xfrm-flow-cache-grsec.patch
new file mode 100644
index 0000000000..881623d8ae
--- /dev/null
+++ b/main/linux-grsec/xfrm-flow-cache-grsec.patch
@@ -0,0 +1,1154 @@
+From 3519d7c86a6e87584d25f3292b53d3ce865a659e Mon Sep 17 00:00:00 2001
+From: Natanael Copa <ncopa@alpinelinux.org>
+Date: Mon, 15 Mar 2010 15:31:37 +0000
+Subject: [PATCH] xfrm: flow cache2
+
+---
+ include/net/flow.h | 39 ++++-
+ include/net/netns/xfrm.h | 4 +
+ include/net/xfrm.h | 1 +
+ net/core/flow.c | 342 ++++++++++++++++++--------------------
+ net/ipv6/inet6_connection_sock.c | 6 +-
+ net/xfrm/xfrm_policy.c | 271 +++++++++++++++++++++---------
+ 6 files changed, 394 insertions(+), 269 deletions(-)
+
+diff --git a/include/net/flow.h b/include/net/flow.h
+index 809970b..814a9d2 100644
+--- a/include/net/flow.h
++++ b/include/net/flow.h
+@@ -8,6 +8,9 @@
+ #define _NET_FLOW_H
+
+ #include <linux/in6.h>
++#include <linux/notifier.h>
++#include <linux/timer.h>
++#include <linux/slab.h>
+ #include <asm/atomic.h>
+
+ struct flowi {
+@@ -86,13 +89,37 @@ struct flowi {
+
+ struct net;
+ struct sock;
+-typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family,
+- u8 dir, void **objp, atomic_t **obj_refp);
+
+-extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family,
+- u8 dir, flow_resolve_t resolver);
+-extern void flow_cache_flush(void);
+-extern atomic_t flow_cache_genid;
++struct flow_cache_percpu;
++struct flow_cache_entry;
++
++struct flow_cache {
++ u32 hash_shift;
++ u32 order;
++ struct flow_cache_percpu * percpu;
++ struct notifier_block hotcpu_notifier;
++ int low_watermark;
++ int high_watermark;
++ struct timer_list rnd_timer;
++ struct kmem_cache * flow_cachep;
++};
++
++struct flow_cache_entry {
++ struct flow_cache_entry *next;
++ struct flowi key;
++ u16 family;
++ u8 dir;
++};
++
++extern struct flow_cache_entry *flow_cache_lookup(
++ struct flow_cache *cache, struct flowi *key,
++ u16 family, u8 dir);
++extern void flow_cache_entry_put(struct flow_cache_entry *fce);
++
++void flow_cache_flush(struct flow_cache *fc,
++ void (*flush)(struct flow_cache *fc, struct flow_cache_entry *fce));
++extern int flow_cache_init(struct flow_cache *cache, size_t entry_size);
++extern void flow_cache_fini(struct flow_cache *cache);
+
+ static inline int flow_cache_uli_match(struct flowi *fl1, struct flowi *fl2)
+ {
+diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
+index 1ba9127..4bb72c4 100644
+--- a/include/net/netns/xfrm.h
++++ b/include/net/netns/xfrm.h
+@@ -41,6 +41,10 @@ struct netns_xfrm {
+ struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2];
+ unsigned int policy_count[XFRM_POLICY_MAX * 2];
+ struct work_struct policy_hash_work;
++ atomic_t policy_genid;
++ struct hlist_head policy_gc_list;
++ struct work_struct policy_gc_work;
++ struct flow_cache flow_cache;
+
+ struct sock *nlsk;
+
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index 223e90a..5cd4e29 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -487,6 +487,7 @@ struct xfrm_policy
+ struct xfrm_lifetime_cfg lft;
+ struct xfrm_lifetime_cur curlft;
+ struct dst_entry *bundles;
++ atomic_t bundles_genid;
+ struct xfrm_policy_walk_entry walk;
+ u8 type;
+ u8 action;
+diff --git a/net/core/flow.c b/net/core/flow.c
+index 5b27992..e3782c2 100644
+--- a/net/core/flow.c
++++ b/net/core/flow.c
+@@ -25,114 +25,85 @@
+ #include <asm/atomic.h>
+ #include <linux/security.h>
+
+-struct flow_cache_entry {
+- struct flow_cache_entry *next;
+- u16 family;
+- u8 dir;
+- u32 genid;
+- struct flowi key;
+- void *object;
+- atomic_t *object_ref;
+-};
+-
+-atomic_t flow_cache_genid = ATOMIC_INIT(0);
+-
+-static u32 flow_hash_shift;
+-#define flow_hash_size (1 << flow_hash_shift)
+-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables);
+-
+-#define flow_table(cpu) (per_cpu(flow_tables, cpu))
+-
+-static struct kmem_cache *flow_cachep __read_mostly;
+
+-static int flow_lwm, flow_hwm;
+-
+-struct flow_percpu_info {
+- int hash_rnd_recalc;
+- u32 hash_rnd;
+- int count;
++struct flow_cache_percpu {
++ struct flow_cache_entry ** hash_table;
++ int hash_count;
++ u32 hash_rnd;
++ int hash_rnd_recalc;
++ struct tasklet_struct flush_tasklet;
+ };
+-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info);
+-
+-#define flow_hash_rnd_recalc(cpu) \
+- (per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
+-#define flow_hash_rnd(cpu) \
+- (per_cpu(flow_hash_info, cpu).hash_rnd)
+-#define flow_count(cpu) \
+- (per_cpu(flow_hash_info, cpu).count)
+-
+-static struct timer_list flow_hash_rnd_timer;
+-
+-#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
+
+ struct flow_flush_info {
+- atomic_t cpuleft;
+- struct completion completion;
++ void (*flush)(struct flow_cache *fc, struct flow_cache_entry *fce);
++ struct flow_cache * cache;
++ atomic_t cpuleft;
++ struct completion completion;
+ };
+-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets);
+
+-#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
++#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
++#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
+
+ static void flow_cache_new_hashrnd(unsigned long arg)
+ {
++ struct flow_cache *fc = (struct flow_cache *) arg;
+ int i;
+
+ for_each_possible_cpu(i)
+- flow_hash_rnd_recalc(i) = 1;
++ per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
+
+- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+- add_timer(&flow_hash_rnd_timer);
++ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
++ add_timer(&fc->rnd_timer);
+ }
+
+-static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
+-{
+- if (fle->object)
+- atomic_dec(fle->object_ref);
+- kmem_cache_free(flow_cachep, fle);
+- flow_count(cpu)--;
+-}
+-
+-static void __flow_cache_shrink(int cpu, int shrink_to)
++static void __flow_cache_shrink(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp,
++ int shrink_to)
+ {
+ struct flow_cache_entry *fle, **flp;
+ int i;
+
+- for (i = 0; i < flow_hash_size; i++) {
++ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+ int k = 0;
+
+- flp = &flow_table(cpu)[i];
++ flp = &fcp->hash_table[i];
+ while ((fle = *flp) != NULL && k < shrink_to) {
+ k++;
+ flp = &fle->next;
+ }
+ while ((fle = *flp) != NULL) {
+ *flp = fle->next;
+- flow_entry_kill(cpu, fle);
++
++ kmem_cache_free(fc->flow_cachep, fle);
++ fcp->hash_count--;
+ }
+ }
+ }
+
+-static void flow_cache_shrink(int cpu)
++static void flow_cache_shrink(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp)
+ {
+- int shrink_to = flow_lwm / flow_hash_size;
++ int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+
+- __flow_cache_shrink(cpu, shrink_to);
++ __flow_cache_shrink(fc, fcp, shrink_to);
+ }
+
+-static void flow_new_hash_rnd(int cpu)
++static void flow_new_hash_rnd(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp)
+ {
+- get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
+- flow_hash_rnd_recalc(cpu) = 0;
+-
+- __flow_cache_shrink(cpu, 0);
++ get_random_bytes(&fcp->hash_rnd, sizeof(u32));
++ fcp->hash_rnd_recalc = 0;
++ __flow_cache_shrink(fc, fcp, 0);
+ }
+
+-static u32 flow_hash_code(struct flowi *key, int cpu)
++static u32 flow_hash_code(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp,
++ struct flowi *key)
+ {
+ u32 *k = (u32 *) key;
+
+- return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
+- (flow_hash_size - 1));
++ return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
++ & (flow_cache_hash_size(fc) - 1));
+ }
+
+ #if (BITS_PER_LONG == 64)
+@@ -165,128 +136,100 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+ return 0;
+ }
+
+-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+- flow_resolve_t resolver)
++struct flow_cache_entry *flow_cache_lookup(struct flow_cache *fc,
++ struct flowi *key,
++ u16 family, u8 dir)
+ {
+ struct flow_cache_entry *fle, **head;
++ struct flow_cache_percpu *fcp;
+ unsigned int hash;
+- int cpu;
+
+ local_bh_disable();
+- cpu = smp_processor_id();
++ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+
+ fle = NULL;
+ /* Packet really early in init? Making flow_cache_init a
+ * pre-smp initcall would solve this. --RR */
+- if (!flow_table(cpu))
++ if (!fcp->hash_table)
+ goto nocache;
+
+- if (flow_hash_rnd_recalc(cpu))
+- flow_new_hash_rnd(cpu);
+- hash = flow_hash_code(key, cpu);
++ if (fcp->hash_rnd_recalc)
++ flow_new_hash_rnd(fc, fcp);
++
++ hash = flow_hash_code(fc, fcp, key);
+
+- head = &flow_table(cpu)[hash];
++ head = &fcp->hash_table[hash];
+ for (fle = *head; fle; fle = fle->next) {
+ if (fle->family == family &&
+ fle->dir == dir &&
+ flow_key_compare(key, &fle->key) == 0) {
+- if (fle->genid == atomic_read(&flow_cache_genid)) {
+- void *ret = fle->object;
+-
+- if (ret)
+- atomic_inc(fle->object_ref);
+- local_bh_enable();
+-
+- return ret;
+- }
+- break;
+- }
+- }
+-
+- if (!fle) {
+- if (flow_count(cpu) > flow_hwm)
+- flow_cache_shrink(cpu);
+-
+- fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
+- if (fle) {
+- fle->next = *head;
+- *head = fle;
+- fle->family = family;
+- fle->dir = dir;
+- memcpy(&fle->key, key, sizeof(*key));
+- fle->object = NULL;
+- flow_count(cpu)++;
++ return fle;
+ }
+ }
+
+-nocache:
+- {
+- int err;
+- void *obj;
+- atomic_t *obj_ref;
+-
+- err = resolver(net, key, family, dir, &obj, &obj_ref);
++ if (fcp->hash_count > fc->high_watermark)
++ flow_cache_shrink(fc, fcp);
+
+- if (fle && !err) {
+- fle->genid = atomic_read(&flow_cache_genid);
++ fle = kmem_cache_zalloc(fc->flow_cachep, GFP_ATOMIC);
++ if (!fle)
++ goto nocache;
+
+- if (fle->object)
+- atomic_dec(fle->object_ref);
++ fle->next = *head;
++ *head = fle;
++ fle->family = family;
++ fle->dir = dir;
++ memcpy(&fle->key, key, sizeof(*key));
++ fcp->hash_count++;
++ return fle;
+
+- fle->object = obj;
+- fle->object_ref = obj_ref;
+- if (obj)
+- atomic_inc(fle->object_ref);
+- }
+- local_bh_enable();
++nocache:
++ local_bh_enable();
++ return NULL;
++}
+
+- if (err)
+- obj = ERR_PTR(err);
+- return obj;
+- }
++void flow_cache_entry_put(struct flow_cache_entry *fce)
++{
++ local_bh_enable();
+ }
+
+ static void flow_cache_flush_tasklet(unsigned long data)
+ {
+- struct flow_flush_info *info = (void *)data;
++ struct flow_flush_info *info = (void *) data;
++ struct flow_cache *fc = (void *) info->cache;
++ struct flow_cache_percpu *fcp;
+ int i;
+- int cpu;
+
+- cpu = smp_processor_id();
+- for (i = 0; i < flow_hash_size; i++) {
+- struct flow_cache_entry *fle;
++ if (info->flush == NULL)
++ goto done;
+
+- fle = flow_table(cpu)[i];
+- for (; fle; fle = fle->next) {
+- unsigned genid = atomic_read(&flow_cache_genid);
+-
+- if (!fle->object || fle->genid == genid)
+- continue;
++ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
++ for (i = 0; i < flow_cache_hash_size(fc); i++) {
++ struct flow_cache_entry *fle;
+
+- fle->object = NULL;
+- atomic_dec(fle->object_ref);
+- }
++ fle = fcp->hash_table[i];
++ for (; fle; fle = fle->next)
++ info->flush(fc, fle);
+ }
+
++done:
+ if (atomic_dec_and_test(&info->cpuleft))
+ complete(&info->completion);
+ }
+
+-static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
+ static void flow_cache_flush_per_cpu(void *data)
+ {
+ struct flow_flush_info *info = data;
+- int cpu;
+ struct tasklet_struct *tasklet;
++ int cpu;
+
+ cpu = smp_processor_id();
+-
+- tasklet = flow_flush_tasklet(cpu);
+- tasklet->data = (unsigned long)info;
++ tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
++ tasklet->data = (unsigned long) data;
+ tasklet_schedule(tasklet);
+ }
+
+-void flow_cache_flush(void)
++void flow_cache_flush(struct flow_cache *fc,
++ void (*flush)(struct flow_cache *fc, struct flow_cache_entry *fce))
+ {
+ struct flow_flush_info info;
+ static DEFINE_MUTEX(flow_flush_sem);
+@@ -294,6 +237,8 @@ void flow_cache_flush(void)
+ /* Don't want cpus going down or up during this. */
+ get_online_cpus();
+ mutex_lock(&flow_flush_sem);
++ info.cache = fc;
++ info.flush = flush;
+ atomic_set(&info.cpuleft, num_online_cpus());
+ init_completion(&info.completion);
+
+@@ -307,62 +252,99 @@ void flow_cache_flush(void)
+ put_online_cpus();
+ }
+
+-static void __init flow_cache_cpu_prepare(int cpu)
++static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp)
++{
++ fcp->hash_table = (struct flow_cache_entry **)
++ __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
++ fcp->hash_rnd_recalc = 1;
++ fcp->hash_count = 0;
++
++ tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
++}
++
++static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
++ unsigned long action,
++ void *hcpu)
++{
++ struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
++ int cpu = (unsigned long) hcpu;
++ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
++
++ switch (action) {
++ case CPU_UP_PREPARE:
++ case CPU_UP_PREPARE_FROZEN:
++ flow_cache_cpu_prepare(fc, fcp);
++ if (!fcp->hash_table)
++ return NOTIFY_BAD;
++ break;
++ case CPU_UP_CANCELED:
++ case CPU_UP_CANCELED_FROZEN:
++ case CPU_DEAD:
++ case CPU_DEAD_FROZEN:
++ if (fcp->hash_table) {
++ __flow_cache_shrink(fc, fcp, 0);
++ free_pages((unsigned long) fcp->hash_table, fc->order);
++ fcp->hash_table = NULL;
++ }
++ break;
++ }
++ return NOTIFY_OK;
++}
++
++int flow_cache_init(struct flow_cache *fc, size_t entry_size)
+ {
+- struct tasklet_struct *tasklet;
+ unsigned long order;
++ int i, r;
++
++ BUG_ON(entry_size < sizeof(struct flow_cache_entry));
++ fc->flow_cachep = kmem_cache_create("flow_cache",
++ entry_size,
++ 0, SLAB_PANIC,
++ NULL);
++ fc->hash_shift = 10;
++ fc->low_watermark = 2 * flow_cache_hash_size(fc);
++ fc->high_watermark = 4 * flow_cache_hash_size(fc);
++ fc->percpu = alloc_percpu(struct flow_cache_percpu);
+
+ for (order = 0;
+ (PAGE_SIZE << order) <
+- (sizeof(struct flow_cache_entry *)*flow_hash_size);
++ (sizeof(struct flow_cache_entry *) * flow_cache_hash_size(fc));
+ order++)
+ /* NOTHING */;
++ fc->order = order;
+
+- flow_table(cpu) = (struct flow_cache_entry **)
+- __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+- if (!flow_table(cpu))
+- panic("NET: failed to allocate flow cache order %lu\n", order);
++ setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, (unsigned long) fc);
++ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
++ add_timer(&fc->rnd_timer);
+
+- flow_hash_rnd_recalc(cpu) = 1;
+- flow_count(cpu) = 0;
++ for_each_online_cpu(i) {
++ r = flow_cache_cpu(&fc->hotcpu_notifier,
++ CPU_UP_PREPARE, (void*) i);
++ if (r != NOTIFY_OK)
++ panic("NET: failed to allocate flow cache order %lu\n", order);
++ }
+
+- tasklet = flow_flush_tasklet(cpu);
+- tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+-}
++ fc->hotcpu_notifier = (struct notifier_block){
++ .notifier_call = flow_cache_cpu,
++ };
++ register_hotcpu_notifier(&fc->hotcpu_notifier);
+
+-static int flow_cache_cpu(struct notifier_block *nfb,
+- unsigned long action,
+- void *hcpu)
+-{
+- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+- __flow_cache_shrink((unsigned long)hcpu, 0);
+- return NOTIFY_OK;
++ return 0;
+ }
+
+-static int __init flow_cache_init(void)
++void flow_cache_fini(struct flow_cache *fc)
+ {
+ int i;
+
+- flow_cachep = kmem_cache_create("flow_cache",
+- sizeof(struct flow_cache_entry),
+- 0, SLAB_PANIC,
+- NULL);
+- flow_hash_shift = 10;
+- flow_lwm = 2 * flow_hash_size;
+- flow_hwm = 4 * flow_hash_size;
+-
+- setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
+- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+- add_timer(&flow_hash_rnd_timer);
++ del_timer(&fc->rnd_timer);
++ unregister_hotcpu_notifier(&fc->hotcpu_notifier);
+
+ for_each_possible_cpu(i)
+- flow_cache_cpu_prepare(i);
++ flow_cache_cpu(&fc->hotcpu_notifier, CPU_DEAD, (void*) i);
+
+- hotcpu_notifier(flow_cache_cpu, 0);
+- return 0;
++ free_percpu(fc->percpu);
++ kmem_cache_destroy(fc->flow_cachep);
+ }
+
+-module_init(flow_cache_init);
+-
+-EXPORT_SYMBOL(flow_cache_genid);
+ EXPORT_SYMBOL(flow_cache_lookup);
+diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
+index cc4797d..399853e 100644
+--- a/net/ipv6/inet6_connection_sock.c
++++ b/net/ipv6/inet6_connection_sock.c
+@@ -151,8 +151,9 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
+
+ #ifdef CONFIG_XFRM
+ {
++ struct net *net = sock_net(sk);
+ struct rt6_info *rt = (struct rt6_info *)dst;
+- rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
++ rt->rt6i_flow_cache_genid = atomic_read(&net->xfrm.policy_genid);
+ }
+ #endif
+ }
+@@ -166,8 +167,9 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
+
+ #ifdef CONFIG_XFRM
+ if (dst) {
++ struct net *net = sock_net(sk);
+ struct rt6_info *rt = (struct rt6_info *)dst;
+- if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
++ if (rt->rt6i_flow_cache_genid != atomic_read(&net->xfrm.policy_genid)) {
+ sk->sk_dst_cache = NULL;
+ dst_release(dst);
+ dst = NULL;
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index cb81ca3..82b01c3 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -44,7 +44,6 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+
+ static struct kmem_cache *xfrm_dst_cache __read_mostly;
+
+-static HLIST_HEAD(xfrm_policy_gc_list);
+ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
+
+ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
+@@ -53,6 +52,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst);
+
+ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
+ int dir);
++static int stale_bundle(struct dst_entry *dst);
+
+ static inline int
+ __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
+@@ -216,6 +216,35 @@ expired:
+ xfrm_pol_put(xp);
+ }
+
++struct xfrm_flow_cache_entry {
++ struct flow_cache_entry fce;
++ struct xfrm_policy *policy;
++ struct xfrm_dst *dst;
++ u32 policy_genid, bundles_genid;
++};
++#define XFRM_CACHE_NO_POLICY ((struct xfrm_policy *) -1)
++
++void xfrm_flow_cache_entry_validate(struct flow_cache *fc,
++ struct flow_cache_entry *fce)
++{
++ struct net *net = container_of(fc, struct net, xfrm.flow_cache);
++ struct xfrm_flow_cache_entry *xfc =
++ container_of(fce, struct xfrm_flow_cache_entry, fce);
++
++ if (xfc->policy_genid != atomic_read(&net->xfrm.policy_genid))
++ goto invalid;
++ if (xfc->policy == NULL || xfc->policy == XFRM_CACHE_NO_POLICY)
++ return;
++ if (xfc->policy->walk.dead)
++ goto invalid;
++ if (xfc->bundles_genid != atomic_read(&xfc->policy->bundles_genid))
++ goto invalid_dst;
++ return;
++invalid:
++ xfc->policy = NULL;
++invalid_dst:
++ xfc->dst = NULL;
++}
+
+ /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
+ * SPD calls.
+@@ -269,27 +298,26 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+ if (del_timer(&policy->timer))
+ atomic_dec(&policy->refcnt);
+
+- if (atomic_read(&policy->refcnt) > 1)
+- flow_cache_flush();
+-
+ xfrm_pol_put(policy);
+ }
+
+ static void xfrm_policy_gc_task(struct work_struct *work)
+ {
++ struct net *net = container_of(work, struct net, xfrm.policy_gc_work);
+ struct xfrm_policy *policy;
+ struct hlist_node *entry, *tmp;
+ struct hlist_head gc_list;
+
+ spin_lock_bh(&xfrm_policy_gc_lock);
+- gc_list.first = xfrm_policy_gc_list.first;
+- INIT_HLIST_HEAD(&xfrm_policy_gc_list);
++ gc_list.first = net->xfrm.policy_gc_list.first;
++ INIT_HLIST_HEAD(&net->xfrm.policy_gc_list);
+ spin_unlock_bh(&xfrm_policy_gc_lock);
+
++ flow_cache_flush(&net->xfrm.flow_cache, xfrm_flow_cache_entry_validate);
++
+ hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
+ xfrm_policy_gc_kill(policy);
+ }
+-static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
+
+ /* Rule must be locked. Release descentant resources, announce
+ * entry dead. The rule must be unlinked from lists to the moment.
+@@ -297,6 +325,7 @@ static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
+
+ static void xfrm_policy_kill(struct xfrm_policy *policy)
+ {
++ struct net *net = xp_net(policy);
+ int dead;
+
+ write_lock_bh(&policy->lock);
+@@ -310,10 +339,10 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
+ }
+
+ spin_lock_bh(&xfrm_policy_gc_lock);
+- hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
++ hlist_add_head(&policy->bydst, &net->xfrm.policy_gc_list);
+ spin_unlock_bh(&xfrm_policy_gc_lock);
+
+- schedule_work(&xfrm_policy_gc_work);
++ schedule_work(&net->xfrm.policy_gc_work);
+ }
+
+ static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
+@@ -586,7 +615,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+ hlist_add_head(&policy->bydst, chain);
+ xfrm_pol_hold(policy);
+ net->xfrm.policy_count[dir]++;
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ if (delpol)
+ __xfrm_policy_unlink(delpol, dir);
+ policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
+@@ -619,11 +648,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+ gc_list = dst;
+
+ policy->bundles = NULL;
++ atomic_inc(&policy->bundles_genid);
+ }
+ write_unlock(&policy->lock);
+ }
+ read_unlock_bh(&xfrm_policy_lock);
+
++ flow_cache_flush(&net->xfrm.flow_cache, NULL);
+ while (gc_list) {
+ struct dst_entry *dst = gc_list;
+
+@@ -669,7 +700,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir,
+ write_unlock_bh(&xfrm_policy_lock);
+
+ if (ret && delete) {
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ xfrm_policy_kill(ret);
+ }
+ return ret;
+@@ -710,7 +741,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id,
+ write_unlock_bh(&xfrm_policy_lock);
+
+ if (ret && delete) {
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ xfrm_policy_kill(ret);
+ }
+ return ret;
+@@ -824,7 +855,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
+ }
+
+ }
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ out:
+ write_unlock_bh(&xfrm_policy_lock);
+ return err;
+@@ -977,32 +1008,18 @@ fail:
+ return ret;
+ }
+
+-static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+- u8 dir, void **objp, atomic_t **obj_refp)
++static struct xfrm_policy *xfrm_policy_lookup(
++ struct net *net, struct flowi *fl,
++ u16 family, u8 dir)
+ {
++#ifdef CONFIG_XFRM_SUB_POLICY
+ struct xfrm_policy *pol;
+- int err = 0;
+
+-#ifdef CONFIG_XFRM_SUB_POLICY
+ pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+- if (IS_ERR(pol)) {
+- err = PTR_ERR(pol);
+- pol = NULL;
+- }
+- if (pol || err)
+- goto end;
++ if (pol != NULL)
++ return pol;
+ #endif
+- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+- if (IS_ERR(pol)) {
+- err = PTR_ERR(pol);
+- pol = NULL;
+- }
+-#ifdef CONFIG_XFRM_SUB_POLICY
+-end:
+-#endif
+- if ((*objp = (void *) pol) != NULL)
+- *obj_refp = &pol->refcnt;
+- return err;
++ return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+ }
+
+ static inline int policy_to_flow_dir(int dir)
+@@ -1083,12 +1100,14 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
+
+ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
+ {
++ struct net *net = xp_net(pol);
++
+ write_lock_bh(&xfrm_policy_lock);
+ pol = __xfrm_policy_unlink(pol, dir);
+ write_unlock_bh(&xfrm_policy_lock);
+ if (pol) {
+ if (dir < XFRM_POLICY_MAX)
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ xfrm_policy_kill(pol);
+ return 0;
+ }
+@@ -1512,13 +1531,34 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
+ #endif
+ }
+
+-static int stale_bundle(struct dst_entry *dst);
+-
+ /* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
++
++static void xfrm_flow_cache_update(struct net *net, struct flowi *key,
++ u16 family, u8 dir,
++ struct xfrm_policy *pol,
++ struct xfrm_dst *dst)
++{
++ struct flow_cache_entry *fce;
++ struct xfrm_flow_cache_entry *xf;
++
++ fce = flow_cache_lookup(&net->xfrm.flow_cache,
++ key, family, dir);
++ if (fce == NULL)
++ return;
++
++ xf = container_of(fce, struct xfrm_flow_cache_entry, fce);
++ xf->policy_genid = atomic_read(&net->xfrm.policy_genid);
++ xf->policy = pol;
++ if (dst != NULL)
++ xf->bundles_genid = atomic_read(&pol->bundles_genid);
++ xf->dst = dst;
++ flow_cache_entry_put(fce);
++}
++
+ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
+ struct sock *sk, int flags)
+ {
+@@ -1537,8 +1577,10 @@ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
+ u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+
+ restart:
+- genid = atomic_read(&flow_cache_genid);
++ family = dst_orig->ops->family;
++ genid = atomic_read(&net->xfrm.policy_genid);
+ policy = NULL;
++ dst = NULL;
+ for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
+ pols[pi] = NULL;
+ npols = 0;
+@@ -1555,24 +1597,51 @@ restart:
+ }
+
+ if (!policy) {
++ struct flow_cache_entry *fce;
++ struct xfrm_flow_cache_entry *xf;
++
+ /* To accelerate a bit... */
+ if ((dst_orig->flags & DST_NOXFRM) ||
+ !net->xfrm.policy_count[XFRM_POLICY_OUT])
+ goto nopol;
+
+- policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
+- dir, xfrm_policy_lookup);
+- err = PTR_ERR(policy);
+- if (IS_ERR(policy)) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+- goto dropdst;
++ fce = flow_cache_lookup(&net->xfrm.flow_cache,
++ fl, family, dir);
++ if (fce == NULL)
++ goto no_cache;
++
++ xf = container_of(fce, struct xfrm_flow_cache_entry, fce);
++ xfrm_flow_cache_entry_validate(&net->xfrm.flow_cache, fce);
++ if (xf->policy != NULL) {
++ policy = xf->policy;
++ if (policy != XFRM_CACHE_NO_POLICY)
++ xfrm_pol_hold(policy);
++ if (xf->dst != NULL)
++ dst = dst_clone((struct dst_entry *) xf->dst);
++ }
++ flow_cache_entry_put(fce);
++ if (policy == XFRM_CACHE_NO_POLICY)
++ goto nopol;
++ if (dst && !xfrm_bundle_ok(policy, (struct xfrm_dst *) dst, fl, family, 0)) {
++ dst_release(dst);
++ dst = NULL;
+ }
+ }
++no_cache:
++ if (!policy) {
++ policy = xfrm_policy_lookup(net, fl, family, dir);
++ if (!policy) {
++ xfrm_flow_cache_update(
++ net, fl, family, dir,
++ XFRM_CACHE_NO_POLICY, NULL);
++ goto nopol;
++ }
++ }
++ if (IS_ERR(policy)) {
++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
++ goto dropdst;
++ }
+
+- if (!policy)
+- goto nopol;
+-
+- family = dst_orig->ops->family;
+ pols[0] = policy;
+ npols ++;
+ xfrm_nr += pols[0]->xfrm_nr;
+@@ -1583,6 +1652,9 @@ restart:
+
+ policy->curlft.use_time = get_seconds();
+
++ if (dst)
++ goto dst_found;
++
+ switch (policy->action) {
+ default:
+ case XFRM_POLICY_BLOCK:
+@@ -1593,18 +1665,11 @@ restart:
+
+ case XFRM_POLICY_ALLOW:
+ #ifndef CONFIG_XFRM_SUB_POLICY
+- if (policy->xfrm_nr == 0) {
+- /* Flow passes not transformed. */
+- xfrm_pol_put(policy);
+- return 0;
+- }
++ if (policy->xfrm_nr == 0)
++ goto no_transform;
+ #endif
+
+- /* Try to find matching bundle.
+- *
+- * LATER: help from flow cache. It is optional, this
+- * is required only for output policy.
+- */
++ /* Try to find matching bundle the hard way. */
+ dst = xfrm_find_bundle(fl, policy, family);
+ if (IS_ERR(dst)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+@@ -1644,12 +1709,8 @@ restart:
+ * they are searched. See above not-transformed bypass
+ * is surrounded by non-sub policy configuration, too.
+ */
+- if (xfrm_nr == 0) {
+- /* Flow passes not transformed. */
+- xfrm_pols_put(pols, npols);
+- return 0;
+- }
+-
++ if (xfrm_nr == 0)
++ goto no_transform;
+ #endif
+ nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
+
+@@ -1680,7 +1741,7 @@ restart:
+ goto error;
+ }
+ if (nx == -EAGAIN ||
+- genid != atomic_read(&flow_cache_genid)) {
++ genid != atomic_read(&net->xfrm.policy_genid)) {
+ xfrm_pols_put(pols, npols);
+ goto restart;
+ }
+@@ -1691,11 +1752,8 @@ restart:
+ goto error;
+ }
+ }
+- if (nx == 0) {
+- /* Flow passes not transformed. */
+- xfrm_pols_put(pols, npols);
+- return 0;
+- }
++ if (nx == 0)
++ goto no_transform;
+
+ dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
+ err = PTR_ERR(dst);
+@@ -1744,6 +1802,9 @@ restart:
+ dst_hold(dst);
+ write_unlock_bh(&policy->lock);
+ }
++ xfrm_flow_cache_update(net, fl, family, dir,
++ policy, (struct xfrm_dst *) dst);
++dst_found:
+ *dst_p = dst;
+ dst_release(dst_orig);
+ xfrm_pols_put(pols, npols);
+@@ -1761,7 +1822,12 @@ nopol:
+ if (flags & XFRM_LOOKUP_ICMP)
+ goto dropdst;
+ return 0;
++no_transform:
++ /* Flow passes not transformed. */
++ xfrm_pols_put(pols, npols);
++ return 0;
+ }
++
+ EXPORT_SYMBOL(__xfrm_lookup);
+
+ int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
+@@ -1919,10 +1985,35 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+ }
+ }
+
+- if (!pol)
+- pol = flow_cache_lookup(net, &fl, family, fl_dir,
+- xfrm_policy_lookup);
+-
++ if (!pol) {
++ struct flow_cache_entry *fce;
++ struct xfrm_flow_cache_entry *xf;
++
++ fce = flow_cache_lookup(&net->xfrm.flow_cache,
++ &fl, family, dir);
++ if (fce != NULL) {
++ xf = container_of(fce, struct xfrm_flow_cache_entry, fce);
++ xfrm_flow_cache_entry_validate(&net->xfrm.flow_cache, fce);
++ if (xf->policy != NULL) {
++ pol = xf->policy;
++ if (pol != XFRM_CACHE_NO_POLICY)
++ xfrm_pol_hold(pol);
++ else
++ pol = NULL;
++ } else {
++ pol = xfrm_policy_lookup(net, &fl, family, dir);
++ if (!IS_ERR(pol)) {
++ if (pol)
++ xf->policy = pol;
++ else
++ xf->policy = XFRM_CACHE_NO_POLICY;
++ }
++ xf->dst = NULL;
++ xf->policy_genid = atomic_read(&net->xfrm.policy_genid);
++ }
++ flow_cache_entry_put(fce);
++ }
++ }
+ if (IS_ERR(pol)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+ return 0;
+@@ -2121,6 +2212,7 @@ static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_ent
+ dstp = &dst->next;
+ }
+ }
++ atomic_inc(&pol->bundles_genid);
+ write_unlock(&pol->lock);
+ }
+
+@@ -2148,6 +2240,7 @@ static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
+ }
+ read_unlock_bh(&xfrm_policy_lock);
+
++ flow_cache_flush(&net->xfrm.flow_cache, NULL);
+ while (gc_list) {
+ struct dst_entry *dst = gc_list;
+ gc_list = dst->next;
+@@ -2428,6 +2521,9 @@ static int __net_init xfrm_policy_init(struct net *net)
+
+ INIT_LIST_HEAD(&net->xfrm.policy_all);
+ INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
++ INIT_HLIST_HEAD(&net->xfrm.policy_gc_list);
++ INIT_WORK(&net->xfrm.policy_gc_work, xfrm_policy_gc_task);
++ flow_cache_init(&net->xfrm.flow_cache, sizeof(struct xfrm_flow_cache_entry));
+ if (net_eq(net, &init_net))
+ register_netdevice_notifier(&xfrm_dev_notifier);
+ return 0;
+@@ -2461,7 +2557,7 @@ static void xfrm_policy_fini(struct net *net)
+ audit_info.sessionid = -1;
+ audit_info.secid = 0;
+ xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+- flush_work(&xfrm_policy_gc_work);
++ flush_work(&net->xfrm.policy_gc_work);
+
+ WARN_ON(!list_empty(&net->xfrm.policy_all));
+
+@@ -2479,6 +2575,8 @@ static void xfrm_policy_fini(struct net *net)
+ sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
+ WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
+ xfrm_hash_free(net->xfrm.policy_byidx, sz);
++
++ flow_cache_fini(&net->xfrm.flow_cache);
+ }
+
+ static int __net_init xfrm_net_init(struct net *net)
+@@ -2685,8 +2783,9 @@ static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
+ static int xfrm_policy_migrate(struct xfrm_policy *pol,
+ struct xfrm_migrate *m, int num_migrate)
+ {
++ struct net *net = xp_net(pol);
+ struct xfrm_migrate *mp;
+- struct dst_entry *dst;
++ struct dst_entry *gc_list = NULL, *tail;
+ int i, j, n = 0;
+
+ write_lock_bh(&pol->lock);
+@@ -2711,15 +2810,25 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
+ sizeof(pol->xfrm_vec[i].saddr));
+ pol->xfrm_vec[i].encap_family = mp->new_family;
+ /* flush bundles */
+- while ((dst = pol->bundles) != NULL) {
+- pol->bundles = dst->next;
+- dst_free(dst);
+- }
++ tail = pol->bundles;
++ while (tail->next)
++ tail = tail->next;
++ tail->next = gc_list;
++ gc_list = pol->bundles;
++ pol->bundles = NULL;
++ atomic_inc(&pol->bundles_genid);
+ }
+ }
+-
+ write_unlock_bh(&pol->lock);
+
++ flow_cache_flush(&net->xfrm.flow_cache, NULL);
++ while (gc_list) {
++ struct dst_entry *dst = gc_list;
++
++ gc_list = dst->next;
++ dst_free(dst);
++ }
++
+ if (!n)
+ return -ENODATA;
+
+--
+1.7.0.2
+