path: root/main/linux-grsec/xfrm-flow-cache-grsec.patch
author:    Michael Mason <ms13sp@gmail.com>  2010-03-17 17:43:41 +0000
committer: Michael Mason <ms13sp@gmail.com>  2010-03-17 17:43:41 +0000
commit:    4b23d4dfa33b09440ffbdfccbd46571649ccecaa (patch)
tree:      02c3b507f3e038c1e191fa97d59562b468e64b65 /main/linux-grsec/xfrm-flow-cache-grsec.patch
parent:    e59a787cc0775b8f7f97d492674a257baef45f6d (diff)
parent:    192b8fad759488bbe2fea1b43acda638eb6ebe85 (diff)
download:  aports-4b23d4dfa33b09440ffbdfccbd46571649ccecaa.tar.bz2
           aports-4b23d4dfa33b09440ffbdfccbd46571649ccecaa.tar.xz
Merge branch 'master' of git://git.alpinelinux.org/aports
Diffstat (limited to 'main/linux-grsec/xfrm-flow-cache-grsec.patch')
-rw-r--r--  main/linux-grsec/xfrm-flow-cache-grsec.patch  1154
1 file changed, 1154 insertions(+), 0 deletions(-)
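
The patch below reworks the xfrm flow cache: the single global cache (flow_cache_genid and the per-CPU flow_tables) becomes an embeddable struct flow_cache instance held in struct netns_xfrm, flow_cache_lookup() now returns the cache entry itself with bottom halves disabled, and callers release it with flow_cache_entry_put(). A minimal caller-side sketch of that pattern follows, modeled on the __xfrm_lookup()/__xfrm_policy_check() hunks; the helper name example_cached_policy_lookup is hypothetical, and since struct xfrm_flow_cache_entry, XFRM_CACHE_NO_POLICY and xfrm_flow_cache_entry_validate() are introduced by this patch inside net/xfrm/xfrm_policy.c, the sketch assumes it would live in that file.

/*
 * Illustrative sketch only, not part of the patch: caller-side use of
 * the embedded flow cache API added below.
 */
static struct xfrm_policy *example_cached_policy_lookup(struct net *net,
							 struct flowi *fl,
							 u16 family, u8 dir)
{
	struct flow_cache_entry *fce;
	struct xfrm_flow_cache_entry *xf;
	struct xfrm_policy *pol = NULL;

	/* Returns the (possibly freshly allocated) entry with BH disabled
	 * on this CPU, or NULL if the per-CPU table is not ready or the
	 * atomic allocation failed. */
	fce = flow_cache_lookup(&net->xfrm.flow_cache, fl, family, dir);
	if (fce == NULL)
		return NULL;

	/* struct flow_cache_entry is embedded as the first member of the
	 * xfrm-specific entry, so container_of() recovers the wrapper. */
	xf = container_of(fce, struct xfrm_flow_cache_entry, fce);

	/* Clears ->policy/->dst if the per-netns policy_genid or the
	 * policy's bundles_genid moved on since the entry was filled. */
	xfrm_flow_cache_entry_validate(&net->xfrm.flow_cache, fce);

	if (xf->policy != NULL && xf->policy != XFRM_CACHE_NO_POLICY) {
		pol = xf->policy;
		xfrm_pol_hold(pol);
	}

	/* Only re-enables BH; the entry stays in the per-CPU hash table. */
	flow_cache_entry_put(fce);
	return pol;
}
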
diff --git a/main/linux-grsec/xfrm-flow-cache-grsec.patch b/main/linux-grsec/xfrm-flow-cache-grsec.patch
new file mode 100644
index 0000000000..881623d8ae
--- /dev/null
+++ b/main/linux-grsec/xfrm-flow-cache-grsec.patch
@@ -0,0 +1,1154 @@
+From 3519d7c86a6e87584d25f3292b53d3ce865a659e Mon Sep 17 00:00:00 2001
+From: Natanael Copa <ncopa@alpinelinux.org>
+Date: Mon, 15 Mar 2010 15:31:37 +0000
+Subject: [PATCH] xfrm: flow cache2
+
+---
+ include/net/flow.h | 39 ++++-
+ include/net/netns/xfrm.h | 4 +
+ include/net/xfrm.h | 1 +
+ net/core/flow.c | 342 ++++++++++++++++++--------------------
+ net/ipv6/inet6_connection_sock.c | 6 +-
+ net/xfrm/xfrm_policy.c | 271 +++++++++++++++++++++---------
+ 6 files changed, 394 insertions(+), 269 deletions(-)
+
+diff --git a/include/net/flow.h b/include/net/flow.h
+index 809970b..814a9d2 100644
+--- a/include/net/flow.h
++++ b/include/net/flow.h
+@@ -8,6 +8,9 @@
+ #define _NET_FLOW_H
+
+ #include <linux/in6.h>
++#include <linux/notifier.h>
++#include <linux/timer.h>
++#include <linux/slab.h>
+ #include <asm/atomic.h>
+
+ struct flowi {
+@@ -86,13 +89,37 @@ struct flowi {
+
+ struct net;
+ struct sock;
+-typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family,
+- u8 dir, void **objp, atomic_t **obj_refp);
+
+-extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family,
+- u8 dir, flow_resolve_t resolver);
+-extern void flow_cache_flush(void);
+-extern atomic_t flow_cache_genid;
++struct flow_cache_percpu;
++struct flow_cache_entry;
++
++struct flow_cache {
++ u32 hash_shift;
++ u32 order;
++ struct flow_cache_percpu * percpu;
++ struct notifier_block hotcpu_notifier;
++ int low_watermark;
++ int high_watermark;
++ struct timer_list rnd_timer;
++ struct kmem_cache * flow_cachep;
++};
++
++struct flow_cache_entry {
++ struct flow_cache_entry *next;
++ struct flowi key;
++ u16 family;
++ u8 dir;
++};
++
++extern struct flow_cache_entry *flow_cache_lookup(
++ struct flow_cache *cache, struct flowi *key,
++ u16 family, u8 dir);
++extern void flow_cache_entry_put(struct flow_cache_entry *fce);
++
++void flow_cache_flush(struct flow_cache *fc,
++ void (*flush)(struct flow_cache *fc, struct flow_cache_entry *fce));
++extern int flow_cache_init(struct flow_cache *cache, size_t entry_size);
++extern void flow_cache_fini(struct flow_cache *cache);
+
+ static inline int flow_cache_uli_match(struct flowi *fl1, struct flowi *fl2)
+ {
+diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
+index 1ba9127..4bb72c4 100644
+--- a/include/net/netns/xfrm.h
++++ b/include/net/netns/xfrm.h
+@@ -41,6 +41,10 @@ struct netns_xfrm {
+ struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2];
+ unsigned int policy_count[XFRM_POLICY_MAX * 2];
+ struct work_struct policy_hash_work;
++ atomic_t policy_genid;
++ struct hlist_head policy_gc_list;
++ struct work_struct policy_gc_work;
++ struct flow_cache flow_cache;
+
+ struct sock *nlsk;
+
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index 223e90a..5cd4e29 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -487,6 +487,7 @@ struct xfrm_policy
+ struct xfrm_lifetime_cfg lft;
+ struct xfrm_lifetime_cur curlft;
+ struct dst_entry *bundles;
++ atomic_t bundles_genid;
+ struct xfrm_policy_walk_entry walk;
+ u8 type;
+ u8 action;
+diff --git a/net/core/flow.c b/net/core/flow.c
+index 5b27992..e3782c2 100644
+--- a/net/core/flow.c
++++ b/net/core/flow.c
+@@ -25,114 +25,85 @@
+ #include <asm/atomic.h>
+ #include <linux/security.h>
+
+-struct flow_cache_entry {
+- struct flow_cache_entry *next;
+- u16 family;
+- u8 dir;
+- u32 genid;
+- struct flowi key;
+- void *object;
+- atomic_t *object_ref;
+-};
+-
+-atomic_t flow_cache_genid = ATOMIC_INIT(0);
+-
+-static u32 flow_hash_shift;
+-#define flow_hash_size (1 << flow_hash_shift)
+-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables);
+-
+-#define flow_table(cpu) (per_cpu(flow_tables, cpu))
+-
+-static struct kmem_cache *flow_cachep __read_mostly;
+
+-static int flow_lwm, flow_hwm;
+-
+-struct flow_percpu_info {
+- int hash_rnd_recalc;
+- u32 hash_rnd;
+- int count;
++struct flow_cache_percpu {
++ struct flow_cache_entry ** hash_table;
++ int hash_count;
++ u32 hash_rnd;
++ int hash_rnd_recalc;
++ struct tasklet_struct flush_tasklet;
+ };
+-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info);
+-
+-#define flow_hash_rnd_recalc(cpu) \
+- (per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
+-#define flow_hash_rnd(cpu) \
+- (per_cpu(flow_hash_info, cpu).hash_rnd)
+-#define flow_count(cpu) \
+- (per_cpu(flow_hash_info, cpu).count)
+-
+-static struct timer_list flow_hash_rnd_timer;
+-
+-#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
+
+ struct flow_flush_info {
+- atomic_t cpuleft;
+- struct completion completion;
++ void (*flush)(struct flow_cache *fc, struct flow_cache_entry *fce);
++ struct flow_cache * cache;
++ atomic_t cpuleft;
++ struct completion completion;
+ };
+-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets);
+
+-#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
++#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
++#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
+
+ static void flow_cache_new_hashrnd(unsigned long arg)
+ {
++ struct flow_cache *fc = (struct flow_cache *) arg;
+ int i;
+
+ for_each_possible_cpu(i)
+- flow_hash_rnd_recalc(i) = 1;
++ per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
+
+- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+- add_timer(&flow_hash_rnd_timer);
++ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
++ add_timer(&fc->rnd_timer);
+ }
+
+-static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
+-{
+- if (fle->object)
+- atomic_dec(fle->object_ref);
+- kmem_cache_free(flow_cachep, fle);
+- flow_count(cpu)--;
+-}
+-
+-static void __flow_cache_shrink(int cpu, int shrink_to)
++static void __flow_cache_shrink(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp,
++ int shrink_to)
+ {
+ struct flow_cache_entry *fle, **flp;
+ int i;
+
+- for (i = 0; i < flow_hash_size; i++) {
++ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+ int k = 0;
+
+- flp = &flow_table(cpu)[i];
++ flp = &fcp->hash_table[i];
+ while ((fle = *flp) != NULL && k < shrink_to) {
+ k++;
+ flp = &fle->next;
+ }
+ while ((fle = *flp) != NULL) {
+ *flp = fle->next;
+- flow_entry_kill(cpu, fle);
++
++ kmem_cache_free(fc->flow_cachep, fle);
++ fcp->hash_count--;
+ }
+ }
+ }
+
+-static void flow_cache_shrink(int cpu)
++static void flow_cache_shrink(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp)
+ {
+- int shrink_to = flow_lwm / flow_hash_size;
++ int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+
+- __flow_cache_shrink(cpu, shrink_to);
++ __flow_cache_shrink(fc, fcp, shrink_to);
+ }
+
+-static void flow_new_hash_rnd(int cpu)
++static void flow_new_hash_rnd(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp)
+ {
+- get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
+- flow_hash_rnd_recalc(cpu) = 0;
+-
+- __flow_cache_shrink(cpu, 0);
++ get_random_bytes(&fcp->hash_rnd, sizeof(u32));
++ fcp->hash_rnd_recalc = 0;
++ __flow_cache_shrink(fc, fcp, 0);
+ }
+
+-static u32 flow_hash_code(struct flowi *key, int cpu)
++static u32 flow_hash_code(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp,
++ struct flowi *key)
+ {
+ u32 *k = (u32 *) key;
+
+- return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
+- (flow_hash_size - 1));
++ return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
++ & (flow_cache_hash_size(fc) - 1));
+ }
+
+ #if (BITS_PER_LONG == 64)
+@@ -165,128 +136,100 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+ return 0;
+ }
+
+-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+- flow_resolve_t resolver)
++struct flow_cache_entry *flow_cache_lookup(struct flow_cache *fc,
++ struct flowi *key,
++ u16 family, u8 dir)
+ {
+ struct flow_cache_entry *fle, **head;
++ struct flow_cache_percpu *fcp;
+ unsigned int hash;
+- int cpu;
+
+ local_bh_disable();
+- cpu = smp_processor_id();
++ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+
+ fle = NULL;
+ /* Packet really early in init? Making flow_cache_init a
+ * pre-smp initcall would solve this. --RR */
+- if (!flow_table(cpu))
++ if (!fcp->hash_table)
+ goto nocache;
+
+- if (flow_hash_rnd_recalc(cpu))
+- flow_new_hash_rnd(cpu);
+- hash = flow_hash_code(key, cpu);
++ if (fcp->hash_rnd_recalc)
++ flow_new_hash_rnd(fc, fcp);
++
++ hash = flow_hash_code(fc, fcp, key);
+
+- head = &flow_table(cpu)[hash];
++ head = &fcp->hash_table[hash];
+ for (fle = *head; fle; fle = fle->next) {
+ if (fle->family == family &&
+ fle->dir == dir &&
+ flow_key_compare(key, &fle->key) == 0) {
+- if (fle->genid == atomic_read(&flow_cache_genid)) {
+- void *ret = fle->object;
+-
+- if (ret)
+- atomic_inc(fle->object_ref);
+- local_bh_enable();
+-
+- return ret;
+- }
+- break;
+- }
+- }
+-
+- if (!fle) {
+- if (flow_count(cpu) > flow_hwm)
+- flow_cache_shrink(cpu);
+-
+- fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
+- if (fle) {
+- fle->next = *head;
+- *head = fle;
+- fle->family = family;
+- fle->dir = dir;
+- memcpy(&fle->key, key, sizeof(*key));
+- fle->object = NULL;
+- flow_count(cpu)++;
++ return fle;
+ }
+ }
+
+-nocache:
+- {
+- int err;
+- void *obj;
+- atomic_t *obj_ref;
+-
+- err = resolver(net, key, family, dir, &obj, &obj_ref);
++ if (fcp->hash_count > fc->high_watermark)
++ flow_cache_shrink(fc, fcp);
+
+- if (fle && !err) {
+- fle->genid = atomic_read(&flow_cache_genid);
++ fle = kmem_cache_zalloc(fc->flow_cachep, GFP_ATOMIC);
++ if (!fle)
++ goto nocache;
+
+- if (fle->object)
+- atomic_dec(fle->object_ref);
++ fle->next = *head;
++ *head = fle;
++ fle->family = family;
++ fle->dir = dir;
++ memcpy(&fle->key, key, sizeof(*key));
++ fcp->hash_count++;
++ return fle;
+
+- fle->object = obj;
+- fle->object_ref = obj_ref;
+- if (obj)
+- atomic_inc(fle->object_ref);
+- }
+- local_bh_enable();
++nocache:
++ local_bh_enable();
++ return NULL;
++}
+
+- if (err)
+- obj = ERR_PTR(err);
+- return obj;
+- }
++void flow_cache_entry_put(struct flow_cache_entry *fce)
++{
++ local_bh_enable();
+ }
+
+ static void flow_cache_flush_tasklet(unsigned long data)
+ {
+- struct flow_flush_info *info = (void *)data;
++ struct flow_flush_info *info = (void *) data;
++ struct flow_cache *fc = (void *) info->cache;
++ struct flow_cache_percpu *fcp;
+ int i;
+- int cpu;
+
+- cpu = smp_processor_id();
+- for (i = 0; i < flow_hash_size; i++) {
+- struct flow_cache_entry *fle;
++ if (info->flush == NULL)
++ goto done;
+
+- fle = flow_table(cpu)[i];
+- for (; fle; fle = fle->next) {
+- unsigned genid = atomic_read(&flow_cache_genid);
+-
+- if (!fle->object || fle->genid == genid)
+- continue;
++ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
++ for (i = 0; i < flow_cache_hash_size(fc); i++) {
++ struct flow_cache_entry *fle;
+
+- fle->object = NULL;
+- atomic_dec(fle->object_ref);
+- }
++ fle = fcp->hash_table[i];
++ for (; fle; fle = fle->next)
++ info->flush(fc, fle);
+ }
+
++done:
+ if (atomic_dec_and_test(&info->cpuleft))
+ complete(&info->completion);
+ }
+
+-static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
+ static void flow_cache_flush_per_cpu(void *data)
+ {
+ struct flow_flush_info *info = data;
+- int cpu;
+ struct tasklet_struct *tasklet;
++ int cpu;
+
+ cpu = smp_processor_id();
+-
+- tasklet = flow_flush_tasklet(cpu);
+- tasklet->data = (unsigned long)info;
++ tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
++ tasklet->data = (unsigned long) data;
+ tasklet_schedule(tasklet);
+ }
+
+-void flow_cache_flush(void)
++void flow_cache_flush(struct flow_cache *fc,
++ void (*flush)(struct flow_cache *fc, struct flow_cache_entry *fce))
+ {
+ struct flow_flush_info info;
+ static DEFINE_MUTEX(flow_flush_sem);
+@@ -294,6 +237,8 @@ void flow_cache_flush(void)
+ /* Don't want cpus going down or up during this. */
+ get_online_cpus();
+ mutex_lock(&flow_flush_sem);
++ info.cache = fc;
++ info.flush = flush;
+ atomic_set(&info.cpuleft, num_online_cpus());
+ init_completion(&info.completion);
+
+@@ -307,62 +252,99 @@ void flow_cache_flush(void)
+ put_online_cpus();
+ }
+
+-static void __init flow_cache_cpu_prepare(int cpu)
++static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp)
++{
++ fcp->hash_table = (struct flow_cache_entry **)
++ __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
++ fcp->hash_rnd_recalc = 1;
++ fcp->hash_count = 0;
++
++ tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
++}
++
++static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
++ unsigned long action,
++ void *hcpu)
++{
++ struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
++ int cpu = (unsigned long) hcpu;
++ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
++
++ switch (action) {
++ case CPU_UP_PREPARE:
++ case CPU_UP_PREPARE_FROZEN:
++ flow_cache_cpu_prepare(fc, fcp);
++ if (!fcp->hash_table)
++ return NOTIFY_BAD;
++ break;
++ case CPU_UP_CANCELED:
++ case CPU_UP_CANCELED_FROZEN:
++ case CPU_DEAD:
++ case CPU_DEAD_FROZEN:
++ if (fcp->hash_table) {
++ __flow_cache_shrink(fc, fcp, 0);
++ free_pages((unsigned long) fcp->hash_table, fc->order);
++ fcp->hash_table = NULL;
++ }
++ break;
++ }
++ return NOTIFY_OK;
++}
++
++int flow_cache_init(struct flow_cache *fc, size_t entry_size)
+ {
+- struct tasklet_struct *tasklet;
+ unsigned long order;
++ int i, r;
++
++ BUG_ON(entry_size < sizeof(struct flow_cache_entry));
++ fc->flow_cachep = kmem_cache_create("flow_cache",
++ entry_size,
++ 0, SLAB_PANIC,
++ NULL);
++ fc->hash_shift = 10;
++ fc->low_watermark = 2 * flow_cache_hash_size(fc);
++ fc->high_watermark = 4 * flow_cache_hash_size(fc);
++ fc->percpu = alloc_percpu(struct flow_cache_percpu);
+
+ for (order = 0;
+ (PAGE_SIZE << order) <
+- (sizeof(struct flow_cache_entry *)*flow_hash_size);
++ (sizeof(struct flow_cache_entry *) * flow_cache_hash_size(fc));
+ order++)
+ /* NOTHING */;
++ fc->order = order;
+
+- flow_table(cpu) = (struct flow_cache_entry **)
+- __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+- if (!flow_table(cpu))
+- panic("NET: failed to allocate flow cache order %lu\n", order);
++ setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, (unsigned long) fc);
++ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
++ add_timer(&fc->rnd_timer);
+
+- flow_hash_rnd_recalc(cpu) = 1;
+- flow_count(cpu) = 0;
++ for_each_online_cpu(i) {
++ r = flow_cache_cpu(&fc->hotcpu_notifier,
++ CPU_UP_PREPARE, (void*) i);
++ if (r != NOTIFY_OK)
++ panic("NET: failed to allocate flow cache order %lu\n", order);
++ }
+
+- tasklet = flow_flush_tasklet(cpu);
+- tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+-}
++ fc->hotcpu_notifier = (struct notifier_block){
++ .notifier_call = flow_cache_cpu,
++ };
++ register_hotcpu_notifier(&fc->hotcpu_notifier);
+
+-static int flow_cache_cpu(struct notifier_block *nfb,
+- unsigned long action,
+- void *hcpu)
+-{
+- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+- __flow_cache_shrink((unsigned long)hcpu, 0);
+- return NOTIFY_OK;
++ return 0;
+ }
+
+-static int __init flow_cache_init(void)
++void flow_cache_fini(struct flow_cache *fc)
+ {
+ int i;
+
+- flow_cachep = kmem_cache_create("flow_cache",
+- sizeof(struct flow_cache_entry),
+- 0, SLAB_PANIC,
+- NULL);
+- flow_hash_shift = 10;
+- flow_lwm = 2 * flow_hash_size;
+- flow_hwm = 4 * flow_hash_size;
+-
+- setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
+- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+- add_timer(&flow_hash_rnd_timer);
++ del_timer(&fc->rnd_timer);
++ unregister_hotcpu_notifier(&fc->hotcpu_notifier);
+
+ for_each_possible_cpu(i)
+- flow_cache_cpu_prepare(i);
++ flow_cache_cpu(&fc->hotcpu_notifier, CPU_DEAD, (void*) i);
+
+- hotcpu_notifier(flow_cache_cpu, 0);
+- return 0;
++ free_percpu(fc->percpu);
++ kmem_cache_destroy(fc->flow_cachep);
+ }
+
+-module_init(flow_cache_init);
+-
+-EXPORT_SYMBOL(flow_cache_genid);
+ EXPORT_SYMBOL(flow_cache_lookup);
+diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
+index cc4797d..399853e 100644
+--- a/net/ipv6/inet6_connection_sock.c
++++ b/net/ipv6/inet6_connection_sock.c
+@@ -151,8 +151,9 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
+
+ #ifdef CONFIG_XFRM
+ {
++ struct net *net = sock_net(sk);
+ struct rt6_info *rt = (struct rt6_info *)dst;
+- rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
++ rt->rt6i_flow_cache_genid = atomic_read(&net->xfrm.policy_genid);
+ }
+ #endif
+ }
+@@ -166,8 +167,9 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
+
+ #ifdef CONFIG_XFRM
+ if (dst) {
++ struct net *net = sock_net(sk);
+ struct rt6_info *rt = (struct rt6_info *)dst;
+- if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
++ if (rt->rt6i_flow_cache_genid != atomic_read(&net->xfrm.policy_genid)) {
+ sk->sk_dst_cache = NULL;
+ dst_release(dst);
+ dst = NULL;
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index cb81ca3..82b01c3 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -44,7 +44,6 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+
+ static struct kmem_cache *xfrm_dst_cache __read_mostly;
+
+-static HLIST_HEAD(xfrm_policy_gc_list);
+ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
+
+ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
+@@ -53,6 +52,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst);
+
+ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
+ int dir);
++static int stale_bundle(struct dst_entry *dst);
+
+ static inline int
+ __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
+@@ -216,6 +216,35 @@ expired:
+ xfrm_pol_put(xp);
+ }
+
++struct xfrm_flow_cache_entry {
++ struct flow_cache_entry fce;
++ struct xfrm_policy *policy;
++ struct xfrm_dst *dst;
++ u32 policy_genid, bundles_genid;
++};
++#define XFRM_CACHE_NO_POLICY ((struct xfrm_policy *) -1)
++
++void xfrm_flow_cache_entry_validate(struct flow_cache *fc,
++ struct flow_cache_entry *fce)
++{
++ struct net *net = container_of(fc, struct net, xfrm.flow_cache);
++ struct xfrm_flow_cache_entry *xfc =
++ container_of(fce, struct xfrm_flow_cache_entry, fce);
++
++ if (xfc->policy_genid != atomic_read(&net->xfrm.policy_genid))
++ goto invalid;
++ if (xfc->policy == NULL || xfc->policy == XFRM_CACHE_NO_POLICY)
++ return;
++ if (xfc->policy->walk.dead)
++ goto invalid;
++ if (xfc->bundles_genid != atomic_read(&xfc->policy->bundles_genid))
++ goto invalid_dst;
++ return;
++invalid:
++ xfc->policy = NULL;
++invalid_dst:
++ xfc->dst = NULL;
++}
+
+ /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
+ * SPD calls.
+@@ -269,27 +298,26 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+ if (del_timer(&policy->timer))
+ atomic_dec(&policy->refcnt);
+
+- if (atomic_read(&policy->refcnt) > 1)
+- flow_cache_flush();
+-
+ xfrm_pol_put(policy);
+ }
+
+ static void xfrm_policy_gc_task(struct work_struct *work)
+ {
++ struct net *net = container_of(work, struct net, xfrm.policy_gc_work);
+ struct xfrm_policy *policy;
+ struct hlist_node *entry, *tmp;
+ struct hlist_head gc_list;
+
+ spin_lock_bh(&xfrm_policy_gc_lock);
+- gc_list.first = xfrm_policy_gc_list.first;
+- INIT_HLIST_HEAD(&xfrm_policy_gc_list);
++ gc_list.first = net->xfrm.policy_gc_list.first;
++ INIT_HLIST_HEAD(&net->xfrm.policy_gc_list);
+ spin_unlock_bh(&xfrm_policy_gc_lock);
+
++ flow_cache_flush(&net->xfrm.flow_cache, xfrm_flow_cache_entry_validate);
++
+ hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
+ xfrm_policy_gc_kill(policy);
+ }
+-static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
+
+ /* Rule must be locked. Release descentant resources, announce
+ * entry dead. The rule must be unlinked from lists to the moment.
+@@ -297,6 +325,7 @@ static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
+
+ static void xfrm_policy_kill(struct xfrm_policy *policy)
+ {
++ struct net *net = xp_net(policy);
+ int dead;
+
+ write_lock_bh(&policy->lock);
+@@ -310,10 +339,10 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
+ }
+
+ spin_lock_bh(&xfrm_policy_gc_lock);
+- hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
++ hlist_add_head(&policy->bydst, &net->xfrm.policy_gc_list);
+ spin_unlock_bh(&xfrm_policy_gc_lock);
+
+- schedule_work(&xfrm_policy_gc_work);
++ schedule_work(&net->xfrm.policy_gc_work);
+ }
+
+ static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
+@@ -586,7 +615,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+ hlist_add_head(&policy->bydst, chain);
+ xfrm_pol_hold(policy);
+ net->xfrm.policy_count[dir]++;
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ if (delpol)
+ __xfrm_policy_unlink(delpol, dir);
+ policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
+@@ -619,11 +648,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+ gc_list = dst;
+
+ policy->bundles = NULL;
++ atomic_inc(&policy->bundles_genid);
+ }
+ write_unlock(&policy->lock);
+ }
+ read_unlock_bh(&xfrm_policy_lock);
+
++ flow_cache_flush(&net->xfrm.flow_cache, NULL);
+ while (gc_list) {
+ struct dst_entry *dst = gc_list;
+
+@@ -669,7 +700,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir,
+ write_unlock_bh(&xfrm_policy_lock);
+
+ if (ret && delete) {
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ xfrm_policy_kill(ret);
+ }
+ return ret;
+@@ -710,7 +741,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id,
+ write_unlock_bh(&xfrm_policy_lock);
+
+ if (ret && delete) {
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ xfrm_policy_kill(ret);
+ }
+ return ret;
+@@ -824,7 +855,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
+ }
+
+ }
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ out:
+ write_unlock_bh(&xfrm_policy_lock);
+ return err;
+@@ -977,32 +1008,18 @@ fail:
+ return ret;
+ }
+
+-static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+- u8 dir, void **objp, atomic_t **obj_refp)
++static struct xfrm_policy *xfrm_policy_lookup(
++ struct net *net, struct flowi *fl,
++ u16 family, u8 dir)
+ {
++#ifdef CONFIG_XFRM_SUB_POLICY
+ struct xfrm_policy *pol;
+- int err = 0;
+
+-#ifdef CONFIG_XFRM_SUB_POLICY
+ pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+- if (IS_ERR(pol)) {
+- err = PTR_ERR(pol);
+- pol = NULL;
+- }
+- if (pol || err)
+- goto end;
++ if (pol != NULL)
++ return pol;
+ #endif
+- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+- if (IS_ERR(pol)) {
+- err = PTR_ERR(pol);
+- pol = NULL;
+- }
+-#ifdef CONFIG_XFRM_SUB_POLICY
+-end:
+-#endif
+- if ((*objp = (void *) pol) != NULL)
+- *obj_refp = &pol->refcnt;
+- return err;
++ return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+ }
+
+ static inline int policy_to_flow_dir(int dir)
+@@ -1083,12 +1100,14 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
+
+ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
+ {
++ struct net *net = xp_net(pol);
++
+ write_lock_bh(&xfrm_policy_lock);
+ pol = __xfrm_policy_unlink(pol, dir);
+ write_unlock_bh(&xfrm_policy_lock);
+ if (pol) {
+ if (dir < XFRM_POLICY_MAX)
+- atomic_inc(&flow_cache_genid);
++ atomic_inc(&net->xfrm.policy_genid);
+ xfrm_policy_kill(pol);
+ return 0;
+ }
+@@ -1512,13 +1531,34 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
+ #endif
+ }
+
+-static int stale_bundle(struct dst_entry *dst);
+-
+ /* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
++
++static void xfrm_flow_cache_update(struct net *net, struct flowi *key,
++ u16 family, u8 dir,
++ struct xfrm_policy *pol,
++ struct xfrm_dst *dst)
++{
++ struct flow_cache_entry *fce;
++ struct xfrm_flow_cache_entry *xf;
++
++ fce = flow_cache_lookup(&net->xfrm.flow_cache,
++ key, family, dir);
++ if (fce == NULL)
++ return;
++
++ xf = container_of(fce, struct xfrm_flow_cache_entry, fce);
++ xf->policy_genid = atomic_read(&net->xfrm.policy_genid);
++ xf->policy = pol;
++ if (dst != NULL)
++ xf->bundles_genid = atomic_read(&pol->bundles_genid);
++ xf->dst = dst;
++ flow_cache_entry_put(fce);
++}
++
+ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
+ struct sock *sk, int flags)
+ {
+@@ -1537,8 +1577,10 @@ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
+ u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+
+ restart:
+- genid = atomic_read(&flow_cache_genid);
++ family = dst_orig->ops->family;
++ genid = atomic_read(&net->xfrm.policy_genid);
+ policy = NULL;
++ dst = NULL;
+ for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
+ pols[pi] = NULL;
+ npols = 0;
+@@ -1555,24 +1597,51 @@ restart:
+ }
+
+ if (!policy) {
++ struct flow_cache_entry *fce;
++ struct xfrm_flow_cache_entry *xf;
++
+ /* To accelerate a bit... */
+ if ((dst_orig->flags & DST_NOXFRM) ||
+ !net->xfrm.policy_count[XFRM_POLICY_OUT])
+ goto nopol;
+
+- policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
+- dir, xfrm_policy_lookup);
+- err = PTR_ERR(policy);
+- if (IS_ERR(policy)) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+- goto dropdst;
++ fce = flow_cache_lookup(&net->xfrm.flow_cache,
++ fl, family, dir);
++ if (fce == NULL)
++ goto no_cache;
++
++ xf = container_of(fce, struct xfrm_flow_cache_entry, fce);
++ xfrm_flow_cache_entry_validate(&net->xfrm.flow_cache, fce);
++ if (xf->policy != NULL) {
++ policy = xf->policy;
++ if (policy != XFRM_CACHE_NO_POLICY)
++ xfrm_pol_hold(policy);
++ if (xf->dst != NULL)
++ dst = dst_clone((struct dst_entry *) xf->dst);
++ }
++ flow_cache_entry_put(fce);
++ if (policy == XFRM_CACHE_NO_POLICY)
++ goto nopol;
++ if (dst && !xfrm_bundle_ok(policy, (struct xfrm_dst *) dst, fl, family, 0)) {
++ dst_release(dst);
++ dst = NULL;
+ }
+ }
++no_cache:
++ if (!policy) {
++ policy = xfrm_policy_lookup(net, fl, family, dir);
++ if (!policy) {
++ xfrm_flow_cache_update(
++ net, fl, family, dir,
++ XFRM_CACHE_NO_POLICY, NULL);
++ goto nopol;
++ }
++ }
++ if (IS_ERR(policy)) {
++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
++ goto dropdst;
++ }
+
+- if (!policy)
+- goto nopol;
+-
+- family = dst_orig->ops->family;
+ pols[0] = policy;
+ npols ++;
+ xfrm_nr += pols[0]->xfrm_nr;
+@@ -1583,6 +1652,9 @@ restart:
+
+ policy->curlft.use_time = get_seconds();
+
++ if (dst)
++ goto dst_found;
++
+ switch (policy->action) {
+ default:
+ case XFRM_POLICY_BLOCK:
+@@ -1593,18 +1665,11 @@ restart:
+
+ case XFRM_POLICY_ALLOW:
+ #ifndef CONFIG_XFRM_SUB_POLICY
+- if (policy->xfrm_nr == 0) {
+- /* Flow passes not transformed. */
+- xfrm_pol_put(policy);
+- return 0;
+- }
++ if (policy->xfrm_nr == 0)
++ goto no_transform;
+ #endif
+
+- /* Try to find matching bundle.
+- *
+- * LATER: help from flow cache. It is optional, this
+- * is required only for output policy.
+- */
++ /* Try to find matching bundle the hard way. */
+ dst = xfrm_find_bundle(fl, policy, family);
+ if (IS_ERR(dst)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+@@ -1644,12 +1709,8 @@ restart:
+ * they are searched. See above not-transformed bypass
+ * is surrounded by non-sub policy configuration, too.
+ */
+- if (xfrm_nr == 0) {
+- /* Flow passes not transformed. */
+- xfrm_pols_put(pols, npols);
+- return 0;
+- }
+-
++ if (xfrm_nr == 0)
++ goto no_transform;
+ #endif
+ nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
+
+@@ -1680,7 +1741,7 @@ restart:
+ goto error;
+ }
+ if (nx == -EAGAIN ||
+- genid != atomic_read(&flow_cache_genid)) {
++ genid != atomic_read(&net->xfrm.policy_genid)) {
+ xfrm_pols_put(pols, npols);
+ goto restart;
+ }
+@@ -1691,11 +1752,8 @@ restart:
+ goto error;
+ }
+ }
+- if (nx == 0) {
+- /* Flow passes not transformed. */
+- xfrm_pols_put(pols, npols);
+- return 0;
+- }
++ if (nx == 0)
++ goto no_transform;
+
+ dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
+ err = PTR_ERR(dst);
+@@ -1744,6 +1802,9 @@ restart:
+ dst_hold(dst);
+ write_unlock_bh(&policy->lock);
+ }
++ xfrm_flow_cache_update(net, fl, family, dir,
++ policy, (struct xfrm_dst *) dst);
++dst_found:
+ *dst_p = dst;
+ dst_release(dst_orig);
+ xfrm_pols_put(pols, npols);
+@@ -1761,7 +1822,12 @@ nopol:
+ if (flags & XFRM_LOOKUP_ICMP)
+ goto dropdst;
+ return 0;
++no_transform:
++ /* Flow passes not transformed. */
++ xfrm_pols_put(pols, npols);
++ return 0;
+ }
++
+ EXPORT_SYMBOL(__xfrm_lookup);
+
+ int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
+@@ -1919,10 +1985,35 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+ }
+ }
+
+- if (!pol)
+- pol = flow_cache_lookup(net, &fl, family, fl_dir,
+- xfrm_policy_lookup);
+-
++ if (!pol) {
++ struct flow_cache_entry *fce;
++ struct xfrm_flow_cache_entry *xf;
++
++ fce = flow_cache_lookup(&net->xfrm.flow_cache,
++ &fl, family, dir);
++ if (fce != NULL) {
++ xf = container_of(fce, struct xfrm_flow_cache_entry, fce);
++ xfrm_flow_cache_entry_validate(&net->xfrm.flow_cache, fce);
++ if (xf->policy != NULL) {
++ pol = xf->policy;
++ if (pol != XFRM_CACHE_NO_POLICY)
++ xfrm_pol_hold(pol);
++ else
++ pol = NULL;
++ } else {
++ pol = xfrm_policy_lookup(net, &fl, family, dir);
++ if (!IS_ERR(pol)) {
++ if (pol)
++ xf->policy = pol;
++ else
++ xf->policy = XFRM_CACHE_NO_POLICY;
++ }
++ xf->dst = NULL;
++ xf->policy_genid = atomic_read(&net->xfrm.policy_genid);
++ }
++ flow_cache_entry_put(fce);
++ }
++ }
+ if (IS_ERR(pol)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+ return 0;
+@@ -2121,6 +2212,7 @@ static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_ent
+ dstp = &dst->next;
+ }
+ }
++ atomic_inc(&pol->bundles_genid);
+ write_unlock(&pol->lock);
+ }
+
+@@ -2148,6 +2240,7 @@ static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
+ }
+ read_unlock_bh(&xfrm_policy_lock);
+
++ flow_cache_flush(&net->xfrm.flow_cache, NULL);
+ while (gc_list) {
+ struct dst_entry *dst = gc_list;
+ gc_list = dst->next;
+@@ -2428,6 +2521,9 @@ static int __net_init xfrm_policy_init(struct net *net)
+
+ INIT_LIST_HEAD(&net->xfrm.policy_all);
+ INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
++ INIT_HLIST_HEAD(&net->xfrm.policy_gc_list);
++ INIT_WORK(&net->xfrm.policy_gc_work, xfrm_policy_gc_task);
++ flow_cache_init(&net->xfrm.flow_cache, sizeof(struct xfrm_flow_cache_entry));
+ if (net_eq(net, &init_net))
+ register_netdevice_notifier(&xfrm_dev_notifier);
+ return 0;
+@@ -2461,7 +2557,7 @@ static void xfrm_policy_fini(struct net *net)
+ audit_info.sessionid = -1;
+ audit_info.secid = 0;
+ xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+- flush_work(&xfrm_policy_gc_work);
++ flush_work(&net->xfrm.policy_gc_work);
+
+ WARN_ON(!list_empty(&net->xfrm.policy_all));
+
+@@ -2479,6 +2575,8 @@ static void xfrm_policy_fini(struct net *net)
+ sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
+ WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
+ xfrm_hash_free(net->xfrm.policy_byidx, sz);
++
++ flow_cache_fini(&net->xfrm.flow_cache);
+ }
+
+ static int __net_init xfrm_net_init(struct net *net)
+@@ -2685,8 +2783,9 @@ static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
+ static int xfrm_policy_migrate(struct xfrm_policy *pol,
+ struct xfrm_migrate *m, int num_migrate)
+ {
++ struct net *net = xp_net(pol);
+ struct xfrm_migrate *mp;
+- struct dst_entry *dst;
++ struct dst_entry *gc_list = NULL, *tail;
+ int i, j, n = 0;
+
+ write_lock_bh(&pol->lock);
+@@ -2711,15 +2810,25 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
+ sizeof(pol->xfrm_vec[i].saddr));
+ pol->xfrm_vec[i].encap_family = mp->new_family;
+ /* flush bundles */
+- while ((dst = pol->bundles) != NULL) {
+- pol->bundles = dst->next;
+- dst_free(dst);
+- }
++ tail = pol->bundles;
++ while (tail->next)
++ tail = tail->next;
++ tail->next = gc_list;
++ gc_list = pol->bundles;
++ pol->bundles = NULL;
++ atomic_inc(&pol->bundles_genid);
+ }
+ }
+-
+ write_unlock_bh(&pol->lock);
+
++ flow_cache_flush(&net->xfrm.flow_cache, NULL);
++ while (gc_list) {
++ struct dst_entry *dst = gc_list;
++
++ gc_list = dst->next;
++ dst_free(dst);
++ }
++
+ if (!n)
+ return -ENODATA;
+
+--
+1.7.0.2
+