author | Michael Mason <ms13sp@gmail.com> | 2010-03-17 17:43:41 +0000
committer | Michael Mason <ms13sp@gmail.com> | 2010-03-17 17:43:41 +0000
commit | 4b23d4dfa33b09440ffbdfccbd46571649ccecaa (patch)
tree | 02c3b507f3e038c1e191fa97d59562b468e64b65 /main/linux-grsec/xfrm-flow-cache-grsec.patch
parent | e59a787cc0775b8f7f97d492674a257baef45f6d (diff)
parent | 192b8fad759488bbe2fea1b43acda638eb6ebe85 (diff)
download | aports-4b23d4dfa33b09440ffbdfccbd46571649ccecaa.tar.bz2 aports-4b23d4dfa33b09440ffbdfccbd46571649ccecaa.tar.xz
Merge branch 'master' of git://git.alpinelinux.org/aports
Diffstat (limited to 'main/linux-grsec/xfrm-flow-cache-grsec.patch')
-rw-r--r-- | main/linux-grsec/xfrm-flow-cache-grsec.patch | 1154 |
1 file changed, 1154 insertions, 0 deletions
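The patch below (Natanael Copa's "xfrm: flow cache2") turns the old global flow cache — a single per-cpu hash invalidated through `flow_cache_genid` and driven by a resolver callback — into a reusable `struct flow_cache` object that a subsystem embeds (here in `netns_xfrm`) and drives through an explicit `flow_cache_lookup()`/`flow_cache_entry_put()` pair, with `flow_cache_flush()` taking a caller-supplied per-entry callback. As a rough sketch of how a caller would use the new API, modelled on the xfrm changes further down; the `my_*` names are hypothetical and not part of the patch:

```c
/*
 * Illustrative only: how a subsystem might embed and drive the
 * flow_cache API introduced by the patch below.  The my_* names are
 * hypothetical; the real user added by this patch is net/xfrm.
 */
struct my_flow_cache_entry {
	struct flow_cache_entry	fce;	/* must sit at offset 0: the cache
					 * allocates entry_size bytes and
					 * treats them as this header */
	void			*object;	/* subsystem-private payload */
	u32			genid;		/* checked when validating */
};

static struct flow_cache my_cache;

static int my_cache_setup(void)
{
	/* entry_size must cover the embedded struct flow_cache_entry */
	return flow_cache_init(&my_cache, sizeof(struct my_flow_cache_entry));
}

static void *my_lookup(struct flowi *key, u16 family, u8 dir)
{
	struct flow_cache_entry *fce;
	struct my_flow_cache_entry *e;
	void *obj;

	/* returns an existing or freshly zeroed entry, with BHs disabled */
	fce = flow_cache_lookup(&my_cache, key, family, dir);
	if (fce == NULL)
		return NULL;

	e = container_of(fce, struct my_flow_cache_entry, fce);
	obj = e->object;	/* NULL for a brand-new entry; a real caller
				 * would also validate e->genid here, as the
				 * xfrm code does */
	flow_cache_entry_put(fce);	/* re-enables BHs */
	return obj;
}
```

A freshly allocated entry comes back zeroed apart from its key, so the caller is expected to notice the empty payload, resolve it, and fill the entry in — which is what `__xfrm_lookup()` does via `xfrm_flow_cache_update()` in the patch.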
diff --git a/main/linux-grsec/xfrm-flow-cache-grsec.patch b/main/linux-grsec/xfrm-flow-cache-grsec.patch new file mode 100644 index 0000000000..881623d8ae --- /dev/null +++ b/main/linux-grsec/xfrm-flow-cache-grsec.patch @@ -0,0 +1,1154 @@ +From 3519d7c86a6e87584d25f3292b53d3ce865a659e Mon Sep 17 00:00:00 2001 +From: Natanael Copa <ncopa@alpinelinux.org> +Date: Mon, 15 Mar 2010 15:31:37 +0000 +Subject: [PATCH] xfrm: flow cache2 + +--- + include/net/flow.h | 39 ++++- + include/net/netns/xfrm.h | 4 + + include/net/xfrm.h | 1 + + net/core/flow.c | 342 ++++++++++++++++++-------------------- + net/ipv6/inet6_connection_sock.c | 6 +- + net/xfrm/xfrm_policy.c | 271 +++++++++++++++++++++--------- + 6 files changed, 394 insertions(+), 269 deletions(-) + +diff --git a/include/net/flow.h b/include/net/flow.h +index 809970b..814a9d2 100644 +--- a/include/net/flow.h ++++ b/include/net/flow.h +@@ -8,6 +8,9 @@ + #define _NET_FLOW_H + + #include <linux/in6.h> ++#include <linux/notifier.h> ++#include <linux/timer.h> ++#include <linux/slab.h> + #include <asm/atomic.h> + + struct flowi { +@@ -86,13 +89,37 @@ struct flowi { + + struct net; + struct sock; +-typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family, +- u8 dir, void **objp, atomic_t **obj_refp); + +-extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, +- u8 dir, flow_resolve_t resolver); +-extern void flow_cache_flush(void); +-extern atomic_t flow_cache_genid; ++struct flow_cache_percpu; ++struct flow_cache_entry; ++ ++struct flow_cache { ++ u32 hash_shift; ++ u32 order; ++ struct flow_cache_percpu * percpu; ++ struct notifier_block hotcpu_notifier; ++ int low_watermark; ++ int high_watermark; ++ struct timer_list rnd_timer; ++ struct kmem_cache * flow_cachep; ++}; ++ ++struct flow_cache_entry { ++ struct flow_cache_entry *next; ++ struct flowi key; ++ u16 family; ++ u8 dir; ++}; ++ ++extern struct flow_cache_entry *flow_cache_lookup( ++ struct flow_cache *cache, struct flowi *key, ++ u16 family, u8 dir); ++extern void flow_cache_entry_put(struct flow_cache_entry *fce); ++ ++void flow_cache_flush(struct flow_cache *fc, ++ void (*flush)(struct flow_cache *fc, struct flow_cache_entry *fce)); ++extern int flow_cache_init(struct flow_cache *cache, size_t entry_size); ++extern void flow_cache_fini(struct flow_cache *cache); + + static inline int flow_cache_uli_match(struct flowi *fl1, struct flowi *fl2) + { +diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h +index 1ba9127..4bb72c4 100644 +--- a/include/net/netns/xfrm.h ++++ b/include/net/netns/xfrm.h +@@ -41,6 +41,10 @@ struct netns_xfrm { + struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; + unsigned int policy_count[XFRM_POLICY_MAX * 2]; + struct work_struct policy_hash_work; ++ atomic_t policy_genid; ++ struct hlist_head policy_gc_list; ++ struct work_struct policy_gc_work; ++ struct flow_cache flow_cache; + + struct sock *nlsk; + +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index 223e90a..5cd4e29 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -487,6 +487,7 @@ struct xfrm_policy + struct xfrm_lifetime_cfg lft; + struct xfrm_lifetime_cur curlft; + struct dst_entry *bundles; ++ atomic_t bundles_genid; + struct xfrm_policy_walk_entry walk; + u8 type; + u8 action; +diff --git a/net/core/flow.c b/net/core/flow.c +index 5b27992..e3782c2 100644 +--- a/net/core/flow.c ++++ b/net/core/flow.c +@@ -25,114 +25,85 @@ + #include <asm/atomic.h> + #include <linux/security.h> + +-struct flow_cache_entry { +- struct 
flow_cache_entry *next; +- u16 family; +- u8 dir; +- u32 genid; +- struct flowi key; +- void *object; +- atomic_t *object_ref; +-}; +- +-atomic_t flow_cache_genid = ATOMIC_INIT(0); +- +-static u32 flow_hash_shift; +-#define flow_hash_size (1 << flow_hash_shift) +-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables); +- +-#define flow_table(cpu) (per_cpu(flow_tables, cpu)) +- +-static struct kmem_cache *flow_cachep __read_mostly; + +-static int flow_lwm, flow_hwm; +- +-struct flow_percpu_info { +- int hash_rnd_recalc; +- u32 hash_rnd; +- int count; ++struct flow_cache_percpu { ++ struct flow_cache_entry ** hash_table; ++ int hash_count; ++ u32 hash_rnd; ++ int hash_rnd_recalc; ++ struct tasklet_struct flush_tasklet; + }; +-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info); +- +-#define flow_hash_rnd_recalc(cpu) \ +- (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) +-#define flow_hash_rnd(cpu) \ +- (per_cpu(flow_hash_info, cpu).hash_rnd) +-#define flow_count(cpu) \ +- (per_cpu(flow_hash_info, cpu).count) +- +-static struct timer_list flow_hash_rnd_timer; +- +-#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) + + struct flow_flush_info { +- atomic_t cpuleft; +- struct completion completion; ++ void (*flush)(struct flow_cache *fc, struct flow_cache_entry *fce); ++ struct flow_cache * cache; ++ atomic_t cpuleft; ++ struct completion completion; + }; +-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets); + +-#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu)) ++#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) ++#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) + + static void flow_cache_new_hashrnd(unsigned long arg) + { ++ struct flow_cache *fc = (struct flow_cache *) arg; + int i; + + for_each_possible_cpu(i) +- flow_hash_rnd_recalc(i) = 1; ++ per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; + +- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; +- add_timer(&flow_hash_rnd_timer); ++ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; ++ add_timer(&fc->rnd_timer); + } + +-static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) +-{ +- if (fle->object) +- atomic_dec(fle->object_ref); +- kmem_cache_free(flow_cachep, fle); +- flow_count(cpu)--; +-} +- +-static void __flow_cache_shrink(int cpu, int shrink_to) ++static void __flow_cache_shrink(struct flow_cache *fc, ++ struct flow_cache_percpu *fcp, ++ int shrink_to) + { + struct flow_cache_entry *fle, **flp; + int i; + +- for (i = 0; i < flow_hash_size; i++) { ++ for (i = 0; i < flow_cache_hash_size(fc); i++) { + int k = 0; + +- flp = &flow_table(cpu)[i]; ++ flp = &fcp->hash_table[i]; + while ((fle = *flp) != NULL && k < shrink_to) { + k++; + flp = &fle->next; + } + while ((fle = *flp) != NULL) { + *flp = fle->next; +- flow_entry_kill(cpu, fle); ++ ++ kmem_cache_free(fc->flow_cachep, fle); ++ fcp->hash_count--; + } + } + } + +-static void flow_cache_shrink(int cpu) ++static void flow_cache_shrink(struct flow_cache *fc, ++ struct flow_cache_percpu *fcp) + { +- int shrink_to = flow_lwm / flow_hash_size; ++ int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); + +- __flow_cache_shrink(cpu, shrink_to); ++ __flow_cache_shrink(fc, fcp, shrink_to); + } + +-static void flow_new_hash_rnd(int cpu) ++static void flow_new_hash_rnd(struct flow_cache *fc, ++ struct flow_cache_percpu *fcp) + { +- get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32)); +- flow_hash_rnd_recalc(cpu) = 0; +- +- __flow_cache_shrink(cpu, 0); ++ get_random_bytes(&fcp->hash_rnd, sizeof(u32)); ++ 
fcp->hash_rnd_recalc = 0; ++ __flow_cache_shrink(fc, fcp, 0); + } + +-static u32 flow_hash_code(struct flowi *key, int cpu) ++static u32 flow_hash_code(struct flow_cache *fc, ++ struct flow_cache_percpu *fcp, ++ struct flowi *key) + { + u32 *k = (u32 *) key; + +- return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) & +- (flow_hash_size - 1)); ++ return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) ++ & (flow_cache_hash_size(fc) - 1)); + } + + #if (BITS_PER_LONG == 64) +@@ -165,128 +136,100 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) + return 0; + } + +-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, +- flow_resolve_t resolver) ++struct flow_cache_entry *flow_cache_lookup(struct flow_cache *fc, ++ struct flowi *key, ++ u16 family, u8 dir) + { + struct flow_cache_entry *fle, **head; ++ struct flow_cache_percpu *fcp; + unsigned int hash; +- int cpu; + + local_bh_disable(); +- cpu = smp_processor_id(); ++ fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); + + fle = NULL; + /* Packet really early in init? Making flow_cache_init a + * pre-smp initcall would solve this. --RR */ +- if (!flow_table(cpu)) ++ if (!fcp->hash_table) + goto nocache; + +- if (flow_hash_rnd_recalc(cpu)) +- flow_new_hash_rnd(cpu); +- hash = flow_hash_code(key, cpu); ++ if (fcp->hash_rnd_recalc) ++ flow_new_hash_rnd(fc, fcp); ++ ++ hash = flow_hash_code(fc, fcp, key); + +- head = &flow_table(cpu)[hash]; ++ head = &fcp->hash_table[hash]; + for (fle = *head; fle; fle = fle->next) { + if (fle->family == family && + fle->dir == dir && + flow_key_compare(key, &fle->key) == 0) { +- if (fle->genid == atomic_read(&flow_cache_genid)) { +- void *ret = fle->object; +- +- if (ret) +- atomic_inc(fle->object_ref); +- local_bh_enable(); +- +- return ret; +- } +- break; +- } +- } +- +- if (!fle) { +- if (flow_count(cpu) > flow_hwm) +- flow_cache_shrink(cpu); +- +- fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); +- if (fle) { +- fle->next = *head; +- *head = fle; +- fle->family = family; +- fle->dir = dir; +- memcpy(&fle->key, key, sizeof(*key)); +- fle->object = NULL; +- flow_count(cpu)++; ++ return fle; + } + } + +-nocache: +- { +- int err; +- void *obj; +- atomic_t *obj_ref; +- +- err = resolver(net, key, family, dir, &obj, &obj_ref); ++ if (fcp->hash_count > fc->high_watermark) ++ flow_cache_shrink(fc, fcp); + +- if (fle && !err) { +- fle->genid = atomic_read(&flow_cache_genid); ++ fle = kmem_cache_zalloc(fc->flow_cachep, GFP_ATOMIC); ++ if (!fle) ++ goto nocache; + +- if (fle->object) +- atomic_dec(fle->object_ref); ++ fle->next = *head; ++ *head = fle; ++ fle->family = family; ++ fle->dir = dir; ++ memcpy(&fle->key, key, sizeof(*key)); ++ fcp->hash_count++; ++ return fle; + +- fle->object = obj; +- fle->object_ref = obj_ref; +- if (obj) +- atomic_inc(fle->object_ref); +- } +- local_bh_enable(); ++nocache: ++ local_bh_enable(); ++ return NULL; ++} + +- if (err) +- obj = ERR_PTR(err); +- return obj; +- } ++void flow_cache_entry_put(struct flow_cache_entry *fce) ++{ ++ local_bh_enable(); + } + + static void flow_cache_flush_tasklet(unsigned long data) + { +- struct flow_flush_info *info = (void *)data; ++ struct flow_flush_info *info = (void *) data; ++ struct flow_cache *fc = (void *) info->cache; ++ struct flow_cache_percpu *fcp; + int i; +- int cpu; + +- cpu = smp_processor_id(); +- for (i = 0; i < flow_hash_size; i++) { +- struct flow_cache_entry *fle; ++ if (info->flush == NULL) ++ goto done; + +- fle = flow_table(cpu)[i]; +- for (; fle; fle = 
fle->next) { +- unsigned genid = atomic_read(&flow_cache_genid); +- +- if (!fle->object || fle->genid == genid) +- continue; ++ fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); ++ for (i = 0; i < flow_cache_hash_size(fc); i++) { ++ struct flow_cache_entry *fle; + +- fle->object = NULL; +- atomic_dec(fle->object_ref); +- } ++ fle = fcp->hash_table[i]; ++ for (; fle; fle = fle->next) ++ info->flush(fc, fle); + } + ++done: + if (atomic_dec_and_test(&info->cpuleft)) + complete(&info->completion); + } + +-static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__)); + static void flow_cache_flush_per_cpu(void *data) + { + struct flow_flush_info *info = data; +- int cpu; + struct tasklet_struct *tasklet; ++ int cpu; + + cpu = smp_processor_id(); +- +- tasklet = flow_flush_tasklet(cpu); +- tasklet->data = (unsigned long)info; ++ tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; ++ tasklet->data = (unsigned long) data; + tasklet_schedule(tasklet); + } + +-void flow_cache_flush(void) ++void flow_cache_flush(struct flow_cache *fc, ++ void (*flush)(struct flow_cache *fc, struct flow_cache_entry *fce)) + { + struct flow_flush_info info; + static DEFINE_MUTEX(flow_flush_sem); +@@ -294,6 +237,8 @@ void flow_cache_flush(void) + /* Don't want cpus going down or up during this. */ + get_online_cpus(); + mutex_lock(&flow_flush_sem); ++ info.cache = fc; ++ info.flush = flush; + atomic_set(&info.cpuleft, num_online_cpus()); + init_completion(&info.completion); + +@@ -307,62 +252,99 @@ void flow_cache_flush(void) + put_online_cpus(); + } + +-static void __init flow_cache_cpu_prepare(int cpu) ++static void __init flow_cache_cpu_prepare(struct flow_cache *fc, ++ struct flow_cache_percpu *fcp) ++{ ++ fcp->hash_table = (struct flow_cache_entry **) ++ __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); ++ fcp->hash_rnd_recalc = 1; ++ fcp->hash_count = 0; ++ ++ tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); ++} ++ ++static int __cpuinit flow_cache_cpu(struct notifier_block *nfb, ++ unsigned long action, ++ void *hcpu) ++{ ++ struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); ++ int cpu = (unsigned long) hcpu; ++ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); ++ ++ switch (action) { ++ case CPU_UP_PREPARE: ++ case CPU_UP_PREPARE_FROZEN: ++ flow_cache_cpu_prepare(fc, fcp); ++ if (!fcp->hash_table) ++ return NOTIFY_BAD; ++ break; ++ case CPU_UP_CANCELED: ++ case CPU_UP_CANCELED_FROZEN: ++ case CPU_DEAD: ++ case CPU_DEAD_FROZEN: ++ if (fcp->hash_table) { ++ __flow_cache_shrink(fc, fcp, 0); ++ free_pages((unsigned long) fcp->hash_table, fc->order); ++ fcp->hash_table = NULL; ++ } ++ break; ++ } ++ return NOTIFY_OK; ++} ++ ++int flow_cache_init(struct flow_cache *fc, size_t entry_size) + { +- struct tasklet_struct *tasklet; + unsigned long order; ++ int i, r; ++ ++ BUG_ON(entry_size < sizeof(struct flow_cache_entry)); ++ fc->flow_cachep = kmem_cache_create("flow_cache", ++ entry_size, ++ 0, SLAB_PANIC, ++ NULL); ++ fc->hash_shift = 10; ++ fc->low_watermark = 2 * flow_cache_hash_size(fc); ++ fc->high_watermark = 4 * flow_cache_hash_size(fc); ++ fc->percpu = alloc_percpu(struct flow_cache_percpu); + + for (order = 0; + (PAGE_SIZE << order) < +- (sizeof(struct flow_cache_entry *)*flow_hash_size); ++ (sizeof(struct flow_cache_entry *) * flow_cache_hash_size(fc)); + order++) + /* NOTHING */; ++ fc->order = order; + +- flow_table(cpu) = (struct flow_cache_entry **) +- __get_free_pages(GFP_KERNEL|__GFP_ZERO, order); +- if (!flow_table(cpu)) 
+- panic("NET: failed to allocate flow cache order %lu\n", order); ++ setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, (unsigned long) fc); ++ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; ++ add_timer(&fc->rnd_timer); + +- flow_hash_rnd_recalc(cpu) = 1; +- flow_count(cpu) = 0; ++ for_each_online_cpu(i) { ++ r = flow_cache_cpu(&fc->hotcpu_notifier, ++ CPU_UP_PREPARE, (void*) i); ++ if (r != NOTIFY_OK) ++ panic("NET: failed to allocate flow cache order %lu\n", order); ++ } + +- tasklet = flow_flush_tasklet(cpu); +- tasklet_init(tasklet, flow_cache_flush_tasklet, 0); +-} ++ fc->hotcpu_notifier = (struct notifier_block){ ++ .notifier_call = flow_cache_cpu, ++ }; ++ register_hotcpu_notifier(&fc->hotcpu_notifier); + +-static int flow_cache_cpu(struct notifier_block *nfb, +- unsigned long action, +- void *hcpu) +-{ +- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) +- __flow_cache_shrink((unsigned long)hcpu, 0); +- return NOTIFY_OK; ++ return 0; + } + +-static int __init flow_cache_init(void) ++void flow_cache_fini(struct flow_cache *fc) + { + int i; + +- flow_cachep = kmem_cache_create("flow_cache", +- sizeof(struct flow_cache_entry), +- 0, SLAB_PANIC, +- NULL); +- flow_hash_shift = 10; +- flow_lwm = 2 * flow_hash_size; +- flow_hwm = 4 * flow_hash_size; +- +- setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0); +- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; +- add_timer(&flow_hash_rnd_timer); ++ del_timer(&fc->rnd_timer); ++ unregister_hotcpu_notifier(&fc->hotcpu_notifier); + + for_each_possible_cpu(i) +- flow_cache_cpu_prepare(i); ++ flow_cache_cpu(&fc->hotcpu_notifier, CPU_DEAD, (void*) i); + +- hotcpu_notifier(flow_cache_cpu, 0); +- return 0; ++ free_percpu(fc->percpu); ++ kmem_cache_destroy(fc->flow_cachep); + } + +-module_init(flow_cache_init); +- +-EXPORT_SYMBOL(flow_cache_genid); + EXPORT_SYMBOL(flow_cache_lookup); +diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c +index cc4797d..399853e 100644 +--- a/net/ipv6/inet6_connection_sock.c ++++ b/net/ipv6/inet6_connection_sock.c +@@ -151,8 +151,9 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, + + #ifdef CONFIG_XFRM + { ++ struct net *net = sock_net(sk); + struct rt6_info *rt = (struct rt6_info *)dst; +- rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); ++ rt->rt6i_flow_cache_genid = atomic_read(&net->xfrm.policy_genid); + } + #endif + } +@@ -166,8 +167,9 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) + + #ifdef CONFIG_XFRM + if (dst) { ++ struct net *net = sock_net(sk); + struct rt6_info *rt = (struct rt6_info *)dst; +- if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { ++ if (rt->rt6i_flow_cache_genid != atomic_read(&net->xfrm.policy_genid)) { + sk->sk_dst_cache = NULL; + dst_release(dst); + dst = NULL; +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index cb81ca3..82b01c3 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -44,7 +44,6 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; + + static struct kmem_cache *xfrm_dst_cache __read_mostly; + +-static HLIST_HEAD(xfrm_policy_gc_list); + static DEFINE_SPINLOCK(xfrm_policy_gc_lock); + + static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); +@@ -53,6 +52,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst); + + static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, + int dir); ++static int stale_bundle(struct dst_entry *dst); + + static inline int + 
__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) +@@ -216,6 +216,35 @@ expired: + xfrm_pol_put(xp); + } + ++struct xfrm_flow_cache_entry { ++ struct flow_cache_entry fce; ++ struct xfrm_policy *policy; ++ struct xfrm_dst *dst; ++ u32 policy_genid, bundles_genid; ++}; ++#define XFRM_CACHE_NO_POLICY ((struct xfrm_policy *) -1) ++ ++void xfrm_flow_cache_entry_validate(struct flow_cache *fc, ++ struct flow_cache_entry *fce) ++{ ++ struct net *net = container_of(fc, struct net, xfrm.flow_cache); ++ struct xfrm_flow_cache_entry *xfc = ++ container_of(fce, struct xfrm_flow_cache_entry, fce); ++ ++ if (xfc->policy_genid != atomic_read(&net->xfrm.policy_genid)) ++ goto invalid; ++ if (xfc->policy == NULL || xfc->policy == XFRM_CACHE_NO_POLICY) ++ return; ++ if (xfc->policy->walk.dead) ++ goto invalid; ++ if (xfc->bundles_genid != atomic_read(&xfc->policy->bundles_genid)) ++ goto invalid_dst; ++ return; ++invalid: ++ xfc->policy = NULL; ++invalid_dst: ++ xfc->dst = NULL; ++} + + /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 + * SPD calls. +@@ -269,27 +298,26 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) + if (del_timer(&policy->timer)) + atomic_dec(&policy->refcnt); + +- if (atomic_read(&policy->refcnt) > 1) +- flow_cache_flush(); +- + xfrm_pol_put(policy); + } + + static void xfrm_policy_gc_task(struct work_struct *work) + { ++ struct net *net = container_of(work, struct net, xfrm.policy_gc_work); + struct xfrm_policy *policy; + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; + + spin_lock_bh(&xfrm_policy_gc_lock); +- gc_list.first = xfrm_policy_gc_list.first; +- INIT_HLIST_HEAD(&xfrm_policy_gc_list); ++ gc_list.first = net->xfrm.policy_gc_list.first; ++ INIT_HLIST_HEAD(&net->xfrm.policy_gc_list); + spin_unlock_bh(&xfrm_policy_gc_lock); + ++ flow_cache_flush(&net->xfrm.flow_cache, xfrm_flow_cache_entry_validate); ++ + hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) + xfrm_policy_gc_kill(policy); + } +-static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task); + + /* Rule must be locked. Release descentant resources, announce + * entry dead. The rule must be unlinked from lists to the moment. +@@ -297,6 +325,7 @@ static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task); + + static void xfrm_policy_kill(struct xfrm_policy *policy) + { ++ struct net *net = xp_net(policy); + int dead; + + write_lock_bh(&policy->lock); +@@ -310,10 +339,10 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) + } + + spin_lock_bh(&xfrm_policy_gc_lock); +- hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); ++ hlist_add_head(&policy->bydst, &net->xfrm.policy_gc_list); + spin_unlock_bh(&xfrm_policy_gc_lock); + +- schedule_work(&xfrm_policy_gc_work); ++ schedule_work(&net->xfrm.policy_gc_work); + } + + static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; +@@ -586,7 +615,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) + hlist_add_head(&policy->bydst, chain); + xfrm_pol_hold(policy); + net->xfrm.policy_count[dir]++; +- atomic_inc(&flow_cache_genid); ++ atomic_inc(&net->xfrm.policy_genid); + if (delpol) + __xfrm_policy_unlink(delpol, dir); + policy->index = delpol ? 
delpol->index : xfrm_gen_index(net, dir); +@@ -619,11 +648,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) + gc_list = dst; + + policy->bundles = NULL; ++ atomic_inc(&policy->bundles_genid); + } + write_unlock(&policy->lock); + } + read_unlock_bh(&xfrm_policy_lock); + ++ flow_cache_flush(&net->xfrm.flow_cache, NULL); + while (gc_list) { + struct dst_entry *dst = gc_list; + +@@ -669,7 +700,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, + write_unlock_bh(&xfrm_policy_lock); + + if (ret && delete) { +- atomic_inc(&flow_cache_genid); ++ atomic_inc(&net->xfrm.policy_genid); + xfrm_policy_kill(ret); + } + return ret; +@@ -710,7 +741,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id, + write_unlock_bh(&xfrm_policy_lock); + + if (ret && delete) { +- atomic_inc(&flow_cache_genid); ++ atomic_inc(&net->xfrm.policy_genid); + xfrm_policy_kill(ret); + } + return ret; +@@ -824,7 +855,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) + } + + } +- atomic_inc(&flow_cache_genid); ++ atomic_inc(&net->xfrm.policy_genid); + out: + write_unlock_bh(&xfrm_policy_lock); + return err; +@@ -977,32 +1008,18 @@ fail: + return ret; + } + +-static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, +- u8 dir, void **objp, atomic_t **obj_refp) ++static struct xfrm_policy *xfrm_policy_lookup( ++ struct net *net, struct flowi *fl, ++ u16 family, u8 dir) + { ++#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_policy *pol; +- int err = 0; + +-#ifdef CONFIG_XFRM_SUB_POLICY + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); +- if (IS_ERR(pol)) { +- err = PTR_ERR(pol); +- pol = NULL; +- } +- if (pol || err) +- goto end; ++ if (pol != NULL) ++ return pol; + #endif +- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); +- if (IS_ERR(pol)) { +- err = PTR_ERR(pol); +- pol = NULL; +- } +-#ifdef CONFIG_XFRM_SUB_POLICY +-end: +-#endif +- if ((*objp = (void *) pol) != NULL) +- *obj_refp = &pol->refcnt; +- return err; ++ return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); + } + + static inline int policy_to_flow_dir(int dir) +@@ -1083,12 +1100,14 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, + + int xfrm_policy_delete(struct xfrm_policy *pol, int dir) + { ++ struct net *net = xp_net(pol); ++ + write_lock_bh(&xfrm_policy_lock); + pol = __xfrm_policy_unlink(pol, dir); + write_unlock_bh(&xfrm_policy_lock); + if (pol) { + if (dir < XFRM_POLICY_MAX) +- atomic_inc(&flow_cache_genid); ++ atomic_inc(&net->xfrm.policy_genid); + xfrm_policy_kill(pol); + return 0; + } +@@ -1512,13 +1531,34 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) + #endif + } + +-static int stale_bundle(struct dst_entry *dst); +- + /* Main function: finds/creates a bundle for given flow. + * + * At the moment we eat a raw IP route. Mostly to speed up lookups + * on interfaces with disabled IPsec. 
+ */ ++ ++static void xfrm_flow_cache_update(struct net *net, struct flowi *key, ++ u16 family, u8 dir, ++ struct xfrm_policy *pol, ++ struct xfrm_dst *dst) ++{ ++ struct flow_cache_entry *fce; ++ struct xfrm_flow_cache_entry *xf; ++ ++ fce = flow_cache_lookup(&net->xfrm.flow_cache, ++ key, family, dir); ++ if (fce == NULL) ++ return; ++ ++ xf = container_of(fce, struct xfrm_flow_cache_entry, fce); ++ xf->policy_genid = atomic_read(&net->xfrm.policy_genid); ++ xf->policy = pol; ++ if (dst != NULL) ++ xf->bundles_genid = atomic_read(&pol->bundles_genid); ++ xf->dst = dst; ++ flow_cache_entry_put(fce); ++} ++ + int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, + struct sock *sk, int flags) + { +@@ -1537,8 +1577,10 @@ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, + u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + + restart: +- genid = atomic_read(&flow_cache_genid); ++ family = dst_orig->ops->family; ++ genid = atomic_read(&net->xfrm.policy_genid); + policy = NULL; ++ dst = NULL; + for (pi = 0; pi < ARRAY_SIZE(pols); pi++) + pols[pi] = NULL; + npols = 0; +@@ -1555,24 +1597,51 @@ restart: + } + + if (!policy) { ++ struct flow_cache_entry *fce; ++ struct xfrm_flow_cache_entry *xf; ++ + /* To accelerate a bit... */ + if ((dst_orig->flags & DST_NOXFRM) || + !net->xfrm.policy_count[XFRM_POLICY_OUT]) + goto nopol; + +- policy = flow_cache_lookup(net, fl, dst_orig->ops->family, +- dir, xfrm_policy_lookup); +- err = PTR_ERR(policy); +- if (IS_ERR(policy)) { +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); +- goto dropdst; ++ fce = flow_cache_lookup(&net->xfrm.flow_cache, ++ fl, family, dir); ++ if (fce == NULL) ++ goto no_cache; ++ ++ xf = container_of(fce, struct xfrm_flow_cache_entry, fce); ++ xfrm_flow_cache_entry_validate(&net->xfrm.flow_cache, fce); ++ if (xf->policy != NULL) { ++ policy = xf->policy; ++ if (policy != XFRM_CACHE_NO_POLICY) ++ xfrm_pol_hold(policy); ++ if (xf->dst != NULL) ++ dst = dst_clone((struct dst_entry *) xf->dst); ++ } ++ flow_cache_entry_put(fce); ++ if (policy == XFRM_CACHE_NO_POLICY) ++ goto nopol; ++ if (dst && !xfrm_bundle_ok(policy, (struct xfrm_dst *) dst, fl, family, 0)) { ++ dst_release(dst); ++ dst = NULL; + } + } ++no_cache: ++ if (!policy) { ++ policy = xfrm_policy_lookup(net, fl, family, dir); ++ if (!policy) { ++ xfrm_flow_cache_update( ++ net, fl, family, dir, ++ XFRM_CACHE_NO_POLICY, NULL); ++ goto nopol; ++ } ++ } ++ if (IS_ERR(policy)) { ++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); ++ goto dropdst; ++ } + +- if (!policy) +- goto nopol; +- +- family = dst_orig->ops->family; + pols[0] = policy; + npols ++; + xfrm_nr += pols[0]->xfrm_nr; +@@ -1583,6 +1652,9 @@ restart: + + policy->curlft.use_time = get_seconds(); + ++ if (dst) ++ goto dst_found; ++ + switch (policy->action) { + default: + case XFRM_POLICY_BLOCK: +@@ -1593,18 +1665,11 @@ restart: + + case XFRM_POLICY_ALLOW: + #ifndef CONFIG_XFRM_SUB_POLICY +- if (policy->xfrm_nr == 0) { +- /* Flow passes not transformed. */ +- xfrm_pol_put(policy); +- return 0; +- } ++ if (policy->xfrm_nr == 0) ++ goto no_transform; + #endif + +- /* Try to find matching bundle. +- * +- * LATER: help from flow cache. It is optional, this +- * is required only for output policy. +- */ ++ /* Try to find matching bundle the hard way. */ + dst = xfrm_find_bundle(fl, policy, family); + if (IS_ERR(dst)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); +@@ -1644,12 +1709,8 @@ restart: + * they are searched. 
See above not-transformed bypass + * is surrounded by non-sub policy configuration, too. + */ +- if (xfrm_nr == 0) { +- /* Flow passes not transformed. */ +- xfrm_pols_put(pols, npols); +- return 0; +- } +- ++ if (xfrm_nr == 0) ++ goto no_transform; + #endif + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); + +@@ -1680,7 +1741,7 @@ restart: + goto error; + } + if (nx == -EAGAIN || +- genid != atomic_read(&flow_cache_genid)) { ++ genid != atomic_read(&net->xfrm.policy_genid)) { + xfrm_pols_put(pols, npols); + goto restart; + } +@@ -1691,11 +1752,8 @@ restart: + goto error; + } + } +- if (nx == 0) { +- /* Flow passes not transformed. */ +- xfrm_pols_put(pols, npols); +- return 0; +- } ++ if (nx == 0) ++ goto no_transform; + + dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); + err = PTR_ERR(dst); +@@ -1744,6 +1802,9 @@ restart: + dst_hold(dst); + write_unlock_bh(&policy->lock); + } ++ xfrm_flow_cache_update(net, fl, family, dir, ++ policy, (struct xfrm_dst *) dst); ++dst_found: + *dst_p = dst; + dst_release(dst_orig); + xfrm_pols_put(pols, npols); +@@ -1761,7 +1822,12 @@ nopol: + if (flags & XFRM_LOOKUP_ICMP) + goto dropdst; + return 0; ++no_transform: ++ /* Flow passes not transformed. */ ++ xfrm_pols_put(pols, npols); ++ return 0; + } ++ + EXPORT_SYMBOL(__xfrm_lookup); + + int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, +@@ -1919,10 +1985,35 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, + } + } + +- if (!pol) +- pol = flow_cache_lookup(net, &fl, family, fl_dir, +- xfrm_policy_lookup); +- ++ if (!pol) { ++ struct flow_cache_entry *fce; ++ struct xfrm_flow_cache_entry *xf; ++ ++ fce = flow_cache_lookup(&net->xfrm.flow_cache, ++ &fl, family, dir); ++ if (fce != NULL) { ++ xf = container_of(fce, struct xfrm_flow_cache_entry, fce); ++ xfrm_flow_cache_entry_validate(&net->xfrm.flow_cache, fce); ++ if (xf->policy != NULL) { ++ pol = xf->policy; ++ if (pol != XFRM_CACHE_NO_POLICY) ++ xfrm_pol_hold(pol); ++ else ++ pol = NULL; ++ } else { ++ pol = xfrm_policy_lookup(net, &fl, family, dir); ++ if (!IS_ERR(pol)) { ++ if (pol) ++ xf->policy = pol; ++ else ++ xf->policy = XFRM_CACHE_NO_POLICY; ++ } ++ xf->dst = NULL; ++ xf->policy_genid = atomic_read(&net->xfrm.policy_genid); ++ } ++ flow_cache_entry_put(fce); ++ } ++ } + if (IS_ERR(pol)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); + return 0; +@@ -2121,6 +2212,7 @@ static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_ent + dstp = &dst->next; + } + } ++ atomic_inc(&pol->bundles_genid); + write_unlock(&pol->lock); + } + +@@ -2148,6 +2240,7 @@ static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *)) + } + read_unlock_bh(&xfrm_policy_lock); + ++ flow_cache_flush(&net->xfrm.flow_cache, NULL); + while (gc_list) { + struct dst_entry *dst = gc_list; + gc_list = dst->next; +@@ -2428,6 +2521,9 @@ static int __net_init xfrm_policy_init(struct net *net) + + INIT_LIST_HEAD(&net->xfrm.policy_all); + INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); ++ INIT_HLIST_HEAD(&net->xfrm.policy_gc_list); ++ INIT_WORK(&net->xfrm.policy_gc_work, xfrm_policy_gc_task); ++ flow_cache_init(&net->xfrm.flow_cache, sizeof(struct xfrm_flow_cache_entry)); + if (net_eq(net, &init_net)) + register_netdevice_notifier(&xfrm_dev_notifier); + return 0; +@@ -2461,7 +2557,7 @@ static void xfrm_policy_fini(struct net *net) + audit_info.sessionid = -1; + audit_info.secid = 0; + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); +- 
flush_work(&xfrm_policy_gc_work); ++ flush_work(&net->xfrm.policy_gc_work); + + WARN_ON(!list_empty(&net->xfrm.policy_all)); + +@@ -2479,6 +2575,8 @@ static void xfrm_policy_fini(struct net *net) + sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); + xfrm_hash_free(net->xfrm.policy_byidx, sz); ++ ++ flow_cache_fini(&net->xfrm.flow_cache); + } + + static int __net_init xfrm_net_init(struct net *net) +@@ -2685,8 +2783,9 @@ static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) + static int xfrm_policy_migrate(struct xfrm_policy *pol, + struct xfrm_migrate *m, int num_migrate) + { ++ struct net *net = xp_net(pol); + struct xfrm_migrate *mp; +- struct dst_entry *dst; ++ struct dst_entry *gc_list = NULL, *tail; + int i, j, n = 0; + + write_lock_bh(&pol->lock); +@@ -2711,15 +2810,25 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, + sizeof(pol->xfrm_vec[i].saddr)); + pol->xfrm_vec[i].encap_family = mp->new_family; + /* flush bundles */ +- while ((dst = pol->bundles) != NULL) { +- pol->bundles = dst->next; +- dst_free(dst); +- } ++ tail = pol->bundles; ++ while (tail->next) ++ tail = tail->next; ++ tail->next = gc_list; ++ gc_list = pol->bundles; ++ pol->bundles = NULL; ++ atomic_inc(&pol->bundles_genid); + } + } +- + write_unlock_bh(&pol->lock); + ++ flow_cache_flush(&net->xfrm.flow_cache, NULL); ++ while (gc_list) { ++ struct dst_entry *dst = gc_list; ++ ++ gc_list = dst->next; ++ dst_free(dst); ++ } ++ + if (!n) + return -ENODATA; + +-- +1.7.0.2 + |
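A note on invalidation in the patch above: the single global `flow_cache_genid` is replaced by a per-namespace `policy_genid` plus a per-policy `bundles_genid`. Cached entries record both counters when they are filled, and `xfrm_flow_cache_entry_validate()` lazily clears the cached policy/bundle pointers when either counter has moved on. `flow_cache_flush()` now also accepts that validator as a callback and runs it against every entry on every CPU via the per-cpu flush tasklets; `xfrm_policy_gc_task()` does this before policies are finally released, so the cache can hold plain pointers instead of taking references. Continuing the hypothetical `my_*` example above, a flush-time validator might be wired up like this (illustrative only):

```c
/*
 * Illustrative sketch of the new flush/invalidate path, continuing the
 * hypothetical my_* example; the real callback in this patch is
 * xfrm_flow_cache_entry_validate().
 */
static atomic_t my_genid = ATOMIC_INIT(0);

static void my_validate(struct flow_cache *fc, struct flow_cache_entry *fce)
{
	struct my_flow_cache_entry *e =
		container_of(fce, struct my_flow_cache_entry, fce);

	/* runs on every CPU, for every cached entry; only mark, never free */
	if (e->genid != atomic_read(&my_genid))
		e->object = NULL;
}

static void my_invalidate_all(void)
{
	atomic_inc(&my_genid);				/* future hits see a mismatch */
	flow_cache_flush(&my_cache, my_validate);	/* scrub current entries */
}
```

Because the callback only marks entries stale and never frees them, and the flush runs as a tasklet on each CPU, it naturally serializes with the BH-disabled lookups on that CPU; the entries themselves are reclaimed only by the watermark-driven `__flow_cache_shrink()`.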