diff options
Diffstat (limited to 'main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch')
-rw-r--r-- | main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch | 1068 |
1 files changed, 0 insertions, 1068 deletions
diff --git a/main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch b/main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch deleted file mode 100644 index 0d066c84d9..0000000000 --- a/main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch +++ /dev/null @@ -1,1068 +0,0 @@ -From f89d21648e6dc06db2aeabc8926c270894c41446 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi> -Date: Wed, 7 Apr 2010 00:30:05 +0000 -Subject: [PATCH 15/18] xfrm: cache bundles instead of policies for outgoing flows - -__xfrm_lookup() is called for each packet transmitted out of -system. The xfrm_find_bundle() does a linear search which can -kill system performance depending on how many bundles are -required per policy. - -This modifies __xfrm_lookup() to store bundles directly in -the flow cache. If we did not get a hit, we just create a new -bundle instead of doing slow search. This means that we can now -get multiple xfrm_dst's for same flow (on per-cpu basis). - -Signed-off-by: Timo Teras <timo.teras@iki.fi> -Signed-off-by: David S. Miller <davem@davemloft.net> -(backported from commit 80c802f3073e84c956846e921e8a0b02dfa3755f) ---- - include/net/xfrm.h | 10 +- - net/ipv4/xfrm4_policy.c | 22 -- - net/ipv6/xfrm6_policy.c | 31 -- - net/xfrm/xfrm_policy.c | 710 +++++++++++++++++++++++++---------------------- - 4 files changed, 383 insertions(+), 390 deletions(-) - -diff --git a/include/net/xfrm.h b/include/net/xfrm.h -index 6023a48..d51ef61 100644 ---- a/include/net/xfrm.h -+++ b/include/net/xfrm.h -@@ -266,7 +266,6 @@ struct xfrm_policy_afinfo { - xfrm_address_t *saddr, - xfrm_address_t *daddr); - int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr); -- struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); - void (*decode_session)(struct sk_buff *skb, - struct flowi *fl, - int reverse); -@@ -485,12 +484,12 @@ struct xfrm_policy - struct timer_list timer; - - struct flow_cache_object flo; -+ atomic_t genid; - u32 priority; - u32 index; - struct xfrm_selector selector; - struct xfrm_lifetime_cfg lft; - struct xfrm_lifetime_cur curlft; -- struct dst_entry *bundles; - struct xfrm_policy_walk_entry walk; - u8 type; - u8 action; -@@ -883,11 +882,15 @@ struct xfrm_dst - struct rt6_info rt6; - } u; - struct dst_entry *route; -+ struct flow_cache_object flo; -+ struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; -+ int num_pols, num_xfrms; - #ifdef CONFIG_XFRM_SUB_POLICY - struct flowi *origin; - struct xfrm_selector *partner; - #endif -- u32 genid; -+ u32 xfrm_genid; -+ u32 policy_genid; - u32 route_mtu_cached; - u32 child_mtu_cached; - u32 route_cookie; -@@ -897,6 +900,7 @@ struct xfrm_dst - #ifdef CONFIG_XFRM - static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) - { -+ xfrm_pols_put(xdst->pols, xdst->num_pols); - dst_release(xdst->route); - if (likely(xdst->u.dst.xfrm)) - xfrm_state_put(xdst->u.dst.xfrm); -diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c -index 7009886..651a3e7 100644 ---- a/net/ipv4/xfrm4_policy.c -+++ b/net/ipv4/xfrm4_policy.c -@@ -60,27 +60,6 @@ static int xfrm4_get_saddr(struct net *net, - return 0; - } - --static struct dst_entry * --__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) --{ -- struct dst_entry *dst; -- -- read_lock_bh(&policy->lock); -- for (dst = policy->bundles; dst; dst = dst->next) { -- struct xfrm_dst *xdst = (struct xfrm_dst *)dst; -- if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/ -- xdst->u.rt.fl.fl4_dst == fl->fl4_dst && -- xdst->u.rt.fl.fl4_src == fl->fl4_src && -- xdst->u.rt.fl.fl4_tos == fl->fl4_tos && -- xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) { -- dst_clone(dst); -- break; -- } -- } -- read_unlock_bh(&policy->lock); -- return dst; --} -- - static int xfrm4_get_tos(struct flowi *fl) - { - return fl->fl4_tos; -@@ -258,7 +237,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { - .dst_ops = &xfrm4_dst_ops, - .dst_lookup = xfrm4_dst_lookup, - .get_saddr = xfrm4_get_saddr, -- .find_bundle = __xfrm4_find_bundle, - .decode_session = _decode_session4, - .get_tos = xfrm4_get_tos, - .init_path = xfrm4_init_path, -diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c -index 3f89ab7..fb2a5b7 100644 ---- a/net/ipv6/xfrm6_policy.c -+++ b/net/ipv6/xfrm6_policy.c -@@ -68,36 +68,6 @@ static int xfrm6_get_saddr(struct net *net, - return 0; - } - --static struct dst_entry * --__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) --{ -- struct dst_entry *dst; -- -- /* Still not clear if we should set fl->fl6_{src,dst}... */ -- read_lock_bh(&policy->lock); -- for (dst = policy->bundles; dst; dst = dst->next) { -- struct xfrm_dst *xdst = (struct xfrm_dst*)dst; -- struct in6_addr fl_dst_prefix, fl_src_prefix; -- -- ipv6_addr_prefix(&fl_dst_prefix, -- &fl->fl6_dst, -- xdst->u.rt6.rt6i_dst.plen); -- ipv6_addr_prefix(&fl_src_prefix, -- &fl->fl6_src, -- xdst->u.rt6.rt6i_src.plen); -- if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && -- ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && -- xfrm_bundle_ok(policy, xdst, fl, AF_INET6, -- (xdst->u.rt6.rt6i_dst.plen != 128 || -- xdst->u.rt6.rt6i_src.plen != 128))) { -- dst_clone(dst); -- break; -- } -- } -- read_unlock_bh(&policy->lock); -- return dst; --} -- - static int xfrm6_get_tos(struct flowi *fl) - { - return 0; -@@ -290,7 +260,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { - .dst_ops = &xfrm6_dst_ops, - .dst_lookup = xfrm6_dst_lookup, - .get_saddr = xfrm6_get_saddr, -- .find_bundle = __xfrm6_find_bundle, - .decode_session = _decode_session6, - .get_tos = xfrm6_get_tos, - .init_path = xfrm6_init_path, -diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c -index d1eb2b5..0379d82 100644 ---- a/net/xfrm/xfrm_policy.c -+++ b/net/xfrm/xfrm_policy.c -@@ -37,6 +37,8 @@ - DEFINE_MUTEX(xfrm_cfg_mutex); - EXPORT_SYMBOL(xfrm_cfg_mutex); - -+static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); -+static struct dst_entry *xfrm_policy_sk_bundles; - static DEFINE_RWLOCK(xfrm_policy_lock); - - static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); -@@ -50,6 +52,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock); - static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); - static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); - static void xfrm_init_pmtu(struct dst_entry *dst); -+static int stale_bundle(struct dst_entry *dst); - - static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, - int dir); -@@ -277,8 +280,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) - { - BUG_ON(!policy->walk.dead); - -- BUG_ON(policy->bundles); -- - if (del_timer(&policy->timer)) - BUG(); - -@@ -289,12 +290,7 @@ EXPORT_SYMBOL(xfrm_policy_destroy); - - static void xfrm_policy_gc_kill(struct xfrm_policy *policy) - { -- struct dst_entry *dst; -- -- while ((dst = policy->bundles) != NULL) { -- policy->bundles = dst->next; -- dst_free(dst); -- } -+ atomic_inc(&policy->genid); - - if (del_timer(&policy->timer)) - atomic_dec(&policy->refcnt); -@@ -572,7 +568,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) - struct xfrm_policy *delpol; - struct hlist_head *chain; - struct hlist_node *entry, *newpos; -- struct dst_entry *gc_list; - - write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); -@@ -620,34 +615,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) - else if (xfrm_bydst_should_resize(net, dir, NULL)) - schedule_work(&net->xfrm.policy_hash_work); - -- read_lock_bh(&xfrm_policy_lock); -- gc_list = NULL; -- entry = &policy->bydst; -- hlist_for_each_entry_continue(policy, entry, bydst) { -- struct dst_entry *dst; -- -- write_lock(&policy->lock); -- dst = policy->bundles; -- if (dst) { -- struct dst_entry *tail = dst; -- while (tail->next) -- tail = tail->next; -- tail->next = gc_list; -- gc_list = dst; -- -- policy->bundles = NULL; -- } -- write_unlock(&policy->lock); -- } -- read_unlock_bh(&xfrm_policy_lock); -- -- while (gc_list) { -- struct dst_entry *dst = gc_list; -- -- gc_list = dst->next; -- dst_free(dst); -- } -- - return 0; - } - EXPORT_SYMBOL(xfrm_policy_insert); -@@ -990,6 +957,19 @@ fail: - return ret; - } - -+static struct xfrm_policy * -+__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) -+{ -+#ifdef CONFIG_XFRM_SUB_POLICY -+ struct xfrm_policy *pol; -+ -+ pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); -+ if (pol != NULL) -+ return pol; -+#endif -+ return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); -+} -+ - static struct flow_cache_object * - xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, - u8 dir, struct flow_cache_object *old_obj, void *ctx) -@@ -999,21 +979,10 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, - if (old_obj) - xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - --#ifdef CONFIG_XFRM_SUB_POLICY -- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); -- if (IS_ERR(pol)) -+ pol = __xfrm_policy_lookup(net, fl, family, dir); -+ if (pol == NULL || IS_ERR(pol)) - return ERR_CAST(pol); -- if (pol) -- goto found; --#endif -- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); -- if (IS_ERR(pol)) -- return ERR_CAST(pol); -- if (pol) -- goto found; -- return NULL; - --found: - /* Resolver returns two references: - * one for cache and one for caller of flow_cache_lookup() */ - xfrm_pol_hold(pol); -@@ -1299,18 +1268,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, - * still valid. - */ - --static struct dst_entry * --xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family) --{ -- struct dst_entry *x; -- struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); -- if (unlikely(afinfo == NULL)) -- return ERR_PTR(-EINVAL); -- x = afinfo->find_bundle(fl, policy); -- xfrm_policy_put_afinfo(afinfo); -- return x; --} -- - static inline int xfrm_get_tos(struct flowi *fl, int family) - { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); -@@ -1326,6 +1283,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) - return tos; - } - -+static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) -+{ -+ struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); -+ struct dst_entry *dst = &xdst->u.dst; -+ -+ if (xdst->route == NULL) { -+ /* Dummy bundle - if it has xfrms we were not -+ * able to build bundle as template resolution failed. -+ * It means we need to try again resolving. */ -+ if (xdst->num_xfrms > 0) -+ return NULL; -+ } else { -+ /* Real bundle */ -+ if (stale_bundle(dst)) -+ return NULL; -+ } -+ -+ dst_hold(dst); -+ return flo; -+} -+ -+static int xfrm_bundle_flo_check(struct flow_cache_object *flo) -+{ -+ struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); -+ struct dst_entry *dst = &xdst->u.dst; -+ -+ if (!xdst->route) -+ return 0; -+ if (stale_bundle(dst)) -+ return 0; -+ -+ return 1; -+} -+ -+static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) -+{ -+ struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); -+ struct dst_entry *dst = &xdst->u.dst; -+ -+ dst_free(dst); -+} -+ -+static const struct flow_cache_ops xfrm_bundle_fc_ops = { -+ .get = xfrm_bundle_flo_get, -+ .check = xfrm_bundle_flo_check, -+ .delete = xfrm_bundle_flo_delete, -+}; -+ - static inline struct xfrm_dst *xfrm_alloc_dst(int family) - { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); -@@ -1338,6 +1343,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(int family) - - xfrm_policy_put_afinfo(afinfo); - -+ xdst->flo.ops = &xfrm_bundle_fc_ops; -+ - return xdst; - } - -@@ -1375,6 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, - return err; - } - -+ - /* Allocate chain of dst_entry's, attach known xfrm's, calculate - * all the metrics... Shortly, bundle a bundle. - */ -@@ -1437,7 +1445,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, - dst_hold(dst); - - dst1->xfrm = xfrm[i]; -- xdst->genid = xfrm[i]->genid; -+ xdst->xfrm_genid = xfrm[i]->genid; - - dst1->obsolete = -1; - dst1->flags |= DST_HOST; -@@ -1530,7 +1538,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) - #endif - } - --static int stale_bundle(struct dst_entry *dst); -+static int xfrm_expand_policies(struct flowi *fl, u16 family, -+ struct xfrm_policy **pols, -+ int *num_pols, int *num_xfrms) -+{ -+ int i; -+ -+ if (*num_pols == 0 || !pols[0]) { -+ *num_pols = 0; -+ *num_xfrms = 0; -+ return 0; -+ } -+ if (IS_ERR(pols[0])) -+ return PTR_ERR(pols[0]); -+ -+ *num_xfrms = pols[0]->xfrm_nr; -+ -+#ifdef CONFIG_XFRM_SUB_POLICY -+ if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && -+ pols[0]->type != XFRM_POLICY_TYPE_MAIN) { -+ pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), -+ XFRM_POLICY_TYPE_MAIN, -+ fl, family, -+ XFRM_POLICY_OUT); -+ if (pols[1]) { -+ if (IS_ERR(pols[1])) { -+ xfrm_pols_put(pols, *num_pols); -+ return PTR_ERR(pols[1]); -+ } -+ (*num_pols) ++; -+ (*num_xfrms) += pols[1]->xfrm_nr; -+ } -+ } -+#endif -+ for (i = 0; i < *num_pols; i++) { -+ if (pols[i]->action != XFRM_POLICY_ALLOW) { -+ *num_xfrms = -1; -+ break; -+ } -+ } -+ -+ return 0; -+ -+} -+ -+static struct xfrm_dst * -+xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, -+ struct flowi *fl, u16 family, -+ struct dst_entry *dst_orig) -+{ -+ struct net *net = xp_net(pols[0]); -+ struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; -+ struct dst_entry *dst; -+ struct xfrm_dst *xdst; -+ int err; -+ -+ /* Try to instantiate a bundle */ -+ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); -+ if (err < 0) { -+ if (err != -EAGAIN) -+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); -+ return ERR_PTR(err); -+ } -+ -+ dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); -+ if (IS_ERR(dst)) { -+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); -+ return ERR_CAST(dst); -+ } -+ -+ xdst = (struct xfrm_dst *)dst; -+ xdst->num_xfrms = err; -+ if (num_pols > 1) -+ err = xfrm_dst_update_parent(dst, &pols[1]->selector); -+ else -+ err = xfrm_dst_update_origin(dst, fl); -+ if (unlikely(err)) { -+ dst_free(dst); -+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); -+ return ERR_PTR(err); -+ } -+ -+ xdst->num_pols = num_pols; -+ memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); -+ xdst->policy_genid = atomic_read(&pols[0]->genid); -+ -+ return xdst; -+} -+ -+static struct flow_cache_object * -+xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, -+ struct flow_cache_object *oldflo, void *ctx) -+{ -+ struct dst_entry *dst_orig = (struct dst_entry *)ctx; -+ struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; -+ struct xfrm_dst *xdst, *new_xdst; -+ int num_pols = 0, num_xfrms = 0, i, err, pol_dead; -+ -+ /* Check if the policies from old bundle are usable */ -+ xdst = NULL; -+ if (oldflo) { -+ xdst = container_of(oldflo, struct xfrm_dst, flo); -+ num_pols = xdst->num_pols; -+ num_xfrms = xdst->num_xfrms; -+ pol_dead = 0; -+ for (i = 0; i < num_pols; i++) { -+ pols[i] = xdst->pols[i]; -+ pol_dead |= pols[i]->walk.dead; -+ } -+ if (pol_dead) { -+ dst_free(&xdst->u.dst); -+ xdst = NULL; -+ num_pols = 0; -+ num_xfrms = 0; -+ oldflo = NULL; -+ } -+ } -+ -+ /* Resolve policies to use if we couldn't get them from -+ * previous cache entry */ -+ if (xdst == NULL) { -+ num_pols = 1; -+ pols[0] = __xfrm_policy_lookup(net, fl, family, dir); -+ err = xfrm_expand_policies(fl, family, pols, -+ &num_pols, &num_xfrms); -+ if (err < 0) -+ goto inc_error; -+ if (num_pols == 0) -+ return NULL; -+ if (num_xfrms <= 0) -+ goto make_dummy_bundle; -+ } -+ -+ new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); -+ if (IS_ERR(new_xdst)) { -+ err = PTR_ERR(new_xdst); -+ if (err != -EAGAIN) -+ goto error; -+ if (oldflo == NULL) -+ goto make_dummy_bundle; -+ dst_hold(&xdst->u.dst); -+ return oldflo; -+ } -+ -+ /* Kill the previous bundle */ -+ if (xdst) { -+ /* The policies were stolen for newly generated bundle */ -+ xdst->num_pols = 0; -+ dst_free(&xdst->u.dst); -+ } -+ -+ /* Flow cache does not have reference, it dst_free()'s, -+ * but we do need to return one reference for original caller */ -+ dst_hold(&new_xdst->u.dst); -+ return &new_xdst->flo; -+ -+make_dummy_bundle: -+ /* We found policies, but there's no bundles to instantiate: -+ * either because the policy blocks, has no transformations or -+ * we could not build template (no xfrm_states).*/ -+ xdst = xfrm_alloc_dst(family); -+ if (IS_ERR(xdst)) { -+ xfrm_pols_put(pols, num_pols); -+ return ERR_CAST(xdst); -+ } -+ xdst->num_pols = num_pols; -+ xdst->num_xfrms = num_xfrms; -+ memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); -+ -+ dst_hold(&xdst->u.dst); -+ return &xdst->flo; -+ -+inc_error: -+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); -+error: -+ if (xdst != NULL) -+ dst_free(&xdst->u.dst); -+ else -+ xfrm_pols_put(pols, num_pols); -+ return ERR_PTR(err); -+} - - /* Main function: finds/creates a bundle for given flow. - * -@@ -1540,248 +1727,152 @@ static int stale_bundle(struct dst_entry *dst); - int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) - { -- struct xfrm_policy *policy; - struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; -- int npols; -- int pol_dead; -- int xfrm_nr; -- int pi; -- struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; -- struct dst_entry *dst, *dst_orig = *dst_p; -- int nx = 0; -- int err; -- u32 genid; -- u16 family; -+ struct flow_cache_object *flo; -+ struct xfrm_dst *xdst; -+ struct dst_entry *dst, *dst_orig = *dst_p, *route; -+ u16 family = dst_orig->ops->family; - u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); -+ int i, err, num_pols, num_xfrms, drop_pols = 0; - - restart: -- genid = atomic_read(&flow_cache_genid); -- policy = NULL; -- for (pi = 0; pi < ARRAY_SIZE(pols); pi++) -- pols[pi] = NULL; -- npols = 0; -- pol_dead = 0; -- xfrm_nr = 0; -+ dst = NULL; -+ xdst = NULL; -+ route = NULL; - - if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { -- policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); -- err = PTR_ERR(policy); -- if (IS_ERR(policy)) { -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); -+ num_pols = 1; -+ pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); -+ err = xfrm_expand_policies(fl, family, pols, -+ &num_pols, &num_xfrms); -+ if (err < 0) - goto dropdst; -+ -+ if (num_pols) { -+ if (num_xfrms <= 0) { -+ drop_pols = num_pols; -+ goto no_transform; -+ } -+ -+ xdst = xfrm_resolve_and_create_bundle( -+ pols, num_pols, fl, -+ family, dst_orig); -+ if (IS_ERR(xdst)) { -+ xfrm_pols_put(pols, num_pols); -+ err = PTR_ERR(xdst); -+ goto dropdst; -+ } -+ -+ spin_lock_bh(&xfrm_policy_sk_bundle_lock); -+ xdst->u.dst.next = xfrm_policy_sk_bundles; -+ xfrm_policy_sk_bundles = &xdst->u.dst; -+ spin_unlock_bh(&xfrm_policy_sk_bundle_lock); -+ -+ route = xdst->route; - } - } - -- if (!policy) { -- struct flow_cache_object *flo; -- -+ if (xdst == NULL) { - /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || - !net->xfrm.policy_count[XFRM_POLICY_OUT]) - goto nopol; - -- flo = flow_cache_lookup(net, fl, dst_orig->ops->family, -- dir, xfrm_policy_lookup, NULL); -- err = PTR_ERR(flo); -+ flo = flow_cache_lookup(net, fl, family, dir, -+ xfrm_bundle_lookup, dst_orig); -+ if (flo == NULL) -+ goto nopol; - if (IS_ERR(flo)) { -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); -+ err = PTR_ERR(flo); - goto dropdst; - } -- if (flo) -- policy = container_of(flo, struct xfrm_policy, flo); -- else -- policy = NULL; -+ xdst = container_of(flo, struct xfrm_dst, flo); -+ -+ num_pols = xdst->num_pols; -+ num_xfrms = xdst->num_xfrms; -+ memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols); -+ route = xdst->route; -+ } -+ -+ dst = &xdst->u.dst; -+ if (route == NULL && num_xfrms > 0) { -+ /* The only case when xfrm_bundle_lookup() returns a -+ * bundle with null route, is when the template could -+ * not be resolved. It means policies are there, but -+ * bundle could not be created, since we don't yet -+ * have the xfrm_state's. We need to wait for KM to -+ * negotiate new SA's or bail out with error.*/ -+ if (net->xfrm.sysctl_larval_drop) { -+ /* EREMOTE tells the caller to generate -+ * a one-shot blackhole route. */ -+ dst_release(dst); -+ xfrm_pols_put(pols, num_pols); -+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); -+ return -EREMOTE; -+ } -+ if (flags & XFRM_LOOKUP_WAIT) { -+ DECLARE_WAITQUEUE(wait, current); -+ -+ add_wait_queue(&net->xfrm.km_waitq, &wait); -+ set_current_state(TASK_INTERRUPTIBLE); -+ schedule(); -+ set_current_state(TASK_RUNNING); -+ remove_wait_queue(&net->xfrm.km_waitq, &wait); -+ -+ if (!signal_pending(current)) { -+ dst_release(dst); -+ goto restart; -+ } -+ -+ err = -ERESTART; -+ } else -+ err = -EAGAIN; -+ -+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); -+ goto error; - } - -- if (!policy) -+no_transform: -+ if (num_pols == 0) - goto nopol; - -- family = dst_orig->ops->family; -- pols[0] = policy; -- npols ++; -- xfrm_nr += pols[0]->xfrm_nr; -- -- err = -ENOENT; -- if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP)) -+ if ((flags & XFRM_LOOKUP_ICMP) && -+ !(pols[0]->flags & XFRM_POLICY_ICMP)) { -+ err = -ENOENT; - goto error; -+ } - -- policy->curlft.use_time = get_seconds(); -+ for (i = 0; i < num_pols; i++) -+ pols[i]->curlft.use_time = get_seconds(); - -- switch (policy->action) { -- default: -- case XFRM_POLICY_BLOCK: -+ if (num_xfrms < 0) { - /* Prohibit the flow */ - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); - err = -EPERM; - goto error; -- -- case XFRM_POLICY_ALLOW: --#ifndef CONFIG_XFRM_SUB_POLICY -- if (policy->xfrm_nr == 0) { -- /* Flow passes not transformed. */ -- xfrm_pol_put(policy); -- return 0; -- } --#endif -- -- /* Try to find matching bundle. -- * -- * LATER: help from flow cache. It is optional, this -- * is required only for output policy. -- */ -- dst = xfrm_find_bundle(fl, policy, family); -- if (IS_ERR(dst)) { -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); -- err = PTR_ERR(dst); -- goto error; -- } -- -- if (dst) -- break; -- --#ifdef CONFIG_XFRM_SUB_POLICY -- if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { -- pols[1] = xfrm_policy_lookup_bytype(net, -- XFRM_POLICY_TYPE_MAIN, -- fl, family, -- XFRM_POLICY_OUT); -- if (pols[1]) { -- if (IS_ERR(pols[1])) { -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); -- err = PTR_ERR(pols[1]); -- goto error; -- } -- if (pols[1]->action == XFRM_POLICY_BLOCK) { -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); -- err = -EPERM; -- goto error; -- } -- npols ++; -- xfrm_nr += pols[1]->xfrm_nr; -- } -- } -- -- /* -- * Because neither flowi nor bundle information knows about -- * transformation template size. On more than one policy usage -- * we can realize whether all of them is bypass or not after -- * they are searched. See above not-transformed bypass -- * is surrounded by non-sub policy configuration, too. -- */ -- if (xfrm_nr == 0) { -- /* Flow passes not transformed. */ -- xfrm_pols_put(pols, npols); -- return 0; -- } -- --#endif -- nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); -- -- if (unlikely(nx<0)) { -- err = nx; -- if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) { -- /* EREMOTE tells the caller to generate -- * a one-shot blackhole route. -- */ -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); -- xfrm_pol_put(policy); -- return -EREMOTE; -- } -- if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { -- DECLARE_WAITQUEUE(wait, current); -- -- add_wait_queue(&net->xfrm.km_waitq, &wait); -- set_current_state(TASK_INTERRUPTIBLE); -- schedule(); -- set_current_state(TASK_RUNNING); -- remove_wait_queue(&net->xfrm.km_waitq, &wait); -- -- nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); -- -- if (nx == -EAGAIN && signal_pending(current)) { -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); -- err = -ERESTART; -- goto error; -- } -- if (nx == -EAGAIN || -- genid != atomic_read(&flow_cache_genid)) { -- xfrm_pols_put(pols, npols); -- goto restart; -- } -- err = nx; -- } -- if (err < 0) { -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); -- goto error; -- } -- } -- if (nx == 0) { -- /* Flow passes not transformed. */ -- xfrm_pols_put(pols, npols); -- return 0; -- } -- -- dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); -- err = PTR_ERR(dst); -- if (IS_ERR(dst)) { -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); -- goto error; -- } -- -- for (pi = 0; pi < npols; pi++) -- pol_dead |= pols[pi]->walk.dead; -- -- write_lock_bh(&policy->lock); -- if (unlikely(pol_dead || stale_bundle(dst))) { -- /* Wow! While we worked on resolving, this -- * policy has gone. Retry. It is not paranoia, -- * we just cannot enlist new bundle to dead object. -- * We can't enlist stable bundles either. -- */ -- write_unlock_bh(&policy->lock); -- dst_free(dst); -- -- if (pol_dead) -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD); -- else -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); -- err = -EHOSTUNREACH; -- goto error; -- } -- -- if (npols > 1) -- err = xfrm_dst_update_parent(dst, &pols[1]->selector); -- else -- err = xfrm_dst_update_origin(dst, fl); -- if (unlikely(err)) { -- write_unlock_bh(&policy->lock); -- dst_free(dst); -- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); -- goto error; -- } -- -- dst->next = policy->bundles; -- policy->bundles = dst; -- dst_hold(dst); -- write_unlock_bh(&policy->lock); -+ } else if (num_xfrms > 0) { -+ /* Flow transformed */ -+ *dst_p = dst; -+ dst_release(dst_orig); -+ } else { -+ /* Flow passes untransformed */ -+ dst_release(dst); - } -- *dst_p = dst; -- dst_release(dst_orig); -- xfrm_pols_put(pols, npols); -+ok: -+ xfrm_pols_put(pols, drop_pols); - return 0; - -+nopol: -+ if (!(flags & XFRM_LOOKUP_ICMP)) -+ goto ok; -+ err = -ENOENT; - error: -- xfrm_pols_put(pols, npols); -+ dst_release(dst); - dropdst: - dst_release(dst_orig); - *dst_p = NULL; -+ xfrm_pols_put(pols, drop_pols); - return err; -- --nopol: -- err = -ENOENT; -- if (flags & XFRM_LOOKUP_ICMP) -- goto dropdst; -- return 0; - } - EXPORT_SYMBOL(__xfrm_lookup); - -@@ -2134,71 +2225,24 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) - return dst; - } - --static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) --{ -- struct dst_entry *dst, **dstp; -- -- write_lock(&pol->lock); -- dstp = &pol->bundles; -- while ((dst=*dstp) != NULL) { -- if (func(dst)) { -- *dstp = dst->next; -- dst->next = *gc_list_p; -- *gc_list_p = dst; -- } else { -- dstp = &dst->next; -- } -- } -- write_unlock(&pol->lock); --} -- --static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *)) -+static void __xfrm_garbage_collect(struct net *net) - { -- struct dst_entry *gc_list = NULL; -- int dir; -+ struct dst_entry *head, *next; - -- read_lock_bh(&xfrm_policy_lock); -- for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { -- struct xfrm_policy *pol; -- struct hlist_node *entry; -- struct hlist_head *table; -- int i; -+ flow_cache_flush(); - -- hlist_for_each_entry(pol, entry, -- &net->xfrm.policy_inexact[dir], bydst) -- prune_one_bundle(pol, func, &gc_list); -+ spin_lock_bh(&xfrm_policy_sk_bundle_lock); -+ head = xfrm_policy_sk_bundles; -+ xfrm_policy_sk_bundles = NULL; -+ spin_unlock_bh(&xfrm_policy_sk_bundle_lock); - -- table = net->xfrm.policy_bydst[dir].table; -- for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { -- hlist_for_each_entry(pol, entry, table + i, bydst) -- prune_one_bundle(pol, func, &gc_list); -- } -- } -- read_unlock_bh(&xfrm_policy_lock); -- -- while (gc_list) { -- struct dst_entry *dst = gc_list; -- gc_list = dst->next; -- dst_free(dst); -+ while (head) { -+ next = head->next; -+ dst_free(head); -+ head = next; - } - } - --static int unused_bundle(struct dst_entry *dst) --{ -- return !atomic_read(&dst->__refcnt); --} -- --static void __xfrm_garbage_collect(struct net *net) --{ -- xfrm_prune_bundles(net, unused_bundle); --} -- --static int xfrm_flush_bundles(struct net *net) --{ -- xfrm_prune_bundles(net, stale_bundle); -- return 0; --} -- - static void xfrm_init_pmtu(struct dst_entry *dst) - { - do { -@@ -2256,7 +2300,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, - return 0; - if (dst->xfrm->km.state != XFRM_STATE_VALID) - return 0; -- if (xdst->genid != dst->xfrm->genid) -+ if (xdst->xfrm_genid != dst->xfrm->genid) -+ return 0; -+ if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) - return 0; - - if (strict && fl && -@@ -2383,7 +2429,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void - - switch (event) { - case NETDEV_DOWN: -- xfrm_flush_bundles(dev_net(dev)); -+ __xfrm_garbage_collect(dev_net(dev)); - } - return NOTIFY_DONE; - } -@@ -2714,7 +2760,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, - struct xfrm_migrate *m, int num_migrate) - { - struct xfrm_migrate *mp; -- struct dst_entry *dst; - int i, j, n = 0; - - write_lock_bh(&pol->lock); -@@ -2739,10 +2784,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, - sizeof(pol->xfrm_vec[i].saddr)); - pol->xfrm_vec[i].encap_family = mp->new_family; - /* flush bundles */ -- while ((dst = pol->bundles) != NULL) { -- pol->bundles = dst->next; -- dst_free(dst); -- } -+ atomic_inc(&pol->genid); - } - } - --- -1.7.0.2 - |