diff options
Diffstat (limited to 'main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch')
-rw-r--r-- | main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch | 1068 |
1 files changed, 1068 insertions, 0 deletions
diff --git a/main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch b/main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch new file mode 100644 index 0000000000..0d066c84d9 --- /dev/null +++ b/main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch @@ -0,0 +1,1068 @@ +From f89d21648e6dc06db2aeabc8926c270894c41446 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi> +Date: Wed, 7 Apr 2010 00:30:05 +0000 +Subject: [PATCH 15/18] xfrm: cache bundles instead of policies for outgoing flows + +__xfrm_lookup() is called for each packet transmitted out of +system. The xfrm_find_bundle() does a linear search which can +kill system performance depending on how many bundles are +required per policy. + +This modifies __xfrm_lookup() to store bundles directly in +the flow cache. If we did not get a hit, we just create a new +bundle instead of doing slow search. This means that we can now +get multiple xfrm_dst's for same flow (on per-cpu basis). + +Signed-off-by: Timo Teras <timo.teras@iki.fi> +Signed-off-by: David S. Miller <davem@davemloft.net> +(backported from commit 80c802f3073e84c956846e921e8a0b02dfa3755f) +--- + include/net/xfrm.h | 10 +- + net/ipv4/xfrm4_policy.c | 22 -- + net/ipv6/xfrm6_policy.c | 31 -- + net/xfrm/xfrm_policy.c | 710 +++++++++++++++++++++++++---------------------- + 4 files changed, 383 insertions(+), 390 deletions(-) + +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index 6023a48..d51ef61 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -266,7 +266,6 @@ struct xfrm_policy_afinfo { + xfrm_address_t *saddr, + xfrm_address_t *daddr); + int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr); +- struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); + void (*decode_session)(struct sk_buff *skb, + struct flowi *fl, + int reverse); +@@ -485,12 +484,12 @@ struct xfrm_policy + struct timer_list timer; + + struct flow_cache_object flo; ++ atomic_t genid; + u32 priority; + u32 index; + struct xfrm_selector selector; + struct xfrm_lifetime_cfg lft; + struct xfrm_lifetime_cur curlft; +- struct dst_entry *bundles; + struct xfrm_policy_walk_entry walk; + u8 type; + u8 action; +@@ -883,11 +882,15 @@ struct xfrm_dst + struct rt6_info rt6; + } u; + struct dst_entry *route; ++ struct flow_cache_object flo; ++ struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; ++ int num_pols, num_xfrms; + #ifdef CONFIG_XFRM_SUB_POLICY + struct flowi *origin; + struct xfrm_selector *partner; + #endif +- u32 genid; ++ u32 xfrm_genid; ++ u32 policy_genid; + u32 route_mtu_cached; + u32 child_mtu_cached; + u32 route_cookie; +@@ -897,6 +900,7 @@ struct xfrm_dst + #ifdef CONFIG_XFRM + static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) + { ++ xfrm_pols_put(xdst->pols, xdst->num_pols); + dst_release(xdst->route); + if (likely(xdst->u.dst.xfrm)) + xfrm_state_put(xdst->u.dst.xfrm); +diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c +index 7009886..651a3e7 100644 +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -60,27 +60,6 @@ static int xfrm4_get_saddr(struct net *net, + return 0; + } + +-static struct dst_entry * +-__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) +-{ +- struct dst_entry *dst; +- +- read_lock_bh(&policy->lock); +- for (dst = policy->bundles; dst; dst = dst->next) { +- struct xfrm_dst *xdst = (struct xfrm_dst *)dst; +- if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/ +- xdst->u.rt.fl.fl4_dst == fl->fl4_dst && +- xdst->u.rt.fl.fl4_src == fl->fl4_src && +- xdst->u.rt.fl.fl4_tos == fl->fl4_tos && +- xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) { +- dst_clone(dst); +- break; +- } +- } +- read_unlock_bh(&policy->lock); +- return dst; +-} +- + static int xfrm4_get_tos(struct flowi *fl) + { + return fl->fl4_tos; +@@ -258,7 +237,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { + .dst_ops = &xfrm4_dst_ops, + .dst_lookup = xfrm4_dst_lookup, + .get_saddr = xfrm4_get_saddr, +- .find_bundle = __xfrm4_find_bundle, + .decode_session = _decode_session4, + .get_tos = xfrm4_get_tos, + .init_path = xfrm4_init_path, +diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c +index 3f89ab7..fb2a5b7 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -68,36 +68,6 @@ static int xfrm6_get_saddr(struct net *net, + return 0; + } + +-static struct dst_entry * +-__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) +-{ +- struct dst_entry *dst; +- +- /* Still not clear if we should set fl->fl6_{src,dst}... */ +- read_lock_bh(&policy->lock); +- for (dst = policy->bundles; dst; dst = dst->next) { +- struct xfrm_dst *xdst = (struct xfrm_dst*)dst; +- struct in6_addr fl_dst_prefix, fl_src_prefix; +- +- ipv6_addr_prefix(&fl_dst_prefix, +- &fl->fl6_dst, +- xdst->u.rt6.rt6i_dst.plen); +- ipv6_addr_prefix(&fl_src_prefix, +- &fl->fl6_src, +- xdst->u.rt6.rt6i_src.plen); +- if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && +- ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && +- xfrm_bundle_ok(policy, xdst, fl, AF_INET6, +- (xdst->u.rt6.rt6i_dst.plen != 128 || +- xdst->u.rt6.rt6i_src.plen != 128))) { +- dst_clone(dst); +- break; +- } +- } +- read_unlock_bh(&policy->lock); +- return dst; +-} +- + static int xfrm6_get_tos(struct flowi *fl) + { + return 0; +@@ -290,7 +260,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { + .dst_ops = &xfrm6_dst_ops, + .dst_lookup = xfrm6_dst_lookup, + .get_saddr = xfrm6_get_saddr, +- .find_bundle = __xfrm6_find_bundle, + .decode_session = _decode_session6, + .get_tos = xfrm6_get_tos, + .init_path = xfrm6_init_path, +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index d1eb2b5..0379d82 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -37,6 +37,8 @@ + DEFINE_MUTEX(xfrm_cfg_mutex); + EXPORT_SYMBOL(xfrm_cfg_mutex); + ++static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); ++static struct dst_entry *xfrm_policy_sk_bundles; + static DEFINE_RWLOCK(xfrm_policy_lock); + + static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); +@@ -50,6 +52,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock); + static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); + static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); + static void xfrm_init_pmtu(struct dst_entry *dst); ++static int stale_bundle(struct dst_entry *dst); + + static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, + int dir); +@@ -277,8 +280,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) + { + BUG_ON(!policy->walk.dead); + +- BUG_ON(policy->bundles); +- + if (del_timer(&policy->timer)) + BUG(); + +@@ -289,12 +290,7 @@ EXPORT_SYMBOL(xfrm_policy_destroy); + + static void xfrm_policy_gc_kill(struct xfrm_policy *policy) + { +- struct dst_entry *dst; +- +- while ((dst = policy->bundles) != NULL) { +- policy->bundles = dst->next; +- dst_free(dst); +- } ++ atomic_inc(&policy->genid); + + if (del_timer(&policy->timer)) + atomic_dec(&policy->refcnt); +@@ -572,7 +568,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) + struct xfrm_policy *delpol; + struct hlist_head *chain; + struct hlist_node *entry, *newpos; +- struct dst_entry *gc_list; + + write_lock_bh(&xfrm_policy_lock); + chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); +@@ -620,34 +615,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) + else if (xfrm_bydst_should_resize(net, dir, NULL)) + schedule_work(&net->xfrm.policy_hash_work); + +- read_lock_bh(&xfrm_policy_lock); +- gc_list = NULL; +- entry = &policy->bydst; +- hlist_for_each_entry_continue(policy, entry, bydst) { +- struct dst_entry *dst; +- +- write_lock(&policy->lock); +- dst = policy->bundles; +- if (dst) { +- struct dst_entry *tail = dst; +- while (tail->next) +- tail = tail->next; +- tail->next = gc_list; +- gc_list = dst; +- +- policy->bundles = NULL; +- } +- write_unlock(&policy->lock); +- } +- read_unlock_bh(&xfrm_policy_lock); +- +- while (gc_list) { +- struct dst_entry *dst = gc_list; +- +- gc_list = dst->next; +- dst_free(dst); +- } +- + return 0; + } + EXPORT_SYMBOL(xfrm_policy_insert); +@@ -990,6 +957,19 @@ fail: + return ret; + } + ++static struct xfrm_policy * ++__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) ++{ ++#ifdef CONFIG_XFRM_SUB_POLICY ++ struct xfrm_policy *pol; ++ ++ pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); ++ if (pol != NULL) ++ return pol; ++#endif ++ return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); ++} ++ + static struct flow_cache_object * + xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + u8 dir, struct flow_cache_object *old_obj, void *ctx) +@@ -999,21 +979,10 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + if (old_obj) + xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); + +-#ifdef CONFIG_XFRM_SUB_POLICY +- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); +- if (IS_ERR(pol)) ++ pol = __xfrm_policy_lookup(net, fl, family, dir); ++ if (pol == NULL || IS_ERR(pol)) + return ERR_CAST(pol); +- if (pol) +- goto found; +-#endif +- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); +- if (IS_ERR(pol)) +- return ERR_CAST(pol); +- if (pol) +- goto found; +- return NULL; + +-found: + /* Resolver returns two references: + * one for cache and one for caller of flow_cache_lookup() */ + xfrm_pol_hold(pol); +@@ -1299,18 +1268,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, + * still valid. + */ + +-static struct dst_entry * +-xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family) +-{ +- struct dst_entry *x; +- struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); +- if (unlikely(afinfo == NULL)) +- return ERR_PTR(-EINVAL); +- x = afinfo->find_bundle(fl, policy); +- xfrm_policy_put_afinfo(afinfo); +- return x; +-} +- + static inline int xfrm_get_tos(struct flowi *fl, int family) + { + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); +@@ -1326,6 +1283,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) + return tos; + } + ++static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) ++{ ++ struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); ++ struct dst_entry *dst = &xdst->u.dst; ++ ++ if (xdst->route == NULL) { ++ /* Dummy bundle - if it has xfrms we were not ++ * able to build bundle as template resolution failed. ++ * It means we need to try again resolving. */ ++ if (xdst->num_xfrms > 0) ++ return NULL; ++ } else { ++ /* Real bundle */ ++ if (stale_bundle(dst)) ++ return NULL; ++ } ++ ++ dst_hold(dst); ++ return flo; ++} ++ ++static int xfrm_bundle_flo_check(struct flow_cache_object *flo) ++{ ++ struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); ++ struct dst_entry *dst = &xdst->u.dst; ++ ++ if (!xdst->route) ++ return 0; ++ if (stale_bundle(dst)) ++ return 0; ++ ++ return 1; ++} ++ ++static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) ++{ ++ struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); ++ struct dst_entry *dst = &xdst->u.dst; ++ ++ dst_free(dst); ++} ++ ++static const struct flow_cache_ops xfrm_bundle_fc_ops = { ++ .get = xfrm_bundle_flo_get, ++ .check = xfrm_bundle_flo_check, ++ .delete = xfrm_bundle_flo_delete, ++}; ++ + static inline struct xfrm_dst *xfrm_alloc_dst(int family) + { + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); +@@ -1338,6 +1343,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(int family) + + xfrm_policy_put_afinfo(afinfo); + ++ xdst->flo.ops = &xfrm_bundle_fc_ops; ++ + return xdst; + } + +@@ -1375,6 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + return err; + } + ++ + /* Allocate chain of dst_entry's, attach known xfrm's, calculate + * all the metrics... Shortly, bundle a bundle. + */ +@@ -1437,7 +1445,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, + dst_hold(dst); + + dst1->xfrm = xfrm[i]; +- xdst->genid = xfrm[i]->genid; ++ xdst->xfrm_genid = xfrm[i]->genid; + + dst1->obsolete = -1; + dst1->flags |= DST_HOST; +@@ -1530,7 +1538,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) + #endif + } + +-static int stale_bundle(struct dst_entry *dst); ++static int xfrm_expand_policies(struct flowi *fl, u16 family, ++ struct xfrm_policy **pols, ++ int *num_pols, int *num_xfrms) ++{ ++ int i; ++ ++ if (*num_pols == 0 || !pols[0]) { ++ *num_pols = 0; ++ *num_xfrms = 0; ++ return 0; ++ } ++ if (IS_ERR(pols[0])) ++ return PTR_ERR(pols[0]); ++ ++ *num_xfrms = pols[0]->xfrm_nr; ++ ++#ifdef CONFIG_XFRM_SUB_POLICY ++ if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW && ++ pols[0]->type != XFRM_POLICY_TYPE_MAIN) { ++ pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), ++ XFRM_POLICY_TYPE_MAIN, ++ fl, family, ++ XFRM_POLICY_OUT); ++ if (pols[1]) { ++ if (IS_ERR(pols[1])) { ++ xfrm_pols_put(pols, *num_pols); ++ return PTR_ERR(pols[1]); ++ } ++ (*num_pols) ++; ++ (*num_xfrms) += pols[1]->xfrm_nr; ++ } ++ } ++#endif ++ for (i = 0; i < *num_pols; i++) { ++ if (pols[i]->action != XFRM_POLICY_ALLOW) { ++ *num_xfrms = -1; ++ break; ++ } ++ } ++ ++ return 0; ++ ++} ++ ++static struct xfrm_dst * ++xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, ++ struct flowi *fl, u16 family, ++ struct dst_entry *dst_orig) ++{ ++ struct net *net = xp_net(pols[0]); ++ struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; ++ struct dst_entry *dst; ++ struct xfrm_dst *xdst; ++ int err; ++ ++ /* Try to instantiate a bundle */ ++ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); ++ if (err < 0) { ++ if (err != -EAGAIN) ++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); ++ return ERR_PTR(err); ++ } ++ ++ dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); ++ if (IS_ERR(dst)) { ++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); ++ return ERR_CAST(dst); ++ } ++ ++ xdst = (struct xfrm_dst *)dst; ++ xdst->num_xfrms = err; ++ if (num_pols > 1) ++ err = xfrm_dst_update_parent(dst, &pols[1]->selector); ++ else ++ err = xfrm_dst_update_origin(dst, fl); ++ if (unlikely(err)) { ++ dst_free(dst); ++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); ++ return ERR_PTR(err); ++ } ++ ++ xdst->num_pols = num_pols; ++ memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); ++ xdst->policy_genid = atomic_read(&pols[0]->genid); ++ ++ return xdst; ++} ++ ++static struct flow_cache_object * ++xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, ++ struct flow_cache_object *oldflo, void *ctx) ++{ ++ struct dst_entry *dst_orig = (struct dst_entry *)ctx; ++ struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; ++ struct xfrm_dst *xdst, *new_xdst; ++ int num_pols = 0, num_xfrms = 0, i, err, pol_dead; ++ ++ /* Check if the policies from old bundle are usable */ ++ xdst = NULL; ++ if (oldflo) { ++ xdst = container_of(oldflo, struct xfrm_dst, flo); ++ num_pols = xdst->num_pols; ++ num_xfrms = xdst->num_xfrms; ++ pol_dead = 0; ++ for (i = 0; i < num_pols; i++) { ++ pols[i] = xdst->pols[i]; ++ pol_dead |= pols[i]->walk.dead; ++ } ++ if (pol_dead) { ++ dst_free(&xdst->u.dst); ++ xdst = NULL; ++ num_pols = 0; ++ num_xfrms = 0; ++ oldflo = NULL; ++ } ++ } ++ ++ /* Resolve policies to use if we couldn't get them from ++ * previous cache entry */ ++ if (xdst == NULL) { ++ num_pols = 1; ++ pols[0] = __xfrm_policy_lookup(net, fl, family, dir); ++ err = xfrm_expand_policies(fl, family, pols, ++ &num_pols, &num_xfrms); ++ if (err < 0) ++ goto inc_error; ++ if (num_pols == 0) ++ return NULL; ++ if (num_xfrms <= 0) ++ goto make_dummy_bundle; ++ } ++ ++ new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); ++ if (IS_ERR(new_xdst)) { ++ err = PTR_ERR(new_xdst); ++ if (err != -EAGAIN) ++ goto error; ++ if (oldflo == NULL) ++ goto make_dummy_bundle; ++ dst_hold(&xdst->u.dst); ++ return oldflo; ++ } ++ ++ /* Kill the previous bundle */ ++ if (xdst) { ++ /* The policies were stolen for newly generated bundle */ ++ xdst->num_pols = 0; ++ dst_free(&xdst->u.dst); ++ } ++ ++ /* Flow cache does not have reference, it dst_free()'s, ++ * but we do need to return one reference for original caller */ ++ dst_hold(&new_xdst->u.dst); ++ return &new_xdst->flo; ++ ++make_dummy_bundle: ++ /* We found policies, but there's no bundles to instantiate: ++ * either because the policy blocks, has no transformations or ++ * we could not build template (no xfrm_states).*/ ++ xdst = xfrm_alloc_dst(family); ++ if (IS_ERR(xdst)) { ++ xfrm_pols_put(pols, num_pols); ++ return ERR_CAST(xdst); ++ } ++ xdst->num_pols = num_pols; ++ xdst->num_xfrms = num_xfrms; ++ memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); ++ ++ dst_hold(&xdst->u.dst); ++ return &xdst->flo; ++ ++inc_error: ++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); ++error: ++ if (xdst != NULL) ++ dst_free(&xdst->u.dst); ++ else ++ xfrm_pols_put(pols, num_pols); ++ return ERR_PTR(err); ++} + + /* Main function: finds/creates a bundle for given flow. + * +@@ -1540,248 +1727,152 @@ static int stale_bundle(struct dst_entry *dst); + int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, + struct sock *sk, int flags) + { +- struct xfrm_policy *policy; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; +- int npols; +- int pol_dead; +- int xfrm_nr; +- int pi; +- struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; +- struct dst_entry *dst, *dst_orig = *dst_p; +- int nx = 0; +- int err; +- u32 genid; +- u16 family; ++ struct flow_cache_object *flo; ++ struct xfrm_dst *xdst; ++ struct dst_entry *dst, *dst_orig = *dst_p, *route; ++ u16 family = dst_orig->ops->family; + u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); ++ int i, err, num_pols, num_xfrms, drop_pols = 0; + + restart: +- genid = atomic_read(&flow_cache_genid); +- policy = NULL; +- for (pi = 0; pi < ARRAY_SIZE(pols); pi++) +- pols[pi] = NULL; +- npols = 0; +- pol_dead = 0; +- xfrm_nr = 0; ++ dst = NULL; ++ xdst = NULL; ++ route = NULL; + + if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { +- policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); +- err = PTR_ERR(policy); +- if (IS_ERR(policy)) { +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); ++ num_pols = 1; ++ pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); ++ err = xfrm_expand_policies(fl, family, pols, ++ &num_pols, &num_xfrms); ++ if (err < 0) + goto dropdst; ++ ++ if (num_pols) { ++ if (num_xfrms <= 0) { ++ drop_pols = num_pols; ++ goto no_transform; ++ } ++ ++ xdst = xfrm_resolve_and_create_bundle( ++ pols, num_pols, fl, ++ family, dst_orig); ++ if (IS_ERR(xdst)) { ++ xfrm_pols_put(pols, num_pols); ++ err = PTR_ERR(xdst); ++ goto dropdst; ++ } ++ ++ spin_lock_bh(&xfrm_policy_sk_bundle_lock); ++ xdst->u.dst.next = xfrm_policy_sk_bundles; ++ xfrm_policy_sk_bundles = &xdst->u.dst; ++ spin_unlock_bh(&xfrm_policy_sk_bundle_lock); ++ ++ route = xdst->route; + } + } + +- if (!policy) { +- struct flow_cache_object *flo; +- ++ if (xdst == NULL) { + /* To accelerate a bit... */ + if ((dst_orig->flags & DST_NOXFRM) || + !net->xfrm.policy_count[XFRM_POLICY_OUT]) + goto nopol; + +- flo = flow_cache_lookup(net, fl, dst_orig->ops->family, +- dir, xfrm_policy_lookup, NULL); +- err = PTR_ERR(flo); ++ flo = flow_cache_lookup(net, fl, family, dir, ++ xfrm_bundle_lookup, dst_orig); ++ if (flo == NULL) ++ goto nopol; + if (IS_ERR(flo)) { +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); ++ err = PTR_ERR(flo); + goto dropdst; + } +- if (flo) +- policy = container_of(flo, struct xfrm_policy, flo); +- else +- policy = NULL; ++ xdst = container_of(flo, struct xfrm_dst, flo); ++ ++ num_pols = xdst->num_pols; ++ num_xfrms = xdst->num_xfrms; ++ memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols); ++ route = xdst->route; ++ } ++ ++ dst = &xdst->u.dst; ++ if (route == NULL && num_xfrms > 0) { ++ /* The only case when xfrm_bundle_lookup() returns a ++ * bundle with null route, is when the template could ++ * not be resolved. It means policies are there, but ++ * bundle could not be created, since we don't yet ++ * have the xfrm_state's. We need to wait for KM to ++ * negotiate new SA's or bail out with error.*/ ++ if (net->xfrm.sysctl_larval_drop) { ++ /* EREMOTE tells the caller to generate ++ * a one-shot blackhole route. */ ++ dst_release(dst); ++ xfrm_pols_put(pols, num_pols); ++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); ++ return -EREMOTE; ++ } ++ if (flags & XFRM_LOOKUP_WAIT) { ++ DECLARE_WAITQUEUE(wait, current); ++ ++ add_wait_queue(&net->xfrm.km_waitq, &wait); ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule(); ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&net->xfrm.km_waitq, &wait); ++ ++ if (!signal_pending(current)) { ++ dst_release(dst); ++ goto restart; ++ } ++ ++ err = -ERESTART; ++ } else ++ err = -EAGAIN; ++ ++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); ++ goto error; + } + +- if (!policy) ++no_transform: ++ if (num_pols == 0) + goto nopol; + +- family = dst_orig->ops->family; +- pols[0] = policy; +- npols ++; +- xfrm_nr += pols[0]->xfrm_nr; +- +- err = -ENOENT; +- if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP)) ++ if ((flags & XFRM_LOOKUP_ICMP) && ++ !(pols[0]->flags & XFRM_POLICY_ICMP)) { ++ err = -ENOENT; + goto error; ++ } + +- policy->curlft.use_time = get_seconds(); ++ for (i = 0; i < num_pols; i++) ++ pols[i]->curlft.use_time = get_seconds(); + +- switch (policy->action) { +- default: +- case XFRM_POLICY_BLOCK: ++ if (num_xfrms < 0) { + /* Prohibit the flow */ + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); + err = -EPERM; + goto error; +- +- case XFRM_POLICY_ALLOW: +-#ifndef CONFIG_XFRM_SUB_POLICY +- if (policy->xfrm_nr == 0) { +- /* Flow passes not transformed. */ +- xfrm_pol_put(policy); +- return 0; +- } +-#endif +- +- /* Try to find matching bundle. +- * +- * LATER: help from flow cache. It is optional, this +- * is required only for output policy. +- */ +- dst = xfrm_find_bundle(fl, policy, family); +- if (IS_ERR(dst)) { +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); +- err = PTR_ERR(dst); +- goto error; +- } +- +- if (dst) +- break; +- +-#ifdef CONFIG_XFRM_SUB_POLICY +- if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { +- pols[1] = xfrm_policy_lookup_bytype(net, +- XFRM_POLICY_TYPE_MAIN, +- fl, family, +- XFRM_POLICY_OUT); +- if (pols[1]) { +- if (IS_ERR(pols[1])) { +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); +- err = PTR_ERR(pols[1]); +- goto error; +- } +- if (pols[1]->action == XFRM_POLICY_BLOCK) { +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); +- err = -EPERM; +- goto error; +- } +- npols ++; +- xfrm_nr += pols[1]->xfrm_nr; +- } +- } +- +- /* +- * Because neither flowi nor bundle information knows about +- * transformation template size. On more than one policy usage +- * we can realize whether all of them is bypass or not after +- * they are searched. See above not-transformed bypass +- * is surrounded by non-sub policy configuration, too. +- */ +- if (xfrm_nr == 0) { +- /* Flow passes not transformed. */ +- xfrm_pols_put(pols, npols); +- return 0; +- } +- +-#endif +- nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); +- +- if (unlikely(nx<0)) { +- err = nx; +- if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) { +- /* EREMOTE tells the caller to generate +- * a one-shot blackhole route. +- */ +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); +- xfrm_pol_put(policy); +- return -EREMOTE; +- } +- if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { +- DECLARE_WAITQUEUE(wait, current); +- +- add_wait_queue(&net->xfrm.km_waitq, &wait); +- set_current_state(TASK_INTERRUPTIBLE); +- schedule(); +- set_current_state(TASK_RUNNING); +- remove_wait_queue(&net->xfrm.km_waitq, &wait); +- +- nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); +- +- if (nx == -EAGAIN && signal_pending(current)) { +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); +- err = -ERESTART; +- goto error; +- } +- if (nx == -EAGAIN || +- genid != atomic_read(&flow_cache_genid)) { +- xfrm_pols_put(pols, npols); +- goto restart; +- } +- err = nx; +- } +- if (err < 0) { +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); +- goto error; +- } +- } +- if (nx == 0) { +- /* Flow passes not transformed. */ +- xfrm_pols_put(pols, npols); +- return 0; +- } +- +- dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); +- err = PTR_ERR(dst); +- if (IS_ERR(dst)) { +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); +- goto error; +- } +- +- for (pi = 0; pi < npols; pi++) +- pol_dead |= pols[pi]->walk.dead; +- +- write_lock_bh(&policy->lock); +- if (unlikely(pol_dead || stale_bundle(dst))) { +- /* Wow! While we worked on resolving, this +- * policy has gone. Retry. It is not paranoia, +- * we just cannot enlist new bundle to dead object. +- * We can't enlist stable bundles either. +- */ +- write_unlock_bh(&policy->lock); +- dst_free(dst); +- +- if (pol_dead) +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD); +- else +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); +- err = -EHOSTUNREACH; +- goto error; +- } +- +- if (npols > 1) +- err = xfrm_dst_update_parent(dst, &pols[1]->selector); +- else +- err = xfrm_dst_update_origin(dst, fl); +- if (unlikely(err)) { +- write_unlock_bh(&policy->lock); +- dst_free(dst); +- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); +- goto error; +- } +- +- dst->next = policy->bundles; +- policy->bundles = dst; +- dst_hold(dst); +- write_unlock_bh(&policy->lock); ++ } else if (num_xfrms > 0) { ++ /* Flow transformed */ ++ *dst_p = dst; ++ dst_release(dst_orig); ++ } else { ++ /* Flow passes untransformed */ ++ dst_release(dst); + } +- *dst_p = dst; +- dst_release(dst_orig); +- xfrm_pols_put(pols, npols); ++ok: ++ xfrm_pols_put(pols, drop_pols); + return 0; + ++nopol: ++ if (!(flags & XFRM_LOOKUP_ICMP)) ++ goto ok; ++ err = -ENOENT; + error: +- xfrm_pols_put(pols, npols); ++ dst_release(dst); + dropdst: + dst_release(dst_orig); + *dst_p = NULL; ++ xfrm_pols_put(pols, drop_pols); + return err; +- +-nopol: +- err = -ENOENT; +- if (flags & XFRM_LOOKUP_ICMP) +- goto dropdst; +- return 0; + } + EXPORT_SYMBOL(__xfrm_lookup); + +@@ -2134,71 +2225,24 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) + return dst; + } + +-static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) +-{ +- struct dst_entry *dst, **dstp; +- +- write_lock(&pol->lock); +- dstp = &pol->bundles; +- while ((dst=*dstp) != NULL) { +- if (func(dst)) { +- *dstp = dst->next; +- dst->next = *gc_list_p; +- *gc_list_p = dst; +- } else { +- dstp = &dst->next; +- } +- } +- write_unlock(&pol->lock); +-} +- +-static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *)) ++static void __xfrm_garbage_collect(struct net *net) + { +- struct dst_entry *gc_list = NULL; +- int dir; ++ struct dst_entry *head, *next; + +- read_lock_bh(&xfrm_policy_lock); +- for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { +- struct xfrm_policy *pol; +- struct hlist_node *entry; +- struct hlist_head *table; +- int i; ++ flow_cache_flush(); + +- hlist_for_each_entry(pol, entry, +- &net->xfrm.policy_inexact[dir], bydst) +- prune_one_bundle(pol, func, &gc_list); ++ spin_lock_bh(&xfrm_policy_sk_bundle_lock); ++ head = xfrm_policy_sk_bundles; ++ xfrm_policy_sk_bundles = NULL; ++ spin_unlock_bh(&xfrm_policy_sk_bundle_lock); + +- table = net->xfrm.policy_bydst[dir].table; +- for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { +- hlist_for_each_entry(pol, entry, table + i, bydst) +- prune_one_bundle(pol, func, &gc_list); +- } +- } +- read_unlock_bh(&xfrm_policy_lock); +- +- while (gc_list) { +- struct dst_entry *dst = gc_list; +- gc_list = dst->next; +- dst_free(dst); ++ while (head) { ++ next = head->next; ++ dst_free(head); ++ head = next; + } + } + +-static int unused_bundle(struct dst_entry *dst) +-{ +- return !atomic_read(&dst->__refcnt); +-} +- +-static void __xfrm_garbage_collect(struct net *net) +-{ +- xfrm_prune_bundles(net, unused_bundle); +-} +- +-static int xfrm_flush_bundles(struct net *net) +-{ +- xfrm_prune_bundles(net, stale_bundle); +- return 0; +-} +- + static void xfrm_init_pmtu(struct dst_entry *dst) + { + do { +@@ -2256,7 +2300,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, + return 0; + if (dst->xfrm->km.state != XFRM_STATE_VALID) + return 0; +- if (xdst->genid != dst->xfrm->genid) ++ if (xdst->xfrm_genid != dst->xfrm->genid) ++ return 0; ++ if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) + return 0; + + if (strict && fl && +@@ -2383,7 +2429,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void + + switch (event) { + case NETDEV_DOWN: +- xfrm_flush_bundles(dev_net(dev)); ++ __xfrm_garbage_collect(dev_net(dev)); + } + return NOTIFY_DONE; + } +@@ -2714,7 +2760,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, + struct xfrm_migrate *m, int num_migrate) + { + struct xfrm_migrate *mp; +- struct dst_entry *dst; + int i, j, n = 0; + + write_lock_bh(&pol->lock); +@@ -2739,10 +2784,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, + sizeof(pol->xfrm_vec[i].saddr)); + pol->xfrm_vec[i].encap_family = mp->new_family; + /* flush bundles */ +- while ((dst = pol->bundles) != NULL) { +- pol->bundles = dst->next; +- dst_free(dst); +- } ++ atomic_inc(&pol->genid); + } + } + +-- +1.7.0.2 + |