From ac9759a53270bfdb26ff73226209ee290320c4d5 Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Wed, 21 Sep 2016 10:16:00 +0200 Subject: kernel-netlink: Support configuring XFRM policy hashing thresholds If the number of flows over a gateway exceeds the flow cache size of the Linux kernel, policy lookup gets very expensive. Policies covering more than a single address don't get hash-indexed by default, which results in wasting most of the cycles in xfrm_policy_lookup_bytype() and its xfrm_policy_match() use. Starting with several hundred policies the overhead becomes unacceptable. Starting with Linux 3.18, Linux can hash the first n bits of a policy subnet to perform indexed lookup. With correctly chosen netbits, this can completely eliminate the performance impact of policy lookups, freeing the resources for ESP crypto. WARNING: Due to a bug in kernels 3.19 through 4.7, the kernel crashes with a NULL pointer dereference if a socket policy is installed while hash thresholds are changed. And because the hashtable rebuild triggered by the threshold change that causes this is scheduled, it might also happen if the socket policies are seemingly installed after setting the thresholds. The fix for this bug - 6916fb3b10b3 ("xfrm: Ignore socket policies when rebuilding hash tables") - has been included since 4.8 (and might get backported). As a workaround `charon.plugins.kernel-netlink.port_bypass` may be enabled to replace the socket policies that allow IKE traffic with port-specific bypass policies. 
--- conf/plugins/kernel-netlink.opt | 29 ++++++ .../plugins/kernel_netlink/kernel_netlink_ipsec.c | 107 +++++++++++++++++++++ 2 files changed, 136 insertions(+) diff --git a/conf/plugins/kernel-netlink.opt b/conf/plugins/kernel-netlink.opt index 0d465f607..77ba6ea97 100644 --- a/conf/plugins/kernel-netlink.opt +++ b/conf/plugins/kernel-netlink.opt @@ -51,6 +51,35 @@ charon.plugins.kernel-netlink.set_proto_port_transport_sa = no traffic, it also prevents the use of a single IPsec SA by more than one traffic selector. +charon.plugins.kernel-netlink.spdh_thresh {} + XFRM policy hashing threshold configuration for IPv4 and IPv6. + + XFRM policy hashing threshold configuration for IPv4 and IPv6. + + The section defines hashing thresholds to configure in the kernel during + daemon startup. Each address family takes a threshold for the local subnet + of an IPsec policy (src in out-policies, dst in in- and forward-policies) + and the remote subnet (dst in out-policies, src in in- and + forward-policies). + + If the subnet has at least as many net bits as the threshold, the first + threshold bits are used to calculate a hash to look up the policy. + + Policy hashing thresholds are not supported before Linux 3.18 and might + conflict with socket policies before Linux 4.8. + +charon.plugins.kernel-netlink.spdh_thresh.ipv4.lbits = 32 + Local subnet XFRM policy hashing threshold for IPv4. + +charon.plugins.kernel-netlink.spdh_thresh.ipv4.rbits = 32 + Remote subnet XFRM policy hashing threshold for IPv4. + +charon.plugins.kernel-netlink.spdh_thresh.ipv6.lbits = 128 + Local subnet XFRM policy hashing threshold for IPv6. + +charon.plugins.kernel-netlink.spdh_thresh.ipv6.rbits = 128 + Remote subnet XFRM policy hashing threshold for IPv6. + charon.plugins.kernel-netlink.retries = 0 Number of Netlink message retransmissions to send on timeout. 
diff --git a/src/libcharon/plugins/kernel_netlink/kernel_netlink_ipsec.c b/src/libcharon/plugins/kernel_netlink/kernel_netlink_ipsec.c index c6803429a..6b06c269b 100644 --- a/src/libcharon/plugins/kernel_netlink/kernel_netlink_ipsec.c +++ b/src/libcharon/plugins/kernel_netlink/kernel_netlink_ipsec.c @@ -3046,6 +3046,110 @@ METHOD(kernel_ipsec_t, destroy, void, free(this); } +/** + * Get the currently configured SPD hashing thresholds for an address family. Sends an XFRM_MSG_GETSPDINFO request on the XFRM socket and searches the XFRM_MSG_NEWSPDINFO reply for an attribute of the given type whose payload has the size of struct xfrmu_spdhthresh. lbits and rbits are only written if such an attribute is found. Returns TRUE in that case and FALSE otherwise (send() did not return SUCCESS, the reply is NLMSG_ERROR or NLMSG_DONE, or no matching attribute is present). + */ +static bool get_spd_hash_thresh(private_kernel_netlink_ipsec_t *this, + int type, uint8_t *lbits, uint8_t *rbits) +{ + netlink_buf_t request; + struct nlmsghdr *hdr, *out; + struct xfrmu_spdhthresh *thresh; + struct rtattr *rta; + size_t len, rtasize; + bool success = FALSE; + + memset(&request, 0, sizeof(request)); + + hdr = &request.hdr; + hdr->nlmsg_flags = NLM_F_REQUEST; + hdr->nlmsg_type = XFRM_MSG_GETSPDINFO; + hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t)); /* payload is one uint32_t, left zero by the memset above */ + + if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS) + { + hdr = out; + while (NLMSG_OK(hdr, len)) + { + switch (hdr->nlmsg_type) + { + case XFRM_MSG_NEWSPDINFO: + { + rta = XFRM_RTA(hdr, uint32_t); + rtasize = XFRM_PAYLOAD(hdr, uint32_t); + while (RTA_OK(rta, rtasize)) + { + if (rta->rta_type == type && + RTA_PAYLOAD(rta) == sizeof(*thresh)) + { + thresh = RTA_DATA(rta); + *lbits = thresh->lbits; + *rbits = thresh->rbits; + success = TRUE; + break; + } + rta = RTA_NEXT(rta, rtasize); + } + break; + } + case NLMSG_ERROR: + { + struct nlmsgerr *err = NLMSG_DATA(hdr); + DBG1(DBG_KNL, "getting SPD hash threshold failed: %s (%d)", + strerror(-err->error), -err->error); + break; + } + default: /* any other message type: advance to the next message and re-check the loop condition */ + hdr = NLMSG_NEXT(hdr, len); + continue; + case NLMSG_DONE: + break; + } + break; /* leave the loop after the first message handled by one of the non-default cases */ + } + free(out); /* presumably the reply buffer handed out by send() - TODO confirm ownership */ + } + return success; +} + +/** + * Configure SPD hashing threshold for an address family. Queries the current thresholds with get_spd_hash_thresh() and returns without sending anything if that fails. Otherwise reads the settings keys "<lib->ns>.plugins.kernel-netlink.spdh_thresh.<key>.lbits" and ".rbits" via get_int(), passing def for both (presumably the fallback if a key is not set - confirm against the settings API), and sends an XFRM_MSG_NEWSPDINFO request with NLM_F_ACK carrying an attribute of the given type only if at least one value differs from the one returned by get_spd_hash_thresh(). A send_ack() result other than SUCCESS is only logged. + */ +static void setup_spd_hash_thresh(private_kernel_netlink_ipsec_t *this, + char *key, int type, uint8_t def) +{ + struct xfrmu_spdhthresh 
*thresh; + struct nlmsghdr *hdr; + netlink_buf_t request; + uint8_t lbits, rbits; + + if (!get_spd_hash_thresh(this, type, &lbits, &rbits)) + { + return; + } + memset(&request, 0, sizeof(request)); + + hdr = &request.hdr; + hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + hdr->nlmsg_type = XFRM_MSG_NEWSPDINFO; + hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t)); + + thresh = netlink_reserve(hdr, sizeof(request), type, sizeof(*thresh)); /* NOTE(review): thresh is dereferenced below without a NULL check - confirm netlink_reserve() cannot fail for this buffer */ + thresh->lbits = lib->settings->get_int(lib->settings, + "%s.plugins.kernel-netlink.spdh_thresh.%s.lbits", + def, lib->ns, key); + thresh->rbits = lib->settings->get_int(lib->settings, + "%s.plugins.kernel-netlink.spdh_thresh.%s.rbits", + def, lib->ns, key); /* NOTE(review): no range check on the configured values before they are stored in thresh - verify how out-of-range settings should be handled */ + if (thresh->lbits != lbits || thresh->rbits != rbits) /* nothing is sent if both values match those returned by get_spd_hash_thresh() */ + { + if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS) + { + DBG1(DBG_KNL, "setting SPD hash threshold failed"); + } + } +} + /* * Described in header. */ @@ -3116,6 +3220,9 @@ kernel_netlink_ipsec_t *kernel_netlink_ipsec_create() return NULL; } + setup_spd_hash_thresh(this, "ipv4", XFRMA_SPD_IPV4_HTHRESH, 32); + setup_spd_hash_thresh(this, "ipv6", XFRMA_SPD_IPV6_HTHRESH, 128); + if (register_for_events) { struct sockaddr_nl addr; -- cgit v1.2.3