From f572b388167f08a989b7fd74095a93b3ce401f90 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 9 Jul 2014 15:08:28 +0200 Subject: kernel-netlink: Add a stub for a test-runner --- src/libhydra/plugins/kernel_netlink/.gitignore | 1 + src/libhydra/plugins/kernel_netlink/Makefile.am | 19 +++++++++ src/libhydra/plugins/kernel_netlink/tests.c | 51 +++++++++++++++++++++++++ src/libhydra/plugins/kernel_netlink/tests.h | 14 +++++++ 4 files changed, 85 insertions(+) create mode 100644 src/libhydra/plugins/kernel_netlink/.gitignore create mode 100644 src/libhydra/plugins/kernel_netlink/tests.c create mode 100644 src/libhydra/plugins/kernel_netlink/tests.h (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/.gitignore b/src/libhydra/plugins/kernel_netlink/.gitignore new file mode 100644 index 000000000..2b29f2764 --- /dev/null +++ b/src/libhydra/plugins/kernel_netlink/.gitignore @@ -0,0 +1 @@ +tests diff --git a/src/libhydra/plugins/kernel_netlink/Makefile.am b/src/libhydra/plugins/kernel_netlink/Makefile.am index c91f9a9e4..daa9af8e6 100644 --- a/src/libhydra/plugins/kernel_netlink/Makefile.am +++ b/src/libhydra/plugins/kernel_netlink/Makefile.am @@ -21,3 +21,22 @@ libstrongswan_kernel_netlink_la_SOURCES = \ kernel_netlink_shared.h kernel_netlink_shared.c libstrongswan_kernel_netlink_la_LDFLAGS = -module -avoid-version + + +TESTS = tests + +check_PROGRAMS = $(TESTS) + +tests_SOURCES = \ + tests.h tests.c \ + kernel_netlink_shared.c + +tests_CFLAGS = \ + -I$(top_srcdir)/src/libstrongswan \ + -I$(top_srcdir)/src/libstrongswan/tests \ + @COVERAGE_CFLAGS@ + +tests_LDFLAGS = @COVERAGE_LDFLAGS@ +tests_LDADD = \ + $(top_builddir)/src/libstrongswan/libstrongswan.la \ + $(top_builddir)/src/libstrongswan/tests/libtest.la diff --git a/src/libhydra/plugins/kernel_netlink/tests.c b/src/libhydra/plugins/kernel_netlink/tests.c new file mode 100644 index 000000000..136b34d29 --- /dev/null +++ b/src/libhydra/plugins/kernel_netlink/tests.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2014 Martin Willi + * Copyright (C) 2014 revosec AG + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. See . + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include + +#include + +/* declare test suite constructors */ +#define TEST_SUITE(x) test_suite_t* x(); +#include "tests.h" +#undef TEST_SUITE + +static test_configuration_t tests[] = { +#define TEST_SUITE(x) \ + { .suite = x, }, +#include "tests.h" + { .suite = NULL, } +}; + +static bool test_runner_init(bool init) +{ + if (init) + { + dbg_default_set_level(0); + lib->processor->set_threads(lib->processor, 8); + dbg_default_set_level(1); + } + else + { + lib->processor->set_threads(lib->processor, 0); + lib->processor->cancel(lib->processor); + } + return TRUE; +} + +int main(int argc, char *argv[]) +{ + return test_runner_run("kernel-netlink", tests, test_runner_init); +} diff --git a/src/libhydra/plugins/kernel_netlink/tests.h b/src/libhydra/plugins/kernel_netlink/tests.h new file mode 100644 index 000000000..05502f18e --- /dev/null +++ b/src/libhydra/plugins/kernel_netlink/tests.h @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2014 Martin Willi + * Copyright (C) 2014 revosec AG + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. See . + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ -- cgit v1.2.3 From 02794456a9d9d81df238c7fac8aaff4dbc4c39de Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 9 Jul 2014 15:26:30 +0200 Subject: kernel-netlink: Add a simple send message test querying available links --- src/libhydra/plugins/kernel_netlink/Makefile.am | 1 + .../plugins/kernel_netlink/suites/test_socket.c | 67 ++++++++++++++++++++++ src/libhydra/plugins/kernel_netlink/tests.h | 2 + 3 files changed, 70 insertions(+) create mode 100644 src/libhydra/plugins/kernel_netlink/suites/test_socket.c (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/Makefile.am b/src/libhydra/plugins/kernel_netlink/Makefile.am index daa9af8e6..aeffaad92 100644 --- a/src/libhydra/plugins/kernel_netlink/Makefile.am +++ b/src/libhydra/plugins/kernel_netlink/Makefile.am @@ -29,6 +29,7 @@ check_PROGRAMS = $(TESTS) tests_SOURCES = \ tests.h tests.c \ + suites/test_socket.c \ kernel_netlink_shared.c tests_CFLAGS = \ diff --git a/src/libhydra/plugins/kernel_netlink/suites/test_socket.c b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c new file mode 100644 index 000000000..2ff8a9913 --- /dev/null +++ b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2014 Martin Willi + * Copyright (C) 2014 revosec AG + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. See . + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include + +#include "../kernel_netlink_shared.h" + +START_TEST(test_echo) +{ + netlink_socket_t *s; + struct nlmsghdr *out, *current; + struct rtgenmsg *msg; + size_t len; + netlink_buf_t request = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT, + .nlmsg_type = RTM_GETLINK, + }, + }; + + s = netlink_socket_create(NETLINK_ROUTE, NULL); + msg = NLMSG_DATA(&request.hdr); + msg->rtgen_family = AF_UNSPEC; + + ck_assert(s->send(s, &request.hdr, &out, &len) == SUCCESS); + current = out; + while (TRUE) + { + ck_assert(NLMSG_OK(current, len)); + if (current->nlmsg_type == NLMSG_DONE) + { + break; + } + ck_assert_int_eq(current->nlmsg_type, RTM_NEWLINK); + current = NLMSG_NEXT(current, len); + } + free(out); + s->destroy(s); +} +END_TEST + +Suite *socket_suite_create() +{ + Suite *s; + TCase *tc; + + s = suite_create("netlink socket"); + + tc = tcase_create("echo"); + tcase_add_test(tc, test_echo); + suite_add_tcase(s, tc); + + return s; +} diff --git a/src/libhydra/plugins/kernel_netlink/tests.h b/src/libhydra/plugins/kernel_netlink/tests.h index 05502f18e..2b6715a78 100644 --- a/src/libhydra/plugins/kernel_netlink/tests.h +++ b/src/libhydra/plugins/kernel_netlink/tests.h @@ -12,3 +12,5 @@ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. */ + +TEST_SUITE(socket_suite_create) -- cgit v1.2.3 From 3c7193f114e9dbb4f725aaf9e050fb132d9f2600 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Thu, 10 Jul 2014 16:28:44 +0200 Subject: kernel-netlink: Support parallel Netlink queries Instead of locking the socket exclusively to wait for replies, use watcher to wait for and read in responses asynchronously. This allows multiple parallel Netlink queries, which can significantly improve performance if the kernel Netlink layer has longer latencies and supports parallel queries. For vanilla Linux, parallel queries don't make much sense, as it usually returns EBUSY for the relevant dump requests. This requires a retry, and in the end makes queries more expensive under high load. Instead of checking the Netlink message sequence number to detect multi-part messages, this code now relies on the NLM_F_MULTI flag to detect them. This has previously been avoided (by 1d51abb7). It is unclear if the flag did not work correctly on very old Linux kernels, or if the flag was not used appropriately by strongSwan. The flag seems to work just fine back to 2.6.18, which is a kernel still in use by RedHat/CentOS 5. --- .../plugins/kernel_netlink/kernel_netlink_shared.c | 266 +++++++++++++++------ 1 file changed, 192 insertions(+), 74 deletions(-) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c index b4cece720..6e1dd8cf0 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c @@ -1,4 +1,6 @@ /* + * Copyright (C) 2014 Martin Willi + * Copyright (C) 2014 revosec AG * Copyright (C) 2008 Tobias Brunner * Hochschule fuer Technik Rapperswil * @@ -23,6 +25,9 @@ #include #include +#include +#include +#include typedef struct private_netlink_socket_t private_netlink_socket_t; @@ -30,20 +35,26 @@ typedef struct private_netlink_socket_t private_netlink_socket_t; * Private variables and functions of netlink_socket_t class. */ struct private_netlink_socket_t { + /** * public part of the netlink_socket_t object. */ netlink_socket_t public; /** - * mutex to lock access to netlink socket + * mutex to lock access entries */ mutex_t *mutex; /** - * current sequence number for netlink request + * Netlink request entries currently active, uintptr_t seq => entry_t + */ + hashtable_t *entries; + + /** + * Current sequence number for Netlink requests */ - int seq; + refcount_t seq; /** * netlink socket @@ -57,110 +68,212 @@ struct private_netlink_socket_t { }; /** - * Imported from kernel_netlink_ipsec.c + * Request entry the answer for a waiting thread is collected in */ -extern enum_name_t *xfrm_msg_names; +typedef struct { + /** Condition variable thread is waiting */ + condvar_t *condvar; + /** Array of hdrs in a multi-message response, as struct nlmsghdr* */ + array_t *hdrs; + /** All response messages received? */ + bool complete; +} entry_t; -METHOD(netlink_socket_t, netlink_send, status_t, - private_netlink_socket_t *this, struct nlmsghdr *in, struct nlmsghdr **out, - size_t *out_len) +/** + * Clean up a thread waiting entry + */ +static void destroy_entry(entry_t *entry) { - union { - struct nlmsghdr hdr; - u_char bytes[4096]; - } response; - struct sockaddr_nl addr; - chunk_t result = chunk_empty; - int len; - - this->mutex->lock(this->mutex); - - in->nlmsg_seq = ++this->seq; - in->nlmsg_pid = getpid(); + entry->condvar->destroy(entry->condvar); + array_destroy_function(entry->hdrs, (void*)free, NULL); + free(entry); +} - memset(&addr, 0, sizeof(addr)); - addr.nl_family = AF_NETLINK; - addr.nl_pid = 0; - addr.nl_groups = 0; +/** + * Write a Netlink message to socket + */ +static bool write_msg(private_netlink_socket_t *this, struct nlmsghdr *msg) +{ + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + int len; - if (this->names) - { - DBG3(DBG_KNL, "sending %N: %b", - this->names, in->nlmsg_type, in, in->nlmsg_len); - } while (TRUE) { - len = sendto(this->socket, in, in->nlmsg_len, 0, + len = sendto(this->socket, msg, msg->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr)); - - if (len != in->nlmsg_len) + if (len != msg->nlmsg_len) { if (errno == EINTR) { - /* interrupted, try again */ continue; } - this->mutex->unlock(this->mutex); - DBG1(DBG_KNL, "error sending to netlink socket: %s", strerror(errno)); - return FAILED; + DBG1(DBG_KNL, "netlink write error: %s", strerror(errno)); + return FALSE; } - break; + return TRUE; } +} - while (TRUE) +/** + * Read a single Netlink message from socket + */ +static size_t read_msg(private_netlink_socket_t *this, + char buf[4096], size_t buflen, bool block) +{ + ssize_t len; + + len = recv(this->socket, buf, buflen, block ? 0 : MSG_DONTWAIT); + if (len == buflen) + { + DBG1(DBG_KNL, "netlink response exceeds buffer size"); + return 0; + } + if (len < 0) { - len = recv(this->socket, &response, sizeof(response), 0); - if (len < 0) + if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) { - if (errno == EINTR) - { - DBG1(DBG_KNL, "got interrupted"); - /* interrupted, try again */ - continue; - } - DBG1(DBG_KNL, "error reading from netlink socket: %s", strerror(errno)); - this->mutex->unlock(this->mutex); - free(result.ptr); - return FAILED; + DBG1(DBG_KNL, "netlink read error: %s", strerror(errno)); } - if (!NLMSG_OK(&response.hdr, len)) + return 0; + } + return len; +} + +/** + * Queue received response message + */ +static bool queue(private_netlink_socket_t *this, struct nlmsghdr *buf) +{ + struct nlmsghdr *hdr; + entry_t *entry; + uintptr_t seq; + + seq = (uintptr_t)buf->nlmsg_seq; + + this->mutex->lock(this->mutex); + entry = this->entries->get(this->entries, (void*)seq); + if (entry) + { + hdr = malloc(buf->nlmsg_len); + memcpy(hdr, buf, buf->nlmsg_len); + array_insert(entry->hdrs, ARRAY_TAIL, hdr); + if (hdr->nlmsg_type == NLMSG_DONE || !(hdr->nlmsg_flags & NLM_F_MULTI)) { - DBG1(DBG_KNL, "received corrupted netlink message"); - this->mutex->unlock(this->mutex); - free(result.ptr); - return FAILED; + entry->complete = TRUE; + entry->condvar->signal(entry->condvar); } - if (response.hdr.nlmsg_seq != this->seq) + } + else + { + DBG1(DBG_KNL, "received unknown netlink seq %u, ignored", seq); + } + this->mutex->unlock(this->mutex); + + return entry != NULL; +} + +/** + * Read and queue response message, optionally blocking + */ +static void read_and_queue(private_netlink_socket_t *this, bool block) +{ + struct nlmsghdr *hdr; + union { + struct nlmsghdr hdr; + char bytes[4096]; + } buf; + size_t len; + + len = read_msg(this, buf.bytes, sizeof(buf.bytes), block); + if (len) + { + hdr = &buf.hdr; + while (NLMSG_OK(hdr, len)) { - DBG1(DBG_KNL, "received invalid netlink sequence number"); - if (response.hdr.nlmsg_seq < this->seq) + if (!queue(this, hdr)) { - continue; + break; } - this->mutex->unlock(this->mutex); - free(result.ptr); - return FAILED; + hdr = NLMSG_NEXT(hdr, len); + } + } +} + +CALLBACK(watch, bool, + private_netlink_socket_t *this, int fd, watcher_event_t event) +{ + if (event == WATCHER_READ) + { + read_and_queue(this, FALSE); + } + return TRUE; +} + +METHOD(netlink_socket_t, netlink_send, status_t, + private_netlink_socket_t *this, struct nlmsghdr *in, struct nlmsghdr **out, + size_t *out_len) +{ + struct nlmsghdr *hdr; + chunk_t result = {}; + entry_t *entry; + uintptr_t seq; + + seq = ref_get(&this->seq); + in->nlmsg_seq = seq; + in->nlmsg_pid = getpid(); + + if (this->names) + { + DBG3(DBG_KNL, "sending %N %u: %b", this->names, in->nlmsg_type, + (u_int)seq, in, in->nlmsg_len); + } + + this->mutex->lock(this->mutex); + if (!write_msg(this, in)) + { + this->mutex->unlock(this->mutex); + return FAILED; + } + + INIT(entry, + .condvar = condvar_create(CONDVAR_TYPE_DEFAULT), + .hdrs = array_create(0, 0), + ); + this->entries->put(this->entries, (void*)seq, entry); + + while (!entry->complete) + { + if (lib->watcher->get_state(lib->watcher) == WATCHER_RUNNING) + { + entry->condvar->wait(entry->condvar, this->mutex); + } + else + { /* During (de-)initialization, no watcher thread is active. + * collect responses ourselves. */ + read_and_queue(this, TRUE); } + } + this->entries->remove(this->entries, (void*)seq); - result = chunk_cat("mc", result, chunk_create(response.bytes, len)); + this->mutex->unlock(this->mutex); - /* NLM_F_MULTI flag does not seem to be set correctly, we use sequence - * numbers to detect multi header messages */ - len = recv(this->socket, &response.hdr, sizeof(response.hdr), - MSG_PEEK | MSG_DONTWAIT); - if (len == sizeof(response.hdr) && response.hdr.nlmsg_seq == this->seq) + while (array_remove(entry->hdrs, ARRAY_HEAD, &hdr)) + { + if (this->names) { - /* seems to be multipart */ - continue; + DBG3(DBG_KNL, "received %N %u: %b", this->names, hdr->nlmsg_type, + hdr->nlmsg_seq, hdr, hdr->nlmsg_len); } - break; + result = chunk_cat("mm", result, + chunk_create((char*)hdr, hdr->nlmsg_len)); } + destroy_entry(entry); *out_len = result.len; *out = (struct nlmsghdr*)result.ptr; - this->mutex->unlock(this->mutex); - return SUCCESS; } @@ -221,8 +334,10 @@ METHOD(netlink_socket_t, destroy, void, { if (this->socket != -1) { + lib->watcher->remove(lib->watcher, this->socket); close(this->socket); } + this->entries->destroy(this->entries); this->mutex->destroy(this->mutex); free(this); } @@ -244,8 +359,9 @@ netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names) .destroy = _destroy, }, .seq = 200, - .mutex = mutex_create(MUTEX_TYPE_DEFAULT), + .mutex = mutex_create(MUTEX_TYPE_RECURSIVE), .socket = socket(AF_NETLINK, SOCK_RAW, protocol), + .entries = hashtable_create(hashtable_hash_ptr, hashtable_equals_ptr, 4), .names = names, ); @@ -262,6 +378,8 @@ netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names) return NULL; } + lib->watcher->add(lib->watcher, this->socket, WATCHER_READ, watch, this); + return &this->public; } -- cgit v1.2.3 From 84f6853c42e4d9658c17f2a76e8586f1f7f8929b Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Mon, 14 Jul 2014 16:50:07 +0200 Subject: kernel-netlink: Retry netlink query while kernel returns EBUSY If the kernel can't execute a Netlink query because a different query is already active, it returns EBUSY. As this can happen now as we support parallel queries, retry on this error condition. --- .../plugins/kernel_netlink/kernel_netlink_shared.c | 40 ++++++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c index 6e1dd8cf0..9c2e34f82 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c @@ -211,9 +211,11 @@ CALLBACK(watch, bool, return TRUE; } -METHOD(netlink_socket_t, netlink_send, status_t, - private_netlink_socket_t *this, struct nlmsghdr *in, struct nlmsghdr **out, - size_t *out_len) +/** + * Send a netlink request, try once + */ +static status_t send_once(private_netlink_socket_t *this, struct nlmsghdr *in, + struct nlmsghdr **out, size_t *out_len) { struct nlmsghdr *hdr; chunk_t result = {}; @@ -277,6 +279,38 @@ METHOD(netlink_socket_t, netlink_send, status_t, return SUCCESS; } +METHOD(netlink_socket_t, netlink_send, status_t, + private_netlink_socket_t *this, struct nlmsghdr *in, struct nlmsghdr **out, + size_t *out_len) +{ + while (TRUE) + { + struct nlmsghdr *hdr; + status_t status; + size_t len; + + status = send_once(this, in, &hdr, &len); + if (status != SUCCESS) + { + return status; + } + if (hdr->nlmsg_type == NLMSG_ERROR) + { + struct nlmsgerr* err; + + err = NLMSG_DATA(hdr); + if (err->error == -EBUSY) + { + free(hdr); + continue; + } + } + *out = hdr; + *out_len = len; + return SUCCESS; + } +} + METHOD(netlink_socket_t, netlink_send_ack, status_t, private_netlink_socket_t *this, struct nlmsghdr *in) { -- cgit v1.2.3 From aa762bed7b3aad4756e3f8563c364f1f75ccf40f Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Mon, 14 Jul 2014 16:56:30 +0200 Subject: kernel-netlink: Add a stress test with several threads doing Netlink exchanges --- .../plugins/kernel_netlink/suites/test_socket.c | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/suites/test_socket.c b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c index 2ff8a9913..415a93649 100644 --- a/src/libhydra/plugins/kernel_netlink/suites/test_socket.c +++ b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c @@ -15,6 +15,8 @@ #include +#include + #include "../kernel_netlink_shared.h" START_TEST(test_echo) @@ -52,6 +54,62 @@ START_TEST(test_echo) } END_TEST +CALLBACK(stress, void*, + netlink_socket_t *s) +{ + struct nlmsghdr *out, *current; + struct rtgenmsg *msg; + size_t len; + int i; + netlink_buf_t request = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT, + .nlmsg_type = RTM_GETLINK, + }, + }; + + msg = NLMSG_DATA(&request.hdr); + msg->rtgen_family = AF_UNSPEC; + + for (i = 0; i < 10; i++) + { + ck_assert(s->send(s, &request.hdr, &out, &len) == SUCCESS); + current = out; + while (TRUE) + { + ck_assert(NLMSG_OK(current, len)); + if (current->nlmsg_type == NLMSG_DONE) + { + break; + } + ck_assert_int_eq(current->nlmsg_type, RTM_NEWLINK); + current = NLMSG_NEXT(current, len); + } + free(out); + } + return NULL; +} + +START_TEST(test_stress) +{ + thread_t *threads[10]; + netlink_socket_t *s; + int i; + + s = netlink_socket_create(NETLINK_ROUTE, NULL); + for (i = 0; i < countof(threads); i++) + { + threads[i] = thread_create(stress, s); + } + for (i = 0; i < countof(threads); i++) + { + threads[i]->join(threads[i]); + } + s->destroy(s); +} +END_TEST + Suite *socket_suite_create() { Suite *s; @@ -63,5 +121,9 @@ Suite *socket_suite_create() tcase_add_test(tc, test_echo); suite_add_tcase(s, tc); + tc = tcase_create("stress"); + tcase_add_test(tc, test_stress); + suite_add_tcase(s, tc); + return s; } -- cgit v1.2.3 From 15dc61757c2f4650469adcbec2626d01a671a69a Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Mon, 14 Jul 2014 17:17:38 +0200 Subject: kernel-netlink: Implement configurable Netlink request retransmission --- .../plugins/kernel_netlink/kernel_netlink_shared.c | 101 ++++++++++++++++++--- 1 file changed, 86 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c index 9c2e34f82..8f49e03d1 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c @@ -65,6 +65,16 @@ struct private_netlink_socket_t { * Enum names for Netlink messages */ enum_name_t *names; + + /** + * Timeout for Netlink replies, in ms + */ + u_int timeout; + + /** + * Number of times to repeat timed out queries + */ + u_int retries; }; /** @@ -117,13 +127,28 @@ static bool write_msg(private_netlink_socket_t *this, struct nlmsghdr *msg) } /** - * Read a single Netlink message from socket + * Read a single Netlink message from socket, return 0 on error, -1 on timeout */ -static size_t read_msg(private_netlink_socket_t *this, - char buf[4096], size_t buflen, bool block) +static ssize_t read_msg(private_netlink_socket_t *this, + char buf[4096], size_t buflen, bool block) { ssize_t len; + if (block) + { + fd_set set; + timeval_t tv = {}; + + FD_ZERO(&set); + FD_SET(this->socket, &set); + timeval_add_ms(&tv, this->timeout); + + if (select(this->socket + 1, &set, NULL, NULL, + this->timeout ? &tv : NULL) <= 0) + { + return -1; + } + } len = recv(this->socket, buf, buflen, block ? 0 : MSG_DONTWAIT); if (len == buflen) { @@ -175,18 +200,22 @@ static bool queue(private_netlink_socket_t *this, struct nlmsghdr *buf) } /** - * Read and queue response message, optionally blocking + * Read and queue response message, optionally blocking, returns TRUE on timeout */ -static void read_and_queue(private_netlink_socket_t *this, bool block) +static bool read_and_queue(private_netlink_socket_t *this, bool block) { struct nlmsghdr *hdr; union { struct nlmsghdr hdr; char bytes[4096]; } buf; - size_t len; + ssize_t len; len = read_msg(this, buf.bytes, sizeof(buf.bytes), block); + if (len == -1) + { + return TRUE; + } if (len) { hdr = &buf.hdr; @@ -199,6 +228,7 @@ static void read_and_queue(private_netlink_socket_t *this, bool block) hdr = NLMSG_NEXT(hdr, len); } } + return FALSE; } CALLBACK(watch, bool, @@ -215,14 +245,12 @@ CALLBACK(watch, bool, * Send a netlink request, try once */ static status_t send_once(private_netlink_socket_t *this, struct nlmsghdr *in, - struct nlmsghdr **out, size_t *out_len) + uintptr_t seq, struct nlmsghdr **out, size_t *out_len) { struct nlmsghdr *hdr; chunk_t result = {}; entry_t *entry; - uintptr_t seq; - seq = ref_get(&this->seq); in->nlmsg_seq = seq; in->nlmsg_pid = getpid(); @@ -249,18 +277,38 @@ static status_t send_once(private_netlink_socket_t *this, struct nlmsghdr *in, { if (lib->watcher->get_state(lib->watcher) == WATCHER_RUNNING) { - entry->condvar->wait(entry->condvar, this->mutex); + if (this->timeout) + { + if (entry->condvar->timed_wait(entry->condvar, this->mutex, + this->timeout)) + { + break; + } + } + else + { + entry->condvar->wait(entry->condvar, this->mutex); + } } else { /* During (de-)initialization, no watcher thread is active. * collect responses ourselves. */ - read_and_queue(this, TRUE); + if (read_and_queue(this, TRUE)) + { + break; + } } } this->entries->remove(this->entries, (void*)seq); this->mutex->unlock(this->mutex); + if (!entry->complete) + { /* timeout */ + destroy_entry(entry); + return OUT_OF_RES; + } + while (array_remove(entry->hdrs, ARRAY_HEAD, &hdr)) { if (this->names) @@ -283,16 +331,31 @@ METHOD(netlink_socket_t, netlink_send, status_t, private_netlink_socket_t *this, struct nlmsghdr *in, struct nlmsghdr **out, size_t *out_len) { - while (TRUE) + uintptr_t seq; + u_int try; + + seq = ref_get(&this->seq); + + for (try = 0; try <= this->retries; ++try) { struct nlmsghdr *hdr; status_t status; size_t len; - status = send_once(this, in, &hdr, &len); - if (status != SUCCESS) + if (try > 0) { - return status; + DBG1(DBG_KNL, "retransmitting Netlink request (%u/%u)", + try, this->retries); + } + status = send_once(this, in, seq, &hdr, &len); + switch (status) + { + case SUCCESS: + break; + case OUT_OF_RES: + continue; + default: + return status; } if (hdr->nlmsg_type == NLMSG_ERROR) { @@ -302,6 +365,7 @@ METHOD(netlink_socket_t, netlink_send, status_t, if (err->error == -EBUSY) { free(hdr); + try--; continue; } } @@ -309,6 +373,9 @@ METHOD(netlink_socket_t, netlink_send, status_t, *out_len = len; return SUCCESS; } + DBG1(DBG_KNL, "Netlink request timed out after %u retransmits", + this->retries); + return OUT_OF_RES; } METHOD(netlink_socket_t, netlink_send_ack, status_t, @@ -397,6 +464,10 @@ netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names) .socket = socket(AF_NETLINK, SOCK_RAW, protocol), .entries = hashtable_create(hashtable_hash_ptr, hashtable_equals_ptr, 4), .names = names, + .timeout = lib->settings->get_int(lib->settings, + "%s.plugins.kernel-netlink.timeout", 0, lib->ns), + .retries = lib->settings->get_int(lib->settings, + "%s.plugins.kernel-netlink.retries", 0, lib->ns), ); if (this->socket == -1) -- cgit v1.2.3 From 553be051b73755d3171dc38552e2beb2669793b0 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 15 Jul 2014 15:11:14 +0200 Subject: kernel-netlink: Add a compile-time hook to simulate request message loss --- .../plugins/kernel_netlink/kernel_netlink_shared.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c index 8f49e03d1..2875436c6 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c @@ -77,6 +77,16 @@ struct private_netlink_socket_t { u_int retries; }; +/** + * #definable hook to simulate request message loss + */ +#ifdef NETLINK_MSG_LOSS_HOOK +bool NETLINK_MSG_LOSS_HOOK(struct nlmsghdr *msg); +#define msg_loss_hook(msg) NETLINK_MSG_LOSS_HOOK(msg) +#else +#define msg_loss_hook(msg) FALSE +#endif + /** * Request entry the answer for a waiting thread is collected in */ @@ -109,6 +119,11 @@ static bool write_msg(private_netlink_socket_t *this, struct nlmsghdr *msg) }; int len; + if (msg_loss_hook(msg)) + { + return TRUE; + } + while (TRUE) { len = sendto(this->socket, msg, msg->nlmsg_len, 0, -- cgit v1.2.3 From adb930ed4f21c8deb39849f8165e14e003906844 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 15 Jul 2014 15:47:03 +0200 Subject: kernel-netlink: Add test cases for successful and timing out retransmissions --- src/libhydra/plugins/kernel_netlink/Makefile.am | 1 + .../plugins/kernel_netlink/suites/test_socket.c | 89 ++++++++++++++++++++++ 2 files changed, 90 insertions(+) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/Makefile.am b/src/libhydra/plugins/kernel_netlink/Makefile.am index aeffaad92..cc8855406 100644 --- a/src/libhydra/plugins/kernel_netlink/Makefile.am +++ b/src/libhydra/plugins/kernel_netlink/Makefile.am @@ -35,6 +35,7 @@ tests_SOURCES = \ tests_CFLAGS = \ -I$(top_srcdir)/src/libstrongswan \ -I$(top_srcdir)/src/libstrongswan/tests \ + -DNETLINK_MSG_LOSS_HOOK=netlink_msg_loss \ @COVERAGE_CFLAGS@ tests_LDFLAGS = @COVERAGE_LDFLAGS@ diff --git a/src/libhydra/plugins/kernel_netlink/suites/test_socket.c b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c index 415a93649..ceea6535d 100644 --- a/src/libhydra/plugins/kernel_netlink/suites/test_socket.c +++ b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c @@ -19,6 +19,25 @@ #include "../kernel_netlink_shared.h" +/** + * Netlink message drop configuration + */ +static int drop_interval = 0; + +/** + * Netlink message drop hook + */ +bool netlink_msg_loss(struct nlmsghdr *hdr) +{ + static refcount_t i; + + if (drop_interval) + { + return ref_get(&i) % drop_interval == drop_interval - 1; + } + return FALSE; +} + START_TEST(test_echo) { netlink_socket_t *s; @@ -110,6 +129,71 @@ START_TEST(test_stress) } END_TEST +START_TEST(test_retransmit_success) +{ + netlink_socket_t *s; + struct nlmsghdr *out; + struct rtgenmsg *msg; + size_t len; + netlink_buf_t request = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT, + .nlmsg_type = RTM_GETLINK, + }, + }; + + drop_interval = 2; + + lib->settings->set_int(lib->settings, + "%s.plugins.kernel-netlink.timeout", 100, lib->ns); + lib->settings->set_int(lib->settings, + "%s.plugins.kernel-netlink.retries", 1, lib->ns); + + s = netlink_socket_create(NETLINK_ROUTE, NULL); + msg = NLMSG_DATA(&request.hdr); + msg->rtgen_family = AF_UNSPEC; + + ck_assert(s->send(s, &request.hdr, &out, &len) == SUCCESS); + free(out); + s->destroy(s); + + drop_interval = 0; +} +END_TEST + +START_TEST(test_retransmit_fail) +{ + netlink_socket_t *s; + struct nlmsghdr *out; + struct rtgenmsg *msg; + size_t len; + netlink_buf_t request = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg)), + .nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT, + .nlmsg_type = RTM_GETLINK, + }, + }; + + drop_interval = 1; + + lib->settings->set_int(lib->settings, + "%s.plugins.kernel-netlink.timeout", 50, lib->ns); + lib->settings->set_int(lib->settings, + "%s.plugins.kernel-netlink.retries", 3, lib->ns); + + s = netlink_socket_create(NETLINK_ROUTE, NULL); + msg = NLMSG_DATA(&request.hdr); + msg->rtgen_family = AF_UNSPEC; + + ck_assert(s->send(s, &request.hdr, &out, &len) == OUT_OF_RES); + s->destroy(s); + + drop_interval = 0; +} +END_TEST + Suite *socket_suite_create() { Suite *s; @@ -125,5 +209,10 @@ Suite *socket_suite_create() tcase_add_test(tc, test_stress); suite_add_tcase(s, tc); + tc = tcase_create("retransmit"); + tcase_add_test(tc, test_retransmit_success); + tcase_add_test(tc, test_retransmit_fail); + suite_add_tcase(s, tc); + return s; } -- cgit v1.2.3 From 4b41ea956cb9ee2bdabf969ffd0937e946fdd5e5 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 15 Jul 2014 15:58:11 +0200 Subject: kernel-netlink: Add non-dumping variants of echo and stress tests --- .../plugins/kernel_netlink/suites/test_socket.c | 84 ++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/suites/test_socket.c b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c index ceea6535d..c66aa2c02 100644 --- a/src/libhydra/plugins/kernel_netlink/suites/test_socket.c +++ b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c @@ -39,6 +39,37 @@ bool netlink_msg_loss(struct nlmsghdr *hdr) } START_TEST(test_echo) +{ + netlink_socket_t *s; + struct nlmsghdr *out; + struct rtmsg *msg; + char dst[] = { + 127,0,0,1 + }; + size_t len; + netlink_buf_t request = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), + .nlmsg_flags = NLM_F_REQUEST, + .nlmsg_type = RTM_GETROUTE, + }, + }; + + msg = NLMSG_DATA(&request.hdr); + msg->rtm_family = AF_INET; + netlink_add_attribute(&request.hdr, RTA_DST, + chunk_from_thing(dst), sizeof(request)); + + s = netlink_socket_create(NETLINK_ROUTE, NULL); + + ck_assert(s->send(s, &request.hdr, &out, &len) == SUCCESS); + ck_assert_int_eq(out->nlmsg_type, RTM_NEWROUTE); + free(out); + s->destroy(s); +} +END_TEST + +START_TEST(test_echo_dump) { netlink_socket_t *s; struct nlmsghdr *out, *current; @@ -75,6 +106,38 @@ END_TEST CALLBACK(stress, void*, netlink_socket_t *s) +{ + struct nlmsghdr *out; + struct rtmsg *msg; + char dst[] = { + 127,0,0,1 + }; + size_t len; + int i; + netlink_buf_t request = { + .hdr = { + .nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), + .nlmsg_flags = NLM_F_REQUEST, + .nlmsg_type = RTM_GETROUTE, + }, + }; + + for (i = 0; i < 10; i++) + { + msg = NLMSG_DATA(&request.hdr); + msg->rtm_family = AF_INET; + netlink_add_attribute(&request.hdr, RTA_DST, + chunk_from_thing(dst), sizeof(request)); + + ck_assert(s->send(s, &request.hdr, &out, &len) == SUCCESS); + ck_assert_int_eq(out->nlmsg_type, RTM_NEWROUTE); + free(out); + } + return NULL; +} + +CALLBACK(stress_dump, void*, + netlink_socket_t *s) { struct nlmsghdr *out, *current; struct rtgenmsg *msg; @@ -129,6 +192,25 @@ START_TEST(test_stress) } END_TEST +START_TEST(test_stress_dump) +{ + thread_t *threads[10]; + netlink_socket_t *s; + int i; + + s = netlink_socket_create(NETLINK_ROUTE, NULL); + for (i = 0; i < countof(threads); i++) + { + threads[i] = thread_create(stress_dump, s); + } + for (i = 0; i < countof(threads); i++) + { + threads[i]->join(threads[i]); + } + s->destroy(s); +} +END_TEST + START_TEST(test_retransmit_success) { netlink_socket_t *s; @@ -203,10 +285,12 @@ Suite *socket_suite_create() tc = tcase_create("echo"); tcase_add_test(tc, test_echo); + tcase_add_test(tc, test_echo_dump); suite_add_tcase(s, tc); tc = tcase_create("stress"); tcase_add_test(tc, test_stress); + tcase_add_test(tc, test_stress_dump); suite_add_tcase(s, tc); tc = tcase_create("retransmit"); -- cgit v1.2.3 From 9b43dddff4dee37a1b527482403d2e9d5ed6ece0 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 16 Jul 2014 11:59:59 +0200 Subject: kernel-netlink: Release lock while doing Netlink NEW/DELADDR operations Besides that it can improve throughput, it avoids a deadlock situation. If all threads are busy, watcher will invoke the FD notification for NEWADDR events itself. If the lock is held, it gets locked up. As watcher is not dispatching anymore, it can't signal Netlink socket send() completion, and the send() operation does not return and keeps the lock. --- .../plugins/kernel_netlink/kernel_netlink_net.c | 25 +++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_net.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_net.c index 9d9f15974..3c1a3f87d 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_net.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_net.c @@ -1975,6 +1975,8 @@ METHOD(kernel_net_t, add_ip, status_t, if (iface) { addr_entry_t *addr; + char *ifname; + int ifi; INIT(addr, .ip = virtual_ip->clone(virtual_ip), @@ -1983,26 +1985,30 @@ METHOD(kernel_net_t, add_ip, status_t, ); iface->addrs->insert_last(iface->addrs, addr); addr_map_entry_add(this->vips, addr, iface); + ifi = iface->ifindex; + this->lock->unlock(this->lock); if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL, - iface->ifindex, virtual_ip, prefix) == SUCCESS) + ifi, virtual_ip, prefix) == SUCCESS) { + this->lock->write_lock(this->lock); while (!is_vip_installed_or_gone(this, virtual_ip, &entry)) { /* wait until address appears */ this->condvar->wait(this->condvar, this->lock); } if (entry) { /* we fail if the interface got deleted in the meantime */ - DBG2(DBG_KNL, "virtual IP %H installed on %s", virtual_ip, - entry->iface->ifname); + ifname = strdup(entry->iface->ifname); this->lock->unlock(this->lock); + DBG2(DBG_KNL, "virtual IP %H installed on %s", + virtual_ip, ifname); /* during IKEv1 reauthentication, children get moved from * old the new SA before the virtual IP is available. This * kills the route for our virtual IP, reinstall. */ - queue_route_reinstall(this, strdup(entry->iface->ifname)); + queue_route_reinstall(this, ifname); return SUCCESS; } + this->lock->unlock(this->lock); } - this->lock->unlock(this->lock); DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip); return FAILED; } @@ -2048,20 +2054,23 @@ METHOD(kernel_net_t, del_ip, status_t, if (entry->addr->refcount == 1) { status_t status; + int ifi; /* we set this flag so that threads calling add_ip will block and wait * until the entry is gone, also so we can wait below */ entry->addr->installed = FALSE; - status = manage_ipaddr(this, RTM_DELADDR, 0, entry->iface->ifindex, - virtual_ip, prefix); + ifi = entry->iface->ifindex; + this->lock->unlock(this->lock); + status = manage_ipaddr(this, RTM_DELADDR, 0, ifi, virtual_ip, prefix); if (status == SUCCESS && wait) { /* wait until the address is really gone */ + this->lock->write_lock(this->lock); while (is_known_vip(this, virtual_ip)) { this->condvar->wait(this->condvar, this->lock); } + this->lock->unlock(this->lock); } - this->lock->unlock(this->lock); return status; } else -- cgit v1.2.3 From 6c58fabe29dd88384c8475293aec03fd946f969e Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 16 Jul 2014 12:38:30 +0200 Subject: kernel-netlink: Add options to enable parallel Netlink queries explicitly As under vanilla Linux the kernel can't handle parallel dump queries and returns EBUSY, it makes not much sense to use them. Disable parallel queries by default to basically restore original behavior, improving performance. --- .../plugins/kernel_netlink/kernel_netlink_ipsec.c | 4 +++- .../plugins/kernel_netlink/kernel_netlink_net.c | 4 +++- .../plugins/kernel_netlink/kernel_netlink_shared.c | 23 ++++++++++++++++----- .../plugins/kernel_netlink/kernel_netlink_shared.h | 4 +++- .../plugins/kernel_netlink/suites/test_socket.c | 24 +++++++++++----------- 5 files changed, 39 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c index dfd71f3bd..80c8e2433 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c @@ -2711,7 +2711,9 @@ kernel_netlink_ipsec_t *kernel_netlink_ipsec_create() fclose(f); } - this->socket_xfrm = netlink_socket_create(NETLINK_XFRM, xfrm_msg_names); + this->socket_xfrm = netlink_socket_create(NETLINK_XFRM, xfrm_msg_names, + lib->settings->get_bool(lib->settings, + "%s.plugins.kernel-netlink.parallel_xfrm", FALSE, lib->ns)); if (!this->socket_xfrm) { destroy(this); diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_net.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_net.c index 3c1a3f87d..b8cd3977d 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_net.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_net.c @@ -2499,7 +2499,9 @@ kernel_netlink_net_t *kernel_netlink_net_create() .destroy = _destroy, }, }, - .socket = netlink_socket_create(NETLINK_ROUTE, rt_msg_names), + .socket = netlink_socket_create(NETLINK_ROUTE, rt_msg_names, + lib->settings->get_bool(lib->settings, + "%s.plugins.kernel-netlink.parallel_route", FALSE, lib->ns)), .rt_exclude = linked_list_create(), .routes = hashtable_create((hashtable_hash_t)route_entry_hash, (hashtable_equals_t)route_entry_equals, 16), diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c index 2875436c6..ba3b17e23 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c @@ -75,6 +75,11 @@ struct private_netlink_socket_t { * Number of times to repeat timed out queries */ u_int retries; + + /** + * Use parallel netlink queries + */ + bool parallel; }; /** @@ -290,7 +295,8 @@ static status_t send_once(private_netlink_socket_t *this, struct nlmsghdr *in, while (!entry->complete) { - if (lib->watcher->get_state(lib->watcher) == WATCHER_RUNNING) + if (this->parallel && + lib->watcher->get_state(lib->watcher) == WATCHER_RUNNING) { if (this->timeout) { @@ -450,7 +456,10 @@ METHOD(netlink_socket_t, destroy, void, { if (this->socket != -1) { - lib->watcher->remove(lib->watcher, this->socket); + if (this->parallel) + { + lib->watcher->remove(lib->watcher, this->socket); + } close(this->socket); } this->entries->destroy(this->entries); @@ -461,7 +470,8 @@ METHOD(netlink_socket_t, destroy, void, /** * Described in header. */ -netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names) +netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names, + bool parallel) { private_netlink_socket_t *this; struct sockaddr_nl addr = { @@ -483,6 +493,7 @@ netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names) "%s.plugins.kernel-netlink.timeout", 0, lib->ns), .retries = lib->settings->get_int(lib->settings, "%s.plugins.kernel-netlink.retries", 0, lib->ns), + .parallel = parallel, ); if (this->socket == -1) @@ -497,8 +508,10 @@ netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names) destroy(this); return NULL; } - - lib->watcher->add(lib->watcher, this->socket, WATCHER_READ, watch, this); + if (this->parallel) + { + lib->watcher->add(lib->watcher, this->socket, WATCHER_READ, watch, this); + } return &this->public; } diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.h b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.h index 069f746d1..66682907d 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.h +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.h @@ -66,8 +66,10 @@ struct netlink_socket_t { * * @param protocol protocol type (e.g. NETLINK_XFRM or NETLINK_ROUTE) * @param names optional enum names for Netlink messages + * @param parallel support parallel queries on this Netlink socket */ -netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names); +netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names, + bool parallel); /** * Creates an rtattr and adds it to the given netlink message. diff --git a/src/libhydra/plugins/kernel_netlink/suites/test_socket.c b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c index c66aa2c02..3e8facd0a 100644 --- a/src/libhydra/plugins/kernel_netlink/suites/test_socket.c +++ b/src/libhydra/plugins/kernel_netlink/suites/test_socket.c @@ -60,7 +60,7 @@ START_TEST(test_echo) netlink_add_attribute(&request.hdr, RTA_DST, chunk_from_thing(dst), sizeof(request)); - s = netlink_socket_create(NETLINK_ROUTE, NULL); + s = netlink_socket_create(NETLINK_ROUTE, NULL, _i != 0); ck_assert(s->send(s, &request.hdr, &out, &len) == SUCCESS); ck_assert_int_eq(out->nlmsg_type, RTM_NEWROUTE); @@ -83,7 +83,7 @@ START_TEST(test_echo_dump) }, }; - s = netlink_socket_create(NETLINK_ROUTE, NULL); + s = netlink_socket_create(NETLINK_ROUTE, NULL, _i != 0); msg = NLMSG_DATA(&request.hdr); msg->rtgen_family = AF_UNSPEC; @@ -179,7 +179,7 @@ START_TEST(test_stress) netlink_socket_t *s; int i; - s = netlink_socket_create(NETLINK_ROUTE, NULL); + s = netlink_socket_create(NETLINK_ROUTE, NULL, _i != 0); for (i = 0; i < countof(threads); i++) { threads[i] = thread_create(stress, s); @@ -198,7 +198,7 @@ START_TEST(test_stress_dump) netlink_socket_t *s; int i; - s = netlink_socket_create(NETLINK_ROUTE, NULL); + s = netlink_socket_create(NETLINK_ROUTE, NULL, _i != 0); for (i = 0; i < countof(threads); i++) { threads[i] = thread_create(stress_dump, s); @@ -232,7 +232,7 @@ START_TEST(test_retransmit_success) lib->settings->set_int(lib->settings, "%s.plugins.kernel-netlink.retries", 1, lib->ns); - s = netlink_socket_create(NETLINK_ROUTE, NULL); + s = netlink_socket_create(NETLINK_ROUTE, NULL, _i != 0); msg = NLMSG_DATA(&request.hdr); msg->rtgen_family = AF_UNSPEC; @@ -265,7 +265,7 @@ START_TEST(test_retransmit_fail) lib->settings->set_int(lib->settings, "%s.plugins.kernel-netlink.retries", 3, lib->ns); - s = netlink_socket_create(NETLINK_ROUTE, NULL); + s = netlink_socket_create(NETLINK_ROUTE, NULL, _i != 0); msg = NLMSG_DATA(&request.hdr); msg->rtgen_family = AF_UNSPEC; @@ -284,18 +284,18 @@ Suite *socket_suite_create() s = suite_create("netlink socket"); tc = tcase_create("echo"); - tcase_add_test(tc, test_echo); - tcase_add_test(tc, test_echo_dump); + tcase_add_loop_test(tc, test_echo, 0, 2); + tcase_add_loop_test(tc, test_echo_dump, 0, 2); suite_add_tcase(s, tc); tc = tcase_create("stress"); - tcase_add_test(tc, test_stress); - tcase_add_test(tc, test_stress_dump); + tcase_add_loop_test(tc, test_stress, 0, 2); + tcase_add_loop_test(tc, test_stress_dump, 0, 2); suite_add_tcase(s, tc); tc = tcase_create("retransmit"); - tcase_add_test(tc, test_retransmit_success); - tcase_add_test(tc, test_retransmit_fail); + tcase_add_loop_test(tc, test_retransmit_success, 0, 2); + tcase_add_loop_test(tc, test_retransmit_fail, 0, 2); suite_add_tcase(s, tc); return s; -- cgit v1.2.3 From 87888f99265b8617fd430f2adc6c6c5e59a47979 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 16 Jul 2014 16:31:52 +0200 Subject: kernel-netlink: Alternatively support global port based IKE bypass policies The socket based IKE bypass policies are usually superior, but not supported on all networking stacks. The port based variant uses global policies for the UDP ports we have IKE sockets for. --- .../plugins/kernel_netlink/kernel_netlink_ipsec.c | 163 ++++++++++++++++++++- 1 file changed, 160 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c index 80c8e2433..f0ada41d5 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -319,6 +320,11 @@ struct private_kernel_netlink_ipsec_t { * Whether to track the history of a policy */ bool policy_history; + + /** + * Installed port based IKE bypass policies, as bypass_t + */ + array_t *bypass; }; typedef struct route_entry_t route_entry_t; @@ -2576,9 +2582,11 @@ METHOD(kernel_ipsec_t, flush_policies, status_t, return SUCCESS; } - -METHOD(kernel_ipsec_t, bypass_socket, bool, - private_kernel_netlink_ipsec_t *this, int fd, int family) +/** + * Bypass socket using a per-socket policy + */ +static bool add_socket_bypass(private_kernel_netlink_ipsec_t *this, + int fd, int family) { struct xfrm_userpolicy_info policy; u_int sol, ipsec_policy; @@ -2618,6 +2626,152 @@ METHOD(kernel_ipsec_t, bypass_socket, bool, return TRUE; } +/** + * Port based IKE bypass policy + */ +typedef struct { + /** address family */ + int family; + /** layer 4 protocol */ + int proto; + /** port number, network order */ + u_int16_t port; +} bypass_t; + +/** + * Add or remove a bypass policy from/to kernel + */ +static bool manage_bypass(private_kernel_netlink_ipsec_t *this, + int type, policy_dir_t dir, bypass_t *bypass) +{ + netlink_buf_t request; + struct xfrm_selector *sel; + struct nlmsghdr *hdr; + + memset(&request, 0, sizeof(request)); + hdr = &request.hdr; + hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + hdr->nlmsg_type = type; + + if (type == XFRM_MSG_NEWPOLICY) + { + struct xfrm_userpolicy_info *policy; + + hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)); + + policy = NLMSG_DATA(hdr); + policy->dir = dir; + policy->priority = 32; + policy->action = XFRM_POLICY_ALLOW; + policy->share = XFRM_SHARE_ANY; + + policy->lft.soft_byte_limit = XFRM_INF; + policy->lft.soft_packet_limit = XFRM_INF; + policy->lft.hard_byte_limit = XFRM_INF; + policy->lft.hard_packet_limit = XFRM_INF; + + sel = &policy->sel; + } + else /* XFRM_MSG_DELPOLICY */ + { + struct xfrm_userpolicy_id *policy; + + hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)); + + policy = NLMSG_DATA(hdr); + policy->dir = dir; + + sel = &policy->sel; + } + + sel->family = bypass->family; + sel->proto = bypass->proto; + if (dir == POLICY_IN) + { + sel->dport = bypass->port; + sel->dport_mask = 0xffff; + } + else + { + sel->sport = bypass->port; + sel->sport_mask = 0xffff; + } + return this->socket_xfrm->send_ack(this->socket_xfrm, hdr) == SUCCESS; +} + +/** + * Bypass socket using a port-based bypass policy + */ +static bool add_port_bypass(private_kernel_netlink_ipsec_t *this, + int fd, int family) +{ + union { + struct sockaddr sa; + struct sockaddr_in in; + struct sockaddr_in6 in6; + } saddr; + socklen_t len; + bypass_t bypass = { + .family = family, + }; + + len = sizeof(saddr); + if (getsockname(fd, &saddr.sa, &len) != 0) + { + return FALSE; + } + len = sizeof(bypass.proto); + if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &bypass.proto, &len) != 0) + { + return FALSE; + } + switch (family) + { + case AF_INET: + bypass.port = saddr.in.sin_port; + break; + case AF_INET6: + bypass.port = saddr.in6.sin6_port; + break; + default: + return FALSE; + } + + if (!manage_bypass(this, XFRM_MSG_NEWPOLICY, POLICY_IN, &bypass)) + { + return FALSE; + } + if (!manage_bypass(this, XFRM_MSG_NEWPOLICY, POLICY_OUT, &bypass)) + { + manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_IN, &bypass); + return FALSE; + } + array_insert(this->bypass, ARRAY_TAIL, &bypass); + + return TRUE; +} + +/** + * Remove installed port based bypass policy + */ +static void remove_port_bypass(bypass_t *bypass, int idx, + private_kernel_netlink_ipsec_t *this) +{ + manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_OUT, bypass); + manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_IN, bypass); +} + +METHOD(kernel_ipsec_t, bypass_socket, bool, + private_kernel_netlink_ipsec_t *this, int fd, int family) +{ + if (lib->settings->get_bool(lib->settings, + "%s.plugins.kernel-netlink.port_bypass", FALSE, lib->ns)) + { + return add_port_bypass(this, fd, family); + } + return add_socket_bypass(this, fd, family); +} + METHOD(kernel_ipsec_t, enable_udp_decap, bool, private_kernel_netlink_ipsec_t *this, int fd, int family, u_int16_t port) { @@ -2637,6 +2791,8 @@ METHOD(kernel_ipsec_t, destroy, void, enumerator_t *enumerator; policy_entry_t *policy; + array_destroy_function(this->bypass, + (array_callback_t)remove_port_bypass, this); if (this->socket_xfrm_events > 0) { lib->watcher->remove(lib->watcher, this->socket_xfrm_events); @@ -2688,6 +2844,7 @@ kernel_netlink_ipsec_t *kernel_netlink_ipsec_create() (hashtable_equals_t)policy_equals, 32), .sas = hashtable_create((hashtable_hash_t)ipsec_sa_hash, (hashtable_equals_t)ipsec_sa_equals, 32), + .bypass = array_create(sizeof(bypass_t), 0), .mutex = mutex_create(MUTEX_TYPE_DEFAULT), .policy_history = TRUE, .install_routes = lib->settings->get_bool(lib->settings, -- cgit v1.2.3 From 3065081c3e9d1505e37a6b4aef8d9f4955bce61a Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Mon, 29 Sep 2014 17:11:53 +0200 Subject: kernel-netlink: Fallback to UDP if detecting socket protocol fails getsockopt(SO_PROTOCOL) is not supported before 2.6.32. Default to UDP if either the SO_PROTOCOL define is missing or the syscall fails. --- src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c index f0ada41d5..0f88b649e 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c @@ -2720,10 +2720,12 @@ static bool add_port_bypass(private_kernel_netlink_ipsec_t *this, { return FALSE; } +#ifdef SO_PROTOCOL /* since 2.6.32 */ len = sizeof(bypass.proto); if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &bypass.proto, &len) != 0) - { - return FALSE; +#endif + { /* assume UDP if SO_PROTOCOL not supported */ + bypass.proto = IPPROTO_UDP; } switch (family) { -- cgit v1.2.3 From 8925abbec49a6c603ce3417569d7c2392633739f Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 29 Jul 2014 16:38:45 +0200 Subject: kernel-netlink: Add an option to enforce using XFRM_MSG_UPDPOLICY --- src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c index 0f88b649e..977d6299a 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_ipsec.c @@ -321,6 +321,11 @@ struct private_kernel_netlink_ipsec_t { */ bool policy_history; + /** + * Wheter to always use UPDATE to install policies + */ + bool policy_update; + /** * Installed port based IKE bypass policies, as bypass_t */ @@ -2319,6 +2324,11 @@ METHOD(kernel_ipsec_t, add_policy, status_t, return SUCCESS; } + if (this->policy_update) + { + found = TRUE; + } + DBG2(DBG_KNL, "%s policy %R === %R %N (mark %u/0x%08x)", found ? "updating" : "adding", src_ts, dst_ts, policy_dir_names, direction, mark.value, mark.mask); @@ -2849,6 +2859,8 @@ kernel_netlink_ipsec_t *kernel_netlink_ipsec_create() .bypass = array_create(sizeof(bypass_t), 0), .mutex = mutex_create(MUTEX_TYPE_DEFAULT), .policy_history = TRUE, + .policy_update = lib->settings->get_bool(lib->settings, + "%s.plugins.kernel-netlink.policy_update", FALSE, lib->ns), .install_routes = lib->settings->get_bool(lib->settings, "%s.install_routes", TRUE, lib->ns), .proto_port_transport = lib->settings->get_bool(lib->settings, -- cgit v1.2.3 From 50bb81425e93ee04f42b5db542447f7665c4aa6a Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Thu, 14 Aug 2014 11:33:12 +0200 Subject: kernel-netlink: Optionally ignore errors resulting from response message loss As some backends over unreliable transport do not cache response messages, retransmissions due the loss of responses perform the operation again. Add an option to ignore some errors arising from such duplicate operations. Note: This approach can't distinguish between real EXIST/NOTFOUND errors and packet failures, and therefore is a source of race conditions and can't detect any of these errors actually happening. Therefore that behavior is disabled by default, and can be enabled with the ignore_retransmit_errors strongswan.conf option. To properly distinguish between real and retransmission errors, a Netlink backend should implement retransmission detection using sequence numbers. --- .../plugins/kernel_netlink/kernel_netlink_shared.c | 82 ++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'src') diff --git a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c index ba3b17e23..a9adfe091 100644 --- a/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c +++ b/src/libhydra/plugins/kernel_netlink/kernel_netlink_shared.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -61,6 +62,11 @@ struct private_netlink_socket_t { */ int socket; + /** + * Netlink protocol + */ + int protocol; + /** * Enum names for Netlink messages */ @@ -80,6 +86,11 @@ struct private_netlink_socket_t { * Use parallel netlink queries */ bool parallel; + + /** + * Ignore errors potentially resulting from a retransmission + */ + bool ignore_retransmit_errors; }; /** @@ -348,6 +359,69 @@ static status_t send_once(private_netlink_socket_t *this, struct nlmsghdr *in, return SUCCESS; } +/** + * Ignore errors for message types that might have completed previously + */ +static void ignore_retransmit_error(private_netlink_socket_t *this, + struct nlmsgerr *err, int type) +{ + switch (err->error) + { + case -EEXIST: + switch (this->protocol) + { + case NETLINK_XFRM: + switch (type) + { + case XFRM_MSG_NEWPOLICY: + case XFRM_MSG_NEWSA: + err->error = 0; + break; + } + break; + case NETLINK_ROUTE: + switch (type) + { + case RTM_NEWADDR: + case RTM_NEWLINK: + case RTM_NEWNEIGH: + case RTM_NEWROUTE: + case RTM_NEWRULE: + err->error = 0; + break; + } + break; + } + break; + case -ENOENT: + switch (this->protocol) + { + case NETLINK_XFRM: + switch (type) + { + case XFRM_MSG_DELPOLICY: + case XFRM_MSG_DELSA: + err->error = 0; + break; + } + break; + case NETLINK_ROUTE: + switch (type) + { + case RTM_DELADDR: + case RTM_DELLINK: + case RTM_DELNEIGH: + case RTM_DELROUTE: + case RTM_DELRULE: + err->error = 0; + break; + } + break; + } + break; + } +} + METHOD(netlink_socket_t, netlink_send, status_t, private_netlink_socket_t *this, struct nlmsghdr *in, struct nlmsghdr **out, size_t *out_len) @@ -389,6 +463,10 @@ METHOD(netlink_socket_t, netlink_send, status_t, try--; continue; } + if (this->ignore_retransmit_errors && try > 0) + { + ignore_retransmit_error(this, err, in->nlmsg_type); + } } *out = hdr; *out_len = len; @@ -488,11 +566,15 @@ netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names, .mutex = mutex_create(MUTEX_TYPE_RECURSIVE), .socket = socket(AF_NETLINK, SOCK_RAW, protocol), .entries = hashtable_create(hashtable_hash_ptr, hashtable_equals_ptr, 4), + .protocol = protocol, .names = names, .timeout = lib->settings->get_int(lib->settings, "%s.plugins.kernel-netlink.timeout", 0, lib->ns), .retries = lib->settings->get_int(lib->settings, "%s.plugins.kernel-netlink.retries", 0, lib->ns), + .ignore_retransmit_errors = lib->settings->get_bool(lib->settings, + "%s.plugins.kernel-netlink.ignore_retransmit_errors", + FALSE, lib->ns), .parallel = parallel, ); -- cgit v1.2.3