diff options
Diffstat (limited to 'bgpd/bgp_network.c')
-rw-r--r-- | bgpd/bgp_network.c | 1442 |
1 files changed, 1014 insertions, 428 deletions
diff --git a/bgpd/bgp_network.c b/bgpd/bgp_network.c index 570cc3b7..405fc62d 100644 --- a/bgpd/bgp_network.c +++ b/bgpd/bgp_network.c @@ -1,549 +1,1135 @@ /* BGP network related fucntions - Copyright (C) 1999 Kunihiro Ishiguro - -This file is part of GNU Zebra. - -GNU Zebra is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -GNU Zebra is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU Zebra; see the file COPYING. If not, write to the Free -Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ + * Copyright (C) 1999 Kunihiro Ishiguro + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ #include <zebra.h> +#include <stdbool.h> -#include "thread.h" #include "sockunion.h" #include "sockopt.h" #include "memory.h" #include "log.h" #include "if.h" #include "prefix.h" -#include "command.h" #include "privs.h" -#include "linklist.h" -#include "network.h" +#include "qpselect.h" -#include "bgpd/bgpd.h" -#include "bgpd/bgp_fsm.h" -#include "bgpd/bgp_attr.h" #include "bgpd/bgp_debug.h" #include "bgpd/bgp_network.h" +#include "bgpd/bgp_peer_index.h" + +#include "bgpd/bgp_session.h" +#include "bgpd/bgp_connection.h" +#include "bgpd/bgp_fsm.h" +#include "qpselect.h" extern struct zebra_privs_t bgpd_privs; +/*============================================================================== + * This is the socket connect/listen/accept/close stuff for the BGP Engine. + * + * NB: this code is for use in the BGP Engine *only*. + */ + +/* Forward references. */ +static void bgp_connect_action(qps_file qf, void* file_info) ; +static void bgp_accept_action(qps_file qf, void* file_info) ; +static int bgp_get_names(int sock_fd, union sockunion* su_local, + union sockunion* su_remote) ; +static int bgp_socket_set_common_options(int sock_fd, union sockunion* su, + bgp_connection connection) ; +static int bgp_set_ttl(int sock_fd, bgp_connection connnection, + int ttl, bool gtsm) ; +static int bgp_md5_set_listeners(union sockunion* su, const char* password) ; + +/*============================================================================== + * Open and close the listeners. + * + * When the BGP Engine is started it is passed the address and port to listen + * to. By default the address is NULL, which maps to INADDR_ANY and + * (if supported) IN6ADDR_ANY_INIT. + * + * When the BGP Engine is stopped the listening ports are closed. + * + * NB: once the listeners are opened they are active in the BGP Engine Nexus, + * and will be fielding attempts to connect. + * + * The BGP listeners are kept here. Keep lists of IPv4 and IPv6 listeners for + * the convenience of setting MD5 passwords. + */ + +typedef struct bgp_listener* bgp_listener ; + +static bgp_listener bgp_listeners_inet = NULL ; +#ifdef HAVE_IPV6 +static bgp_listener bgp_listeners_inet6 = NULL ; +#endif + +#if defined(HAVE_IPV6) && !defined(NRL) +# define BGP_USE_ADDRINFO 1 +#else +# define BGP_USE_ADDRINFO 0 +#endif + /* BGP listening socket. */ struct bgp_listener { - int fd; - union sockunion su; - struct thread *thread; -}; - -/* - * Set MD5 key for the socket, for the given IPv4 peer address. - * If the password is NULL or zero-length, the option will be disabled. - */ -static int -bgp_md5_set_socket (int socket, union sockunion *su, const char *password) + bgp_listener next ; + struct qps_file qf ; + union sockunion su ; +} ; + +/* Get pointer to list base for listeners in the given address family. */ +static inline +bgp_listener* bgp_listeners(sa_family_t family) { - int ret = -1; - int en = ENOSYS; - - assert (socket >= 0); - -#if HAVE_DECL_TCP_MD5SIG - ret = sockopt_tcp_signature (socket, su, password); - en = errno; -#endif /* HAVE_TCP_MD5SIG */ - - if (ret < 0) - zlog (NULL, LOG_WARNING, "can't set TCP_MD5SIG option on socket %d: %s", - socket, safe_strerror (en)); + switch (family) + { + case AF_INET: + return &bgp_listeners_inet ; - return ret; +#ifdef HAVE_IPV6 + case AF_INET6: + return &bgp_listeners_inet6 ; + +#endif + default: + zabort("invalid address family") ; + } ; } -/* Helper for bgp_connect */ -static int -bgp_md5_set_connect (int socket, union sockunion *su, const char *password) +/* Forward reference */ +static int bgp_open_listeners_addrinfo(const char* address, + unsigned short port) ; +static int bgp_open_listeners_simple(const char* address, unsigned short port) ; +static int bgp_open_listener(sockunion su, unsigned short port, + int sock_type, int sock_protocol) ; + +/*------------------------------------------------------------------------------ + * Open Listeners. + * + * Using given address and port, get all possible addresses and set up a + * listener on each one. + * + * Accepts: address = NULL => any local address + * address = comma separated list of addresses + * + * NB: an empty address counts as "any local address", so: + * + * "80.177.246.130,80.177.246.131" -- will listen on those addresses. + * + * "80.177.246.130," -- will list on that address and + * any other local address. + * + * NB: only listens on AF_INET and AF_INET6 (if HAVE_IPV6). + * + * Returns: > 0 => OK -- number of listeners set up + * -1 => failed -- no listeners set up + */ +extern int +bgp_open_listeners(const char* address, unsigned short port) { - int ret = -1; + int count ; + bool done_null ; + char* copy ; + char* next ; + + if (address == NULL) + address = "" ; -#if HAVE_DECL_TCP_MD5SIG - if ( bgpd_privs.change (ZPRIVS_RAISE) ) + copy = XSTRDUP(MTYPE_TMP, address) ; + + done_null = false ; + next = copy ; + count = 0 ; + do { - zlog_err ("%s: could not raise privs", __func__); - return ret; - } - - ret = bgp_md5_set_socket (socket, su, password); + address = next ; + next = strchr(address, ',') ; - if (bgpd_privs.change (ZPRIVS_LOWER) ) - zlog_err ("%s: could not lower privs", __func__); -#endif /* HAVE_TCP_MD5SIG */ - - return ret; -} + if (next != NULL) + *next++ = '\0' ; /* replace ',' and step past */ -int -bgp_md5_set (struct peer *peer) -{ - struct listnode *node; - int ret = 0; - struct bgp_listener *listener; + if (*address == '\0') + { + if (done_null) + continue ; /* don't do "" more than once */ + else + done_null = true ; + } ; + + count += BGP_USE_ADDRINFO ? bgp_open_listeners_addrinfo(address, port) + : bgp_open_listeners_simple(address, port) ; + } while (next != NULL) ; - if ( bgpd_privs.change (ZPRIVS_RAISE) ) + XFREE(MTYPE_TMP, copy) ; + + if (count == 0) { - zlog_err ("%s: could not raise privs", __func__); + zlog_err ("%s: no usable addresses", __func__); return -1; } - - /* Just set the password on the listen socket(s). Outbound connections - * are taken care of in bgp_connect() below. - */ - for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener)) - if (listener->su.sa.sa_family == peer->su.sa.sa_family) - { - ret = bgp_md5_set_socket (listener->fd, &peer->su, peer->password); - break; - } - if (bgpd_privs.change (ZPRIVS_LOWER) ) - zlog_err ("%s: could not lower privs", __func__); - - return ret; -} - -/* Accept bgp connection. */ + return 0; +} ; + +/*------------------------------------------------------------------------------ + * Open listeners using getaddrinfo() to find the addresses. + * + * Note that this will accept names as well as numeric addresses. + * + * Returns: count of listeners opened successfully. + */ static int -bgp_accept (struct thread *thread) +bgp_open_listeners_addrinfo(const char* address, unsigned short port) { - int bgp_sock; - int accept_sock; - union sockunion su; - struct bgp_listener *listener = THREAD_ARG(thread); - struct peer *peer; - struct peer *peer1; - char buf[SU_ADDRSTRLEN]; - - /* Register accept thread. */ - accept_sock = THREAD_FD (thread); - if (accept_sock < 0) - { - zlog_err ("accept_sock is nevative value %d", accept_sock); - return -1; - } - listener->thread = thread_add_read (master, bgp_accept, listener, accept_sock); +#if BGP_USE_ADDRINFO + +# ifndef HAVE_IPV6 +# error Using getaddrinfo() but HAVE_IPV6 is not defined ?? +# endif - /* Accept client connection. */ - bgp_sock = sockunion_accept (accept_sock, &su); - if (bgp_sock < 0) + struct addrinfo *ainfo; + struct addrinfo *ainfo_save; + int ret, count; + char port_str[16]; + + static const struct addrinfo req = { + .ai_family = AF_UNSPEC, + .ai_flags = AI_PASSIVE, + .ai_socktype = SOCK_STREAM, + } ; + + snprintf (port_str, sizeof(port_str), "%d", port); + port_str[sizeof (port_str) - 1] = '\0'; + + if (*address == '\0') + address = NULL ; + + ret = getaddrinfo (address, port_str, &req, &ainfo_save); + if (ret != 0) { - zlog_err ("[Error] BGP socket accept failed (%s)", safe_strerror (errno)); - return -1; + zlog_err ("%s: getaddrinfo: %s", __func__, eaitoa(ret, errno, 0).str); + return 0 ; } - set_nonblocking (bgp_sock); - - if (BGP_DEBUG (events, EVENTS)) - zlog_debug ("[Event] BGP connection from host %s", inet_sutop (&su, buf)); - - /* Check remote IP address */ - peer1 = peer_lookup (NULL, &su); - if (! peer1 || peer1->status == Idle) + + count = 0; + for (ainfo = ainfo_save; ainfo; ainfo = ainfo->ai_next) { - if (BGP_DEBUG (events, EVENTS)) - { - if (! peer1) - zlog_debug ("[Event] BGP connection IP address %s is not configured", - inet_sutop (&su, buf)); - else - zlog_debug ("[Event] BGP connection IP address %s is Idle state", - inet_sutop (&su, buf)); - } - close (bgp_sock); - return -1; + union sockunion su ; + int err ; + + if ((ainfo->ai_family != AF_INET) && (ainfo->ai_family != AF_INET6)) + continue; + + sockunion_new_sockaddr(&su, ainfo->ai_addr) ; + err = bgp_open_listener(&su, port, + ainfo->ai_socktype, ainfo->ai_protocol) ; + if (err == 0) + ++count; } + freeaddrinfo (ainfo_save); - /* In case of peer is EBGP, we should set TTL for this connection. */ - if (peer_sort (peer1) == BGP_PEER_EBGP) { - sockopt_ttl (peer1->su.sa.sa_family, bgp_sock, peer1->ttl); - if (peer1->gtsm_hops) - sockopt_minttl (peer1->su.sa.sa_family, bgp_sock, MAXTTL + 1 - peer1->gtsm_hops); - } + return count ; + +#else + zabort("bgp_open_listeners_addrinfo not implemented") ; +#endif /* BGP_USE_ADDRINFO */ +} +/*------------------------------------------------------------------------------ + * Open listener the old fashioned way. + * + * NB: if address is "" tries IPv4 and IPv6 (if supported). + * + * NB: if address is not NULL, must be a numeric IP address (which may be IPv6 + * if that is supported). + * + * Returns: count of listeners opened successfully. + */ +static int +bgp_open_listeners_simple(const char* address, unsigned short port) +{ + union sockunion su ; + int err ; + int count ; - /* Make dummy peer until read Open packet. */ - if (BGP_DEBUG (events, EVENTS)) - zlog_debug ("[Event] Make dummy peer structure until read Open packet"); + /* If address is not null, must be a single, specific, numeric address */ + if (*address != '\0') + { + int ret = str2sockunion (address, &su) ; + if (ret < 0) + { + zlog_err("bgp_socket: could not parse ip address %s: %s", + address, errtoa(errno, 0).str); + return 0 ; + } - { - char buf[SU_ADDRSTRLEN + 1]; + err = bgp_open_listener(&su, port, SOCK_STREAM, 0) ; - peer = peer_create_accept (peer1->bgp); - SET_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER); - peer->su = su; - peer->fd = bgp_sock; - peer->status = Active; - peer->local_id = peer1->local_id; - peer->v_holdtime = peer1->v_holdtime; - peer->v_keepalive = peer1->v_keepalive; + return (err == 0) ? 1 : 0 ; + } ; - /* Make peer's address string. */ - sockunion2str (&su, buf, SU_ADDRSTRLEN); - peer->host = XSTRDUP (MTYPE_BGP_PEER_HOST, buf); - } + /* Null address, try <any> for IPv4 and (if supported) IPv6 */ + count = 0 ; - BGP_EVENT_ADD (peer, TCP_connection_open); + sockunion_init_new(&su, AF_INET) ; + err = bgp_open_listener(&su, port, SOCK_STREAM, 0) ; + if (err == 0) + ++count ; - return 0; -} +#ifdef HAVE_IPV6 + sockunion_init_new(&su, AF_INET6) ; + err = bgp_open_listener(&su, port, SOCK_STREAM, 0) ; + if (err == 0) + ++count ; +#endif -/* BGP socket bind. */ + return count ; +} ; + +/*------------------------------------------------------------------------------ + * Open Listener Socket + * + * Sets up socket with the usual options. Binds to given address and listens. + * + * If all that is successful, creates bgp_listener, sets up qpselect file, adds + * to the BGP Engine selection and enables it for reading. + * + * Listener read events are handled by bgp_accept_action(). + * + * Returns: 0 : OK + * != 0 : error number (from errno or otherwise) + */ static int -bgp_bind (struct peer *peer) +bgp_open_listener(sockunion su, unsigned short port, + int sock_type, int sock_protocol) { -#ifdef SO_BINDTODEVICE - int ret; - struct ifreq ifreq; + bgp_listener listener ; + int ret, err ; + int slen ; + int sock_fd ; + + /* Construct socket and set the common options. */ + sock_fd = sockunion_socket(su, sock_type, sock_protocol) ; + if (sock_fd < 0) + { + err = errno ; + zlog_err ("%s: could not open socket for family %d: %s", __func__, + sockunion_family(su), errtoa(err, 0).str) ; + return errno = err ; + } - if (! peer->ifname) - return 0; + err = bgp_socket_set_common_options(sock_fd, su, NULL) ; - strncpy ((char *)&ifreq.ifr_name, peer->ifname, sizeof (ifreq.ifr_name)); + /* Want only IPV6 on ipv6 socket (not mapped addresses) + * + * This distinguishes 0.0.0.0 from :: -- without this, bind() will reject the + * attempt to bind to :: after binding to 0.0.0.0. + * + * Also, for all the apparent utility of IPv4-mapped addresses, the semantics + * are simpler if IPv6 sockets speak IPv6 and IPv4 sockets speak IPv4. + */ +#ifdef HAVE_IPV6 + if ((err == 0) && (sockunion_family(su) == AF_INET6)) + if (setsockopt_ipv6_v6only(sock_fd) < 0) + err = errno ; +#endif - if ( bgpd_privs.change (ZPRIVS_RAISE) ) - zlog_err ("bgp_bind: could not raise privs"); - - ret = setsockopt (peer->fd, SOL_SOCKET, SO_BINDTODEVICE, - &ifreq, sizeof (ifreq)); + /* Bind to port and address (if any) */ + if (err == 0) + { + if (bgpd_privs.change(ZPRIVS_RAISE)) + { + err = errno ; + zlog_err("%s: could not raise privs: %s", __func__, + errtoa(errno, 0).str) ; + } ; + + slen = sockunion_set_port(su, port) ; + + ret = bind(sock_fd, &su->sa, slen) ; + if (ret < 0) + { + err = errno ; + zlog_err ("%s: bind: %s", __func__, errtoa(err, 0).str); + } ; + + if (bgpd_privs.change(ZPRIVS_LOWER)) + { + if (err == 0) + err = errno ; + zlog_err("%s: could not lower privs: %s", __func__, + errtoa(errno, 0).str) ; + } ; + } ; + + /* Last lap... listen() */ + if (err == 0) + { + ret = listen (sock_fd, 43); + if (ret < 0) + { + err = errno ; + zlog_err ("%s: listen: %s", __func__, errtoa(err, 0).str) ; + } + } ; + + if (err != 0) + { + close(sock_fd) ; + return err ; + } ; - if (bgpd_privs.change (ZPRIVS_LOWER) ) - zlog_err ("bgp_bind: could not lower privs"); + /* Having successfully opened the listener, record it so that can be found + * again, add it to the BGP Engine Nexus file selection and enable it for + * reading. + */ + listener = XCALLOC(MTYPE_BGP_LISTENER, sizeof(struct bgp_listener)) ; - if (ret < 0) - { - zlog (peer->log, LOG_INFO, "bind to interface %s failed", peer->ifname); - return ret; - } -#endif /* SO_BINDTODEVICE */ - return 0; -} + qps_file_init_new(&listener->qf, NULL) ; + qps_add_file(bgp_nexus->selection, &listener->qf, sock_fd, listener) ; + qps_enable_mode(&listener->qf, qps_read_mnum, bgp_accept_action) ; -static int -bgp_bind_address (int sock, struct in_addr *addr) + sockunion_copy(&listener->su, su) ; + + listener->next = *bgp_listeners(sockunion_family(su)) ; + *bgp_listeners(sockunion_family(su)) = listener ; + + return 0 ; +} ; + +/*------------------------------------------------------------------------------ + * Close Listeners. + * + * Empty the listener lists, close files, remove from the selection. + * + */ +static void bgp_reset_listeners(bgp_listener* p_listener) ; + +extern void +bgp_close_listeners(void) { - int ret; - struct sockaddr_in local; - - memset (&local, 0, sizeof (struct sockaddr_in)); - local.sin_family = AF_INET; -#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN - local.sin_len = sizeof(struct sockaddr_in); -#endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */ - memcpy (&local.sin_addr, addr, sizeof (struct in_addr)); - - if ( bgpd_privs.change (ZPRIVS_RAISE) ) - zlog_err ("bgp_bind_address: could not raise privs"); - - ret = bind (sock, (struct sockaddr *)&local, sizeof (struct sockaddr_in)); - if (ret < 0) - ; - - if (bgpd_privs.change (ZPRIVS_LOWER) ) - zlog_err ("bgp_bind_address: could not lower privs"); - - return 0; -} + bgp_reset_listeners(bgp_listeners(AF_INET)) ; +#ifdef HAVE_IPV6 + bgp_reset_listeners(bgp_listeners(AF_INET6)) ; +#endif +} ; -static struct in_addr * -bgp_update_address (struct interface *ifp) +static void +bgp_reset_listeners(bgp_listener* p_listener) { - struct prefix_ipv4 *p; - struct connected *connected; - struct listnode *node; + bgp_listener listener ; + bgp_listener next ; + + next = *p_listener ; + *p_listener = NULL ; - for (ALL_LIST_ELEMENTS_RO (ifp->connected, node, connected)) + while (next != NULL) { - p = (struct prefix_ipv4 *) connected->address; + listener = next ; + next = listener->next ; + + close(qps_file_fd(&listener->qf)) ; + qps_remove_file(&listener->qf) ; + + XFREE(MTYPE_BGP_LISTENER, listener) ; + } ; +} ; + +/*------------------------------------------------------------------------------ + * Prepare to accept() connection + * + * If the session has a password, then this is where the listener(s) for the + * appropriate address family are told about the password. + * + * This is done shortly before the session is first enabled for accept(). + * + * The effect is (probably) that the peer's attempts to connect with MD5 signed + * packets will simply have been ignored up to this point. From this point + * forward they will be accepted, but closed until accept is enabled. + * + * NB: requires the session mutex LOCKED. + */ +extern void +bgp_prepare_to_accept(bgp_connection connection) +{ + int err ; - if (p->family == AF_INET) - return &p->prefix; - } - return NULL; -} + if (connection->session->password != NULL) + { + err = bgp_md5_set_listeners(connection->session->su_peer, + connection->session->password) ; + +/* TODO: failure to set password in bgp_prepare_to_accept ? */ + } ; + + return ; +} ; + +/*------------------------------------------------------------------------------ + * No longer prepared to accept() connection + * + * If the session has a password, then this is where it is withdrawn from the + * listener(s) for the appropriate address family. + * + * NB: requires the session mutex LOCKED. + */ +extern void +bgp_not_prepared_to_accept(bgp_connection connection) +{ + int err ; -/* Update source selection. */ + if (connection->session->password != NULL) + { + err = bgp_md5_set_listeners(connection->session->su_peer, NULL) ; + +/* TODO: failure to clear password in bgp_not_prepared_to_accept ? */ + } ; + + return ; +} ; + +/*------------------------------------------------------------------------------ + * Accept bgp connection -- this is the read action for qpselect. + * + * Accepts the connection, then checks to see whether the source is a configured + * peer, and if it is, whether currently accepting connections from that peer. + * + * If connection passes those tests, sets up the new listener connection for + * the session (including qpselect file), and kicks the FSM for that into life + * by generating a bgp_fsm_TCP_connection_open event. At this point the qfile + * is not enabled in any mode and no timers are running. + * + * NB: uses bgp_session_lookup() to find the session, so will lock and unlock + * its mutex. + * + * NB: locks and unlocks the session mutex. + * + * NB: does not set up connection unless all parts of the accept process + * succeed. + * + * Events and Errors: + * + * * if the accept() fails, log (err) the error and continue. + * + * Error is no associated with any connection or session. + * + * * if the connection is not acceptable, because: + * + * (a) peer is not configured + * (b) session not currently accepting connections (for whatever reason) + * + * log (debug) the event and continue. + * + * -- could Cease/Connection Rejected in most cases + * -- could Cease/Connection Collision Resolution in those cases + * + * * if the connection is acceptable, but fails in getting the remote/local + * addresses or in setting options + * + * report error on primary connection and generate bgp_fsm_TCP_fatal_error + * event. + * + * * if all goes well, generate bgp_fsm_TCP_connection_open either for the + * new (secondary) connection or for the primary. + * + * Sets connection->err to the error (if any). + */ static void -bgp_update_source (struct peer *peer) +bgp_accept_action(qps_file qf, void* file_info) { - struct interface *ifp; - struct in_addr *addr; - - /* Source is specified with interface name. */ - if (peer->update_if) + bgp_connection connection ; + union sockunion su_remote ; + union sockunion su_local ; + bool exists ; + int sock_fd ; + int err ; + + /* Accept client connection. + * + * We arrange for an IPv4 listener *and* an IPv6 one (assuming have IPv6), + * and we arrange for AF_INET6 listener to be IPV6_V6ONLY. This means that + * should NOT get an IPv4 mapped address. However, should we get such an + * address, the su_remote will be set to the actual IPv4 address. + * + * This means: the address family of su_remote is the address family of the + * underlying connection, NOT NECESSARILY the socket -- should that matter. + */ + sock_fd = sockunion_accept(qps_file_fd(qf), &su_remote) ; + if (sock_fd < 0) { - ifp = if_lookup_by_name (peer->update_if); - if (! ifp) - return; + err = errno ; + if (sock_fd == -1) + zlog_err("[Error] BGP socket accept failed (%s)", errtoa(err, 0).str) ; + return ; /* have no connection to report this to */ + } ; + + if (BGP_DEBUG(events, EVENTS)) + zlog_debug("[Event] BGP connection from host %s", sutoa(&su_remote).str) ; + + /* See if we are ready to accept connections from the connecting party */ + connection = bgp_peer_index_seek_accept(&su_remote, &exists) ; + if (connection == NULL) + { + if (BGP_DEBUG(events, EVENTS)) + zlog_debug(exists + ? "[Event] BGP accept IP address %s is not accepting" + : "[Event] BGP accept IP address %s is not configured", + sutoa(&su_remote).str) ; + close(sock_fd) ; + return ; /* quietly reject connection */ +/* TODO: RFC recommends sending a NOTIFICATION when refusing accept() */ + } ; + + /* Will accept the connection. + * + * Now need the session locked, 'cos are about to start a new connection. + * + * This is running in the BGP Engine thread, so cannot in any case be + * foxed by the other connection making changes. + * + * The session is active, so the Routing Engine will not make any changes + * except under the mutex, and will not destroy the session. + */ - addr = bgp_update_address (ifp); - if (! addr) - return; + BGP_CONNECTION_SESSION_LOCK(connection) ; /*<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ - bgp_bind_address (peer->fd, addr); - } + /* Set the common socket options. + * Does not set password -- that is inherited from the listener. + * + * At this point, su_remote is the value returned by sockunion_accept(), so + * if we have an AF_INET6 socket with an IPv4 mapped address, then su_remote + * is an AF_INET. + */ + err = bgp_socket_set_common_options(sock_fd, &su_remote, connection) ; - /* Source is specified with IP address. */ - if (peer->update_source) - sockunion_bind (peer->fd, peer->update_source, 0, peer->update_source); -} + /* Get the local and remote addresses -- noting that IPv6 mapped IPv4 + * addresses are rendered as IPv4 addresses. + */ + if (err == 0) + err = bgp_get_names(sock_fd, &su_local, &su_remote) ; + + /* If all is well, set up the accept connection, and set it ready + * to go. Set session not to accept further inbound connections. + */ + if (err == 0) + bgp_connection_open(connection, sock_fd) ; + else + close(sock_fd) ; + + BGP_CONNECTION_SESSION_UNLOCK(connection) ; /*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>*/ -/* BGP try to connect to the peer. */ -int -bgp_connect (struct peer *peer) + /* Now kick the FSM in an appropriate fashion */ + bgp_fsm_connect_completed(connection, err, &su_local, &su_remote) ; +} ; + +/*============================================================================== + * Open BGP Connection -- connect() to the other end + */ + +static int bgp_md5_set_socket(int sock_fd, sockunion su, const char *password) ; +static int bgp_bind_ifname(bgp_connection connection, int sock_fd) ; +static int bgp_bind_ifaddress(bgp_connection connection, int sock_fd) ; + +/*------------------------------------------------------------------------------ + * Open BGP Connection -- connect() to the other end + * + * Creates a *non-blocking* socket. + * + * If fails immediately, generate suitable FSM event -- setting connection->err. + * + * Success (immediate or otherwise) and delayed failure are dealt with in the + * qpselect action -- bgp_connect_action() -- below. + * + * NB: requires the session mutex LOCKED. + */ +extern void +bgp_open_connect(bgp_connection connection) { - unsigned int ifindex = 0; - - /* Make socket for the peer. */ - peer->fd = sockunion_socket (&peer->su); - if (peer->fd < 0) - return -1; - - /* If we can get socket for the peer, adjest TTL and make connection. */ - if (peer_sort (peer) == BGP_PEER_EBGP) { - sockopt_ttl (peer->su.sa.sa_family, peer->fd, peer->ttl); - if (peer->gtsm_hops) - sockopt_minttl (peer->su.sa.sa_family, peer->fd, MAXTTL + 1 - peer->gtsm_hops); - } - - sockopt_reuseaddr (peer->fd); - sockopt_reuseport (peer->fd); - -#ifdef IPTOS_PREC_INTERNETCONTROL - if (sockunion_family (&peer->su) == AF_INET) - setsockopt_ipv4_tos (peer->fd, IPTOS_PREC_INTERNETCONTROL); -#endif + int sock_fd ; + int err ; - if (peer->password) - bgp_md5_set_connect (peer->fd, &peer->su, peer->password); + sockunion su = connection->session->su_peer ; - /* Bind socket. */ - bgp_bind (peer); + err = 0 ; - /* Update source bind. */ - bgp_update_source (peer); + /* Make socket for the connect connection. */ + sock_fd = sockunion_socket(su, SOCK_STREAM, 0) ; + if (sock_fd < 0) + err = errno ; -#ifdef HAVE_IPV6 - if (peer->ifname) - ifindex = if_nametoindex (peer->ifname); -#endif /* HAVE_IPV6 */ + if (BGP_DEBUG(events, EVENTS)) + plog_debug(connection->log, "%s [Event] Connect start to %s socket %d%s", + connection->host, connection->host, sock_fd, + (sock_fd < 0) ? " -- failed" : "" ) ; - if (BGP_DEBUG (events, EVENTS)) - plog_debug (peer->log, "%s [Event] Connect start to %s fd %d", - peer->host, peer->host, peer->fd); + /* Set the common options. */ + if (err == 0) + err = bgp_socket_set_common_options(sock_fd, su, connection) ; - /* Connect to the remote peer. */ - return sockunion_connect (peer->fd, &peer->su, htons (peer->port), ifindex); -} + /* Set the TCP MD5 "password", if required. */ + if (err== 0) + if (connection->session->password != NULL) + err = bgp_md5_set_socket(sock_fd, su, connection->session->password) ; -/* After TCP connection is established. Get local address and port. */ -void -bgp_getsockname (struct peer *peer) -{ - if (peer->su_local) + /* Bind socket. */ + if (err == 0) + err = bgp_bind_ifname(connection, sock_fd) ; + + /* Update source bind. */ + if (err == 0) + err = bgp_bind_ifaddress(connection, sock_fd) ; + + /* Connect to the remote peer. */ + if (err == 0) { - sockunion_free (peer->su_local); - peer->su_local = NULL; - } + int ret ; + ret = sockunion_connect(sock_fd, su, connection->session->port, + connection->session->ifindex) ; + /* does not report EINPROGRESS as an error. */ + if (ret < 0) + err = errno ; + } ; + + /* If not OK now, close the sock_fd and signal the error */ + + if (err != 0) + { + if (sock_fd >= 0) + close(sock_fd) ; + + bgp_fsm_connect_completed(connection, err, NULL, NULL) ; + + return ; + } ; + + /* Set connection waiting for connection to complete. + * + * The file is then enabled for both read and write: + * + * if succeeds: will become writable (may also be readable if data turns + * up immediately). + * if fails: will become readable (may also become writable) + * + * Generally, expect it to be a while before the sock_fd becomes readable or + * writable. But for local connections this may happen immediately. But, + * in any case, this will be handled by the qpselect action. + */ - if (peer->su_remote) + bgp_connection_open(connection, sock_fd) ; + + qps_enable_mode(connection->qf, qps_read_mnum, bgp_connect_action) ; + qps_enable_mode(connection->qf, qps_write_mnum, bgp_connect_action) ; + + return ; +} ; + +/*------------------------------------------------------------------------------ + * Complete non-blocking bgp connect() -- this is the read and write action for + * qpselect. + * + * If the connection succeeds, expect the socket to become writable. May also + * become readable if data arrives immediately. + * + * If the connection fails, expect the socket to become readable. May also + * become writable. + * + * Either way, use getsockopt() to extract any error condition. + * + * If becomes both readable and writable at the same time, then the first to + * arrive here will disable the file for both read and write, which will + * discard the other pending event -- so will not attempt to do this more than + * once. + * + * NB: does not require the session mutex. + * + * Events and Errors: + * + * * if has succeeded, generate a bgp_fsm_TCP_connection_open event. + * + * At this point the qfile is not enabled in any mode.. + * + * * if has failed, generate: + * + * * bgp_fsm_TCP_connection_open_failed event + * + * for "soft" errors. + * + * * bgp_fsm_TCP_fatal_error event + * + * for "hard" errors. + * + * Sets connection->err to the error (if any). + */ +static void +bgp_connect_action(qps_file qf, void* file_info) +{ + bgp_connection connection ; + int ret, err ; + socklen_t len ; + union sockunion su_remote ; + union sockunion su_local ; + + connection = file_info ; + + /* See if connection successful or not. */ + /* If successful, set the connection->su_local and ->su_remote */ + + len = sizeof(err) ; + err = 0 ; + ret = getsockopt(qps_file_fd(qf), SOL_SOCKET, SO_ERROR, &err, &len) ; + if (ret != 0) { - sockunion_free (peer->su_remote); - peer->su_remote = NULL; + err = errno ; + if (err == 0) /* cannot be and cannot continue */ + zabort("Invalid return from getsockopt()") ; } + else + { + if (len != sizeof(err)) + zabort("getsockopt returned unexpected length") ; + } ; - peer->su_local = sockunion_getsockname (peer->fd); - peer->su_remote = sockunion_getpeername (peer->fd); + if (err == 0) + err = bgp_get_names(qps_file_fd(qf), &su_local, &su_remote) ; - bgp_nexthop_set (peer->su_local, peer->su_remote, &peer->nexthop, peer); -} + /* In any case, disable both read and write for this file. */ + qps_disable_modes(qf, qps_write_mbit | qps_read_mbit) ; + /* Now kick the FSM in an appropriate fashion */ + bgp_fsm_connect_completed(connection, err, &su_local, &su_remote) ; +} ; +/*============================================================================== + * Set the TTL for the given connection (if any), if there is an sock_fd. + */ +extern void +bgp_set_new_ttl(bgp_connection connection, int ttl, bool gtsm) +{ + int sock_fd ; + + if (connection == NULL) + return ; + + sock_fd = qps_file_fd(connection->qf) ; + if (sock_fd < 0) + return ; + + bgp_set_ttl(sock_fd, connection, ttl, gtsm) ; +} ; + +/*------------------------------------------------------------------------------ + * BGP set minttl (GTSM) and/or ttl. + * + * A ttl of <= 0 is treated as "turn off" -- effectively MAXTTL, forcing gtsm + * *off*. + * + * If GTSM is not supported, then sets ttl. + * + * Returns: 0 : OK (so far so good) + * != 0 : error number (from errno or otherwise) + */ static int -bgp_listener (int sock, struct sockaddr *sa, socklen_t salen) +bgp_set_ttl(int sock_fd, bgp_connection connection, int ttl, bool gtsm) { - struct bgp_listener *listener; - int ret, en; + int ret ; - sockopt_reuseaddr (sock); - sockopt_reuseport (sock); + if (gtsm && (ttl > 0)) + { + ret = setsockopt_minttl(sock_fd, ttl) ; + + if (ret >= 0) + { + ttl = MAXTTL ; + connection->gtsm = true ; + } + else if (errno != EOPNOTSUPP) + return errno ; + } + else if (connection->gtsm) + { + ret = setsockopt_minttl(sock_fd, 0) ; /* turn off */ -#ifdef IPTOS_PREC_INTERNETCONTROL - if (sa->sa_family == AF_INET) - setsockopt_ipv4_tos (sock, IPTOS_PREC_INTERNETCONTROL); -#endif + if (ret < 0) /* must have turned it on, so should not fail */ + return errno ; -#ifdef IPV6_V6ONLY - /* Want only IPV6 on ipv6 socket (not mapped addresses) */ - if (sa->sa_family == AF_INET6) { - int on = 1; - setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, - (void *) &on, sizeof (on)); - } -#endif + connection->gtsm = false ; + } ; - if (bgpd_privs.change (ZPRIVS_RAISE) ) - zlog_err ("bgp_socket: could not raise privs"); + ret = setsockopt_ttl(sock_fd, ttl) ; - ret = bind (sock, sa, salen); - en = errno; - if (bgpd_privs.change (ZPRIVS_LOWER) ) - zlog_err ("bgp_bind_address: could not lower privs"); + return (ret >= 0) ? 0 : errno ; +} ; - if (ret < 0) - { - zlog_err ("bind: %s", safe_strerror (en)); - return ret; - } +/*============================================================================== + * Get local and remote address and port for connection. + * + * Returns: 0 => OK + * != 0 : error number (from errno or otherwise) + */ +static int +bgp_get_names(int sock_fd, union sockunion* su_local, + union sockunion* su_remote) +{ + int ret, err ; - ret = listen (sock, 3); + err = 0 ; + + ret = sockunion_getsockname(sock_fd, su_local) ; if (ret < 0) - { - zlog_err ("listen: %s", safe_strerror (errno)); - return ret; - } + err = errno ; - listener = XMALLOC (MTYPE_BGP_LISTENER, sizeof(*listener)); - listener->fd = sock; - memcpy(&listener->su, sa, salen); - listener->thread = thread_add_read (master, bgp_accept, listener, sock); - listnode_add (bm->listen_sockets, listener); + ret = sockunion_getpeername(sock_fd, su_remote) ; + if ((ret < 0) && (err == 0)) + err = errno ; - return 0; -} + return err ; +} ; + +/*============================================================================== + * Specific binding of outbound connections to interfaces... + * + */ -/* IPv6 supported version of BGP server socket setup. */ -#if defined (HAVE_IPV6) && ! defined (NRL) -int -bgp_socket (unsigned short port, const char *address) +/*------------------------------------------------------------------------------ + * BGP socket bind. + * + * If there is a specific interface to bind an outbound connection to, that + * is done here. + * + * Returns: 0 : OK (so far so good) + * != 0 : error number (from errno or otherwise) + */ +static int +bgp_bind_ifname(bgp_connection connection, int sock_fd) { - struct addrinfo *ainfo; - struct addrinfo *ainfo_save; - static const struct addrinfo req = { - .ai_family = AF_UNSPEC, - .ai_flags = AI_PASSIVE, - .ai_socktype = SOCK_STREAM, - }; - int ret, count; - char port_str[BUFSIZ]; +#ifdef SO_BINDTODEVICE + int ret, err ; + struct ifreq ifreq; - snprintf (port_str, sizeof(port_str), "%d", port); - port_str[sizeof (port_str) - 1] = '\0'; + if (connection->session->ifname == NULL) + return 0; - ret = getaddrinfo (address, port_str, &req, &ainfo_save); - if (ret != 0) + strncpy ((char *)&ifreq.ifr_name, connection->session->ifname, + sizeof (ifreq.ifr_name)) ; + + err = 0 ; + if (bgpd_privs.change (ZPRIVS_RAISE)) { - zlog_err ("getaddrinfo: %s", gai_strerror (ret)); - return -1; - } + err = errno ; + zlog_err ("bgp_bind: could not raise privs: %s", errtoa(errno, 0).str); + } ; - count = 0; - for (ainfo = ainfo_save; ainfo; ainfo = ainfo->ai_next) + ret = setsockopt (sock_fd, SOL_SOCKET, SO_BINDTODEVICE, + &ifreq, sizeof (ifreq)) ; + if (ret < 0) + err = errno ; + + if (bgpd_privs.change (ZPRIVS_LOWER) ) { - int sock; - - if (ainfo->ai_family != AF_INET && ainfo->ai_family != AF_INET6) - continue; - - sock = socket (ainfo->ai_family, ainfo->ai_socktype, ainfo->ai_protocol); - if (sock < 0) - { - zlog_err ("socket: %s", safe_strerror (errno)); - continue; - } - - /* if we intend to implement ttl-security, this socket needs ttl=255 */ - sockopt_ttl (ainfo->ai_family, sock, MAXTTL); - - ret = bgp_listener (sock, ainfo->ai_addr, ainfo->ai_addrlen); - if (ret == 0) - ++count; - else - close(sock); - } - freeaddrinfo (ainfo_save); - if (count == 0) + if (err == 0) + err = errno ; + zlog_err ("bgp_bind: could not lower privs: %s", errtoa(errno, 0).str); + } ; + + if (err != 0) { - zlog_err ("%s: no usable addresses", __func__); - return -1; + zlog (connection->log, LOG_INFO, "bind to interface %s failed (%s)", + connection->session->ifname, errtoa(err, 0).str) ; + return err ; } - +#endif /* SO_BINDTODEVICE */ return 0; -} -#else -/* Traditional IPv4 only version. */ -int -bgp_socket (unsigned short port, const char *address) +} ; + +/*------------------------------------------------------------------------------ + * Update source selection -- if connection specifies an IP address. + * + * If required, tries to bind the given socket to the given address. + * + * Returns: 0 : OK (so far so good) + * != 0 : error number (from errno or otherwise) + */ +static int +bgp_bind_ifaddress(bgp_connection connection, int sock_fd) { - int sock; - int socklen; - struct sockaddr_in sin; - int ret, en; - - sock = socket (AF_INET, SOCK_STREAM, 0); - if (sock < 0) + if (connection->session->ifaddress != NULL) { - zlog_err ("socket: %s", safe_strerror (errno)); - return sock; - } + union sockunion su[1] ; + int ret ; - /* if we intend to implement ttl-security, this socket needs ttl=255 */ - sockopt_ttl (AF_INET, sock, MAXTTL); + sockunion_new_sockaddr(su, &connection->session->ifaddress->sa) ; + ret = sockunion_bind (sock_fd, su, 0, false) ; - memset (&sin, 0, sizeof (struct sockaddr_in)); - sin.sin_family = AF_INET; - sin.sin_port = htons (port); - socklen = sizeof (struct sockaddr_in); + if (ret < 0) + return errno ; + } ; + return 0 ; +} ; - if (address && ((ret = inet_aton(address, &sin.sin_addr)) < 1)) +/*============================================================================== + * BGP Socket Option handling + */ + +/*------------------------------------------------------------------------------ + * Common socket options: + * + * * non-blocking -- at all times + * * reuseaddr + * * reuseport + * * set security ttl (GTSM) and/or ttl -- if connection given. + * * for IPv4, set TOS if required + * + * These options are set on all sockets: listen/connect/accept + * (except either form of ttl, which is not set on listen). + * + * Note that the family of the given sockunion is the *protocol*, not the + * *socket* family. + * + * Returns: 0 => OK + * != 0 == errno -- not that we really expect any errors here + */ +static int +bgp_socket_set_common_options(int sock_fd, union sockunion* su, + bgp_connection connection) +{ + int val ; + + /* Make socket non-blocking */ + val = fcntl(sock_fd, F_GETFL, 0) ; + if (val != -1) /* POSIX says "return value is not negative" */ + val = fcntl(sock_fd, F_SETFL, val | O_NONBLOCK) ; + if (val == -1) + return errno ; + + /* Reuse addr and port */ + if (setsockopt_reuseaddr(sock_fd) < 0) + return errno ; + if (setsockopt_reuseport(sock_fd) < 0) + return errno ; + + /* Adjust ttl if required */ + if (connection != NULL) { - zlog_err("bgp_socket: could not parse ip address %s: %s", - address, safe_strerror (errno)); - return ret; - } -#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN - sin.sin_len = socklen; -#endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */ + int err ; + err = bgp_set_ttl(sock_fd, connection, connection->session->ttl, + connection->session->gtsm) ; + if (err != 0) + return err ; + } ; + +#ifdef IPTOS_PREC_INTERNETCONTROL + /* set IPPROTO_IP/IP_TOS -- if is AF_INET + * + * We assume that if the socket is an AF_INET6 with an IPv4 mapped address, + * then can still set IP_PROTOCOL/IP_TOS. + */ + if (sockunion_family(su) == AF_INET) + if (setsockopt_ipv4_tos(sock_fd, IPTOS_PREC_INTERNETCONTROL) < 0) + return errno ; +#endif + + return 0 ; +} ; + +/*------------------------------------------------------------------------------ + * Set (or clear) MD5 key for the socket, for the given IPv4 peer address. + * + * If the password is NULL or zero-length, the option will be disabled. + * + * Returns: 0 => OK + * otherwise: errno + * + * NB: if MD5 is not supported, returns EOPNOTSUPP error (but it should not + * come to this !). + * + * NB: has to change up privileges, which can fail (if things are badly set up) + */ +static int +bgp_md5_set_socket(int sock_fd, sockunion su, const char *password) +{ + int err, ret ; - ret = bgp_listener (sock, (struct sockaddr *) &sin, socklen); - if (ret < 0) + assert(sock_fd >= 0) ; + + err = 0 ; + + if (bgpd_privs.change(ZPRIVS_RAISE)) { - close (sock); - return ret; - } - return sock; -} -#endif /* HAVE_IPV6 && !NRL */ + err = errno ; + zlog_err("%s: could not raise privs: %s", __func__, errtoa(errno, 0).str); + } ; -void -bgp_close (void) + ret = setsockopt_tcp_signature(sock_fd, su, password) ; + + if (ret != 0) + err = errno ; + + if (bgpd_privs.change(ZPRIVS_LOWER)) + { + if (err == 0) + err = errno ; + zlog_err("%s: could not lower privs: %s", __func__, errtoa(errno, 0).str); + } ; + return err ; +} ; + +/*------------------------------------------------------------------------------ + * Set (or clear) MD5 password for given peer in the listener(s) for the peer's + * address family. + * + * This allows system to accept MD5 "signed" incoming connections from the + * given address. + * + * NULL password clears the password for the given peer. + * + * Returns: 0 => OK + * otherwise: errno -- the first error encountered. + * + * NB: peer address must be AF_INET or (if supported) AF_INET6 + * + * NB: does nothing and returns "OK" if there are no listeners in the + * address family -- wanting to set MD5 makes no difference to this ! + */ +static int +bgp_md5_set_listeners(union sockunion* su, const char* password) { - struct listnode *node, *next; - struct bgp_listener *listener; + bgp_listener listener ; + int err ; - for (ALL_LIST_ELEMENTS (bm->listen_sockets, node, next, listener)) +#ifdef HAVE_IPV6 + assert((su->sa.sa_family == AF_INET) || (su->sa.sa_family == AF_INET6)) ; +#else + assert(su->sa.sa_family == AF_INET) ; +#endif + + listener = *bgp_listeners(su->sa.sa_family) ; + + while (listener != NULL) { - thread_cancel (listener->thread); - close (listener->fd); - listnode_delete (bm->listen_sockets, listener); - XFREE (MTYPE_BGP_LISTENER, listener); - } -} + err = bgp_md5_set_socket(qps_file_fd(&listener->qf), su, password) ; + if (err != 0) + return err ; + listener = listener->next ; + } ; + + return 0 ; +} ; + |