diff options
-rw-r--r-- | bgpd/bgp_common.h | 6 | ||||
-rw-r--r-- | bgpd/bgp_connection.c | 87 | ||||
-rw-r--r-- | bgpd/bgp_connection.h | 26 | ||||
-rw-r--r-- | bgpd/bgp_fsm.c | 278 | ||||
-rw-r--r-- | bgpd/bgp_fsm.h | 100 | ||||
-rw-r--r-- | bgpd/bgp_msg_read.c | 16 | ||||
-rw-r--r-- | bgpd/bgp_msg_read.h | 2 | ||||
-rw-r--r-- | bgpd/bgp_notification.c | 6 | ||||
-rw-r--r-- | bgpd/bgp_notification.h | 2 | ||||
-rw-r--r-- | bgpd/bgp_peer.c | 85 | ||||
-rw-r--r-- | bgpd/bgp_peer.h | 33 | ||||
-rw-r--r-- | bgpd/bgp_session.c | 76 | ||||
-rw-r--r-- | bgpd/bgp_session.h | 40 | ||||
-rw-r--r-- | lib/prefix.c | 54 | ||||
-rw-r--r-- | lib/prefix.h | 2 |
15 files changed, 421 insertions, 392 deletions
diff --git a/bgpd/bgp_common.h b/bgpd/bgp_common.h index 6ff74c50..f6f06446 100644 --- a/bgpd/bgp_common.h +++ b/bgpd/bgp_common.h @@ -117,11 +117,11 @@ enum bgp_session_events bgp_session_eFSM_error, /* unexpected BGP message received */ bgp_session_eNOM_recv, /* NOTIFICATION message received */ - bgp_session_eTCP_failed, /* TCP connection failed to come up */ + bgp_session_eExpired, /* HoldTime expired */ bgp_session_eTCP_dropped, /* TCP connection dropped */ - bgp_session_eTCP_error, /* some socket level error */ - bgp_session_eExpired, /* HoldTime expired */ + bgp_session_eTCP_failed, /* TCP connection failed to come up */ + bgp_session_eTCP_error, /* some socket level error */ bgp_session_eInvalid, /* invalid internal event */ diff --git a/bgpd/bgp_connection.c b/bgpd/bgp_connection.c index b74b8cef..1f78e8fd 100644 --- a/bgpd/bgp_connection.c +++ b/bgpd/bgp_connection.c @@ -46,10 +46,10 @@ * * input/output buffers and I/O management * * timers to support the above * - * Each BGP Session is associated with at most two BGP Connections. The second - * connection exists only if a connect and a listen connection is made while - * a session is starting up, and one will be dropped before either connection - * reaches Established state. + * Each BGP Session is associated with at most two BGP Connections, a primary + * and a secondary. The primary starts as the connect() connection, and the + * secondary as the acccept(). One will be dropped before either connection + * reaches sEstablished state, and the remaining connection becomes the primary. * * The bgp_connection structure is private to the BGP Engine, and is accessed * directly, without the need for any mutex. @@ -57,34 +57,37 @@ * Each connection is closely tied to its parent bgp_session. The bgp_session * is shared between the Routeing Engine and the BGP Engine, and therefore * access is subject to the bgp_session's mutex. 
- * */ /*============================================================================== - * The connection queue. + * The connection queue and the connection's pending queue. + * + * When it is no longer possible to write to the connection's write buffer, + * any mqueue messages that cannot be dealt with are queued on the connection's + * pending queue. So when the BGP Engine's mqueue is processed, the messages + * are either dealt with, or queued in the relevant connection. * * When the connection's write buffer empties, the connection is placed on the - * connection queue. + * BGP Engine's connection queue. * * The connection queue is processed as the highest priority action in the - * BGP Engine, at which point as many of the items on the connection's + * BGP Engine, at which point as many of the items on each connection's * pending queue as possible will be processed. * * The connection_queue is managed as a circular list of connections. The * connection_queue variable points at the next to be processed. - * */ -static bgp_connection bgp_connection_queue ; +static bgp_connection bgp_connection_queue ; /* BGP Engine connection queue */ /*============================================================================== * Managing bgp_connection stuctures. */ static const char* bgp_connection_tags[] = - { - [bgp_connection_primary] = "(primary)", - [bgp_connection_secondary] = "(secondary)", - } ; +{ + [bgp_connection_primary] = "(primary)", + [bgp_connection_secondary] = "(secondary)", +} ; static void bgp_connection_init_host(bgp_connection connection, const char* tag) ; @@ -94,7 +97,7 @@ static void bgp_write_buffer_free(bgp_wbuffer wb) ; /*------------------------------------------------------------------------------ * Initialise connection structure -- allocate if required. * - * + * Copies information required by the connection from the parent session. 
* * NB: requires the session LOCKED */ @@ -284,8 +287,8 @@ bgp_connection_make_primary(bgp_connection connection) session->route_refresh_pre = connection->route_refresh ; session->orf_prefix_pre = connection->orf_prefix ; - sockunion_set_mov(&session->su_local, &connection->su_local) ; - sockunion_set_mov(&session->su_remote, &connection->su_remote) ; + sockunion_set_dup(&session->su_local, connection->su_local) ; + sockunion_set_dup(&session->su_remote, connection->su_remote) ; } ; /*------------------------------------------------------------------------------ @@ -304,7 +307,7 @@ bgp_connection_make_primary(bgp_connection connection) extern void bgp_connection_exit(bgp_connection connection) { - bgp_connection_close(connection, 1) ; /* make sure */ + bgp_connection_close_down(connection) ; /* make sure */ assert(connection->state == bgp_fsm_sStopping) ; @@ -316,20 +319,20 @@ bgp_connection_exit(bgp_connection connection) * * Connection must be Stopping -- no longer attached to a session. * - * - * - * + * This is done in the BGP Engine connection queue handling -- so that the + * structure is reaped once there is no chance of any dangling pointers to it. */ static void bgp_connection_free(bgp_connection connection) { - assert( (connection->state == bgp_fsm_sStopping) && - (connection->session == NULL) ) ; + assert( (connection->state == bgp_fsm_sStopping) + && (connection->session == NULL) + && (connection->lock_count == 0) ) ; /* Make sure is closed, so no active file, no timers, pending queue is empty, * not on the connection queue, etc. */ - bgp_connection_close(connection, 1) ; + bgp_connection_close_down(connection) ; /* Free any components which still exist */ bgp_notify_unset(&connection->notification) ; @@ -438,9 +441,11 @@ bgp_connection_queue_del(bgp_connection connection) /*------------------------------------------------------------------------------ * Process the connection queue until it becomes empty. 
* - * Process each item until its pending queue becomes empty, or its write - * buffer becomes full, or it is stopped. + * Process each connection in turn, dealing with one item on each one's pending + * queue. Dealing with the item will either remove it from the connection's + * pending queue (success) or remove connection from the pending queue. * + * This is also where connections come to die. */ extern void bgp_connection_queue_process(void) @@ -486,8 +491,11 @@ bgp_connection_queue_process(void) * If mqb is not already pending, add it at the tail and mark it pending. * * If is already pending, then is being put back onto the queue, so put it - * at the head, and remove the connection from the connection queue -- there - * is nothing more to be done for the connection for the time being. + * at the head. + * + * In any case, remove the connection from the BGP Engine connection queue (if + * there) -- there is nothing more to be done for the connection for the time + * being. */ extern void bgp_connection_add_pending(bgp_connection connection, mqueue_block mqb, @@ -502,8 +510,9 @@ bgp_connection_add_pending(bgp_connection connection, mqueue_block mqb, { dassert(*is_pending == connection) ; mqueue_local_enqueue_head(&connection->pending_queue, mqb) ; - bgp_connection_queue_del(connection) ; } ; + + bgp_connection_queue_del(connection) ; } ; /*============================================================================== @@ -522,7 +531,7 @@ bgp_connection_add_pending(bgp_connection connection, mqueue_block mqb, * * Sets: * - * * if accept() clears the session accept flag + * * if secondary connection, turn off accept() * * sets the qfile and fd ready for use * * clears except, notification and err * * discards any open_state and notification @@ -549,7 +558,7 @@ bgp_connection_open(bgp_connection connection, int fd) /* Make sure that there is no file and that buffers are clear, etc. */ /* If this is the secondary connection, do not accept any more. 
*/ - bgp_connection_close(connection, 0) ; + bgp_connection_close(connection) ; /* FSM deals with timers */ /* Set the file going */ qps_add_file(bgp_nexus->selection, &connection->qf, fd, connection) ; @@ -607,7 +616,7 @@ bgp_connection_disable_accept(bgp_connection connection) * * * state of the connection * * links to and from the session - * * the timers remain initialised (but unset) + * * the timers remain initialised (but may have been unset) * * the buffers remain (but reset) * * logging and host string * * any open_state that has been received @@ -616,14 +625,14 @@ bgp_connection_disable_accept(bgp_connection connection) * * Once closed, the only further possible actions are: * - * * bgp_connection_open() -- to retry connection + * * bgp_connection_open() -- to retry connection * - * * bgp_connection_free() -- to finally discard + * * bgp_connection_free() -- to finally discard * - * * bgp_connection_close() -- can do this again + * * bgp_connection_full_close() -- can do this again */ extern void -bgp_connection_close(bgp_connection connection, int unset_timers) +bgp_connection_full_close(bgp_connection connection, int unset_timers) { int fd ; @@ -712,12 +721,13 @@ bgp_connection_part_close(bgp_connection connection) if (wb->p_in != wb->p_out) /* will be equal if buffer is empty */ { + passert(wb->p_out < wb->p_in) ; mlen = 0 ; p = wb->base ; do /* Advance p until p + mlen > wb->p_out */ { p += mlen ; - mlen = bgp_msg_get_mlen(p) ; + mlen = bgp_msg_get_mlen(p, wb->p_in) ; /* checks pointers */ } while ((p + mlen) <= wb->p_out) ; if (p == wb->p_out) @@ -853,8 +863,7 @@ bgp_connection_write_direct(bgp_connection connection, struct stream* s) * -- if notification is pending, then generate a notification sent event * * -- otherwise: place connection on the connection queue, so can start to - * flush out anything on the connection's pending queue and/or send an - * XON message to the Peering Engine. + * flush out anything on the connection's pending queue. 
* * If empty out everything, disable write mode. * diff --git a/bgpd/bgp_connection.h b/bgpd/bgp_connection.h index c620de55..4a0e7f5d 100644 --- a/bgpd/bgp_connection.h +++ b/bgpd/bgp_connection.h @@ -99,7 +99,9 @@ enum bgp_fsm_events * */ -/* NB: p_out == p_in => buffer is empty +/* Write buffer for connection. + * + * NB: p_out == p_in => buffer is empty * * BUT: buffer is not allocated until required, and until then * p_out == p_in == NULL -- empty does NOT imply usable ! @@ -131,7 +133,7 @@ struct bgp_connection /* NULL if connection stopping */ qpt_mutex p_mutex ; /* session mutex* */ /* (avoids incomplete type issue) */ - unsigned lock_count ; /* session lock count */ + unsigned lock_count ; /* session mutex lock count */ bgp_connection_ord_t ordinal ; /* primary/secondary connection */ int accepted ; /* came via accept() */ @@ -142,10 +144,10 @@ struct bgp_connection bgp_connection next ; /* for the connection queue */ bgp_connection prev ; /* NULL <=> not on the queue */ - int fsm_active ; /* active in fsm count */ - bgp_fsm_event_t post ; /* event raised within FSM */ + int fsm_active ; /* active in FSM counter */ + bgp_fsm_event_t follow_on ; /* event raised within FSM */ - bgp_session_event_t except ; /* exception */ + bgp_session_event_t except ; /* exception posted here */ bgp_notify notification ; /* if any sent/received */ int err ; /* erno, if any */ @@ -189,7 +191,7 @@ struct bgp_connection } ; /*============================================================================== - * + * The functions */ extern bgp_connection @@ -211,7 +213,10 @@ extern void bgp_connection_make_primary(bgp_connection connection) ; extern void -bgp_connection_close(bgp_connection connection, int unset_timers) ; +bgp_connection_full_close(bgp_connection connection, int unset_timers) ; + +#define bgp_connection_close(conn) bgp_connection_full_close(conn, 0) +#define bgp_connection_close_down(conn) bgp_connection_full_close(conn, 1) extern void 
bgp_connection_part_close(bgp_connection connection) ; @@ -298,13 +303,6 @@ bgp_connection_write_empty(bgp_connection connection) } ; /*============================================================================== - * Access functions via bgp_connection for bgp_session attributes. - * - * - * - */ - -/*============================================================================== * Locking the session associated with the connection. * * This is slightly complicated by the fact that when the connection is in diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 68bb0282..46bd3569 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -55,13 +55,19 @@ * * In some cases the action routine may override the the default new state. * + * In general the FSM manages connections, but there is some interaction with + * the session. In particular, exceptions are expressed as session_eXXX + * values -- which are passed to the Peering Engine as session events. The + * handling of FSM events depends mostly on the FSM state, but any + * exception influences that too. + * * When a new state is entered, bgp_fsm_state_change() is called to complete * the transition (in particular to set/unset timers). * * The fsm action functions are called with the session locked. * *------------------------------------------------------------------------------ - * FSM "events" + * FSM "events" and Session "exceptions". * * These are raised when: * @@ -73,7 +79,12 @@ * * * timers go off * - * and the mechanism is to call bgp_fsm_event(). + * FSM events are raised by calling bgp_fsm_event(). A number of events are + * associated with session exceptions -- the exception is posted in the + * connection and then a suitable FSM event is raised. + * + * The most general FSM event is fsm_eBGP_Stop -- which MUST have a posted + * exception to tell it why the stop has been raised. * * However, nothing external calls bgp_fsm_event() directly -- functions * defined here will raise the appropriate event. 
@@ -92,15 +103,15 @@ * * However, the FSM does some I/O operations -- notably write() and connect(). * These may complete immediately, and may need to trigger a new event. To - * handle this, the connection can set a "post" event, to be processed at the - * tail end of the current event processing. + * handle this, the connection can set a "follow on" event, to be processed + * immediately after the current event and any state change that it makes. * * Also, some things within the FSM are most consistently dealt with by * raising follow on events. * - * Note that there is only one level of "post" event. The FSM only ever issues - * one I/O operation per event or more than one follow on event, and never both - * at the same time. (It's a RULE.) + * Note that there is only one level of "follow on" event. The FSM never + * issues more than one I/O operation per event or more than one follow on + * event, and never both at the same time. (It's a RULE.) * *------------------------------------------------------------------------------ * Primary and Secondary Connections @@ -122,8 +133,8 @@ * Up to sEstablished state, the primary connection will be the out-bound * connect() connection (if allowed) and the secondary will be the in-bound * accept() connection (if allowed). In sEstablished state, the primary - * connection is the one that won the race -- any other connection is snuffed - * out. + * connection is the one that won the race -- any other connection has been + * snuffed out. * * As per the RFC, collision detection/resolution is performed when an OPEN * message is received -- that is, as the connection attempts to advance to @@ -134,7 +145,7 @@ * See below for a discussion of the fall back to sIdle -- the losing connection * will remain comatose until the winner either reaches sEstablished (when the * loser is snuffed out) or the winner falls back to sIdle (when the - * IdleHoldTimer for the loser is set, and it will be awoken in due course). 
+ * IdleHoldTimer for the loser is set, and it will wake up in due course). * * NB: the RFC talks of matching source/destination and destination/source * addresses of connections in order to detect collisions. This code @@ -195,13 +206,13 @@ * * if a sibling exists and is in sOpenSent or sOpenConfirm: * * - do not change the IdleHoldTimer interval. - * - do not set the IdleHoldTimer (with jitter). + * - unset the IdleHoldTimer. * - set self "comatose". * * * otherwise: * * - increase the IdleHoldTimer interval. - * - set the IdleHoldTimer. + * - set the IdleHoldTimer (with jitter). * * and if a sibling exists and is comatose: * @@ -253,10 +264,10 @@ * * The basic mechanism is: * - * * exceptions may the "thrown" -- which records a given exception in the + * * exceptions may the "thrown" -- which posts a given exception in the * connection then kicks the FSM with a given fsm_eXxxxx event. * - * Information recorded is: + * Information posted is: * * sesssion_eXxxxx -- what the exception is * notification -- any NOTIFICATION message @@ -273,8 +284,13 @@ * See the various exception functions below for what exceptions are posted and * what fsm_eXxxx events are generated. * - * For internally generated exceptions, the FSM throws exceptions with an - * fsm_eBGP_Stop event. + * The following fsm events require an exception: + * + * bgp_fsm_eBGP_Stop -- bgp_fsm_exception() + * bgp_fsm_eTCP_connection_closed -- bgp_fsm_io_error() + * bgp_fsm_eTCP_connection_open_failed -- bgp_fsm_connect_completed() + * bgp_fsm_eTCP_fatal_error -- bgp_fsm_io_fatal_error() + * bgp_fsm_eReceive_NOTIFICATION_message -- bgp_fsm_notification_exception() * *------------------------------------------------------------------------------ * FSM errors @@ -308,8 +324,7 @@ * This ensures there is room in the write buffer at the very least. * * For sOpenSent and sOpenConfirm states there should be zero chance of - * there being anything to purge, and probably no write buffer in any - * case. 
+ * there being anything to purge. * * -- purge any pending write messages for the connection (for sEstablished). * @@ -358,6 +373,26 @@ static void bgp_fsm_event(bgp_connection connection, bgp_fsm_event_t event) ; /*============================================================================== + * Recharge the HoldTimer + * + * Defined here for the convenience of bgp_fsm_pre_update(), which is called + * once for every incoming update. + * + * NB: no jitter. + * + * NB: do nothing if connection->hold_timer_interval == 0 + * + * NB: if connection->hold_timer_interval != 0, timer MUST be set + */ +static inline void +bgp_hold_timer_recharge(bgp_connection connection) +{ + if (connection->hold_timer_interval != 0) + qtimer_set_interval(&connection->hold_timer, + QTIME(connection->hold_timer_interval), NULL) ; +} ; + +/*============================================================================== * Enable the given session -- which must be newly initialised. * * This is the first step in the FSM, and the connection advances to Idle. @@ -448,8 +483,6 @@ bgp_fsm_keepalive_received(bgp_connection connection) * Deals, via the FSM, with unexpected "update" events -- for example an * UPDATE (or ROUTE-REFRESH) before reaching sEstablished ! */ -static void bgp_hold_timer_recharge(bgp_connection connection) ; - extern int bgp_fsm_pre_update(bgp_connection connection) { @@ -492,8 +525,7 @@ bgp_fsm_disable_session(bgp_session session, bgp_notify notification) connection = session->connections[bgp_connection_secondary] ; if (connection != NULL) - bgp_fsm_throw(connection, bgp_session_eDisabled, notification, 0, - bgp_fsm_eBGP_Stop) ; + bgp_fsm_exception(connection, bgp_session_eDisabled, notification) ; else { /* Acknowledge the disable -- session is stopped. */ @@ -511,7 +543,7 @@ bgp_fsm_disable_session(bgp_session session, bgp_notify notification) * NB: can throw an exception for other connections while in the FSM. 
* * Can throw an exception for the current connection while in the FSM, the - * fsm_active/post mechanism looks after this. + * fsm_active/follow_on mechanism looks after this. */ extern void bgp_fsm_exception(bgp_connection connection, bgp_session_event_t except, @@ -773,7 +805,7 @@ static bgp_fsm_action(bgp_fsm_exit) ; * * Start a new accept() attempt. * - * 2. BGP_Stop + * 2. BGP_Stop -- process the posted exception (invalid if none !) * * a. in all states: * @@ -798,7 +830,7 @@ static bgp_fsm_action(bgp_fsm_exit) ; * * Cannot happen at any other time. * - * 4. TCP_connection_closed + * 4. TCP_connection_closed -- process the posted exception (MUST be there) * * Raised by "EOF" on read or by EPIPE and some other errors. * @@ -820,6 +852,7 @@ static bgp_fsm_action(bgp_fsm_exit) ; * Cannot happen at any other time. * * 5. TCP_connection_open_failed ("soft" error) + * -- process the posted exception (MUST be there) * * a. in sConnect or sActive states * @@ -830,6 +863,7 @@ static bgp_fsm_action(bgp_fsm_exit) ; * Cannot happen at any other time. * * 6. TCP_fatal_error ("hard" error) + * -- process the posted exception (MUST be there) * * a. in sConnect or sActive states * @@ -959,6 +993,7 @@ static bgp_fsm_action(bgp_fsm_exit) ; * Cannot happen at any other time (connection not up or read closed). * * 13. Receive_NOTIFICATION_message + * -- process the posted exception (MUST be there) * * Generated by read action. 
* @@ -1379,7 +1414,7 @@ bgp_fsm_event(bgp_connection connection, bgp_fsm_event_t event) if (connection->fsm_active == 2) { - connection->post = event ; + connection->follow_on = event ; return ; } ; @@ -1431,9 +1466,9 @@ bgp_fsm_event(bgp_connection connection, bgp_fsm_event_t event) LOOKUP (bgp_status_msg, next_state)); } ; - /* Pick up post event -- if any */ - event = connection->post ; - connection->post = bgp_fsm_null_event ; + /* Pick up follow_on event -- if any */ + event = connection->follow_on ; + connection->follow_on = bgp_fsm_null_event ; } while (--connection->fsm_active != 0) ; @@ -1679,7 +1714,7 @@ static bgp_fsm_action(bgp_fsm_fatal) */ static bgp_fsm_action(bgp_fsm_retry) { - bgp_connection_close(connection, 0) ; /* FSM does timers */ + bgp_connection_close(connection) ; /* FSM does timers */ bgp_fsm_throw(connection, bgp_session_eRetry, NULL, 0, bgp_fsm_eBGP_Start) ; @@ -1714,7 +1749,7 @@ static bgp_fsm_action(bgp_fsm_expire) /* The process of sending a NOTIFICATION comes to an end here. */ if (connection->notification_pending) { - bgp_connection_close(connection, 0) ; /* FSM deals with timers */ + bgp_connection_close(connection) ; /* FSM deals with timers */ return next_state ; } ; @@ -1885,7 +1920,6 @@ static bgp_fsm_action(bgp_fsm_establish) bgp_connection_make_primary(connection) ; /* Signal exciting session event */ - session->made = 1 ; bgp_session_event(session, bgp_session_eEstablished, NULL, 0, 0, 0) ; /* TODO: now would be a good time to withdraw the password from listener ? */ @@ -1937,18 +1971,19 @@ static bgp_fsm_action(bgp_fsm_exit) * Uses the posted information and the expected next_state to deal with some * exception. Proceeds: * + * 0) stop any timers -- so if held in sOpenSent/sOpenConfirm by notification + * process, won't (eg) have extraneous keepalive going off. + * * 1a) if have notification & not eNOM_recv & is in a suitable state * - * (suitable state is sOpenSent/sOpenConfirm/sEstablished. 
+ * Suitable states are sOpenSent/sOpenConfirm/sEstablished. * - * Start sending the NOTIFICATION message. + * Send NOTIFICATION -- see notes above on the process. * * For sOpenSent/sOpenConfirm, override the next_state to stay where it is * until NOTIFICATION process completes. * - * For sEstablished, the next state will be sStopping. - * - * Sending NOTIFICATION closes the connection for reading. + * For sEstablished, the next_state will be sStopping. * * 1b) otherwise: close the connection file. * @@ -1964,37 +1999,54 @@ static bgp_fsm_action(bgp_fsm_exit) * * (The sibling will be session_eDiscard -- so no deadly embrace here.) * + * So: proceeds to the given next_state unless has started notification process, + * and next_state was not sStopping. + * + * Issues follow-on events: + * + * * bgp_fsm_eSent_NOTIFICATION_message if notification clears to TCP buffers + * immediately. + * + * * bgp_fsm_eTCP_fatal_error (or other such) if fails trying to send + * notification. + * + * * bgp_fsm_eBGP_Stop if next_state is sStopping and no notification to + * send. + * * The state machine takes care of the rest: * - * * complete entry to new state (for sStopping will cut connection loose). + * * complete entry to new state * * * send message to Routeing Engine * + * * cutting the connection loose if ends up sStopping. + * * NB: requires the session LOCKED -- connection-wise */ static bgp_fsm_state_t bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) { - int send_notification ; + bgp_notify send_notification ; + + assert(connection->except != bgp_session_null_event) ; + + /* It's bad news, so stop doing whatever was doing. */ + qtimer_unset(&connection->hold_timer) ; + qtimer_unset(&connection->keepalive_timer) ; - /* Have a notification to send iff have not just received one, and are in a + /* Have a notification to send iff not just received one, and is in a * suitable state to send one at all. 
*/ if (connection->except == bgp_session_eNOM_recv) - send_notification = 0 ; + send_notification = NULL ; else { - if ( (connection->state == bgp_fsm_sOpenSent) - || (connection->state == bgp_fsm_sOpenConfirm) - || (connection->state == bgp_fsm_sEstablished) ) - { - send_notification = (connection->notification != NULL) ; - } - else - { - bgp_notify_unset(&connection->notification) ; - send_notification = 0 ; - } ; + if ( (connection->state != bgp_fsm_sOpenSent) + && (connection->state != bgp_fsm_sOpenConfirm) + && (connection->state != bgp_fsm_sEstablished) ) + bgp_notify_unset(&connection->notification) ; + + send_notification = connection->notification ; } ; /* If there is a NOTIFICATION to send, now is the time to do that. @@ -2003,7 +2055,7 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) * The state transition stuff looks after timers. In particular an error * in Connect/Active states leaves the ConnectRetryTimer running. */ - if (send_notification) + if (send_notification != NULL) { int ret ; @@ -2021,7 +2073,7 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) * If the write fails it raises a suitable event, which will now be * sitting waiting to be processed on the way out of the FSM. */ - ret = bgp_msg_write_notification(connection, connection->notification) ; + ret = bgp_msg_write_notification(connection, send_notification) ; connection->notification_pending = (ret >= 0) ; /* is pending if not failed */ @@ -2047,7 +2099,7 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) } else { - bgp_connection_close(connection, 0) ; /* FSM deals with timers */ + bgp_connection_close(connection) ; /* FSM deals with timers */ if (next_state == bgp_fsm_sStopping) /* can exit if sStopping */ bgp_fsm_event(connection, bgp_fsm_eBGP_Stop) ; @@ -2062,11 +2114,10 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) sibling = bgp_connection_get_sibling(connection) ; /* ... 
if any */ if (sibling != NULL) - bgp_fsm_discard_sibling(sibling, - bgp_notify_dup(connection->notification)) ; + bgp_fsm_discard_sibling(sibling, bgp_notify_dup(send_notification)) ; } ; - /* Return the (possibly adjusted) next_state */ + /* Return the (possibly adjusted) next_state */ return next_state ; } ; @@ -2132,11 +2183,6 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) * bgp_fsm_eKeepAlive_timer_expired event. */ -/* Forward reference */ -static void -bgp_timer_set(bgp_connection connection, qtimer timer, unsigned secs, - int jitter, qtimer_action* action) ; - /* Forward reference the action functions */ static qtimer_action bgp_idle_hold_timer_action ; static qtimer_action bgp_connect_retry_timer_action ; @@ -2144,6 +2190,48 @@ static qtimer_action bgp_hold_timer_action ; static qtimer_action bgp_keepalive_timer_action ; /*============================================================================== + * Timer set functions -- general and HoldTimer specific. + */ +enum +{ + no_jitter = 0, + with_jitter = 1, +} ; + +/*------------------------------------------------------------------------------ + * Start or reset given qtimer with given interval, in seconds. + * + * If the interval is zero, unset the timer. + */ +static void +bgp_timer_set(bgp_connection connection, qtimer timer, unsigned secs, + int jitter, qtimer_action* action) +{ + if (secs == 0) + qtimer_unset(timer) ; + else + { + secs *= 40 ; /* a bit of resolution for jitter */ + if (jitter != no_jitter) + secs -= ((rand() % ((int)secs + 1)) / 4) ; + qtimer_set_interval(timer, QTIME(secs) / 40, action) ; + } ; +} ; + +/*------------------------------------------------------------------------------ + * Set HoldTimer with given time (without jitter) so will generate a + * Hold_Timer_expired event. + * + * Setting 0 will unset the HoldTimer. 
+ */ +static void +bgp_hold_timer_set(bgp_connection connection, unsigned secs) +{ + bgp_timer_set(connection, &connection->hold_timer, secs, no_jitter, + bgp_hold_timer_action) ; +} ; + +/*============================================================================== * Completion of State Change * * This performs fixed changes associated with the entry to each state from @@ -2214,14 +2302,14 @@ bgp_fsm_state_change(bgp_connection connection, bgp_fsm_state_t new_state) if ((sibling != NULL) && (sibling->comatose)) { connection->comatose = 0 ; /* no longer comatose */ - bgp_timer_set(sibling, &sibling->hold_timer, interval, 1, - bgp_idle_hold_timer_action) ; + bgp_timer_set(sibling, &sibling->hold_timer, interval, + with_jitter, bgp_idle_hold_timer_action) ; } ; } ; } ; - bgp_timer_set(connection, &connection->hold_timer, interval, 1, - bgp_idle_hold_timer_action) ; + bgp_timer_set(connection, &connection->hold_timer, interval, + with_jitter, bgp_idle_hold_timer_action) ; qtimer_unset(&connection->keepalive_timer) ; @@ -2237,7 +2325,7 @@ bgp_fsm_state_change(bgp_connection connection, bgp_fsm_state_t new_state) case bgp_fsm_sConnect: case bgp_fsm_sActive: bgp_timer_set(connection, &connection->hold_timer, - session->connect_retry_timer_interval, 1, + session->connect_retry_timer_interval, with_jitter, bgp_connect_retry_timer_action) ; qtimer_unset(&connection->keepalive_timer) ; break; @@ -2269,8 +2357,8 @@ bgp_fsm_state_change(bgp_connection connection, bgp_fsm_state_t new_state) */ case bgp_fsm_sOpenConfirm: bgp_timer_set(connection, &connection->keepalive_timer, - connection->keepalive_timer_interval, 1, - bgp_keepalive_timer_action) ; + connection->keepalive_timer_interval, + with_jitter, bgp_keepalive_timer_action) ; case bgp_fsm_sEstablished: bgp_hold_timer_set(connection, connection->hold_timer_interval) ; break; @@ -2294,51 +2382,8 @@ bgp_fsm_state_change(bgp_connection connection, bgp_fsm_state_t new_state) } ; 
/*============================================================================== - * Timer set and Timer Action Functions - */ - -/*------------------------------------------------------------------------------ - * Start or reset given qtimer with given interval, in seconds. - * - * If the interval is zero, unset the timer. - */ -static void -bgp_timer_set(bgp_connection connection, qtimer timer, unsigned secs, - int jitter, qtimer_action* action) -{ - if (secs == 0) - qtimer_unset(timer) ; - else - { - secs *= 40 ; /* a bit of resolution for jitter */ - if (jitter) - secs -= ((rand() % ((int)secs + 1)) / 4) ; - qtimer_set_interval(timer, QTIME(secs) / 40, action) ; - } ; -} ; - -/*------------------------------------------------------------------------------ - * Set HoldTimer with given time (without jitter) so will generate a - * Hold_Timer_expired event. - * - * Setting 0 will unset the HoldTimer. + * Timer Action Functions */ -static void -bgp_hold_timer_set(bgp_connection connection, unsigned secs) -{ - bgp_timer_set(connection, &connection->hold_timer, secs, 0, - bgp_hold_timer_action) ; -} ; - -/*------------------------------------------------------------------------------ - * Recharge the HoldTimer - */ - -static void -bgp_hold_timer_recharge(bgp_connection connection) -{ - bgp_hold_timer_set(connection, connection->hold_timer_interval) ; -} ; /*------------------------------------------------------------------------------ * BGP start timer action => bgp_fsm_eBGP_Start event @@ -2369,7 +2414,7 @@ bgp_connect_retry_timer_action(qtimer qtr, void* timer_info, qtime_mono_t when) BGP_FSM_DEBUG(connection, "Timer (connect timer expire)") ; bgp_timer_set(connection, &connection->hold_timer, - connection->session->connect_retry_timer_interval, 1, NULL) ; + connection->session->connect_retry_timer_interval, with_jitter, NULL) ; bgp_fsm_event(connection, bgp_fsm_eConnectRetry_timer_expired) ; } ; @@ -2403,7 +2448,8 @@ bgp_keepalive_timer_action(qtimer qtr, void* 
timer_info, qtime_mono_t when) BGP_FSM_DEBUG(connection, "Timer (keepalive timer expire)") ; bgp_timer_set(connection, &connection->keepalive_timer, - connection->session->keepalive_timer_interval, 1, NULL) ; + connection->session->keepalive_timer_interval, + with_jitter, NULL) ; bgp_fsm_event(connection, bgp_fsm_eKeepAlive_timer_expired) ; } ; diff --git a/bgpd/bgp_fsm.h b/bgpd/bgp_fsm.h index 3a1bf883..077d45c3 100644 --- a/bgpd/bgp_fsm.h +++ b/bgpd/bgp_fsm.h @@ -1,23 +1,26 @@ /* BGP-4 Finite State Machine - From RFC1771 [A Border Gateway Protocol 4 (BGP-4)] - Copyright (C) 1998 Kunihiro Ishiguro - -This file is part of GNU Zebra. - -GNU Zebra is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -GNU Zebra is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU Zebra; see the file COPYING. If not, write to the Free -Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ + * From RFC1771 [A Border Gateway Protocol 4 (BGP-4)] + * Copyright (C) 1996, 97, 98 Kunihiro Ishiguro + * + * Recast for pthreaded bgpd: Copyright (C) Chris Hall (GMCH), Highwayman + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2, or (at your + * option) any later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ #ifndef _QUAGGA_BGP_FSM_H #define _QUAGGA_BGP_FSM_H @@ -65,63 +68,4 @@ bgp_fsm_notification_exception(bgp_connection connection, bgp_notify notification) ; -//extern int bgp_event (struct thread *); -//extern int bgp_stop (struct peer *peer); -//extern void bgp_timer_set (struct peer *); -//extern void bgp_fsm_change_status (struct peer *peer, int status); -//extern const char *peer_down_str[]; - -/*============================================================================== - * Legacy -- to be removed.... - */ - -/* Macro for BGP read, write and timer thread. */ -#define BGP_READ_ON(T,F,V) \ - do { \ - if (!(T) && (peer->state != bgp_peer_sDeleted)) \ - THREAD_READ_ON(master,T,F,peer,V); \ - } while (0) - -#define BGP_READ_OFF(T) \ - do { \ - if (T) \ - THREAD_READ_OFF(T); \ - } while (0) - -#define BGP_WRITE_ON(T,F,V) \ - do { \ - if (!(T) && (peer->state != bgp_peer_sDeleted)) \ - THREAD_WRITE_ON(master,(T),(F),peer,(V)); \ - } while (0) - -#define BGP_WRITE_OFF(T) \ - do { \ - if (T) \ - THREAD_WRITE_OFF(T); \ - } while (0) - -#define BGP_TIMER_ON(T,F,V) \ - do { \ - if (!(T) && (peer->state != bgp_peer_sDeleted)) \ - THREAD_TIMER_ON(master,(T),(F),peer,(V)); \ - } while (0) - -#define BGP_TIMER_OFF(T) \ - do { \ - if (T) \ - THREAD_TIMER_OFF(T); \ - } while (0) - -#define BGP_EVENT_ADD(P,E) \ - do { \ - if ((P)->state != bgp_peer_sDeleted) \ - thread_add_event (master, bgp_event, (P), (E)); \ - } while (0) - -#define BGP_EVENT_FLUSH(P) \ - do { \ - assert (peer); \ - thread_cancel_event (master, (P)); \ - } while (0) - #endif /* _QUAGGA_BGP_FSM_H */ diff --git a/bgpd/bgp_msg_read.c b/bgpd/bgp_msg_read.c index fbc163d7..9b02fddc 100644 --- a/bgpd/bgp_msg_read.c 
+++ b/bgpd/bgp_msg_read.c @@ -53,14 +53,22 @@ static void bgp_msg_capability_receive(bgp_connection connection, bgp_size_t body_size) ; /*------------------------------------------------------------------------------ - * Get BGP message length, given a pointer to the start of a message + * Get BGP message length, given a pointer to the start of a message. + * + * Make sure things are kosher. */ extern bgp_size_t -bgp_msg_get_mlen(uint8_t* p) +bgp_msg_get_mlen(uint8_t* p, uint8_t* limit) { - return (*(p + BGP_MH_MARKER_L)) + (*(p + BGP_MH_MARKER_L + 1) << 8) ; -} ; + uint16_t mlen ; + passert((p + BGP_MH_HEAD_L) <= limit) ; + + mlen = (*(p + BGP_MH_MARKER_L)) + (*(p + BGP_MH_MARKER_L + 1) << 8) ; + passert((p + mlen) <= limit) ; + + return mlen ; +} ; /*============================================================================== * Header validation and sexing of messages diff --git a/bgpd/bgp_msg_read.h b/bgpd/bgp_msg_read.h index 1556f24c..b8b6235e 100644 --- a/bgpd/bgp_msg_read.h +++ b/bgpd/bgp_msg_read.h @@ -27,7 +27,7 @@ #include "bgpd/bgp_notification.h" extern bgp_size_t -bgp_msg_get_mlen(uint8_t* p) ; +bgp_msg_get_mlen(uint8_t* p, uint8_t* limit) ; extern int bgp_msg_check_header(bgp_connection connection); diff --git a/bgpd/bgp_notification.c b/bgpd/bgp_notification.c index d14a13bf..2763f296 100644 --- a/bgpd/bgp_notification.c +++ b/bgpd/bgp_notification.c @@ -33,8 +33,6 @@ /*============================================================================== * A bgp_notify structure encapsulates the contents of a BGP NOTIFICATION * message. - * - * */ /*------------------------------------------------------------------------------ @@ -175,9 +173,9 @@ bgp_notify_unset(bgp_notify* p_notification) } ; /*------------------------------------------------------------------------------ - * Unset pointer to notification and free any existing notification structure. + * Unset pointer to notification and return the pointer value. 
* - * Does nothing if there is no structure. + * Returns NULL if there is no structure. */ extern bgp_notify bgp_notify_take(bgp_notify* p_notification) diff --git a/bgpd/bgp_notification.h b/bgpd/bgp_notification.h index 3587ad9b..a7e04501 100644 --- a/bgpd/bgp_notification.h +++ b/bgpd/bgp_notification.h @@ -114,7 +114,7 @@ struct bgp_notify #define BGP_NOTIFY_CAPABILITY_MAX 4 /*============================================================================== - * + * Functions */ extern bgp_notify diff --git a/bgpd/bgp_peer.c b/bgpd/bgp_peer.c index 095bfeb1..934bd7cb 100644 --- a/bgpd/bgp_peer.c +++ b/bgpd/bgp_peer.c @@ -44,6 +44,7 @@ #include "prefix.h" #include "vty.h" #include "sockunion.h" +#include "prefix.h" #include "thread.h" #include "log.h" #include "stream.h" @@ -301,7 +302,7 @@ bgp_session_has_stopped(bgp_peer peer) bgp_session session = peer->session ; assert(bgp_session_is_active(session)) ; - bgp_peer_disable(peer, NULL); + bgp_peer_reenable(peer, NULL); /* TODO: needs to deal with NOTIFICATION, if any ?? */ return 0; @@ -325,18 +326,11 @@ bgp_session_has_disabled(bgp_peer peer) mqueue_revoke(routing_nexus->queue, session) ; /* does the session need to be re-enabled? */ - if (session->defer_enable || peer->state == bgp_peer_sIdle) + if (session->defer_enable) { session->defer_enable = 0; bgp_session_enable(peer); } - else if (peer->state == bgp_peer_sEstablished) - { - /* disable the peer */ - bgp_peer_stop(peer); - bgp_clear_route_all(peer); - peer_change_status(peer, bgp_peer_sClearing); - } /* if the program is terminating then see if this was the last session * and if so ... die .... 
@@ -455,16 +449,12 @@ bgp_peer_stop (struct peer *peer) peer->v_holdtime = peer->bgp->default_holdtime; } - /* Until we are sure that there is no problem about prefix count - this should be commented out.*/ -#if 0 /* Reset prefix count */ peer->pcount[AFI_IP][SAFI_UNICAST] = 0; peer->pcount[AFI_IP][SAFI_MULTICAST] = 0; peer->pcount[AFI_IP][SAFI_MPLS_VPN] = 0; peer->pcount[AFI_IP6][SAFI_UNICAST] = 0; peer->pcount[AFI_IP6][SAFI_MULTICAST] = 0; -#endif /* 0 */ return 0; } @@ -829,7 +819,6 @@ peer_delete (struct peer *peer) */ peer->last_reset = PEER_DOWN_NEIGHBOR_DELETE; bgp_peer_stop (peer); - bgp_clear_route_all (peer); peer_change_status (peer, bgp_peer_sDeleted); /* Password configuration */ @@ -997,7 +986,22 @@ peer_nsf_stop (struct peer *peer) bgp_clear_route_all (peer); } + +/* Disable then enable the peer. Sends notification. */ +void +bgp_peer_reenable(bgp_peer peer, bgp_notify notification) +{ + if (bgp_session_is_active(peer->session)) + { + bgp_peer_disable(peer, notification); + bgp_peer_enable(peer); /* may defer if still stopping */ + } + else + bgp_notify_free(notification) ; +} + /* enable the peer */ + void bgp_peer_enable(bgp_peer peer) { @@ -1006,26 +1010,33 @@ bgp_peer_enable(bgp_peer peer) } /* disable the peer - * sent notification, disable session + * sent notification, disable session, stop the peer */ void bgp_peer_disable(bgp_peer peer, bgp_notify notification) { - if (bgp_session_is_active(peer->session)) - bgp_session_disable(peer, notification); - else - { - bgp_notify_free(notification) ; - bgp_peer_stop(peer); - } + /* disable the session */ + bgp_session_disable(peer, notification); + + /* and the peer */ + bgp_peer_stop(peer); + if (peer->state == bgp_peer_sEstablished) + peer_change_status (peer, bgp_peer_sClearing); } -/* Called after event occurred, this function changes status */ +/* Called after event occurred, this function change status and reset + read/write and timer thread. 
*/ void peer_change_status (bgp_peer peer, int status) { bgp_dump_state (peer, peer->state, status); + /* Transition into Clearing or Deleted must /always/ clear all routes.. + * (and must do so before actually changing into Deleted.. + */ + if (status >= bgp_peer_sClearing) + bgp_clear_route_all (peer); + /* Preserve old status and change into new status. */ peer->ostate = peer->state; peer->state = status; @@ -1041,8 +1052,7 @@ peer_change_status (bgp_peer peer, int status) * For the given interface name, get a suitable address so can bind() before * connect() so that we use the required interface. * - * - * + * If has a choice, uses address that best matches the peer's address. */ extern sockunion bgp_peer_get_ifaddress(bgp_peer peer, const char* ifname, pAF_t paf) @@ -1050,6 +1060,9 @@ bgp_peer_get_ifaddress(bgp_peer peer, const char* ifname, pAF_t paf) struct interface* ifp ; struct connected* connected; struct listnode* node; + struct prefix* best_prefix ; + struct prefix* peer_prefix ; + int best, this ; if (ifname == NULL) return NULL ; @@ -1061,13 +1074,29 @@ bgp_peer_get_ifaddress(bgp_peer peer, const char* ifname, pAF_t paf) return NULL ; } ; + peer_prefix = sockunion2hostprefix(&peer->su) ; + best_prefix = NULL ; + best = -1 ; + for (ALL_LIST_ELEMENTS_RO (ifp->connected, node, connected)) { - if (connected->address->family == paf) - return sockunion_new(connected->address) ; + if (connected->address->family != paf) + continue ; + + this = prefix_common_bits (connected->address, peer_prefix) ; + if (this > best) + { + best_prefix = connected->address ; + best = this ; + } ; } ; - zlog_err("Peer %s interface %ss has no suitable address", peer->host, ifname); + prefix_free(peer_prefix) ; + + if (best_prefix != NULL) + return sockunion_new(best_prefix) ; + + zlog_err("Peer %s interface %s has no suitable address", peer->host, ifname); return NULL ; } ; diff --git a/bgpd/bgp_peer.h b/bgpd/bgp_peer.h index b05d7e06..b4b0c7d3 100644 --- a/bgpd/bgp_peer.h +++ 
b/bgpd/bgp_peer.h @@ -372,38 +372,6 @@ struct peer } ; - - - - - - - -/* Macro for BGP read, write and timer thread. */ -#define BGP_READ_ON(T,F,V) \ - do { \ - if (!(T) && (peer->state != bgp_peer_sDeleted)) \ - THREAD_READ_ON(master,T,F,peer,V); \ - } while (0) - -#define BGP_READ_OFF(T) \ - do { \ - if (T) \ - THREAD_READ_OFF(T); \ - } while (0) - -#define BGP_WRITE_ON(T,F,V) \ - do { \ - if (!(T) && (peer->state != bgp_peer_sDeleted)) \ - THREAD_WRITE_ON(master,(T),(F),peer,(V)); \ - } while (0) - -#define BGP_WRITE_OFF(T) \ - do { \ - if (T) \ - THREAD_WRITE_OFF(T); \ - } while (0) - #define BGP_TIMER_ON(T,F,V) \ do { \ if (!(T) && (peer->state != bgp_peer_sDeleted)) \ @@ -477,5 +445,6 @@ peer_nsf_stop (struct peer *peer); extern sockunion bgp_peer_get_ifaddress(bgp_peer peer, const char* ifname, pAF_t paf) ; + #endif /* _QUAGGA_BGP_PEER_H */ diff --git a/bgpd/bgp_session.c b/bgpd/bgp_session.c index 2b6a36b4..cbbc181e 100644 --- a/bgpd/bgp_session.c +++ b/bgpd/bgp_session.c @@ -62,7 +62,7 @@ static void bgp_session_do_route_refresh_recv(mqueue_block mqb, mqb_flag_t flag) * A session is created some time before it is enabled, and may be destroyed * once the session is disabled. * - * A session may be in one of four states: + * A session may be in one of the states: * * * bgp_session_sIdle -- not doing anything * * bgp_session_sEnabled -- the BGP Engine is trying to connect @@ -106,7 +106,7 @@ static void bgp_session_do_route_refresh_recv(mqueue_block mqb, mqb_flag_t flag) * NB: if not allocating, the existing session MUST be sIdle/sDisabled OR never * been kissed. * - * NB: in any event, the peer's peer index entry MUST have a NULL session + * NB: in any event, the peer's peer index entry MUST have a NULL accept * pointer. 
*/ extern bgp_session @@ -130,8 +130,6 @@ bgp_session_init_new(bgp_session session, bgp_peer peer) /* Zeroising the structure has set: * - * made -- false, not yet sEstablished - * * event -- bgp_session_null_event * notification -- NULL -- none * err -- 0 -- none @@ -251,7 +249,6 @@ bgp_session_enable(bgp_peer peer) /* Initialise what we need to make and run connections */ session->state = bgp_session_sIdle; session->defer_enable = 0; - session->made = 0; session->flow_control = 0; session->event = bgp_session_null_event; bgp_notify_unset(&session->notification); @@ -259,7 +256,7 @@ bgp_session_enable(bgp_peer peer) session->ordinal = 0; session->open_send = bgp_peer_open_state_init_new(session->open_send, peer); - session->open_recv = bgp_open_state_free(session->open_recv); + bgp_open_state_unset(&session->open_recv); session->connect = (peer->flags & PEER_FLAG_PASSIVE) == 0 ; session->listen = 1 ; @@ -283,7 +280,6 @@ bgp_session_enable(bgp_peer peer) else if (peer->update_if != NULL) session->ifaddress = bgp_peer_get_ifaddress(peer, peer->update_if, peer->su.sa.sa_family) ; - session->as_peer = peer->as ; sockunion_set_dup(&session->su_peer, &peer->su) ; @@ -524,26 +520,15 @@ bgp_session_update_send(bgp_session session, struct stream* upd) * * -- if the connection's pending queue is empty, try to send the message. * - * If cannot send the message (and not encountered any error), add it to - * the connection's pending queue. - * - * -- otherwise, add mqb to the pending queue. - * - * When the mqb is on the connection's pending queue it must be the head of - * that queue -- and still on the queue. Then: + * When the mqb is from connection's pending queue, then: * - * -- if the message is sent (or is now redundant), remove the mqb from - * the connection's pending queue. + * -- try to send the message. 
* - * -- otherwise: leave the mqb on the connection's pending queue for later, - * but remove the connection from the connection queue, because unable to - * proceed any further. + * In any case, if cannot send the message (and not encountered any error), add + * it (back) to the connection's pending queue. * - * If the mqb has been dealt with (is not on the pending queue), it is freed, - * along with the stream buffer. - * - * NB: when not called "mqb_action", the mqb MUST NOT be on the connection's - * pending queue. + * If the mqb has been dealt with, it is freed, along with the stream buffer. + * Also, update the flow control counter, and issue XON if required. */ static void bgp_session_do_update_send(mqueue_block mqb, mqb_flag_t flag) @@ -858,11 +843,7 @@ bgp_session_do_XON(mqueue_block mqb, mqb_flag_t flag) bgp_session session = mqb_get_arg0(mqb) ; if ((flag == mqb_action) && (session->state == bgp_session_sEstablished)) - { - bgp_peer peer = session->peer; - - bgp_write (peer); - } + bgp_write (session->peer) ; mqb_free(mqb) ; } @@ -901,7 +882,7 @@ bgp_session_do_set_ttl(mqueue_block mqb, mqb_flag_t flag) session->ttl = args->ttl ; - bgp_set_ttl(session->connections[bgp_connection_primary], session->ttl) ; + bgp_set_ttl(session->connections[bgp_connection_primary], session->ttl); bgp_set_ttl(session->connections[bgp_connection_secondary], session->ttl); BGP_SESSION_UNLOCK(session) ; /*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>*/ @@ -922,14 +903,16 @@ bgp_session_do_set_ttl(mqueue_block mqb, mqb_flag_t flag) * Ensure that if exists and is not active, that the peer index entry accept * pointer is NULL -- this is largely paranoia, but it would be a grave * mistake for the listening socket(s) to find a session which is not active ! + * + * NB: accessing Peering Engine "private" variable -- no lock required. + * + * accessing index_entry when not active -- no lock required. 
*/ extern int bgp_session_is_active(bgp_session session) { int active ; - BGP_SESSION_LOCK(session) ; /*<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ - if (session == NULL) active = 0 ; else @@ -942,43 +925,39 @@ bgp_session_is_active(bgp_session session) assert(session->index_entry->accept == NULL) ; } ; - BGP_SESSION_UNLOCK(session) ; /*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>*/ - return active ; } ; /*------------------------------------------------------------------------------ - * If session is limping we defer re-enabling the session until it is disabled. + * Peering Engine: if session is limping we defer re-enabling the session + * until it is disabled. * * returns 1 if limping and defer * returns 0 if not limping - */ + * + * NB: accessing Peering Engine "private" variable -- no lock required. + */ static int bgp_session_defer_if_limping(bgp_session session) { int defer_enable = 0 ; if (session == NULL) - return defer_enable; - - BGP_SESSION_LOCK(session) ; /*<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/ - - session->defer_enable = - defer_enable = (session->state == bgp_session_sLimping); - - BGP_SESSION_UNLOCK(session) ; /*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>*/ + defer_enable = 0 ; + else + defer_enable = (session->state == bgp_session_sLimping) ; - return defer_enable ; + return session->defer_enable = defer_enable ; } ; -/* Get a copy of the session statistics, copied all at once so +/*------------------------------------------------------------------------------ + * Get a copy of the session statistics, copied all at once so * forms a consistent snapshot - * */ void bgp_session_get_stats(bgp_session session, struct bgp_session_stats *stats) { - if (!session) + if (session == NULL) { memset(stats, 0, sizeof(struct bgp_session_stats)) ; return; @@ -989,5 +968,4 @@ bgp_session_get_stats(bgp_session session, struct bgp_session_stats *stats) *stats = session->stats; BGP_SESSION_UNLOCK(session) ; /*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>*/ 
- } diff --git a/bgpd/bgp_session.h b/bgpd/bgp_session.h index 967286ef..af376f9a 100644 --- a/bgpd/bgp_session.h +++ b/bgpd/bgp_session.h @@ -40,7 +40,6 @@ #define Inline static inline #endif - /*============================================================================== * BGP Session data structure. * @@ -57,14 +56,16 @@ * NB: the session structure is shared by the Routeing Engine and the BGP * Engine, so there is a mutex to coordinate access. * - * The information in this shared structure is only required every now and - * then, so the overhead of a mutex operation for every access is not an - * issue. + * For simplicity, the BGP Engine may lock the session associated with the + * connection it is dealing with. + * + * Parts of the session structure are private to the Peering Engine, and + * do not require the mutex for access. * * NB: the connections associated with a BGP session are private to the BGP - * Engine when sessions are disabled or have failed, there will be no - * connections. + * Engine. * + * When sessions are disabled or have failed, there will be no connections. */ /* Statistics */ @@ -87,10 +88,13 @@ struct bgp_session_stats struct bgp_session { + /* The following are set when the session is created, and not changed + * thereafter. + */ bgp_peer peer ; /* peer whose session this is */ - bgp_peer_index_entry index_entry ; /* and its index entry */ + /* This is a *recursive* mutex */ qpt_mutex_t mutex ; /* for access to the rest */ /* While sIdle and sStopped: @@ -111,27 +115,18 @@ struct bgp_session * assumes that a session will not be destroyed while it is sEnabled, * sEstablished or sStopping. * - * Only the Peering Engine touches the state and defer_enable items. - * - * The made flag is cleared by the Peering Engine before enabling a session, - * and is set by the BGP Engine when the session becomes sEstablished. - * - * The Peering Engine may use this flag in sStopped state to see if the - * session was ever established. 
+ * These are private to the Peering Engine. */ bgp_session_state_t state ; int defer_enable ; /* set when waiting for stop */ - flag_t made ; /* set when -> sEstablished */ - /* Flow control. Incremented when an update packet is sent * from peering to BGP engine. Decremented when packet processed * by BGP engine. On transition to 0 BGP engine should send an XON. */ - int flow_control; - /* These belong to the Peering Engine, and may be set when a session + /* These are private to the Peering Engine, and are set each time a session * event message is received from the BGP Engine. */ bgp_session_event_t event ; /* last event */ @@ -163,6 +158,10 @@ struct bgp_session int ttl ; /* TTL to set, if not zero */ unsigned short port ; /* destination port for peer */ + /* TODO: ifindex and ifaddress should be rebound if the peer hears any + * bgp_session_eTCP_failed or bgp_session_eTCP_error -- in case interface + * state has changed, for the better. + */ char* ifname ; /* interface to bind to, if any */ unsigned ifindex ; /* and its index, if any */ union sockunion* ifaddress ; /* address to bind to, if any */ @@ -220,7 +219,6 @@ struct bgp_session flag_t active ; } ; - /*============================================================================== * Mqueue messages related to sessions * @@ -292,8 +290,6 @@ struct bgp_session_ttl_args /* to bgp Engine */ } ; MQB_ARGS_SIZE_OK(bgp_session_ttl_args) ; - - /*============================================================================== * Session mutex lock/unlock */ @@ -361,8 +357,6 @@ bgp_session_get_stats(bgp_session session, struct bgp_session_stats *stats); /*============================================================================== * Session data access functions. 
- * - * */ extern int diff --git a/lib/prefix.c b/lib/prefix.c index 6399788c..7755d4e7 100644 --- a/lib/prefix.c +++ b/lib/prefix.c @@ -180,6 +180,46 @@ prefix_cmp (const struct prefix *p1, const struct prefix *p2) return 0; } +/* + * Count the number of common bits in 2 prefixes. The prefix length is + * ignored for this function; the whole prefix is compared. If the prefix + * address families don't match, return -1; otherwise the return value is + * in range 0 ... maximum prefix length for the address family. + */ +int +prefix_common_bits (const struct prefix *p1, const struct prefix *p2) +{ + int pos, bit; + int length = 0; + u_char xor; + + /* Set both prefix's head pointer. */ + const u_char *pp1 = (const u_char *)&p1->u.prefix; + const u_char *pp2 = (const u_char *)&p2->u.prefix; + + if (p1->family == AF_INET) + length = IPV4_MAX_BYTELEN; +#ifdef HAVE_IPV6 + if (p1->family == AF_INET6) + length = IPV6_MAX_BYTELEN; +#endif + if (p1->family != p2->family || !length) + return -1; + + for (pos = 0; pos < length; pos++) + if (pp1[pos] != pp2[pos]) + break; + if (pos == length) + return pos * 8; + + xor = pp1[pos] ^ pp2[pos]; + for (bit = 0; bit < 8; bit++) + if (xor & (1 << (7 - bit))) + break; + + return pos * 8 + bit; +} + /* Return prefix family type string. 
*/ const char * prefix_family_str (const struct prefix *p) @@ -597,6 +637,19 @@ sockunion2hostprefix (const union sockunion *su) return NULL; } +void +prefix2sockunion (const struct prefix *p, union sockunion *su) { + memset (su, 0, sizeof (*su)); + + su->sa.sa_family = p->family; + if (p->family == AF_INET) + su->sin.sin_addr = p->u.prefix4; +#ifdef HAVE_IPV6 + if (p->family == AF_INET6) + memcpy (&su->sin6.sin6_addr, &p->u.prefix6, sizeof (struct in6_addr)); +#endif /* HAVE_IPV6 */ +} + int prefix_blen (const struct prefix *p) { @@ -779,3 +832,4 @@ inet6_ntoa (struct in6_addr addr) return buf; } #endif /* HAVE_IPV6 */ + diff --git a/lib/prefix.h b/lib/prefix.h index 9fd02ea6..74f32e94 100644 --- a/lib/prefix.h +++ b/lib/prefix.h @@ -162,12 +162,14 @@ extern int prefix2str (const struct prefix *, char *, int); extern int prefix_match (const struct prefix *, const struct prefix *); extern int prefix_same (const struct prefix *, const struct prefix *); extern int prefix_cmp (const struct prefix *, const struct prefix *); +extern int prefix_common_bits (const struct prefix *, const struct prefix *); extern void prefix_copy (struct prefix *dest, const struct prefix *src); extern void apply_mask (struct prefix *); extern struct prefix *sockunion2prefix (const_sockunion dest, const_sockunion mask); extern struct prefix *sockunion2hostprefix (const_sockunion src); +extern void prefix2sockunion (const struct prefix *, union sockunion *); extern struct prefix_ipv4 *prefix_ipv4_new (void); extern void prefix_ipv4_free (struct prefix_ipv4 *); |