35 files changed, 6281 insertions, 1157 deletions
diff --git a/bgpd/bgp_advertise.c b/bgpd/bgp_advertise.c index 90579e7c..04f1f847 100644 --- a/bgpd/bgp_advertise.c +++ b/bgpd/bgp_advertise.c @@ -300,7 +300,7 @@ bgp_adj_out_unset (struct bgp_node *rn, struct peer *peer, struct prefix *p, bgp_advertise_fifo_add(&peer->sync[afi][safi]->withdraw, adv); /* Schedule packet write. */ - bgp_write(peer); + bgp_write(peer, NULL) ; } else { diff --git a/bgpd/bgp_common.h b/bgpd/bgp_common.h index f6f06446..19db9d8a 100644 --- a/bgpd/bgp_common.h +++ b/bgpd/bgp_common.h @@ -107,7 +107,7 @@ enum bgp_session_events bgp_session_null_event = 0, bgp_session_eEstablished, /* session state -> sEstablished */ - bgp_session_eDisabled, /* disabled by Peering Engine */ + bgp_session_eDisabled, /* disabled by Routeing Engine */ bgp_session_eStart, /* enter sConnect/sAccept from sIdle */ bgp_session_eRetry, /* loop round in sConnect/sAccept */ diff --git a/bgpd/bgp_connection.c b/bgpd/bgp_connection.c index 8320ae49..1c427318 100644 --- a/bgpd/bgp_connection.c +++ b/bgpd/bgp_connection.c @@ -93,7 +93,6 @@ static const char* bgp_connection_tags[] = static void bgp_connection_init_host(bgp_connection connection, const char* tag) ; -static void bgp_write_buffer_init_new(bgp_wbuffer wb, size_t size) ; static void bgp_write_buffer_free(bgp_wbuffer wb) ; /*------------------------------------------------------------------------------ @@ -122,7 +121,8 @@ bgp_connection_init_new(bgp_connection connection, bgp_session session, * * comatose not comatose * * next NULL -- not on the connection queue * * prev NULL -- not on the connection queue - * * post bgp_fsm_null_event + * * follow_on bgp_fsm_null_event + * * exception bgp_session_null_event * * fsm_active not active * * notification NULL -- none received or sent * * err no error, so far @@ -138,15 +138,11 @@ bgp_connection_init_new(bgp_connection connection, bgp_session session, * * msg_type none -- set when reading message * * msg_size none -- set when reading message * * notification_pending nothing pending - * * wbuff all pointers NULL -- empty buffer - * *except* must set limit so is not "full". + * * wbuff all pointers NULL -- empty but not writable */ - - confirm(bgp_fsm_sInitial == 0) ; - confirm(bgp_fsm_null_event == 0) ; - - connection->wbuff.limit = connection->wbuff.base + - bgp_write_buffer_full_threshold ; + confirm(bgp_fsm_sInitial == 0) ; + confirm(bgp_fsm_null_event == 0) ; + confirm(bgp_session_null_event == 0) ; /* Link back to session, point at its mutex and point session here */ connection->session = session ; @@ -327,8 +323,8 @@ bgp_connection_exit(bgp_connection connection) static void bgp_connection_free(bgp_connection connection) { - assert( (connection->state == bgp_fsm_sStopping) - && (connection->session == NULL) + assert( (connection->state == bgp_fsm_sStopping) + && (connection->session == NULL) && ( (connection->lock_count == 0) || (connection->lock_count == CUT_LOOSE_LOCK_COUNT) ) ) ; @@ -353,20 +349,22 @@ bgp_connection_free(bgp_connection connection) } ; /*------------------------------------------------------------------------------ - * Allocate new write buffer and initialise pointers + * If required, allocate new write buffer. + * Initialise pointers empty and writable. * - * NB: assumes structure has been zeroised by the initialisation of the - * enclosing connection. + * NB: structure was zeroised the enclosing connection was initialised. + * Buffer may have been allocated since then. 
*/ static void -bgp_write_buffer_init_new(bgp_wbuffer wb, size_t size) +bgp_write_buffer_init(bgp_wbuffer wb, size_t size) { - assert(wb->base == NULL) ; - - wb->base = XMALLOC(MTYPE_STREAM_DATA, size) ; - wb->limit = wb->base + size ; + if (wb->base == NULL) + { + wb->base = XMALLOC(MTYPE_STREAM_DATA, size) ; + wb->limit = wb->base + size ; + } ; - wb->p_in = wb->p_out = wb->base ; + bgp_write_buffer_reset(wb) ; } ; /*------------------------------------------------------------------------------ @@ -376,7 +374,9 @@ static void bgp_write_buffer_free(bgp_wbuffer wb) { if (wb->base != NULL) - XFREE(MTYPE_STREAM_DATA, wb->base) ; + XFREE(MTYPE_STREAM_DATA, wb->base) ; /* sets wb->base = NULL */ + + wb->p_in = wb->p_out = wb->limit = wb->base; } ; /*============================================================================== @@ -449,12 +449,18 @@ bgp_connection_queue_del(bgp_connection connection) * pending queue (success) or remove connection from the pending queue. * * This is also where connections come to die. + * + * Returns: 0 => nothing to do + * 1 => dealt with one or more queued bits of work */ -extern void +extern int bgp_connection_queue_process(void) { mqueue_block mqb ; + if (bgp_connection_queue == NULL) + return 0 ; + while (bgp_connection_queue != NULL) { /* select the first in the queue, and step to the next */ @@ -486,6 +492,8 @@ bgp_connection_queue_process(void) if (mqb == mqueue_local_head(&connection->pending_queue)) bgp_connection_queue_del(connection) ; } ; + + return 1 ; } ; /*------------------------------------------------------------------------------ @@ -527,17 +535,29 @@ bgp_connection_add_pending(bgp_connection connection, mqueue_block mqb, * * Expects connection to either be newly created or recently closed. * + * For connect() connections this is done at connect() time, so before any + * connection comes up. + * + * For accept() connections this is done at accept() time, so when the + * connection comes up. + * + * The file is disabled in all modes. + * + * To complete the process must bgp_connection_start(), which resets the write + * buffer (allocating if required), and ensures that all is ready to read/write. + * * Resets: * * * closes any file that may be lingering (should never be) - * * resets all buffering (should all be empty) + * * reset all stream buffers to empty (should already be) + * * set write buffer unwritable * * Sets: * * * if secondary connection, turn off accept() - * * sets the qfile and fd ready for use + * * sets the qfile and fd ready for use -- disabled in all modes * * clears err -- must be OK so far - * * discards any open_state and notification + * * discards any open_state * * copies hold_timer_interval and keep_alive_timer_interval from session * * Expects: @@ -545,13 +565,15 @@ bgp_connection_add_pending(bgp_connection connection, mqueue_block mqb, * * links to/from session to be set up (including ordinal) * * timers to be initialised * * log and host to be set up - * * buffers to exist + * * stream buffers to exist * * Does not touch: * - * * state of the connection (including post event) + * * state of the connection (including exception and follow-on event) * * timers -- FSM looks after those * + * NB: nothing can be written until bgp_connection_start() has been called. + * * NB: requires the session to be LOCKED. 
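The open/start split above is the heart of this change, so a compilable miniature may help: after open() the write buffer is deliberately "empty but not writable", and only start() (allocating on first use) makes it writable. The wbuffer type, buffer size and helper names below are illustrative stand-ins, not the Quagga originals.

/* Minimal sketch of the open/start split described above.
 * Illustrative only: names and sizes are not the Quagga originals. */
#include <assert.h>
#include <stdlib.h>

typedef struct {
  unsigned char *base, *limit;   /* allocated buffer, if any           */
  unsigned char *p_in, *p_out;   /* staging in / writing out pointers  */
} wbuffer;

enum { WBUFF_SIZE = 4096 * 10 };        /* stand-in for bgp_wbuff_size */

static void wbuff_unwritable(wbuffer *wb)
{
  wb->p_in = wb->p_out = wb->limit;     /* appears full, nothing pending */
}

static void wbuff_reset(wbuffer *wb)
{
  wb->p_in = wb->p_out = wb->base;      /* empty -- writable iff allocated */
}

/* open(): file and timers set up, but nothing may be written yet */
static void connection_open(wbuffer *wb)
{
  wbuff_unwritable(wb);
}

/* start(): connection is up -- allocate on first use, then empty+writable */
static void connection_start(wbuffer *wb)
{
  if (wb->base == NULL)
    {
      wb->base  = malloc(WBUFF_SIZE);
      wb->limit = wb->base + WBUFF_SIZE;
    }
  wbuff_reset(wb);
}

int main(void)
{
  wbuffer wb = { NULL, NULL, NULL, NULL };
  connection_open(&wb);
  assert(wb.p_in == wb.limit);                    /* unwritable after open() */
  connection_start(&wb);
  assert(wb.p_in == wb.base && wb.base != NULL);  /* writable after start()  */
  free(wb.base);
  return 0;
}

The same unwritable state is reused on close and on I/O error, so "p_out == limit" consistently reads as "discard quietly".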
*/ extern void @@ -580,7 +602,65 @@ bgp_connection_open(bgp_connection connection, int fd) } ; /*------------------------------------------------------------------------------ + * Start connection which has just come up -- connect() or accept() + * + * Copy the local and remote addresses and note the effective address family. + * + * Make sure now have a write buffer, and set it empty and writable. + */ +extern void +bgp_connection_start(bgp_connection connection, union sockunion* su_local, + union sockunion* su_remote) +{ + sockunion_set_dup(&connection->su_local, su_local) ; + sockunion_set_dup(&connection->su_remote, su_remote) ; + + connection->paf = sockunion_family(connection->su_local) ; + + bgp_write_buffer_init(&connection->wbuff, bgp_wbuff_size) ; +} ; + +/*------------------------------------------------------------------------------ + * Stop connection + * + * * reset stream buffers + * * empty out any pending queue + * * remove from the BGP Engine connection queue, if there + * * clear session->active flag, so will not process any more messages + * that expect some message to be sent. + * * no notification pending (yet) + * + * If required: + * + * * set write buffer unwritable + * * disable file in write mode + * + * NB: requires the session to be LOCKED. + */ +static void +bgp_connection_stop(bgp_connection connection, int stop_writer) +{ + /* Reset all stream buffering empty. */ + stream_reset(connection->ibuf) ; + stream_reset(connection->obuf) ; + + connection->read_pending = 0 ; + connection->read_header = 0 ; + connection->notification_pending = 0 ; + + /* Empty out the pending queue and remove from connection queue */ + mqueue_local_reset_keep(&connection->pending_queue) ; + bgp_connection_queue_del(connection) ; + + /* If required: set write buffer *unwritable* (and empty). */ + if (stop_writer) + bgp_write_buffer_unwritable(&connection->wbuff) ; +} ; + +/*------------------------------------------------------------------------------ * Enable connection for accept() + * + * NB: requires the session to be LOCKED. */ extern void bgp_connection_enable_accept(bgp_connection connection) @@ -590,6 +670,8 @@ bgp_connection_enable_accept(bgp_connection connection) /*------------------------------------------------------------------------------ * Disable connection for accept() -- assuming still have session ! + * + * NB: requires the session to be LOCKED. */ extern void bgp_connection_disable_accept(bgp_connection connection) @@ -605,7 +687,8 @@ bgp_connection_disable_accept(bgp_connection connection) * * if there is an fd, close it * * if qfile is active, remove it * * forget any addresses - * * reset all buffering to empty + * * reset all stream buffers to empty + * * reset write buffer to unwritable * * empties the pending queue -- destroying all messages * * * for secondary connection: disable accept @@ -630,6 +713,8 @@ bgp_connection_disable_accept(bgp_connection connection) * * bgp_connection_free() -- to finally discard * * * bgp_connection_full_close() -- can do this again + * + * NB: requires the session to be LOCKED. */ extern void bgp_connection_full_close(bgp_connection connection, int unset_timers) @@ -658,20 +743,8 @@ bgp_connection_full_close(bgp_connection connection, int unset_timers) sockunion_unset(&connection->su_local) ; sockunion_unset(&connection->su_remote) ; - /* Reset all buffering empty. 
*/ - stream_reset(connection->ibuf) ; - stream_reset(connection->obuf) ; - - connection->read_pending = 0 ; - connection->read_header = 0 ; - connection->notification_pending = 0 ; - - connection->wbuff.p_in = connection->wbuff.base ; - connection->wbuff.p_out = connection->wbuff.base ; - - /* Empty out the pending queue and remove from connection queue */ - mqueue_local_reset_keep(&connection->pending_queue) ; - bgp_connection_queue_del(connection) ; + /* Bring connection to a stop. */ + bgp_connection_stop(connection, 1) ; } ; /*------------------------------------------------------------------------------ @@ -691,34 +764,39 @@ bgp_connection_full_close(bgp_connection connection, int unset_timers) * be written (at least as far as the write buffer). * * Everything else is left untouched. + * + * Returns: 1 => OK, ready to send NOTIFICATION now + * 0 => no file descriptor => no chance of sending NOTIFICATION + * + * NB: requires the session to be LOCKED. */ -extern void +extern int bgp_connection_part_close(bgp_connection connection) { + bgp_session session = connection->session ; bgp_wbuffer wb = &connection->wbuff ; int fd ; uint8_t* p ; bgp_size_t mlen ; - /* close the qfile and any associate file descriptor */ + /* Check that have a usable file descriptor */ fd = qps_file_fd(&connection->qf) ; - if (fd != fd_undef) - { - shutdown(fd, SHUT_RD) ; - qps_disable_modes(&connection->qf, qps_read_mbit) ; - } ; - /* Reset all input buffering. */ - stream_reset(connection->ibuf) ; + if (fd == fd_undef) + return 0 ; - connection->read_pending = 0 ; - connection->read_header = 0 ; + /* Shutdown the read side of this connection */ + shutdown(fd, SHUT_RD) ; + qps_disable_modes(&connection->qf, qps_read_mbit) ; - /* Reset obuf and purge wbuff. */ - stream_reset(connection->obuf) ; + /* Stop all buffering activity, except for write buffer. */ + bgp_connection_stop(connection, 0) ; - connection->notification_pending = 0 ; + /* Turn off session->active (if still attached). */ + if (session != NULL) + session->active = 0 ; + /* Purge wbuff of all but current partly written message (if any) */ if (wb->p_in != wb->p_out) /* will be equal if buffer is empty */ { passert(wb->p_out < wb->p_in) ; @@ -739,62 +817,49 @@ bgp_connection_part_close(bgp_connection connection) wb->p_in = wb->base + mlen ; } else - wb->p_in = wb->p_out = wb->base ; + bgp_write_buffer_reset(wb) ; - /* Empty out the pending queue and remove from connection queue */ - mqueue_local_reset_keep(&connection->pending_queue) ; - bgp_connection_queue_del(connection) ; + /* OK -- part closed, ready to send NOTIFICATION */ + return 1 ; } ; /*============================================================================== * Writing to BGP connection -- once TCP connection has come up. * - * All writing is done by preparing a BGP message in the "obuf" buffer, - * and then calling bgp_connection_write(). + * Nothing is written directly -- all writing is qpselect driven. * - * If possible, that is written away immediately. If not, then no further - * messages may be prepared until the buffer has been cleared. - * - * Write the contents of the "work" buffer. + * All writing is done by preparing a BGP message in a stream buffer, + * and then calling bgp_connection_write(). The contents of the stream buffer + * are transferred to the connection's write buffer. * * Returns true <=> able to write the entire buffer without blocking. 
*/ -static int bgp_connection_write_direct(bgp_connection connection, - struct stream* s) ; static void bgp_connection_write_action(qps_file qf, void* file_info) ; /*------------------------------------------------------------------------------ - * Write the contents of the given stream, if possible - * - * Writes everything or nothing. + * Write the contents of the given stream * - * If the write buffer is empty, then will attempt to write directly to the - * socket, buffering anything that cannot be sent immediately. Any errors - * encountered in this process generate an FSM event. + * Writes everything or FATAL error. * - * In case it is relevant, identifies when the data has been written all the - * way into the TCP buffer. + * Returns: 1 => written to wbuff -- stream reset, empty * - * Returns: 2 => written to TCP -- it's gone -- stream reset, empty - * 1 => written to wbuff -- waiting for socket -- stream reset, empty - * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! */ extern int bgp_connection_write(bgp_connection connection, struct stream* s) { bgp_wbuffer wb = &connection->wbuff ; - if (bgp_write_buffer_empty(wb)) - { - /* write buffer is empty -- attempt to write directly */ - return bgp_connection_write_direct(connection, s) ; - } ; - - /* Write nothing if cannot write everything */ + /* FATAL error if cannot write everything. */ if (bgp_write_buffer_cannot(wb, stream_pending(s))) - return 0 ; + zabort("Write buffer does not have enough room") ; + + /* If buffer is empty, enable write mode */ + if (bgp_write_buffer_empty(wb)) + qps_enable_mode(&connection->qf, qps_write_mnum, + bgp_connection_write_action) ; /* Transfer the obuf contents to the write buffer. */ wb->p_in = stream_transfer(wb->p_in, s, wb->limit) ; @@ -803,71 +868,19 @@ bgp_connection_write(bgp_connection connection, struct stream* s) } ; /*------------------------------------------------------------------------------ - * The write buffer is empty -- so try to write stream directly. - * - * If cannot empty the stream directly to the TCP buffers, transfer it to to - * the write buffer, and enable the qpselect action. - * (This is where the write buffer is allocated, if it hasn't yet been.) - * - * Either way, the stream is cleared and can be reused (unless failed). - * - * Returns: 2 => written to TCP -- it's gone -- stream reset, empty - * 1 => written to wbuff -- waiting for socket -- stream reset, empty - * -1 => failed -- error event generated - */ -enum { bgp_wbuff_size = BGP_MSG_MAX_L * 10 } ; - -static int -bgp_connection_write_direct(bgp_connection connection, struct stream* s) -{ - int ret ; - - ret = stream_flush_try(s, qps_file_fd(&connection->qf)) ; - - if (ret == 0) - return 2 ; /* Done: wbuff and stream are empty */ - - else if (ret > 0) - { - bgp_wbuffer wb = &connection->wbuff ; - - /* Partial write -- set up buffering, if required. 
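With bgp_connection_write_direct() gone, nothing is written inline any more: bgp_connection_write() only stages whole messages and enables write mode, and all real I/O happens in the write action. A self-contained sketch of that drain loop follows, under the same EINTR/EAGAIN conventions; it uses a plain write() on an fd rather than the real qpselect file API.

/* Sketch of the select-driven drain loop: stage whole messages, then
 * let the write action empty the buffer.  Not the real qpselect API. */
#include <errno.h>
#include <string.h>
#include <unistd.h>

typedef struct {
  unsigned char *base, *limit, *p_in, *p_out;
} wbuffer;

/* stage one complete message -- caller has already checked for room */
static void wbuff_stage(wbuffer *wb, const void *msg, size_t len)
{
  memcpy(wb->p_in, msg, len);
  wb->p_in += len;
  /* real code: going from empty to non-empty enables qps_write_mnum */
}

/* write action: called when the fd is writable.  Returns -1 on error
 * (buffer forced unwritable), 0 if blocked (stay enabled), 1 when the
 * buffer has been emptied (write mode can be disabled).              */
static int wbuff_write_action(wbuffer *wb, int fd)
{
  while (wb->p_out < wb->p_in)
    {
      ssize_t n = write(fd, wb->p_out, (size_t)(wb->p_in - wb->p_out));
      if (n < 0)
        {
          if (errno == EINTR)
            continue;
          if ((errno == EAGAIN) || (errno == EWOULDBLOCK))
            return 0;
          wb->p_in = wb->p_out = wb->limit;   /* force unwritable     */
          return -1;                          /* real code: io_error  */
        }
      wb->p_out += n;
    }

  wb->p_in = wb->p_out = wb->base;            /* empty: back to base  */
  return 1;
}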
*/ - if (wb->base == NULL) - bgp_write_buffer_init_new(wb, bgp_wbuff_size) ; - - /* Transfer *entire* message to staging buffer */ - wb->p_in = stream_transfer(wb->base, s, wb->limit) ; - - wb->p_out = wb->p_in - ret ; /* output from here */ - - /* Must now be enabled to write */ - qps_enable_mode(&connection->qf, qps_write_mnum, - bgp_connection_write_action) ; - - return 1 ; /* Done: wbuff is not empty -- stream is */ - } ; - - /* write failed -- signal error and return failed */ - bgp_fsm_io_error(connection, errno) ; - - return -1 ; -} ; - -/*------------------------------------------------------------------------------ * Write Action for bgp connection. * * Empty the write buffer if we can. * * If empties that, disable write mode, then: * - * -- if notification is pending, then generate a notification sent event + * -- if notification is pending, generate a notification sent event * * -- otherwise: place connection on the connection queue, so can start to * flush out anything on the connection's pending queue. * - * If empty out everything, disable write mode. - * - * If encounter an error, generate TCP_fatal_error event. + * If encounter an error, generate TCP_fatal_error event, forcing buffer + * empty but unwritable. */ static void bgp_connection_write_action(qps_file qf, void* file_info) @@ -894,14 +907,16 @@ bgp_connection_write_action(qps_file qf, void* file_info) continue ; if ((ret != EAGAIN) && (ret != EWOULDBLOCK)) - bgp_fsm_io_error(connection, errno) ; - + { + bgp_write_buffer_unwritable(wb) ; + bgp_fsm_io_error(connection, errno) ; + } ; return ; } ; } ; /* Buffer is empty -- reset it and disable write mode */ - wb->p_out = wb->p_in = wb->base ; + bgp_write_buffer_reset(wb) ; qps_disable_modes(&connection->qf, qps_write_mbit) ; diff --git a/bgpd/bgp_connection.h b/bgpd/bgp_connection.h index 054cd953..d50d2985 100644 --- a/bgpd/bgp_connection.h +++ b/bgpd/bgp_connection.h @@ -90,31 +90,24 @@ enum bgp_fsm_events } ; /*============================================================================== - * BGP Connection Structure + * BGP Connection Structures * - * The BGP Connection is the main data structure for the BGP Engine. + *------------------------------------------------------------------------------ + * Write buffer for connection. * - * When a session terminates, or a connection is shut it may have a short - * independent life, if a NOTIFICATION message is pending. + * NB: when connection is initialised all the pointers are set NULL. * - */ - -/* Write buffer for connection. + * The buffer is not allocated until the TCP connection comes up. * * NB: p_out == p_in => buffer is empty * - * BUT: buffer is not allocated until required, and until then - * p_out == p_in == NULL -- empty does NOT imply usable ! + * BUT: p_out == limit => buffer is not writable. * - * AND: when buffer is emptied, p_out and p_in will be some way down the - * buffer. + * When connection is first initialised all pointers are NULL, so the + * buffer is "empty but not writable". * - * SO: before writing, check for base != NULL and set p_out = p_in = base. - * - * NB: before buffer is allocated base == NULL, but limit is set to NULL + n, - * so that buffer does not appear full. - * - * SO: not full does NOT imply that p_out/p_in/base are set, either ! + * When connection is opened, closed or fails, buffer is set into this + * "empty but not writable" state. 
*/ typedef struct bgp_wbuffer* bgp_wbuffer ; struct bgp_wbuffer @@ -126,7 +119,17 @@ struct bgp_wbuffer uint8_t* limit ; } ; +/* Buffer is allocated for a number of maximum size BGP messages. */ +enum { bgp_wbuff_size = BGP_MSG_MAX_L * 10 } ; +/*============================================================================== + * BGP Connection Structure + * + * The BGP Connection is the main data structure for the BGP Engine. + * + * When a session terminates, or a connection is shut it may have a short + * independent life, if a NOTIFICATION message is pending. + */ struct bgp_connection { bgp_session session ; /* session connection belongs to */ @@ -147,7 +150,7 @@ struct bgp_connection int fsm_active ; /* active in FSM counter */ bgp_fsm_event_t follow_on ; /* event raised within FSM */ - bgp_session_event_t except ; /* exception posted here */ + bgp_session_event_t exception; /* exception posted here */ bgp_notify notification ; /* if any sent/received */ int err ; /* erno, if any */ @@ -201,6 +204,9 @@ extern void bgp_connection_open(bgp_connection connection, int fd) ; extern void +bgp_connection_start(bgp_connection connection, union sockunion* su_local, + union sockunion* su_remote) ; +extern void bgp_connection_enable_accept(bgp_connection connection) ; extern void @@ -218,7 +224,7 @@ bgp_connection_full_close(bgp_connection connection, int unset_timers) ; #define bgp_connection_close(conn) bgp_connection_full_close(conn, 0) #define bgp_connection_close_down(conn) bgp_connection_full_close(conn, 1) -extern void +extern int bgp_connection_part_close(bgp_connection connection) ; extern void @@ -236,7 +242,7 @@ bgp_connection_queue_add(bgp_connection connection) ; extern void bgp_connection_queue_del(bgp_connection connection) ; -extern void +extern int bgp_connection_queue_process(void) ; Inline int @@ -251,12 +257,28 @@ bgp_connection_add_pending(bgp_connection connection, mqueue_block mqb, bgp_connection* is_pending) ; /*------------------------------------------------------------------------------ + * Set buffer *unwritable* (buffer appears full, but nothing pending). + */ +Inline void +bgp_write_buffer_unwritable(bgp_wbuffer wb) +{ + wb->p_in = wb->p_out = wb->limit ; +} ; + +/*------------------------------------------------------------------------------ + * If allocated: set buffer empty + * If unallocated: buffer remains *unwritable* + */ +Inline void +bgp_write_buffer_reset(bgp_wbuffer wb) +{ + wb->p_in = wb->p_out = wb->base ; +} ; + +/*------------------------------------------------------------------------------ * See if do NOT have enough room for what want to write PLUS 1. * - * NB: before using the buffer the caller MUST ensure it has been allocated. - * - * Unallocated buffer is made to appear to have room for one maximum - * size BGP message. + * NB: there is never any room in an unallocated buffer. */ Inline int bgp_write_buffer_cannot(bgp_wbuffer wb, size_t want) @@ -267,30 +289,35 @@ bgp_write_buffer_cannot(bgp_wbuffer wb, size_t want) /*------------------------------------------------------------------------------ * Full if NOT enough room for a maximum size BGP message + 1 * - * NB: this will be FALSE if the buffer has not been allocated -- because can - * allocate a buffer and proceed if required. + * NB: there is never any room in an unallocated buffer. 
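The header comments above define the buffer entirely by pointer equalities, which is compact but easy to misread. Here is the same state table as a small compilable sketch; the predicate bodies are inferred from the comments ("room for what want to write PLUS 1"), not copied from the real inlines.

/* The three wbuff states, expressed as predicates over the pointers.
 * Illustrative sketch only -- not the Quagga inline functions.       */
#include <stddef.h>

typedef struct {
  unsigned char *base, *limit, *p_in, *p_out;
} wbuffer;

/* not enough room for "want" bytes PLUS 1.  In the unallocated and
 * unwritable states p_in == limit, so this is true for any want.     */
static int wbuff_cannot(wbuffer *wb, size_t want)
{
  return (size_t)(wb->limit - wb->p_in) < (want + 1);
}

/* something staged but not yet written away */
static int wbuff_pending(wbuffer *wb)
{
  return (wb->p_out < wb->p_in);
}

/* State summary:
 *   unallocated : base == limit == p_in == p_out == NULL  -- unwritable
 *   unwritable  : p_in == p_out == limit                  -- appears full
 *   empty+ready : p_in == p_out == base, base != NULL     -- writable
 */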
*/ enum { bgp_write_buffer_full_threshold = BGP_MSG_MAX_L + 1 } ; Inline int -bgp_write_buffer_full(bgp_wbuffer wb) +bgp_write_buffer_cannot_max(bgp_wbuffer wb) { return bgp_write_buffer_cannot(wb, BGP_MSG_MAX_L) ; } ; /*------------------------------------------------------------------------------ - * Empty if in and out pointers are equal. - * - * NB: buffer is empty if it has not yet been allocated. - * - * NOT empty => allocated. + * See if buffer has anything in it. * - * NB: empty does NOT imply that both pointers are at the start of the buffer. + * If empty, ensures that the buffer has been allocated, and sets the pointers + * to the start of the buffer -- so all set to go. */ Inline int bgp_write_buffer_empty(bgp_wbuffer wb) { - return (wb->p_out == wb->p_in) ; + if (wb->p_out < wb->p_in) + return 0 ; /* not empty => has buffer */ + + dassert(wb->p_out == wb->p_in) ; + + passert(wb->base != NULL) ; /* must have buffer */ + + bgp_write_buffer_reset(wb) ; /* pointers to start of buffer */ + + return 1 ; /* empty and all ready to go */ } ; /*------------------------------------------------------------------------------ @@ -299,8 +326,6 @@ bgp_write_buffer_empty(bgp_wbuffer wb) * NB: if returns 0, may not yet have been allocated. * * > 0 => allocated. - * - * NB: 0 does NOT imply that both pointers are at the start of the buffer. */ Inline int bgp_write_buffer_pending(bgp_wbuffer wb) @@ -313,9 +338,9 @@ bgp_write_buffer_pending(bgp_wbuffer wb) * As above, for connection */ Inline int -bgp_connection_write_full(bgp_connection connection) +bgp_connection_write_cannot_max(bgp_connection connection) { - return bgp_write_buffer_full(&connection->wbuff) ; + return bgp_write_buffer_cannot_max(&connection->wbuff) ; } ; /*------------------------------------------------------------------------------ diff --git a/bgpd/bgp_engine.h b/bgpd/bgp_engine.h index fdbcef70..3a751885 100644 --- a/bgpd/bgp_engine.h +++ b/bgpd/bgp_engine.h @@ -51,7 +51,7 @@ struct queue_stats } ; static struct queue_stats bgp_engine_queue_stats ; -static struct queue_stats peering_engine_queue_stats ; +static struct queue_stats routing_engine_queue_stats ; Inline void bgp_queue_logging(const char* name, mqueue_queue mq, struct queue_stats* stats) @@ -62,6 +62,8 @@ bgp_queue_logging(const char* name, mqueue_queue mq, struct queue_stats* stats) ++stats->count ; + qpt_mutex_lock(&mq->mutex) ; + if (mq->count > stats->max) stats->max = mq->count ; if (mq->count > stats->recent) @@ -70,7 +72,10 @@ bgp_queue_logging(const char* name, mqueue_queue mq, struct queue_stats* stats) stats->total += mq->count ; if (stats->count < 1000) - return ; + { + qpt_mutex_unlock(&mq->mutex) ; + return ; + } ; my_count = 0 ; @@ -83,6 +88,8 @@ bgp_queue_logging(const char* name, mqueue_queue mq, struct queue_stats* stats) assert(my_count == mq->count) ; + qpt_mutex_unlock(&mq->mutex) ; + average = stats->total ; average /= stats->count ; @@ -121,24 +128,24 @@ bgp_to_bgp_engine_priority(mqueue_block mqb) * */ -/* Send given message to the Peering Engine -- ordinary +/* Send given message to the Routing Engine -- ordinary */ Inline void -bgp_to_peering_engine(mqueue_block mqb) +bgp_to_routing_engine(mqueue_block mqb) { mqueue_enqueue(routing_nexus->queue, mqb, 0) ; - bgp_queue_logging("Peering Engine", routing_nexus->queue, - &peering_engine_queue_stats) ; + bgp_queue_logging("Routing Engine", routing_nexus->queue, + &routing_engine_queue_stats) ; } ; -/* Send given message to the Peering Engine -- priority +/* Send given message to the Routing 
Engine -- priority */ Inline void -bgp_to_peering_engine_priority(mqueue_block mqb) +bgp_to_routing_engine_priority(mqueue_block mqb) { mqueue_enqueue(routing_nexus->queue, mqb, 1) ; - bgp_queue_logging("Peering Engine", routing_nexus->queue, - &peering_engine_queue_stats) ; + bgp_queue_logging("Routing Engine", routing_nexus->queue, + &routing_engine_queue_stats) ; } ; #endif /* QUAGGA_BGP_ENGINE_H */ diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 8a8be52d..77afa12f 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -57,7 +57,7 @@ * * In general the FSM manages connections, but there is some interaction with * the session. In particular, exceptions are expressed as session_eXXX - * values -- which are passed to the Peering Engine as session events. The + * values -- which are passed to the Routing Engine as session events. The * handling of FSM events is depends mostly on the FSM state, but any * exception influences that too. * @@ -273,7 +273,7 @@ * notification -- any NOTIFICATION message * err -- any I/O or other error * - * on exit from the FSM this information is passed to the Peering Engine. + * on exit from the FSM this information is passed to the Routing Engine. * * Can throw exceptions within the FSM, as discussed above. * @@ -446,7 +446,7 @@ bgp_fsm_enable_session(bgp_session session) * */ static void -bgp_fsm_throw(bgp_connection connection, bgp_session_event_t except, +bgp_fsm_throw(bgp_connection connection, bgp_session_event_t exception, bgp_notify notification, int err, bgp_fsm_event_t event) ; static bgp_fsm_state_t @@ -548,10 +548,10 @@ bgp_fsm_disable_session(bgp_session session, bgp_notify notification) * fsm_active/follow_on mechanism looks after this. */ extern void -bgp_fsm_exception(bgp_connection connection, bgp_session_event_t except, +bgp_fsm_exception(bgp_connection connection, bgp_session_event_t exception, bgp_notify notification) { - bgp_fsm_throw(connection, except, notification, 0, bgp_fsm_eBGP_Stop) ; + bgp_fsm_throw(connection, exception, notification, 0, bgp_fsm_eBGP_Stop) ; } ; /*------------------------------------------------------------------------------ @@ -559,7 +559,7 @@ bgp_fsm_exception(bgp_connection connection, bgp_session_event_t except, * * A connection will discard any sibling if: * - * * the session is being disabled (by the Peering Engine) + * * the session is being disabled (by the Routing Engine) * * * an invalid event is bringing down the session * @@ -655,7 +655,7 @@ bgp_fsm_io_error(bgp_connection connection, int err) * This is used by the connect() and accept() qpselect actions. It is also * used if a connect() attempt fails immediately. * - * If err == 0, then all is well: copy the local and remote sockunions + * If err == 0, then all is well: start the connection (can now write to it) * and generate TCP_connection_open event * * If err is one of: @@ -666,6 +666,9 @@ bgp_fsm_io_error(bgp_connection connection, int err) * these errors.) * * Other errors are reported as TCP_fatal_error. + * + * NB: in any case on entry to this function the file is *disabled* in all + * modes. 
*/ extern void bgp_fsm_connect_completed(bgp_connection connection, int err, @@ -674,12 +677,8 @@ bgp_fsm_connect_completed(bgp_connection connection, int err, { if (err == 0) { + bgp_connection_start(connection, su_local, su_remote) ; bgp_fsm_event(connection, bgp_fsm_eTCP_connection_open) ; - - sockunion_set_dup(&connection->su_local, su_local) ; - sockunion_set_dup(&connection->su_remote, su_remote) ; - - connection->paf = sockunion_family(connection->su_local) ; } else if ( (err == ECONNREFUSED) || (err == ECONNRESET) @@ -697,12 +696,12 @@ bgp_fsm_connect_completed(bgp_connection connection, int err, * NB: takes responsibility for the notification structure. */ static void -bgp_fsm_throw(bgp_connection connection, bgp_session_event_t except, +bgp_fsm_throw(bgp_connection connection, bgp_session_event_t exception, bgp_notify notification, int err, bgp_fsm_event_t event) { - connection->except = except ; + connection->exception = exception ; bgp_notify_set(&connection->notification, notification) ; - connection->err = err ; + connection->err = err ; bgp_fsm_event(connection, event) ; } ; @@ -721,10 +720,10 @@ bgp_fsm_throw(bgp_connection connection, bgp_session_event_t except, * NB: takes responsibility for the notification structure. */ static bgp_fsm_state_t -bgp_fsm_throw_stop(bgp_connection connection, bgp_session_event_t except, +bgp_fsm_throw_stop(bgp_connection connection, bgp_session_event_t exception, bgp_notify notification) { - bgp_fsm_throw(connection, except, notification, 0, bgp_fsm_eBGP_Stop) ; + bgp_fsm_throw(connection, exception, notification, 0, bgp_fsm_eBGP_Stop) ; return connection->state ; } ; @@ -1477,7 +1476,7 @@ bgp_fsm_event(bgp_connection connection, bgp_fsm_event_t event) } while (--connection->fsm_active != 0) ; /* If required, post session event. */ - if (connection->except != bgp_session_null_event) + if (connection->exception != bgp_session_null_event) { int stopped = (connection->state == bgp_fsm_sStopping) ; int has_session = (connection->session != NULL) ; @@ -1488,16 +1487,16 @@ bgp_fsm_event(bgp_connection connection, bgp_fsm_event_t event) * connection->state will be Stopping is when the session is being * stopped. (eDiscard and eCollision go quietly to Stopping !) 
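The throw/catch vocabulary used throughout this file reduces to a small pattern: record the exception on the connection, drive the FSM with a Stop event, and on the way out of the FSM forward what was recorded to the Routing Engine and clear it. In miniature (hypothetical names, heavily simplified -- no notification, follow-on events or recursion guard):

/* The FSM throw/catch pattern in miniature -- names are hypothetical */
typedef enum {
  ev_null = 0,      /* nothing thrown                  */
  ev_NOM_recv,      /* NOTIFICATION received           */
  ev_disabled       /* disabled by the Routing Engine  */
} session_event_t;

typedef struct {
  session_event_t exception;    /* recorded by "throw"  */
  int             err;          /* errno, if relevant   */
} conn;

static void fsm_event(conn *c, int event);

/* throw: record the exception, then let the FSM sort out the state */
static void fsm_throw(conn *c, session_event_t ex, int err)
{
  c->exception = ex;
  c->err       = err;
  fsm_event(c, 0 /* eBGP_Stop */);
}

static void fsm_event(conn *c, int event)
{
  /* ... state/event dispatch; actions may call fsm_throw() again ... */

  /* on the way out: forward any recorded exception, then clear it    */
  if (c->exception != ev_null)
    {
      /* real code: bgp_session_event(session, exception, notification,
       *                              err, ordinal, stopped)           */
      c->exception = ev_null;
      c->err       = 0;
    }
}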
*/ - if ((connection->except <= bgp_session_max_event) && has_session) - bgp_session_event(connection->session, connection->except, + if ((connection->exception <= bgp_session_max_event) && has_session) + bgp_session_event(connection->session, connection->exception, bgp_notify_take(&connection->notification), connection->err, connection->ordinal, stopped) ; /* Tidy up -- notification already cleared */ - connection->except = bgp_session_null_event ; - connection->err = 0 ; + connection->exception = bgp_session_null_event ; + connection->err = 0 ; bgp_notify_unset(&connection->notification) ; /* if any */ if (stopped && has_session) @@ -1552,12 +1551,12 @@ static bgp_fsm_action(bgp_fsm_enter) */ static bgp_fsm_action(bgp_fsm_stop) { - if (connection->except == bgp_session_null_event) + if (connection->exception == bgp_session_null_event) return bgp_fsm_invalid(connection, bgp_fsm_sStopping, event) ; - if ( (connection->except == bgp_session_eDisabled) - || (connection->except == bgp_session_eDiscard) - || (connection->except == bgp_session_eInvalid) ) + if ( (connection->exception == bgp_session_eDisabled) + || (connection->exception == bgp_session_eDiscard) + || (connection->exception == bgp_session_eInvalid) ) next_state = bgp_fsm_sStopping ; return bgp_fsm_catch(connection, next_state) ; @@ -1590,7 +1589,7 @@ static bgp_fsm_action(bgp_fsm_invalid) * * Enters either sConnect or sActive, depending on primary/secondary. * - * Throws a session_eStart exception so the Peering Engine gets to see this, + * Throws a session_eStart exception so the Routing Engine gets to see this, * and a follow-on fsm_eBGP_Start event to kick the connect() or accept() into * life. * @@ -2029,12 +2028,12 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) { bgp_notify send_notification ; - assert(connection->except != bgp_session_null_event) ; + assert(connection->exception != bgp_session_null_event) ; /* Have a notification to send iff not just received one, and is in a * suitable state to send one at all. */ - if (connection->except == bgp_session_eNOM_recv) + if (connection->exception == bgp_session_eNOM_recv) send_notification = NULL ; else { @@ -2046,54 +2045,36 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) send_notification = connection->notification ; } ; - /* If there is a NOTIFICATION to send, now is the time to do that. + /* If there is a NOTIFICATION to send, send it if possible. * Otherwise, close the connection but leave the timers. * * The state transition stuff looks after timers. In particular an error * in Connect/Active states leaves the ConnectRetryTimer running. */ - if (send_notification != NULL) + if ((send_notification != NULL) && bgp_connection_part_close(connection)) { - int ret ; - /* If not changing to stopping, we hold in the current state until * the NOTIFICATION process is complete. */ if (next_state != bgp_fsm_sStopping) next_state = connection->state ; - /* Close for reading and flush write buffers. */ - bgp_connection_part_close(connection) ; - + /* Make sure that cannot pop out a Keepalive ! */ qtimer_unset(&connection->keepalive_timer) ; - /* Write the message + /* Write the message */ + bgp_msg_write_notification(connection, send_notification) ; + + /* notification is sitting in the write buffer + * + * notification_pending is set, so write action will raise the required + * event in due course. * - * If the write fails it raises a suitable event, which will now be - * sitting waiting to be processed on the way out of the FSM. 
+ * Set the HoldTimer to something suitable. Don't really expect this + * to happen in anything except sEstablished state -- but copes. (Is + * ready to wait 20 seconds in sStopping state and 5 otherwise.) */ - ret = bgp_msg_write_notification(connection, send_notification) ; - - connection->notification_pending = (ret >= 0) ; - /* is pending if not failed */ - if (ret > 0) - /* notification reached the TCP buffers instantly - * - * Send ourselves the good news ! - */ - bgp_fsm_notification_sent(connection) ; - - else if (ret == 0) - /* notification is sitting in the write buffer - * - * notification_pending is set, so write action will raise the required - * event in due course. - * - * Set the HoldTimer to something suitable. Don't really expect this - * to happen in anything except sEstablished state -- but copes. (Is - * ready to wait 20 seconds in sStopping state and 5 otherwise.) - */ - bgp_hold_timer_set(connection, + bgp_hold_timer_set(connection, (next_state == bgp_fsm_sStopping) ? 20 : 5) ; } else @@ -2106,7 +2087,7 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) /* If sStopping and not eDiscard, do in any sibling */ if ( (next_state == bgp_fsm_sStopping) - && (connection->except != bgp_session_eDiscard) ) + && (connection->exception != bgp_session_eDiscard) ) { bgp_connection sibling ; diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 94f8c7e5..3c6d70aa 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -37,6 +37,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "plist.h" #include "qpnexus.h" #include "qlib_init.h" +#include "thread.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" @@ -84,8 +85,8 @@ void sigusr2 (void); static void bgp_exit (int); static void init_second_stage(int pthreads); static void bgp_in_thread_init(void); -static qtime_mono_t routing_event_hook(enum qpn_priority priority); -static qtime_mono_t bgp_event_hook(enum qpn_priority priority); +static int routing_foreground(void); +static int routing_background(void); static void sighup_action(mqueue_block mqb, mqb_flag_t flag); static void sighup_enqueue(void); static void sigterm_action(mqueue_block mqb, mqb_flag_t flag); @@ -129,7 +130,6 @@ char *config_file = NULL; /* Have we done the second stage initialization? */ static int done_2nd_state_init = 0; - /* Process ID saved for use by init system */ static const char *pid_file = PATH_BGPD_PID; @@ -390,26 +390,32 @@ init_second_stage(int pthreads) /* if using pthreads create additional nexus */ if (qpthreads_enabled) { - bgp_nexus = qpn_init_new(bgp_nexus, 0); + bgp_nexus = qpn_init_new(bgp_nexus, 0); routing_nexus = qpn_init_new(routing_nexus, 0); } else { /* we all share the single nexus and single thread */ - bgp_nexus = cli_nexus; + bgp_nexus = cli_nexus; routing_nexus = cli_nexus; } + /* Tell thread stuff to use this qtimer pile */ + thread_set_qtimer_pile(routing_nexus->pile) ; + /* Nexus hooks. * Beware if !qpthreads_enabled then there is only 1 nexus object - * with all nexus pointers being aliases for it. So only one routine - * per hook for *all* nexus. + * with all nexus pointers being aliases for it. 
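The switch from event_hook[priority] to foreground/background hook lists changes the contract: a hook now returns non-zero when it found work to do, so the nexus loop can tell whether it may sleep. The loop below is a guess at the idea for illustration only, not the qpnexus implementation:

/* Toy version of a hook-list main loop: run every foreground hook;
 * only when none of them found work is it safe to idle.  A guess at
 * the idea, not the qpnexus implementation.                          */
typedef int hook_f(void);

enum { hooks_max = 4 };           /* cf. qpn_hooks_max                */

struct hook_list { hook_f* hook[hooks_max]; int count; };

static int run_hooks(struct hook_list* hooks)
{
  int done = 0;
  int i;
  for (i = 0; i < hooks->count; ++i)
    done |= hooks->hook[i]();     /* 1 => hook found work to do       */
  return done;
}

/* main loop fragment:
 *
 *   while (running)
 *     if (!run_hooks(&foreground))
 *       if (!run_hooks(&background))
 *         wait_for_io_or_timer();
 */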
*/ - bgp_nexus->in_thread_init = bgp_in_thread_init; - bgp_nexus->in_thread_final = bgp_close_listeners; - routing_nexus->event_hook[0] = routing_event_hook; - bgp_nexus->event_hook[1] = bgp_event_hook; - confirm(NUM_EVENT_HOOK >= 2); + bgp_nexus->in_thread_init = bgp_in_thread_init ; + bgp_nexus->in_thread_final = bgp_close_listeners ; + + qpn_add_hook_function(&routing_nexus->foreground, routing_foreground) ; + qpn_add_hook_function(&bgp_nexus->foreground, bgp_connection_queue_process) ; + + qpn_add_hook_function(&routing_nexus->background, routing_background) ; + + confirm(qpn_hooks_max >= 2) ; /* vty and zclient can use either nexus or threads. * For bgp client we always want nexus, regardless of pthreads. @@ -616,26 +622,18 @@ bgp_in_thread_init(void) bgp_open_listeners(bm->port, bm->address); } -/* legacy threads */ -static qtime_mono_t -routing_event_hook(enum qpn_priority priority) +/* legacy threads in routing engine */ +static int +routing_foreground(void) { - struct thread thread; - qtime_mono_t event_wait; - - while (thread_fetch_event (priority, master, &thread, &event_wait)) - thread_call (&thread); - - return event_wait; + return thread_dispatch(master) ; } -/* BGP local queued events */ -static qtime_mono_t -bgp_event_hook(enum qpn_priority priority) +/* background threads in routing engine */ +static int +routing_background(void) { - if (priority >= qpn_pri_fourth) - bgp_connection_queue_process(); - return 0; + return thread_dispatch_background(master) ; } /* SIGINT/TERM SIGHUP need to tell routing engine what to do */ diff --git a/bgpd/bgp_msg_read.c b/bgpd/bgp_msg_read.c index 48227364..b218c5cf 100644 --- a/bgpd/bgp_msg_read.c +++ b/bgpd/bgp_msg_read.c @@ -1373,7 +1373,7 @@ bgp_msg_update_receive (bgp_connection connection, bgp_size_t body_size) ++connection->session->stats.update_in ; connection->session->stats.update_time = time(NULL) ; - /* PRO TEM: pass raw update message across to Peering Engine */ + /* PRO TEM: pass raw update message across to Routing Engine */ /* TODO: decode update messages in the BGP Engine. */ bgp_session_update_recv(connection->session, connection->ibuf, body_size); } diff --git a/bgpd/bgp_msg_write.c b/bgpd/bgp_msg_write.c index ef843157..d61ba642 100644 --- a/bgpd/bgp_msg_write.c +++ b/bgpd/bgp_msg_write.c @@ -72,12 +72,12 @@ /*------------------------------------------------------------------------------ * Make NOTIFICATION message and dispatch. * - * NB: the write buffers will have been flushed -- so expect success ! + * NB: the write buffers MUST have been flushed -- so demand success ! * - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket - * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * Returns: 1 => written to wbuff -- qpselect will write from there + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ @@ -143,7 +143,12 @@ bgp_msg_write_notification(bgp_connection connection, bgp_notify notification) bgp_notify_free(text_form) ; } ; - /* Finally -- write the obuf away */ + /* Set flag so that write_action raises required event when buffer becomes + * empty. 
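Setting notification_pending before the message is staged (in the hunk that follows) completes a simple handshake with the write action: the "sent" event is raised only once the NOTIFICATION has actually drained to TCP. In outline (the flag name is real, the rest is illustrative):

/* The notification handshake, in miniature (illustrative names) */
typedef struct {
  int notification_pending;     /* set when NOTIFICATION is queued */
} conn;

static void notification_queue(conn* c)
{
  c->notification_pending = 1;
  /* real code: bgp_connection_write(connection, s) stages the
   * message; the write action does the actual I/O                */
}

/* called from the write action once the write buffer is empty    */
static void on_buffer_empty(conn* c)
{
  if (c->notification_pending)
    {
      c->notification_pending = 0;
      /* real code: bgp_fsm_notification_sent(connection)         */
    }
  else
    {
      /* real code: queue connection so pending work can proceed  */
    }
}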
+ */ + connection->notification_pending = 1 ; + + /* Finally -- write the obuf away */ return bgp_connection_write(connection, s) ; } ; @@ -156,10 +161,11 @@ bgp_msg_write_notification(bgp_connection connection, bgp_notify notification) * KEEPALIVE is sent in response to OPEN, and that MUST be sent. But if the * buffers are full at that point, something is broken ! * - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket - * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * Returns: 1 => written to wbuff -- qpselect will write from there + * 0 => nothing written -- no need, buffer not empty ! + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ @@ -208,10 +214,10 @@ bgp_open_capability_orf (struct stream *s, iAFI_t afi, iSAFI_t safi, * OPEN is the first message to be sent. If the buffers are not empty, * something is badly wrong ! * - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket - * 0 => nothing written -- wbuff was too full !!! - * -1 => failed -- error event generated + * Returns: 1 => written to wbuff -- qpselect will write from there + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ @@ -221,8 +227,6 @@ bgp_msg_send_open(bgp_connection connection, bgp_open_state open_state) struct stream *s = connection->obuf ; int length ; - assert(bgp_connection_write_empty(connection)) ; - ++connection->session->stats.open_out ; /* Make OPEN message header */ @@ -262,7 +266,7 @@ bgp_msg_send_open(bgp_connection connection, bgp_open_state open_state) /* Finally -- write the obuf away */ return bgp_connection_write(connection, s) ; -} +} ; enum { @@ -488,9 +492,11 @@ bgp_msg_orf_prefix(struct stream* s, uint8_t common, * * Supports the status quo, only Address-Prefix ORF. * - * Returns: > 0 => all written - * 0 => unable to write everything - * < 0 => failed -- error event generated + * Returns: 1 => written to wbuff -- qpselect will write from there + * 0 => nothing written -- insufficient space in wbuff + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ @@ -501,7 +507,6 @@ bgp_msg_send_route_refresh(bgp_connection connection, bgp_route_refresh rr) uint8_t msg_type ; flag_t done ; bgp_size_t msg_len ; - int ret ; ++connection->session->stats.refresh_out ; @@ -512,7 +517,7 @@ bgp_msg_send_route_refresh(bgp_connection connection, bgp_route_refresh rr) do { - if (bgp_connection_write_full(connection)) + if (bgp_connection_write_cannot_max(connection)) return 0 ; /* Construct BGP message header for new/old form ROUTE-REFRESH */ @@ -534,10 +539,7 @@ bgp_msg_send_route_refresh(bgp_connection connection, bgp_route_refresh rr) zlog_debug ("%s sending REFRESH_REQ for afi/safi: %d/%d length %d", connection->host, rr->afi, rr->safi, msg_len) ; - ret = bgp_connection_write(connection, s) ; - if (ret < 0) - return ret ; - + bgp_connection_write(connection, s) ; } while (!done) ; return done ; @@ -779,17 +781,18 @@ bgp_msg_orf_prefix(struct stream* s, uint8_t common, /*------------------------------------------------------------------------------ * Make UPDATE message and dispatch. 
* - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket + * Returns: 1 => written to wbuff -- qpselect will write from there * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ extern int bgp_msg_send_update(bgp_connection connection, struct stream* s) { - if (bgp_connection_write_full(connection)) + if (bgp_connection_write_cannot_max(connection)) return 0 ; ++connection->session->stats.update_out ; @@ -804,19 +807,18 @@ bgp_msg_send_update(bgp_connection connection, struct stream* s) /*------------------------------------------------------------------------------ * Make End-of-RIB message and dispatch. * - * - * - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket + * Returns: 1 => written to wbuff -- qpselect will write from there * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! */ extern int bgp_msg_send_end_of_rib(bgp_connection connection, iAFI_t afi, iSAFI_t safi) { struct stream *s = connection->obuf ; - if (bgp_connection_write_full(connection)) + if (bgp_connection_write_cannot_max(connection)) return 0 ; /* Make UPDATE message header */ diff --git a/bgpd/bgp_network.c b/bgpd/bgp_network.c index 0c1072c9..fa1dbd37 100644 --- a/bgpd/bgp_network.c +++ b/bgpd/bgp_network.c @@ -479,7 +479,7 @@ bgp_accept_action(qps_file qf, void* file_info) * This is running in the BGP Engine thread, so cannot in any case be * foxed by the other connection making changes. * - * The session is active, so the Peering Engine will not make any changes + * The session is active, so the Routing Engine will not make any changes * except under the mutex, and will not destroy the session. */ diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index d565c265..8955be3b 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -87,21 +87,6 @@ bgp_packet_set_size (struct stream *s) return cp; } -/* Add new packet to the peer. */ -static void -bgp_packet_add (struct peer *peer, struct stream *s) -{ - /* Add packet to the end of list. */ - stream_fifo_push (peer->obuf, s); -} - -/* Free first packet. */ -static void -bgp_packet_delete (struct peer *peer) -{ - stream_free (stream_fifo_pop (peer->obuf)); -} - #if 0 /* Check file descriptor whether connect is established. */ static void @@ -437,8 +422,7 @@ bgp_default_update_send (struct peer *peer, struct attr *attr, #endif /* DEBUG */ /* Add packet to the peer. */ - bgp_packet_add (peer, stream_dup (s)); - bgp_write(peer); + bgp_write(peer, s); } /*------------------------------------------------------------------------------ @@ -513,8 +497,7 @@ bgp_default_withdraw_send (struct peer *peer, afi_t afi, safi_t safi) bgp_packet_set_size (s); /* Add packet to the peer. */ - bgp_packet_add (peer, stream_dup (s)); - bgp_write(peer); + bgp_write(peer, s); } /*------------------------------------------------------------------------------ @@ -616,69 +599,35 @@ bgp_write_proceed (struct peer *peer) /*------------------------------------------------------------------------------ * Write packets to the peer -- subject to the XON flow control. * - * Empties the obuf queue first. 
+ * Takes an optional stream argument, if not NULL then must be peer->work, + * in which there is a message to be sent. * * Then processes the peer->sync structure to generate further updates. * * TODO: work out how bgp_routeadv_timer fits into this. */ int -bgp_write (bgp_peer peer) +bgp_write (bgp_peer peer, struct stream* s) { - u_char type; - struct stream *s; - int free_s ; + if (s != NULL) + stream_fifo_push(peer->obuf, stream_dup(s)) ; while (bgp_session_is_XON(peer)) { - free_s = 0 ; - - s = stream_fifo_head(peer->obuf) ; /* returns own stream */ - if (s != NULL) - free_s = 1 ; - else - { - s = bgp_write_packet(peer); /* uses peer->work */ - if (s == NULL) - break; - } ; - - bgp_session_update_send(peer->session, s); - - /* Retrieve BGP packet type. */ - stream_set_getp (s, BGP_MARKER_SIZE + 2); - type = stream_getc (s); + s = bgp_write_packet(peer); /* uses peer->work */ + if (s == NULL) + break; - switch (type) - { - case BGP_MSG_OPEN: - break; - case BGP_MSG_UPDATE: - break; - case BGP_MSG_NOTIFY: - /* Double start timer. */ - peer->v_start *= 2; - - /* Overflow check. */ - if (peer->v_start >= (60 * 2)) - peer->v_start = (60 * 2); - - assert(0); /* shouldn't get notifies through here */ - return 0; - case BGP_MSG_KEEPALIVE: - break; - case BGP_MSG_ROUTE_REFRESH_NEW: - case BGP_MSG_ROUTE_REFRESH_OLD: - break; - case BGP_MSG_CAPABILITY: - break; - } + stream_fifo_push (peer->obuf, stream_dup(s)) ; - /* OK we sent packet so delete it. */ - if (free_s) - bgp_packet_delete (peer); + /* Count down flow control, send fifo if hits BGP_XON_KICK */ + if (bgp_session_dec_flow_count(peer)) + bgp_session_update_send(peer->session, peer->obuf) ; + } ; - } + /* In any case, send what's in the FIFO */ + if (stream_fifo_head(peer->obuf) != NULL) + bgp_session_update_send(peer->session, peer->obuf) ; return 0; } @@ -842,7 +791,7 @@ bgp_notify_send_with_data (struct peer *peer, u_char code, u_char sub_code, peer->last_reset = PEER_DOWN_NOTIFY_SEND; } - bgp_peer_disable(peer, notification); + bgp_peer_disable(peer, notification); } /* Send BGP notify packet. */ @@ -1032,14 +981,14 @@ bgp_capability_send (struct peer *peer, afi_t afi, safi_t safi, int capability_code, int action) { struct stream *s; - struct stream *packet; int length; /* Adjust safi code. */ if (safi == SAFI_MPLS_VPN) safi = BGP_SAFI_VPNV4; - s = stream_new (BGP_MAX_PACKET_SIZE); + s = peer->work; + stream_reset (s); /* Make BGP update packet. */ bgp_packet_set_marker (s, BGP_MSG_CAPABILITY); @@ -1063,18 +1012,12 @@ bgp_capability_send (struct peer *peer, afi_t afi, safi_t safi, /* Set packet size. */ length = bgp_packet_set_size (s); - /* Make real packet. */ - packet = stream_dup (s); - stream_free (s); - - /* Add packet to the peer. */ - bgp_packet_add (peer, packet); - if (BGP_DEBUG (normal, NORMAL)) zlog_debug ("%s send message type %d, length (incl. header) %d", - peer->host, BGP_MSG_CAPABILITY, length); + peer->host, BGP_MSG_CAPABILITY, length); - bgp_write(peer); + /* Add packet to the peer. */ + bgp_write(peer, s); } #if 0 diff --git a/bgpd/bgp_packet.h b/bgpd/bgp_packet.h index 81937522..f0798846 100644 --- a/bgpd/bgp_packet.h +++ b/bgpd/bgp_packet.h @@ -44,7 +44,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA /* Packet send and receive function prototypes. 
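The rewritten bgp_write() decouples packet generation from I/O completely: messages accumulate in the peer's obuf fifo and are shipped to the BGP Engine in batches under XON flow control. The model below compresses that shape into plain counters; the threshold name and batch bookkeeping are illustrative (the real code counts down via bgp_session_dec_flow_count() to a BGP_XON_KICK point):

/* Condensed model of the XON-gated writer: packet generation is
 * decoupled from I/O, and staged messages go to the BGP Engine in
 * batches.  Names and thresholds are stand-ins, not the real API. */
#include <stdio.h>

enum { XON_KICK = 10 };       /* batch size that triggers a send   */

typedef struct {
  int xon;                    /* session may be sent more          */
  int pending;                /* messages staged in the fifo       */
  int to_generate;            /* stand-in for the peer->sync work  */
} peer_t;

static void send_fifo(peer_t* p)
{
  printf("send batch of %d\n", p->pending);   /* => BGP Engine     */
  p->pending = 0;
}

static int bgp_write_model(peer_t* p, int have_msg)
{
  if (have_msg)
    p->pending++;                   /* caller-supplied message     */

  while (p->xon && (p->to_generate > 0))
    {
      --p->to_generate;             /* next_packet() succeeded     */
      if (++p->pending >= XON_KICK)
        send_fifo(p);               /* ship a batch                */
    }

  if (p->pending > 0)
    send_fifo(p);                   /* ship whatever is left       */
  return 0;
}

int main(void)
{
  peer_t p = { 1, 0, 23 };
  bgp_write_model(&p, 1);           /* 24 messages in 3 batches    */
  return 0;
}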
*/ extern int bgp_read (struct thread *); -extern int bgp_write (bgp_peer peer); +extern int bgp_write (bgp_peer peer, struct stream*); extern void bgp_keepalive_send (struct peer *); extern void bgp_open_send (struct peer *); diff --git a/bgpd/bgp_peer.c b/bgpd/bgp_peer.c index e6be06c7..16ec8bd9 100644 --- a/bgpd/bgp_peer.c +++ b/bgpd/bgp_peer.c @@ -202,6 +202,7 @@ bgp_session_has_established(bgp_peer peer) bgp_notify_unset(&(peer->session->notification)); /* Clear start timer value to default. */ + /* TODO: figure out where to increase the IdleHoldTimer */ peer->v_start = BGP_INIT_START_TIMER; /* Increment established count. */ @@ -547,7 +548,9 @@ bgp_timer_set (struct peer *peer) static int bgp_routeadv_timer (struct thread *thread) { - struct peer *peer; + struct peer *peer; + uint32_t jittered ; + uint32_t jitter ; peer = THREAD_ARG (thread); peer->t_routeadv = NULL; @@ -559,10 +562,21 @@ bgp_routeadv_timer (struct thread *thread) peer->synctime = time (NULL); - bgp_write(peer); + bgp_write(peer, NULL); + + /* Apply +/- 10% jitter to the route advertise timer. + * + * The time is in seconds, so for anything less than 10 seconds this forced + * to be +/- 1 second. + */ + jittered = jitter = peer->v_routeadv ; + if (jitter < 10) + jitter = 10 ; + jittered = (jittered * 90) + (rand() % (jitter * 20)) ; /* jitter is +/-10% */ + jittered = (jittered + 50) / 100 ; - BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, - peer->v_routeadv); + /* TODO: move this to the Routeing Engine qtimer pile. */ + BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, jittered) ; return 0; } @@ -1023,7 +1037,15 @@ void bgp_peer_disable(bgp_peer peer, bgp_notify notification) { if (bgp_session_is_active(peer->session)) + { + /* This code has been moved from where it was, in bgp_write */ + /* TODO: not clear whether v_start handling is still correct */ + peer->v_start *= 2; + if (peer->v_start >= (60 * 2)) + peer->v_start = (60 * 2); + bgp_session_disable(peer, notification); + } else { bgp_notify_free(notification) ; diff --git a/bgpd/bgp_peer_index.h b/bgpd/bgp_peer_index.h index 38d70907..c99ec710 100644 --- a/bgpd/bgp_peer_index.h +++ b/bgpd/bgp_peer_index.h @@ -40,7 +40,7 @@ typedef unsigned bgp_peer_id_t ; struct bgp_peer_index_entry { - bgp_peer peer ; /* used by Peering Engine */ + bgp_peer peer ; /* used by Routing Engine */ /* The accept pointer is used by the listening socket(s) to find the * session when it is prepared to accept a connection. 
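For reference, the jitter computation added to bgp_routeadv_timer() as a standalone function, with worked ranges; it is integer arithmetic throughout, matching the patch:

/* The routeadv jitter above as a standalone function.            */
#include <stdint.h>
#include <stdlib.h>

static uint32_t routeadv_jittered(uint32_t v_routeadv)
{
  uint32_t jitter = (v_routeadv < 10) ? 10 : v_routeadv;
  return ((v_routeadv * 90) + ((uint32_t)rand() % (jitter * 20)) + 50)
                                                                  / 100;
}

/* e.g. v_routeadv = 30 -> 27..33 (+/-10%)
 *      v_routeadv =  9 ->  8..10 (widened to roughly +/-1s)      */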
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 7d3ad901..c5191b18 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -1457,10 +1457,12 @@ struct bgp_process_queue safi_t safi; }; +WQ_ARGS_SIZE_OK(bgp_process_queue) ; + static wq_item_status -bgp_process_rsclient (struct work_queue *wq, void *data) +bgp_process_rsclient (struct work_queue *wq, work_queue_item item) { - struct bgp_process_queue *pq = data; + struct bgp_process_queue *pq = work_queue_item_args(item) ; struct bgp *bgp = pq->bgp; struct bgp_node *rn = pq->rn; afi_t afi = pq->afi; @@ -1518,9 +1520,9 @@ bgp_process_rsclient (struct work_queue *wq, void *data) } static wq_item_status -bgp_process_main (struct work_queue *wq, void *data) +bgp_process_main (struct work_queue *wq, work_queue_item item) { - struct bgp_process_queue *pq = data; + struct bgp_process_queue *pq = work_queue_item_args(item) ; struct bgp *bgp = pq->bgp; struct bgp_node *rn = pq->rn; afi_t afi = pq->afi; @@ -1592,15 +1594,14 @@ bgp_process_main (struct work_queue *wq, void *data) } static void -bgp_processq_del (struct work_queue *wq, void *data) +bgp_processq_del (struct work_queue *wq, work_queue_item item) { - struct bgp_process_queue *pq = data; + struct bgp_process_queue *pq = work_queue_item_args(item); struct bgp_table *table = pq->rn->table; bgp_unlock (pq->bgp); bgp_unlock_node (pq->rn); bgp_table_unlock (table); - XFREE (MTYPE_BGP_PROCESS_QUEUE, pq); } static void @@ -1617,21 +1618,23 @@ bgp_process_queue_init (void) exit (1); } - bm->process_main_queue->spec.workfunc = &bgp_process_main; - bm->process_rsclient_queue->spec.workfunc = &bgp_process_rsclient; - bm->process_main_queue->spec.del_item_data = &bgp_processq_del; - bm->process_rsclient_queue->spec.del_item_data - = bm->process_main_queue->spec.del_item_data; - bm->process_main_queue->spec.max_retries - = bm->process_main_queue->spec.max_retries = 0; - bm->process_rsclient_queue->spec.hold - = bm->process_main_queue->spec.hold = 50; + bm->process_main_queue->spec.data = bm->master ; + bm->process_main_queue->spec.errorfunc = NULL ; + bm->process_main_queue->spec.workfunc = &bgp_process_main; + bm->process_main_queue->spec.del_item_data = &bgp_processq_del; + bm->process_main_queue->spec.completion_func = NULL ; + bm->process_main_queue->spec.max_retries = 0; + bm->process_main_queue->spec.hold = 50; + + bm->process_rsclient_queue->spec = bm->process_main_queue->spec ; + bm->process_rsclient_queue->spec.workfunc = &bgp_process_rsclient; } void bgp_process (struct bgp *bgp, struct bgp_node *rn, afi_t afi, safi_t safi) { struct bgp_process_queue *pqnode; + struct work_queue* wq ; /* already scheduled for processing? 
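The bgp_route.c hunks all apply one pattern: the per-item argument block now lives inside the work queue item itself, so the separate XCALLOC/XFREE pair disappears and WQ_ARGS_SIZE_OK() checks at compile time that the arguments fit. A self-contained imitation (sizes and names are illustrative, not the real work queue API):

/* Imitation of the embedded-args work queue pattern: the argument
 * block lives inside the queue item, so there is no per-item
 * XCALLOC/XFREE.  Sizes and names here are illustrative.          */
#include <assert.h>
#include <stdlib.h>

enum { wq_args_size_max = 64 };

typedef struct wq_item {
  struct wq_item* next;
  char args[wq_args_size_max];      /* args embedded in the item   */
} * wq_item;

/* compile-time "fits in the item" check, cf. WQ_ARGS_SIZE_OK()    */
#define WQ_ARGS_OK(s) \
  typedef char s##_fits[(sizeof(struct s) <= wq_args_size_max) ? 1 : -1]

struct my_queue_args { int afi; int safi; };
WQ_ARGS_OK(my_queue_args);

static void* wq_item_args(wq_item item)  /* cf. work_queue_item_args */
{
  return item->args;
}

int main(void)
{
  wq_item item = calloc(1, sizeof(*item));  /* cf. work_queue_item_add */
  struct my_queue_args* args = wq_item_args(item);
  args->afi  = 1;
  args->safi = 1;
  assert(((struct my_queue_args*)wq_item_args(item))->safi == 1);
  free(item);                       /* freed with the item, not apart  */
  return 0;
}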
*/ if (CHECK_FLAG (rn->flags, BGP_NODE_PROCESS_SCHEDULED)) @@ -1641,29 +1644,31 @@ bgp_process (struct bgp *bgp, struct bgp_node *rn, afi_t afi, safi_t safi) (bm->process_rsclient_queue == NULL) ) bgp_process_queue_init (); - pqnode = XCALLOC (MTYPE_BGP_PROCESS_QUEUE, - sizeof (struct bgp_process_queue)); - if (!pqnode) - return; - - /* all unlocked in bgp_processq_del */ - bgp_table_lock (rn->table); - pqnode->rn = bgp_lock_node (rn); - pqnode->bgp = bgp; - bgp_lock (bgp); - pqnode->afi = afi; - pqnode->safi = safi; - switch (rn->table->type) { case BGP_TABLE_MAIN: - work_queue_add (bm->process_main_queue, pqnode); + wq = bm->process_main_queue ; break; case BGP_TABLE_RSCLIENT: - work_queue_add (bm->process_rsclient_queue, pqnode); + wq = bm->process_rsclient_queue ; break; + default: + zabort("invalid rn->table->type") ; } + pqnode = work_queue_item_add(wq); + + if (!pqnode) + return; + + /* all unlocked in bgp_processq_del */ + bgp_table_lock (rn->table); + pqnode->rn = bgp_lock_node (rn); + pqnode->bgp = bgp; + bgp_lock (bgp); + pqnode->afi = afi; + pqnode->safi = safi; + return; } @@ -2672,17 +2677,18 @@ bgp_soft_reconfig_in (struct peer *peer, afi_t afi, safi_t safi) bgp_soft_reconfig_table (peer, afi, safi, table); } - struct bgp_clear_node_queue { struct bgp_node *rn; enum bgp_clear_route_type purpose; }; +WQ_ARGS_SIZE_OK(bgp_clear_node_queue) ; + static wq_item_status -bgp_clear_route_node (struct work_queue *wq, void *data) +bgp_clear_route_node (struct work_queue *wq, work_queue_item item) { - struct bgp_clear_node_queue *cnq = data; + struct bgp_clear_node_queue *cnq = work_queue_item_args(item) ; struct bgp_node *rn = cnq->rn; struct peer *peer = wq->spec.data; struct bgp_info *ri; @@ -2708,15 +2714,14 @@ bgp_clear_route_node (struct work_queue *wq, void *data) } static void -bgp_clear_node_queue_del (struct work_queue *wq, void *data) +bgp_clear_node_queue_del (struct work_queue *wq, work_queue_item item) { - struct bgp_clear_node_queue *cnq = data; + struct bgp_clear_node_queue *cnq = work_queue_item_args(item) ; struct bgp_node *rn = cnq->rn; struct bgp_table *table = rn->table; bgp_unlock_node (rn); bgp_table_unlock (table); - XFREE (MTYPE_BGP_CLEAR_NODE_QUEUE, cnq); } static void @@ -2823,11 +2828,9 @@ bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, /* both unlocked in bgp_clear_node_queue_del */ bgp_table_lock (rn->table); bgp_lock_node (rn); - cnq = XCALLOC (MTYPE_BGP_CLEAR_NODE_QUEUE, - sizeof (struct bgp_clear_node_queue)); - cnq->rn = rn; + cnq = work_queue_item_add(peer->clear_node_queue) ; + cnq->rn = rn; cnq->purpose = purpose; - work_queue_add (peer->clear_node_queue, cnq); break; } diff --git a/bgpd/bgp_route_refresh.h b/bgpd/bgp_route_refresh.h index b6e5eaf5..3afd997e 100644 --- a/bgpd/bgp_route_refresh.h +++ b/bgpd/bgp_route_refresh.h @@ -65,7 +65,10 @@ struct bgp_orf_entry } body ; } ; -typedef struct bgp_orf_entry bgp_orf_entry_t ; /* calm down Eclipse */ +/* (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining + * about first argument of offsetof().) + */ +typedef struct bgp_orf_entry bgp_orf_entry_t ; enum { bgp_orf_unknown_min_l = sizeof(struct bgp_orf_entry) diff --git a/bgpd/bgp_session.c b/bgpd/bgp_session.c index a2b49da5..9d17e36c 100644 --- a/bgpd/bgp_session.c +++ b/bgpd/bgp_session.c @@ -80,10 +80,10 @@ static void bgp_session_do_route_refresh_recv(mqueue_block mqb, mqb_flag_t flag) * change any shared item in the session, except under the mutex. And * even then it may make no sense ! 
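 *
 * (Aside on the work queue conversion in bgp_route.c above: the new
 * interface embeds the work item arguments in the queue item itself, which
 * is why the separate XCALLOC/XFREE of the bgp_process_queue and
 * bgp_clear_node_queue structures has gone. Schematically -- the actual
 * qlib internals here are assumptions, not the real declarations:
 *
 *   typedef struct work_queue_item* work_queue_item ;
 *   struct work_queue_item
 *   {
 *     ...
 *     char args[WQ_ARGS_MAX] ;        /* args embedded in the item       */
 *   } ;
 *
 *   #define WQ_ARGS_SIZE_OK(s) confirm(sizeof(struct s) <= WQ_ARGS_MAX)
 *
 * so work_queue_item_add() hands back the embedded args area of a freshly
 * queued item, and work_queue_item_args(item) recovers it again in the
 * work and delete functions.)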
* - * NB: a session reaches eDisabled when the Peering Engine has sent a disable + * NB: a session reaches eDisabled when the Routing Engine has sent a disable * request to the BGP Engine, AND an eDisabled event has come back. * - * While the Peering Engine is waiting for the eDisabled event, the session + * While the Routing Engine is waiting for the eDisabled event, the session * is in sLimping state. * * The BGP Engine's primary interest is in its (private) bgp_connection @@ -212,7 +212,7 @@ bgp_session_free(bgp_session session) } /*============================================================================== - * Peering Engine: enable session for given peer -- allocate if required. + * Routing Engine: enable session for given peer -- allocate if required. * * Sets up the session given the current state of the peer. If the state * changes, then need to disable the session and re-enable it again with new @@ -226,12 +226,12 @@ bgp_session_enable(bgp_peer peer) /* Set up session if required. Check session if already exists. * - * Only the Peering Engine creates sessions, so it is safe to pick up the + * Only the Routing Engine creates sessions, so it is safe to pick up the * peer->session pointer and test it. * * If session exists, it MUST be inactive. * - * Peering Engine does not require the mutex while the session is inactive. + * Routing Engine does not require the mutex while the session is inactive. */ session = peer->session ; @@ -348,7 +348,7 @@ bgp_session_do_enable(mqueue_block mqb, mqb_flag_t flag) } ; /*============================================================================== - * Peering Engine: disable session for given peer -- if enabled (!). + * Routing Engine: disable session for given peer -- if enabled (!). * * Passes any bgp_notify to the BGP Engine, which will dispose of it in due * course. @@ -398,7 +398,7 @@ bgp_session_disable(bgp_peer peer, bgp_notify notification) * * the disable is being issued in response to a stopped event from * the BGP Engine. * - * * the session is stopped, but the message to the Peering Engine is + * * the session is stopped, but the message to the Routing Engine is * still in its message queue. * * * the session is stopped while the disable message is in the @@ -410,11 +410,11 @@ bgp_session_disable(bgp_peer peer, bgp_notify notification) * * NB: The BGP Engine will discard any outstanding work for the session. * - * The Peering Engine should discard all further messages for this + * The Routing Engine should discard all further messages for this * session up to the eDisabled, and must then discard any other * messages for the session. * - * NB: the Peering Engine MUST not issue any further messages until it sees + * NB: the Routing Engine MUST not issue any further messages until it sees * the returned eDisabled event. */ mqb = mqb_init_new(NULL, bgp_session_do_disable, session) ; @@ -433,7 +433,6 @@ bgp_session_disable(bgp_peer peer, bgp_notify notification) c = 0 ; s = 0 ; } ; - fprintf(stderr, " session disable %d/%d", c, s) ; } ; ++bgp_engine_queue_stats.event ; @@ -469,7 +468,7 @@ bgp_session_do_disable(mqueue_block mqb, mqb_flag_t flag) /*============================================================================== * BGP Engine: send session event signal to Routeing Engine * - * NB: is passing responsibility for the notification to the Peering Engine. + * NB: is passing responsibility for the notification to the Routing Engine. 
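 *
 * (Illustrative only: on the Routing Engine side, the eDisabled handshake
 * described earlier amounts to a message filter of this shape -- the
 * sLimping enum name is an assumption:
 *
 *   if (session->state == bgp_session_sLimping)
 *     accept = (event == bgp_session_eDisabled) ; /* only the final ack  */
 *   else
 *     accept = 1 ;
 * )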
*/ extern void bgp_session_event(bgp_session session, bgp_session_event_t event, @@ -494,21 +493,20 @@ bgp_session_event(bgp_session session, bgp_session_event_t event, args->ordinal = ordinal ; args->stopped = stopped, - ++peering_engine_queue_stats.event ; + ++routing_engine_queue_stats.event ; - bgp_to_peering_engine(mqb) ; -} + bgp_to_routing_engine(mqb) ; +} ; /*============================================================================== - * Peering Engine: dispatch update to peer -> BGP Engine + * Routing Engine: dispatch update(s) to peer -> BGP Engine * - * PRO TEM -- this is being passed the pre-packaged BGP message. + * PRO TEM -- this is being passed the pre-packaged BGP message(s). * - * The BGP Engine takes care of discarding the stream block once it's been - * dealt with. + * The BGP Engine takes care of discarding the stream block(s) once dealt with. */ extern void -bgp_session_update_send(bgp_session session, struct stream* upd) +bgp_session_update_send(bgp_session session, struct stream_fifo* fifo) { struct bgp_session_update_args* args ; mqueue_block mqb ; @@ -516,37 +514,38 @@ bgp_session_update_send(bgp_session session, struct stream* upd) mqb = mqb_init_new(NULL, bgp_session_do_update_send, session) ; args = mqb_get_args(mqb) ; - args->buf = stream_dup(upd) ; + args->buf = stream_fifo_head(fifo) ; args->is_pending = NULL ; - args->xon_kick = (session->flow_control == BGP_XON_KICK); - session->flow_control--; + args->xon_kick = (session->flow_control == BGP_XON_KICK); ++bgp_engine_queue_stats.update ; bgp_to_bgp_engine(mqb) ; + + stream_fifo_reset(fifo) ; } ; /*------------------------------------------------------------------------------ - * BGP Engine: write given BGP update message -- mqb action function. + * BGP Engine: write given BGP update message(s) -- mqb action function. * * Each connection has a pending queue associated with it, onto which messages * are put if the connection's write buffer is unable to absorb any further * messages. * - * This function is called both when the mqb is received from the Peering + * This function is called both when the mqb is received from the Routing * Engine, and when the BGP Engine is trying to empty the connection's pending * queue. * - * When the mqb is received from the Peering Engine, then: + * When the mqb is received from the Routing Engine, then: * - * -- if the connection's pending queue is empty, try to send the message. + * -- if the connection's pending queue is empty, try to send the message(s). * * When the mqb is from connection's pending queue, then: * - * -- try to send the message. + * -- try to send the message(s). * - * In any case, if cannot send the message (and not encountered any error), add - * it (back) to the connection's pending queue. + * In any case, if cannot send all the message(s), add it (back) to the + * connection's pending queue. * * If the mqb has been dealt with, it is freed, along with the stream buffer. * Also, update the flow control counter, and issue XON if required. @@ -557,43 +556,54 @@ bgp_session_do_update_send(mqueue_block mqb, mqb_flag_t flag) struct bgp_session_update_args* args = mqb_get_args(mqb) ; bgp_session session = mqb_get_arg0(mqb) ; - if ((flag == mqb_action) && session->active) + while (args->buf != NULL) { - bgp_connection connection = session->connections[bgp_connection_primary] ; - assert(connection != NULL) ; + struct stream* buf ; - /* If established, try and send. 
*/ - if (connection->state == bgp_fsm_sEstablished) + if ((flag == mqb_action) && session->active) { - int ret = bgp_connection_no_pending(connection, &args->is_pending) ; + bgp_connection connection ; - if (ret != 0) - ret = bgp_msg_send_update(connection, args->buf) ; + connection = session->connections[bgp_connection_primary] ; + assert(connection != NULL) ; - if (ret == 0) - { - /* Either there is already a pending queue, or the message - * could not be sent (and has not failed) -- so add to the - * pending queue. - */ - bgp_connection_add_pending(connection, mqb, &args->is_pending) ; - return ; /* Quit now, with message intact. */ - } - else if (ret > 0) + /* If established, try and send. */ + if (connection->state == bgp_fsm_sEstablished) { - /* Successfully wrote the message. XON if requested */ - if (args->xon_kick) - bgp_session_XON(session); + int ret ; + ret = bgp_connection_no_pending(connection, &args->is_pending) ; + + if (ret != 0) + ret = bgp_msg_send_update(connection, args->buf) ; + + if (ret == 0) + { + /* Either there is already a pending queue, or the message + * could not be sent (and has not failed) -- so add to the + * pending queue. + */ + bgp_connection_add_pending(connection, mqb, + &args->is_pending) ; + return ; /* Quit now, with message intact. */ + } } ; } ; + + buf = args->buf ; + args->buf = buf->next ; + + stream_free(buf) ; } ; - stream_free(args->buf) ; + /* If gets to here, then has dealt with all message(s). */ + if ((flag == mqb_action) && (args->xon_kick)) + bgp_session_XON(session) ; + mqb_free(mqb) ; } ; /*------------------------------------------------------------------------------ - * Peering Engine: are we in XON state ? + * Routing Engine: are we in XON state ? */ extern int bgp_session_is_XON(bgp_peer peer) @@ -606,8 +616,20 @@ bgp_session_is_XON(bgp_peer peer) return result; } ; +/*------------------------------------------------------------------------------ + * Count down flow control -- signal if reached XON point. + */ +extern int +bgp_session_dec_flow_count(bgp_peer peer) +{ + bgp_session session = peer->session; + + assert(session->flow_control > 0) ; + return (--session->flow_control == BGP_XON_KICK) ; +} ; + /*============================================================================== - * Peering Engine: dispatch Route Refresh to peer -> BGP Engine + * Routing Engine: dispatch Route Refresh to peer -> BGP Engine * * The BGP Engine takes care of discarding the bgp_route_refresh once it's been * dealt with. @@ -671,7 +693,7 @@ bgp_session_do_route_refresh_send(mqueue_block mqb, mqb_flag_t flag) } ; /*============================================================================== - * Peering Engine: dispatch End-of-RIB to peer -> BGP Engine + * Routing Engine: dispatch End-of-RIB to peer -> BGP Engine */ extern void bgp_session_end_of_rib_send(bgp_session session, qAFI_t afi, qSAFI_t safi) @@ -736,11 +758,11 @@ bgp_session_do_end_of_rib_send(mqueue_block mqb, mqb_flag_t flag) } ; /*============================================================================== - * BGP Engine: forward incoming update -> Peering Engine + * BGP Engine: forward incoming update -> Routing Engine * * PRO TEM -- this is being passed the raw BGP message. * - * The Peering Engine takes care of discarding the stream block once it's been + * The Routing Engine takes care of discarding the stream block once it's been * dealt with. 
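 *
 * (Looking back at bgp_session_do_update_send above, the sending side
 * reduces to the following pattern, where a positive return from
 * bgp_msg_send_update() means written, zero means the write buffer is
 * full -- and, by implication, negative means the connection has failed:
 *
 *   if (bgp_connection_no_pending(connection, &args->is_pending))
 *     ret = bgp_msg_send_update(connection, args->buf) ;
 *   else
 *     ret = 0 ;                /* queue ahead of us: may not overtake    */
 *
 *   if (ret == 0)
 *     bgp_connection_add_pending(connection, mqb, &args->is_pending) ;
 *                              /* keep the mqb; retried as buffer drains */
 * )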
*/ extern void @@ -756,13 +778,13 @@ bgp_session_update_recv(bgp_session session, struct stream* buf, bgp_size_t size args->size = size; args->xon_kick = 0; - ++peering_engine_queue_stats.update ; + ++routing_engine_queue_stats.update ; - bgp_to_peering_engine(mqb) ; + bgp_to_routing_engine(mqb) ; } /*------------------------------------------------------------------------------ - * Peering Engine: process incoming update message -- mqb action function. + * Routing Engine: process incoming update message -- mqb action function. */ static void bgp_session_do_update_recv(mqueue_block mqb, mqb_flag_t flag) @@ -787,7 +809,7 @@ bgp_session_do_update_recv(mqueue_block mqb, mqb_flag_t flag) /*============================================================================== * BGP Engine: received Route Refresh to peer * - * The Peering Engine takes care of discarding the bgp_route_refresh once + * The Routing Engine takes care of discarding the bgp_route_refresh once * it's been dealt with. */ extern void @@ -802,11 +824,11 @@ bgp_session_route_refresh_recv(bgp_session session, bgp_route_refresh rr) args->rr = rr ; args->is_pending = NULL ; - bgp_to_peering_engine(mqb) ; + bgp_to_routing_engine(mqb) ; } ; /*------------------------------------------------------------------------------ - * Peering Engine: receive given BGP route refresh message -- mqb action + * Routing Engine: receive given BGP route refresh message -- mqb action * function. */ static void @@ -823,7 +845,7 @@ bgp_session_do_route_refresh_recv(mqueue_block mqb, mqb_flag_t flag) } /*============================================================================== - * BGP Engine: send XON message to Peering Engine + * BGP Engine: send XON message to Routing Engine * * Can be sent more packets now */ @@ -836,13 +858,13 @@ bgp_session_XON(bgp_session session) confirm(sizeof(struct bgp_session_XON_args) == 0) ; - ++peering_engine_queue_stats.xon ; + ++routing_engine_queue_stats.xon ; - bgp_to_peering_engine(mqb) ; + bgp_to_routing_engine(mqb) ; } /*------------------------------------------------------------------------------ - * Peering Engine: process incoming XON message -- mqb action function. + * Routing Engine: process incoming XON message -- mqb action function. */ static void bgp_session_do_XON(mqueue_block mqb, mqb_flag_t flag) @@ -854,14 +876,14 @@ bgp_session_do_XON(mqueue_block mqb, mqb_flag_t flag) int xoff = (session->flow_control <= 0); session->flow_control = BGP_XON_REFRESH; if (xoff) - bgp_write (session->peer) ; + bgp_write (session->peer, NULL) ; } mqb_free(mqb) ; } /*============================================================================== - * Peering Engine: send set ttl message to BGP Engine + * Routing Engine: send set ttl message to BGP Engine * */ void @@ -918,7 +940,7 @@ bgp_session_do_set_ttl(mqueue_block mqb, mqb_flag_t flag) * pointer is NULL -- this is largely paranoia, but it would be a grave * mistake for the listening socket(s) to find a session which is not active ! * - * NB: accessing Peering Engine "private" variable -- no lock required. + * NB: accessing Routing Engine "private" variable -- no lock required. * * accessing index_entry when not active -- no lock required. */ @@ -943,13 +965,13 @@ bgp_session_is_active(bgp_session session) } ; /*------------------------------------------------------------------------------ - * Peering Engine: if session is limping we defer re-enabling the session + * Routing Engine: if session is limping we defer re-enabling the session * until it is disabled. 
* * returns 1 if limping and defer * returns 0 if not limping * - * NB: accessing Peering Engine "private" variable -- no lock required. + * NB: accessing Routing Engine "private" variable -- no lock required. */ static int bgp_session_defer_if_limping(bgp_session session) diff --git a/bgpd/bgp_session.h b/bgpd/bgp_session.h index 5af81688..5b144db1 100644 --- a/bgpd/bgp_session.h +++ b/bgpd/bgp_session.h @@ -59,7 +59,7 @@ * For simplicity, the BGP Engine may lock the session associated with the * connection it is dealing with. * - * Parts of the session structure are private to the Peering Engine, and + * Parts of the session structure are private to the Routing Engine, and * do not require the mutex for access. * * NB: the connections associated with a BGP session are private to the BGP @@ -99,34 +99,31 @@ struct bgp_session /* While sIdle and sStopped: * - * the session belongs to the Peering Engine. + * the session belongs to the Routing Engine. * * The BGP Engine will not touch a session in these states and the - * Peering Engine may do what it likes with it. + * Routing Engine may do what it likes with it. * * While sEnabled, sEstablished and sStopping: * * the session belongs to the BGP Engine. * - * A (very) few items in the session may be accessed by the Peering Engine, + * A (very) few items in the session may be accessed by the Routing Engine, * as noted below. (Subject to the mutex.) * - * Only the Peering Engine creates and destroys sessions. The BGP Engine + * Only the Routing Engine creates and destroys sessions. The BGP Engine * assumes that a session will not be destroyed while it is sEnabled, * sEstablished or sStopping. * - * These are private to the Peering Engine. + * These are private to the Routing Engine. */ bgp_session_state_t state ; int defer_enable ; /* set when waiting for stop */ - /* Flow control. Incremented when an update packet is sent - * from peering to BGP engine. Decremented when packet processed - * by BGP engine. On transition to 0 BGP engine should send an XON. - */ - int flow_control; + int flow_control ; /* limits number of updates sent + by the Routing Engine */ - /* These are private to the Peering Engine, and are set each time a session + /* These are private to the Routing Engine, and are set each time a session * event message is received from the BGP Engine. */ bgp_session_event_t event ; /* last event */ @@ -208,11 +205,11 @@ struct bgp_session * the session, and sets the stopped flag. * * The active flag is set when one or more connections are activated, and - * cleared when either the BGP Engine stops the session or the Peering + * cleared when either the BGP Engine stops the session or the Routing * Engine disables it. When not "active" all messages other than disable * and enable are ignored. This deals with the hiatus that exists between * the BGP Engine signalling that it has stopped (because of some exception) - * and the Peering Engine acknowledging that (by disabling the session). + * and the Routing Engine acknowledging that (by disabling the session). 
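 *
 * (The flow control amounts to a simple credit scheme. A sketch, using the
 * BGP_XON_* constants defined a little further down -- send_update() and
 * ask_for_xon() are schematic stand-ins:
 *
 *   int credit = BGP_XON_REFRESH ;       /* Routing Engine's view        */
 *
 *   while (credit > 0)
 *     {
 *       send_update() ;
 *       if (--credit == BGP_XON_KICK)
 *         ask_for_xon() ;                /* piggy-backed on the update   */
 *     } ;
 *
 *   and on XON from the BGP Engine:  credit = BGP_XON_REFRESH ;
 *
 * With BGP_XON_REFRESH == 40 and BGP_XON_KICK == 20 the XON is requested
 * half way down the window, so the pipeline need not drain completely
 * before the refresh arrives.)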
*/ bgp_connection connections[bgp_connection_count] ; @@ -282,8 +279,8 @@ struct bgp_session_XON_args /* to Routeing Engine */ /* no further arguments */ } ; MQB_ARGS_SIZE_OK(bgp_session_XON_args) ; -enum { BGP_XON_REFRESH = 12, - BGP_XON_KICK = 4, +enum { BGP_XON_REFRESH = 40, + BGP_XON_KICK = 20, } ; struct bgp_session_ttl_args /* to bgp Engine */ @@ -330,7 +327,7 @@ bgp_session_event(bgp_session session, bgp_session_event_t event, int stopped) ; extern void -bgp_session_update_send(bgp_session session, struct stream* upd) ; +bgp_session_update_send(bgp_session session, struct stream_fifo* fifo) ; extern void bgp_session_route_refresh_send(bgp_session session, bgp_route_refresh rr) ; @@ -346,10 +343,9 @@ extern void bgp_session_route_refresh_recv(bgp_session session, bgp_route_refresh rr); extern int -bgp_session_is_XOFF(bgp_peer peer); - -extern int bgp_session_is_XON(bgp_peer peer); +extern int +bgp_session_dec_flow_count(bgp_peer peer) ; extern void bgp_session_set_ttl(bgp_session session, int ttl); diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 154cb28d..238bd01c 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -4712,7 +4712,6 @@ bgp_terminate (int terminating, int retain_mode) for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) { -fprintf(stderr, ">>> %s:", peer->host) ; if (retain_mode) bgp_peer_disable(peer, NULL); else if (terminating) @@ -4725,7 +4724,6 @@ fprintf(stderr, ">>> %s:", peer->host) ; else bgp_notify_send(peer, BGP_NOTIFY_CEASE, BGP_NOTIFY_CEASE_ADMIN_RESET); -fprintf(stderr, "<<<\n") ; } if (!retain_mode) diff --git a/bgpd/bgpd.cx b/bgpd/bgpd.cx new file mode 100644 index 00000000..955b344b --- /dev/null +++ b/bgpd/bgpd.cx @@ -0,0 +1,4777 @@ +/* BGP-4, BGP-4+ daemon program + Copyright (C) 1996, 97, 98, 99, 2000 Kunihiro Ishiguro + +This file is part of GNU Zebra. + +GNU Zebra is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +GNU Zebra is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Zebra; see the file COPYING. If not, write to the Free +Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. 
*/ + +#include <zebra.h> + +#include "prefix.h" +#include "thread.h" +#include "buffer.h" +#include "stream.h" +#include "command.h" +#include "sockunion.h" +#include "network.h" +#include "memory.h" +#include "filter.h" +#include "routemap.h" +#include "str.h" +#include "log.h" +#include "plist.h" +#include "linklist.h" +#include "workqueue.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp.h" +#include "bgpd/bgp_peer.h" + +#include "bgpd/bgp_table.h" +#include "bgpd/bgp_aspath.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_dump.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_community.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_regex.h" +#include "bgpd/bgp_clist.h" +#include "bgpd/bgp_fsm.h" +#include "bgpd/bgp_packet.h" +#include "bgpd/bgp_zebra.h" +#include "bgpd/bgp_open.h" +#include "bgpd/bgp_filter.h" +#include "bgpd/bgp_nexthop.h" +#include "bgpd/bgp_damp.h" +#include "bgpd/bgp_mplsvpn.h" +#include "bgpd/bgp_advertise.h" +#include "bgpd/bgp_network.h" +#include "bgpd/bgp_vty.h" +#ifdef HAVE_SNMP +#include "bgpd/bgp_snmp.h" +#endif /* HAVE_SNMP */ + +/* BGP process wide configuration. */ +static struct bgp_master bgp_master; + +extern struct in_addr router_id_zebra; + +/* BGP process wide configuration pointer to export. */ +struct bgp_master *bm; + +/* BGP process wide nexus. */ +qpn_nexus cli_nexus = NULL; +qpn_nexus bgp_nexus = NULL; +qpn_nexus routing_nexus = NULL; + +/* BGP community-list. */ +struct community_list_handler *bgp_clist; + +/* true while program terminating */ +static int program_terminating = 0; + +/* BGP global flag manipulation. */ +int +bgp_option_set (int flag) +{ + switch (flag) + { + case BGP_OPT_NO_FIB: + case BGP_OPT_MULTIPLE_INSTANCE: + case BGP_OPT_CONFIG_CISCO: + SET_FLAG (bm->options, flag); + break; + default: + return BGP_ERR_INVALID_FLAG; + } + return 0; +} + +int +bgp_option_unset (int flag) +{ + switch (flag) + { + case BGP_OPT_MULTIPLE_INSTANCE: + if (listcount (bm->bgp) > 1) + return BGP_ERR_MULTIPLE_INSTANCE_USED; + /* Fall through. */ + case BGP_OPT_NO_FIB: + case BGP_OPT_CONFIG_CISCO: + UNSET_FLAG (bm->options, flag); + break; + default: + return BGP_ERR_INVALID_FLAG; + } + return 0; +} + +int +bgp_option_check (int flag) +{ + return CHECK_FLAG (bm->options, flag); +} + +/* BGP flag manipulation. */ +int +bgp_flag_set (struct bgp *bgp, int flag) +{ + SET_FLAG (bgp->flags, flag); + return 0; +} + +int +bgp_flag_unset (struct bgp *bgp, int flag) +{ + UNSET_FLAG (bgp->flags, flag); + return 0; +} + +int +bgp_flag_check (struct bgp *bgp, int flag) +{ + return CHECK_FLAG (bgp->flags, flag); +} + +/* Internal function to set BGP structure configuration flag. */ +static void +bgp_config_set (struct bgp *bgp, int config) +{ + SET_FLAG (bgp->config, config); +} + +static void +bgp_config_unset (struct bgp *bgp, int config) +{ + UNSET_FLAG (bgp->config, config); +} + +static int +bgp_config_check (struct bgp *bgp, int config) +{ + return CHECK_FLAG (bgp->config, config); +} + +/* Set BGP router identifier. */ +int +bgp_router_id_set (struct bgp *bgp, struct in_addr *id) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (bgp_config_check (bgp, BGP_CONFIG_ROUTER_ID) + && IPV4_ADDR_SAME (&bgp->router_id, id)) + return 0; + + IPV4_ADDR_COPY (&bgp->router_id, id); + bgp_config_set (bgp, BGP_CONFIG_ROUTER_ID); + + /* Set all peers' local identifier with this value.
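+ *
+ * (Usage sketch -- hypothetical address: a router-id change bounces every
+ * established session with CEASE/Configuration Change, because the BGP
+ * Identifier has already been sent in the OPEN:
+ *
+ *   struct in_addr id ;
+ *   inet_aton ("192.0.2.1", &id) ;
+ *   bgp_router_id_set (bgp, &id) ;
+ * )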
*/ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + IPV4_ADDR_COPY (&peer->local_id, id); + + if (peer->state == bgp_peer_sEstablished) + { + peer->last_reset = PEER_DOWN_RID_CHANGE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + return 0; +} + +/* BGP's cluster-id control. */ +int +bgp_cluster_id_set (struct bgp *bgp, struct in_addr *cluster_id) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (bgp_config_check (bgp, BGP_CONFIG_CLUSTER_ID) + && IPV4_ADDR_SAME (&bgp->cluster_id, cluster_id)) + return 0; + + IPV4_ADDR_COPY (&bgp->cluster_id, cluster_id); + bgp_config_set (bgp, BGP_CONFIG_CLUSTER_ID); + + /* Clear all IBGP peer. */ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer_sort (peer) != BGP_PEER_IBGP) + continue; + + if (peer->state == bgp_peer_sEstablished) + { + peer->last_reset = PEER_DOWN_CLID_CHANGE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + return 0; +} + +int +bgp_cluster_id_unset (struct bgp *bgp) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (! bgp_config_check (bgp, BGP_CONFIG_CLUSTER_ID)) + return 0; + + bgp->cluster_id.s_addr = 0; + bgp_config_unset (bgp, BGP_CONFIG_CLUSTER_ID); + + /* Clear all IBGP peer. */ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer_sort (peer) != BGP_PEER_IBGP) + continue; + + if (peer->state == bgp_peer_sEstablished) + { + peer->last_reset = PEER_DOWN_CLID_CHANGE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + return 0; +} + +/* BGP timer configuration. */ +int +bgp_timers_set (struct bgp *bgp, u_int32_t keepalive, u_int32_t holdtime) +{ + bgp->default_keepalive = (keepalive < holdtime / 3 + ? keepalive : holdtime / 3); + bgp->default_holdtime = holdtime; + + return 0; +} + +int +bgp_timers_unset (struct bgp *bgp) +{ + bgp->default_keepalive = BGP_DEFAULT_KEEPALIVE; + bgp->default_holdtime = BGP_DEFAULT_HOLDTIME; + + return 0; +} + +/* BGP confederation configuration. */ +int +bgp_confederation_id_set (struct bgp *bgp, as_t as) +{ + struct peer *peer; + struct listnode *node, *nnode; + int already_confed; + + if (as == 0) + return BGP_ERR_INVALID_AS; + + /* Remember - were we doing confederation before? */ + already_confed = bgp_config_check (bgp, BGP_CONFIG_CONFEDERATION); + bgp->confed_id = as; + bgp_config_set (bgp, BGP_CONFIG_CONFEDERATION); + + /* If we were doing confederation already, this is just an external + AS change. Just reset EBGP sessions, not CONFED sessions. If we + were not doing confederation before, reset all EBGP sessions. */ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + /* We're looking for peers whose AS is not local or part of our + confederation.
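+ *
+ * (Aside on bgp_timers_set above: the keepalive is clamped to a third of
+ * the holdtime, so, for example:
+ *
+ *   bgp_timers_set (bgp, 60, 90) ;  /* default_keepalive becomes 30     */
+ *   bgp_timers_set (bgp, 10, 90) ;  /* stays 10, already below 90 / 3   */
+ * )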
*/ + if (already_confed) + { + if (peer_sort (peer) == BGP_PEER_EBGP) + { + peer->local_as = as; + peer->last_reset = PEER_DOWN_CONFED_ID_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + else + { + /* Not doing confederation before, so reset every non-local + session */ + if (peer_sort (peer) != BGP_PEER_IBGP) + { + /* Reset the local_as to be our EBGP one */ + if (peer_sort (peer) == BGP_PEER_EBGP) + peer->local_as = as; + peer->last_reset = PEER_DOWN_CONFED_ID_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + } + return 0; +} + +int +bgp_confederation_id_unset (struct bgp *bgp) +{ + struct peer *peer; + struct listnode *node, *nnode; + + bgp->confed_id = 0; + bgp_config_unset (bgp, BGP_CONFIG_CONFEDERATION); + + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + /* We're looking for peers whose AS is not local */ + if (peer_sort (peer) != BGP_PEER_IBGP) + { + peer->local_as = bgp->as; + peer->last_reset = PEER_DOWN_CONFED_ID_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + return 0; +} + +/* Is an AS part of the confed or not? */ +int +bgp_confederation_peers_check (struct bgp *bgp, as_t as) +{ + int i; + + if (! bgp) + return 0; + + for (i = 0; i < bgp->confed_peers_cnt; i++) + if (bgp->confed_peers[i] == as) + return 1; + + return 0; +} + +/* Add an AS to the confederation set. */ +int +bgp_confederation_peers_add (struct bgp *bgp, as_t as) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (! bgp) + return BGP_ERR_INVALID_BGP; + + if (bgp->as == as) + return BGP_ERR_INVALID_AS; + + if (bgp_confederation_peers_check (bgp, as)) + return -1; + + if (bgp->confed_peers) + bgp->confed_peers = XREALLOC (MTYPE_BGP_CONFED_LIST, + bgp->confed_peers, + (bgp->confed_peers_cnt + 1) * sizeof (as_t)); + else + bgp->confed_peers = XMALLOC (MTYPE_BGP_CONFED_LIST, + (bgp->confed_peers_cnt + 1) * sizeof (as_t)); + + bgp->confed_peers[bgp->confed_peers_cnt] = as; + bgp->confed_peers_cnt++; + + if (bgp_config_check (bgp, BGP_CONFIG_CONFEDERATION)) + { + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer->as == as) + { + peer->local_as = bgp->as; + peer->last_reset = PEER_DOWN_CONFED_PEER_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + } + return 0; +} + +/* Delete an AS from the confederation set. */ +int +bgp_confederation_peers_remove (struct bgp *bgp, as_t as) +{ + int i; + int j; + struct peer *peer; + struct listnode *node, *nnode; + + if (! bgp) + return -1; + + if (!
bgp_confederation_peers_check (bgp, as)) + return -1; + + for (i = 0; i < bgp->confed_peers_cnt; i++) + if (bgp->confed_peers[i] == as) + for(j = i + 1; j < bgp->confed_peers_cnt; j++) + bgp->confed_peers[j - 1] = bgp->confed_peers[j]; + + bgp->confed_peers_cnt--; + + if (bgp->confed_peers_cnt == 0) + { + if (bgp->confed_peers) + XFREE (MTYPE_BGP_CONFED_LIST, bgp->confed_peers); + bgp->confed_peers = NULL; + } + else + bgp->confed_peers = XREALLOC (MTYPE_BGP_CONFED_LIST, + bgp->confed_peers, + bgp->confed_peers_cnt * sizeof (as_t)); + + /* Now reset any peer whose remote AS has just been removed from the + CONFED */ + if (bgp_config_check (bgp, BGP_CONFIG_CONFEDERATION)) + { + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer->as == as) + { + peer->local_as = bgp->confed_id; + peer->last_reset = PEER_DOWN_CONFED_PEER_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + } + + return 0; +} + +/* Local preference configuration. */ +int +bgp_default_local_preference_set (struct bgp *bgp, u_int32_t local_pref) +{ + if (! bgp) + return -1; + + bgp->default_local_pref = local_pref; + + return 0; +} + +int +bgp_default_local_preference_unset (struct bgp *bgp) +{ + if (! bgp) + return -1; + + bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; + + return 0; +} + +/* If peer is RSERVER_CLIENT in at least one address family and is not a member + of a peer_group for that family, return 1. + Used to check whether the peer is included in list bgp->rsclient. */ +int +peer_rsclient_active (struct peer *peer) +{ + int i; + int j; + + for (i=AFI_IP; i < AFI_MAX; i++) + for (j=SAFI_UNICAST; j < SAFI_MAX; j++) + if (CHECK_FLAG(peer->af_flags[i][j], PEER_FLAG_RSERVER_CLIENT) + && ! peer->af_group[i][j]) + return 1; + return 0; +} + +/* Peer comparison function for sorting. */ +static int +peer_cmp (struct peer *p1, struct peer *p2) +{ + return sockunion_cmp (&p1->su, &p2->su); +} + +int +peer_af_flag_check (struct peer *peer, afi_t afi, safi_t safi, u_int32_t flag) +{ + return CHECK_FLAG (peer->af_flags[afi][safi], flag); +} + +/* Reset all address family specific configuration. */ +static void +peer_af_flag_reset (struct peer *peer, afi_t afi, safi_t safi) +{ + int i; + struct bgp_filter *filter; + char orf_name[BUFSIZ]; + + filter = &peer->filter[afi][safi]; + + /* Clear neighbor filter and route-map */ + for (i = FILTER_IN; i < FILTER_MAX; i++) + { + if (filter->dlist[i].name) + { + free (filter->dlist[i].name); + filter->dlist[i].name = NULL; + } + prefix_list_unset_ref(&filter->plist[i].ref) ; + if (filter->aslist[i].name) + { + free (filter->aslist[i].name); + filter->aslist[i].name = NULL; + } + } + for (i = RMAP_IN; i < RMAP_MAX; i++) + { + if (filter->map[i].name) + { + free (filter->map[i].name); + filter->map[i].name = NULL; + } + } + + /* Clear unsuppress map. */ + if (filter->usmap.name) + free (filter->usmap.name); + filter->usmap.name = NULL; + filter->usmap.map = NULL; + + /* Clear neighbor's all address family flags. */ + peer->af_flags[afi][safi] = 0; + + /* Clear neighbor's all address family sflags. */ + peer->af_sflags[afi][safi] = 0; + + /* Clear neighbor's all address family capabilities. */ + peer->af_cap[afi][safi] = 0; + + /* Clear ORF info */ + peer->orf_plist[afi][safi] = NULL; + sprintf (orf_name, "%s.%d.%d", peer->host, afi, safi); + prefix_bgp_orf_remove_all (orf_name); + + /* Set default neighbor send-community. */ + if (!
bgp_option_check (BGP_OPT_CONFIG_CISCO)) + { + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SEND_COMMUNITY); + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SEND_EXT_COMMUNITY); + } + + /* Clear neighbor default_originate_rmap */ + if (peer->default_rmap[afi][safi].name) + free (peer->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].name = NULL; + peer->default_rmap[afi][safi].map = NULL; + + /* Clear neighbor maximum-prefix */ + peer->pmax[afi][safi] = 0; + peer->pmax_threshold[afi][safi] = MAXIMUM_PREFIX_THRESHOLD_DEFAULT; +} + +/* peer global config reset */ +static void +peer_global_config_reset (struct peer *peer) +{ + peer->weight = 0; + peer->change_local_as = 0; + peer->ttl = (peer_sort (peer) == BGP_PEER_IBGP ? 255 : 1); + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + peer->flags = 0; + peer->config = 0; + peer->holdtime = 0; + peer->keepalive = 0; + peer->connect = 0; + peer->v_connect = BGP_DEFAULT_CONNECT_RETRY; +} + +/* Check peer's AS number and determine whether this peer is IBGP or EBGP */ +int +peer_sort (struct peer *peer) +{ + struct bgp *bgp; + + bgp = peer->bgp; + + /* Peer-group */ + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (peer->as) + return (bgp->as == peer->as ? BGP_PEER_IBGP : BGP_PEER_EBGP); + else + { + struct peer *peer1; + peer1 = listnode_head (peer->group->peer); + if (peer1) + return (peer1->local_as == peer1->as + ? BGP_PEER_IBGP : BGP_PEER_EBGP); + } + return BGP_PEER_INTERNAL; + } + + /* Normal peer */ + if (bgp && CHECK_FLAG (bgp->config, BGP_CONFIG_CONFEDERATION)) + { + if (peer->local_as == 0) + return BGP_PEER_INTERNAL; + + if (peer->local_as == peer->as) + { + if (peer->local_as == bgp->confed_id) + return BGP_PEER_EBGP; + else + return BGP_PEER_IBGP; + } + + if (bgp_confederation_peers_check (bgp, peer->as)) + return BGP_PEER_CONFED; + + return BGP_PEER_EBGP; + } + else + { + return (peer->local_as == 0 + ? BGP_PEER_INTERNAL : peer->local_as == peer->as + ? BGP_PEER_IBGP : BGP_PEER_EBGP); + } +} + + +/* increase reference count on a struct peer */ +struct peer * +peer_lock (struct peer *peer) +{ + assert (peer && (peer->lock >= 0)); + + peer->lock++; + + return peer; +} + +/* decrease reference count on a struct peer + * struct peer is freed and NULL returned if last reference + */ +struct peer * +peer_unlock (struct peer *peer) +{ + assert (peer && (peer->lock > 0)); + + peer->lock--; + + if (peer->lock == 0) + { +#if 0 + zlog_debug ("unlocked and freeing"); + zlog_backtrace (LOG_DEBUG); +#endif + peer_free (peer); + return NULL; + } + +#if 0 + if (peer->lock == 1) + { + zlog_debug ("unlocked to 1"); + zlog_backtrace (LOG_DEBUG); + } +#endif + + return peer; +} + + +/* Make accept BGP peer. Called from bgp_accept (). */ +struct peer * +peer_create_accept (struct bgp *bgp) +{ + struct peer *peer; + + peer = peer_new (bgp); + + peer = peer_lock (peer); /* bgp peer list reference */ + listnode_add_sort (bgp->peer, peer); + + return peer; +} + +/* Change peer's AS number. */ +static void +peer_as_change (struct peer *peer, as_t as) +{ + int type; + + /* Stop peer. */ + if (!
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_REMOTE_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + type = peer_sort (peer); + peer->as = as; + + if (bgp_config_check (peer->bgp, BGP_CONFIG_CONFEDERATION) + && ! bgp_confederation_peers_check (peer->bgp, as) + && peer->bgp->as != as) + peer->local_as = peer->bgp->confed_id; + else + peer->local_as = peer->bgp->as; + + /* Advertisement-interval reset */ + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + /* TTL reset */ + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->ttl = 255; + else if (type == BGP_PEER_IBGP) + peer->ttl = 1; + + /* reflector-client reset */ + if (peer_sort (peer) != BGP_PEER_IBGP) + { + UNSET_FLAG (peer->af_flags[AFI_IP][SAFI_UNICAST], + PEER_FLAG_REFLECTOR_CLIENT); + UNSET_FLAG (peer->af_flags[AFI_IP][SAFI_MULTICAST], + PEER_FLAG_REFLECTOR_CLIENT); + UNSET_FLAG (peer->af_flags[AFI_IP][SAFI_MPLS_VPN], + PEER_FLAG_REFLECTOR_CLIENT); + UNSET_FLAG (peer->af_flags[AFI_IP6][SAFI_UNICAST], + PEER_FLAG_REFLECTOR_CLIENT); + UNSET_FLAG (peer->af_flags[AFI_IP6][SAFI_MULTICAST], + PEER_FLAG_REFLECTOR_CLIENT); + } + + /* local-as reset */ + if (peer_sort (peer) != BGP_PEER_EBGP) + { + peer->change_local_as = 0; + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + } +} + +/* If peer does not exist, create new one. If peer already exists, + set AS number to the peer. */ +int +peer_remote_as (struct bgp *bgp, union sockunion *su, as_t *as, + afi_t afi, safi_t safi) +{ + struct peer *peer; + as_t local_as; + + peer = peer_lookup (bgp, su); + + if (peer) + { + /* When this peer is a member of peer-group. */ + if (peer->group) + { + if (peer->group->conf->as) + { + /* Return peer group's AS number. */ + *as = peer->group->conf->as; + return BGP_ERR_PEER_GROUP_MEMBER; + } + if (peer_sort (peer->group->conf) == BGP_PEER_IBGP) + { + if (bgp->as != *as) + { + *as = peer->as; + return BGP_ERR_PEER_GROUP_PEER_TYPE_DIFFERENT; + } + } + else + { + if (bgp->as == *as) + { + *as = peer->as; + return BGP_ERR_PEER_GROUP_PEER_TYPE_DIFFERENT; + } + } + } + + /* Existing peer's AS number change. */ + if (peer->as != *as) + peer_as_change (peer, *as); + } + else + { + + /* If the peer is not part of our confederation, and it's not an + iBGP peer then spoof the source AS */ + if (bgp_config_check (bgp, BGP_CONFIG_CONFEDERATION) + && ! bgp_confederation_peers_check (bgp, *as) + && bgp->as != *as) + local_as = bgp->confed_id; + else + local_as = bgp->as; + + /* If this is IPv4 unicast configuration and "no bgp default + ipv4-unicast" is specified. */ + + if (bgp_flag_check (bgp, BGP_FLAG_NO_DEFAULT_IPV4) + && afi == AFI_IP && safi == SAFI_UNICAST) + peer = peer_create (su, bgp, local_as, *as, 0, 0); + else + peer = peer_create (su, bgp, local_as, *as, afi, safi); + } + + return 0; +} + +/* Activate the peer or peer group for specified AFI and SAFI. */ +int +peer_activate (struct peer *peer, afi_t afi, safi_t safi) +{ + int active; + + if (peer->afc[afi][safi]) + return 0; + + /* Activate the address family configuration. */ + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + peer->afc[afi][safi] = 1; + else + { + active = peer_active (peer); + + peer->afc[afi][safi] = 1; + + if (!
active && peer_active (peer)) + bgp_peer_enable (peer); + else +#if 0 + /* TODO: Dynamic capability */ + { + if (peer->status == Established) + { + if (CHECK_FLAG (peer->cap, PEER_CAP_DYNAMIC_RCV)) + { + peer->afc_adv[afi][safi] = 1; + bgp_capability_send (peer, afi, safi, + CAPABILITY_CODE_MP, + CAPABILITY_ACTION_SET); + if (peer->afc_recv[afi][safi]) + { + peer->afc_nego[afi][safi] = 1; + bgp_announce_route (peer, afi, safi); + } + } + else +#endif + { + peer->last_reset = PEER_DOWN_AF_ACTIVATE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } +#if 0 + } + } +#endif + } + return 0; +} + +int +peer_deactivate (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_group *group; + struct peer *peer1; + struct listnode *node, *nnode; + + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + group = peer->group; + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer1)) + { + if (peer1->af_group[afi][safi]) + return BGP_ERR_PEER_GROUP_MEMBER_EXISTS; + } + } + else + { + if (peer->af_group[afi][safi]) + return BGP_ERR_PEER_BELONGS_TO_GROUP; + } + + if (! peer->afc[afi][safi]) + return 0; + + /* De-activate the address family configuration. */ + peer->afc[afi][safi] = 0; + peer_af_flag_reset (peer, afi, safi); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (peer->state == bgp_peer_sEstablished) + { + if (CHECK_FLAG (peer->cap, PEER_CAP_DYNAMIC_RCV)) + { + peer->afc_adv[afi][safi] = 0; + peer->afc_nego[afi][safi] = 0; + + if (peer_active_nego (peer)) + { + bgp_capability_send (peer, afi, safi, + CAPABILITY_CODE_MP, + CAPABILITY_ACTION_UNSET); + bgp_clear_route (peer, afi, safi, BGP_CLEAR_ROUTE_NORMAL); + peer->pcount[afi][safi] = 0; + } + else + { + peer->last_reset = PEER_DOWN_NEIGHBOR_DELETE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + else + { + peer->last_reset = PEER_DOWN_NEIGHBOR_DELETE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + } + return 0; +} + +static int +peer_group_cmp (struct peer_group *g1, struct peer_group *g2) +{ + return strcmp (g1->name, g2->name); +} + +/* If peer is configured at least one address family return 1. */ +static int +peer_group_active (struct peer *peer) +{ + if (peer->af_group[AFI_IP][SAFI_UNICAST] + || peer->af_group[AFI_IP][SAFI_MULTICAST] + || peer->af_group[AFI_IP][SAFI_MPLS_VPN] + || peer->af_group[AFI_IP6][SAFI_UNICAST] + || peer->af_group[AFI_IP6][SAFI_MULTICAST]) + return 1; + return 0; +} + +/* Peer group configuration. */ +static struct peer_group * +peer_group_new (void) +{ + return (struct peer_group *) XCALLOC (MTYPE_PEER_GROUP, + sizeof (struct peer_group)); +} + +static void +peer_group_free (struct peer_group *group) +{ + XFREE (MTYPE_PEER_GROUP, group); +} + +struct peer_group * +peer_group_lookup (struct bgp *bgp, const char *name) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group)) + { + if (strcmp (group->name, name) == 0) + return group; + } + return NULL; +} + +struct peer_group * +peer_group_get (struct bgp *bgp, const char *name) +{ + struct peer_group *group; + + group = peer_group_lookup (bgp, name); + if (group) + return group; + + group = peer_group_new (); + group->bgp = bgp; + group->name = strdup (name); + group->peer = list_new (); + group->conf = peer_new (bgp); + if (!
bgp_flag_check (bgp, BGP_FLAG_NO_DEFAULT_IPV4)) + group->conf->afc[AFI_IP][SAFI_UNICAST] = 1; + group->conf->host = XSTRDUP (MTYPE_BGP_PEER_HOST, name); + group->conf->group = group; + group->conf->as = 0; + group->conf->ttl = 1; + group->conf->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + UNSET_FLAG (group->conf->config, PEER_CONFIG_TIMER); + UNSET_FLAG (group->conf->config, PEER_CONFIG_CONNECT); + group->conf->keepalive = 0; + group->conf->holdtime = 0; + group->conf->connect = 0; + SET_FLAG (group->conf->sflags, PEER_STATUS_GROUP); + listnode_add_sort (bgp->group, group); + + return group; +} + +static void +peer_group2peer_config_copy (struct peer_group *group, struct peer *peer, + afi_t afi, safi_t safi) +{ + int in = FILTER_IN; + int out = FILTER_OUT; + struct peer *conf; + struct bgp_filter *pfilter; + struct bgp_filter *gfilter; + + conf = group->conf; + pfilter = &peer->filter[afi][safi]; + gfilter = &conf->filter[afi][safi]; + + /* remote-as */ + if (conf->as) + peer->as = conf->as; + + /* local-as */ + if (conf->change_local_as) + peer->change_local_as = conf->change_local_as; + + /* TTL */ + peer->ttl = conf->ttl; + + /* Weight */ + peer->weight = conf->weight; + + /* peer flags apply */ + peer->flags = conf->flags; + /* peer af_flags apply */ + peer->af_flags[afi][safi] = conf->af_flags[afi][safi]; + /* peer config apply */ + peer->config = conf->config; + + /* peer timers apply */ + peer->holdtime = conf->holdtime; + peer->keepalive = conf->keepalive; + peer->connect = conf->connect; + if (CHECK_FLAG (conf->config, PEER_CONFIG_CONNECT)) + peer->v_connect = conf->connect; + else + peer->v_connect = BGP_DEFAULT_CONNECT_RETRY; + + /* advertisement-interval reset */ + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + /* password apply */ + if (peer->password) + XFREE (MTYPE_PEER_PASSWORD, peer->password); + + if (conf->password) + peer->password = XSTRDUP (MTYPE_PEER_PASSWORD, conf->password); + else + peer->password = NULL; + + /* maximum-prefix */ + peer->pmax[afi][safi] = conf->pmax[afi][safi]; + peer->pmax_threshold[afi][safi] = conf->pmax_threshold[afi][safi]; + peer->pmax_restart[afi][safi] = conf->pmax_restart[afi][safi]; + + /* allowas-in */ + peer->allowas_in[afi][safi] = conf->allowas_in[afi][safi]; + + /* route-server-client */ + if (CHECK_FLAG(conf->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + { + /* Make peer's RIB point to group's RIB. */ + peer->rib[afi][safi] = group->conf->rib[afi][safi]; + + /* Import policy. */ + if (pfilter->map[RMAP_IMPORT].name) + free (pfilter->map[RMAP_IMPORT].name); + if (gfilter->map[RMAP_IMPORT].name) + { + pfilter->map[RMAP_IMPORT].name = strdup (gfilter->map[RMAP_IMPORT].name); + pfilter->map[RMAP_IMPORT].map = gfilter->map[RMAP_IMPORT].map; + } + else + { + pfilter->map[RMAP_IMPORT].name = NULL; + pfilter->map[RMAP_IMPORT].map = NULL; + } + + /* Export policy. */ + if (gfilter->map[RMAP_EXPORT].name && !
pfilter->map[RMAP_EXPORT].name) + { + pfilter->map[RMAP_EXPORT].name = strdup (gfilter->map[RMAP_EXPORT].name); + pfilter->map[RMAP_EXPORT].map = gfilter->map[RMAP_EXPORT].map; + } + } + + /* default-originate route-map */ + if (conf->default_rmap[afi][safi].name) + { + if (peer->default_rmap[afi][safi].name) + free (peer->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].name = strdup (conf->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].map = conf->default_rmap[afi][safi].map; + } + + /* update-source apply */ + if (conf->update_source) + { + if (peer->update_source) + sockunion_free (peer->update_source); + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + peer->update_source = sockunion_dup (conf->update_source); + } + else if (conf->update_if) + { + if (peer->update_if) + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + peer->update_if = XSTRDUP (MTYPE_PEER_UPDATE_SOURCE, conf->update_if); + } + + /* inbound filter apply */ + if (gfilter->dlist[in].name && ! pfilter->dlist[in].name) + { + if (pfilter->dlist[in].name) + free (pfilter->dlist[in].name); + pfilter->dlist[in].name = strdup (gfilter->dlist[in].name); + pfilter->dlist[in].alist = gfilter->dlist[in].alist; + } + if (! pfilter->plist[in].ref) + prefix_list_copy_ref(&pfilter->plist[in].ref, gfilter->plist[in].ref) ; + if (gfilter->aslist[in].name && ! pfilter->aslist[in].name) + { + if (pfilter->aslist[in].name) + free (pfilter->aslist[in].name); + pfilter->aslist[in].name = strdup (gfilter->aslist[in].name); + pfilter->aslist[in].aslist = gfilter->aslist[in].aslist; + } + if (gfilter->map[RMAP_IN].name && ! pfilter->map[RMAP_IN].name) + { + if (pfilter->map[RMAP_IN].name) + free (pfilter->map[RMAP_IN].name); + pfilter->map[RMAP_IN].name = strdup (gfilter->map[RMAP_IN].name); + pfilter->map[RMAP_IN].map = gfilter->map[RMAP_IN].map; + } + + /* outbound filter apply */ + if (gfilter->dlist[out].name) + { + if (pfilter->dlist[out].name) + free (pfilter->dlist[out].name); + pfilter->dlist[out].name = strdup (gfilter->dlist[out].name); + pfilter->dlist[out].alist = gfilter->dlist[out].alist; + } + else + { + if (pfilter->dlist[out].name) + free (pfilter->dlist[out].name); + pfilter->dlist[out].name = NULL; + pfilter->dlist[out].alist = NULL; + } + + prefix_list_copy_ref(&pfilter->plist[out].ref, gfilter->plist[out].ref) ; + + if (gfilter->aslist[out].name) + { + if (pfilter->aslist[out].name) + free (pfilter->aslist[out].name); + pfilter->aslist[out].name = strdup (gfilter->aslist[out].name); + pfilter->aslist[out].aslist = gfilter->aslist[out].aslist; + } + else + { + if (pfilter->aslist[out].name) + free (pfilter->aslist[out].name); + pfilter->aslist[out].name = NULL; + pfilter->aslist[out].aslist = NULL; + } + if (gfilter->map[RMAP_OUT].name) + { + if (pfilter->map[RMAP_OUT].name) + free (pfilter->map[RMAP_OUT].name); + pfilter->map[RMAP_OUT].name = strdup (gfilter->map[RMAP_OUT].name); + pfilter->map[RMAP_OUT].map = gfilter->map[RMAP_OUT].map; + } + else + { + if (pfilter->map[RMAP_OUT].name) + free (pfilter->map[RMAP_OUT].name); + pfilter->map[RMAP_OUT].name = NULL; + pfilter->map[RMAP_OUT].map = NULL; + } + + /* RS-client's import/export route-maps. 
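+ *
+ * (Note the asymmetry above: inbound filters are copied from the group
+ * only where the peer has none of its own, while outbound filters always
+ * mirror the group. Schematically, for the distribute-list case:
+ *
+ *   if (gfilter->dlist[in].name && ! pfilter->dlist[in].name)
+ *     ... copy in ;              /* group fills a gap only              */
+ *   if (gfilter->dlist[out].name)
+ *     ... copy out ;             /* group always wins                   */
+ *   else
+ *     ... clear out ;
+ * )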
*/ + if (gfilter->map[RMAP_IMPORT].name) + { + if (pfilter->map[RMAP_IMPORT].name) + free (pfilter->map[RMAP_IMPORT].name); + pfilter->map[RMAP_IMPORT].name = strdup (gfilter->map[RMAP_IMPORT].name); + pfilter->map[RMAP_IMPORT].map = gfilter->map[RMAP_IMPORT].map; + } + else + { + if (pfilter->map[RMAP_IMPORT].name) + free (pfilter->map[RMAP_IMPORT].name); + pfilter->map[RMAP_IMPORT].name = NULL; + pfilter->map[RMAP_IMPORT].map = NULL; + } + if (gfilter->map[RMAP_EXPORT].name && ! pfilter->map[RMAP_EXPORT].name) + { + if (pfilter->map[RMAP_EXPORT].name) + free (pfilter->map[RMAP_EXPORT].name); + pfilter->map[RMAP_EXPORT].name = strdup (gfilter->map[RMAP_EXPORT].name); + pfilter->map[RMAP_EXPORT].map = gfilter->map[RMAP_EXPORT].map; + } + + if (gfilter->usmap.name) + { + if (pfilter->usmap.name) + free (pfilter->usmap.name); + pfilter->usmap.name = strdup (gfilter->usmap.name); + pfilter->usmap.map = gfilter->usmap.map; + } + else + { + if (pfilter->usmap.name) + free (pfilter->usmap.name); + pfilter->usmap.name = NULL; + pfilter->usmap.map = NULL; + } +} + +/* Peer group's remote AS configuration. */ +int +peer_group_remote_as (struct bgp *bgp, const char *group_name, as_t *as) +{ + struct peer_group *group; + struct peer *peer; + struct listnode *node, *nnode; + + group = peer_group_lookup (bgp, group_name); + if (! group) + return -1; + + if (group->conf->as == *as) + return 0; + + /* When we setup peer-group AS number all peer group member's AS + number must be updated to same number. */ + peer_as_change (group->conf, *as); + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer->as != *as) + peer_as_change (peer, *as); + } + + return 0; +} + +int +peer_group_delete (struct peer_group *group) +{ + struct bgp *bgp; + struct peer *peer; + struct listnode *node, *nnode; + + bgp = group->bgp; + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->group = NULL; + peer_delete (peer); + } + list_delete (group->peer); + + free (group->name); + group->name = NULL; + + group->conf->group = NULL; + peer_delete (group->conf); + + /* Delete from all peer_group list. */ + listnode_delete (bgp->group, group); + + peer_group_free (group); + + return 0; +} + +int +peer_group_remote_as_delete (struct peer_group *group) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (! group->conf->as) + return 0; + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->group = NULL; + peer_delete (peer); + } + list_delete_all_node (group->peer); + + group->conf->as = 0; + + return 0; +} + +/* Bind specified peer to peer group. */ +int +peer_group_bind (struct bgp *bgp, union sockunion *su, + struct peer_group *group, afi_t afi, safi_t safi, as_t *as) +{ + struct peer *peer; + int first_member = 0; + + /* Check peer group's address family. */ + if (! group->conf->afc[afi][safi]) + return BGP_ERR_PEER_GROUP_AF_UNCONFIGURED; + + /* Lookup the peer. */ + peer = peer_lookup (bgp, su); + + /* Create a new peer. */ + if (! peer) + { + if (! group->conf->as) + return BGP_ERR_PEER_GROUP_NO_REMOTE_AS; + + peer = peer_create (su, bgp, bgp->as, group->conf->as, afi, safi); + peer->group = group; + peer->af_group[afi][safi] = 1; + + peer = peer_lock (peer); /* group->peer list reference */ + listnode_add (group->peer, peer); + peer_group2peer_config_copy (group, peer, afi, safi); + + return 0; + } + + /* When the peer already belongs to peer group, check the consistency. 
*/ + if (peer->af_group[afi][safi]) + { + if (strcmp (peer->group->name, group->name) != 0) + return BGP_ERR_PEER_GROUP_CANT_CHANGE; + + return 0; + } + + /* Check current peer group configuration. */ + if (peer_group_active (peer) + && strcmp (peer->group->name, group->name) != 0) + return BGP_ERR_PEER_GROUP_MISMATCH; + + if (! group->conf->as) + { + if (peer_sort (group->conf) != BGP_PEER_INTERNAL + && peer_sort (group->conf) != peer_sort (peer)) + { + if (as) + *as = peer->as; + return BGP_ERR_PEER_GROUP_PEER_TYPE_DIFFERENT; + } + + if (peer_sort (group->conf) == BGP_PEER_INTERNAL) + first_member = 1; + } + + peer->af_group[afi][safi] = 1; + peer->afc[afi][safi] = 1; + if (! peer->group) + { + peer->group = group; + + peer = peer_lock (peer); /* group->peer list reference */ + listnode_add (group->peer, peer); + } + else + assert (group && peer->group == group); + + if (first_member) + { + /* Advertisement-interval reset */ + if (peer_sort (group->conf) == BGP_PEER_IBGP) + group->conf->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + group->conf->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + /* ebgp-multihop reset */ + if (peer_sort (group->conf) == BGP_PEER_IBGP) + group->conf->ttl = 255; + + /* local-as reset */ + if (peer_sort (group->conf) != BGP_PEER_EBGP) + { + group->conf->change_local_as = 0; + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + } + } + + if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + { + struct listnode *pn; + + /* If it's not configured as RSERVER_CLIENT in any other address + family, without being member of a peer_group, remove it from + list bgp->rsclient.*/ + if (! peer_rsclient_active (peer) + && (pn = listnode_lookup (bgp->rsclient, peer))) + { + peer_unlock (peer); /* peer rsclient reference */ + list_delete_node (bgp->rsclient, pn); + + /* Clear our own rsclient rib for this afi/safi. */ + bgp_clear_route (peer, afi, safi, BGP_CLEAR_ROUTE_MY_RSCLIENT); + } + + bgp_table_finish (&peer->rib[afi][safi]); + + /* Import policy. */ + if (peer->filter[afi][safi].map[RMAP_IMPORT].name) + { + free (peer->filter[afi][safi].map[RMAP_IMPORT].name); + peer->filter[afi][safi].map[RMAP_IMPORT].name = NULL; + peer->filter[afi][safi].map[RMAP_IMPORT].map = NULL; + } + + /* Export policy. */ + if (! CHECK_FLAG(group->conf->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT) + && peer->filter[afi][safi].map[RMAP_EXPORT].name) + { + free (peer->filter[afi][safi].map[RMAP_EXPORT].name); + peer->filter[afi][safi].map[RMAP_EXPORT].name = NULL; + peer->filter[afi][safi].map[RMAP_EXPORT].map = NULL; + } + } + + peer_group2peer_config_copy (group, peer, afi, safi); + + peer->last_reset = PEER_DOWN_RMAP_BIND; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + + return 0; +} + +int +peer_group_unbind (struct bgp *bgp, struct peer *peer, + struct peer_group *group, afi_t afi, safi_t safi) +{ + if (! peer->af_group[afi][safi]) + return 0; + + if (group != peer->group) + return BGP_ERR_PEER_GROUP_MISMATCH; + + peer->af_group[afi][safi] = 0; + peer->afc[afi][safi] = 0; + peer_af_flag_reset (peer, afi, safi); + + if (peer->rib[afi][safi]) + peer->rib[afi][safi] = NULL; + + if (! 
peer_group_active (peer)) + { + assert (listnode_lookup (group->peer, peer)); + peer_unlock (peer); /* peer group list reference */ + listnode_delete (group->peer, peer); + peer->group = NULL; + if (group->conf->as) + { + peer_delete (peer); + return 0; + } + peer_global_config_reset (peer); + } + + peer->last_reset = PEER_DOWN_RMAP_UNBIND; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; +} + +/* BGP instance creation by `router bgp' commands. */ +static struct bgp * +bgp_create (as_t *as, const char *name) +{ + struct bgp *bgp; + afi_t afi; + safi_t safi; + + if ( (bgp = XCALLOC (MTYPE_BGP, sizeof (struct bgp))) == NULL) + return NULL; + + bgp_lock (bgp); + bgp->peer_self = peer_new (bgp); + bgp->peer_self->host = XSTRDUP (MTYPE_BGP_PEER_HOST, "Static announcement"); + + bgp->peer = list_new (); + bgp->peer->cmp = (int (*)(void *, void *)) peer_cmp; + + bgp->group = list_new (); + bgp->group->cmp = (int (*)(void *, void *)) peer_group_cmp; + + bgp->rsclient = list_new (); + bgp->rsclient->cmp = (int (*)(void*, void*)) peer_cmp; + + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + bgp->route[afi][safi] = bgp_table_init (afi, safi); + bgp->aggregate[afi][safi] = bgp_table_init (afi, safi); + bgp->rib[afi][safi] = bgp_table_init (afi, safi); + } + + bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; + bgp->default_holdtime = BGP_DEFAULT_HOLDTIME; + bgp->default_keepalive = BGP_DEFAULT_KEEPALIVE; + bgp->restart_time = BGP_DEFAULT_RESTART_TIME; + bgp->stalepath_time = BGP_DEFAULT_STALEPATH_TIME; + + bgp->as = *as; + + if (name) + bgp->name = strdup (name); + + return bgp; +} + +/* Return first entry of BGP. */ +struct bgp * +bgp_get_default (void) +{ + if (bm->bgp->head) + return (listgetdata (listhead (bm->bgp))); + return NULL; +} + +/* Lookup BGP entry. */ +struct bgp * +bgp_lookup (as_t as, const char *name) +{ + struct bgp *bgp; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS (bm->bgp, node, nnode, bgp)) + if (bgp->as == as + && ((bgp->name == NULL && name == NULL) + || (bgp->name && name && strcmp (bgp->name, name) == 0))) + return bgp; + return NULL; +} + +/* Lookup BGP structure by view name. */ +struct bgp * +bgp_lookup_by_name (const char *name) +{ + struct bgp *bgp; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS (bm->bgp, node, nnode, bgp)) + if ((bgp->name == NULL && name == NULL) + || (bgp->name && name && strcmp (bgp->name, name) == 0)) + return bgp; + return NULL; +} + +/* Called from VTY commands. */ +int +bgp_get (struct bgp **bgp_val, as_t *as, const char *name) +{ + struct bgp *bgp; + + /* Multiple instance check. */ + if (bgp_option_check (BGP_OPT_MULTIPLE_INSTANCE)) + { + if (name) + bgp = bgp_lookup_by_name (name); + else + bgp = bgp_get_default (); + + /* Already exists. */ + if (bgp) + { + if (bgp->as != *as) + { + *as = bgp->as; + return BGP_ERR_INSTANCE_MISMATCH; + } + *bgp_val = bgp; + return 0; + } + } + else + { + /* BGP instance name can not be specified for single instance. */ + if (name) + return BGP_ERR_MULTIPLE_INSTANCE_NOT_SET; + + /* Get default BGP structure if exists. */ + bgp = bgp_get_default (); + + if (bgp) + { + if (bgp->as != *as) + { + *as = bgp->as; + return BGP_ERR_AS_MISMATCH; + } + *bgp_val = bgp; + return 0; + } + } + + bgp = bgp_create (as, name); + listnode_add (bm->bgp, bgp); + bgp_router_id_set(bgp, &router_id_zebra); + *bgp_val = bgp; + + return 0; +} + +/* Delete BGP instance. 
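+ *
+ * Tears down in order: static routes, redistribution, peers, peer-groups
+ * and the self peer, then removes the instance from the master list and
+ * drops the initial reference; the structure itself is freed only when
+ * the lock count reaches zero (see bgp_unlock/bgp_free below).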
*/
+int
+bgp_delete (struct bgp *bgp)
+{
+  struct peer *peer;
+  struct peer_group *group;
+  struct listnode *node;
+  struct listnode *next;
+  afi_t afi;
+  int i;
+
+  /* Delete static routes. */
+  bgp_static_delete (bgp);
+
+  /* Unset redistribution. */
+  for (afi = AFI_IP; afi < AFI_MAX; afi++)
+    for (i = 0; i < ZEBRA_ROUTE_MAX; i++)
+      if (i != ZEBRA_ROUTE_BGP)
+        bgp_redistribute_unset (bgp, afi, i);
+
+  for (ALL_LIST_ELEMENTS (bgp->peer, node, next, peer))
+    peer_delete (peer);
+
+  for (ALL_LIST_ELEMENTS (bgp->group, node, next, group))
+    peer_group_delete (group);
+
+  assert (listcount (bgp->rsclient) == 0);
+
+  if (bgp->peer_self) {
+    peer_delete(bgp->peer_self);
+    bgp->peer_self = NULL;
+  }
+
+  /* Remove visibility via the master list - there may however still be
+   * routes to be processed that still reference the struct bgp.
+   */
+  listnode_delete (bm->bgp, bgp);
+
+  bgp_unlock(bgp);  /* initial reference */
+
+  return 0;
+}
+
+static void bgp_free (struct bgp *);
+
+void
+bgp_lock (struct bgp *bgp)
+{
+  ++bgp->lock;
+}
+
+void
+bgp_unlock(struct bgp *bgp)
+{
+  assert(bgp->lock > 0);
+  if (--bgp->lock == 0)
+    bgp_free (bgp);
+}
+
+static void
+bgp_free (struct bgp *bgp)
+{
+  afi_t afi;
+  safi_t safi;
+
+  list_delete (bgp->group);
+  list_delete (bgp->peer);
+  list_delete (bgp->rsclient);
+
+  if (bgp->name)
+    free (bgp->name);
+
+  for (afi = AFI_IP; afi < AFI_MAX; afi++)
+    for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
+      {
+        if (bgp->route[afi][safi])
+          bgp_table_finish (&bgp->route[afi][safi]);
+        if (bgp->aggregate[afi][safi])
+          bgp_table_finish (&bgp->aggregate[afi][safi]) ;
+        if (bgp->rib[afi][safi])
+          bgp_table_finish (&bgp->rib[afi][safi]);
+      }
+  XFREE (MTYPE_BGP, bgp);
+}
+
+struct peer *
+peer_lookup (struct bgp *bgp, union sockunion *su)
+{
+  struct peer *peer;
+  struct listnode *node, *nnode;
+
+  if (bgp != NULL)
+    {
+      for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
+        if (sockunion_same (&peer->su, su)
+            && ! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER))
+          return peer;
+    }
+  else if (bm->bgp != NULL)
+    {
+      struct listnode *bgpnode, *nbgpnode;
+
+      for (ALL_LIST_ELEMENTS (bm->bgp, bgpnode, nbgpnode, bgp))
+        for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
+          if (sockunion_same (&peer->su, su)
+              && ! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER))
+            return peer;
+    }
+  return NULL;
+}
+
+struct peer *
+peer_lookup_with_open (union sockunion *su, as_t remote_as,
+                       struct in_addr *remote_id, int *as)
+{
+  struct peer *peer;
+  struct listnode *node;
+  struct listnode *bgpnode;
+  struct bgp *bgp;
+
+  if (! bm->bgp)
+    return NULL;
+
+  for (ALL_LIST_ELEMENTS_RO (bm->bgp, bgpnode, bgp))
+    {
+      for (ALL_LIST_ELEMENTS_RO (bgp->peer, node, peer))
+        {
+          if (sockunion_same (&peer->su, su)
+              && ! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER))
+            {
+              if (peer->as == remote_as
+                  && peer->remote_id.s_addr == remote_id->s_addr)
+                return peer;
+              if (peer->as == remote_as)
+                *as = 1;
+            }
+        }
+
+      for (ALL_LIST_ELEMENTS_RO (bgp->peer, node, peer))
+        {
+          if (sockunion_same (&peer->su, su)
+              && ! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER))
+            {
+              if (peer->as == remote_as
+                  && peer->remote_id.s_addr == 0)
+                return peer;
+              if (peer->as == remote_as)
+                *as = 1;
+            }
+        }
+    }
+  return NULL;
+}
+
+/* If the peer is configured for at least one address family, return 1.
*/
+int
+peer_active (struct peer *peer)
+{
+  if (peer->afc[AFI_IP][SAFI_UNICAST]
+      || peer->afc[AFI_IP][SAFI_MULTICAST]
+      || peer->afc[AFI_IP][SAFI_MPLS_VPN]
+      || peer->afc[AFI_IP6][SAFI_UNICAST]
+      || peer->afc[AFI_IP6][SAFI_MULTICAST])
+    return 1;
+  return 0;
+}
+
+/* If the peer has negotiated at least one address family, return 1. */
+int
+peer_active_nego (struct peer *peer)
+{
+  if (peer->afc_nego[AFI_IP][SAFI_UNICAST]
+      || peer->afc_nego[AFI_IP][SAFI_MULTICAST]
+      || peer->afc_nego[AFI_IP][SAFI_MPLS_VPN]
+      || peer->afc_nego[AFI_IP6][SAFI_UNICAST]
+      || peer->afc_nego[AFI_IP6][SAFI_MULTICAST])
+    return 1;
+  return 0;
+}
+
+/* peer_flag_change_type. */
+enum peer_change_type
+{
+  peer_change_none,
+  peer_change_reset,
+  peer_change_reset_in,
+  peer_change_reset_out,
+};
+
+static void
+peer_change_action (struct peer *peer, afi_t afi, safi_t safi,
+                    enum peer_change_type type)
+{
+  if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    return;
+
+  if (type == peer_change_reset)
+    bgp_notify_send (peer, BGP_NOTIFY_CEASE,
+                     BGP_NOTIFY_CEASE_CONFIG_CHANGE);
+  else if (type == peer_change_reset_in)
+    {
+      if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV)
+          || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV))
+        bgp_route_refresh_send (peer, afi, safi, 0, 0, 0);
+      else
+        bgp_notify_send (peer, BGP_NOTIFY_CEASE,
+                         BGP_NOTIFY_CEASE_CONFIG_CHANGE);
+    }
+  else if (type == peer_change_reset_out)
+    bgp_announce_route (peer, afi, safi);
+}
+
+struct peer_flag_action
+{
+  /* Peer's flag. */
+  u_int32_t flag;
+
+  /* This flag cannot be set for a peer-group member. */
+  u_char not_for_member;
+
+  /* Action when the flag is changed. */
+  enum peer_change_type type;
+
+  /* Peer down cause */
+  u_char peer_down;
+};
+
+static const struct peer_flag_action peer_flag_action_list[] =
+  {
+    { PEER_FLAG_PASSIVE, 0, peer_change_reset },
+    { PEER_FLAG_SHUTDOWN, 0, peer_change_reset },
+    { PEER_FLAG_DONT_CAPABILITY, 0, peer_change_none },
+    { PEER_FLAG_OVERRIDE_CAPABILITY, 0, peer_change_none },
+    { PEER_FLAG_STRICT_CAP_MATCH, 0, peer_change_none },
+    { PEER_FLAG_DYNAMIC_CAPABILITY, 0, peer_change_reset },
+    { PEER_FLAG_DISABLE_CONNECTED_CHECK, 0, peer_change_reset },
+    { 0, 0, 0 }
+  };
+
+static const struct peer_flag_action peer_af_flag_action_list[] =
+  {
+    { PEER_FLAG_NEXTHOP_SELF, 1, peer_change_reset_out },
+    { PEER_FLAG_SEND_COMMUNITY, 1, peer_change_reset_out },
+    { PEER_FLAG_SEND_EXT_COMMUNITY, 1, peer_change_reset_out },
+    { PEER_FLAG_SOFT_RECONFIG, 0, peer_change_reset_in },
+    { PEER_FLAG_REFLECTOR_CLIENT, 1, peer_change_reset },
+    { PEER_FLAG_RSERVER_CLIENT, 1, peer_change_reset },
+    { PEER_FLAG_AS_PATH_UNCHANGED, 1, peer_change_reset_out },
+    { PEER_FLAG_NEXTHOP_UNCHANGED, 1, peer_change_reset_out },
+    { PEER_FLAG_MED_UNCHANGED, 1, peer_change_reset_out },
+    { PEER_FLAG_REMOVE_PRIVATE_AS, 1, peer_change_reset_out },
+    { PEER_FLAG_ALLOWAS_IN, 0, peer_change_reset_in },
+    { PEER_FLAG_ORF_PREFIX_SM, 1, peer_change_reset },
+    { PEER_FLAG_ORF_PREFIX_RM, 1, peer_change_reset },
+    { PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED, 0, peer_change_reset_out },
+    { 0, 0, 0 }
+  };
+
+/* Find the action for the given flag(s) and set the combined clear type. */
+static int
+peer_flag_action_set (const struct peer_flag_action *action_list, int size,
+                      struct peer_flag_action *action, u_int32_t flag)
+{
+  int i;
+  int found = 0;
+  int reset_in = 0;
+  int reset_out = 0;
+  const struct peer_flag_action *match = NULL;
+
+  /* Check the peer's flag action.
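+ *
+ * Note that "flag" may carry more than one flag bit; the reset types of
+ * all matching entries are combined below, and reset_in together with
+ * reset_out is escalated to a full reset.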
*/
+  for (i = 0; i < size; i++)
+    {
+      match = &action_list[i];
+
+      if (match->flag == 0)
+        break;
+
+      if (match->flag & flag)
+        {
+          found = 1;
+
+          if (match->type == peer_change_reset_in)
+            reset_in = 1;
+          if (match->type == peer_change_reset_out)
+            reset_out = 1;
+          if (match->type == peer_change_reset)
+            {
+              reset_in = 1;
+              reset_out = 1;
+            }
+          if (match->not_for_member)
+            action->not_for_member = 1;
+        }
+    }
+
+  /* Set peer clear type. */
+  if (reset_in && reset_out)
+    action->type = peer_change_reset;
+  else if (reset_in)
+    action->type = peer_change_reset_in;
+  else if (reset_out)
+    action->type = peer_change_reset_out;
+  else
+    action->type = peer_change_none;
+
+  return found;
+}
+
+static void
+peer_flag_modify_action (struct peer *peer, u_int32_t flag)
+{
+  if (flag == PEER_FLAG_SHUTDOWN)
+    {
+      if (CHECK_FLAG (peer->flags, flag))
+        {
+          if (CHECK_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT))
+            peer_nsf_stop (peer);
+
+          UNSET_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW);
+          if (peer->t_pmax_restart)
+            {
+              BGP_TIMER_OFF (peer->t_pmax_restart);
+              if (BGP_DEBUG (events, EVENTS))
+                zlog_debug ("%s Maximum-prefix restart timer cancelled",
+                            peer->host);
+            }
+
+          bgp_notify_send_with_data(peer, BGP_NOTIFY_CEASE,
+                                    BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN, NULL, 0);
+        }
+      else
+        {
+          peer->v_start = BGP_INIT_START_TIMER;
+          bgp_peer_disable(peer, NULL);
+        }
+    }
+  else
+    {
+      if (flag == PEER_FLAG_DYNAMIC_CAPABILITY)
+        peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE;
+      else if (flag == PEER_FLAG_PASSIVE)
+        peer->last_reset = PEER_DOWN_PASSIVE_CHANGE;
+      else if (flag == PEER_FLAG_DISABLE_CONNECTED_CHECK)
+        peer->last_reset = PEER_DOWN_MULTIHOP_CHANGE;
+
+      bgp_notify_send(peer, BGP_NOTIFY_CEASE,
+                      BGP_NOTIFY_CEASE_CONFIG_CHANGE);
+    }
+}
+
+/* Change specified peer flag. */
+static int
+peer_flag_modify (struct peer *peer, u_int32_t flag, int set)
+{
+  int found;
+  int size;
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+  struct peer_flag_action action;
+
+  memset (&action, 0, sizeof (struct peer_flag_action));
+  size = sizeof peer_flag_action_list / sizeof (struct peer_flag_action);
+
+  found = peer_flag_action_set (peer_flag_action_list, size, &action, flag);
+
+  /* No flag action found. */
+  if (! found)
+    return BGP_ERR_INVALID_FLAG;
+
+  /* Not for peer-group member. */
+  if (action.not_for_member && peer_group_active (peer))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  /* When unsetting a peer-group member's flag, we have to check the
+     peer-group configuration. */
+  if (! set && peer_group_active (peer))
+    if (CHECK_FLAG (peer->group->conf->flags, flag))
+      {
+        if (flag == PEER_FLAG_SHUTDOWN)
+          return BGP_ERR_PEER_GROUP_SHUTDOWN;
+        else
+          return BGP_ERR_PEER_GROUP_HAS_THE_FLAG;
+      }
+
+  /* Flag conflict check. */
+  if (set
+      && CHECK_FLAG (peer->flags | flag, PEER_FLAG_STRICT_CAP_MATCH)
+      && CHECK_FLAG (peer->flags | flag, PEER_FLAG_OVERRIDE_CAPABILITY))
+    return BGP_ERR_PEER_FLAG_CONFLICT;
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    {
+      if (set && CHECK_FLAG (peer->flags, flag) == flag)
+        return 0;
+      if (! set && ! CHECK_FLAG (peer->flags, flag))
+        return 0;
+    }
+
+  if (set)
+    SET_FLAG (peer->flags, flag);
+  else
+    UNSET_FLAG (peer->flags, flag);
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    {
+      if (action.type == peer_change_reset)
+        peer_flag_modify_action (peer, flag);
+
+      return 0;
+    }
+
+  /* peer-group member updates.
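+ *
+ * Propagate the change to every member of the group, running the reset
+ * action for each member whose flags actually change.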
*/
+  group = peer->group;
+
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      if (set && CHECK_FLAG (peer->flags, flag) == flag)
+        continue;
+
+      if (! set && ! CHECK_FLAG (peer->flags, flag))
+        continue;
+
+      if (set)
+        SET_FLAG (peer->flags, flag);
+      else
+        UNSET_FLAG (peer->flags, flag);
+
+      if (action.type == peer_change_reset)
+        peer_flag_modify_action (peer, flag);
+    }
+  return 0;
+}
+
+int
+peer_flag_set (struct peer *peer, u_int32_t flag)
+{
+  return peer_flag_modify (peer, flag, 1);
+}
+
+int
+peer_flag_unset (struct peer *peer, u_int32_t flag)
+{
+  return peer_flag_modify (peer, flag, 0);
+}
+
+static int
+peer_is_group_member (struct peer *peer, afi_t afi, safi_t safi)
+{
+  if (peer->af_group[afi][safi])
+    return 1;
+  return 0;
+}
+
+static int
+peer_af_flag_modify (struct peer *peer, afi_t afi, safi_t safi, u_int32_t flag,
+                     int set)
+{
+  int found;
+  int size;
+  struct listnode *node, *nnode;
+  struct peer_group *group;
+  struct peer_flag_action action;
+
+  memset (&action, 0, sizeof (struct peer_flag_action));
+  size = sizeof peer_af_flag_action_list / sizeof (struct peer_flag_action);
+
+  found = peer_flag_action_set (peer_af_flag_action_list, size, &action, flag);
+
+  /* No flag action found. */
+  if (! found)
+    return BGP_ERR_INVALID_FLAG;
+
+  /* Address family must be activated. */
+  if (! peer->afc[afi][safi])
+    return BGP_ERR_PEER_INACTIVE;
+
+  /* Not for peer-group member. */
+  if (action.not_for_member && peer_is_group_member (peer, afi, safi))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  /* Special check for reflector client. */
+  if (flag & PEER_FLAG_REFLECTOR_CLIENT
+      && peer_sort (peer) != BGP_PEER_IBGP)
+    return BGP_ERR_NOT_INTERNAL_PEER;
+
+  /* Special check for remove-private-AS. */
+  if (flag & PEER_FLAG_REMOVE_PRIVATE_AS
+      && peer_sort (peer) == BGP_PEER_IBGP)
+    return BGP_ERR_REMOVE_PRIVATE_AS;
+
+  /* When unsetting a peer-group member's flag, we have to check the
+     peer-group configuration. */
+  if (! set && peer->af_group[afi][safi])
+    if (CHECK_FLAG (peer->group->conf->af_flags[afi][safi], flag))
+      return BGP_ERR_PEER_GROUP_HAS_THE_FLAG;
+
+  /* When the current flag configuration is the same as the requested one. */
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    {
+      if (set && CHECK_FLAG (peer->af_flags[afi][safi], flag) == flag)
+        return 0;
+      if (! set && ! CHECK_FLAG (peer->af_flags[afi][safi], flag))
+        return 0;
+    }
+
+  if (set)
+    SET_FLAG (peer->af_flags[afi][safi], flag);
+  else
+    UNSET_FLAG (peer->af_flags[afi][safi], flag);
+
+  /* Execute action when peer is established. */
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)
+      && peer->state == bgp_peer_sEstablished)
+    {
+      if (! set && flag == PEER_FLAG_SOFT_RECONFIG)
+        bgp_clear_adj_in (peer, afi, safi);
+      else
+        {
+          if (flag == PEER_FLAG_REFLECTOR_CLIENT)
+            peer->last_reset = PEER_DOWN_RR_CLIENT_CHANGE;
+          else if (flag == PEER_FLAG_RSERVER_CLIENT)
+            peer->last_reset = PEER_DOWN_RS_CLIENT_CHANGE;
+          else if (flag == PEER_FLAG_ORF_PREFIX_SM)
+            peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE;
+          else if (flag == PEER_FLAG_ORF_PREFIX_RM)
+            peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE;
+
+          peer_change_action (peer, afi, safi, action.type);
+        }
+
+    }
+
+  /* Peer group member updates. */
+  if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    {
+      group = peer->group;
+
+      for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+        {
+          if (! peer->af_group[afi][safi])
+            continue;
+
+          if (set && CHECK_FLAG (peer->af_flags[afi][safi], flag) == flag)
+            continue;
+
+          if (! set && !
CHECK_FLAG (peer->af_flags[afi][safi], flag)) + continue; + + if (set) + SET_FLAG (peer->af_flags[afi][safi], flag); + else + UNSET_FLAG (peer->af_flags[afi][safi], flag); + + if (peer->state == bgp_peer_sEstablished) + { + if (! set && flag == PEER_FLAG_SOFT_RECONFIG) + bgp_clear_adj_in (peer, afi, safi); + else + { + if (flag == PEER_FLAG_REFLECTOR_CLIENT) + peer->last_reset = PEER_DOWN_RR_CLIENT_CHANGE; + else if (flag == PEER_FLAG_RSERVER_CLIENT) + peer->last_reset = PEER_DOWN_RS_CLIENT_CHANGE; + else if (flag == PEER_FLAG_ORF_PREFIX_SM) + peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE; + else if (flag == PEER_FLAG_ORF_PREFIX_RM) + peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE; + + peer_change_action (peer, afi, safi, action.type); + } + } + } + } + return 0; +} + +int +peer_af_flag_set (struct peer *peer, afi_t afi, safi_t safi, u_int32_t flag) +{ + return peer_af_flag_modify (peer, afi, safi, flag, 1); +} + +int +peer_af_flag_unset (struct peer *peer, afi_t afi, safi_t safi, u_int32_t flag) +{ + return peer_af_flag_modify (peer, afi, safi, flag, 0); +} + +/* EBGP multihop configuration. */ +int +peer_ebgp_multihop_set (struct peer *peer, int ttl) +{ + struct peer_group *group; + struct listnode *node, *nnode; + bgp_session session = peer->session; + + if (peer_sort (peer) == BGP_PEER_IBGP) + return 0; + + peer->ttl = ttl; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (bgp_session_is_active(session) && peer_sort (peer) != BGP_PEER_IBGP) + bgp_session_set_ttl (session, peer->ttl); + } + else + { + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer_sort (peer) == BGP_PEER_IBGP) + continue; + + peer->ttl = group->conf->ttl; + session = peer->session; + + if (bgp_session_is_active(session)) + bgp_session_set_ttl (session, peer->ttl); + } + } + return 0; +} + +int +peer_ebgp_multihop_unset (struct peer *peer) +{ + struct peer_group *group; + struct listnode *node, *nnode; + bgp_session session = peer->session; + + if (peer_sort (peer) == BGP_PEER_IBGP) + return 0; + + if (peer_group_active (peer)) + peer->ttl = peer->group->conf->ttl; + else + peer->ttl = 1; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (bgp_session_is_active(session) && peer_sort (peer) != BGP_PEER_IBGP) + bgp_session_set_ttl (session, peer->ttl); + } + else + { + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer_sort (peer) == BGP_PEER_IBGP) + continue; + + peer->ttl = 1; + session = peer->session; + + if (bgp_session_is_active(session)) + bgp_session_set_ttl (session, peer->ttl); + } + } + + return 0; +} + +/* Neighbor description. */ +int +peer_description_set (struct peer *peer, char *desc) +{ + if (peer->desc) + XFREE (MTYPE_PEER_DESC, peer->desc); + + peer->desc = XSTRDUP (MTYPE_PEER_DESC, desc); + + return 0; +} + +int +peer_description_unset (struct peer *peer) +{ + if (peer->desc) + XFREE (MTYPE_PEER_DESC, peer->desc); + + peer->desc = NULL; + + return 0; +} + +/* Neighbor update-source. */ +int +peer_update_source_if_set (struct peer *peer, const char *ifname) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer->update_if) + { + if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP) + && strcmp (peer->update_if, ifname) == 0) + return 0; + + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + + peer->update_if = XSTRDUP (MTYPE_PEER_UPDATE_SOURCE, ifname); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + /* peer-group member updates. */ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer->update_if) + { + if (strcmp (peer->update_if, ifname) == 0) + continue; + + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + + peer->update_if = XSTRDUP (MTYPE_PEER_UPDATE_SOURCE, ifname); + + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + return 0; +} + +int +peer_update_source_addr_set (struct peer *peer, union sockunion *su) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer->update_source) + { + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP) + && sockunion_cmp (peer->update_source, su) == 0) + return 0; + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + peer->update_source = sockunion_dup (su); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + /* peer-group member updates. */ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer->update_source) + { + if (sockunion_cmp (peer->update_source, su) == 0) + continue; + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + peer->update_source = sockunion_dup (su); + + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + return 0; +} + +int +peer_update_source_unset (struct peer *peer) +{ + union sockunion *su; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP) + && ! peer->update_source + && ! peer->update_if) + return 0; + + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + if (peer_group_active (peer)) + { + group = peer->group; + + if (group->conf->update_source) + { + su = sockunion_dup (group->conf->update_source); + peer->update_source = su; + } + else if (group->conf->update_if) + peer->update_if = + XSTRDUP (MTYPE_PEER_UPDATE_SOURCE, group->conf->update_if); + } + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + /* peer-group member updates. 
*/
+  group = peer->group;
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      if (! peer->update_source && ! peer->update_if)
+        continue;
+
+      if (peer->update_source)
+        {
+          sockunion_free (peer->update_source);
+          peer->update_source = NULL;
+        }
+
+      if (peer->update_if)
+        {
+          XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if);
+          peer->update_if = NULL;
+        }
+
+      peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE;
+      bgp_notify_send(peer, BGP_NOTIFY_CEASE,
+                      BGP_NOTIFY_CEASE_CONFIG_CHANGE);
+    }
+  return 0;
+}
+
+int
+peer_default_originate_set (struct peer *peer, afi_t afi, safi_t safi,
+                            const char *rmap)
+{
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  /* Address family must be activated. */
+  if (! peer->afc[afi][safi])
+    return BGP_ERR_PEER_INACTIVE;
+
+  /* Default originate can't be used for a peer group member. */
+  if (peer_is_group_member (peer, afi, safi))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  if (! CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE)
+      || (rmap && ! peer->default_rmap[afi][safi].name)
+      || (rmap && strcmp (rmap, peer->default_rmap[afi][safi].name) != 0))
+    {
+      SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE);
+
+      if (rmap)
+        {
+          if (peer->default_rmap[afi][safi].name)
+            free (peer->default_rmap[afi][safi].name);
+          peer->default_rmap[afi][safi].name = strdup (rmap);
+          peer->default_rmap[afi][safi].map = route_map_lookup_by_name (rmap);
+        }
+    }
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    {
+      if (peer->state == bgp_peer_sEstablished && peer->afc_nego[afi][safi])
+        bgp_default_originate (peer, afi, safi, 0);
+      return 0;
+    }
+
+  /* peer-group member updates. */
+  group = peer->group;
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE);
+
+      if (rmap)
+        {
+          if (peer->default_rmap[afi][safi].name)
+            free (peer->default_rmap[afi][safi].name);
+          peer->default_rmap[afi][safi].name = strdup (rmap);
+          peer->default_rmap[afi][safi].map = route_map_lookup_by_name (rmap);
+        }
+
+      if (peer->state == bgp_peer_sEstablished && peer->afc_nego[afi][safi])
+        bgp_default_originate (peer, afi, safi, 0);
+    }
+  return 0;
+}
+
+int
+peer_default_originate_unset (struct peer *peer, afi_t afi, safi_t safi)
+{
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  /* Address family must be activated. */
+  if (! peer->afc[afi][safi])
+    return BGP_ERR_PEER_INACTIVE;
+
+  /* Default originate can't be used for a peer group member. */
+  if (peer_is_group_member (peer, afi, safi))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE))
+    {
+      UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE);
+
+      if (peer->default_rmap[afi][safi].name)
+        free (peer->default_rmap[afi][safi].name);
+      peer->default_rmap[afi][safi].name = NULL;
+      peer->default_rmap[afi][safi].map = NULL;
+    }
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    {
+      if (peer->state == bgp_peer_sEstablished && peer->afc_nego[afi][safi])
+        bgp_default_originate (peer, afi, safi, 1);
+      return 0;
+    }
+
+  /* peer-group member updates.
*/
+  group = peer->group;
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE);
+
+      if (peer->default_rmap[afi][safi].name)
+        free (peer->default_rmap[afi][safi].name);
+      peer->default_rmap[afi][safi].name = NULL;
+      peer->default_rmap[afi][safi].map = NULL;
+
+      if (peer->state == bgp_peer_sEstablished && peer->afc_nego[afi][safi])
+        bgp_default_originate (peer, afi, safi, 1);
+    }
+  return 0;
+}
+
+int
+peer_port_set (struct peer *peer, u_int16_t port)
+{
+  peer->port = port;
+  return 0;
+}
+
+int
+peer_port_unset (struct peer *peer)
+{
+  peer->port = BGP_PORT_DEFAULT;
+  return 0;
+}
+
+/* neighbor weight. */
+int
+peer_weight_set (struct peer *peer, u_int16_t weight)
+{
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  SET_FLAG (peer->config, PEER_CONFIG_WEIGHT);
+  peer->weight = weight;
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    return 0;
+
+  /* peer-group member updates. */
+  group = peer->group;
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      peer->weight = group->conf->weight;
+    }
+  return 0;
+}
+
+int
+peer_weight_unset (struct peer *peer)
+{
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  /* Set default weight. */
+  if (peer_group_active (peer))
+    peer->weight = peer->group->conf->weight;
+  else
+    peer->weight = 0;
+
+  UNSET_FLAG (peer->config, PEER_CONFIG_WEIGHT);
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    return 0;
+
+  /* peer-group member updates. */
+  group = peer->group;
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      peer->weight = 0;
+    }
+  return 0;
+}
+
+int
+peer_timers_set (struct peer *peer, u_int32_t keepalive, u_int32_t holdtime)
+{
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  /* Not for a peer group member. */
+  if (peer_group_active (peer))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  /* Keepalive value check. */
+  if (keepalive > 65535)
+    return BGP_ERR_INVALID_VALUE;
+
+  /* Holdtime value check. */
+  if (holdtime > 65535)
+    return BGP_ERR_INVALID_VALUE;
+
+  /* Holdtime value must be either 0 or at least 3. */
+  if (holdtime < 3 && holdtime != 0)
+    return BGP_ERR_INVALID_VALUE;
+
+  /* Set value to the configuration. */
+  SET_FLAG (peer->config, PEER_CONFIG_TIMER);
+  peer->holdtime = holdtime;
+  peer->keepalive = (keepalive < holdtime / 3 ? keepalive : holdtime / 3);
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    return 0;
+
+  /* peer-group member updates. */
+  group = peer->group;
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      SET_FLAG (peer->config, PEER_CONFIG_TIMER);
+      peer->holdtime = group->conf->holdtime;
+      peer->keepalive = group->conf->keepalive;
+    }
+  return 0;
+}
+
+int
+peer_timers_unset (struct peer *peer)
+{
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  if (peer_group_active (peer))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  /* Clear configuration. */
+  UNSET_FLAG (peer->config, PEER_CONFIG_TIMER);
+  peer->keepalive = 0;
+  peer->holdtime = 0;
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    return 0;
+
+  /* peer-group member updates.
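+ *
+ * Clearing the group's timer configuration also clears the timer
+ * configuration of every member.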
*/ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + UNSET_FLAG (peer->config, PEER_CONFIG_TIMER); + peer->holdtime = 0; + peer->keepalive = 0; + } + + return 0; +} + +int +peer_timers_connect_set (struct peer *peer, u_int32_t connect) +{ + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (connect > 65535) + return BGP_ERR_INVALID_VALUE; + + /* Set value to the configuration. */ + SET_FLAG (peer->config, PEER_CONFIG_CONNECT); + peer->connect = connect; + + /* Set value to timer setting. */ + peer->v_connect = connect; + + return 0; +} + +int +peer_timers_connect_unset (struct peer *peer) +{ + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + /* Clear configuration. */ + UNSET_FLAG (peer->config, PEER_CONFIG_CONNECT); + peer->connect = 0; + + /* Set timer setting to default value. */ + peer->v_connect = BGP_DEFAULT_CONNECT_RETRY; + + return 0; +} + +int +peer_advertise_interval_set (struct peer *peer, u_int32_t routeadv) +{ + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (routeadv > 600) + return BGP_ERR_INVALID_VALUE; + + SET_FLAG (peer->config, PEER_CONFIG_ROUTEADV); + peer->routeadv = routeadv; + peer->v_routeadv = routeadv; + + return 0; +} + +int +peer_advertise_interval_unset (struct peer *peer) +{ + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + UNSET_FLAG (peer->config, PEER_CONFIG_ROUTEADV); + peer->routeadv = 0; + + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + return 0; +} + +/* neighbor interface */ +int +peer_interface_set (struct peer *peer, const char *str) +{ + if (peer->ifname) + free (peer->ifname); + peer->ifname = strdup (str); + + return 0; +} + +int +peer_interface_unset (struct peer *peer) +{ + if (peer->ifname) + free (peer->ifname); + peer->ifname = NULL; + + return 0; +} + +/* Allow-as in. */ +int +peer_allowas_in_set (struct peer *peer, afi_t afi, safi_t safi, int allow_num) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (allow_num < 1 || allow_num > 10) + return BGP_ERR_INVALID_VALUE; + + if (peer->allowas_in[afi][safi] != allow_num) + { + peer->allowas_in[afi][safi] = allow_num; + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ALLOWAS_IN); + peer_change_action (peer, afi, safi, peer_change_reset_in); + } + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer->allowas_in[afi][safi] != allow_num) + { + peer->allowas_in[afi][safi] = allow_num; + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ALLOWAS_IN); + peer_change_action (peer, afi, safi, peer_change_reset_in); + } + + } + return 0; +} + +int +peer_allowas_in_unset (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ALLOWAS_IN)) + { + peer->allowas_in[afi][safi] = 0; + peer_af_flag_unset (peer, afi, safi, PEER_FLAG_ALLOWAS_IN); + } + + if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ALLOWAS_IN)) + { + peer->allowas_in[afi][safi] = 0; + peer_af_flag_unset (peer, afi, safi, PEER_FLAG_ALLOWAS_IN); + } + } + return 0; +} + +int +peer_local_as_set (struct peer *peer, as_t as, int no_prepend) +{ + struct bgp *bgp = peer->bgp; + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer_sort (peer) != BGP_PEER_EBGP + && peer_sort (peer) != BGP_PEER_INTERNAL) + return BGP_ERR_LOCAL_AS_ALLOWED_ONLY_FOR_EBGP; + + if (bgp->as == as) + return BGP_ERR_CANNOT_HAVE_LOCAL_AS_SAME_AS; + + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (peer->change_local_as == as && + ((CHECK_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND) && no_prepend) + || (! CHECK_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND) && ! no_prepend))) + return 0; + + peer->change_local_as = as; + if (no_prepend) + SET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + else + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_LOCAL_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->change_local_as = as; + if (no_prepend) + SET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + else + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + + peer->last_reset = PEER_DOWN_LOCAL_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + + return 0; +} + +int +peer_local_as_unset (struct peer *peer) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (! peer->change_local_as) + return 0; + + peer->change_local_as = 0; + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_LOCAL_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->change_local_as = 0; + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + + peer->last_reset = PEER_DOWN_LOCAL_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + return 0; +} + +/* Set password for authenticating with the peer. */ +int +peer_password_set (struct peer *peer, const char *password) +{ + struct listnode *nn, *nnode; + int len = password ? strlen(password) : 0; + int ret = BGP_SUCCESS; + + if ((len < PEER_PASSWORD_MINLEN) || (len > PEER_PASSWORD_MAXLEN)) + return BGP_ERR_INVALID_VALUE; + + if (peer->password && strcmp (peer->password, password) == 0 + && ! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + if (peer->password) + XFREE (MTYPE_PEER_PASSWORD, peer->password); + + peer->password = XSTRDUP (MTYPE_PEER_PASSWORD, password); + + if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return BGP_SUCCESS; + } + + for (ALL_LIST_ELEMENTS (peer->group->peer, nn, nnode, peer)) + { + if (peer->password && strcmp (peer->password, password) == 0) + continue; + + if (peer->password) + XFREE (MTYPE_PEER_PASSWORD, peer->password); + + peer->password = XSTRDUP(MTYPE_PEER_PASSWORD, password); + + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + + return ret; +} + +int +peer_password_unset (struct peer *peer) +{ + struct listnode *nn, *nnode; + + if (!peer->password + && !CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + if (!CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (peer_group_active (peer) + && peer->group->conf->password + && strcmp (peer->group->conf->password, peer->password) == 0) + return BGP_ERR_PEER_GROUP_HAS_THE_FLAG; + + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + + if (peer->password) + XFREE (MTYPE_PEER_PASSWORD, peer->password); + + peer->password = NULL; + return 0; + } + + XFREE (MTYPE_PEER_PASSWORD, peer->password); + peer->password = NULL; + + for (ALL_LIST_ELEMENTS (peer->group->peer, nn, nnode, peer)) + { + if (!peer->password) + continue; + + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + + XFREE (MTYPE_PEER_PASSWORD, peer->password); + peer->password = NULL; + } + + return 0; +} + +/* Set distribute list to the peer. */ +int +peer_distribute_set (struct peer *peer, afi_t afi, safi_t safi, int direct, + const char *name) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->plist[direct].ref) + return BGP_ERR_PEER_FILTER_CONFLICT; + + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = strdup (name); + filter->dlist[direct].alist = access_list_lookup (afi, name); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = strdup (name); + filter->dlist[direct].alist = access_list_lookup (afi, name); + } + + return 0; +} + +int +peer_distribute_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) +{ + struct bgp_filter *filter; + struct bgp_filter *gfilter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! 
peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + /* apply peer-group filter */ + if (peer->af_group[afi][safi]) + { + gfilter = &peer->group->conf->filter[afi][safi]; + + if (gfilter->dlist[direct].name) + { + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = strdup (gfilter->dlist[direct].name); + filter->dlist[direct].alist = gfilter->dlist[direct].alist; + return 0; + } + } + + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = NULL; + filter->dlist[direct].alist = NULL; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = NULL; + filter->dlist[direct].alist = NULL; + } + + return 0; +} + +/* Update distribute list. */ +static void +peer_distribute_update (struct access_list *access) +{ + afi_t afi; + safi_t safi; + int direct; + struct listnode *mnode, *mnnode; + struct listnode *node, *nnode; + struct bgp *bgp; + struct peer *peer; + struct peer_group *group; + struct bgp_filter *filter; + + for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) + { + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + filter = &peer->filter[afi][safi]; + + for (direct = FILTER_IN; direct < FILTER_MAX; direct++) + { + if (filter->dlist[direct].name) + filter->dlist[direct].alist = + access_list_lookup (afi, filter->dlist[direct].name); + else + filter->dlist[direct].alist = NULL; + } + } + } + for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group)) + { + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + filter = &group->conf->filter[afi][safi]; + + for (direct = FILTER_IN; direct < FILTER_MAX; direct++) + { + if (filter->dlist[direct].name) + filter->dlist[direct].alist = + access_list_lookup (afi, filter->dlist[direct].name); + else + filter->dlist[direct].alist = NULL; + } + } + } + } +} + +/* Set prefix list to the peer. */ +int +peer_prefix_list_set (struct peer *peer, afi_t afi, safi_t safi, int direct, + const char *name) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + prefix_list_ref ref ; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->dlist[direct].name) + return BGP_ERR_PEER_FILTER_CONFLICT; + + ref = prefix_list_set_ref(&filter->plist[direct].ref, afi, name) ; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! 
peer->af_group[afi][safi]) + continue; + + prefix_list_copy_ref(&filter->plist[direct].ref, ref) ; + } + return 0; +} + +int +peer_prefix_list_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) +{ + struct bgp_filter *filter; + struct bgp_filter *gfilter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + /* apply peer-group filter */ + if (peer->af_group[afi][safi]) + { + gfilter = &peer->group->conf->filter[afi][safi]; + + if (gfilter->plist[direct].ref) + { + prefix_list_copy_ref(&filter->plist[direct].ref, + gfilter->plist[direct].ref) ; + return 0; + } + } + + prefix_list_unset_ref(&filter->plist[direct].ref) ; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + prefix_list_unset_ref(&filter->plist[direct].ref) ; + } + + return 0; +} + +/* Update prefix-list list. */ +static void +peer_prefix_list_update (struct prefix_list *plist) +{ + /* This function used to fix up the addresses of prefix lists whenever + * a prefix list was changed. That is now done by the symbol reference + * mechanism. + * + * This function could have a use in updating a peer when a prefix list + * is changed ? + */ +} + +int +peer_aslist_set (struct peer *peer, afi_t afi, safi_t safi, int direct, + const char *name) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = strdup (name); + filter->aslist[direct].aslist = as_list_lookup (name); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = strdup (name); + filter->aslist[direct].aslist = as_list_lookup (name); + } + return 0; +} + +int +peer_aslist_unset (struct peer *peer,afi_t afi, safi_t safi, int direct) +{ + struct bgp_filter *filter; + struct bgp_filter *gfilter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! 
peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + /* apply peer-group filter */ + if (peer->af_group[afi][safi]) + { + gfilter = &peer->group->conf->filter[afi][safi]; + + if (gfilter->aslist[direct].name) + { + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = strdup (gfilter->aslist[direct].name); + filter->aslist[direct].aslist = gfilter->aslist[direct].aslist; + return 0; + } + } + + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = NULL; + filter->aslist[direct].aslist = NULL; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = NULL; + filter->aslist[direct].aslist = NULL; + } + + return 0; +} + +static void +peer_aslist_update (void) +{ + afi_t afi; + safi_t safi; + int direct; + struct listnode *mnode, *mnnode; + struct listnode *node, *nnode; + struct bgp *bgp; + struct peer *peer; + struct peer_group *group; + struct bgp_filter *filter; + + for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) + { + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + filter = &peer->filter[afi][safi]; + + for (direct = FILTER_IN; direct < FILTER_MAX; direct++) + { + if (filter->aslist[direct].name) + filter->aslist[direct].aslist = + as_list_lookup (filter->aslist[direct].name); + else + filter->aslist[direct].aslist = NULL; + } + } + } + for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group)) + { + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + filter = &group->conf->filter[afi][safi]; + + for (direct = FILTER_IN; direct < FILTER_MAX; direct++) + { + if (filter->aslist[direct].name) + filter->aslist[direct].aslist = + as_list_lookup (filter->aslist[direct].name); + else + filter->aslist[direct].aslist = NULL; + } + } + } + } +} + +/* Set route-map to the peer. */ +int +peer_route_map_set (struct peer *peer, afi_t afi, safi_t safi, int direct, + const char *name) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != RMAP_IN && direct != RMAP_OUT && + direct != RMAP_IMPORT && direct != RMAP_EXPORT) + return BGP_ERR_INVALID_VALUE; + + if ( (direct == RMAP_OUT || direct == RMAP_IMPORT) + && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->map[direct].name) + free (filter->map[direct].name); + + filter->map[direct].name = strdup (name); + filter->map[direct].map = route_map_lookup_by_name (name); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! 
peer->af_group[afi][safi])
+        continue;
+
+      if (filter->map[direct].name)
+        free (filter->map[direct].name);
+      filter->map[direct].name = strdup (name);
+      filter->map[direct].map = route_map_lookup_by_name (name);
+    }
+  return 0;
+}
+
+/* Unset route-map from the peer. */
+int
+peer_route_map_unset (struct peer *peer, afi_t afi, safi_t safi, int direct)
+{
+  struct bgp_filter *filter;
+  struct bgp_filter *gfilter;
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  if (! peer->afc[afi][safi])
+    return BGP_ERR_PEER_INACTIVE;
+
+  if (direct != RMAP_IN && direct != RMAP_OUT &&
+      direct != RMAP_IMPORT && direct != RMAP_EXPORT)
+    return BGP_ERR_INVALID_VALUE;
+
+  if ( (direct == RMAP_OUT || direct == RMAP_IMPORT)
+      && peer_is_group_member (peer, afi, safi))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  filter = &peer->filter[afi][safi];
+
+  /* apply peer-group filter */
+  if (peer->af_group[afi][safi])
+    {
+      gfilter = &peer->group->conf->filter[afi][safi];
+
+      if (gfilter->map[direct].name)
+        {
+          if (filter->map[direct].name)
+            free (filter->map[direct].name);
+          filter->map[direct].name = strdup (gfilter->map[direct].name);
+          filter->map[direct].map = gfilter->map[direct].map;
+          return 0;
+        }
+    }
+
+  if (filter->map[direct].name)
+    free (filter->map[direct].name);
+  filter->map[direct].name = NULL;
+  filter->map[direct].map = NULL;
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    return 0;
+
+  group = peer->group;
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      filter = &peer->filter[afi][safi];
+
+      if (! peer->af_group[afi][safi])
+        continue;
+
+      if (filter->map[direct].name)
+        free (filter->map[direct].name);
+      filter->map[direct].name = NULL;
+      filter->map[direct].map = NULL;
+    }
+  return 0;
+}
+
+/* Set unsuppress-map to the peer. */
+int
+peer_unsuppress_map_set (struct peer *peer, afi_t afi, safi_t safi,
+                         const char *name)
+{
+  struct bgp_filter *filter;
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  if (! peer->afc[afi][safi])
+    return BGP_ERR_PEER_INACTIVE;
+
+  if (peer_is_group_member (peer, afi, safi))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  filter = &peer->filter[afi][safi];
+
+  if (filter->usmap.name)
+    free (filter->usmap.name);
+
+  filter->usmap.name = strdup (name);
+  filter->usmap.map = route_map_lookup_by_name (name);
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    return 0;
+
+  group = peer->group;
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      filter = &peer->filter[afi][safi];
+
+      if (! peer->af_group[afi][safi])
+        continue;
+
+      if (filter->usmap.name)
+        free (filter->usmap.name);
+      filter->usmap.name = strdup (name);
+      filter->usmap.map = route_map_lookup_by_name (name);
+    }
+  return 0;
+}
+
+/* Unset unsuppress-map from the peer. */
+int
+peer_unsuppress_map_unset (struct peer *peer, afi_t afi, safi_t safi)
+{
+  struct bgp_filter *filter;
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  if (! peer->afc[afi][safi])
+    return BGP_ERR_PEER_INACTIVE;
+
+  if (peer_is_group_member (peer, afi, safi))
+    return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER;
+
+  filter = &peer->filter[afi][safi];
+
+  if (filter->usmap.name)
+    free (filter->usmap.name);
+  filter->usmap.name = NULL;
+  filter->usmap.map = NULL;
+
+  if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
+    return 0;
+
+  group = peer->group;
+  for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer))
+    {
+      filter = &peer->filter[afi][safi];
+
+      if (!
peer->af_group[afi][safi]) + continue; + + if (filter->usmap.name) + free (filter->usmap.name); + filter->usmap.name = NULL; + filter->usmap.map = NULL; + } + return 0; +} + +int +peer_maximum_prefix_set (struct peer *peer, afi_t afi, safi_t safi, + u_int32_t max, u_char threshold, + int warning, u_int16_t restart) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + peer->pmax[afi][safi] = max; + peer->pmax_threshold[afi][safi] = threshold; + peer->pmax_restart[afi][safi] = restart; + if (warning) + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + else + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (! peer->af_group[afi][safi]) + continue; + + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + peer->pmax[afi][safi] = max; + peer->pmax_threshold[afi][safi] = threshold; + peer->pmax_restart[afi][safi] = restart; + if (warning) + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + else + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + } + return 0; +} + +int +peer_maximum_prefix_unset (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + /* apply peer-group config */ + if (peer->af_group[afi][safi]) + { + if (CHECK_FLAG (peer->group->conf->af_flags[afi][safi], + PEER_FLAG_MAX_PREFIX)) + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + else + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + + if (CHECK_FLAG (peer->group->conf->af_flags[afi][safi], + PEER_FLAG_MAX_PREFIX_WARNING)) + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + else + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + + peer->pmax[afi][safi] = peer->group->conf->pmax[afi][safi]; + peer->pmax_threshold[afi][safi] = peer->group->conf->pmax_threshold[afi][safi]; + peer->pmax_restart[afi][safi] = peer->group->conf->pmax_restart[afi][safi]; + return 0; + } + + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + peer->pmax[afi][safi] = 0; + peer->pmax_threshold[afi][safi] = 0; + peer->pmax_restart[afi][safi] = 0; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (! peer->af_group[afi][safi]) + continue; + + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + peer->pmax[afi][safi] = 0; + peer->pmax_threshold[afi][safi] = 0; + peer->pmax_restart[afi][safi] = 0; + } + return 0; +} + +int +peer_clear (struct peer *peer) +{ + if (! 
CHECK_FLAG (peer->flags, PEER_FLAG_SHUTDOWN))
+    {
+      if (CHECK_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW))
+        {
+          UNSET_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW);
+          if (peer->t_pmax_restart)
+            {
+              BGP_TIMER_OFF (peer->t_pmax_restart);
+              if (BGP_DEBUG (events, EVENTS))
+                zlog_debug ("%s Maximum-prefix restart timer cancelled",
+                            peer->host);
+            }
+
+          /* Beware we may still be clearing, if so the end of
+           * clearing will enable the peer */
+          if (peer->state == bgp_peer_sIdle)
+            bgp_peer_enable(peer);
+
+          return 0;
+        }
+
+      peer->v_start = BGP_INIT_START_TIMER;
+      bgp_notify_send(peer, BGP_NOTIFY_CEASE,
+                      BGP_NOTIFY_CEASE_ADMIN_RESET);
+    }
+  return 0;
+}
+
+int
+peer_clear_soft (struct peer *peer, afi_t afi, safi_t safi,
+                 enum bgp_clear_type stype)
+{
+  if (peer->state != bgp_peer_sEstablished)
+    return 0;
+
+  if (! peer->afc[afi][safi])
+    return BGP_ERR_AF_UNCONFIGURED;
+
+  if (stype == BGP_CLEAR_SOFT_RSCLIENT)
+    {
+      if (! CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT))
+        return 0;
+      bgp_check_local_routes_rsclient (peer, afi, safi);
+      bgp_soft_reconfig_rsclient (peer, afi, safi);
+    }
+
+  if (stype == BGP_CLEAR_SOFT_OUT || stype == BGP_CLEAR_SOFT_BOTH)
+    bgp_announce_route (peer, afi, safi);
+
+  if (stype == BGP_CLEAR_SOFT_IN_ORF_PREFIX)
+    {
+      if (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_ADV)
+          && (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_RCV)
+              || CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_OLD_RCV)))
+        {
+          struct bgp_filter *filter = &peer->filter[afi][safi];
+          u_char prefix_type;
+
+          if (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_RCV))
+            prefix_type = ORF_TYPE_PREFIX;
+          else
+            prefix_type = ORF_TYPE_PREFIX_OLD;
+
+          if (filter->plist[FILTER_IN].ref)
+            {
+              if (CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_ORF_PREFIX_SEND))
+                bgp_route_refresh_send (peer, afi, safi,
+                                        prefix_type, REFRESH_DEFER, 1);
+              bgp_route_refresh_send (peer, afi, safi, prefix_type,
+                                      REFRESH_IMMEDIATE, 0);
+            }
+          else
+            {
+              if (CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_ORF_PREFIX_SEND))
+                bgp_route_refresh_send (peer, afi, safi,
+                                        prefix_type, REFRESH_IMMEDIATE, 1);
+              else
+                bgp_route_refresh_send (peer, afi, safi, 0, 0, 0);
+            }
+          return 0;
+        }
+    }
+
+  if (stype == BGP_CLEAR_SOFT_IN || stype == BGP_CLEAR_SOFT_BOTH
+      || stype == BGP_CLEAR_SOFT_IN_ORF_PREFIX)
+    {
+      /* If neighbor has soft reconfiguration inbound flag.
+         Use Adj-RIB-In database. */
+      if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG))
+        bgp_soft_reconfig_in (peer, afi, safi);
+      else
+        {
+          /* If neighbor has route refresh capability, send route refresh
+             message to the peer. */
+          if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV)
+              || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV))
+            bgp_route_refresh_send (peer, afi, safi, 0, 0, 0);
+          else
+            return BGP_ERR_SOFT_RECONFIG_UNCONFIGURED;
+        }
+    }
+  return 0;
+}
+
+/* Display peer uptime. */
+/* XXX: why does this function return char * when it takes buffer? */
+char *
+peer_uptime (time_t uptime2, char *buf, size_t len)
+{
+  time_t uptime1;
+  struct tm *tm;
+
+  /* Check buffer length. */
+  if (len < BGP_UPTIME_LEN)
+    {
+      zlog_warn ("peer_uptime (): buffer shortage %lu", (u_long)len);
+      /* XXX: should return status instead of buf... */
+      snprintf (buf, len, "<error> ");
+      return buf;
+    }
+
+  /* If no connection has been made before, print `never'. */
+  if (uptime2 == 0)
+    {
+      snprintf (buf, len, "never ");
+      return buf;
+    }
+
+  /* Get current time.
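+ * The stored value is a start timestamp; subtracting it from the current
+ * time gives the elapsed time, which gmtime() then breaks down into
+ * days/hours/minutes for display.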
*/ + uptime1 = time (NULL); + uptime1 -= uptime2; + tm = gmtime (&uptime1); + + /* Making formatted timer strings. */ +#define ONE_DAY_SECOND 60*60*24 +#define ONE_WEEK_SECOND 60*60*24*7 + + if (uptime1 < ONE_DAY_SECOND) + snprintf (buf, len, "%02d:%02d:%02d", + tm->tm_hour, tm->tm_min, tm->tm_sec); + else if (uptime1 < ONE_WEEK_SECOND) + snprintf (buf, len, "%dd%02dh%02dm", + tm->tm_yday, tm->tm_hour, tm->tm_min); + else + snprintf (buf, len, "%02dw%dd%02dh", + tm->tm_yday/7, tm->tm_yday - ((tm->tm_yday/7) * 7), tm->tm_hour); + return buf; +} + +static void +bgp_config_write_filter (struct vty *vty, struct peer *peer, + afi_t afi, safi_t safi) +{ + struct bgp_filter *filter; + struct bgp_filter *gfilter = NULL; + char *addr; + int in = FILTER_IN; + int out = FILTER_OUT; + + addr = peer->host; + filter = &peer->filter[afi][safi]; + if (peer->af_group[afi][safi]) + gfilter = &peer->group->conf->filter[afi][safi]; + + /* distribute-list. */ + if (filter->dlist[in].name) + if (! gfilter || ! gfilter->dlist[in].name + || strcmp (filter->dlist[in].name, gfilter->dlist[in].name) != 0) + vty_out (vty, " neighbor %s distribute-list %s in%s", addr, + filter->dlist[in].name, VTY_NEWLINE); + if (filter->dlist[out].name && ! gfilter) + vty_out (vty, " neighbor %s distribute-list %s out%s", addr, + filter->dlist[out].name, VTY_NEWLINE); + + /* prefix-list. */ + if ( filter->plist[in].ref && (! gfilter + || (prefix_list_ref_ident(gfilter->plist[in].ref) + != prefix_list_ref_ident(filter->plist[in].ref))) ) + vty_out (vty, " neighbor %s prefix-list %s in%s", addr, + prefix_list_ref_name(filter->plist[in].ref), VTY_NEWLINE); + + if (filter->plist[out].ref && ! gfilter) + vty_out (vty, " neighbor %s prefix-list %s out%s", addr, + prefix_list_ref_name(filter->plist[out].ref), VTY_NEWLINE); + + /* route-map. */ + if (filter->map[RMAP_IN].name) + if (! gfilter || ! gfilter->map[RMAP_IN].name + || strcmp (filter->map[RMAP_IN].name, gfilter->map[RMAP_IN].name) != 0) + vty_out (vty, " neighbor %s route-map %s in%s", addr, + filter->map[RMAP_IN].name, VTY_NEWLINE); + if (filter->map[RMAP_OUT].name && ! gfilter) + vty_out (vty, " neighbor %s route-map %s out%s", addr, + filter->map[RMAP_OUT].name, VTY_NEWLINE); + if (filter->map[RMAP_IMPORT].name && ! gfilter) + vty_out (vty, " neighbor %s route-map %s import%s", addr, + filter->map[RMAP_IMPORT].name, VTY_NEWLINE); + if (filter->map[RMAP_EXPORT].name) + if (! gfilter || ! gfilter->map[RMAP_EXPORT].name + || strcmp (filter->map[RMAP_EXPORT].name, + gfilter->map[RMAP_EXPORT].name) != 0) + vty_out (vty, " neighbor %s route-map %s export%s", addr, + filter->map[RMAP_EXPORT].name, VTY_NEWLINE); + + /* unsuppress-map */ + if (filter->usmap.name && ! gfilter) + vty_out (vty, " neighbor %s unsuppress-map %s%s", addr, + filter->usmap.name, VTY_NEWLINE); + + /* filter-list. */ + if (filter->aslist[in].name) + if (! gfilter || ! gfilter->aslist[in].name + || strcmp (filter->aslist[in].name, gfilter->aslist[in].name) != 0) + vty_out (vty, " neighbor %s filter-list %s in%s", addr, + filter->aslist[in].name, VTY_NEWLINE); + if (filter->aslist[out].name && ! gfilter) + vty_out (vty, " neighbor %s filter-list %s out%s", addr, + filter->aslist[out].name, VTY_NEWLINE); +} + +/* BGP peer configuration display function. 
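+ *
+ * (Editor's illustration, with hypothetical names: for a peer 10.0.0.1 in
+ * peer-group G which has its own inbound route-map, only the differences
+ * from the group are written, e.g.
+ *
+ *   neighbor 10.0.0.1 peer-group G
+ *   neighbor 10.0.0.1 route-map RM-IN in
+ *
+ * everything else being inherited from G's own configuration lines.)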
*/ +static void +bgp_config_write_peer (struct vty *vty, struct bgp *bgp, + struct peer *peer, afi_t afi, safi_t safi) +{ + struct bgp_filter *filter; + struct peer *g_peer = NULL; + char buf[SU_ADDRSTRLEN]; + char *addr; + + filter = &peer->filter[afi][safi]; + addr = peer->host; + if (peer_group_active (peer)) + g_peer = peer->group->conf; + + /************************************ + ****** Global to the neighbor ****** + ************************************/ + if (afi == AFI_IP && safi == SAFI_UNICAST) + { + /* remote-as. */ + if (! peer_group_active (peer)) + { + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + vty_out (vty, " neighbor %s peer-group%s", addr, + VTY_NEWLINE); + if (peer->as) + vty_out (vty, " neighbor %s remote-as %u%s", addr, peer->as, + VTY_NEWLINE); + } + else + { + if (! g_peer->as) + vty_out (vty, " neighbor %s remote-as %u%s", addr, peer->as, + VTY_NEWLINE); + if (peer->af_group[AFI_IP][SAFI_UNICAST]) + vty_out (vty, " neighbor %s peer-group %s%s", addr, + peer->group->name, VTY_NEWLINE); + } + + /* local-as. */ + if (peer->change_local_as) + if (! peer_group_active (peer)) + vty_out (vty, " neighbor %s local-as %u%s%s", addr, + peer->change_local_as, + CHECK_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND) ? + " no-prepend" : "", VTY_NEWLINE); + + /* Description. */ + if (peer->desc) + vty_out (vty, " neighbor %s description %s%s", addr, peer->desc, + VTY_NEWLINE); + + /* Shutdown. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_SHUTDOWN)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_SHUTDOWN)) + vty_out (vty, " neighbor %s shutdown%s", addr, VTY_NEWLINE); + + /* Password. */ + if (peer->password) + if (!peer_group_active (peer) + || ! g_peer->password + || strcmp (peer->password, g_peer->password) != 0) + vty_out (vty, " neighbor %s password %s%s", addr, peer->password, + VTY_NEWLINE); + + /* BGP port. */ + if (peer->port != BGP_PORT_DEFAULT) + vty_out (vty, " neighbor %s port %d%s", addr, peer->port, + VTY_NEWLINE); + + /* Local interface name. */ + if (peer->ifname) + vty_out (vty, " neighbor %s interface %s%s", addr, peer->ifname, + VTY_NEWLINE); + + /* Passive. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_PASSIVE)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_PASSIVE)) + vty_out (vty, " neighbor %s passive%s", addr, VTY_NEWLINE); + + /* EBGP multihop. */ + if (peer_sort (peer) != BGP_PEER_IBGP && peer->ttl != 1) + if (! peer_group_active (peer) || + g_peer->ttl != peer->ttl) + vty_out (vty, " neighbor %s ebgp-multihop %d%s", addr, peer->ttl, + VTY_NEWLINE); + + /* disable-connected-check. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)) + vty_out (vty, " neighbor %s disable-connected-check%s", addr, VTY_NEWLINE); + + /* Update-source. */ + if (peer->update_if) + if (! peer_group_active (peer) || ! g_peer->update_if + || strcmp (g_peer->update_if, peer->update_if) != 0) + vty_out (vty, " neighbor %s update-source %s%s", addr, + peer->update_if, VTY_NEWLINE); + if (peer->update_source) + if (! peer_group_active (peer) || ! 
g_peer->update_source + || sockunion_cmp (g_peer->update_source, + peer->update_source) != 0) + vty_out (vty, " neighbor %s update-source %s%s", addr, + sockunion2str (peer->update_source, buf, SU_ADDRSTRLEN), + VTY_NEWLINE); + + /* advertisement-interval */ + if (CHECK_FLAG (peer->config, PEER_CONFIG_ROUTEADV)) + vty_out (vty, " neighbor %s advertisement-interval %d%s", + addr, peer->v_routeadv, VTY_NEWLINE); + + /* timers. */ + if (CHECK_FLAG (peer->config, PEER_CONFIG_TIMER) + && ! peer_group_active (peer)) + vty_out (vty, " neighbor %s timers %d %d%s", addr, + peer->keepalive, peer->holdtime, VTY_NEWLINE); + + if (CHECK_FLAG (peer->config, PEER_CONFIG_CONNECT)) + vty_out (vty, " neighbor %s timers connect %d%s", addr, + peer->connect, VTY_NEWLINE); + + /* Default weight. */ + if (CHECK_FLAG (peer->config, PEER_CONFIG_WEIGHT)) + if (! peer_group_active (peer) || + g_peer->weight != peer->weight) + vty_out (vty, " neighbor %s weight %d%s", addr, peer->weight, + VTY_NEWLINE); + + /* Dynamic capability. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_DYNAMIC_CAPABILITY)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_DYNAMIC_CAPABILITY)) + vty_out (vty, " neighbor %s capability dynamic%s", addr, + VTY_NEWLINE); + + /* dont capability negotiation. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_DONT_CAPABILITY)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_DONT_CAPABILITY)) + vty_out (vty, " neighbor %s dont-capability-negotiate%s", addr, + VTY_NEWLINE); + + /* override capability negotiation. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) + vty_out (vty, " neighbor %s override-capability%s", addr, + VTY_NEWLINE); + + /* strict capability negotiation. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_STRICT_CAP_MATCH)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_STRICT_CAP_MATCH)) + vty_out (vty, " neighbor %s strict-capability-match%s", addr, + VTY_NEWLINE); + + if (! peer_group_active (peer)) + { + if (bgp_flag_check (bgp, BGP_FLAG_NO_DEFAULT_IPV4)) + { + if (peer->afc[AFI_IP][SAFI_UNICAST]) + vty_out (vty, " neighbor %s activate%s", addr, VTY_NEWLINE); + } + else + { + if (! peer->afc[AFI_IP][SAFI_UNICAST]) + vty_out (vty, " no neighbor %s activate%s", addr, VTY_NEWLINE); + } + } + } + + + /************************************ + ****** Per AF to the neighbor ****** + ************************************/ + + if (! (afi == AFI_IP && safi == SAFI_UNICAST)) + { + if (peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s peer-group %s%s", addr, + peer->group->name, VTY_NEWLINE); + else + vty_out (vty, " neighbor %s activate%s", addr, VTY_NEWLINE); + } + + /* ORF capability. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_SM) + || CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_RM)) + if (! peer->af_group[afi][safi]) + { + vty_out (vty, " neighbor %s capability orf prefix-list", addr); + + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_SM) + && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_RM)) + vty_out (vty, " both"); + else if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_SM)) + vty_out (vty, " send"); + else + vty_out (vty, " receive"); + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* Route reflector client. */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_REFLECTOR_CLIENT) + && ! 
peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s route-reflector-client%s", addr, + VTY_NEWLINE); + + /* Nexthop self. */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_NEXTHOP_SELF) + && ! peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s next-hop-self%s", addr, VTY_NEWLINE); + + /* Remove private AS. */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_REMOVE_PRIVATE_AS) + && ! peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s remove-private-AS%s", + addr, VTY_NEWLINE); + + /* send-community print. */ + if (! peer->af_group[afi][safi]) + { + if (bgp_option_check (BGP_OPT_CONFIG_CISCO)) + { + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_COMMUNITY) + && peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_EXT_COMMUNITY)) + vty_out (vty, " neighbor %s send-community both%s", addr, VTY_NEWLINE); + else if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_EXT_COMMUNITY)) + vty_out (vty, " neighbor %s send-community extended%s", + addr, VTY_NEWLINE); + else if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_COMMUNITY)) + vty_out (vty, " neighbor %s send-community%s", addr, VTY_NEWLINE); + } + else + { + if (! peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_COMMUNITY) + && ! peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_EXT_COMMUNITY)) + vty_out (vty, " no neighbor %s send-community both%s", + addr, VTY_NEWLINE); + else if (! peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_EXT_COMMUNITY)) + vty_out (vty, " no neighbor %s send-community extended%s", + addr, VTY_NEWLINE); + else if (! peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_COMMUNITY)) + vty_out (vty, " no neighbor %s send-community%s", + addr, VTY_NEWLINE); + } + } + + /* Default information */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_DEFAULT_ORIGINATE) + && ! peer->af_group[afi][safi]) + { + vty_out (vty, " neighbor %s default-originate", addr); + if (peer->default_rmap[afi][safi].name) + vty_out (vty, " route-map %s", peer->default_rmap[afi][safi].name); + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* Soft reconfiguration inbound. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)) + if (! peer->af_group[afi][safi] || + ! CHECK_FLAG (g_peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)) + vty_out (vty, " neighbor %s soft-reconfiguration inbound%s", addr, + VTY_NEWLINE); + + /* maximum-prefix. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX)) + if (! peer->af_group[afi][safi] + || g_peer->pmax[afi][safi] != peer->pmax[afi][safi] + || g_peer->pmax_threshold[afi][safi] != peer->pmax_threshold[afi][safi] + || CHECK_FLAG (g_peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING) + != CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING)) + { + vty_out (vty, " neighbor %s maximum-prefix %ld", addr, peer->pmax[afi][safi]); + if (peer->pmax_threshold[afi][safi] != MAXIMUM_PREFIX_THRESHOLD_DEFAULT) + vty_out (vty, " %d", peer->pmax_threshold[afi][safi]); + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING)) + vty_out (vty, " warning-only"); + if (peer->pmax_restart[afi][safi]) + vty_out (vty, " restart %d", peer->pmax_restart[afi][safi]); + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* Route server client. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT) + && ! peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s route-server-client%s", addr, VTY_NEWLINE); + + /* Allow AS in. */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_ALLOWAS_IN)) + if (! 
peer_group_active (peer)
+        || ! peer_af_flag_check (g_peer, afi, safi, PEER_FLAG_ALLOWAS_IN)
+        || peer->allowas_in[afi][safi] != g_peer->allowas_in[afi][safi])
+      {
+        if (peer->allowas_in[afi][safi] == 3)
+          vty_out (vty, " neighbor %s allowas-in%s", addr, VTY_NEWLINE);
+        else
+          vty_out (vty, " neighbor %s allowas-in %d%s", addr,
+                   peer->allowas_in[afi][safi], VTY_NEWLINE);
+      }
+
+  /* Filter. */
+  bgp_config_write_filter (vty, peer, afi, safi);
+
+  /* attribute-unchanged. */
+  if ((CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_AS_PATH_UNCHANGED)
+      || CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_UNCHANGED)
+      || CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MED_UNCHANGED))
+      && ! peer->af_group[afi][safi])
+    {
+      if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_AS_PATH_UNCHANGED)
+          && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_UNCHANGED)
+          && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MED_UNCHANGED))
+        vty_out (vty, " neighbor %s attribute-unchanged%s", addr, VTY_NEWLINE);
+      else
+        vty_out (vty, " neighbor %s attribute-unchanged%s%s%s%s", addr,
+                 (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_AS_PATH_UNCHANGED)) ?
+                 " as-path" : "",
+                 (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_UNCHANGED)) ?
+                 " next-hop" : "",
+                 (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MED_UNCHANGED)) ?
+                 " med" : "", VTY_NEWLINE);
+    }
+}
+
+/* Display "address-family" configuration header. */
+void
+bgp_config_write_family_header (struct vty *vty, afi_t afi, safi_t safi,
+                                int *write)
+{
+  if (*write)
+    return;
+
+  if (afi == AFI_IP && safi == SAFI_UNICAST)
+    return;
+
+  vty_out (vty, "!%s address-family ", VTY_NEWLINE);
+
+  if (afi == AFI_IP)
+    {
+      if (safi == SAFI_MULTICAST)
+        vty_out (vty, "ipv4 multicast");
+      else if (safi == SAFI_MPLS_VPN)
+        vty_out (vty, "vpnv4 unicast");
+    }
+  else if (afi == AFI_IP6)
+    {
+      vty_out (vty, "ipv6");
+
+      if (safi == SAFI_MULTICAST)
+        vty_out (vty, " multicast");
+    }
+
+  vty_out (vty, "%s", VTY_NEWLINE);
+
+  *write = 1;
+}
+
+/* Address family based peer configuration display. */
+static int
+bgp_config_write_family (struct vty *vty, struct bgp *bgp, afi_t afi,
+                         safi_t safi)
+{
+  int write = 0;
+  struct peer *peer;
+  struct peer_group *group;
+  struct listnode *node, *nnode;
+
+  bgp_config_write_network (vty, bgp, afi, safi, &write);
+
+  bgp_config_write_redistribute (vty, bgp, afi, safi, &write);
+
+  for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group))
+    {
+      if (group->conf->afc[afi][safi])
+        {
+          bgp_config_write_family_header (vty, afi, safi, &write);
+          bgp_config_write_peer (vty, bgp, group->conf, afi, safi);
+        }
+    }
+  for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
+    {
+      if (peer->afc[afi][safi])
+        {
+          if (! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER))
+            {
+              bgp_config_write_family_header (vty, afi, safi, &write);
+              bgp_config_write_peer (vty, bgp, peer, afi, safi);
+            }
+        }
+    }
+  if (write)
+    vty_out (vty, " exit-address-family%s", VTY_NEWLINE);
+
+  return write;
+}
+
+int
+bgp_config_write (struct vty *vty)
+{
+  int write = 0;
+  struct bgp *bgp;
+  struct peer_group *group;
+  struct peer *peer;
+  struct listnode *node, *nnode;
+  struct listnode *mnode, *mnnode;
+
+  /* BGP Multiple instance. */
+  if (bgp_option_check (BGP_OPT_MULTIPLE_INSTANCE))
+    {
+      vty_out (vty, "bgp multiple-instance%s", VTY_NEWLINE);
+      write++;
+    }
+
+  /* BGP Config type. */
+  if (bgp_option_check (BGP_OPT_CONFIG_CISCO))
+    {
+      vty_out (vty, "bgp config-type cisco%s", VTY_NEWLINE);
+      write++;
+    }
+
+  /* BGP configuration.
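+   * (Editor's sketch of the emitted shape, hypothetical values: each
+   * instance produces a block along the lines of
+   *
+   *   router bgp 64512
+   *    bgp router-id 192.0.2.1
+   *    neighbor 10.0.0.1 remote-as 64513
+   *    ...
+   *
+   * followed by one address-family section for each non-IPv4-unicast AF
+   * that has anything configured.)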
*/ + for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) + { + if (write) + vty_out (vty, "!%s", VTY_NEWLINE); + + /* Router bgp ASN */ + vty_out (vty, "router bgp %u", bgp->as); + + if (bgp_option_check (BGP_OPT_MULTIPLE_INSTANCE)) + { + if (bgp->name) + vty_out (vty, " view %s", bgp->name); + } + vty_out (vty, "%s", VTY_NEWLINE); + + /* No Synchronization */ + if (bgp_option_check (BGP_OPT_CONFIG_CISCO)) + vty_out (vty, " no synchronization%s", VTY_NEWLINE); + + /* BGP fast-external-failover. */ + if (CHECK_FLAG (bgp->flags, BGP_FLAG_NO_FAST_EXT_FAILOVER)) + vty_out (vty, " no bgp fast-external-failover%s", VTY_NEWLINE); + + /* BGP router ID. */ + if (CHECK_FLAG (bgp->config, BGP_CONFIG_ROUTER_ID)) + vty_out (vty, " bgp router-id %s%s", safe_inet_ntoa (bgp->router_id), + VTY_NEWLINE); + + /* BGP log-neighbor-changes. */ + if (bgp_flag_check (bgp, BGP_FLAG_LOG_NEIGHBOR_CHANGES)) + vty_out (vty, " bgp log-neighbor-changes%s", VTY_NEWLINE); + + /* BGP configuration. */ + if (bgp_flag_check (bgp, BGP_FLAG_ALWAYS_COMPARE_MED)) + vty_out (vty, " bgp always-compare-med%s", VTY_NEWLINE); + + /* BGP default ipv4-unicast. */ + if (bgp_flag_check (bgp, BGP_FLAG_NO_DEFAULT_IPV4)) + vty_out (vty, " no bgp default ipv4-unicast%s", VTY_NEWLINE); + + /* BGP default local-preference. */ + if (bgp->default_local_pref != BGP_DEFAULT_LOCAL_PREF) + vty_out (vty, " bgp default local-preference %d%s", + bgp->default_local_pref, VTY_NEWLINE); + + /* BGP client-to-client reflection. */ + if (bgp_flag_check (bgp, BGP_FLAG_NO_CLIENT_TO_CLIENT)) + vty_out (vty, " no bgp client-to-client reflection%s", VTY_NEWLINE); + + /* BGP cluster ID. */ + if (CHECK_FLAG (bgp->config, BGP_CONFIG_CLUSTER_ID)) + vty_out (vty, " bgp cluster-id %s%s", safe_inet_ntoa (bgp->cluster_id), + VTY_NEWLINE); + + /* Confederation identifier*/ + if (CHECK_FLAG (bgp->config, BGP_CONFIG_CONFEDERATION)) + vty_out (vty, " bgp confederation identifier %i%s", bgp->confed_id, + VTY_NEWLINE); + + /* Confederation peer */ + if (bgp->confed_peers_cnt > 0) + { + int i; + + vty_out (vty, " bgp confederation peers"); + + for (i = 0; i < bgp->confed_peers_cnt; i++) + vty_out(vty, " %u", bgp->confed_peers[i]); + + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* BGP enforce-first-as. */ + if (bgp_flag_check (bgp, BGP_FLAG_ENFORCE_FIRST_AS)) + vty_out (vty, " bgp enforce-first-as%s", VTY_NEWLINE); + + /* BGP deterministic-med. */ + if (bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED)) + vty_out (vty, " bgp deterministic-med%s", VTY_NEWLINE); + + /* BGP graceful-restart. */ + if (bgp->stalepath_time != BGP_DEFAULT_STALEPATH_TIME) + vty_out (vty, " bgp graceful-restart stalepath-time %d%s", + bgp->stalepath_time, VTY_NEWLINE); + if (bgp_flag_check (bgp, BGP_FLAG_GRACEFUL_RESTART)) + vty_out (vty, " bgp graceful-restart%s", VTY_NEWLINE); + + /* BGP bestpath method. 
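+   * (Editor's example: with BGP_FLAG_MED_CONFED and
+   * BGP_FLAG_MED_MISSING_AS_WORST both set, the code below emits the single
+   * line " bgp bestpath med confed missing-as-worst".)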
*/ + if (bgp_flag_check (bgp, BGP_FLAG_ASPATH_IGNORE)) + vty_out (vty, " bgp bestpath as-path ignore%s", VTY_NEWLINE); + if (bgp_flag_check (bgp, BGP_FLAG_ASPATH_CONFED)) + vty_out (vty, " bgp bestpath as-path confed%s", VTY_NEWLINE); + if (bgp_flag_check (bgp, BGP_FLAG_COMPARE_ROUTER_ID)) + vty_out (vty, " bgp bestpath compare-routerid%s", VTY_NEWLINE); + if (bgp_flag_check (bgp, BGP_FLAG_MED_CONFED) + || bgp_flag_check (bgp, BGP_FLAG_MED_MISSING_AS_WORST)) + { + vty_out (vty, " bgp bestpath med"); + if (bgp_flag_check (bgp, BGP_FLAG_MED_CONFED)) + vty_out (vty, " confed"); + if (bgp_flag_check (bgp, BGP_FLAG_MED_MISSING_AS_WORST)) + vty_out (vty, " missing-as-worst"); + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* BGP network import check. */ + if (bgp_flag_check (bgp, BGP_FLAG_IMPORT_CHECK)) + vty_out (vty, " bgp network import-check%s", VTY_NEWLINE); + + /* BGP scan interval. */ + bgp_config_write_scan_time (vty); + + /* BGP flag dampening. */ + if (CHECK_FLAG (bgp->af_flags[AFI_IP][SAFI_UNICAST], + BGP_CONFIG_DAMPENING)) + bgp_config_write_damp (vty); + + /* BGP static route configuration. */ + bgp_config_write_network (vty, bgp, AFI_IP, SAFI_UNICAST, &write); + + /* BGP redistribute configuration. */ + bgp_config_write_redistribute (vty, bgp, AFI_IP, SAFI_UNICAST, &write); + + /* BGP timers configuration. */ + if (bgp->default_keepalive != BGP_DEFAULT_KEEPALIVE + && bgp->default_holdtime != BGP_DEFAULT_HOLDTIME) + vty_out (vty, " timers bgp %d %d%s", bgp->default_keepalive, + bgp->default_holdtime, VTY_NEWLINE); + + /* peer-group */ + for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group)) + { + bgp_config_write_peer (vty, bgp, group->conf, AFI_IP, SAFI_UNICAST); + } + + /* Normal neighbor configuration. */ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER)) + bgp_config_write_peer (vty, bgp, peer, AFI_IP, SAFI_UNICAST); + } + + /* Distance configuration. */ + bgp_config_write_distance (vty, bgp); + + /* No auto-summary */ + if (bgp_option_check (BGP_OPT_CONFIG_CISCO)) + vty_out (vty, " no auto-summary%s", VTY_NEWLINE); + + /* IPv4 multicast configuration. */ + write += bgp_config_write_family (vty, bgp, AFI_IP, SAFI_MULTICAST); + + /* IPv4 VPN configuration. */ + write += bgp_config_write_family (vty, bgp, AFI_IP, SAFI_MPLS_VPN); + + /* IPv6 unicast configuration. */ + write += bgp_config_write_family (vty, bgp, AFI_IP6, SAFI_UNICAST); + + /* IPv6 multicast configuration. */ + write += bgp_config_write_family (vty, bgp, AFI_IP6, SAFI_MULTICAST); + + write++; + } + return write; +} + +void +bgp_master_init (void) +{ + memset (&bgp_master, 0, sizeof (struct bgp_master)); + + bm = &bgp_master; + bm->bgp = list_new (); + bm->listen_sockets = list_new (); + bm->port = BGP_PORT_DEFAULT; + bm->master = thread_master_create (); + bm->start_time = time (NULL); +} + + +void +bgp_init (void) +{ + /* peer index */ + bgp_peer_index_init(NULL); + + /* BGP VTY commands installation. */ + bgp_vty_init (); + + /* Init zebra. */ + bgp_zebra_init (); + + /* BGP inits. */ + bgp_attr_init (); + bgp_debug_init (); + bgp_dump_init (); + bgp_route_init (); + bgp_route_map_init (); + bgp_scan_init (); + bgp_mplsvpn_init (); + + /* Access list initialize. */ + access_list_init (); + access_list_add_hook (peer_distribute_update); + access_list_delete_hook (peer_distribute_update); + + /* Filter list initialize. 
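+   * (Editor's note: as with the access-list hooks above, both the add and
+   * delete hooks below point at peer_aslist_update, so creating or deleting
+   * an as-path filter-list prompts re-evaluation of the peers using it.)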
*/
+  bgp_filter_init ();
+  as_list_add_hook (peer_aslist_update);
+  as_list_delete_hook (peer_aslist_update);
+
+  /* Prefix list initialize.*/
+  prefix_list_init ();
+  prefix_list_add_hook (peer_prefix_list_update);
+  prefix_list_delete_hook (peer_prefix_list_update);
+
+  /* Community list initialize. */
+  bgp_clist = community_list_init ();
+
+#ifdef HAVE_SNMP
+  bgp_snmp_init ();
+#endif /* HAVE_SNMP */
+}
+
+void
+bgp_terminate (int terminating, int retain_mode)
+{
+  struct bgp *bgp;
+  struct peer *peer;
+  struct listnode *node, *nnode;
+  struct listnode *mnode, *mnnode;
+
+  program_terminating = terminating;
+
+  /* Disable all peers */
+  for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp))
+    for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
+      {
+        if (retain_mode)
+          bgp_peer_disable(peer, NULL);
+        else if (terminating)
+          peer_flag_set(peer, PEER_FLAG_SHUTDOWN);
+        else
+          bgp_notify_send(peer, BGP_NOTIFY_CEASE,
+                          BGP_NOTIFY_CEASE_ADMIN_RESET);
+      }
+
+  if (!retain_mode)
+    {
+      bgp_cleanup_routes ();
+
+      if (bm->process_main_queue)
+        {
+          work_queue_free (bm->process_main_queue);
+          bm->process_main_queue = NULL;
+        }
+      if (bm->process_rsclient_queue)
+        {
+          work_queue_free (bm->process_rsclient_queue);
+          bm->process_rsclient_queue = NULL;
+        }
+    }
+
+  /* if no sessions were enabled then we need to check here */
+  program_terminate_if_all_disabled();
+}
+
+/* If we are terminating the program, and all sessions are disabled
+ * then terminate all threads
+ */
+void
+program_terminate_if_all_disabled(void)
+{
+  struct bgp *bgp;
+  struct peer *peer;
+  struct listnode *node, *nnode;
+  struct listnode *mnode, *mnnode;
+
+  if (!program_terminating)
+    return;
+
+  /* are there any active sessions remaining? */
+  for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp))
+    for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
+      if (bgp_session_is_active(peer->session))
+        return;
+
+  /* ask remaining pthreads to die */
+  if (qpthreads_enabled && routing_nexus != NULL)
+    qpn_terminate(routing_nexus);
+
+  if (qpthreads_enabled && bgp_nexus != NULL)
+    qpn_terminate(bgp_nexus);
+
+  if (cli_nexus != NULL)
+    qpn_terminate(cli_nexus);
+}
+
diff --git a/lib/mqueue.h b/lib/mqueue.h
index d8790246..355aec23 100644
--- a/lib/mqueue.h
+++ b/lib/mqueue.h
@@ -93,16 +93,16 @@ typedef void mqueue_action(mqueue_block mqb, mqb_flag_t flag) ;
 enum { mqb_args_size_max  = 64 } ;  /* maximum size of struct args */
 enum { mqb_argv_size_unit = 16 } ;  /* allocate argv in these units */
 
-struct args
+struct mqb_args
 {
-  char data[mqb_args_size_max] ;    /* empty space */
+  char bytes[mqb_args_size_max] ;   /* empty space */
 } ;
 
 #define MQB_ARGS_SIZE_OK(s) CONFIRM(sizeof(struct s) <= mqb_args_size_max)
 
 struct mqueue_block
 {
-  struct args args ;                /* user structure */
+  struct mqb_args args ;            /* user structure */
 
   mqueue_block next ;               /* single linked list */
 
@@ -116,8 +116,12 @@ struct mqueue_block
   mqb_index_t argv_next ;           /* iterator */
 } ;
 
-/* mqueue_block structures are malloced. That guarantees maximum alignment. */
-/* To guarantee maximum alignment for "struct args", it must be first item ! */
+/* mqueue_block structures are malloced.  That guarantees maximum alignment.
+ * To guarantee maximum alignment for "struct args", it must be first item !
+ *
+ * (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining
+ * about first argument of offsetof().)
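+ *
+ * (Editor's sketch of the idiom, with hypothetical names:
+ *
+ *   typedef struct foo foo_t ;
+ *   CONFIRM(offsetof(foo_t, first_member) == 0) ;
+ *
+ * i.e. a typedef'd name is handed to offsetof() in place of the raw
+ * "struct foo".)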
+ */ typedef struct mqueue_block mqueue_block_t ; CONFIRM(offsetof(mqueue_block_t, args) == 0) ; diff --git a/lib/qpnexus.c b/lib/qpnexus.c index 8a78a70b..cb0bd12c 100644 --- a/lib/qpnexus.c +++ b/lib/qpnexus.c @@ -36,7 +36,13 @@ static void qpn_in_thread_init(qpn_nexus qpn); */ -/* Initialise a nexus -- allocating it if required. +/*============================================================================== + * Initialisation, add hook, free etc. + * + */ + +/*------------------------------------------------------------------------------ + * Initialise a nexus -- allocating it if required. * * If main_thread is set then no new thread will be created * when qpn_exec() is called, instead the finite state machine will be @@ -45,7 +51,7 @@ static void qpn_in_thread_init(qpn_nexus qpn); * * Returns the qpn_nexus. */ -qpn_nexus +extern qpn_nexus qpn_init_new(qpn_nexus qpn, int main_thread) { if (qpn == NULL) @@ -53,16 +59,27 @@ qpn_init_new(qpn_nexus qpn, int main_thread) else memset(qpn, 0, sizeof(struct qpn_nexus)) ; - qpn->selection = qps_selection_init_new(qpn->selection); - qpn->pile = qtimer_pile_init_new(qpn->pile); - qpn->queue = mqueue_init_new(qpn->queue, mqt_signal_unicast); + qpn->selection = qps_selection_init_new(qpn->selection); + qpn->pile = qtimer_pile_init_new(qpn->pile); + qpn->queue = mqueue_init_new(qpn->queue, mqt_signal_unicast); qpn->main_thread = main_thread; - qpn->start = qpn_start; + qpn->start = qpn_start; return qpn; } -/* free timers, selection, message queue and nexus +/*------------------------------------------------------------------------------ + * Add a hook function to the given nexus. + */ +extern void +qpn_add_hook_function(qpn_hook_list list, void* hook) +{ + passert(list->count < qpn_hooks_max) ; + list->hooks[list->count++] = hook ; +} ; + +/*------------------------------------------------------------------------------ + * free timers, selection, message queue and nexus * return NULL */ qpn_nexus @@ -99,24 +116,25 @@ qpn_free(qpn_nexus qpn) return NULL; } -/* If not main thread create new qpthread. - * Execute the state machine */ -void +/*============================================================================== + * Execution of a nexus + */ + +/*------------------------------------------------------------------------------ + * If not main qpthread create new qpthread. + * + * For all qpthreads: start the thread ! + */ +extern void qpn_exec(qpn_nexus qpn) { if (qpn->main_thread) - { - /* Run the state machine in calling thread */ - qpn->start(qpn); - } + qpn->start(qpn); else - { - /* create a qpthread and run the state machine in it */ - qpt_thread_create(qpn->start, qpn, NULL) ; - } -} + qpt_thread_create(qpn->start, qpn, NULL) ; +} ; -/*============================================================================== +/*------------------------------------------------------------------------------ * Pthread routine * * Processes: @@ -145,78 +163,86 @@ qpn_start(void* arg) qpn_nexus qpn = arg; mqueue_block mqb; int actions; - qtime_mono_t now; - qtime_mono_t max_wait; - int i; + qtime_mono_t now ; + qtime_t max_wait ; + unsigned i; + unsigned done ; + unsigned wait ; - /* now in our thread, complete initialisation */ + /* now in our thread, complete initialisation */ qpn_in_thread_init(qpn); + /* Until required to terminate, loop */ + done = 1 ; while (!qpn->terminate) { - now = qt_get_monotonic(); + wait = (done == 0) ; /* may wait this time only if nothing + found to do on the last pass */ - /* Signals are highest priority. 
- * only execute on the main thread */ + /* Signals are highest priority -- only execute for main thread + * + * Restarts "done" for this pass. + */ if (qpn->main_thread) - quagga_sigevent_process (); + done = quagga_sigevent_process() ; + else + done = 0 ; - /* max time to wait in pselect */ - max_wait = QTIME(MAX_PSELECT_TIMOUT); - - /* event hooks, if any. High priority */ - for (i = 0; i < NUM_EVENT_HOOK; ++i) - { - if (qpn->event_hook[i] != NULL) - { - /* first, second and third priority */ - qtime_mono_t event_wait = qpn->event_hook[i](qpn_pri_third); - if (event_wait > 0 && event_wait < max_wait) - max_wait = event_wait; - } - } + /* Foreground hooks, if any. */ + for (i = 0; i < qpn->foreground.count ; ++i) + done |= ((qpn_hook_function*)(qpn->foreground.hooks[i]))() ; /* drain the message queue, will be in waiting for signal state * when it's empty */ - for (;;) + + if (done != 0) + wait = 0 ; /* turn off wait if found something */ + + while (1) { - mqb = mqueue_dequeue(qpn->queue, 1, qpn->mts) ; + mqb = mqueue_dequeue(qpn->queue, wait, qpn->mts) ; if (mqb == NULL) break; mqb_dispatch(mqb, mqb_action); - } - /* Event hooks, if any. All priorities */ - for (i = 0; i < NUM_EVENT_HOOK; ++i) - { - if (qpn->event_hook[i] != NULL) - { - /* first, second third and fourth priority */ - qtime_mono_t event_wait = qpn->event_hook[i](qpn_pri_fourth); - if (event_wait > 0 && event_wait < max_wait) - max_wait = event_wait; - } - } - - /* block for some input, output, signal or timeout */ - actions = qps_pselect(qpn->selection, - qtimer_pile_top_time(qpn->pile, now + max_wait)); - - /* process I/O actions */ - while (actions) - actions = qps_dispatch_next(qpn->selection) ; + done = 1 ; /* done something */ + wait = 0 ; /* turn off wait */ + } ; - mqueue_done_waiting(qpn->queue, qpn->mts); + /* block for some input, output, signal or timeout + * + * wait will be true iff did nothing the last time round the loop, and + * not found anything to be done up to this point either. + */ + if (wait) + max_wait = qtimer_pile_top_wait(qpn->pile, QTIME(MAX_PSELECT_WAIT)) ; + else + max_wait = 0 ; + + actions = qps_pselect(qpn->selection, max_wait) ; + done |= actions ; + + if (wait) + mqueue_done_waiting(qpn->queue, qpn->mts); + + /* process I/O actions */ + while (actions) + actions = qps_dispatch_next(qpn->selection) ; - /* process timers */ + /* process timers */ + now = qt_get_monotonic() ; while (qtimer_pile_dispatch_next(qpn->pile, now)) - { - } - } + done = 1 ; + + /* If nothing done in this pass, see if anything in the background */ + if (done == 0) + for (i = 0; i < qpn->background.count ; ++i) + done |= ((qpn_hook_function*)(qpn->background.hooks[i]))() ; + } ; /* last bit of code to run in this thread */ - if (qpn->in_thread_final) + if (qpn->in_thread_final != NULL) qpn->in_thread_final(); return NULL; diff --git a/lib/qpnexus.h b/lib/qpnexus.h index a6cad148..d5b7c5a6 100644 --- a/lib/qpnexus.h +++ b/lib/qpnexus.h @@ -48,31 +48,27 @@ */ /* maximum time in seconds to sit in a pselect */ -#define MAX_PSELECT_TIMOUT 10 +#define MAX_PSELECT_WAIT 10 /* signal for message queues */ #define SIGMQUEUE SIGUSR2 /* number of event hooks */ -#define NUM_EVENT_HOOK 2 - -/* Work priorities */ -enum qpn_priority -{ - qpn_pri_highest = 1, - - qpn_pri_first = 1, - qpn_pri_second = 2, - qpn_pri_third = 3, - qpn_pri_fourth = 4, - - qpn_pri_lowest = 4, -}; +enum { qpn_hooks_max = 4 } ; /*============================================================================== * Data Structures. 
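+ *
+ * (Editor's sketch, not part of the patch: a hook is registered and driven
+ * roughly as follows
+ *
+ *   static int my_work(void) ;  -- does one unit of work, returns 0 when idle
+ *   ...
+ *   qpn_add_hook_function(&qpn->foreground, my_work) ;
+ *
+ * the nexus loop then calls every foreground hook on each pass, and only
+ * blocks in pselect() once a whole pass has found nothing to do.)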
*/ +typedef int qpn_hook_function(void) ; + +typedef struct qpn_hook_list* qpn_hook_list ; +struct qpn_hook_list +{ + void* hooks[qpn_hooks_max] ; + unsigned count ; +} ; + typedef struct qpn_nexus* qpn_nexus ; struct qpn_nexus @@ -99,30 +95,45 @@ struct qpn_nexus /* qpthread routine, can override */ void* (*start)(void*); - /* in-thread initialize, can override. Called within the thread - * after all other initializion just before thread loop */ + /* in-thread initialise, can override. Called within the thread + * after all other initialisation just before thread loop */ void (*in_thread_init)(void); - /* in-thread finalize, can override. Called within thread + /* in-thread finalise, can override. Called within thread * just before thread dies. Nexus components all exist but * thread loop is no longer executed */ void (*in_thread_final)(void); - /* thread loop events, can override. Called before and after message queue, - * and before I/O and timers. - * Hook should perform all work <= given priority. - * Returns the time to try again, 0 means default to maximum. + /* in-thread queue(s) of events or other work. + * + * The hook function(s) are called in the qpnexus loop, at the top of the + * loop. So in addition to the mqueue, I/O, timers and any background stuff, + * the thread may have other queue(s) of things to be done. + * + * Hook function can process some queue(s) of things to be done. It does not + * have to empty its queues, but it MUST only return 0 if all queues are now + * empty. */ - qtime_mono_t (*event_hook[NUM_EVENT_HOOK])(enum qpn_priority); - + struct qpn_hook_list foreground ; + + /* in-thread background queue(s) of events or other work. + * + * The hook functions are called at the bottom of the qpnexus loop, but only + * when there is absolutely nothing else to do. + * + * The hook function should do some unit of background work (if there is any) + * and return. MUST return 0 iff there is no more work to do. + */ + struct qpn_hook_list background ; }; /*============================================================================== * Functions */ -extern qpn_nexus qpn_init_new(qpn_nexus qtn, int main_thread); -extern void qpn_exec(qpn_nexus qtn); +extern qpn_nexus qpn_init_new(qpn_nexus qpn, int main_thread); +extern void qpn_add_hook_function(qpn_hook_list list, void* hook) ; +extern void qpn_exec(qpn_nexus qpn); extern void qpn_terminate(qpn_nexus qpn); extern qpn_nexus qpn_free(qpn_nexus qpn); diff --git a/lib/qpselect.c b/lib/qpselect.c index 7df59752..d3f8e5ad 100644 --- a/lib/qpselect.c +++ b/lib/qpselect.c @@ -270,13 +270,8 @@ qps_set_signal(qps_selection qps, int signum, sigset_t sigmask) } ; } ; -/* Execute a pselect for the given selection -- subject to the given timeout - * *time*. - * - * The time-out time is an "absolute" time, as measured by qt_get_monotonic(). - * - * A timeout time <= the current qt_get_monotonic() is treated as a zero - * timeout period, and will return immediately from the pselect. +/* Execute a pselect for the given selection -- subject to the given maximum + * time to wait. * * There is no support for an infinite timeout. * @@ -289,7 +284,7 @@ qps_set_signal(qps_selection qps, int signum, sigset_t sigmask) * The qps_dispatch_next() processes the returns from pselect(). 
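+ *
+ * (Editor's usage sketch, assuming a nexus-style loop:
+ *
+ *   qtime_t max_wait = qtimer_pile_top_wait(pile, QTIME(MAX_PSELECT_WAIT)) ;
+ *   int n = qps_pselect(qps, max_wait) ;
+ *   while (n)
+ *     n = qps_dispatch_next(qps) ;
+ *
+ * which is essentially what qpn_start() now does.)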
*/ int -qps_pselect(qps_selection qps, qtime_mono_t timeout) +qps_pselect(qps_selection qps, qtime_t max_wait) { struct timespec ts ; qps_mnum_t mnum ; @@ -334,16 +329,15 @@ qps_pselect(qps_selection qps, qtime_mono_t timeout) qps->tried_fd_last = qps->fd_last ; qps->pend_fd = 0 ; - /* Convert timeout time to interval for pselect() */ - timeout -= qt_get_monotonic() ; - if (timeout < 0) - timeout = 0 ; + /* Make sure not trying to do something stupid */ + if (max_wait < 0) + max_wait = 0 ; /* Finally ready for the main event */ n = pselect(qps->fd_last + 1, p_fds[qps_read_mnum], p_fds[qps_write_mnum], p_fds[qps_error_mnum], - qtime2timespec(&ts, timeout), + qtime2timespec(&ts, max_wait), (qps->signum != 0) ? &qps->sigmask : NULL) ; /* If have something, set and return the pending count. */ diff --git a/lib/qtimers.c b/lib/qtimers.c index dcce24b9..0aef52a4 100644 --- a/lib/qtimers.c +++ b/lib/qtimers.c @@ -108,7 +108,9 @@ qtimer_pile_init_new(qtimer_pile qtp) * timers -- invalid heap -- need to properly initialise */ - /* Eclipse flags offsetof(struct qtimer, backlink) as a syntax error :-( */ + /* (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining + * about first argument of offsetof().) + */ typedef struct qtimer qtimer_t ; heap_init_new_backlinked(&qtp->timers, 0, (heap_cmp*)qtimer_cmp, @@ -122,15 +124,18 @@ qtimer_pile_init_new(qtimer_pile qtp) * empty, or the top entry times out after the maximum time, then the maximum * is returned. */ -qtime_mono_t -qtimer_pile_top_time(qtimer_pile qtp, qtime_mono_t max_time) +qtime_t +qtimer_pile_top_wait(qtimer_pile qtp, qtime_t max_wait) { + qtime_t top_wait ; qtimer qtr = heap_top_item(&qtp->timers) ; - if ((qtr == NULL) || (qtr->time >= max_time)) - return max_time ; - else - return qtr->time ; + if (qtr == NULL) + return max_wait ; + + top_wait = qtr->time - qt_get_monotonic() ; + + return (top_wait < max_wait) ? top_wait : max_wait ; } ; /* Dispatch the next timer whose time is <= the given "upto" time. @@ -157,7 +162,6 @@ qtimer_pile_dispatch_next(qtimer_pile qtp, qtime_mono_t upto) qtr->state = qtr_state_unset_pending ; qtr->action(qtr, qtr->timer_info, upto) ; - assert(qtp == qtr->pile); if (qtr->state == qtr_state_unset_pending) qtimer_unset(qtr) ; @@ -372,7 +376,9 @@ qtimer_pile_verify(qtimer_pile qtp) vector_index e ; qtimer qtr ; - /* Eclipse flags offsetof(struct qtimer, backlink) as a syntax error :-( */ + /* (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining + * about first argument of offsetof().) + */ typedef struct qtimer qtimer_t ; assert(th->cmp == (heap_cmp*)qtimer_cmp) ; diff --git a/lib/qtimers.h b/lib/qtimers.h index 3d509acb..0bc3d7a1 100644 --- a/lib/qtimers.h +++ b/lib/qtimers.h @@ -85,8 +85,8 @@ qtimer_pile_init_new(qtimer_pile qtp) ; int qtimer_pile_dispatch_next(qtimer_pile qtp, qtime_mono_t upto) ; -qtime_mono_t -qtimer_pile_top_time(qtimer_pile qtp, qtime_mono_t max_time) ; +qtime_t +qtimer_pile_top_wait(qtimer_pile qtp, qtime_t max_wait) ; qtimer qtimer_pile_ream(qtimer_pile qtp, int free_structure) ; diff --git a/lib/sigevent.c b/lib/sigevent.c index 30e9a3d1..a3d4219c 100644 --- a/lib/sigevent.c +++ b/lib/sigevent.c @@ -16,7 +16,7 @@ * You should have received a copy of the GNU General Public License * along with Quagga; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. 
*/ #include <zebra.h> @@ -41,13 +41,13 @@ struct quagga_sigevent_master_t { struct thread *t; - struct quagga_signal_t *signals; + struct quagga_signal_t *signals; int sigc; - + volatile sig_atomic_t caught; } sigmaster; -/* Generic signal handler +/* Generic signal handler * Schedules signal event thread */ static void @@ -55,24 +55,30 @@ quagga_signal_handler (int signo) { int i; struct quagga_signal_t *sig; - + for (i = 0; i < sigmaster.sigc; i++) { sig = &(sigmaster.signals[i]); - + if (sig->signal == signo) sig->caught = 1; } - + sigmaster.caught = 1; -} +} -/* check if signals have been caught and run appropriate handlers */ +/* check if signals have been caught and run appropriate handlers + * + * Returns: 0 => nothing to do + * -1 => failed + * > 0 => done this many signals + */ int quagga_sigevent_process (void) { struct quagga_signal_t *sig; int i; + int done ; #ifdef SIGEVENT_BLOCK_SIGNALS /* shouldnt need to block signals, but potentially may be needed */ sigset_t newmask, oldmask; @@ -85,7 +91,7 @@ quagga_sigevent_process (void) sigfillset (&newmask); sigdelset (&newmask, SIGTRAP); sigdelset (&newmask, SIGKILL); - + if ( (sigprocmask (SIG_BLOCK, &newmask, &oldmask)) < 0) { zlog_err ("quagga_signal_timer: couldnt block signals!"); @@ -93,13 +99,14 @@ quagga_sigevent_process (void) } #endif /* SIGEVENT_BLOCK_SIGNALS */ + done = 0 ; if (sigmaster.caught > 0) { sigmaster.caught = 0; /* must not read or set sigmaster.caught after here, * race condition with per-sig caught flags if one does */ - + for (i = 0; i < sigmaster.sigc; i++) { sig = &(sigmaster.signals[i]); @@ -108,6 +115,7 @@ quagga_sigevent_process (void) { sig->caught = 0; sig->handler (); + ++done ; } } } @@ -117,7 +125,7 @@ quagga_sigevent_process (void) return -1; #endif /* SIGEVENT_BLOCK_SIGNALS */ - return 0; + return done ; } #ifdef SIGEVENT_SCHEDULE_THREAD @@ -159,7 +167,7 @@ signal_set (int signo) } ret = sigaction (signo, &sig, &osig); - if (ret < 0) + if (ret < 0) return ret; else return 0; @@ -245,13 +253,13 @@ trap_default_signals(void) SIGUSR1, SIGUSR2, #ifdef SIGPOLL - SIGPOLL, + SIGPOLL, #endif #ifdef SIGVTALRM SIGVTALRM, #endif #ifdef SIGSTKFLT - SIGSTKFLT, + SIGSTKFLT, #endif }; static const int ignore_signals[] = { @@ -309,8 +317,8 @@ trap_default_signals(void) } } -void -signal_init (struct thread_master *m, int sigc, +void +signal_init (struct thread_master *m, int sigc, struct quagga_signal_t signals[]) { @@ -320,7 +328,7 @@ signal_init (struct thread_master *m, int sigc, /* First establish some default handlers that can be overridden by the application. */ trap_default_signals(); - + while (i < sigc) { sig = &signals[i]; @@ -332,9 +340,9 @@ signal_init (struct thread_master *m, int sigc, sigmaster.sigc = sigc; sigmaster.signals = signals; -#ifdef SIGEVENT_SCHEDULE_THREAD - sigmaster.t = - thread_add_timer (m, quagga_signal_timer, &sigmaster, +#ifdef SIGEVENT_SCHEDULE_THREAD + sigmaster.t = + thread_add_timer (m, quagga_signal_timer, &sigmaster, QUAGGA_SIGNAL_TIMER_INTERVAL); #endif /* SIGEVENT_SCHEDULE_THREAD */ } diff --git a/lib/stream.c b/lib/stream.c index 14c7c589..b4c16977 100644 --- a/lib/stream.c +++ b/lib/stream.c @@ -998,46 +998,6 @@ stream_flush (struct stream* s, int fd) } /*------------------------------------------------------------------------------ - * Try to write stream contents to the file descriptor -- assuming non-blocking. - * - * Loops if gets EINTR. - * - * If writes everything, resets the stream. - * - * If does not write everything, then would block. 
- * - * Returns: >= 0 number of bytes left to write - * -1 => some error (not including EINTR, EAGAIN or EWOULDBLOCK) - */ -int -stream_flush_try(struct stream* s, int fd) -{ - int have ; - int ret ; - - STREAM_VERIFY_SANE(s); - - while ((have = (s->endp - s->getp)) != 0) - { - ret = write(fd, s->data + s->getp, have) ; - if (ret > 0) - s->getp += ret ; - else if (ret < 0) - { - ret = errno ; - if ((ret == EAGAIN) || (ret == EWOULDBLOCK)) - return have ; - if (ret != EINTR) - return -1 ; - } ; - } ; - - s->getp = s->endp = 0; - - return 0 ; -} - -/*------------------------------------------------------------------------------ * Transfer contents of stream to given buffer and reset stream. * * Transfers *entire* stream buffer. @@ -1113,6 +1073,13 @@ stream_fifo_head (struct stream_fifo *fifo) } void +stream_fifo_reset (struct stream_fifo *fifo) +{ + fifo->head = fifo->tail = NULL; + fifo->count = 0; +} + +void stream_fifo_clean (struct stream_fifo *fifo) { struct stream *s; diff --git a/lib/stream.h b/lib/stream.h index 094cf0c6..e7303652 100644 --- a/lib/stream.h +++ b/lib/stream.h @@ -224,6 +224,7 @@ extern struct stream_fifo *stream_fifo_new (void); extern void stream_fifo_push (struct stream_fifo *fifo, struct stream *s); extern struct stream *stream_fifo_pop (struct stream_fifo *fifo); extern struct stream *stream_fifo_head (struct stream_fifo *fifo); +extern void stream_fifo_reset (struct stream_fifo *fifo); extern void stream_fifo_clean (struct stream_fifo *fifo); extern void stream_fifo_free (struct stream_fifo *fifo); diff --git a/lib/thread.c b/lib/thread.c index f2b873ac..3df9acf7 100644 --- a/lib/thread.c +++ b/lib/thread.c @@ -31,6 +31,7 @@ #include "command.h" #include "sigevent.h" #include "qpthreads.h" +#include "qtimers.h" /* Recent absolute time of day */ struct timeval recent_time; @@ -47,7 +48,12 @@ static qpt_mutex_t thread_mutex; #define UNLOCK qpt_mutex_unlock(&thread_mutex); static struct hash *cpu_record = NULL; -/* Struct timeval's tv_usec one second value. */ +/* Pointer to qtimer pile to be used, if any */ +static qtimer_pile use_qtimer_pile = NULL ; +static qtimer spare_qtimers = NULL ; +static unsigned used_standard_timer = 0 ; + +/* Struct timeval's tv_usec one second value. */ #define TIMER_SECOND_MICRO 1000000L /* Adjust so that tv_usec is in the range [0,TIMER_SECOND_MICRO). @@ -238,18 +244,51 @@ cpu_record_hash_cmp (const struct cpu_thread_history *a, static void * cpu_record_hash_alloc (struct cpu_thread_history *a) { - struct cpu_thread_history *new; + const char* b ; + const char* e ; + char* n ; + int l ; + struct cpu_thread_history *new ; + + /* Establish start and length of name, removing leading/trailing + * spaces and any enclosing (...) -- recursively. + */ + b = a->funcname ; + e = b + strlen(b) - 1 ; + + while (1) + { + while (*b == ' ') + ++b ; /* strip leading spaces */ + if (*b == '\0') + break ; /* quit if now empty */ + while (*e == ' ') + --e ; /* strip trailing spaces */ + if ((*b != '(') || (*e != ')')) + break ; /* quit if not now (...) 
*/ + ++b ; + --e ; /* discard ( and ) */ + } ; + + l = (e + 1) - b ; /* length excluding trailing \0 */ + + n = XMALLOC(MTYPE_THREAD_FUNCNAME, l + 1) ; + memcpy(n, b, l) ; + n[l] = '\0' ; + + /* Allocate empty structure and set address and name */ new = XCALLOC (MTYPE_THREAD_STATS, sizeof (struct cpu_thread_history)); - new->func = a->func; - new->funcname = XSTRDUP(MTYPE_THREAD_FUNCNAME, a->funcname); - return new; + new->func = a->func; + new->funcname = n ; + + return new ; } static void cpu_record_hash_free (void *a) { struct cpu_thread_history *hist = a; - char* funcname = miyagi(hist->funcname) ; + void* funcname = miyagi(hist->funcname) ; XFREE (MTYPE_THREAD_FUNCNAME, funcname); XFREE (MTYPE_THREAD_STATS, hist); @@ -497,7 +536,6 @@ thread_add_unuse (struct thread_master *m, struct thread *thread) assert (thread->prev == NULL); assert (thread->type == THREAD_UNUSED); thread_list_add (&m->unuse, thread); - /* XXX: Should we deallocate funcname here? */ } /* Free all unused thread. */ @@ -510,8 +548,13 @@ thread_list_free (struct thread_master *m, struct thread_list *list) for (t = list->head; t; t = next) { next = t->next; - if (t->funcname) - XFREE (MTYPE_THREAD_FUNCNAME, t->funcname); + + if ( (use_qtimer_pile != NULL) + && ( (t->type == THREAD_TIMER || t->type == THREAD_BACKGROUND) ) + && (t->u.qtr != NULL) + ) + qtimer_free(t->u.qtr) ; + XFREE (MTYPE_THREAD, t); list->count--; m->alloc--; @@ -522,6 +565,8 @@ thread_list_free (struct thread_master *m, struct thread_list *list) void thread_master_free (struct thread_master *m) { + qtimer qtr ; + thread_list_free (m, &m->read); thread_list_free (m, &m->write); thread_list_free (m, &m->timer); @@ -540,6 +585,12 @@ thread_master_free (struct thread_master *m) cpu_record = NULL; } UNLOCK + + while ((qtr = spare_qtimers) != NULL) + { + spare_qtimers = (void*)(qtr->pile) ; + qtimer_free(qtr) ; + } ; } /* Thread list is empty or not. */ @@ -570,34 +621,26 @@ thread_timer_remain_second (struct thread *thread) return 0; } -/* Trim blankspace and "()"s */ -static char * -strip_funcname (const char *funcname) -{ - char buff[100]; - char tmp, *ret, *e, *b = buff; +/* Get new cpu history */ - strncpy(buff, funcname, sizeof(buff)); - buff[ sizeof(buff) -1] = '\0'; - e = buff +strlen(buff) -1; +static struct cpu_thread_history* +thread_get_hist(struct thread* thread, const char* funcname) +{ + struct cpu_thread_history tmp ; + struct cpu_thread_history* hist ; - /* Wont work for funcname == "Word (explanation)" */ + tmp.func = thread->func ; + tmp.funcname = funcname ; - while (*b == ' ' || *b == '(') - ++b; - while (*e == ' ' || *e == ')') - --e; - e++; + LOCK + hist = hash_get (cpu_record, &tmp, + (void * (*) (void *))cpu_record_hash_alloc); + UNLOCK - tmp = *e; - *e = '\0'; - ret = XSTRDUP (MTYPE_THREAD_FUNCNAME, b); - *e = tmp; + return hist ; +} ; - return ret; -} - -/* Get new thread. */ +/* Get new thread. 
*/ static struct thread * thread_get (struct thread_master *m, u_char type, int (*func) (struct thread *), void *arg, const char* funcname) @@ -607,23 +650,22 @@ thread_get (struct thread_master *m, u_char type, if (!thread_empty (&m->unuse)) { thread = thread_trim_head (&m->unuse); - if (thread->funcname) - XFREE(MTYPE_THREAD_FUNCNAME, thread->funcname); + memset(thread, 0, sizeof (struct thread)) ; } else { thread = XCALLOC (MTYPE_THREAD, sizeof (struct thread)); m->alloc++; } - thread->type = type; + thread->type = type; thread->add_type = type; - thread->master = m; - thread->func = func; - thread->arg = arg; + thread->master = m; + thread->func = func; + thread->arg = arg; - thread->funcname = strip_funcname(funcname); + thread->hist = thread_get_hist(thread, funcname) ; - return thread; + return thread ; } /* Add new read thread. */ @@ -672,48 +714,190 @@ funcname_thread_add_write (struct thread_master *m, return thread; } +/*============================================================================== + * Timer Threads -- THREAD_TIMER and THREAD_BACKGROUND + * + * Standard Timer Threads are sorted by the "struct timeval sands", and + * processed by thread_timer_process() -- which moves any expired timer + * threads onto the THREAD_READY queue. So, the scheduling of background stuff + * is done by not processing the THREAD_BACKGROUND queue until there is + * nothing else to do. + * + * When using a qtimer_pile: + * + * * THREAD_TIMER threads have an associated qtimer. + * + * When the timer expires, the qtimer is cut from the thread (and put onto + * the spare_qtimers list). The thread is then queued on the THREAD_READY + * queue (as before). + * + * * THREAD_BACKGROUND threads which have a non-zero delay are treated much + * as THREAD_TIMER, except that when the timer expires, the thread is + * queued on the THREAD_BACKGROUND queue. + * + * The THREAD_BACKGROUND queue is visited only when there is nothing else + * to do. + * + * Note that when using a qtimer_pile, and there is an active qtimer associated + * with the thread, the thread will be on the THREAD_TIMER queue -- so that it + * can be collected up and released if required. + * + * NB: when using a qtimer_pile, if there is a qtimer associated with a + * THREAD_TIMER or a THREAD_BACKGROUND thread, then thread->u.qtr points + * at the qtimer. + * + * AND, conversely, if there is no qtimer, then thread->u.ptr == NULL. + */ + +/*------------------------------------------------------------------------------ + * Set use_qtimer_pile ! + */ +extern void +thread_set_qtimer_pile(qtimer_pile pile) +{ + passert(!used_standard_timer) ; + + use_qtimer_pile = pile ; +} ; + +/*------------------------------------------------------------------------------ + * Unset qtimer associated with the given THREAD_TIMER or THREAD_BACKGROUND + * thread -- if any. + * + * Moves any qtimer onto the spare_qtimers list. + */ +static void +thread_qtimer_unset(struct thread* thread) +{ + qtimer qtr ; + assert (thread->type == THREAD_TIMER || thread->type == THREAD_BACKGROUND); + assert (use_qtimer_pile != NULL) ; + + qtr = thread->u.qtr ; + if (qtr != NULL) + { + qtimer_unset(qtr) ; + + qtr->pile = (void*)spare_qtimers ; + spare_qtimers = qtr ; + + thread->u.qtr = NULL ; + } ; +} ; + +/*------------------------------------------------------------------------------ + * The qtimer action function -- when using qtimer pile (!) 
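+ * (Editor's sketch of the round trip, hypothetical arguments: a legacy call
+ * such as thread_add_timer(m, func, arg, 5) now allocates or recycles a
+ * qtimer, arms it for 5 seconds with this function as its action, and files
+ * the thread on m->timer; when the interval expires the code below requeues
+ * the thread so that thread_dispatch() will run func.)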
+ * + * Remove thread from the THREAD_TIMER queue and unset the qtimer, place + * thread on the THREAD_READY or the THREAD_BACKGROUND queue as required. + */ +static void +thread_qtimer_dispatch(qtimer qtr, void* timer_info, qtime_mono_t when) +{ + struct thread* thread = timer_info ; + + thread_list_delete (&thread->master->timer, thread) ; + thread_qtimer_unset(thread) ; + + switch (thread->type) + { + case THREAD_TIMER: + thread->type = THREAD_READY; + thread_list_add (&thread->master->ready, thread); + break ; + + case THREAD_BACKGROUND: + thread_list_add (&thread->master->background, thread); + break ; + + default: + zabort("invalid thread type in thread_qtimer_dispatch") ; + } ; +} ; + +/*------------------------------------------------------------------------------ + * For standard timers, return time left on first timer on the given list. + */ +static struct timeval * +thread_timer_wait (struct thread_list *tlist, struct timeval *timer_val) +{ + if (!thread_empty (tlist)) + { + *timer_val = timeval_subtract (tlist->head->u.sands, relative_time); + return timer_val; + } + return NULL; +} + +/*------------------------------------------------------------------------------ + * Add timer of given type -- either standard or qtimer_pile as required. + * + * Timer interval is given as a struct timeval. + */ static struct thread * -funcname_thread_add_timer_timeval (struct thread_master *m, - int (*func) (struct thread *), +funcname_thread_add_timer_timeval(struct thread_master *m, + int (*func) (struct thread *), int type, void *arg, struct timeval *time_relative, const char* funcname) { struct thread *thread; - struct thread_list *list; - struct timeval alarm_time; - struct thread *tt; assert (m != NULL); + assert (time_relative != NULL); assert (type == THREAD_TIMER || type == THREAD_BACKGROUND); - assert (time_relative); - list = ((type == THREAD_TIMER) ? &m->timer : &m->background); thread = thread_get (m, type, func, arg, funcname); - /* Do we need jitter here? */ - quagga_get_relative (NULL); - alarm_time.tv_sec = relative_time.tv_sec + time_relative->tv_sec; - alarm_time.tv_usec = relative_time.tv_usec + time_relative->tv_usec; - thread->u.sands = timeval_adjust(alarm_time); - - /* Sort by timeval. */ - for (tt = list->head; tt; tt = tt->next) - if (timeval_cmp (thread->u.sands, tt->u.sands) <= 0) - break; + if (use_qtimer_pile == NULL) + { + struct thread_list *list; + struct timeval alarm_time; + struct thread *tt; - if (tt) - thread_list_add_before (list, tt, thread); + /* Do we need jitter here? */ + quagga_get_relative (NULL); + alarm_time.tv_sec = relative_time.tv_sec + time_relative->tv_sec; + alarm_time.tv_usec = relative_time.tv_usec + time_relative->tv_usec; + thread->u.sands = timeval_adjust(alarm_time); + + /* Sort by timeval. */ + list = ((type == THREAD_TIMER) ? &m->timer : &m->background); + for (tt = list->head; tt; tt = tt->next) + if (timeval_cmp (thread->u.sands, tt->u.sands) <= 0) + break; + + if (tt) + thread_list_add_before (list, tt, thread); + else + thread_list_add (list, thread); + + used_standard_timer = 1 ; + } else - thread_list_add (list, thread); + { + qtimer qtr = spare_qtimers ; + if (qtr != NULL) + spare_qtimers = (qtimer)(qtr->pile) ; + + qtr = qtimer_init_new(qtr, use_qtimer_pile, NULL, thread) ; + thread->u.qtr = qtr ; + + qtimer_set_interval(qtr, timeval2qtime(time_relative), + thread_qtimer_dispatch) ; + thread_list_add(&m->timer, thread) ; + } ; return thread; } - -/* Add timer event thread. 
*/ +/*------------------------------------------------------------------------------ + * Add a THREAD_TIMER timer -- either standard or qtimer_pile as required. + * + * Timer interval is given in seconds. + */ struct thread * funcname_thread_add_timer (struct thread_master *m, int (*func) (struct thread *), @@ -721,16 +905,18 @@ funcname_thread_add_timer (struct thread_master *m, { struct timeval trel; - assert (m != NULL); - - trel.tv_sec = timer; + trel.tv_sec = timer; trel.tv_usec = 0; return funcname_thread_add_timer_timeval (m, func, THREAD_TIMER, arg, &trel, funcname); } -/* Add timer event thread with "millisecond" resolution */ +/*------------------------------------------------------------------------------ + * Add a THREAD_TIMER timer -- either standard or qtimer_pile as required. + * + * Timer interval is given in milliseconds. + */ struct thread * funcname_thread_add_timer_msec (struct thread_master *m, int (*func) (struct thread *), @@ -738,45 +924,56 @@ funcname_thread_add_timer_msec (struct thread_master *m, { struct timeval trel; - assert (m != NULL); - - trel.tv_sec = timer / 1000; - trel.tv_usec = 1000*(timer % 1000); + trel.tv_sec = timer / 1000 ; + trel.tv_usec = (timer % 1000) * 1000 ; return funcname_thread_add_timer_timeval (m, func, THREAD_TIMER, - arg, &trel, funcname); + arg, &trel, funcname); } -/* Add a background thread, with an optional millisec delay */ +/*------------------------------------------------------------------------------ + * Add a THREAD_BACKGROUND thread -- either standard or qtimer_pile as required. + * + * Timer interval is given in milliseconds. + * + * For qtimer_pile, if the delay is zero, the thread is placed straight onto + * the THREAD_BACKGROUND queue. + */ struct thread * funcname_thread_add_background (struct thread_master *m, int (*func) (struct thread *), void *arg, long delay, const char *funcname) { - struct timeval trel; + if ((delay != 0) || (use_qtimer_pile == NULL)) + { + struct timeval trel; - assert (m != NULL); + trel.tv_sec = delay / 1000; + trel.tv_usec = (delay % 1000) * 1000 ; - if (delay) - { - trel.tv_sec = delay / 1000; - trel.tv_usec = 1000*(delay % 1000); + return funcname_thread_add_timer_timeval (m, func, THREAD_BACKGROUND, + arg, &trel, funcname); } else { - trel.tv_sec = 0; - trel.tv_usec = 0; - } + struct thread* thread ; + + assert (m != NULL); - return funcname_thread_add_timer_timeval (m, func, THREAD_BACKGROUND, - arg, &trel, funcname); + thread = thread_get (m, THREAD_BACKGROUND, func, arg, funcname); + thread_list_add (&m->background, thread) ; + + return thread ; + } ; } +/*----------------------------------------------------------------------------*/ /* Add simple event thread. */ struct thread * funcname_thread_add_event (struct thread_master *m, - int (*func) (struct thread *), void *arg, int val, const char* funcname) + int (*func) (struct thread *), void *arg, int val, + const char* funcname) { struct thread *thread; @@ -789,7 +986,11 @@ funcname_thread_add_event (struct thread_master *m, return thread; } -/* Cancel thread from scheduler. */ +/*------------------------------------------------------------------------------ + * Cancel thread from scheduler. + * + * Note that when using qtimer_pile need to unset any associated qtimer. 
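
Both millisecond entry points above reduce to the same conversion; a
self-contained restatement of that arithmetic (timeval_from_msec is a
hypothetical helper, not part of the patch):

#include <sys/time.h>

/* Convert a delay in milliseconds to a struct timeval, as
 * funcname_thread_add_timer_msec() and funcname_thread_add_background()
 * do inline above.
 */
static struct timeval
timeval_from_msec(long msec)
{
  struct timeval tv ;
  tv.tv_sec  = msec / 1000 ;            /* whole seconds                */
  tv.tv_usec = (msec % 1000) * 1000 ;   /* remainder, in microseconds   */
  return tv ;
}
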
+ */
 void
 thread_cancel (struct thread *thread)
 {
@@ -808,6 +1009,8 @@ thread_cancel (struct thread *thread)
       list = &thread->master->write;
       break;
     case THREAD_TIMER:
+      if ((use_qtimer_pile != NULL) && (thread->u.qtr != NULL))
+        thread_qtimer_unset(thread) ;
       list = &thread->master->timer;
       break;
     case THREAD_EVENT:
@@ -817,13 +1020,21 @@ thread_cancel (struct thread *thread)
       list = &thread->master->ready;
       break;
     case THREAD_BACKGROUND:
-      list = &thread->master->background;
+      if ((use_qtimer_pile != NULL) && (thread->u.qtr != NULL))
+        {
+          thread_qtimer_unset(thread) ;
+          list = &thread->master->timer;
+        }
+      else
+        list = &thread->master->background;
       break;
+
     default:
-      return;
-      break;
+      return ;
     }
+
   thread_list_delete (list, thread);
+
   thread->type = THREAD_UNUSED;
   thread_add_unuse (thread->master, thread);
 }
@@ -854,24 +1065,12 @@ thread_cancel_event (struct thread_master *m, void *arg)
   return ret;
 }
 
-static struct timeval *
-thread_timer_wait (struct thread_list *tlist, struct timeval *timer_val)
-{
-  if (!thread_empty (tlist))
-    {
-      *timer_val = timeval_subtract (tlist->head->u.sands, relative_time);
-      return timer_val;
-    }
-  return NULL;
-}
-
 static struct thread *
 thread_run (struct thread_master *m, struct thread *thread,
             struct thread *fetch)
 {
   *fetch = *thread;
   thread->type = THREAD_UNUSED;
-  thread->funcname = NULL;  /* thread_call will free fetch's copied pointer */
   thread_add_unuse (m, thread);
   return fetch;
 }
@@ -921,7 +1120,11 @@ thread_timer_process (struct thread_list *list, struct timeval *timenow)
   return ready;
 }
 
-/* Fetch next ready thread. */
+/*------------------------------------------------------------------------------
+ * Fetch next ready thread -- for standard thread handling.
+ *
+ * (This is not used when running with a qtimer_pile or under qnexus.)
+ */
struct thread *
thread_fetch (struct thread_master *m, struct thread *fetch)
{
@@ -939,8 +1142,7 @@ thread_fetch (struct thread_master *m, struct thread *fetch)
       int num = 0;
 
       /* Signals are highest priority */
-      if (!qpthreads_enabled)
-        quagga_sigevent_process ();
+      quagga_sigevent_process ();
 
       /* Normal event are the next highest priority.  */
       if ((thread = thread_trim_head (&m->event)) != NULL)
@@ -1009,69 +1211,66 @@ thread_fetch (struct thread_master *m, struct thread *fetch)
         }
     }
 
-
-/* Fetch next ready thread <= given priority.  Events and timeouts only.
- * No I/O.  If nothing to do returns NULL and sets event_wait to
- * recommended time to be called again. */
-struct thread *
-thread_fetch_event (enum qpn_priority priority, struct thread_master *m, struct thread *fetch,
-    qtime_mono_t *event_wait)
+/*------------------------------------------------------------------------------
+ * Empties the event and ready queues.
+ *
+ * This is used when qnexus is managing most things, including I/O.  Must be
+ * using qtimer_pile !
+ *
+ * This runs "legacy" event and ready queues only.
+ *
+ * Returns: the number of threads dispatched.
+ *
+ * Legacy timers are handled by the qtimer_pile, and their related threads will
+ * be placed on the ready queue when they expire.
+ *
+ * The background queue is handled separately.
+ */
+extern int
+thread_dispatch(struct thread_master *m)
 {
-  struct thread *thread;
-  struct timeval timer_val;
-  struct timeval timer_val_bg;
-  struct timeval *timer_wait;
-  struct timeval *timer_wait_bg;
-
-  *event_wait = 0;
-
-  /* Normal event are the next highest priority. 
*/ - if ((thread = thread_trim_head (&m->event)) != NULL) - return thread_run (m, thread, fetch); + struct thread_list* list ; + struct thread fetch ; + int count = 0 ; - if (priority <= qpn_pri_first) - return NULL; - - /* If there are any ready threads from previous scheduler runs, - * process top of them. - */ - if ((thread = thread_trim_head (&m->ready)) != NULL) - return thread_run (m, thread, fetch); - - if (priority <= qpn_pri_second) - return NULL; - - /* Check foreground timers. */ - quagga_get_relative (NULL); - thread_timer_process (&m->timer, &relative_time); - - if ((thread = thread_trim_head (&m->ready)) != NULL) - return thread_run (m, thread, fetch); + while (1) + { + if (thread_empty(list = &m->event)) + if (thread_empty(list = &m->ready)) + return count ; - if (priority <= qpn_pri_third) - return NULL; + thread_call(thread_run(m, thread_list_delete(list, list->head), &fetch)) ; - /* Background timer/events, lowest priority */ - thread_timer_process (&m->background, &relative_time); + ++count ; + } ; +} ; - if ((thread = thread_trim_head (&m->ready)) != NULL) - return thread_run (m, thread, fetch); +/*------------------------------------------------------------------------------ + * Dispatch first item on the background queue, if any. + * + * This is used when qnexus is managing most things. + * + * Background threads spend their lives being cycled around the background + * queue -- possibly via the timer queue, if a delay is put in before the next + * invocation. + * + * Returns: 1 if dispatched a background thread + * 0 if there are no background threads + */ +extern int +thread_dispatch_background(struct thread_master *m) +{ + struct thread* thread ; + struct thread fetch ; - /* Calculate select wait timer if nothing else to do */ - timer_wait = thread_timer_wait (&m->timer, &timer_val); - timer_wait_bg = thread_timer_wait (&m->background, &timer_val_bg); + if ((thread = thread_trim_head (&m->background)) == NULL) + return 0 ; - if (timer_wait_bg && - (!timer_wait || (timeval_cmp (*timer_wait, *timer_wait_bg) > 0))) - timer_wait = timer_wait_bg; + thread_call(thread_run(m, thread, &fetch)) ; - /* When is the next timer due ? */ - *event_wait = (timer_wait != NULL) - ? timeval2qtime(timer_wait) - : 0; + return 1 ; +} ; - return NULL; -} unsigned long thread_consumed_time (RUSAGE_T *now, RUSAGE_T *start, unsigned long *cputime) @@ -1130,25 +1329,6 @@ thread_call (struct thread *thread) unsigned long realtime, cputime; RUSAGE_T ru; - /* Cache a pointer to the relevant cpu history thread, if the thread - * does not have it yet. 
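
How a caller is expected to drive the two new entry points is implied rather
than shown in this file; a sketch of the idle loop, assuming the surrounding
qnexus machinery handles I/O and qtimers elsewhere (the loop shape itself is
an assumption, only thread_dispatch() and thread_dispatch_background() come
from the patch):

/* Sketch: drain the legacy queues, then let background work trickle
 * through one item at a time.
 */
static void
run_legacy_queues(struct thread_master *m)
{
  do
    {
      thread_dispatch(m) ;              /* events + ready, run to empty */
    }
  while (thread_dispatch_background(m)) ;
                                        /* ran one background item, so  */
}                                       /* go round and look again      */

Running at most one background item per pass keeps background work strictly
below events and ready threads, matching the scheduling described above.
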
- * - * Callers submitting 'dummy threads' hence must take care that - * thread->cpu is NULL - */ - if (!thread->hist) - { - struct cpu_thread_history tmp; - - tmp.func = thread->func; - tmp.funcname = thread->funcname; - - LOCK - thread->hist = hash_get (cpu_record, &tmp, - (void * (*) (void *))cpu_record_hash_alloc); - UNLOCK - } - GETRUSAGE (&thread->ru); (*thread->func) (thread); @@ -1157,19 +1337,22 @@ thread_call (struct thread *thread) realtime = thread_consumed_time (&ru, &thread->ru, &cputime); - LOCK - thread->hist->real.total += realtime; - if (thread->hist->real.max < realtime) - thread->hist->real.max = realtime; + if (thread->hist != NULL) + { + LOCK + thread->hist->real.total += realtime; + if (thread->hist->real.max < realtime) + thread->hist->real.max = realtime; #ifdef HAVE_RUSAGE - thread->hist->cpu.total += cputime; - if (thread->hist->cpu.max < cputime) - thread->hist->cpu.max = cputime; + thread->hist->cpu.total += cputime; + if (thread->hist->cpu.max < cputime) + thread->hist->cpu.max = cputime; #endif - ++(thread->hist->total_calls); - thread->hist->types |= (1 << thread->add_type); - UNLOCK + ++(thread->hist->total_calls); + thread->hist->types |= (1 << thread->add_type); + UNLOCK + } ; #ifdef CONSUMED_TIME_CHECK if (realtime > CONSUMED_TIME_CHECK) @@ -1180,13 +1363,12 @@ thread_call (struct thread *thread) * to fix. */ zlog_warn ("SLOW THREAD: task %s (%lx) ran for %lums (cpu time %lums)", - thread->funcname, + (thread->hist != NULL) ? thread->hist->funcname : "??", (unsigned long) thread->func, realtime/1000, cputime/1000); } #endif /* CONSUMED_TIME_CHECK */ - XFREE (MTYPE_THREAD_FUNCNAME, thread->funcname); } /* Execute thread */ @@ -1207,11 +1389,9 @@ funcname_thread_execute (struct thread_master *m, dummy.func = func; dummy.arg = arg; dummy.u.val = val; - dummy.funcname = strip_funcname (funcname); + dummy.hist = thread_get_hist(&dummy, funcname) ; thread_call (&dummy); - XFREE (MTYPE_THREAD_FUNCNAME, dummy.funcname); - return NULL; } diff --git a/lib/thread.h b/lib/thread.h index 1e68007a..fa021486 100644 --- a/lib/thread.h +++ b/lib/thread.h @@ -16,7 +16,7 @@ * You should have received a copy of the GNU General Public License * along with GNU Zebra; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. */ #ifndef _ZEBRA_THREAD_H @@ -25,6 +25,7 @@ #include <sys/resource.h> #include "qtime.h" #include "qpnexus.h" +#include "qtimers.h" struct rusage_t { @@ -68,22 +69,22 @@ struct thread { thread_type type; /* thread type */ thread_type add_type; /* thread type */ - struct thread *next; /* next pointer of the thread */ + struct thread *next; /* next pointer of the thread */ struct thread *prev; /* previous pointer of the thread */ struct thread_master *master; /* pointer to the struct thread_master. */ int (*func) (struct thread *); /* event function */ void *arg; /* event argument */ union { - int val; /* second argument of the event. */ + int val; /* second argument of the event. */ int fd; /* file descriptor in case of read/write. */ - struct timeval sands; /* rest of time sands value. */ + struct timeval sands; /* rest of time sands value. */ + qtimer qtr ; /* pointer to related qtimer */ } u; RUSAGE_T ru; /* Indepth usage info. 
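
thread_get_hist() itself is added elsewhere in this patch; judging by the
lazy-lookup code it replaces (deleted above), it is presumably something like
the following.  The locking and funcname stripping that the old code did are
elided, so treat this purely as a sketch of the assumed shape:

/* Assumed shape of thread_get_hist(): resolve the cpu_thread_history
 * entry once, at thread set-up time, rather than lazily in thread_call().
 */
static struct cpu_thread_history *
thread_get_hist(struct thread *thread, const char *funcname)
{
  struct cpu_thread_history tmp ;

  tmp.func     = thread->func ;
  tmp.funcname = funcname ;

  return hash_get (cpu_record, &tmp,
                   (void * (*) (void *)) cpu_record_hash_alloc) ;
}
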
*/ struct cpu_thread_history *hist; /* cache pointer to cpu_history */ - char* funcname; }; -struct cpu_thread_history +struct cpu_thread_history { int (*func)(struct thread *); const char *funcname; @@ -169,8 +170,9 @@ extern struct thread_master *thread_master_create (void); extern void thread_master_free (struct thread_master *); extern void thread_init_r (void); extern void thread_finish (void); +extern void thread_set_qtimer_pile(qtimer_pile pile) ; -extern struct thread *funcname_thread_add_read (struct thread_master *, +extern struct thread *funcname_thread_add_read (struct thread_master *, int (*)(struct thread *), void *, int, const char*); extern struct thread *funcname_thread_add_write (struct thread_master *, @@ -196,8 +198,8 @@ extern struct thread *funcname_thread_execute (struct thread_master *, extern void thread_cancel (struct thread *); extern unsigned int thread_cancel_event (struct thread_master *, void *); extern struct thread *thread_fetch (struct thread_master *, struct thread *); -struct thread * thread_fetch_event (enum qpn_priority,struct thread_master *m, struct thread *fetch, - qtime_mono_t *event_wait); +extern int thread_dispatch(struct thread_master *m) ; +extern int thread_dispatch_background(struct thread_master *m) ; extern void thread_call (struct thread *); extern unsigned long thread_timer_remain_second (struct thread *); extern int thread_should_yield (struct thread *); diff --git a/lib/workqueue.c b/lib/workqueue.c index 7c811edd..6f2cd531 100644 --- a/lib/workqueue.c +++ b/lib/workqueue.c @@ -1,4 +1,4 @@ -/* +/* * Quagga Work Queue Support. * * Copyright (C) 2005 Sun Microsystems, Inc. @@ -18,38 +18,30 @@ * You should have received a copy of the GNU General Public License * along with Quagga; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. 
*/ #include <lib/zebra.h> #include "thread.h" #include "memory.h" #include "workqueue.h" -#include "linklist.h" #include "command.h" #include "log.h" +#include "linklist.h" /* master list of work_queues */ static struct list work_queues; #define WORK_QUEUE_MIN_GRANULARITY 1 -static struct work_queue_item * -work_queue_item_new (struct work_queue *wq) -{ - struct work_queue_item *item; - assert (wq); - - item = XCALLOC (MTYPE_WORK_QUEUE_ITEM, - sizeof (struct work_queue_item)); - - return item; -} - static void -work_queue_item_free (struct work_queue_item *item) +work_queue_item_free (struct work_queue *wq, struct work_queue_item *item) { - XFREE (MTYPE_WORK_QUEUE_ITEM, item); + /* call private data deletion callback if needed */ + if (wq->spec.del_item_data != NULL) + wq->spec.del_item_data (wq, item) ; + + XFREE (MTYPE_WORK_QUEUE_ITEM, item) ; return; } @@ -58,46 +50,40 @@ struct work_queue * work_queue_new (struct thread_master *m, const char *queue_name) { struct work_queue *new; - + new = XCALLOC (MTYPE_WORK_QUEUE, sizeof (struct work_queue)); if (new == NULL) return new; - - new->name = XSTRDUP (MTYPE_WORK_QUEUE_NAME, queue_name); + + new->name = XSTRDUP (MTYPE_WORK_QUEUE_NAME, queue_name); new->master = m; SET_FLAG (new->flags, WQ_UNPLUGGED); - - if ( (new->items = list_new ()) == NULL) - { - XFREE (MTYPE_WORK_QUEUE_NAME, new->name); - XFREE (MTYPE_WORK_QUEUE, new); - - return NULL; - } - - new->items->del = (void (*)(void *)) work_queue_item_free; - + listnode_add (&work_queues, new); - + new->cycles.granularity = WORK_QUEUE_MIN_GRANULARITY; /* Default values, can be overriden by caller */ new->spec.hold = WORK_QUEUE_DEFAULT_HOLD; - + return new; } void work_queue_free (struct work_queue *wq) { + work_queue_item item ; + if (wq->thread != NULL) thread_cancel(wq->thread); - - /* list_delete frees items via callback */ - list_delete (wq->items); - listnode_delete (&work_queues, wq); - + + while ((item = wq->head) != NULL) + { + wq->head = item->next ; + work_queue_item_free(wq, item) ; + } ; + XFREE (MTYPE_WORK_QUEUE_NAME, wq->name); XFREE (MTYPE_WORK_QUEUE, wq); return; @@ -109,59 +95,151 @@ work_queue_schedule (struct work_queue *wq, unsigned int delay) /* if appropriate, schedule work queue thread */ if ( CHECK_FLAG (wq->flags, WQ_UNPLUGGED) && (wq->thread == NULL) - && (listcount (wq->items) > 0) ) + && (wq->head != NULL) ) { - wq->thread = thread_add_background (wq->master, work_queue_run, + wq->thread = thread_add_background (wq->master, work_queue_run, wq, delay); return 1; } else return 0; } - -void -work_queue_add (struct work_queue *wq, void *data) + +/*------------------------------------------------------------------------------ + * Create new work queue item and place on the end of the given work queue. + * + * Schedules the work queue if there were no items (unless already scheduled + * or plugged). + * + * Returns the address of the args area in the new item. 
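
With work_queue_item_free() now invoking the deletion callback itself, that
callback receives the whole item rather than a bare data pointer.  A
hypothetical del_item_data under the new signature (struct my_args and the
MTYPE_TMP usage are illustrative, not from the patch):

/* Hypothetical per-item clean-up under the new callback signature.     */
struct my_args
{
  char *str ;                   /* per-item heap state to be released   */
} ;

static void
my_del_item_data (struct work_queue *wq, work_queue_item item)
{
  struct my_args *args = work_queue_item_args(item) ;

  /* Free only what the args area owns -- the item itself is freed by
   * work_queue_item_free() immediately after this returns.
   */
  XFREE (MTYPE_TMP, args->str) ;
}
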
+ */ +extern void* +work_queue_item_add (struct work_queue *wq) { - struct work_queue_item *item; - + work_queue_item item ; + assert (wq); - if (!(item = work_queue_item_new (wq))) + item = XCALLOC (MTYPE_WORK_QUEUE_ITEM, sizeof (struct work_queue_item)); + + if (item == NULL) { zlog_err ("%s: unable to get new queue item", __func__); - return; + return NULL ; + } + + item->next = NULL ; + if (wq->head == NULL) + { + assert(wq->list_count == 0) ; + wq->head = item ; + item->prev = NULL ; } - - item->data = data; - listnode_add (wq->items, item); - + else + { + assert((wq->tail != NULL) && (wq->list_count > 0)) ; + wq->tail->next = item ; + item->prev = wq->tail ; + } ; + wq->tail = item ; + + ++wq->list_count ; work_queue_schedule (wq, wq->spec.hold); - - return; + + return work_queue_item_args(item) ; } static void -work_queue_item_remove (struct work_queue *wq, struct listnode *ln) +work_queue_item_remove (struct work_queue *wq, work_queue_item item) { - struct work_queue_item *item = listgetdata (ln); + assert ((wq != NULL) && (item != NULL)) ; + + if (wq->head == item) + { + /* Removing the first item */ + assert(item->prev == NULL) ; + + wq->head = item->next ; - assert (item && item->data); + if (wq->tail == item) + { + /* Removing the only item */ + assert((item->next == NULL) && (wq->list_count == 1)) ; + wq->tail = NULL ; + } + else + { + /* First, but not the only item */ + assert((item->next != NULL) && (wq->list_count > 1)) ; + wq->head->prev = NULL ; + } ; + } + else if (wq->tail == item) + { + /* Removing last, but not only item */ + assert(item->next == NULL) ; + assert((item->prev != NULL) && (wq->list_count > 1)) ; + + wq->tail = item->prev ; + wq->tail->next = NULL ; + } + else + { + /* Removing from somewhere in middle */ + assert(item->next != NULL) ; + assert((item->prev != NULL) && (wq->list_count > 2)) ; + + item->prev->next = item->next ; + item->next->prev = item->prev ; + } ; - /* call private data deletion callback if needed */ - if (wq->spec.del_item_data) - wq->spec.del_item_data (wq, item->data); + --wq->list_count ; + work_queue_item_free (wq, item); - list_delete_node (wq->items, ln); - work_queue_item_free (item); - return; } -static void -work_queue_item_requeue (struct work_queue *wq, struct listnode *ln) +static work_queue_item +work_queue_item_requeue (struct work_queue *wq, work_queue_item item) { - LISTNODE_DETACH (wq->items, ln); - LISTNODE_ATTACH (wq->items, ln); /* attach to end of list */ + work_queue_item next = item->next ; + work_queue_item last = wq->tail ; + + assert(last != NULL) ; + + if (last == item) + { + /* Requeuing last item -- easy ! */ + assert(next == NULL) ; + return item ; + } ; + + assert(next != NULL) ; + + if (wq->head == item) + { + /* Requeuing first, but not only item */ + assert(item->prev == NULL) ; + + wq->head = next ; + next->prev = NULL ; + } + else + { + /* Requeuing something in middle */ + assert(item->prev != NULL) ; + + item->prev->next = item->next ; + item->next->prev = item->prev ; + } ; + + item->next = NULL ; + item->prev = last ; + + last->next = item ; + wq->tail = item ; + + return next ; } DEFUN(show_work_queues, @@ -172,8 +250,8 @@ DEFUN(show_work_queues, { struct listnode *node; struct work_queue *wq; - - vty_out (vty, + + vty_out (vty, "%c %8s %5s %8s %21s%s", ' ', "List","(ms) ","Q. 
Runs","Cycle Counts ", VTY_NEWLINE); @@ -183,24 +261,24 @@ DEFUN(show_work_queues, "Items", "Hold", "Total", - "Best","Gran.","Avg.", - "Name", + "Best","Gran.","Avg.", + "Name", VTY_NEWLINE); - + for (ALL_LIST_ELEMENTS_RO ((&work_queues), node, wq)) { vty_out (vty,"%c %8d %5d %8ld %7d %6d %6u %s%s", (CHECK_FLAG (wq->flags, WQ_UNPLUGGED) ? ' ' : 'P'), - listcount (wq->items), + wq->list_count, wq->spec.hold, wq->runs, wq->cycles.best, wq->cycles.granularity, - (wq->runs) ? + (wq->runs) ? (unsigned int) (wq->cycles.total / wq->runs) : 0, wq->name, VTY_NEWLINE); } - + return CMD_SUCCESS; } @@ -212,9 +290,9 @@ work_queue_plug (struct work_queue *wq) { if (wq->thread) thread_cancel (wq->thread); - + wq->thread = NULL; - + UNSET_FLAG (wq->flags, WQ_UNPLUGGED); } @@ -232,22 +310,21 @@ work_queue_unplug (struct work_queue *wq) /* timer thread to process a work queue * will reschedule itself if required, - * otherwise work_queue_item_add + * otherwise work_queue_item_add */ int work_queue_run (struct thread *thread) { struct work_queue *wq; - struct work_queue_item *item; + work_queue_item next, item ; wq_item_status ret; unsigned int cycles = 0; - struct listnode *node, *nnode; char yielded = 0; wq = THREAD_ARG (thread); wq->thread = NULL; - assert (wq && wq->items); + assert (wq != NULL) ; /* calculate cycle granularity: * list iteration == 1 cycle @@ -258,38 +335,40 @@ work_queue_run (struct thread *thread) * * Best: starts low, can only increase * - * Granularity: starts at WORK_QUEUE_MIN_GRANULARITY, can be decreased - * if we run to end of time slot, can increase otherwise + * Granularity: starts at WORK_QUEUE_MIN_GRANULARITY, can be decreased + * if we run to end of time slot, can increase otherwise * by a small factor. * * We could use just the average and save some work, however we want to be * able to adjust quickly to CPU pressure. Average wont shift much if * daemon has been running a long time. 
*/ - if (wq->cycles.granularity == 0) - wq->cycles.granularity = WORK_QUEUE_MIN_GRANULARITY; + if (wq->cycles.granularity == 0) + wq->cycles.granularity = WORK_QUEUE_MIN_GRANULARITY; - for (ALL_LIST_ELEMENTS (wq->items, node, nnode, item)) + next = wq->head ; + while (next != NULL) { - assert (item && item->data); - + item = next ; + next = item->next ; /* default next item */ + /* dont run items which are past their allowed retries */ if (item->ran > wq->spec.max_retries) { /* run error handler, if any */ - if (wq->spec.errorfunc) - wq->spec.errorfunc (wq, item->data); - work_queue_item_remove (wq, node); + if (wq->spec.errorfunc != NULL) + wq->spec.errorfunc (wq, item); + work_queue_item_remove (wq, item); continue; } /* run and take care of items that want to be retried immediately */ do { - ret = wq->spec.workfunc (wq, item->data); + ret = wq->spec.workfunc (wq, item); item->ran++; } - while ((ret == WQ_RETRY_NOW) + while ((ret == WQ_RETRY_NOW) && (item->ran < wq->spec.max_retries)); switch (ret) @@ -308,21 +387,21 @@ work_queue_run (struct thread *thread) case WQ_REQUEUE: { item->ran--; - work_queue_item_requeue (wq, node); + next = work_queue_item_requeue (wq, item); break; } case WQ_RETRY_NOW: /* a RETRY_NOW that gets here has exceeded max_tries, same as ERROR */ case WQ_ERROR: { - if (wq->spec.errorfunc) + if (wq->spec.errorfunc != NULL) wq->spec.errorfunc (wq, item); } /* fall through here is deliberate */ case WQ_SUCCESS: default: { - work_queue_item_remove (wq, node); + work_queue_item_remove (wq, item); break; } } @@ -331,7 +410,7 @@ work_queue_run (struct thread *thread) cycles++; /* test if we should yield */ - if ( !(cycles % wq->cycles.granularity) + if ( !(cycles % wq->cycles.granularity) && thread_should_yield (thread)) { yielded = 1; @@ -346,15 +425,15 @@ stats: /* we yielded, check whether granularity should be reduced */ if (yielded && (cycles < wq->cycles.granularity)) { - wq->cycles.granularity = ((cycles > 0) ? cycles + wq->cycles.granularity = ((cycles > 0) ? cycles : WORK_QUEUE_MIN_GRANULARITY); } - + if (cycles >= (wq->cycles.granularity)) { if (cycles > wq->cycles.best) wq->cycles.best = cycles; - + /* along with yielded check, provides hysteris for granularity */ if (cycles > (wq->cycles.granularity * WQ_HYSTERIS_FACTOR * 2)) wq->cycles.granularity *= WQ_HYSTERIS_FACTOR; /* quick ramp-up */ @@ -362,7 +441,7 @@ stats: wq->cycles.granularity += WQ_HYSTERIS_FACTOR; } #undef WQ_HYSTERIS_FACTOR - + wq->runs++; wq->cycles.total += cycles; @@ -370,12 +449,12 @@ stats: printf ("%s: cycles %d, new: best %d, worst %d\n", __func__, cycles, wq->cycles.best, wq->cycles.granularity); #endif - + /* Is the queue done yet? If it is, call the completion callback. */ - if (listcount (wq->items) > 0) + if (wq->head != NULL) work_queue_schedule (wq, 0); else if (wq->spec.completion_func) wq->spec.completion_func (wq); - + return 0; } diff --git a/lib/workqueue.h b/lib/workqueue.h index f59499a0..5d2f2da2 100644 --- a/lib/workqueue.h +++ b/lib/workqueue.h @@ -1,4 +1,4 @@ -/* +/* * Quagga Work Queues. * * Copyright (C) 2005 Sun Microsystems, Inc. @@ -18,14 +18,18 @@ * You should have received a copy of the GNU General Public License * along with Quagga; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. 
 */
 
 #ifndef _QUAGGA_WORK_QUEUE_H
 #define _QUAGGA_WORK_QUEUE_H
 
+#ifndef Inline
+#define Inline static inline
+#endif
+
 /* Hold time for the initial schedule of a queue run, in  millisec */
-#define WORK_QUEUE_DEFAULT_HOLD  50
+#define WORK_QUEUE_DEFAULT_HOLD  50
 
 /* action value, for use by item processor and item error handlers */
 typedef enum
@@ -40,12 +44,37 @@ typedef enum
                          * the particular item.. */
 } wq_item_status;
 
+enum { wq_args_size_max = 24 } ;        /* maximum size of union wq_args */
+
+union wq_args
+{
+  void* data ;
+  char  bytes[wq_args_size_max] ;       /* empty space */
+} ;
+
+#define WQ_ARGS_SIZE_OK(s) CONFIRM(sizeof(struct s) <= wq_args_size_max)
+
 /* A single work queue item, unsurprisingly */
+typedef struct work_queue_item* work_queue_item ;
 struct work_queue_item
 {
-  void *data;                           /* opaque data */
+  union wq_args args ;                  /* cast as required             */
+
+  struct work_queue_item* next ;        /* the queue itself             */
+  struct work_queue_item* prev ;
+
   unsigned short ran;                   /* # of times item has been run */
-};
+} ;
+
+/* work_queue_item structures are malloced.  That guarantees maximum alignment.
 * To guarantee maximum alignment for "struct args", it must be first item !
+ *
+ * (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining
+ *  about first argument of offsetof().)
+ */
+typedef struct work_queue_item work_queue_item_t ;
+CONFIRM(offsetof(work_queue_item_t, args) == 0) ;
+                                        /* so guaranteed max alignment  */
 
 #define WQ_UNPLUGGED	(1 << 0) /* available for draining */
 
@@ -57,52 +86,55 @@ struct work_queue
   struct thread_master *master;       /* thread master */
   struct thread *thread;              /* thread, if one is active */
   char *name;                         /* work queue name */
-
+
   /* Specification for this work queue.
    * Public, must be set before use by caller. May be modified at will.
    */
   struct {
     /* optional opaque user data, global to the queue. */
     void *data;
-
+
     /* work function to process items with:
     * First argument is the workqueue queue.
     * Second argument is the item data
     */
-   wq_item_status (*workfunc) (struct work_queue *, void *);
+   wq_item_status (*workfunc) (struct work_queue *, work_queue_item);
 
    /* error handling function, optional */
-   void (*errorfunc) (struct work_queue *, struct work_queue_item *);
-
+   void (*errorfunc) (struct work_queue *, work_queue_item);
+
    /* callback to delete user specific item data */
-   void (*del_item_data) (struct work_queue *, void *);
-
+   void (*del_item_data) (struct work_queue *, work_queue_item);
+
    /* completion callback, called when queue is emptied, optional */
    void (*completion_func) (struct work_queue *);
-
+
    /* max number of retries to make for item that errors */
-   unsigned int max_retries;
+   unsigned int max_retries;
 
    unsigned int hold;                 /* hold time for first run, in ms */
  } spec;
-
+
  /* remaining fields should be opaque to users */
-  struct list *items;                 /* queue item list */
-  unsigned long runs;                 /* runs count */
-
+  work_queue_item head ;              /* queue item list */
+  work_queue_item tail ;
+  unsigned        list_count ;
+
+  unsigned long runs;                 /* runs count */
+
  struct {
    unsigned int best;
    unsigned int granularity;
    unsigned long total;
  } cycles;                            /* cycle counts */
-
+
  /* private state */
  u_int16_t flags;                     /* user set flag */
};
 
 /* User API */
 
-/* create a new work queue, of given name.
+/* create a new work queue, of given name.
* user must fill in the spec of the returned work queue before adding * anything to it */ @@ -112,7 +144,10 @@ extern struct work_queue *work_queue_new (struct thread_master *, extern void work_queue_free (struct work_queue *); /* Add the supplied data as an item onto the workqueue */ -extern void work_queue_add (struct work_queue *, void *); +Inline void work_queue_add (struct work_queue *, void *); + +extern void* work_queue_item_add(struct work_queue* wq) ; +Inline void* work_queue_item_args(work_queue_item item) ; /* plug the queue, ie prevent it from being drained / processed */ extern void work_queue_plug (struct work_queue *wq); @@ -122,4 +157,22 @@ extern void work_queue_unplug (struct work_queue *wq); /* Helpers, exported for thread.c and command.c */ extern int work_queue_run (struct thread *); extern struct cmd_element show_work_queues_cmd; + +/*============================================================================== + * The Inline functions + */ + +Inline void work_queue_add (struct work_queue* wq, void* data) +{ + union wq_args* args = work_queue_item_add(wq) ; + args->data = data ; +} + +/* Return pointer to the args area in the given work queue item */ +Inline void* +work_queue_item_args(work_queue_item item) +{ + return &item->args ; +} ; + #endif /* _QUAGGA_WORK_QUEUE_H */ diff --git a/tests/heavy-wq.c b/tests/heavy-wq.c index 4cd499a5..bf3ab85a 100644 --- a/tests/heavy-wq.c +++ b/tests/heavy-wq.c @@ -81,15 +81,15 @@ heavy_wq_add (struct vty *vty, const char *str, int i) } static void -slow_func_err (struct work_queue *wq, struct work_queue_item *item) +slow_func_err (struct work_queue *wq, work_queue_item item) { printf ("%s: running error function\n", __func__); } static void -slow_func_del (struct work_queue *wq, void *data) +slow_func_del (struct work_queue *wq, work_queue_item item) { - struct heavy_wq_node *hn = data; + struct heavy_wq_node *hn = item->args.data; assert (hn && hn->str); printf ("%s: %s\n", __func__, hn->str); XFREE (MTYPE_PREFIX_LIST_STR, hn->str); @@ -98,9 +98,9 @@ slow_func_del (struct work_queue *wq, void *data) } static wq_item_status -slow_func (struct work_queue *wq, void *data) +slow_func (struct work_queue *wq, work_queue_item item) { - struct heavy_wq_node *hn = data; + struct heavy_wq_node *hn = item->args.data; double x = 1; int j; @@ -163,11 +163,11 @@ heavy_wq_init () return -1; } - heavy_wq->spec.workfunc = &slow_func; - heavy_wq->spec.errorfunc = &slow_func_err; + heavy_wq->spec.workfunc = &slow_func; + heavy_wq->spec.errorfunc = &slow_func_err; heavy_wq->spec.del_item_data = &slow_func_del; - heavy_wq->spec.max_retries = 3; - heavy_wq->spec.hold = 1000; + heavy_wq->spec.max_retries = 3; + heavy_wq->spec.hold = 1000; return 0; } diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 12f3fa5a..0677cafd 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -16,7 +16,7 @@ * You should have received a copy of the GNU General Public License * along with GNU Zebra; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. */ #include <zebra.h> @@ -52,7 +52,7 @@ int rib_process_hold_time = 10; /* Each route type's string and default distance value. */ static const struct -{ +{ int key; int distance; } route_info[] = @@ -68,7 +68,7 @@ static const struct {ZEBRA_ROUTE_ISIS, 115}, {ZEBRA_ROUTE_BGP, 20 /* IBGP is 200. */} }; - + /* Vector for routing table. 
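
The point of the args union is that small per-item arguments now live inline
in the item, so most callers need no separate allocation at all.  A
hypothetical user of the new API (struct my_args, my_workfunc and my_enqueue
are illustrative names, not from the patch):

/* Hypothetical user of the inline-args API declared in workqueue.h.    */
struct my_args
{
  void *obj ;
  int   count ;
} ;
WQ_ARGS_SIZE_OK(my_args) ;      /* compile-time: fits in union wq_args   */

static wq_item_status
my_workfunc (struct work_queue *wq, work_queue_item item)
{
  struct my_args *args = work_queue_item_args(item) ;

  /* ... process args->obj, args->count ... */

  return WQ_SUCCESS ;
}

static void
my_enqueue (struct work_queue *wq, void *obj, int count)
{
  struct my_args *args = work_queue_item_add(wq) ;  /* item + args area */

  args->obj   = obj ;
  args->count = count ;
}

The old work_queue_add(wq, data) survives as an inline wrapper that stores
its pointer in args->data, which is how the converted callers in heavy-wq.c
and zebra_rib.c retrieve it.
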
*/ static vector vrf_vector; @@ -141,7 +141,7 @@ vrf_static_table (afi_t afi, safi_t safi, u_int32_t id) return vrf->stable[afi][safi]; } - + /* Add nexthop to the end of the list. */ static void nexthop_add (struct rib *rib, struct nexthop *nexthop) @@ -226,7 +226,7 @@ nexthop_ipv4_add (struct rib *rib, struct in_addr *ipv4, struct in_addr *src) } static struct nexthop * -nexthop_ipv4_ifindex_add (struct rib *rib, struct in_addr *ipv4, +nexthop_ipv4_ifindex_add (struct rib *rib, struct in_addr *ipv4, struct in_addr *src, unsigned int ifindex) { struct nexthop *nexthop; @@ -338,7 +338,7 @@ nexthop_active_ipv4 (struct rib *rib, struct nexthop *nexthop, int set, while (rn) { route_unlock_node (rn); - + /* If lookup self prefix return immediately. */ if (rn == top) return 0; @@ -354,7 +354,7 @@ nexthop_active_ipv4 (struct rib *rib, struct nexthop *nexthop, int set, /* If there is no selected route or matched route is EGP, go up tree. */ - if (! match + if (! match || match->type == ZEBRA_ROUTE_BGP) { do { @@ -371,7 +371,7 @@ nexthop_active_ipv4 (struct rib *rib, struct nexthop *nexthop, int set, newhop = match->nexthop; if (newhop && nexthop->type == NEXTHOP_TYPE_IPV4) nexthop->ifindex = newhop->ifindex; - + return 1; } else if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_INTERNAL)) @@ -439,7 +439,7 @@ nexthop_active_ipv6 (struct rib *rib, struct nexthop *nexthop, int set, while (rn) { route_unlock_node (rn); - + /* If lookup self prefix return immediately. */ if (rn == top) return 0; @@ -473,7 +473,7 @@ nexthop_active_ipv6 (struct rib *rib, struct nexthop *nexthop, int set, if (newhop && nexthop->type == NEXTHOP_TYPE_IPV6) nexthop->ifindex = newhop->ifindex; - + return 1; } else if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_INTERNAL)) @@ -534,7 +534,7 @@ rib_match_ipv4 (struct in_addr addr) while (rn) { route_unlock_node (rn); - + /* Pick up selected route. */ for (match = rn->info; match; match = match->next) { @@ -546,7 +546,7 @@ rib_match_ipv4 (struct in_addr addr) /* If there is no selected route or matched route is EGP, go up tree. */ - if (! match + if (! match || match->type == ZEBRA_ROUTE_BGP) { do { @@ -607,7 +607,7 @@ rib_lookup_ipv4 (struct prefix_ipv4 *p) if (match->type == ZEBRA_ROUTE_CONNECT) return match; - + for (nexthop = match->nexthop; nexthop; nexthop = nexthop->next) if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)) return match; @@ -665,7 +665,7 @@ rib_lookup_ipv4_route (struct prefix_ipv4 *p, union sockunion * qgate) if (match->type == ZEBRA_ROUTE_CONNECT) return ZEBRA_RIB_FOUND_CONNECTED; - + /* Ok, we have a cood candidate, let's check it's nexthop list... */ for (nexthop = match->nexthop; nexthop; nexthop = nexthop->next) if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)) @@ -716,7 +716,7 @@ rib_match_ipv6 (struct in6_addr *addr) while (rn) { route_unlock_node (rn); - + /* Pick up selected route. */ for (match = rn->info; match; match = match->next) { @@ -728,7 +728,7 @@ rib_match_ipv6 (struct in6_addr *addr) /* If there is no selected route or matched route is EGP, go up tree. */ - if (! match + if (! 
match || match->type == ZEBRA_ROUTE_BGP) { do { @@ -900,7 +900,7 @@ nexthop_active_update (struct route_node *rn, struct rib *rib, int set) return rib->nexthop_active_num; } - + static void rib_install_kernel (struct route_node *rn, struct rib *rib) @@ -980,9 +980,9 @@ rib_process (struct route_node *rn) int installed = 0; struct nexthop *nexthop = NULL; char buf[INET6_ADDRSTRLEN]; - + assert (rn); - + if (IS_ZEBRA_DEBUG_RIB || IS_ZEBRA_DEBUG_RIB_Q) inet_ntop (rn->p.family, &rn->p.u.prefix, buf, INET6_ADDRSTRLEN); @@ -992,14 +992,14 @@ rib_process (struct route_node *rn) * may be passed to rib_unlink() in the middle of iteration. */ next = rib->next; - + /* Currently installed rib. */ if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED)) { assert (fib == NULL); fib = rib; } - + /* Unlock removed routes, so they'll be freed, bar the FIB entry, * which we need to do do further work with below. */ @@ -1014,10 +1014,10 @@ rib_process (struct route_node *rn) } else del = rib; - + continue; } - + /* Skip unreachable nexthop. */ if (! nexthop_active_update (rn, rib, 0)) continue; @@ -1032,14 +1032,14 @@ rib_process (struct route_node *rn) select = rib; continue; } - + /* filter route selection in following order: * - connected beats other types * - lower distance beats higher * - lower metric beats higher for equal distance * - last, hence oldest, route wins tie break. */ - + /* Connected routes. Pick the last connected * route of the set of lowest metric connected routes. */ @@ -1052,18 +1052,18 @@ rib_process (struct route_node *rn) } else if (select->type == ZEBRA_ROUTE_CONNECT) continue; - + /* higher distance loses */ if (rib->distance > select->distance) continue; - + /* lower wins */ if (rib->distance < select->distance) { select = rib; continue; } - + /* metric tie-breaks equal distance */ if (rib->metric <= select->metric) select = rib; @@ -1090,14 +1090,14 @@ rib_process (struct route_node *rn) /* Set real nexthop. */ nexthop_active_update (rn, select, 1); - + if (! RIB_SYSTEM_ROUTE (select)) rib_install_kernel (rn, select); redistribute_add (&rn->p, select); } else if (! RIB_SYSTEM_ROUTE (select)) { - /* Housekeeping code to deal with + /* Housekeeping code to deal with race conditions in kernel with linux netlink reporting interface up before IPv4 or IPv6 protocol is ready to add routes. @@ -1110,7 +1110,7 @@ rib_process (struct route_node *rn) installed = 1; break; } - if (! installed) + if (! installed) rib_install_kernel (rn, select); } goto end; @@ -1167,7 +1167,7 @@ end: } /* Take a list of route_node structs and return 1, if there was a record - * picked from it and processed by rib_process(). Don't process more, + * picked from it and processed by rib_process(). Don't process more, * than one RN record; operate only in the specified sub-queue. */ static unsigned int @@ -1202,9 +1202,9 @@ process_subq (struct list * subq, u_char qindex) * is pointed to the meta queue structure. 
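
The selection rules listed in the comment above apply pairwise as the list of
RIB entries is scanned; restated as a standalone predicate (a sketch -- the
real code folds this into the scan loop, and the connected-route branch here
paraphrases context lines elided from the hunk):

/* Sketch: would candidate rib displace the current select?  Mirrors the
 * ordering in rib_process(): connected beats all, then lower distance,
 * then lower-or-equal metric -- the "or equal" is what makes the last
 * scanned, hence oldest, route win the tie-break.
 */
static int
rib_candidate_wins (const struct rib *rib, const struct rib *select)
{
  if (rib->type == ZEBRA_ROUTE_CONNECT)
    return (select->type != ZEBRA_ROUTE_CONNECT)
                                  || (rib->metric <= select->metric) ;

  if (select->type == ZEBRA_ROUTE_CONNECT)
    return 0 ;

  if (rib->distance != select->distance)
    return (rib->distance < select->distance) ;

  return (rib->metric <= select->metric) ;
}
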
*/ static wq_item_status -meta_queue_process (struct work_queue *dummy, void *data) +meta_queue_process (struct work_queue *dummy, work_queue_item item) { - struct meta_queue * mq = data; + struct meta_queue * mq = item->args.data ; unsigned i; for (i = 0; i < MQ_SIZE; i++) @@ -1271,7 +1271,7 @@ rib_meta_queue_add (struct meta_queue *mq, struct route_node *rn) static void rib_queue_add (struct zebra_t *zebra, struct route_node *rn) { - + if (IS_ZEBRA_DEBUG_RIB_Q) { char buf[INET6_ADDRSTRLEN]; @@ -1289,7 +1289,7 @@ rib_queue_add (struct zebra_t *zebra, struct route_node *rn) * holder, if necessary, then push the work into it in any case. * This semantics was introduced after 0.99.9 release. */ - if (!zebra->ribq->items->count) + if (zebra->ribq->head == NULL) work_queue_add (zebra->ribq, zebra->mq); rib_meta_queue_add (zebra->mq, rn); @@ -1320,7 +1320,7 @@ meta_queue_new (void) static void rib_queue_init (struct zebra_t *zebra) { - if (! (zebra->ribq = work_queue_new (zebra->master, + if (! (zebra->ribq = work_queue_new (zebra->master, "route_node processing"))) { zlog_err ("%s: could not initialise work queue!", __func__); @@ -1328,12 +1328,13 @@ rib_queue_init (struct zebra_t *zebra) } /* fill in the work queue spec */ - zebra->ribq->spec.workfunc = &meta_queue_process; - zebra->ribq->spec.errorfunc = NULL; + zebra->ribq->spec.workfunc = &meta_queue_process; + zebra->ribq->spec.errorfunc = NULL; + zebra->ribq->spec.del_item_data = NULL ; /* XXX: TODO: These should be runtime configurable via vty */ zebra->ribq->spec.max_retries = 3; zebra->ribq->spec.hold = rib_process_hold_time; - + if (!(zebra->mq = meta_queue_new ())) zlog_err ("%s: could not initialise meta queue!", __func__); } @@ -1365,7 +1366,7 @@ rib_queue_init (struct zebra_t *zebra) * state must be preserved as and when the head RIB entry of a * route_node is changed by rib_unlink / rib_link. A small complication, * but saves having to allocate a dedicated object for this. - * + * * Refcounting (aka "locking" throughout the GNU Zebra and Quagga code): * * - route_nodes: refcounted by: @@ -1375,16 +1376,16 @@ rib_queue_init (struct zebra_t *zebra) * - managed by: rib_addqueue, rib_process. * */ - + /* Add RIB to head of the route node. */ static void rib_link (struct route_node *rn, struct rib *rib) { struct rib *head; char buf[INET6_ADDRSTRLEN]; - + assert (rib && rn); - + route_lock_node (rn); /* rn route table reference */ if (IS_ZEBRA_DEBUG_RIB) @@ -1412,8 +1413,8 @@ rib_link (struct route_node *rn, struct rib *rib) static void rib_addnode (struct route_node *rn, struct rib *rib) { - /* RIB node has been un-removed before route-node is processed. - * route_node must hence already be on the queue for processing.. + /* RIB node has been un-removed before route-node is processed. + * route_node must hence already be on the queue for processing.. 
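
The ribq therefore only ever carries a single item, whose args point at the
meta queue; the real work lives on the meta queue's priority sub-queues.  The
invariant maintained by rib_queue_add(), restated (a sketch; the names are
from the code, the function is illustrative):

/* Sketch: one holder item on the work queue, everything else on the
 * meta queue.  meta_queue_process() above then drains the sub-queues
 * in priority order, one route_node per work-queue cycle.
 */
static void
rib_queue_submit (struct zebra_t *zebra, struct route_node *rn)
{
  if (zebra->ribq->head == NULL)               /* no holder item yet?   */
    work_queue_add (zebra->ribq, zebra->mq) ;  /* add the single holder */

  rib_meta_queue_add (zebra->mq, rn) ;         /* the actual work       */
}
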
*/ if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) { @@ -1453,7 +1454,7 @@ rib_unlink (struct route_node *rn, struct rib *rib) else { rn->info = rib->next; - + if (rn->info) { if (IS_ZEBRA_DEBUG_RIB) @@ -1489,7 +1490,7 @@ rib_delnode (struct route_node *rn, struct rib *rib) } int -rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, +rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, struct in_addr *gate, struct in_addr *src, unsigned int ifindex, u_int32_t vrf_id, u_int32_t metric, u_char distance) @@ -1527,7 +1528,7 @@ rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, { if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) continue; - + if (rib->type != type) continue; if (rib->type != ZEBRA_ROUTE_CONNECT) @@ -1576,7 +1577,7 @@ rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, if (IS_ZEBRA_DEBUG_RIB) zlog_debug ("%s: calling rib_addnode (%p, %p)", __func__, rn, rib); rib_addnode (rn, rib); - + /* Free implicit route.*/ if (same) { @@ -1584,7 +1585,7 @@ rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, zlog_debug ("%s: calling rib_delnode (%p, %p)", __func__, rn, rib); rib_delnode (rn, same); } - + route_unlock_node (rn); return 0; } @@ -1753,7 +1754,7 @@ rib_add_ipv4_multipath (struct prefix_ipv4 *p, struct rib *rib) struct route_node *rn; struct rib *same; struct nexthop *nexthop; - + /* Lookup table. */ table = vrf_table (AFI_IP, SAFI_UNICAST, 0); if (! table) @@ -1767,7 +1768,7 @@ rib_add_ipv4_multipath (struct prefix_ipv4 *p, struct rib *rib) rib->distance = route_info[rib->type].distance; /* iBGP distance is 200. */ - if (rib->type == ZEBRA_ROUTE_BGP + if (rib->type == ZEBRA_ROUTE_BGP && CHECK_FLAG (rib->flags, ZEBRA_FLAG_IBGP)) rib->distance = 200; } @@ -1781,12 +1782,12 @@ rib_add_ipv4_multipath (struct prefix_ipv4 *p, struct rib *rib) { if (CHECK_FLAG (same->status, RIB_ENTRY_REMOVED)) continue; - + if (same->type == rib->type && same->table == rib->table && same->type != ZEBRA_ROUTE_CONNECT) break; } - + /* If this route is kernel route, set FIB flag to the route. */ if (rib->type == ZEBRA_ROUTE_KERNEL || rib->type == ZEBRA_ROUTE_CONNECT) for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next) @@ -1812,7 +1813,7 @@ rib_add_ipv4_multipath (struct prefix_ipv4 *p, struct rib *rib) } rib_delnode (rn, same); } - + route_unlock_node (rn); return 0; } @@ -1842,8 +1843,8 @@ rib_delete_ipv4 (int type, int flags, struct prefix_ipv4 *p, if (IS_ZEBRA_DEBUG_KERNEL && gate) zlog_debug ("rib_delete_ipv4(): route delete %s/%d via %s ifindex %d", inet_ntop (AF_INET, &p->prefix, buf1, INET_ADDRSTRLEN), - p->prefixlen, - inet_ntoa (*gate), + p->prefixlen, + inet_ntoa (*gate), ifindex); /* Lookup route node. */ @@ -1895,7 +1896,7 @@ rib_delete_ipv4 (int type, int flags, struct prefix_ipv4 *p, else if (gate == NULL || ((nexthop = rib->nexthop) && (IPV4_ADDR_SAME (&nexthop->gate.ipv4, gate) || - IPV4_ADDR_SAME (&nexthop->rgate.ipv4, gate)))) + IPV4_ADDR_SAME (&nexthop->rgate.ipv4, gate)))) { same = rib; break; @@ -1936,14 +1937,14 @@ rib_delete_ipv4 (int type, int flags, struct prefix_ipv4 *p, return ZEBRA_ERR_RTNOEXIST; } } - + if (same) rib_delnode (rn, same); - + route_unlock_node (rn); return 0; } - + /* Install static route into rib. 
*/ static void static_install_ipv4 (struct prefix *p, struct static_ipv4 *si) @@ -1963,7 +1964,7 @@ static_install_ipv4 (struct prefix *p, struct static_ipv4 *si) { if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) continue; - + if (rib->type == ZEBRA_ROUTE_STATIC && rib->distance == si->distance) break; } @@ -1991,7 +1992,7 @@ static_install_ipv4 (struct prefix *p, struct static_ipv4 *si) { /* This is new static route. */ rib = XCALLOC (MTYPE_RIB, sizeof (struct rib)); - + rib->type = ZEBRA_ROUTE_STATIC; rib->distance = si->distance; rib->metric = 0; @@ -2048,7 +2049,7 @@ static_uninstall_ipv4 (struct prefix *p, struct static_ipv4 *si) table = vrf_table (AFI_IP, SAFI_UNICAST, 0); if (! table) return; - + /* Lookup existing route with type and distance. */ rn = route_node_lookup (table, p); if (! rn) @@ -2080,7 +2081,7 @@ static_uninstall_ipv4 (struct prefix *p, struct static_ipv4 *si) route_unlock_node (rn); return; } - + /* Check nexthop. */ if (rib->nexthop_num == 1) rib_delnode (rn, rib); @@ -2113,7 +2114,7 @@ static_add_ipv4 (struct prefix *p, struct in_addr *gate, const char *ifname, stable = vrf_static_table (AFI_IP, SAFI_UNICAST, vrf_id); if (! stable) return -1; - + /* Lookup static route prefix. */ rn = route_node_get (stable, p); @@ -2244,7 +2245,7 @@ static_delete_ipv4 (struct prefix *p, struct in_addr *gate, const char *ifname, if (si->next) si->next->prev = si->prev; route_unlock_node (rn); - + /* Free static route configuration. */ if (ifname) XFREE (0, si->gate.ifname); @@ -2255,7 +2256,7 @@ static_delete_ipv4 (struct prefix *p, struct in_addr *gate, const char *ifname, return 1; } - + #ifdef HAVE_IPV6 static int rib_bogus_ipv6 (int type, struct prefix_ipv6 *p, @@ -2300,7 +2301,7 @@ rib_add_ipv6 (int type, int flags, struct prefix_ipv6 *p, /* Set default distance by route type. */ if (!distance) distance = route_info[type].distance; - + if (type == ZEBRA_ROUTE_BGP && CHECK_FLAG (flags, ZEBRA_FLAG_IBGP)) distance = 200; @@ -2336,7 +2337,7 @@ rib_add_ipv6 (int type, int flags, struct prefix_ipv6 *p, /* Allocate new rib structure. */ rib = XCALLOC (MTYPE_RIB, sizeof (struct rib)); - + rib->type = type; rib->distance = distance; rib->flags = flags; @@ -2367,7 +2368,7 @@ rib_add_ipv6 (int type, int flags, struct prefix_ipv6 *p, /* Free implicit route.*/ if (same) rib_delnode (rn, same); - + route_unlock_node (rn); return 0; } @@ -2393,7 +2394,7 @@ rib_delete_ipv6 (int type, int flags, struct prefix_ipv6 *p, table = vrf_table (AFI_IP6, SAFI_UNICAST, 0); if (! table) return 0; - + /* Lookup route node. */ rn = route_node_lookup (table, (struct prefix *) p); if (! rn) @@ -2487,11 +2488,11 @@ rib_delete_ipv6 (int type, int flags, struct prefix_ipv6 *p, if (same) rib_delnode (rn, same); - + route_unlock_node (rn); return 0; } - + /* Install static route into rib. */ static void static_install_ipv6 (struct prefix *p, struct static_ipv6 *si) @@ -2540,7 +2541,7 @@ static_install_ipv6 (struct prefix *p, struct static_ipv6 *si) { /* This is new static route. */ rib = XCALLOC (MTYPE_RIB, sizeof (struct rib)); - + rib->type = ZEBRA_ROUTE_STATIC; rib->distance = si->distance; rib->metric = 0; @@ -2608,7 +2609,7 @@ static_uninstall_ipv6 (struct prefix *p, struct static_ipv6 *si) { if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) continue; - + if (rib->type == ZEBRA_ROUTE_STATIC && rib->distance == si->distance) break; } @@ -2630,7 +2631,7 @@ static_uninstall_ipv6 (struct prefix *p, struct static_ipv6 *si) route_unlock_node (rn); return; } - + /* Check nexthop. 
*/ if (rib->nexthop_num == 1) { @@ -2664,12 +2665,12 @@ static_add_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, stable = vrf_static_table (AFI_IP6, SAFI_UNICAST, vrf_id); if (! stable) return -1; - + if (!gate && (type == STATIC_IPV6_GATEWAY || type == STATIC_IPV6_GATEWAY_IFNAME)) return -1; - - if (!ifname && + + if (!ifname && (type == STATIC_IPV6_GATEWAY_IFNAME || type == STATIC_IPV6_IFNAME)) return -1; @@ -2679,7 +2680,7 @@ static_add_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, /* Do nothing if there is a same static route. */ for (si = rn->info; si; si = si->next) { - if (distance == si->distance + if (distance == si->distance && type == si->type && (! gate || IPV6_ADDR_SAME (gate, &si->ipv6)) && (! ifname || strcmp (ifname, si->ifname) == 0)) @@ -2757,7 +2758,7 @@ static_delete_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, /* Find same static route is the tree */ for (si = rn->info; si; si = si->next) - if (distance == si->distance + if (distance == si->distance && type == si->type && (! gate || IPV6_ADDR_SAME (gate, &si->ipv6)) && (! ifname || strcmp (ifname, si->ifname) == 0)) @@ -2780,7 +2781,7 @@ static_delete_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, rn->info = si->next; if (si->next) si->next->prev = si->prev; - + /* Free static route configuration. */ if (ifname) XFREE (0, si->ifname); @@ -2789,14 +2790,14 @@ static_delete_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, return 1; } #endif /* HAVE_IPV6 */ - + /* RIB update function. */ void rib_update (void) { struct route_node *rn; struct route_table *table; - + table = vrf_table (AFI_IP, SAFI_UNICAST, 0); if (table) for (rn = route_top (table); rn; rn = route_next (rn)) @@ -2810,7 +2811,7 @@ rib_update (void) rib_queue_add (&zebrad, rn); } - + /* Remove all routes which comes from non main table. */ static void rib_weed_table (struct route_table *table) @@ -2841,7 +2842,7 @@ rib_weed_tables (void) rib_weed_table (vrf_table (AFI_IP, SAFI_UNICAST, 0)); rib_weed_table (vrf_table (AFI_IP6, SAFI_UNICAST, 0)); } - + /* Delete self installed routes after zebra is relaunched. */ static void rib_sweep_table (struct route_table *table) @@ -2860,7 +2861,7 @@ rib_sweep_table (struct route_table *table) if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) continue; - if (rib->type == ZEBRA_ROUTE_KERNEL && + if (rib->type == ZEBRA_ROUTE_KERNEL && CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELFROUTE)) { ret = rib_uninstall_kernel (rn, rib); @@ -2877,7 +2878,7 @@ rib_sweep_route (void) rib_sweep_table (vrf_table (AFI_IP, SAFI_UNICAST, 0)); rib_sweep_table (vrf_table (AFI_IP6, SAFI_UNICAST, 0)); } - + /* Close RIB and clean up kernel routes. */ static void rib_close_table (struct route_table *table) @@ -2902,7 +2903,7 @@ rib_close (void) rib_close_table (vrf_table (AFI_IP, SAFI_UNICAST, 0)); rib_close_table (vrf_table (AFI_IP6, SAFI_UNICAST, 0)); } - + /* Routing information base initialize. */ void rib_init (void) |