author     Chris Hall <GMCH@hestia.halldom.com>    2010-02-16 09:52:14 +0000
committer  Chris Hall <GMCH@hestia.halldom.com>    2010-02-16 09:52:14 +0000
commit     9856e17cf2495d1f7db16e866f16bc4a8447524d (patch)
tree       260d0c56610ad8f8db533737a59cbda33665752f
parent     3b9932d5f7cdeac29a81bceeb190479b675a0435 (diff)
download   quagga-9856e17cf2495d1f7db16e866f16bc4a8447524d.tar.bz2
           quagga-9856e17cf2495d1f7db16e866f16bc4a8447524d.tar.xz
Revised thread/timer handling, work queue and scheduling.
Updated Quagga thread handling to use qtimers when running under the
new qpnexus -- so in the new scheme all timers are qtimers.
Updated work queue handling so that each work queue item is a single
malloced structure, not three. (Only bgpd and zebra use the work
queue system.)
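In outline, the new item looks something like this -- a sketch only:
work_queue_item_args() and WQ_ARGS_SIZE_OK() can be seen in use in the
diff below, but the layout and size budget here are illustrative
assumptions, not the actual lib/workqueue.h definitions.

    #include <stdlib.h>

    enum { wq_args_size_max = 24 } ;        /* assumed size budget      */

    typedef struct work_queue_item* work_queue_item ;
    struct work_queue_item
    {
      work_queue_item next ;                /* queue linkage            */
      char args[wq_args_size_max] ;         /* caller's args, inline    */
    } ;

    /* The args area of an item -- what the work functions now receive. */
    #define work_queue_item_args(item) ((void*)((item)->args))

    /* Compile-time check that a given args structure fits in an item.  */
    #define WQ_ARGS_SIZE_OK(s) \
      extern char wq_args_size_ok_##s[(sizeof(struct s) \
                                            <= wq_args_size_max) ? 1 : -1]

    /* Illustrative stand-in for the args used by the BGP process queue
     * (see bgp_route.c below).
     */
    struct bgp_process_queue { void* bgp ; void* rn ; int afi, safi ; } ;
    WQ_ARGS_SIZE_OK(bgp_process_queue) ;

    /* One calloc per queued item, where there used to be three
     * separate allocations (per the commit message above).
     */
    static void*
    work_queue_item_add_model(work_queue_item* head)
    {
      work_queue_item item = calloc(1, sizeof(*item)) ;
      if (item == NULL)
        return NULL ;
      item->next = *head ;
      *head      = item ;
      return work_queue_item_args(item) ;   /* caller fills in its args */
    }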
When using qpnexus the background thread queue is no longer a timer
queue, but simply a list of pending background threads. When a
background thread is waiting on a timer, it sits in the qtimer pile,
just like any other thread.
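A toy model of that arrangement (all names and layouts here are
illustrative, not the actual thread/qtimers structures):

    #include <stddef.h>

    typedef long long qtime_mono_t ;     /* monotonic time, as in qtimers */

    struct thread ;

    struct qtimer                        /* an entry in the qtimer pile   */
    {
      qtime_mono_t   when ;              /* expiry time                   */
      struct thread* thread ;            /* the waiting background thread */
    } ;

    struct thread
    {
      int (*func)(struct thread*) ;      /* thread action                 */
      struct qtimer  qtr ;               /* its place in the qtimer pile  */
      struct thread* next ;              /* ... or on the pending list    */
    } ;

    static struct thread* pending_background = NULL ;

    /* When the qtimer expires, its action just moves the owning thread
     * onto the simple list of pending background threads -- the thread
     * library no longer keeps a timer queue of its own.
     */
    static void
    qtimer_expired_model(struct qtimer* qtr)
    {
      struct thread* thread = qtr->thread ;

      thread->next       = pending_background ;
      pending_background = thread ;
    }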
When using qpnexus, the only remaining quagga thread queues are the
event and ready queues.
Revised the qpnexus loop so that it considers the background threads
only when there is nothing else to do.
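The diff below registers foreground and background hook functions
which return non-zero when they find work to do (see
bgp_connection_queue_process() and thread_dispatch_background()).
Under that assumption the loop can be modelled roughly as follows,
with stubs standing in for the real hooks:

    /* Model of the revised qpnexus loop: background work runs only on
     * an otherwise idle pass.  Stub hooks return non-zero iff they
     * found and dispatched some work.
     */
    static int  process_io(void)      { return 0 ; } /* qpselect I/O     */
    static int  process_mqueue(void)  { return 0 ; } /* messages         */
    static int  foreground_hook(void) { return 0 ; } /* event/ready      */
    static int  background_hook(void) { return 0 ; } /* background       */
    static void wait_for_work(void)   { }            /* block in pselect */

    static void
    nexus_loop_model(void)
    {
      for (;;)
        {
          int done = 0 ;

          done |= process_io() ;        /* I/O first                    */
          done |= process_mqueue() ;    /* then inter-engine messages   */
          done |= foreground_hook() ;   /* then event + ready threads   */

          if (!done)                    /* nothing else to do, so may   */
            done = background_hook() ;  /* consider background threads  */

          if (!done)
            wait_for_work() ;           /* idle: block until next event */
        }
    }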
Revised write I/O in the BGP Engine so that all writing is via the
connection's write buffer. Revised write I/O in the Routeing Engine
so that it passes groups of updates in a single mqueue message. This
reduces the number of TCP packets sent (because BGP messages are
collected together in the connection's write buffer) and the number
of mqueue messages involved.
(No need for TCP_CORK.)
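On the BGP Engine side the buffer discipline is visible in the
bgp_connection changes below. A standalone model of the pointer
states follows -- the reset/unwritable settings and the empty
condition are as in the diff, but the field order and the body of
the "cannot" test are assumptions:

    #include <stddef.h>
    #include <stdint.h>

    struct bgp_wbuffer               /* modelled on bgp_connection.h   */
    {
      uint8_t* p_out ;               /* next byte to send              */
      uint8_t* p_in ;                /* next free byte                 */
      uint8_t* base ;                /* NULL until allocated           */
      uint8_t* limit ;               /* end of buffer                  */
    } ;

    /* Empty: p_out == p_in.  With all pointers NULL at initialisation
     * the buffer is "empty but not writable" until the TCP connection
     * comes up and the buffer is allocated.
     */
    static inline void
    wbuffer_reset(struct bgp_wbuffer* wb)      /* empty, if allocated   */
    {
      wb->p_in = wb->p_out = wb->base ;
    }

    static inline void
    wbuffer_unwritable(struct bgp_wbuffer* wb) /* full, nothing pending */
    {
      wb->p_in = wb->p_out = wb->limit ;
    }

    static inline int
    wbuffer_cannot(struct bgp_wbuffer* wb, size_t want)
    {
      if (wb->base == NULL)
        return 1 ;          /* never any room in an unallocated buffer */

      return (size_t)(wb->limit - wb->p_in) < (want + 1) ;
    }

Staging messages here and letting the qpselect write action drain the
buffer is what collects several BGP messages into one TCP segment,
without resorting to TCP_CORK.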
Code and comments review for the new code.
modified: bgpd/bgp_advertise.c
modified: bgpd/bgp_common.h
modified: bgpd/bgp_connection.c
modified: bgpd/bgp_connection.h
modified: bgpd/bgp_engine.h
modified: bgpd/bgp_fsm.c
modified: bgpd/bgp_main.c
modified: bgpd/bgp_msg_read.c
modified: bgpd/bgp_msg_write.c
modified: bgpd/bgp_network.c
modified: bgpd/bgp_packet.c
modified: bgpd/bgp_packet.h
modified: bgpd/bgp_peer.c
modified: bgpd/bgp_peer_index.h
modified: bgpd/bgp_route.c
modified: bgpd/bgp_route_refresh.h
modified: bgpd/bgp_session.c
modified: bgpd/bgp_session.h
modified: bgpd/bgpd.c
new file: bgpd/bgpd.cx
modified: lib/mqueue.h
modified: lib/qpnexus.c
modified: lib/qpnexus.h
modified: lib/qpselect.c
modified: lib/qtimers.c
modified: lib/qtimers.h
modified: lib/sigevent.c
modified: lib/stream.c
modified: lib/stream.h
modified: lib/thread.c
modified: lib/thread.h
modified: lib/workqueue.c
modified: lib/workqueue.h
modified: tests/heavy-wq.c
modified: zebra/zebra_rib.c
35 files changed, 6281 insertions(+), 1157 deletions(-)
diff --git a/bgpd/bgp_advertise.c b/bgpd/bgp_advertise.c index 90579e7c..04f1f847 100644 --- a/bgpd/bgp_advertise.c +++ b/bgpd/bgp_advertise.c @@ -300,7 +300,7 @@ bgp_adj_out_unset (struct bgp_node *rn, struct peer *peer, struct prefix *p, bgp_advertise_fifo_add(&peer->sync[afi][safi]->withdraw, adv); /* Schedule packet write. */ - bgp_write(peer); + bgp_write(peer, NULL) ; } else { diff --git a/bgpd/bgp_common.h b/bgpd/bgp_common.h index f6f06446..19db9d8a 100644 --- a/bgpd/bgp_common.h +++ b/bgpd/bgp_common.h @@ -107,7 +107,7 @@ enum bgp_session_events bgp_session_null_event = 0, bgp_session_eEstablished, /* session state -> sEstablished */ - bgp_session_eDisabled, /* disabled by Peering Engine */ + bgp_session_eDisabled, /* disabled by Routeing Engine */ bgp_session_eStart, /* enter sConnect/sAccept from sIdle */ bgp_session_eRetry, /* loop round in sConnect/sAccept */ diff --git a/bgpd/bgp_connection.c b/bgpd/bgp_connection.c index 8320ae49..1c427318 100644 --- a/bgpd/bgp_connection.c +++ b/bgpd/bgp_connection.c @@ -93,7 +93,6 @@ static const char* bgp_connection_tags[] = static void bgp_connection_init_host(bgp_connection connection, const char* tag) ; -static void bgp_write_buffer_init_new(bgp_wbuffer wb, size_t size) ; static void bgp_write_buffer_free(bgp_wbuffer wb) ; /*------------------------------------------------------------------------------ @@ -122,7 +121,8 @@ bgp_connection_init_new(bgp_connection connection, bgp_session session, * * comatose not comatose * * next NULL -- not on the connection queue * * prev NULL -- not on the connection queue - * * post bgp_fsm_null_event + * * follow_on bgp_fsm_null_event + * * exception bgp_session_null_event * * fsm_active not active * * notification NULL -- none received or sent * * err no error, so far @@ -138,15 +138,11 @@ bgp_connection_init_new(bgp_connection connection, bgp_session session, * * msg_type none -- set when reading message * * msg_size none -- set when reading message * * notification_pending nothing pending - * * wbuff all pointers NULL -- empty buffer - * *except* must set limit so is not "full". + * * wbuff all pointers NULL -- empty but not writable */ - - confirm(bgp_fsm_sInitial == 0) ; - confirm(bgp_fsm_null_event == 0) ; - - connection->wbuff.limit = connection->wbuff.base + - bgp_write_buffer_full_threshold ; + confirm(bgp_fsm_sInitial == 0) ; + confirm(bgp_fsm_null_event == 0) ; + confirm(bgp_session_null_event == 0) ; /* Link back to session, point at its mutex and point session here */ connection->session = session ; @@ -327,8 +323,8 @@ bgp_connection_exit(bgp_connection connection) static void bgp_connection_free(bgp_connection connection) { - assert( (connection->state == bgp_fsm_sStopping) - && (connection->session == NULL) + assert( (connection->state == bgp_fsm_sStopping) + && (connection->session == NULL) && ( (connection->lock_count == 0) || (connection->lock_count == CUT_LOOSE_LOCK_COUNT) ) ) ; @@ -353,20 +349,22 @@ bgp_connection_free(bgp_connection connection) } ; /*------------------------------------------------------------------------------ - * Allocate new write buffer and initialise pointers + * If required, allocate new write buffer. + * Initialise pointers empty and writable. * - * NB: assumes structure has been zeroised by the initialisation of the - * enclosing connection. + * NB: structure was zeroised the enclosing connection was initialised. + * Buffer may have been allocated since then. 
*/ static void -bgp_write_buffer_init_new(bgp_wbuffer wb, size_t size) +bgp_write_buffer_init(bgp_wbuffer wb, size_t size) { - assert(wb->base == NULL) ; - - wb->base = XMALLOC(MTYPE_STREAM_DATA, size) ; - wb->limit = wb->base + size ; + if (wb->base == NULL) + { + wb->base = XMALLOC(MTYPE_STREAM_DATA, size) ; + wb->limit = wb->base + size ; + } ; - wb->p_in = wb->p_out = wb->base ; + bgp_write_buffer_reset(wb) ; } ; /*------------------------------------------------------------------------------ @@ -376,7 +374,9 @@ static void bgp_write_buffer_free(bgp_wbuffer wb) { if (wb->base != NULL) - XFREE(MTYPE_STREAM_DATA, wb->base) ; + XFREE(MTYPE_STREAM_DATA, wb->base) ; /* sets wb->base = NULL */ + + wb->p_in = wb->p_out = wb->limit = wb->base; } ; /*============================================================================== @@ -449,12 +449,18 @@ bgp_connection_queue_del(bgp_connection connection) * pending queue (success) or remove connection from the pending queue. * * This is also where connections come to die. + * + * Returns: 0 => nothing to do + * 1 => dealt with one or more queued bits of work */ -extern void +extern int bgp_connection_queue_process(void) { mqueue_block mqb ; + if (bgp_connection_queue == NULL) + return 0 ; + while (bgp_connection_queue != NULL) { /* select the first in the queue, and step to the next */ @@ -486,6 +492,8 @@ bgp_connection_queue_process(void) if (mqb == mqueue_local_head(&connection->pending_queue)) bgp_connection_queue_del(connection) ; } ; + + return 1 ; } ; /*------------------------------------------------------------------------------ @@ -527,17 +535,29 @@ bgp_connection_add_pending(bgp_connection connection, mqueue_block mqb, * * Expects connection to either be newly created or recently closed. * + * For connect() connections this is done at connect() time, so before any + * connection comes up. + * + * For accept() connections this is done at accept() time, so when the + * connection comes up. + * + * The file is disabled in all modes. + * + * To complete the process must bgp_connection_start(), which resets the write + * buffer (allocating if required), and ensures that all is ready to read/write. + * * Resets: * * * closes any file that may be lingering (should never be) - * * resets all buffering (should all be empty) + * * reset all stream buffers to empty (should already be) + * * set write buffer unwritable * * Sets: * * * if secondary connection, turn off accept() - * * sets the qfile and fd ready for use + * * sets the qfile and fd ready for use -- disabled in all modes * * clears err -- must be OK so far - * * discards any open_state and notification + * * discards any open_state * * copies hold_timer_interval and keep_alive_timer_interval from session * * Expects: @@ -545,13 +565,15 @@ bgp_connection_add_pending(bgp_connection connection, mqueue_block mqb, * * links to/from session to be set up (including ordinal) * * timers to be initialised * * log and host to be set up - * * buffers to exist + * * stream buffers to exist * * Does not touch: * - * * state of the connection (including post event) + * * state of the connection (including exception and follow-on event) * * timers -- FSM looks after those * + * NB: nothing can be written until bgp_connection_start() has been called. + * * NB: requires the session to be LOCKED. 
*/ extern void @@ -580,7 +602,65 @@ bgp_connection_open(bgp_connection connection, int fd) } ; /*------------------------------------------------------------------------------ + * Start connection which has just come up -- connect() or accept() + * + * Copy the local and remote addresses and note the effective address family. + * + * Make sure now have a write buffer, and set it empty and writable. + */ +extern void +bgp_connection_start(bgp_connection connection, union sockunion* su_local, + union sockunion* su_remote) +{ + sockunion_set_dup(&connection->su_local, su_local) ; + sockunion_set_dup(&connection->su_remote, su_remote) ; + + connection->paf = sockunion_family(connection->su_local) ; + + bgp_write_buffer_init(&connection->wbuff, bgp_wbuff_size) ; +} ; + +/*------------------------------------------------------------------------------ + * Stop connection + * + * * reset stream buffers + * * empty out any pending queue + * * remove from the BGP Engine connection queue, if there + * * clear session->active flag, so will not process any more messages + * that expect some message to be sent. + * * no notification pending (yet) + * + * If required: + * + * * set write buffer unwritable + * * disable file in write mode + * + * NB: requires the session to be LOCKED. + */ +static void +bgp_connection_stop(bgp_connection connection, int stop_writer) +{ + /* Reset all stream buffering empty. */ + stream_reset(connection->ibuf) ; + stream_reset(connection->obuf) ; + + connection->read_pending = 0 ; + connection->read_header = 0 ; + connection->notification_pending = 0 ; + + /* Empty out the pending queue and remove from connection queue */ + mqueue_local_reset_keep(&connection->pending_queue) ; + bgp_connection_queue_del(connection) ; + + /* If required: set write buffer *unwritable* (and empty). */ + if (stop_writer) + bgp_write_buffer_unwritable(&connection->wbuff) ; +} ; + +/*------------------------------------------------------------------------------ * Enable connection for accept() + * + * NB: requires the session to be LOCKED. */ extern void bgp_connection_enable_accept(bgp_connection connection) @@ -590,6 +670,8 @@ bgp_connection_enable_accept(bgp_connection connection) /*------------------------------------------------------------------------------ * Disable connection for accept() -- assuming still have session ! + * + * NB: requires the session to be LOCKED. */ extern void bgp_connection_disable_accept(bgp_connection connection) @@ -605,7 +687,8 @@ bgp_connection_disable_accept(bgp_connection connection) * * if there is an fd, close it * * if qfile is active, remove it * * forget any addresses - * * reset all buffering to empty + * * reset all stream buffers to empty + * * reset write buffer to unwritable * * empties the pending queue -- destroying all messages * * * for secondary connection: disable accept @@ -630,6 +713,8 @@ bgp_connection_disable_accept(bgp_connection connection) * * bgp_connection_free() -- to finally discard * * * bgp_connection_full_close() -- can do this again + * + * NB: requires the session to be LOCKED. */ extern void bgp_connection_full_close(bgp_connection connection, int unset_timers) @@ -658,20 +743,8 @@ bgp_connection_full_close(bgp_connection connection, int unset_timers) sockunion_unset(&connection->su_local) ; sockunion_unset(&connection->su_remote) ; - /* Reset all buffering empty. 
*/ - stream_reset(connection->ibuf) ; - stream_reset(connection->obuf) ; - - connection->read_pending = 0 ; - connection->read_header = 0 ; - connection->notification_pending = 0 ; - - connection->wbuff.p_in = connection->wbuff.base ; - connection->wbuff.p_out = connection->wbuff.base ; - - /* Empty out the pending queue and remove from connection queue */ - mqueue_local_reset_keep(&connection->pending_queue) ; - bgp_connection_queue_del(connection) ; + /* Bring connection to a stop. */ + bgp_connection_stop(connection, 1) ; } ; /*------------------------------------------------------------------------------ @@ -691,34 +764,39 @@ bgp_connection_full_close(bgp_connection connection, int unset_timers) * be written (at least as far as the write buffer). * * Everything else is left untouched. + * + * Returns: 1 => OK, ready to send NOTIFICATION now + * 0 => no file descriptor => no chance of sending NOTIFICATION + * + * NB: requires the session to be LOCKED. */ -extern void +extern int bgp_connection_part_close(bgp_connection connection) { + bgp_session session = connection->session ; bgp_wbuffer wb = &connection->wbuff ; int fd ; uint8_t* p ; bgp_size_t mlen ; - /* close the qfile and any associate file descriptor */ + /* Check that have a usable file descriptor */ fd = qps_file_fd(&connection->qf) ; - if (fd != fd_undef) - { - shutdown(fd, SHUT_RD) ; - qps_disable_modes(&connection->qf, qps_read_mbit) ; - } ; - /* Reset all input buffering. */ - stream_reset(connection->ibuf) ; + if (fd == fd_undef) + return 0 ; - connection->read_pending = 0 ; - connection->read_header = 0 ; + /* Shutdown the read side of this connection */ + shutdown(fd, SHUT_RD) ; + qps_disable_modes(&connection->qf, qps_read_mbit) ; - /* Reset obuf and purge wbuff. */ - stream_reset(connection->obuf) ; + /* Stop all buffering activity, except for write buffer. */ + bgp_connection_stop(connection, 0) ; - connection->notification_pending = 0 ; + /* Turn off session->active (if still attached). */ + if (session != NULL) + session->active = 0 ; + /* Purge wbuff of all but current partly written message (if any) */ if (wb->p_in != wb->p_out) /* will be equal if buffer is empty */ { passert(wb->p_out < wb->p_in) ; @@ -739,62 +817,49 @@ bgp_connection_part_close(bgp_connection connection) wb->p_in = wb->base + mlen ; } else - wb->p_in = wb->p_out = wb->base ; + bgp_write_buffer_reset(wb) ; - /* Empty out the pending queue and remove from connection queue */ - mqueue_local_reset_keep(&connection->pending_queue) ; - bgp_connection_queue_del(connection) ; + /* OK -- part closed, ready to send NOTIFICATION */ + return 1 ; } ; /*============================================================================== * Writing to BGP connection -- once TCP connection has come up. * - * All writing is done by preparing a BGP message in the "obuf" buffer, - * and then calling bgp_connection_write(). + * Nothing is written directly -- all writing is qpselect driven. * - * If possible, that is written away immediately. If not, then no further - * messages may be prepared until the buffer has been cleared. - * - * Write the contents of the "work" buffer. + * All writing is done by preparing a BGP message in a stream buffer, + * and then calling bgp_connection_write(). The contents of the stream buffer + * are transferred to the connection's write buffer. * * Returns true <=> able to write the entire buffer without blocking. 
*/ -static int bgp_connection_write_direct(bgp_connection connection, - struct stream* s) ; static void bgp_connection_write_action(qps_file qf, void* file_info) ; /*------------------------------------------------------------------------------ - * Write the contents of the given stream, if possible - * - * Writes everything or nothing. + * Write the contents of the given stream * - * If the write buffer is empty, then will attempt to write directly to the - * socket, buffering anything that cannot be sent immediately. Any errors - * encountered in this process generate an FSM event. + * Writes everything or FATAL error. * - * In case it is relevant, identifies when the data has been written all the - * way into the TCP buffer. + * Returns: 1 => written to wbuff -- stream reset, empty * - * Returns: 2 => written to TCP -- it's gone -- stream reset, empty - * 1 => written to wbuff -- waiting for socket -- stream reset, empty - * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! */ extern int bgp_connection_write(bgp_connection connection, struct stream* s) { bgp_wbuffer wb = &connection->wbuff ; - if (bgp_write_buffer_empty(wb)) - { - /* write buffer is empty -- attempt to write directly */ - return bgp_connection_write_direct(connection, s) ; - } ; - - /* Write nothing if cannot write everything */ + /* FATAL error if cannot write everything. */ if (bgp_write_buffer_cannot(wb, stream_pending(s))) - return 0 ; + zabort("Write buffer does not have enough room") ; + + /* If buffer is empty, enable write mode */ + if (bgp_write_buffer_empty(wb)) + qps_enable_mode(&connection->qf, qps_write_mnum, + bgp_connection_write_action) ; /* Transfer the obuf contents to the write buffer. */ wb->p_in = stream_transfer(wb->p_in, s, wb->limit) ; @@ -803,71 +868,19 @@ bgp_connection_write(bgp_connection connection, struct stream* s) } ; /*------------------------------------------------------------------------------ - * The write buffer is empty -- so try to write stream directly. - * - * If cannot empty the stream directly to the TCP buffers, transfer it to to - * the write buffer, and enable the qpselect action. - * (This is where the write buffer is allocated, if it hasn't yet been.) - * - * Either way, the stream is cleared and can be reused (unless failed). - * - * Returns: 2 => written to TCP -- it's gone -- stream reset, empty - * 1 => written to wbuff -- waiting for socket -- stream reset, empty - * -1 => failed -- error event generated - */ -enum { bgp_wbuff_size = BGP_MSG_MAX_L * 10 } ; - -static int -bgp_connection_write_direct(bgp_connection connection, struct stream* s) -{ - int ret ; - - ret = stream_flush_try(s, qps_file_fd(&connection->qf)) ; - - if (ret == 0) - return 2 ; /* Done: wbuff and stream are empty */ - - else if (ret > 0) - { - bgp_wbuffer wb = &connection->wbuff ; - - /* Partial write -- set up buffering, if required. 
*/ - if (wb->base == NULL) - bgp_write_buffer_init_new(wb, bgp_wbuff_size) ; - - /* Transfer *entire* message to staging buffer */ - wb->p_in = stream_transfer(wb->base, s, wb->limit) ; - - wb->p_out = wb->p_in - ret ; /* output from here */ - - /* Must now be enabled to write */ - qps_enable_mode(&connection->qf, qps_write_mnum, - bgp_connection_write_action) ; - - return 1 ; /* Done: wbuff is not empty -- stream is */ - } ; - - /* write failed -- signal error and return failed */ - bgp_fsm_io_error(connection, errno) ; - - return -1 ; -} ; - -/*------------------------------------------------------------------------------ * Write Action for bgp connection. * * Empty the write buffer if we can. * * If empties that, disable write mode, then: * - * -- if notification is pending, then generate a notification sent event + * -- if notification is pending, generate a notification sent event * * -- otherwise: place connection on the connection queue, so can start to * flush out anything on the connection's pending queue. * - * If empty out everything, disable write mode. - * - * If encounter an error, generate TCP_fatal_error event. + * If encounter an error, generate TCP_fatal_error event, forcing buffer + * empty but unwritable. */ static void bgp_connection_write_action(qps_file qf, void* file_info) @@ -894,14 +907,16 @@ bgp_connection_write_action(qps_file qf, void* file_info) continue ; if ((ret != EAGAIN) && (ret != EWOULDBLOCK)) - bgp_fsm_io_error(connection, errno) ; - + { + bgp_write_buffer_unwritable(wb) ; + bgp_fsm_io_error(connection, errno) ; + } ; return ; } ; } ; /* Buffer is empty -- reset it and disable write mode */ - wb->p_out = wb->p_in = wb->base ; + bgp_write_buffer_reset(wb) ; qps_disable_modes(&connection->qf, qps_write_mbit) ; diff --git a/bgpd/bgp_connection.h b/bgpd/bgp_connection.h index 054cd953..d50d2985 100644 --- a/bgpd/bgp_connection.h +++ b/bgpd/bgp_connection.h @@ -90,31 +90,24 @@ enum bgp_fsm_events } ; /*============================================================================== - * BGP Connection Structure + * BGP Connection Structures * - * The BGP Connection is the main data structure for the BGP Engine. + *------------------------------------------------------------------------------ + * Write buffer for connection. * - * When a session terminates, or a connection is shut it may have a short - * independent life, if a NOTIFICATION message is pending. + * NB: when connection is initialised all the pointers are set NULL. * - */ - -/* Write buffer for connection. + * The buffer is not allocated until the TCP connection comes up. * * NB: p_out == p_in => buffer is empty * - * BUT: buffer is not allocated until required, and until then - * p_out == p_in == NULL -- empty does NOT imply usable ! + * BUT: p_out == limit => buffer is not writable. * - * AND: when buffer is emptied, p_out and p_in will be some way down the - * buffer. + * When connection is first initialised all pointers are NULL, so the + * buffer is "empty but not writable". * - * SO: before writing, check for base != NULL and set p_out = p_in = base. - * - * NB: before buffer is allocated base == NULL, but limit is set to NULL + n, - * so that buffer does not appear full. - * - * SO: not full does NOT imply that p_out/p_in/base are set, either ! + * When connection is opened, closed or fails, buffer is set into this + * "empty but not writable" state. 
*/ typedef struct bgp_wbuffer* bgp_wbuffer ; struct bgp_wbuffer @@ -126,7 +119,17 @@ struct bgp_wbuffer uint8_t* limit ; } ; +/* Buffer is allocated for a number of maximum size BGP messages. */ +enum { bgp_wbuff_size = BGP_MSG_MAX_L * 10 } ; +/*============================================================================== + * BGP Connection Structure + * + * The BGP Connection is the main data structure for the BGP Engine. + * + * When a session terminates, or a connection is shut it may have a short + * independent life, if a NOTIFICATION message is pending. + */ struct bgp_connection { bgp_session session ; /* session connection belongs to */ @@ -147,7 +150,7 @@ struct bgp_connection int fsm_active ; /* active in FSM counter */ bgp_fsm_event_t follow_on ; /* event raised within FSM */ - bgp_session_event_t except ; /* exception posted here */ + bgp_session_event_t exception; /* exception posted here */ bgp_notify notification ; /* if any sent/received */ int err ; /* erno, if any */ @@ -201,6 +204,9 @@ extern void bgp_connection_open(bgp_connection connection, int fd) ; extern void +bgp_connection_start(bgp_connection connection, union sockunion* su_local, + union sockunion* su_remote) ; +extern void bgp_connection_enable_accept(bgp_connection connection) ; extern void @@ -218,7 +224,7 @@ bgp_connection_full_close(bgp_connection connection, int unset_timers) ; #define bgp_connection_close(conn) bgp_connection_full_close(conn, 0) #define bgp_connection_close_down(conn) bgp_connection_full_close(conn, 1) -extern void +extern int bgp_connection_part_close(bgp_connection connection) ; extern void @@ -236,7 +242,7 @@ bgp_connection_queue_add(bgp_connection connection) ; extern void bgp_connection_queue_del(bgp_connection connection) ; -extern void +extern int bgp_connection_queue_process(void) ; Inline int @@ -251,12 +257,28 @@ bgp_connection_add_pending(bgp_connection connection, mqueue_block mqb, bgp_connection* is_pending) ; /*------------------------------------------------------------------------------ + * Set buffer *unwritable* (buffer appears full, but nothing pending). + */ +Inline void +bgp_write_buffer_unwritable(bgp_wbuffer wb) +{ + wb->p_in = wb->p_out = wb->limit ; +} ; + +/*------------------------------------------------------------------------------ + * If allocated: set buffer empty + * If unallocated: buffer remains *unwritable* + */ +Inline void +bgp_write_buffer_reset(bgp_wbuffer wb) +{ + wb->p_in = wb->p_out = wb->base ; +} ; + +/*------------------------------------------------------------------------------ * See if do NOT have enough room for what want to write PLUS 1. * - * NB: before using the buffer the caller MUST ensure it has been allocated. - * - * Unallocated buffer is made to appear to have room for one maximum - * size BGP message. + * NB: there is never any room in an unallocated buffer. */ Inline int bgp_write_buffer_cannot(bgp_wbuffer wb, size_t want) @@ -267,30 +289,35 @@ bgp_write_buffer_cannot(bgp_wbuffer wb, size_t want) /*------------------------------------------------------------------------------ * Full if NOT enough room for a maximum size BGP message + 1 * - * NB: this will be FALSE if the buffer has not been allocated -- because can - * allocate a buffer and proceed if required. + * NB: there is never any room in an unallocated buffer. 
*/ enum { bgp_write_buffer_full_threshold = BGP_MSG_MAX_L + 1 } ; Inline int -bgp_write_buffer_full(bgp_wbuffer wb) +bgp_write_buffer_cannot_max(bgp_wbuffer wb) { return bgp_write_buffer_cannot(wb, BGP_MSG_MAX_L) ; } ; /*------------------------------------------------------------------------------ - * Empty if in and out pointers are equal. - * - * NB: buffer is empty if it has not yet been allocated. - * - * NOT empty => allocated. + * See if buffer has anything in it. * - * NB: empty does NOT imply that both pointers are at the start of the buffer. + * If empty, ensures that the buffer has been allocated, and sets the pointers + * to the start of the buffer -- so all set to go. */ Inline int bgp_write_buffer_empty(bgp_wbuffer wb) { - return (wb->p_out == wb->p_in) ; + if (wb->p_out < wb->p_in) + return 0 ; /* not empty => has buffer */ + + dassert(wb->p_out == wb->p_in) ; + + passert(wb->base != NULL) ; /* must have buffer */ + + bgp_write_buffer_reset(wb) ; /* pointers to start of buffer */ + + return 1 ; /* empty and all ready to go */ } ; /*------------------------------------------------------------------------------ @@ -299,8 +326,6 @@ bgp_write_buffer_empty(bgp_wbuffer wb) * NB: if returns 0, may not yet have been allocated. * * > 0 => allocated. - * - * NB: 0 does NOT imply that both pointers are at the start of the buffer. */ Inline int bgp_write_buffer_pending(bgp_wbuffer wb) @@ -313,9 +338,9 @@ bgp_write_buffer_pending(bgp_wbuffer wb) * As above, for connection */ Inline int -bgp_connection_write_full(bgp_connection connection) +bgp_connection_write_cannot_max(bgp_connection connection) { - return bgp_write_buffer_full(&connection->wbuff) ; + return bgp_write_buffer_cannot_max(&connection->wbuff) ; } ; /*------------------------------------------------------------------------------ diff --git a/bgpd/bgp_engine.h b/bgpd/bgp_engine.h index fdbcef70..3a751885 100644 --- a/bgpd/bgp_engine.h +++ b/bgpd/bgp_engine.h @@ -51,7 +51,7 @@ struct queue_stats } ; static struct queue_stats bgp_engine_queue_stats ; -static struct queue_stats peering_engine_queue_stats ; +static struct queue_stats routing_engine_queue_stats ; Inline void bgp_queue_logging(const char* name, mqueue_queue mq, struct queue_stats* stats) @@ -62,6 +62,8 @@ bgp_queue_logging(const char* name, mqueue_queue mq, struct queue_stats* stats) ++stats->count ; + qpt_mutex_lock(&mq->mutex) ; + if (mq->count > stats->max) stats->max = mq->count ; if (mq->count > stats->recent) @@ -70,7 +72,10 @@ bgp_queue_logging(const char* name, mqueue_queue mq, struct queue_stats* stats) stats->total += mq->count ; if (stats->count < 1000) - return ; + { + qpt_mutex_unlock(&mq->mutex) ; + return ; + } ; my_count = 0 ; @@ -83,6 +88,8 @@ bgp_queue_logging(const char* name, mqueue_queue mq, struct queue_stats* stats) assert(my_count == mq->count) ; + qpt_mutex_unlock(&mq->mutex) ; + average = stats->total ; average /= stats->count ; @@ -121,24 +128,24 @@ bgp_to_bgp_engine_priority(mqueue_block mqb) * */ -/* Send given message to the Peering Engine -- ordinary +/* Send given message to the Routing Engine -- ordinary */ Inline void -bgp_to_peering_engine(mqueue_block mqb) +bgp_to_routing_engine(mqueue_block mqb) { mqueue_enqueue(routing_nexus->queue, mqb, 0) ; - bgp_queue_logging("Peering Engine", routing_nexus->queue, - &peering_engine_queue_stats) ; + bgp_queue_logging("Routing Engine", routing_nexus->queue, + &routing_engine_queue_stats) ; } ; -/* Send given message to the Peering Engine -- priority +/* Send given message to the Routing 
Engine -- priority */ Inline void -bgp_to_peering_engine_priority(mqueue_block mqb) +bgp_to_routing_engine_priority(mqueue_block mqb) { mqueue_enqueue(routing_nexus->queue, mqb, 1) ; - bgp_queue_logging("Peering Engine", routing_nexus->queue, - &peering_engine_queue_stats) ; + bgp_queue_logging("Routing Engine", routing_nexus->queue, + &routing_engine_queue_stats) ; } ; #endif /* QUAGGA_BGP_ENGINE_H */ diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 8a8be52d..77afa12f 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -57,7 +57,7 @@ * * In general the FSM manages connections, but there is some interaction with * the session. In particular, exceptions are expressed as session_eXXX - * values -- which are passed to the Peering Engine as session events. The + * values -- which are passed to the Routing Engine as session events. The * handling of FSM events is depends mostly on the FSM state, but any * exception influences that too. * @@ -273,7 +273,7 @@ * notification -- any NOTIFICATION message * err -- any I/O or other error * - * on exit from the FSM this information is passed to the Peering Engine. + * on exit from the FSM this information is passed to the Routing Engine. * * Can throw exceptions within the FSM, as discussed above. * @@ -446,7 +446,7 @@ bgp_fsm_enable_session(bgp_session session) * */ static void -bgp_fsm_throw(bgp_connection connection, bgp_session_event_t except, +bgp_fsm_throw(bgp_connection connection, bgp_session_event_t exception, bgp_notify notification, int err, bgp_fsm_event_t event) ; static bgp_fsm_state_t @@ -548,10 +548,10 @@ bgp_fsm_disable_session(bgp_session session, bgp_notify notification) * fsm_active/follow_on mechanism looks after this. */ extern void -bgp_fsm_exception(bgp_connection connection, bgp_session_event_t except, +bgp_fsm_exception(bgp_connection connection, bgp_session_event_t exception, bgp_notify notification) { - bgp_fsm_throw(connection, except, notification, 0, bgp_fsm_eBGP_Stop) ; + bgp_fsm_throw(connection, exception, notification, 0, bgp_fsm_eBGP_Stop) ; } ; /*------------------------------------------------------------------------------ @@ -559,7 +559,7 @@ bgp_fsm_exception(bgp_connection connection, bgp_session_event_t except, * * A connection will discard any sibling if: * - * * the session is being disabled (by the Peering Engine) + * * the session is being disabled (by the Routing Engine) * * * an invalid event is bringing down the session * @@ -655,7 +655,7 @@ bgp_fsm_io_error(bgp_connection connection, int err) * This is used by the connect() and accept() qpselect actions. It is also * used if a connect() attempt fails immediately. * - * If err == 0, then all is well: copy the local and remote sockunions + * If err == 0, then all is well: start the connection (can now write to it) * and generate TCP_connection_open event * * If err is one of: @@ -666,6 +666,9 @@ bgp_fsm_io_error(bgp_connection connection, int err) * these errors.) * * Other errors are reported as TCP_fatal_error. + * + * NB: in any case on entry to this function the file is *disabled* in all + * modes. 
*/ extern void bgp_fsm_connect_completed(bgp_connection connection, int err, @@ -674,12 +677,8 @@ bgp_fsm_connect_completed(bgp_connection connection, int err, { if (err == 0) { + bgp_connection_start(connection, su_local, su_remote) ; bgp_fsm_event(connection, bgp_fsm_eTCP_connection_open) ; - - sockunion_set_dup(&connection->su_local, su_local) ; - sockunion_set_dup(&connection->su_remote, su_remote) ; - - connection->paf = sockunion_family(connection->su_local) ; } else if ( (err == ECONNREFUSED) || (err == ECONNRESET) @@ -697,12 +696,12 @@ bgp_fsm_connect_completed(bgp_connection connection, int err, * NB: takes responsibility for the notification structure. */ static void -bgp_fsm_throw(bgp_connection connection, bgp_session_event_t except, +bgp_fsm_throw(bgp_connection connection, bgp_session_event_t exception, bgp_notify notification, int err, bgp_fsm_event_t event) { - connection->except = except ; + connection->exception = exception ; bgp_notify_set(&connection->notification, notification) ; - connection->err = err ; + connection->err = err ; bgp_fsm_event(connection, event) ; } ; @@ -721,10 +720,10 @@ bgp_fsm_throw(bgp_connection connection, bgp_session_event_t except, * NB: takes responsibility for the notification structure. */ static bgp_fsm_state_t -bgp_fsm_throw_stop(bgp_connection connection, bgp_session_event_t except, +bgp_fsm_throw_stop(bgp_connection connection, bgp_session_event_t exception, bgp_notify notification) { - bgp_fsm_throw(connection, except, notification, 0, bgp_fsm_eBGP_Stop) ; + bgp_fsm_throw(connection, exception, notification, 0, bgp_fsm_eBGP_Stop) ; return connection->state ; } ; @@ -1477,7 +1476,7 @@ bgp_fsm_event(bgp_connection connection, bgp_fsm_event_t event) } while (--connection->fsm_active != 0) ; /* If required, post session event. */ - if (connection->except != bgp_session_null_event) + if (connection->exception != bgp_session_null_event) { int stopped = (connection->state == bgp_fsm_sStopping) ; int has_session = (connection->session != NULL) ; @@ -1488,16 +1487,16 @@ bgp_fsm_event(bgp_connection connection, bgp_fsm_event_t event) * connection->state will be Stopping is when the session is being * stopped. (eDiscard and eCollision go quietly to Stopping !) 
*/ - if ((connection->except <= bgp_session_max_event) && has_session) - bgp_session_event(connection->session, connection->except, + if ((connection->exception <= bgp_session_max_event) && has_session) + bgp_session_event(connection->session, connection->exception, bgp_notify_take(&connection->notification), connection->err, connection->ordinal, stopped) ; /* Tidy up -- notification already cleared */ - connection->except = bgp_session_null_event ; - connection->err = 0 ; + connection->exception = bgp_session_null_event ; + connection->err = 0 ; bgp_notify_unset(&connection->notification) ; /* if any */ if (stopped && has_session) @@ -1552,12 +1551,12 @@ static bgp_fsm_action(bgp_fsm_enter) */ static bgp_fsm_action(bgp_fsm_stop) { - if (connection->except == bgp_session_null_event) + if (connection->exception == bgp_session_null_event) return bgp_fsm_invalid(connection, bgp_fsm_sStopping, event) ; - if ( (connection->except == bgp_session_eDisabled) - || (connection->except == bgp_session_eDiscard) - || (connection->except == bgp_session_eInvalid) ) + if ( (connection->exception == bgp_session_eDisabled) + || (connection->exception == bgp_session_eDiscard) + || (connection->exception == bgp_session_eInvalid) ) next_state = bgp_fsm_sStopping ; return bgp_fsm_catch(connection, next_state) ; @@ -1590,7 +1589,7 @@ static bgp_fsm_action(bgp_fsm_invalid) * * Enters either sConnect or sActive, depending on primary/secondary. * - * Throws a session_eStart exception so the Peering Engine gets to see this, + * Throws a session_eStart exception so the Routing Engine gets to see this, * and a follow-on fsm_eBGP_Start event to kick the connect() or accept() into * life. * @@ -2029,12 +2028,12 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) { bgp_notify send_notification ; - assert(connection->except != bgp_session_null_event) ; + assert(connection->exception != bgp_session_null_event) ; /* Have a notification to send iff not just received one, and is in a * suitable state to send one at all. */ - if (connection->except == bgp_session_eNOM_recv) + if (connection->exception == bgp_session_eNOM_recv) send_notification = NULL ; else { @@ -2046,54 +2045,36 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) send_notification = connection->notification ; } ; - /* If there is a NOTIFICATION to send, now is the time to do that. + /* If there is a NOTIFICATION to send, send it if possible. * Otherwise, close the connection but leave the timers. * * The state transition stuff looks after timers. In particular an error * in Connect/Active states leaves the ConnectRetryTimer running. */ - if (send_notification != NULL) + if ((send_notification != NULL) && bgp_connection_part_close(connection)) { - int ret ; - /* If not changing to stopping, we hold in the current state until * the NOTIFICATION process is complete. */ if (next_state != bgp_fsm_sStopping) next_state = connection->state ; - /* Close for reading and flush write buffers. */ - bgp_connection_part_close(connection) ; - + /* Make sure that cannot pop out a Keepalive ! */ qtimer_unset(&connection->keepalive_timer) ; - /* Write the message + /* Write the message */ + bgp_msg_write_notification(connection, send_notification) ; + + /* notification is sitting in the write buffer + * + * notification_pending is set, so write action will raise the required + * event in due course. * - * If the write fails it raises a suitable event, which will now be - * sitting waiting to be processed on the way out of the FSM. 
+ * Set the HoldTimer to something suitable. Don't really expect this + * to happen in anything except sEstablished state -- but copes. (Is + * ready to wait 20 seconds in sStopping state and 5 otherwise.) */ - ret = bgp_msg_write_notification(connection, send_notification) ; - - connection->notification_pending = (ret >= 0) ; - /* is pending if not failed */ - if (ret > 0) - /* notification reached the TCP buffers instantly - * - * Send ourselves the good news ! - */ - bgp_fsm_notification_sent(connection) ; - - else if (ret == 0) - /* notification is sitting in the write buffer - * - * notification_pending is set, so write action will raise the required - * event in due course. - * - * Set the HoldTimer to something suitable. Don't really expect this - * to happen in anything except sEstablished state -- but copes. (Is - * ready to wait 20 seconds in sStopping state and 5 otherwise.) - */ - bgp_hold_timer_set(connection, + bgp_hold_timer_set(connection, (next_state == bgp_fsm_sStopping) ? 20 : 5) ; } else @@ -2106,7 +2087,7 @@ bgp_fsm_catch(bgp_connection connection, bgp_fsm_state_t next_state) /* If sStopping and not eDiscard, do in any sibling */ if ( (next_state == bgp_fsm_sStopping) - && (connection->except != bgp_session_eDiscard) ) + && (connection->exception != bgp_session_eDiscard) ) { bgp_connection sibling ; diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 94f8c7e5..3c6d70aa 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -37,6 +37,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "plist.h" #include "qpnexus.h" #include "qlib_init.h" +#include "thread.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" @@ -84,8 +85,8 @@ void sigusr2 (void); static void bgp_exit (int); static void init_second_stage(int pthreads); static void bgp_in_thread_init(void); -static qtime_mono_t routing_event_hook(enum qpn_priority priority); -static qtime_mono_t bgp_event_hook(enum qpn_priority priority); +static int routing_foreground(void); +static int routing_background(void); static void sighup_action(mqueue_block mqb, mqb_flag_t flag); static void sighup_enqueue(void); static void sigterm_action(mqueue_block mqb, mqb_flag_t flag); @@ -129,7 +130,6 @@ char *config_file = NULL; /* Have we done the second stage initialization? */ static int done_2nd_state_init = 0; - /* Process ID saved for use by init system */ static const char *pid_file = PATH_BGPD_PID; @@ -390,26 +390,32 @@ init_second_stage(int pthreads) /* if using pthreads create additional nexus */ if (qpthreads_enabled) { - bgp_nexus = qpn_init_new(bgp_nexus, 0); + bgp_nexus = qpn_init_new(bgp_nexus, 0); routing_nexus = qpn_init_new(routing_nexus, 0); } else { /* we all share the single nexus and single thread */ - bgp_nexus = cli_nexus; + bgp_nexus = cli_nexus; routing_nexus = cli_nexus; } + /* Tell thread stuff to use this qtimer pile */ + thread_set_qtimer_pile(routing_nexus->pile) ; + /* Nexus hooks. * Beware if !qpthreads_enabled then there is only 1 nexus object - * with all nexus pointers being aliases for it. So only one routine - * per hook for *all* nexus. + * with all nexus pointers being aliases for it. 
*/ - bgp_nexus->in_thread_init = bgp_in_thread_init; - bgp_nexus->in_thread_final = bgp_close_listeners; - routing_nexus->event_hook[0] = routing_event_hook; - bgp_nexus->event_hook[1] = bgp_event_hook; - confirm(NUM_EVENT_HOOK >= 2); + bgp_nexus->in_thread_init = bgp_in_thread_init ; + bgp_nexus->in_thread_final = bgp_close_listeners ; + + qpn_add_hook_function(&routing_nexus->foreground, routing_foreground) ; + qpn_add_hook_function(&bgp_nexus->foreground, bgp_connection_queue_process) ; + + qpn_add_hook_function(&routing_nexus->background, routing_background) ; + + confirm(qpn_hooks_max >= 2) ; /* vty and zclient can use either nexus or threads. * For bgp client we always want nexus, regardless of pthreads. @@ -616,26 +622,18 @@ bgp_in_thread_init(void) bgp_open_listeners(bm->port, bm->address); } -/* legacy threads */ -static qtime_mono_t -routing_event_hook(enum qpn_priority priority) +/* legacy threads in routing engine */ +static int +routing_foreground(void) { - struct thread thread; - qtime_mono_t event_wait; - - while (thread_fetch_event (priority, master, &thread, &event_wait)) - thread_call (&thread); - - return event_wait; + return thread_dispatch(master) ; } -/* BGP local queued events */ -static qtime_mono_t -bgp_event_hook(enum qpn_priority priority) +/* background threads in routing engine */ +static int +routing_background(void) { - if (priority >= qpn_pri_fourth) - bgp_connection_queue_process(); - return 0; + return thread_dispatch_background(master) ; } /* SIGINT/TERM SIGHUP need to tell routing engine what to do */ diff --git a/bgpd/bgp_msg_read.c b/bgpd/bgp_msg_read.c index 48227364..b218c5cf 100644 --- a/bgpd/bgp_msg_read.c +++ b/bgpd/bgp_msg_read.c @@ -1373,7 +1373,7 @@ bgp_msg_update_receive (bgp_connection connection, bgp_size_t body_size) ++connection->session->stats.update_in ; connection->session->stats.update_time = time(NULL) ; - /* PRO TEM: pass raw update message across to Peering Engine */ + /* PRO TEM: pass raw update message across to Routing Engine */ /* TODO: decode update messages in the BGP Engine. */ bgp_session_update_recv(connection->session, connection->ibuf, body_size); } diff --git a/bgpd/bgp_msg_write.c b/bgpd/bgp_msg_write.c index ef843157..d61ba642 100644 --- a/bgpd/bgp_msg_write.c +++ b/bgpd/bgp_msg_write.c @@ -72,12 +72,12 @@ /*------------------------------------------------------------------------------ * Make NOTIFICATION message and dispatch. * - * NB: the write buffers will have been flushed -- so expect success ! + * NB: the write buffers MUST have been flushed -- so demand success ! * - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket - * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * Returns: 1 => written to wbuff -- qpselect will write from there + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ @@ -143,7 +143,12 @@ bgp_msg_write_notification(bgp_connection connection, bgp_notify notification) bgp_notify_free(text_form) ; } ; - /* Finally -- write the obuf away */ + /* Set flag so that write_action raises required event when buffer becomes + * empty. 
+ */ + connection->notification_pending = 1 ; + + /* Finally -- write the obuf away */ return bgp_connection_write(connection, s) ; } ; @@ -156,10 +161,11 @@ bgp_msg_write_notification(bgp_connection connection, bgp_notify notification) * KEEPALIVE is sent in response to OPEN, and that MUST be sent. But if the * buffers are full at that point, something is broken ! * - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket - * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * Returns: 1 => written to wbuff -- qpselect will write from there + * 0 => nothing written -- no need, buffer not empty ! + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ @@ -208,10 +214,10 @@ bgp_open_capability_orf (struct stream *s, iAFI_t afi, iSAFI_t safi, * OPEN is the first message to be sent. If the buffers are not empty, * something is badly wrong ! * - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket - * 0 => nothing written -- wbuff was too full !!! - * -1 => failed -- error event generated + * Returns: 1 => written to wbuff -- qpselect will write from there + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ @@ -221,8 +227,6 @@ bgp_msg_send_open(bgp_connection connection, bgp_open_state open_state) struct stream *s = connection->obuf ; int length ; - assert(bgp_connection_write_empty(connection)) ; - ++connection->session->stats.open_out ; /* Make OPEN message header */ @@ -262,7 +266,7 @@ bgp_msg_send_open(bgp_connection connection, bgp_open_state open_state) /* Finally -- write the obuf away */ return bgp_connection_write(connection, s) ; -} +} ; enum { @@ -488,9 +492,11 @@ bgp_msg_orf_prefix(struct stream* s, uint8_t common, * * Supports the status quo, only Address-Prefix ORF. * - * Returns: > 0 => all written - * 0 => unable to write everything - * < 0 => failed -- error event generated + * Returns: 1 => written to wbuff -- qpselect will write from there + * 0 => nothing written -- insufficient space in wbuff + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ @@ -501,7 +507,6 @@ bgp_msg_send_route_refresh(bgp_connection connection, bgp_route_refresh rr) uint8_t msg_type ; flag_t done ; bgp_size_t msg_len ; - int ret ; ++connection->session->stats.refresh_out ; @@ -512,7 +517,7 @@ bgp_msg_send_route_refresh(bgp_connection connection, bgp_route_refresh rr) do { - if (bgp_connection_write_full(connection)) + if (bgp_connection_write_cannot_max(connection)) return 0 ; /* Construct BGP message header for new/old form ROUTE-REFRESH */ @@ -534,10 +539,7 @@ bgp_msg_send_route_refresh(bgp_connection connection, bgp_route_refresh rr) zlog_debug ("%s sending REFRESH_REQ for afi/safi: %d/%d length %d", connection->host, rr->afi, rr->safi, msg_len) ; - ret = bgp_connection_write(connection, s) ; - if (ret < 0) - return ret ; - + bgp_connection_write(connection, s) ; } while (!done) ; return done ; @@ -779,17 +781,18 @@ bgp_msg_orf_prefix(struct stream* s, uint8_t common, /*------------------------------------------------------------------------------ * Make UPDATE message and dispatch. 
* - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket + * Returns: 1 => written to wbuff -- qpselect will write from there * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! * * NB: requires the session LOCKED -- connection-wise */ extern int bgp_msg_send_update(bgp_connection connection, struct stream* s) { - if (bgp_connection_write_full(connection)) + if (bgp_connection_write_cannot_max(connection)) return 0 ; ++connection->session->stats.update_out ; @@ -804,19 +807,18 @@ bgp_msg_send_update(bgp_connection connection, struct stream* s) /*------------------------------------------------------------------------------ * Make End-of-RIB message and dispatch. * - * - * - * Returns: 2 => written to TCP -- it's gone - * 1 => written to wbuff -- waiting for socket + * Returns: 1 => written to wbuff -- qpselect will write from there * 0 => nothing written -- insufficient space in wbuff - * -1 => failed -- error event generated + * + * NB: actual I/O occurs in the qpselect action function -- so this cannot + * fail ! */ extern int bgp_msg_send_end_of_rib(bgp_connection connection, iAFI_t afi, iSAFI_t safi) { struct stream *s = connection->obuf ; - if (bgp_connection_write_full(connection)) + if (bgp_connection_write_cannot_max(connection)) return 0 ; /* Make UPDATE message header */ diff --git a/bgpd/bgp_network.c b/bgpd/bgp_network.c index 0c1072c9..fa1dbd37 100644 --- a/bgpd/bgp_network.c +++ b/bgpd/bgp_network.c @@ -479,7 +479,7 @@ bgp_accept_action(qps_file qf, void* file_info) * This is running in the BGP Engine thread, so cannot in any case be * foxed by the other connection making changes. * - * The session is active, so the Peering Engine will not make any changes + * The session is active, so the Routing Engine will not make any changes * except under the mutex, and will not destroy the session. */ diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index d565c265..8955be3b 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -87,21 +87,6 @@ bgp_packet_set_size (struct stream *s) return cp; } -/* Add new packet to the peer. */ -static void -bgp_packet_add (struct peer *peer, struct stream *s) -{ - /* Add packet to the end of list. */ - stream_fifo_push (peer->obuf, s); -} - -/* Free first packet. */ -static void -bgp_packet_delete (struct peer *peer) -{ - stream_free (stream_fifo_pop (peer->obuf)); -} - #if 0 /* Check file descriptor whether connect is established. */ static void @@ -437,8 +422,7 @@ bgp_default_update_send (struct peer *peer, struct attr *attr, #endif /* DEBUG */ /* Add packet to the peer. */ - bgp_packet_add (peer, stream_dup (s)); - bgp_write(peer); + bgp_write(peer, s); } /*------------------------------------------------------------------------------ @@ -513,8 +497,7 @@ bgp_default_withdraw_send (struct peer *peer, afi_t afi, safi_t safi) bgp_packet_set_size (s); /* Add packet to the peer. */ - bgp_packet_add (peer, stream_dup (s)); - bgp_write(peer); + bgp_write(peer, s); } /*------------------------------------------------------------------------------ @@ -616,69 +599,35 @@ bgp_write_proceed (struct peer *peer) /*------------------------------------------------------------------------------ * Write packets to the peer -- subject to the XON flow control. * - * Empties the obuf queue first. 
+ * Takes an optional stream argument, if not NULL then must be peer->work, + * in which there is a message to be sent. * * Then processes the peer->sync structure to generate further updates. * * TODO: work out how bgp_routeadv_timer fits into this. */ int -bgp_write (bgp_peer peer) +bgp_write (bgp_peer peer, struct stream* s) { - u_char type; - struct stream *s; - int free_s ; + if (s != NULL) + stream_fifo_push(peer->obuf, stream_dup(s)) ; while (bgp_session_is_XON(peer)) { - free_s = 0 ; - - s = stream_fifo_head(peer->obuf) ; /* returns own stream */ - if (s != NULL) - free_s = 1 ; - else - { - s = bgp_write_packet(peer); /* uses peer->work */ - if (s == NULL) - break; - } ; - - bgp_session_update_send(peer->session, s); - - /* Retrieve BGP packet type. */ - stream_set_getp (s, BGP_MARKER_SIZE + 2); - type = stream_getc (s); + s = bgp_write_packet(peer); /* uses peer->work */ + if (s == NULL) + break; - switch (type) - { - case BGP_MSG_OPEN: - break; - case BGP_MSG_UPDATE: - break; - case BGP_MSG_NOTIFY: - /* Double start timer. */ - peer->v_start *= 2; - - /* Overflow check. */ - if (peer->v_start >= (60 * 2)) - peer->v_start = (60 * 2); - - assert(0); /* shouldn't get notifies through here */ - return 0; - case BGP_MSG_KEEPALIVE: - break; - case BGP_MSG_ROUTE_REFRESH_NEW: - case BGP_MSG_ROUTE_REFRESH_OLD: - break; - case BGP_MSG_CAPABILITY: - break; - } + stream_fifo_push (peer->obuf, stream_dup(s)) ; - /* OK we sent packet so delete it. */ - if (free_s) - bgp_packet_delete (peer); + /* Count down flow control, send fifo if hits BGP_XON_KICK */ + if (bgp_session_dec_flow_count(peer)) + bgp_session_update_send(peer->session, peer->obuf) ; + } ; - } + /* In any case, send what's in the FIFO */ + if (stream_fifo_head(peer->obuf) != NULL) + bgp_session_update_send(peer->session, peer->obuf) ; return 0; } @@ -842,7 +791,7 @@ bgp_notify_send_with_data (struct peer *peer, u_char code, u_char sub_code, peer->last_reset = PEER_DOWN_NOTIFY_SEND; } - bgp_peer_disable(peer, notification); + bgp_peer_disable(peer, notification); } /* Send BGP notify packet. */ @@ -1032,14 +981,14 @@ bgp_capability_send (struct peer *peer, afi_t afi, safi_t safi, int capability_code, int action) { struct stream *s; - struct stream *packet; int length; /* Adjust safi code. */ if (safi == SAFI_MPLS_VPN) safi = BGP_SAFI_VPNV4; - s = stream_new (BGP_MAX_PACKET_SIZE); + s = peer->work; + stream_reset (s); /* Make BGP update packet. */ bgp_packet_set_marker (s, BGP_MSG_CAPABILITY); @@ -1063,18 +1012,12 @@ bgp_capability_send (struct peer *peer, afi_t afi, safi_t safi, /* Set packet size. */ length = bgp_packet_set_size (s); - /* Make real packet. */ - packet = stream_dup (s); - stream_free (s); - - /* Add packet to the peer. */ - bgp_packet_add (peer, packet); - if (BGP_DEBUG (normal, NORMAL)) zlog_debug ("%s send message type %d, length (incl. header) %d", - peer->host, BGP_MSG_CAPABILITY, length); + peer->host, BGP_MSG_CAPABILITY, length); - bgp_write(peer); + /* Add packet to the peer. */ + bgp_write(peer, s); } #if 0 diff --git a/bgpd/bgp_packet.h b/bgpd/bgp_packet.h index 81937522..f0798846 100644 --- a/bgpd/bgp_packet.h +++ b/bgpd/bgp_packet.h @@ -44,7 +44,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA /* Packet send and receive function prototypes. 
*/ extern int bgp_read (struct thread *); -extern int bgp_write (bgp_peer peer); +extern int bgp_write (bgp_peer peer, struct stream*); extern void bgp_keepalive_send (struct peer *); extern void bgp_open_send (struct peer *); diff --git a/bgpd/bgp_peer.c b/bgpd/bgp_peer.c index e6be06c7..16ec8bd9 100644 --- a/bgpd/bgp_peer.c +++ b/bgpd/bgp_peer.c @@ -202,6 +202,7 @@ bgp_session_has_established(bgp_peer peer) bgp_notify_unset(&(peer->session->notification)); /* Clear start timer value to default. */ + /* TODO: figure out where to increase the IdleHoldTimer */ peer->v_start = BGP_INIT_START_TIMER; /* Increment established count. */ @@ -547,7 +548,9 @@ bgp_timer_set (struct peer *peer) static int bgp_routeadv_timer (struct thread *thread) { - struct peer *peer; + struct peer *peer; + uint32_t jittered ; + uint32_t jitter ; peer = THREAD_ARG (thread); peer->t_routeadv = NULL; @@ -559,10 +562,21 @@ bgp_routeadv_timer (struct thread *thread) peer->synctime = time (NULL); - bgp_write(peer); + bgp_write(peer, NULL); + + /* Apply +/- 10% jitter to the route advertise timer. + * + * The time is in seconds, so for anything less than 10 seconds this forced + * to be +/- 1 second. + */ + jittered = jitter = peer->v_routeadv ; + if (jitter < 10) + jitter = 10 ; + jittered = (jittered * 90) + (rand() % (jitter * 20)) ; /* jitter is +/-10% */ + jittered = (jittered + 50) / 100 ; - BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, - peer->v_routeadv); + /* TODO: move this to the Routeing Engine qtimer pile. */ + BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, jittered) ; return 0; } @@ -1023,7 +1037,15 @@ void bgp_peer_disable(bgp_peer peer, bgp_notify notification) { if (bgp_session_is_active(peer->session)) + { + /* This code has been moved from where it was, in bgp_write */ + /* TODO: not clear whether v_start handling is still correct */ + peer->v_start *= 2; + if (peer->v_start >= (60 * 2)) + peer->v_start = (60 * 2); + bgp_session_disable(peer, notification); + } else { bgp_notify_free(notification) ; diff --git a/bgpd/bgp_peer_index.h b/bgpd/bgp_peer_index.h index 38d70907..c99ec710 100644 --- a/bgpd/bgp_peer_index.h +++ b/bgpd/bgp_peer_index.h @@ -40,7 +40,7 @@ typedef unsigned bgp_peer_id_t ; struct bgp_peer_index_entry { - bgp_peer peer ; /* used by Peering Engine */ + bgp_peer peer ; /* used by Routing Engine */ /* The accept pointer is used by the listening socket(s) to find the * session when it is prepared to accept a connection. 
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 7d3ad901..c5191b18 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -1457,10 +1457,12 @@ struct bgp_process_queue safi_t safi; }; +WQ_ARGS_SIZE_OK(bgp_process_queue) ; + static wq_item_status -bgp_process_rsclient (struct work_queue *wq, void *data) +bgp_process_rsclient (struct work_queue *wq, work_queue_item item) { - struct bgp_process_queue *pq = data; + struct bgp_process_queue *pq = work_queue_item_args(item) ; struct bgp *bgp = pq->bgp; struct bgp_node *rn = pq->rn; afi_t afi = pq->afi; @@ -1518,9 +1520,9 @@ bgp_process_rsclient (struct work_queue *wq, void *data) } static wq_item_status -bgp_process_main (struct work_queue *wq, void *data) +bgp_process_main (struct work_queue *wq, work_queue_item item) { - struct bgp_process_queue *pq = data; + struct bgp_process_queue *pq = work_queue_item_args(item) ; struct bgp *bgp = pq->bgp; struct bgp_node *rn = pq->rn; afi_t afi = pq->afi; @@ -1592,15 +1594,14 @@ bgp_process_main (struct work_queue *wq, void *data) } static void -bgp_processq_del (struct work_queue *wq, void *data) +bgp_processq_del (struct work_queue *wq, work_queue_item item) { - struct bgp_process_queue *pq = data; + struct bgp_process_queue *pq = work_queue_item_args(item); struct bgp_table *table = pq->rn->table; bgp_unlock (pq->bgp); bgp_unlock_node (pq->rn); bgp_table_unlock (table); - XFREE (MTYPE_BGP_PROCESS_QUEUE, pq); } static void @@ -1617,21 +1618,23 @@ bgp_process_queue_init (void) exit (1); } - bm->process_main_queue->spec.workfunc = &bgp_process_main; - bm->process_rsclient_queue->spec.workfunc = &bgp_process_rsclient; - bm->process_main_queue->spec.del_item_data = &bgp_processq_del; - bm->process_rsclient_queue->spec.del_item_data - = bm->process_main_queue->spec.del_item_data; - bm->process_main_queue->spec.max_retries - = bm->process_main_queue->spec.max_retries = 0; - bm->process_rsclient_queue->spec.hold - = bm->process_main_queue->spec.hold = 50; + bm->process_main_queue->spec.data = bm->master ; + bm->process_main_queue->spec.errorfunc = NULL ; + bm->process_main_queue->spec.workfunc = &bgp_process_main; + bm->process_main_queue->spec.del_item_data = &bgp_processq_del; + bm->process_main_queue->spec.completion_func = NULL ; + bm->process_main_queue->spec.max_retries = 0; + bm->process_main_queue->spec.hold = 50; + + bm->process_rsclient_queue->spec = bm->process_main_queue->spec ; + bm->process_rsclient_queue->spec.workfunc = &bgp_process_rsclient; } void bgp_process (struct bgp *bgp, struct bgp_node *rn, afi_t afi, safi_t safi) { struct bgp_process_queue *pqnode; + struct work_queue* wq ; /* already scheduled for processing? 
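 *
 * (Note the revised work queue usage below: work_queue_item_add() returns
 * the args area embedded in the newly added queue item, which is then
 * filled in place -- so bgp_processq_del() above no longer XFREEs a
 * separately allocated struct bgp_process_queue, and WQ_ARGS_SIZE_OK()
 * checks that the args do fit within an item.)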
*/ if (CHECK_FLAG (rn->flags, BGP_NODE_PROCESS_SCHEDULED)) @@ -1641,29 +1644,31 @@ bgp_process (struct bgp *bgp, struct bgp_node *rn, afi_t afi, safi_t safi) (bm->process_rsclient_queue == NULL) ) bgp_process_queue_init (); - pqnode = XCALLOC (MTYPE_BGP_PROCESS_QUEUE, - sizeof (struct bgp_process_queue)); - if (!pqnode) - return; - - /* all unlocked in bgp_processq_del */ - bgp_table_lock (rn->table); - pqnode->rn = bgp_lock_node (rn); - pqnode->bgp = bgp; - bgp_lock (bgp); - pqnode->afi = afi; - pqnode->safi = safi; - switch (rn->table->type) { case BGP_TABLE_MAIN: - work_queue_add (bm->process_main_queue, pqnode); + wq = bm->process_main_queue ; break; case BGP_TABLE_RSCLIENT: - work_queue_add (bm->process_rsclient_queue, pqnode); + wq = bm->process_rsclient_queue ; break; + default: + zabort("invalid rn->table->type") ; } + pqnode = work_queue_item_add(wq); + + if (!pqnode) + return; + + /* all unlocked in bgp_processq_del */ + bgp_table_lock (rn->table); + pqnode->rn = bgp_lock_node (rn); + pqnode->bgp = bgp; + bgp_lock (bgp); + pqnode->afi = afi; + pqnode->safi = safi; + return; } @@ -2672,17 +2677,18 @@ bgp_soft_reconfig_in (struct peer *peer, afi_t afi, safi_t safi) bgp_soft_reconfig_table (peer, afi, safi, table); } - struct bgp_clear_node_queue { struct bgp_node *rn; enum bgp_clear_route_type purpose; }; +WQ_ARGS_SIZE_OK(bgp_clear_node_queue) ; + static wq_item_status -bgp_clear_route_node (struct work_queue *wq, void *data) +bgp_clear_route_node (struct work_queue *wq, work_queue_item item) { - struct bgp_clear_node_queue *cnq = data; + struct bgp_clear_node_queue *cnq = work_queue_item_args(item) ; struct bgp_node *rn = cnq->rn; struct peer *peer = wq->spec.data; struct bgp_info *ri; @@ -2708,15 +2714,14 @@ bgp_clear_route_node (struct work_queue *wq, void *data) } static void -bgp_clear_node_queue_del (struct work_queue *wq, void *data) +bgp_clear_node_queue_del (struct work_queue *wq, work_queue_item item) { - struct bgp_clear_node_queue *cnq = data; + struct bgp_clear_node_queue *cnq = work_queue_item_args(item) ; struct bgp_node *rn = cnq->rn; struct bgp_table *table = rn->table; bgp_unlock_node (rn); bgp_table_unlock (table); - XFREE (MTYPE_BGP_CLEAR_NODE_QUEUE, cnq); } static void @@ -2823,11 +2828,9 @@ bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, /* both unlocked in bgp_clear_node_queue_del */ bgp_table_lock (rn->table); bgp_lock_node (rn); - cnq = XCALLOC (MTYPE_BGP_CLEAR_NODE_QUEUE, - sizeof (struct bgp_clear_node_queue)); - cnq->rn = rn; + cnq = work_queue_item_add(peer->clear_node_queue) ; + cnq->rn = rn; cnq->purpose = purpose; - work_queue_add (peer->clear_node_queue, cnq); break; } diff --git a/bgpd/bgp_route_refresh.h b/bgpd/bgp_route_refresh.h index b6e5eaf5..3afd997e 100644 --- a/bgpd/bgp_route_refresh.h +++ b/bgpd/bgp_route_refresh.h @@ -65,7 +65,10 @@ struct bgp_orf_entry } body ; } ; -typedef struct bgp_orf_entry bgp_orf_entry_t ; /* calm down Eclipse */ +/* (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining + * about first argument of offsetof().) + */ +typedef struct bgp_orf_entry bgp_orf_entry_t ; enum { bgp_orf_unknown_min_l = sizeof(struct bgp_orf_entry) diff --git a/bgpd/bgp_session.c b/bgpd/bgp_session.c index a2b49da5..9d17e36c 100644 --- a/bgpd/bgp_session.c +++ b/bgpd/bgp_session.c @@ -80,10 +80,10 @@ static void bgp_session_do_route_refresh_recv(mqueue_block mqb, mqb_flag_t flag) * change any shared item in the session, except under the mutex. And * even then it may make no sense ! 
* - * NB: a session reaches eDisabled when the Peering Engine has sent a disable + * NB: a session reaches eDisabled when the Routing Engine has sent a disable * request to the BGP Engine, AND an eDisabled event has come back. * - * While the Peering Engine is waiting for the eDisabled event, the session + * While the Routing Engine is waiting for the eDisabled event, the session * is in sLimping state. * * The BGP Engine's primary interest is in its (private) bgp_connection @@ -212,7 +212,7 @@ bgp_session_free(bgp_session session) } /*============================================================================== - * Peering Engine: enable session for given peer -- allocate if required. + * Routing Engine: enable session for given peer -- allocate if required. * * Sets up the session given the current state of the peer. If the state * changes, then need to disable the session and re-enable it again with new @@ -226,12 +226,12 @@ bgp_session_enable(bgp_peer peer) /* Set up session if required. Check session if already exists. * - * Only the Peering Engine creates sessions, so it is safe to pick up the + * Only the Routing Engine creates sessions, so it is safe to pick up the * peer->session pointer and test it. * * If session exists, it MUST be inactive. * - * Peering Engine does not require the mutex while the session is inactive. + * Routing Engine does not require the mutex while the session is inactive. */ session = peer->session ; @@ -348,7 +348,7 @@ bgp_session_do_enable(mqueue_block mqb, mqb_flag_t flag) } ; /*============================================================================== - * Peering Engine: disable session for given peer -- if enabled (!). + * Routing Engine: disable session for given peer -- if enabled (!). * * Passes any bgp_notify to the BGP Engine, which will dispose of it in due * course. @@ -398,7 +398,7 @@ bgp_session_disable(bgp_peer peer, bgp_notify notification) * * the disable is being issued in response to a stopped event from * the BGP Engine. * - * * the session is stopped, but the message to the Peering Engine is + * * the session is stopped, but the message to the Routing Engine is * still in its message queue. * * * the session is stopped while the disable message is in the @@ -410,11 +410,11 @@ bgp_session_disable(bgp_peer peer, bgp_notify notification) * * NB: The BGP Engine will discard any outstanding work for the session. * - * The Peering Engine should discard all further messages for this + * The Routing Engine should discard all further messages for this * session up to the eDisabled, and must then discard any other * messages for the session. * - * NB: the Peering Engine MUST not issue any further messages until it sees + * NB: the Routing Engine MUST not issue any further messages until it sees * the returned eDisabled event. */ mqb = mqb_init_new(NULL, bgp_session_do_disable, session) ; @@ -433,7 +433,6 @@ bgp_session_disable(bgp_peer peer, bgp_notify notification) c = 0 ; s = 0 ; } ; - fprintf(stderr, " session disable %d/%d", c, s) ; } ; ++bgp_engine_queue_stats.event ; @@ -469,7 +468,7 @@ bgp_session_do_disable(mqueue_block mqb, mqb_flag_t flag) /*============================================================================== * BGP Engine: send session event signal to Routeing Engine * - * NB: is passing responsibility for the notification to the Peering Engine. + * NB: is passing responsibility for the notification to the Routing Engine. 
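 *
 * (The event message follows the same mqueue pattern as everything else
 * in this file -- allocate the message block, fill the args embedded in
 * it, dispatch -- e.g., for some hypothetical action function do_foo:
 *
 *     mqb  = mqb_init_new(NULL, bgp_session_do_foo, session) ;
 *     args = mqb_get_args(mqb) ;
 *     ... fill *args -- MQB_ARGS_SIZE_OK() checks the struct fits ...
 *     bgp_to_routing_engine(mqb) ;   -- or bgp_to_bgp_engine(mqb)
 * )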
*/ extern void bgp_session_event(bgp_session session, bgp_session_event_t event, @@ -494,21 +493,20 @@ bgp_session_event(bgp_session session, bgp_session_event_t event, args->ordinal = ordinal ; args->stopped = stopped, - ++peering_engine_queue_stats.event ; + ++routing_engine_queue_stats.event ; - bgp_to_peering_engine(mqb) ; -} + bgp_to_routing_engine(mqb) ; +} ; /*============================================================================== - * Peering Engine: dispatch update to peer -> BGP Engine + * Routing Engine: dispatch update(s) to peer -> BGP Engine * - * PRO TEM -- this is being passed the pre-packaged BGP message. + * PRO TEM -- this is being passed the pre-packaged BGP message(s). * - * The BGP Engine takes care of discarding the stream block once it's been - * dealt with. + * The BGP Engine takes care of discarding the stream block(s) once dealt with. */ extern void -bgp_session_update_send(bgp_session session, struct stream* upd) +bgp_session_update_send(bgp_session session, struct stream_fifo* fifo) { struct bgp_session_update_args* args ; mqueue_block mqb ; @@ -516,37 +514,38 @@ bgp_session_update_send(bgp_session session, struct stream* upd) mqb = mqb_init_new(NULL, bgp_session_do_update_send, session) ; args = mqb_get_args(mqb) ; - args->buf = stream_dup(upd) ; + args->buf = stream_fifo_head(fifo) ; args->is_pending = NULL ; - args->xon_kick = (session->flow_control == BGP_XON_KICK); - session->flow_control--; + args->xon_kick = (session->flow_control == BGP_XON_KICK); ++bgp_engine_queue_stats.update ; bgp_to_bgp_engine(mqb) ; + + stream_fifo_reset(fifo) ; } ; /*------------------------------------------------------------------------------ - * BGP Engine: write given BGP update message -- mqb action function. + * BGP Engine: write given BGP update message(s) -- mqb action function. * * Each connection has a pending queue associated with it, onto which messages * are put if the connection's write buffer is unable to absorb any further * messages. * - * This function is called both when the mqb is received from the Peering + * This function is called both when the mqb is received from the Routing * Engine, and when the BGP Engine is trying to empty the connection's pending * queue. * - * When the mqb is received from the Peering Engine, then: + * When the mqb is received from the Routing Engine, then: * - * -- if the connection's pending queue is empty, try to send the message. + * -- if the connection's pending queue is empty, try to send the message(s). * * When the mqb is from connection's pending queue, then: * - * -- try to send the message. + * -- try to send the message(s). * - * In any case, if cannot send the message (and not encountered any error), add - * it (back) to the connection's pending queue. + * In any case, if cannot send all the message(s), add it (back) to the + * connection's pending queue. * * If the mqb has been dealt with, it is freed, along with the stream buffer. * Also, update the flow control counter, and issue XON if required. @@ -557,43 +556,54 @@ bgp_session_do_update_send(mqueue_block mqb, mqb_flag_t flag) struct bgp_session_update_args* args = mqb_get_args(mqb) ; bgp_session session = mqb_get_arg0(mqb) ; - if ((flag == mqb_action) && session->active) + while (args->buf != NULL) { - bgp_connection connection = session->connections[bgp_connection_primary] ; - assert(connection != NULL) ; + struct stream* buf ; - /* If established, try and send. 
*/ - if (connection->state == bgp_fsm_sEstablished) + if ((flag == mqb_action) && session->active) { - int ret = bgp_connection_no_pending(connection, &args->is_pending) ; + bgp_connection connection ; - if (ret != 0) - ret = bgp_msg_send_update(connection, args->buf) ; + connection = session->connections[bgp_connection_primary] ; + assert(connection != NULL) ; - if (ret == 0) - { - /* Either there is already a pending queue, or the message - * could not be sent (and has not failed) -- so add to the - * pending queue. - */ - bgp_connection_add_pending(connection, mqb, &args->is_pending) ; - return ; /* Quit now, with message intact. */ - } - else if (ret > 0) + /* If established, try and send. */ + if (connection->state == bgp_fsm_sEstablished) { - /* Successfully wrote the message. XON if requested */ - if (args->xon_kick) - bgp_session_XON(session); + int ret ; + ret = bgp_connection_no_pending(connection, &args->is_pending) ; + + if (ret != 0) + ret = bgp_msg_send_update(connection, args->buf) ; + + if (ret == 0) + { + /* Either there is already a pending queue, or the message + * could not be sent (and has not failed) -- so add to the + * pending queue. + */ + bgp_connection_add_pending(connection, mqb, + &args->is_pending) ; + return ; /* Quit now, with message intact. */ + } } ; } ; + + buf = args->buf ; + args->buf = buf->next ; + + stream_free(buf) ; } ; - stream_free(args->buf) ; + /* If gets to here, then has dealt with all message(s). */ + if ((flag == mqb_action) && (args->xon_kick)) + bgp_session_XON(session) ; + mqb_free(mqb) ; } ; /*------------------------------------------------------------------------------ - * Peering Engine: are we in XON state ? + * Routing Engine: are we in XON state ? */ extern int bgp_session_is_XON(bgp_peer peer) @@ -606,8 +616,20 @@ bgp_session_is_XON(bgp_peer peer) return result; } ; +/*------------------------------------------------------------------------------ + * Count down flow control -- signal if reached XON point. + */ +extern int +bgp_session_dec_flow_count(bgp_peer peer) +{ + bgp_session session = peer->session; + + assert(session->flow_control > 0) ; + return (--session->flow_control == BGP_XON_KICK) ; +} ; + /*============================================================================== - * Peering Engine: dispatch Route Refresh to peer -> BGP Engine + * Routing Engine: dispatch Route Refresh to peer -> BGP Engine * * The BGP Engine takes care of discarding the bgp_route_refresh once it's been * dealt with. @@ -671,7 +693,7 @@ bgp_session_do_route_refresh_send(mqueue_block mqb, mqb_flag_t flag) } ; /*============================================================================== - * Peering Engine: dispatch End-of-RIB to peer -> BGP Engine + * Routing Engine: dispatch End-of-RIB to peer -> BGP Engine */ extern void bgp_session_end_of_rib_send(bgp_session session, qAFI_t afi, qSAFI_t safi) @@ -736,11 +758,11 @@ bgp_session_do_end_of_rib_send(mqueue_block mqb, mqb_flag_t flag) } ; /*============================================================================== - * BGP Engine: forward incoming update -> Peering Engine + * BGP Engine: forward incoming update -> Routing Engine * * PRO TEM -- this is being passed the raw BGP message. * - * The Peering Engine takes care of discarding the stream block once it's been + * The Routing Engine takes care of discarding the stream block once it's been * dealt with. 
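 *
 * (In the opposite direction, outgoing updates are credit flow
 * controlled: bgp_write() spends one unit of flow_control per message
 * queued -- see bgp_session_dec_flow_count() above -- and the batch
 * which takes the count down to BGP_XON_KICK is flagged xon_kick, so
 * that the BGP Engine sends an XON back once it has written it;
 * bgp_session_do_XON() then refreshes the count to BGP_XON_REFRESH and,
 * if the Routing Engine had stalled at zero, restarts bgp_write().)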
*/ extern void @@ -756,13 +778,13 @@ bgp_session_update_recv(bgp_session session, struct stream* buf, bgp_size_t size args->size = size; args->xon_kick = 0; - ++peering_engine_queue_stats.update ; + ++routing_engine_queue_stats.update ; - bgp_to_peering_engine(mqb) ; + bgp_to_routing_engine(mqb) ; } /*------------------------------------------------------------------------------ - * Peering Engine: process incoming update message -- mqb action function. + * Routing Engine: process incoming update message -- mqb action function. */ static void bgp_session_do_update_recv(mqueue_block mqb, mqb_flag_t flag) @@ -787,7 +809,7 @@ bgp_session_do_update_recv(mqueue_block mqb, mqb_flag_t flag) /*============================================================================== * BGP Engine: received Route Refresh to peer * - * The Peering Engine takes care of discarding the bgp_route_refresh once + * The Routing Engine takes care of discarding the bgp_route_refresh once * it's been dealt with. */ extern void @@ -802,11 +824,11 @@ bgp_session_route_refresh_recv(bgp_session session, bgp_route_refresh rr) args->rr = rr ; args->is_pending = NULL ; - bgp_to_peering_engine(mqb) ; + bgp_to_routing_engine(mqb) ; } ; /*------------------------------------------------------------------------------ - * Peering Engine: receive given BGP route refresh message -- mqb action + * Routing Engine: receive given BGP route refresh message -- mqb action * function. */ static void @@ -823,7 +845,7 @@ bgp_session_do_route_refresh_recv(mqueue_block mqb, mqb_flag_t flag) } /*============================================================================== - * BGP Engine: send XON message to Peering Engine + * BGP Engine: send XON message to Routing Engine * * Can be sent more packets now */ @@ -836,13 +858,13 @@ bgp_session_XON(bgp_session session) confirm(sizeof(struct bgp_session_XON_args) == 0) ; - ++peering_engine_queue_stats.xon ; + ++routing_engine_queue_stats.xon ; - bgp_to_peering_engine(mqb) ; + bgp_to_routing_engine(mqb) ; } /*------------------------------------------------------------------------------ - * Peering Engine: process incoming XON message -- mqb action function. + * Routing Engine: process incoming XON message -- mqb action function. */ static void bgp_session_do_XON(mqueue_block mqb, mqb_flag_t flag) @@ -854,14 +876,14 @@ bgp_session_do_XON(mqueue_block mqb, mqb_flag_t flag) int xoff = (session->flow_control <= 0); session->flow_control = BGP_XON_REFRESH; if (xoff) - bgp_write (session->peer) ; + bgp_write (session->peer, NULL) ; } mqb_free(mqb) ; } /*============================================================================== - * Peering Engine: send set ttl message to BGP Engine + * Routing Engine: send set ttl message to BGP Engine * */ void @@ -918,7 +940,7 @@ bgp_session_do_set_ttl(mqueue_block mqb, mqb_flag_t flag) * pointer is NULL -- this is largely paranoia, but it would be a grave * mistake for the listening socket(s) to find a session which is not active ! * - * NB: accessing Peering Engine "private" variable -- no lock required. + * NB: accessing Routing Engine "private" variable -- no lock required. * * accessing index_entry when not active -- no lock required. */ @@ -943,13 +965,13 @@ bgp_session_is_active(bgp_session session) } ; /*------------------------------------------------------------------------------ - * Peering Engine: if session is limping we defer re-enabling the session + * Routing Engine: if session is limping we defer re-enabling the session * until it is disabled. 
* * returns 1 if limping and defer * returns 0 if not limping * - * NB: accessing Peering Engine "private" variable -- no lock required. + * NB: accessing Routing Engine "private" variable -- no lock required. */ static int bgp_session_defer_if_limping(bgp_session session) diff --git a/bgpd/bgp_session.h b/bgpd/bgp_session.h index 5af81688..5b144db1 100644 --- a/bgpd/bgp_session.h +++ b/bgpd/bgp_session.h @@ -59,7 +59,7 @@ * For simplicity, the BGP Engine may lock the session associated with the * connection it is dealing with. * - * Parts of the session structure are private to the Peering Engine, and + * Parts of the session structure are private to the Routing Engine, and * do not require the mutex for access. * * NB: the connections associated with a BGP session are private to the BGP @@ -99,34 +99,31 @@ struct bgp_session /* While sIdle and sStopped: * - * the session belongs to the Peering Engine. + * the session belongs to the Routing Engine. * * The BGP Engine will not touch a session in these states and the - * Peering Engine may do what it likes with it. + * Routing Engine may do what it likes with it. * * While sEnabled, sEstablished and sStopping: * * the session belongs to the BGP Engine. * - * A (very) few items in the session may be accessed by the Peering Engine, + * A (very) few items in the session may be accessed by the Routing Engine, * as noted below. (Subject to the mutex.) * - * Only the Peering Engine creates and destroys sessions. The BGP Engine + * Only the Routing Engine creates and destroys sessions. The BGP Engine * assumes that a session will not be destroyed while it is sEnabled, * sEstablished or sStopping. * - * These are private to the Peering Engine. + * These are private to the Routing Engine. */ bgp_session_state_t state ; int defer_enable ; /* set when waiting for stop */ - /* Flow control. Incremented when an update packet is sent - * from peering to BGP engine. Decremented when packet processed - * by BGP engine. On transition to 0 BGP engine should send an XON. - */ - int flow_control; + int flow_control ; /* limits number of updates sent + by the Routing Engine */ - /* These are private to the Peering Engine, and are set each time a session + /* These are private to the Routing Engine, and are set each time a session * event message is received from the BGP Engine. */ bgp_session_event_t event ; /* last event */ @@ -208,11 +205,11 @@ struct bgp_session * the session, and sets the stopped flag. * * The active flag is set when one or more connections are activated, and - * cleared when either the BGP Engine stops the session or the Peering + * cleared when either the BGP Engine stops the session or the Routing * Engine disables it. When not "active" all messages other than disable * and enable are ignored. This deals with the hiatus that exists between * the BGP Engine signalling that it has stopped (because of some exception) - * and the Peering Engine acknowledging that (by disabling the session). + * and the Routing Engine acknowledging that (by disabling the session). 
*/ bgp_connection connections[bgp_connection_count] ; @@ -282,8 +279,8 @@ struct bgp_session_XON_args /* to Routeing Engine */ /* no further arguments */ } ; MQB_ARGS_SIZE_OK(bgp_session_XON_args) ; -enum { BGP_XON_REFRESH = 12, - BGP_XON_KICK = 4, +enum { BGP_XON_REFRESH = 40, + BGP_XON_KICK = 20, } ; struct bgp_session_ttl_args /* to bgp Engine */ @@ -330,7 +327,7 @@ bgp_session_event(bgp_session session, bgp_session_event_t event, int stopped) ; extern void -bgp_session_update_send(bgp_session session, struct stream* upd) ; +bgp_session_update_send(bgp_session session, struct stream_fifo* fifo) ; extern void bgp_session_route_refresh_send(bgp_session session, bgp_route_refresh rr) ; @@ -346,10 +343,9 @@ extern void bgp_session_route_refresh_recv(bgp_session session, bgp_route_refresh rr); extern int -bgp_session_is_XOFF(bgp_peer peer); - -extern int bgp_session_is_XON(bgp_peer peer); +extern int +bgp_session_dec_flow_count(bgp_peer peer) ; extern void bgp_session_set_ttl(bgp_session session, int ttl); diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 154cb28d..238bd01c 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -4712,7 +4712,6 @@ bgp_terminate (int terminating, int retain_mode) for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) { -fprintf(stderr, ">>> %s:", peer->host) ; if (retain_mode) bgp_peer_disable(peer, NULL); else if (terminating) @@ -4725,7 +4724,6 @@ fprintf(stderr, ">>> %s:", peer->host) ; else bgp_notify_send(peer, BGP_NOTIFY_CEASE, BGP_NOTIFY_CEASE_ADMIN_RESET); -fprintf(stderr, "<<<\n") ; } if (!retain_mode) diff --git a/bgpd/bgpd.cx b/bgpd/bgpd.cx new file mode 100644 index 00000000..955b344b --- /dev/null +++ b/bgpd/bgpd.cx @@ -0,0 +1,4777 @@ +/* BGP-4, BGP-4+ daemon program + Copyright (C) 1996, 97, 98, 99, 2000 Kunihiro Ishiguro + +This file is part of GNU Zebra. + +GNU Zebra is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +GNU Zebra is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Zebra; see the file COPYING. If not, write to the Free +Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. 
*/ + +#include <zebra.h> + +#include "prefix.h" +#include "thread.h" +#include "buffer.h" +#include "stream.h" +#include "command.h" +#include "sockunion.h" +#include "network.h" +#include "memory.h" +#include "filter.h" +#include "routemap.h" +#include "str.h" +#include "log.h" +#include "plist.h" +#include "linklist.h" +#include "workqueue.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp.h" +#include "bgpd/bgp_peer.h" + +#include "bgpd/bgp_table.h" +#include "bgpd/bgp_aspath.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_dump.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_community.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_regex.h" +#include "bgpd/bgp_clist.h" +#include "bgpd/bgp_fsm.h" +#include "bgpd/bgp_packet.h" +#include "bgpd/bgp_zebra.h" +#include "bgpd/bgp_open.h" +#include "bgpd/bgp_filter.h" +#include "bgpd/bgp_nexthop.h" +#include "bgpd/bgp_damp.h" +#include "bgpd/bgp_mplsvpn.h" +#include "bgpd/bgp_advertise.h" +#include "bgpd/bgp_network.h" +#include "bgpd/bgp_vty.h" +#ifdef HAVE_SNMP +#include "bgpd/bgp_snmp.h" +#endif /* HAVE_SNMP */ + +/* BGP process wide configuration. */ +static struct bgp_master bgp_master; + +extern struct in_addr router_id_zebra; + +/* BGP process wide configuration pointer to export. */ +struct bgp_master *bm; + +/* BGP process wide nexus. */ +qpn_nexus cli_nexus = NULL; +qpn_nexus bgp_nexus = NULL; +qpn_nexus routing_nexus = NULL; + +/* BGP community-list. */ +struct community_list_handler *bgp_clist; + +/* true while program terminating */ +static int program_terminating = 0; + +/* BGP global flag manipulation. */ +int +bgp_option_set (int flag) +{ + switch (flag) + { + case BGP_OPT_NO_FIB: + case BGP_OPT_MULTIPLE_INSTANCE: + case BGP_OPT_CONFIG_CISCO: + SET_FLAG (bm->options, flag); + break; + default: + return BGP_ERR_INVALID_FLAG; + } + return 0; +} + +int +bgp_option_unset (int flag) +{ + switch (flag) + { + case BGP_OPT_MULTIPLE_INSTANCE: + if (listcount (bm->bgp) > 1) + return BGP_ERR_MULTIPLE_INSTANCE_USED; + /* Fall through. */ + case BGP_OPT_NO_FIB: + case BGP_OPT_CONFIG_CISCO: + UNSET_FLAG (bm->options, flag); + break; + default: + return BGP_ERR_INVALID_FLAG; + } + return 0; +} + +int +bgp_option_check (int flag) +{ + return CHECK_FLAG (bm->options, flag); +} + +/* BGP flag manipulation. */ +int +bgp_flag_set (struct bgp *bgp, int flag) +{ + SET_FLAG (bgp->flags, flag); + return 0; +} + +int +bgp_flag_unset (struct bgp *bgp, int flag) +{ + UNSET_FLAG (bgp->flags, flag); + return 0; +} + +int +bgp_flag_check (struct bgp *bgp, int flag) +{ + return CHECK_FLAG (bgp->flags, flag); +} + +/* Internal function to set BGP structure configureation flag. */ +static void +bgp_config_set (struct bgp *bgp, int config) +{ + SET_FLAG (bgp->config, config); +} + +static void +bgp_config_unset (struct bgp *bgp, int config) +{ + UNSET_FLAG (bgp->config, config); +} + +static int +bgp_config_check (struct bgp *bgp, int config) +{ + return CHECK_FLAG (bgp->config, config); +} + +/* Set BGP router identifier. */ +int +bgp_router_id_set (struct bgp *bgp, struct in_addr *id) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (bgp_config_check (bgp, BGP_CONFIG_ROUTER_ID) + && IPV4_ADDR_SAME (&bgp->router_id, id)) + return 0; + + IPV4_ADDR_COPY (&bgp->router_id, id); + bgp_config_set (bgp, BGP_CONFIG_ROUTER_ID); + + /* Set all peer's local identifier with this value. 
*/ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + IPV4_ADDR_COPY (&peer->local_id, id); + + if (peer->state == bgp_peer_sEstablished) + { + peer->last_reset = PEER_DOWN_RID_CHANGE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + return 0; +} + +/* BGP's cluster-id control. */ +int +bgp_cluster_id_set (struct bgp *bgp, struct in_addr *cluster_id) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (bgp_config_check (bgp, BGP_CONFIG_CLUSTER_ID) + && IPV4_ADDR_SAME (&bgp->cluster_id, cluster_id)) + return 0; + + IPV4_ADDR_COPY (&bgp->cluster_id, cluster_id); + bgp_config_set (bgp, BGP_CONFIG_CLUSTER_ID); + + /* Clear all IBGP peer. */ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer_sort (peer) != BGP_PEER_IBGP) + continue; + + if (peer->state == bgp_peer_sEstablished) + { + peer->last_reset = PEER_DOWN_CLID_CHANGE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + return 0; +} + +int +bgp_cluster_id_unset (struct bgp *bgp) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (! bgp_config_check (bgp, BGP_CONFIG_CLUSTER_ID)) + return 0; + + bgp->cluster_id.s_addr = 0; + bgp_config_unset (bgp, BGP_CONFIG_CLUSTER_ID); + + /* Clear all IBGP peer. */ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer_sort (peer) != BGP_PEER_IBGP) + continue; + + if (peer->state == bgp_peer_sEstablished) + { + peer->last_reset = PEER_DOWN_CLID_CHANGE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + return 0; +} + +/* BGP timer configuration. */ +int +bgp_timers_set (struct bgp *bgp, u_int32_t keepalive, u_int32_t holdtime) +{ + bgp->default_keepalive = (keepalive < holdtime / 3 + ? keepalive : holdtime / 3); + bgp->default_holdtime = holdtime; + + return 0; +} + +int +bgp_timers_unset (struct bgp *bgp) +{ + bgp->default_keepalive = BGP_DEFAULT_KEEPALIVE; + bgp->default_holdtime = BGP_DEFAULT_HOLDTIME; + + return 0; +} + +/* BGP confederation configuration. */ +int +bgp_confederation_id_set (struct bgp *bgp, as_t as) +{ + struct peer *peer; + struct listnode *node, *nnode; + int already_confed; + + if (as == 0) + return BGP_ERR_INVALID_AS; + + /* Remember - were we doing confederation before? */ + already_confed = bgp_config_check (bgp, BGP_CONFIG_CONFEDERATION); + bgp->confed_id = as; + bgp_config_set (bgp, BGP_CONFIG_CONFEDERATION); + + /* If we were doing confederation already, this is just an external + AS change. Just Reset EBGP sessions, not CONFED sessions. If we + were not doing confederation before, reset all EBGP sessions. */ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + /* We're looking for peers who's AS is not local or part of our + confederation. 
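
   (Two cases below: if confederation was already configured, only the
   EBGP peers pick up the new confed_id as local_as and are reset; if
   it is newly configured, every non-IBGP session is reset.)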
*/ + if (already_confed) + { + if (peer_sort (peer) == BGP_PEER_EBGP) + { + peer->local_as = as; + peer->last_reset = PEER_DOWN_CONFED_ID_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + else + { + /* Not doign confederation before, so reset every non-local + session */ + if (peer_sort (peer) != BGP_PEER_IBGP) + { + /* Reset the local_as to be our EBGP one */ + if (peer_sort (peer) == BGP_PEER_EBGP) + peer->local_as = as; + peer->last_reset = PEER_DOWN_CONFED_ID_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + } + return 0; +} + +int +bgp_confederation_id_unset (struct bgp *bgp) +{ + struct peer *peer; + struct listnode *node, *nnode; + + bgp->confed_id = 0; + bgp_config_unset (bgp, BGP_CONFIG_CONFEDERATION); + + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + /* We're looking for peers who's AS is not local */ + if (peer_sort (peer) != BGP_PEER_IBGP) + { + peer->local_as = bgp->as; + peer->last_reset = PEER_DOWN_CONFED_ID_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + return 0; +} + +/* Is an AS part of the confed or not? */ +int +bgp_confederation_peers_check (struct bgp *bgp, as_t as) +{ + int i; + + if (! bgp) + return 0; + + for (i = 0; i < bgp->confed_peers_cnt; i++) + if (bgp->confed_peers[i] == as) + return 1; + + return 0; +} + +/* Add an AS to the confederation set. */ +int +bgp_confederation_peers_add (struct bgp *bgp, as_t as) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (! bgp) + return BGP_ERR_INVALID_BGP; + + if (bgp->as == as) + return BGP_ERR_INVALID_AS; + + if (bgp_confederation_peers_check (bgp, as)) + return -1; + + if (bgp->confed_peers) + bgp->confed_peers = XREALLOC (MTYPE_BGP_CONFED_LIST, + bgp->confed_peers, + (bgp->confed_peers_cnt + 1) * sizeof (as_t)); + else + bgp->confed_peers = XMALLOC (MTYPE_BGP_CONFED_LIST, + (bgp->confed_peers_cnt + 1) * sizeof (as_t)); + + bgp->confed_peers[bgp->confed_peers_cnt] = as; + bgp->confed_peers_cnt++; + + if (bgp_config_check (bgp, BGP_CONFIG_CONFEDERATION)) + { + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer->as == as) + { + peer->local_as = bgp->as; + peer->last_reset = PEER_DOWN_CONFED_PEER_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + } + return 0; +} + +/* Delete an AS from the confederation set. */ +int +bgp_confederation_peers_remove (struct bgp *bgp, as_t as) +{ + int i; + int j; + struct peer *peer; + struct listnode *node, *nnode; + + if (! bgp) + return -1; + + if (! 
bgp_confederation_peers_check (bgp, as)) + return -1; + + for (i = 0; i < bgp->confed_peers_cnt; i++) + if (bgp->confed_peers[i] == as) + for(j = i + 1; j < bgp->confed_peers_cnt; j++) + bgp->confed_peers[j - 1] = bgp->confed_peers[j]; + + bgp->confed_peers_cnt--; + + if (bgp->confed_peers_cnt == 0) + { + if (bgp->confed_peers) + XFREE (MTYPE_BGP_CONFED_LIST, bgp->confed_peers); + bgp->confed_peers = NULL; + } + else + bgp->confed_peers = XREALLOC (MTYPE_BGP_CONFED_LIST, + bgp->confed_peers, + bgp->confed_peers_cnt * sizeof (as_t)); + + /* Now reset any peer who's remote AS has just been removed from the + CONFED */ + if (bgp_config_check (bgp, BGP_CONFIG_CONFEDERATION)) + { + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer->as == as) + { + peer->local_as = bgp->confed_id; + peer->last_reset = PEER_DOWN_CONFED_PEER_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + } + + return 0; +} + +/* Local preference configuration. */ +int +bgp_default_local_preference_set (struct bgp *bgp, u_int32_t local_pref) +{ + if (! bgp) + return -1; + + bgp->default_local_pref = local_pref; + + return 0; +} + +int +bgp_default_local_preference_unset (struct bgp *bgp) +{ + if (! bgp) + return -1; + + bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; + + return 0; +} + +/* If peer is RSERVER_CLIENT in at least one address family and is not member + of a peer_group for that family, return 1. + Used to check wether the peer is included in list bgp->rsclient. */ +int +peer_rsclient_active (struct peer *peer) +{ + int i; + int j; + + for (i=AFI_IP; i < AFI_MAX; i++) + for (j=SAFI_UNICAST; j < SAFI_MAX; j++) + if (CHECK_FLAG(peer->af_flags[i][j], PEER_FLAG_RSERVER_CLIENT) + && ! peer->af_group[i][j]) + return 1; + return 0; +} + +/* Peer comparison function for sorting. */ +static int +peer_cmp (struct peer *p1, struct peer *p2) +{ + return sockunion_cmp (&p1->su, &p2->su); +} + +int +peer_af_flag_check (struct peer *peer, afi_t afi, safi_t safi, u_int32_t flag) +{ + return CHECK_FLAG (peer->af_flags[afi][safi], flag); +} + +/* Reset all address family specific configuration. */ +static void +peer_af_flag_reset (struct peer *peer, afi_t afi, safi_t safi) +{ + int i; + struct bgp_filter *filter; + char orf_name[BUFSIZ]; + + filter = &peer->filter[afi][safi]; + + /* Clear neighbor filter and route-map */ + for (i = FILTER_IN; i < FILTER_MAX; i++) + { + if (filter->dlist[i].name) + { + free (filter->dlist[i].name); + filter->dlist[i].name = NULL; + } + prefix_list_unset_ref(&filter->plist[i].ref) ; + if (filter->aslist[i].name) + { + free (filter->aslist[i].name); + filter->aslist[i].name = NULL; + } + } + for (i = RMAP_IN; i < RMAP_MAX; i++) + { + if (filter->map[i].name) + { + free (filter->map[i].name); + filter->map[i].name = NULL; + } + } + + /* Clear unsuppress map. */ + if (filter->usmap.name) + free (filter->usmap.name); + filter->usmap.name = NULL; + filter->usmap.map = NULL; + + /* Clear neighbor's all address family flags. */ + peer->af_flags[afi][safi] = 0; + + /* Clear neighbor's all address family sflags. */ + peer->af_sflags[afi][safi] = 0; + + /* Clear neighbor's all address family capabilities. */ + peer->af_cap[afi][safi] = 0; + + /* Clear ORF info */ + peer->orf_plist[afi][safi] = NULL; + sprintf (orf_name, "%s.%d.%d", peer->host, afi, safi); + prefix_bgp_orf_remove_all (orf_name); + + /* Set default neighbor send-community. */ + if (! 
bgp_option_check (BGP_OPT_CONFIG_CISCO)) + { + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SEND_COMMUNITY); + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SEND_EXT_COMMUNITY); + } + + /* Clear neighbor default_originate_rmap */ + if (peer->default_rmap[afi][safi].name) + free (peer->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].name = NULL; + peer->default_rmap[afi][safi].map = NULL; + + /* Clear neighbor maximum-prefix */ + peer->pmax[afi][safi] = 0; + peer->pmax_threshold[afi][safi] = MAXIMUM_PREFIX_THRESHOLD_DEFAULT; +} + +/* peer global config reset */ +static void +peer_global_config_reset (struct peer *peer) +{ + peer->weight = 0; + peer->change_local_as = 0; + peer->ttl = (peer_sort (peer) == BGP_PEER_IBGP ? 255 : 1); + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + peer->flags = 0; + peer->config = 0; + peer->holdtime = 0; + peer->keepalive = 0; + peer->connect = 0; + peer->v_connect = BGP_DEFAULT_CONNECT_RETRY; +} + +/* Check peer's AS number and determin is this peer IBGP or EBGP */ +int +peer_sort (struct peer *peer) +{ + struct bgp *bgp; + + bgp = peer->bgp; + + /* Peer-group */ + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (peer->as) + return (bgp->as == peer->as ? BGP_PEER_IBGP : BGP_PEER_EBGP); + else + { + struct peer *peer1; + peer1 = listnode_head (peer->group->peer); + if (peer1) + return (peer1->local_as == peer1->as + ? BGP_PEER_IBGP : BGP_PEER_EBGP); + } + return BGP_PEER_INTERNAL; + } + + /* Normal peer */ + if (bgp && CHECK_FLAG (bgp->config, BGP_CONFIG_CONFEDERATION)) + { + if (peer->local_as == 0) + return BGP_PEER_INTERNAL; + + if (peer->local_as == peer->as) + { + if (peer->local_as == bgp->confed_id) + return BGP_PEER_EBGP; + else + return BGP_PEER_IBGP; + } + + if (bgp_confederation_peers_check (bgp, peer->as)) + return BGP_PEER_CONFED; + + return BGP_PEER_EBGP; + } + else + { + return (peer->local_as == 0 + ? BGP_PEER_INTERNAL : peer->local_as == peer->as + ? BGP_PEER_IBGP : BGP_PEER_EBGP); + } +} + + +/* increase reference count on a struct peer */ +struct peer * +peer_lock (struct peer *peer) +{ + assert (peer && (peer->lock >= 0)); + + peer->lock++; + + return peer; +} + +/* decrease reference count on a struct peer + * struct peer is freed and NULL returned if last reference + */ +struct peer * +peer_unlock (struct peer *peer) +{ + assert (peer && (peer->lock > 0)); + + peer->lock--; + + if (peer->lock == 0) + { +#if 0 + zlog_debug ("unlocked and freeing"); + zlog_backtrace (LOG_DEBUG); +#endif + peer_free (peer); + return NULL; + } + +#if 0 + if (peer->lock == 1) + { + zlog_debug ("unlocked to 1"); + zlog_backtrace (LOG_DEBUG); + } +#endif + + return peer; +} + + +/* Make accept BGP peer. Called from bgp_accept (). */ +struct peer * +peer_create_accept (struct bgp *bgp) +{ + struct peer *peer; + + peer = peer_new (bgp); + + peer = peer_lock (peer); /* bgp peer list reference */ + listnode_add_sort (bgp->peer, peer); + + return peer; +} + +/* Change peer's AS number. */ +static void +peer_as_change (struct peer *peer, as_t as) +{ + int type; + + /* Stop peer. */ + if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_REMOTE_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + type = peer_sort (peer); + peer->as = as; + + if (bgp_config_check (peer->bgp, BGP_CONFIG_CONFEDERATION) + && ! bgp_confederation_peers_check (peer->bgp, as) + && peer->bgp->as != as) + peer->local_as = peer->bgp->confed_id; + else + peer->local_as = peer->bgp->as; + + /* Advertisement-interval reset */ + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + /* TTL reset */ + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->ttl = 255; + else if (type == BGP_PEER_IBGP) + peer->ttl = 1; + + /* reflector-client reset */ + if (peer_sort (peer) != BGP_PEER_IBGP) + { + UNSET_FLAG (peer->af_flags[AFI_IP][SAFI_UNICAST], + PEER_FLAG_REFLECTOR_CLIENT); + UNSET_FLAG (peer->af_flags[AFI_IP][SAFI_MULTICAST], + PEER_FLAG_REFLECTOR_CLIENT); + UNSET_FLAG (peer->af_flags[AFI_IP][SAFI_MPLS_VPN], + PEER_FLAG_REFLECTOR_CLIENT); + UNSET_FLAG (peer->af_flags[AFI_IP6][SAFI_UNICAST], + PEER_FLAG_REFLECTOR_CLIENT); + UNSET_FLAG (peer->af_flags[AFI_IP6][SAFI_MULTICAST], + PEER_FLAG_REFLECTOR_CLIENT); + } + + /* local-as reset */ + if (peer_sort (peer) != BGP_PEER_EBGP) + { + peer->change_local_as = 0; + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + } +} + +/* If peer does not exist, create new one. If peer already exists, + set AS number to the peer. */ +int +peer_remote_as (struct bgp *bgp, union sockunion *su, as_t *as, + afi_t afi, safi_t safi) +{ + struct peer *peer; + as_t local_as; + + peer = peer_lookup (bgp, su); + + if (peer) + { + /* When this peer is a member of peer-group. */ + if (peer->group) + { + if (peer->group->conf->as) + { + /* Return peer group's AS number. */ + *as = peer->group->conf->as; + return BGP_ERR_PEER_GROUP_MEMBER; + } + if (peer_sort (peer->group->conf) == BGP_PEER_IBGP) + { + if (bgp->as != *as) + { + *as = peer->as; + return BGP_ERR_PEER_GROUP_PEER_TYPE_DIFFERENT; + } + } + else + { + if (bgp->as == *as) + { + *as = peer->as; + return BGP_ERR_PEER_GROUP_PEER_TYPE_DIFFERENT; + } + } + } + + /* Existing peer's AS number change. */ + if (peer->as != *as) + peer_as_change (peer, *as); + } + else + { + + /* If the peer is not part of our confederation, and its not an + iBGP peer then spoof the source AS */ + if (bgp_config_check (bgp, BGP_CONFIG_CONFEDERATION) + && ! bgp_confederation_peers_check (bgp, *as) + && bgp->as != *as) + local_as = bgp->confed_id; + else + local_as = bgp->as; + + /* If this is IPv4 unicast configuration and "no bgp default + ipv4-unicast" is specified. */ + + if (bgp_flag_check (bgp, BGP_FLAG_NO_DEFAULT_IPV4) + && afi == AFI_IP && safi == SAFI_UNICAST) + peer = peer_create (su, bgp, local_as, *as, 0, 0); + else + peer = peer_create (su, bgp, local_as, *as, afi, safi); + } + + return 0; +} + +/* Activate the peer or peer group for specified AFI and SAFI. */ +int +peer_activate (struct peer *peer, afi_t afi, safi_t safi) +{ + int active; + + if (peer->afc[afi][safi]) + return 0; + + /* Activate the address family configuration. */ + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + peer->afc[afi][safi] = 1; + else + { + active = peer_active (peer); + + peer->afc[afi][safi] = 1; + + if (! 
active && peer_active (peer)) + bgp_peer_enable (peer); + else +#if 0 + /* TODO: Dynamic capability */ + { + if (peer->status == Established) + { + if (CHECK_FLAG (peer->cap, PEER_CAP_DYNAMIC_RCV)) + { + peer->afc_adv[afi][safi] = 1; + bgp_capability_send (peer, afi, safi, + CAPABILITY_CODE_MP, + CAPABILITY_ACTION_SET); + if (peer->afc_recv[afi][safi]) + { + peer->afc_nego[afi][safi] = 1; + bgp_announce_route (peer, afi, safi); + } + } + else +#endif + { + peer->last_reset = PEER_DOWN_AF_ACTIVATE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } +#if 0 + } + } +#endif + } + return 0; +} + +int +peer_deactivate (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_group *group; + struct peer *peer1; + struct listnode *node, *nnode; + + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + group = peer->group; + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer1)) + { + if (peer1->af_group[afi][safi]) + return BGP_ERR_PEER_GROUP_MEMBER_EXISTS; + } + } + else + { + if (peer->af_group[afi][safi]) + return BGP_ERR_PEER_BELONGS_TO_GROUP; + } + + if (! peer->afc[afi][safi]) + return 0; + + /* De-activate the address family configuration. */ + peer->afc[afi][safi] = 0; + peer_af_flag_reset (peer, afi, safi); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (peer->state == bgp_peer_sEstablished) + { + if (CHECK_FLAG (peer->cap, PEER_CAP_DYNAMIC_RCV)) + { + peer->afc_adv[afi][safi] = 0; + peer->afc_nego[afi][safi] = 0; + + if (peer_active_nego (peer)) + { + bgp_capability_send (peer, afi, safi, + CAPABILITY_CODE_MP, + CAPABILITY_ACTION_UNSET); + bgp_clear_route (peer, afi, safi, BGP_CLEAR_ROUTE_NORMAL); + peer->pcount[afi][safi] = 0; + } + else + { + peer->last_reset = PEER_DOWN_NEIGHBOR_DELETE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + else + { + peer->last_reset = PEER_DOWN_NEIGHBOR_DELETE; + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + } + } + return 0; +} + +static int +peer_group_cmp (struct peer_group *g1, struct peer_group *g2) +{ + return strcmp (g1->name, g2->name); +} + +/* If peer is configured at least one address family return 1. */ +static int +peer_group_active (struct peer *peer) +{ + if (peer->af_group[AFI_IP][SAFI_UNICAST] + || peer->af_group[AFI_IP][SAFI_MULTICAST] + || peer->af_group[AFI_IP][SAFI_MPLS_VPN] + || peer->af_group[AFI_IP6][SAFI_UNICAST] + || peer->af_group[AFI_IP6][SAFI_MULTICAST]) + return 1; + return 0; +} + +/* Peer group cofiguration. */ +static struct peer_group * +peer_group_new (void) +{ + return (struct peer_group *) XCALLOC (MTYPE_PEER_GROUP, + sizeof (struct peer_group)); +} + +static void +peer_group_free (struct peer_group *group) +{ + XFREE (MTYPE_PEER_GROUP, group); +} + +struct peer_group * +peer_group_lookup (struct bgp *bgp, const char *name) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group)) + { + if (strcmp (group->name, name) == 0) + return group; + } + return NULL; +} + +struct peer_group * +peer_group_get (struct bgp *bgp, const char *name) +{ + struct peer_group *group; + + group = peer_group_lookup (bgp, name); + if (group) + return group; + + group = peer_group_new (); + group->bgp = bgp; + group->name = strdup (name); + group->peer = list_new (); + group->conf = peer_new (bgp); + if (! 
bgp_flag_check (bgp, BGP_FLAG_NO_DEFAULT_IPV4)) + group->conf->afc[AFI_IP][SAFI_UNICAST] = 1; + group->conf->host = XSTRDUP (MTYPE_BGP_PEER_HOST, name); + group->conf->group = group; + group->conf->as = 0; + group->conf->ttl = 1; + group->conf->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + UNSET_FLAG (group->conf->config, PEER_CONFIG_TIMER); + UNSET_FLAG (group->conf->config, PEER_CONFIG_CONNECT); + group->conf->keepalive = 0; + group->conf->holdtime = 0; + group->conf->connect = 0; + SET_FLAG (group->conf->sflags, PEER_STATUS_GROUP); + listnode_add_sort (bgp->group, group); + + return 0; +} + +static void +peer_group2peer_config_copy (struct peer_group *group, struct peer *peer, + afi_t afi, safi_t safi) +{ + int in = FILTER_IN; + int out = FILTER_OUT; + struct peer *conf; + struct bgp_filter *pfilter; + struct bgp_filter *gfilter; + + conf = group->conf; + pfilter = &peer->filter[afi][safi]; + gfilter = &conf->filter[afi][safi]; + + /* remote-as */ + if (conf->as) + peer->as = conf->as; + + /* remote-as */ + if (conf->change_local_as) + peer->change_local_as = conf->change_local_as; + + /* TTL */ + peer->ttl = conf->ttl; + + /* Weight */ + peer->weight = conf->weight; + + /* peer flags apply */ + peer->flags = conf->flags; + /* peer af_flags apply */ + peer->af_flags[afi][safi] = conf->af_flags[afi][safi]; + /* peer config apply */ + peer->config = conf->config; + + /* peer timers apply */ + peer->holdtime = conf->holdtime; + peer->keepalive = conf->keepalive; + peer->connect = conf->connect; + if (CHECK_FLAG (conf->config, PEER_CONFIG_CONNECT)) + peer->v_connect = conf->connect; + else + peer->v_connect = BGP_DEFAULT_CONNECT_RETRY; + + /* advertisement-interval reset */ + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + /* password apply */ + if (peer->password) + XFREE (MTYPE_PEER_PASSWORD, peer->password); + + if (conf->password) + peer->password = XSTRDUP (MTYPE_PEER_PASSWORD, conf->password); + else + peer->password = NULL; + + /* maximum-prefix */ + peer->pmax[afi][safi] = conf->pmax[afi][safi]; + peer->pmax_threshold[afi][safi] = conf->pmax_threshold[afi][safi]; + peer->pmax_restart[afi][safi] = conf->pmax_restart[afi][safi]; + + /* allowas-in */ + peer->allowas_in[afi][safi] = conf->allowas_in[afi][safi]; + + /* route-server-client */ + if (CHECK_FLAG(conf->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + { + /* Make peer's RIB point to group's RIB. */ + peer->rib[afi][safi] = group->conf->rib[afi][safi]; + + /* Import policy. */ + if (pfilter->map[RMAP_IMPORT].name) + free (pfilter->map[RMAP_IMPORT].name); + if (gfilter->map[RMAP_IMPORT].name) + { + pfilter->map[RMAP_IMPORT].name = strdup (gfilter->map[RMAP_IMPORT].name); + pfilter->map[RMAP_IMPORT].map = gfilter->map[RMAP_IMPORT].map; + } + else + { + pfilter->map[RMAP_IMPORT].name = NULL; + pfilter->map[RMAP_IMPORT].map = NULL; + } + + /* Export policy. */ + if (gfilter->map[RMAP_EXPORT].name && ! 
pfilter->map[RMAP_EXPORT].name) + { + pfilter->map[RMAP_EXPORT].name = strdup (gfilter->map[RMAP_EXPORT].name); + pfilter->map[RMAP_EXPORT].map = gfilter->map[RMAP_EXPORT].map; + } + } + + /* default-originate route-map */ + if (conf->default_rmap[afi][safi].name) + { + if (peer->default_rmap[afi][safi].name) + free (peer->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].name = strdup (conf->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].map = conf->default_rmap[afi][safi].map; + } + + /* update-source apply */ + if (conf->update_source) + { + if (peer->update_source) + sockunion_free (peer->update_source); + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + peer->update_source = sockunion_dup (conf->update_source); + } + else if (conf->update_if) + { + if (peer->update_if) + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + peer->update_if = XSTRDUP (MTYPE_PEER_UPDATE_SOURCE, conf->update_if); + } + + /* inbound filter apply */ + if (gfilter->dlist[in].name && ! pfilter->dlist[in].name) + { + if (pfilter->dlist[in].name) + free (pfilter->dlist[in].name); + pfilter->dlist[in].name = strdup (gfilter->dlist[in].name); + pfilter->dlist[in].alist = gfilter->dlist[in].alist; + } + if (! pfilter->plist[in].ref) + prefix_list_copy_ref(&pfilter->plist[in].ref, gfilter->plist[in].ref) ; + if (gfilter->aslist[in].name && ! pfilter->aslist[in].name) + { + if (pfilter->aslist[in].name) + free (pfilter->aslist[in].name); + pfilter->aslist[in].name = strdup (gfilter->aslist[in].name); + pfilter->aslist[in].aslist = gfilter->aslist[in].aslist; + } + if (gfilter->map[RMAP_IN].name && ! pfilter->map[RMAP_IN].name) + { + if (pfilter->map[RMAP_IN].name) + free (pfilter->map[RMAP_IN].name); + pfilter->map[RMAP_IN].name = strdup (gfilter->map[RMAP_IN].name); + pfilter->map[RMAP_IN].map = gfilter->map[RMAP_IN].map; + } + + /* outbound filter apply */ + if (gfilter->dlist[out].name) + { + if (pfilter->dlist[out].name) + free (pfilter->dlist[out].name); + pfilter->dlist[out].name = strdup (gfilter->dlist[out].name); + pfilter->dlist[out].alist = gfilter->dlist[out].alist; + } + else + { + if (pfilter->dlist[out].name) + free (pfilter->dlist[out].name); + pfilter->dlist[out].name = NULL; + pfilter->dlist[out].alist = NULL; + } + + prefix_list_copy_ref(&pfilter->plist[out].ref, gfilter->plist[out].ref) ; + + if (gfilter->aslist[out].name) + { + if (pfilter->aslist[out].name) + free (pfilter->aslist[out].name); + pfilter->aslist[out].name = strdup (gfilter->aslist[out].name); + pfilter->aslist[out].aslist = gfilter->aslist[out].aslist; + } + else + { + if (pfilter->aslist[out].name) + free (pfilter->aslist[out].name); + pfilter->aslist[out].name = NULL; + pfilter->aslist[out].aslist = NULL; + } + if (gfilter->map[RMAP_OUT].name) + { + if (pfilter->map[RMAP_OUT].name) + free (pfilter->map[RMAP_OUT].name); + pfilter->map[RMAP_OUT].name = strdup (gfilter->map[RMAP_OUT].name); + pfilter->map[RMAP_OUT].map = gfilter->map[RMAP_OUT].map; + } + else + { + if (pfilter->map[RMAP_OUT].name) + free (pfilter->map[RMAP_OUT].name); + pfilter->map[RMAP_OUT].name = NULL; + pfilter->map[RMAP_OUT].map = NULL; + } + + /* RS-client's import/export route-maps. 
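
   (Note the asymmetry below: the group's import route-map always
   replaces the peer's, but the group's export route-map is only copied
   where the peer has none of its own.)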
*/ + if (gfilter->map[RMAP_IMPORT].name) + { + if (pfilter->map[RMAP_IMPORT].name) + free (pfilter->map[RMAP_IMPORT].name); + pfilter->map[RMAP_IMPORT].name = strdup (gfilter->map[RMAP_IMPORT].name); + pfilter->map[RMAP_IMPORT].map = gfilter->map[RMAP_IMPORT].map; + } + else + { + if (pfilter->map[RMAP_IMPORT].name) + free (pfilter->map[RMAP_IMPORT].name); + pfilter->map[RMAP_IMPORT].name = NULL; + pfilter->map[RMAP_IMPORT].map = NULL; + } + if (gfilter->map[RMAP_EXPORT].name && ! pfilter->map[RMAP_EXPORT].name) + { + if (pfilter->map[RMAP_EXPORT].name) + free (pfilter->map[RMAP_EXPORT].name); + pfilter->map[RMAP_EXPORT].name = strdup (gfilter->map[RMAP_EXPORT].name); + pfilter->map[RMAP_EXPORT].map = gfilter->map[RMAP_EXPORT].map; + } + + if (gfilter->usmap.name) + { + if (pfilter->usmap.name) + free (pfilter->usmap.name); + pfilter->usmap.name = strdup (gfilter->usmap.name); + pfilter->usmap.map = gfilter->usmap.map; + } + else + { + if (pfilter->usmap.name) + free (pfilter->usmap.name); + pfilter->usmap.name = NULL; + pfilter->usmap.map = NULL; + } +} + +/* Peer group's remote AS configuration. */ +int +peer_group_remote_as (struct bgp *bgp, const char *group_name, as_t *as) +{ + struct peer_group *group; + struct peer *peer; + struct listnode *node, *nnode; + + group = peer_group_lookup (bgp, group_name); + if (! group) + return -1; + + if (group->conf->as == *as) + return 0; + + /* When we setup peer-group AS number all peer group member's AS + number must be updated to same number. */ + peer_as_change (group->conf, *as); + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer->as != *as) + peer_as_change (peer, *as); + } + + return 0; +} + +int +peer_group_delete (struct peer_group *group) +{ + struct bgp *bgp; + struct peer *peer; + struct listnode *node, *nnode; + + bgp = group->bgp; + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->group = NULL; + peer_delete (peer); + } + list_delete (group->peer); + + free (group->name); + group->name = NULL; + + group->conf->group = NULL; + peer_delete (group->conf); + + /* Delete from all peer_group list. */ + listnode_delete (bgp->group, group); + + peer_group_free (group); + + return 0; +} + +int +peer_group_remote_as_delete (struct peer_group *group) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (! group->conf->as) + return 0; + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->group = NULL; + peer_delete (peer); + } + list_delete_all_node (group->peer); + + group->conf->as = 0; + + return 0; +} + +/* Bind specified peer to peer group. */ +int +peer_group_bind (struct bgp *bgp, union sockunion *su, + struct peer_group *group, afi_t afi, safi_t safi, as_t *as) +{ + struct peer *peer; + int first_member = 0; + + /* Check peer group's address family. */ + if (! group->conf->afc[afi][safi]) + return BGP_ERR_PEER_GROUP_AF_UNCONFIGURED; + + /* Lookup the peer. */ + peer = peer_lookup (bgp, su); + + /* Create a new peer. */ + if (! peer) + { + if (! group->conf->as) + return BGP_ERR_PEER_GROUP_NO_REMOTE_AS; + + peer = peer_create (su, bgp, bgp->as, group->conf->as, afi, safi); + peer->group = group; + peer->af_group[afi][safi] = 1; + + peer = peer_lock (peer); /* group->peer list reference */ + listnode_add (group->peer, peer); + peer_group2peer_config_copy (group, peer, afi, safi); + + return 0; + } + + /* When the peer already belongs to peer group, check the consistency. 
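
   (Consistency here means the same group: a peer cannot be re-bound to
   a different peer-group for this afi/safi -- that is the
   BGP_ERR_PEER_GROUP_CANT_CHANGE case below.)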
*/ + if (peer->af_group[afi][safi]) + { + if (strcmp (peer->group->name, group->name) != 0) + return BGP_ERR_PEER_GROUP_CANT_CHANGE; + + return 0; + } + + /* Check current peer group configuration. */ + if (peer_group_active (peer) + && strcmp (peer->group->name, group->name) != 0) + return BGP_ERR_PEER_GROUP_MISMATCH; + + if (! group->conf->as) + { + if (peer_sort (group->conf) != BGP_PEER_INTERNAL + && peer_sort (group->conf) != peer_sort (peer)) + { + if (as) + *as = peer->as; + return BGP_ERR_PEER_GROUP_PEER_TYPE_DIFFERENT; + } + + if (peer_sort (group->conf) == BGP_PEER_INTERNAL) + first_member = 1; + } + + peer->af_group[afi][safi] = 1; + peer->afc[afi][safi] = 1; + if (! peer->group) + { + peer->group = group; + + peer = peer_lock (peer); /* group->peer list reference */ + listnode_add (group->peer, peer); + } + else + assert (group && peer->group == group); + + if (first_member) + { + /* Advertisement-interval reset */ + if (peer_sort (group->conf) == BGP_PEER_IBGP) + group->conf->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + group->conf->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + /* ebgp-multihop reset */ + if (peer_sort (group->conf) == BGP_PEER_IBGP) + group->conf->ttl = 255; + + /* local-as reset */ + if (peer_sort (group->conf) != BGP_PEER_EBGP) + { + group->conf->change_local_as = 0; + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + } + } + + if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + { + struct listnode *pn; + + /* If it's not configured as RSERVER_CLIENT in any other address + family, without being member of a peer_group, remove it from + list bgp->rsclient.*/ + if (! peer_rsclient_active (peer) + && (pn = listnode_lookup (bgp->rsclient, peer))) + { + peer_unlock (peer); /* peer rsclient reference */ + list_delete_node (bgp->rsclient, pn); + + /* Clear our own rsclient rib for this afi/safi. */ + bgp_clear_route (peer, afi, safi, BGP_CLEAR_ROUTE_MY_RSCLIENT); + } + + bgp_table_finish (&peer->rib[afi][safi]); + + /* Import policy. */ + if (peer->filter[afi][safi].map[RMAP_IMPORT].name) + { + free (peer->filter[afi][safi].map[RMAP_IMPORT].name); + peer->filter[afi][safi].map[RMAP_IMPORT].name = NULL; + peer->filter[afi][safi].map[RMAP_IMPORT].map = NULL; + } + + /* Export policy. */ + if (! CHECK_FLAG(group->conf->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT) + && peer->filter[afi][safi].map[RMAP_EXPORT].name) + { + free (peer->filter[afi][safi].map[RMAP_EXPORT].name); + peer->filter[afi][safi].map[RMAP_EXPORT].name = NULL; + peer->filter[afi][safi].map[RMAP_EXPORT].map = NULL; + } + } + + peer_group2peer_config_copy (group, peer, afi, safi); + + peer->last_reset = PEER_DOWN_RMAP_BIND; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + + return 0; +} + +int +peer_group_unbind (struct bgp *bgp, struct peer *peer, + struct peer_group *group, afi_t afi, safi_t safi) +{ + if (! peer->af_group[afi][safi]) + return 0; + + if (group != peer->group) + return BGP_ERR_PEER_GROUP_MISMATCH; + + peer->af_group[afi][safi] = 0; + peer->afc[afi][safi] = 0; + peer_af_flag_reset (peer, afi, safi); + + if (peer->rib[afi][safi]) + peer->rib[afi][safi] = NULL; + + if (! 
peer_group_active (peer)) + { + assert (listnode_lookup (group->peer, peer)); + peer_unlock (peer); /* peer group list reference */ + listnode_delete (group->peer, peer); + peer->group = NULL; + if (group->conf->as) + { + peer_delete (peer); + return 0; + } + peer_global_config_reset (peer); + } + + peer->last_reset = PEER_DOWN_RMAP_UNBIND; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; +} + +/* BGP instance creation by `router bgp' commands. */ +static struct bgp * +bgp_create (as_t *as, const char *name) +{ + struct bgp *bgp; + afi_t afi; + safi_t safi; + + if ( (bgp = XCALLOC (MTYPE_BGP, sizeof (struct bgp))) == NULL) + return NULL; + + bgp_lock (bgp); + bgp->peer_self = peer_new (bgp); + bgp->peer_self->host = XSTRDUP (MTYPE_BGP_PEER_HOST, "Static announcement"); + + bgp->peer = list_new (); + bgp->peer->cmp = (int (*)(void *, void *)) peer_cmp; + + bgp->group = list_new (); + bgp->group->cmp = (int (*)(void *, void *)) peer_group_cmp; + + bgp->rsclient = list_new (); + bgp->rsclient->cmp = (int (*)(void*, void*)) peer_cmp; + + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + bgp->route[afi][safi] = bgp_table_init (afi, safi); + bgp->aggregate[afi][safi] = bgp_table_init (afi, safi); + bgp->rib[afi][safi] = bgp_table_init (afi, safi); + } + + bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; + bgp->default_holdtime = BGP_DEFAULT_HOLDTIME; + bgp->default_keepalive = BGP_DEFAULT_KEEPALIVE; + bgp->restart_time = BGP_DEFAULT_RESTART_TIME; + bgp->stalepath_time = BGP_DEFAULT_STALEPATH_TIME; + + bgp->as = *as; + + if (name) + bgp->name = strdup (name); + + return bgp; +} + +/* Return first entry of BGP. */ +struct bgp * +bgp_get_default (void) +{ + if (bm->bgp->head) + return (listgetdata (listhead (bm->bgp))); + return NULL; +} + +/* Lookup BGP entry. */ +struct bgp * +bgp_lookup (as_t as, const char *name) +{ + struct bgp *bgp; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS (bm->bgp, node, nnode, bgp)) + if (bgp->as == as + && ((bgp->name == NULL && name == NULL) + || (bgp->name && name && strcmp (bgp->name, name) == 0))) + return bgp; + return NULL; +} + +/* Lookup BGP structure by view name. */ +struct bgp * +bgp_lookup_by_name (const char *name) +{ + struct bgp *bgp; + struct listnode *node, *nnode; + + for (ALL_LIST_ELEMENTS (bm->bgp, node, nnode, bgp)) + if ((bgp->name == NULL && name == NULL) + || (bgp->name && name && strcmp (bgp->name, name) == 0)) + return bgp; + return NULL; +} + +/* Called from VTY commands. */ +int +bgp_get (struct bgp **bgp_val, as_t *as, const char *name) +{ + struct bgp *bgp; + + /* Multiple instance check. */ + if (bgp_option_check (BGP_OPT_MULTIPLE_INSTANCE)) + { + if (name) + bgp = bgp_lookup_by_name (name); + else + bgp = bgp_get_default (); + + /* Already exists. */ + if (bgp) + { + if (bgp->as != *as) + { + *as = bgp->as; + return BGP_ERR_INSTANCE_MISMATCH; + } + *bgp_val = bgp; + return 0; + } + } + else + { + /* BGP instance name can not be specified for single instance. */ + if (name) + return BGP_ERR_MULTIPLE_INSTANCE_NOT_SET; + + /* Get default BGP structure if exists. */ + bgp = bgp_get_default (); + + if (bgp) + { + if (bgp->as != *as) + { + *as = bgp->as; + return BGP_ERR_AS_MISMATCH; + } + *bgp_val = bgp; + return 0; + } + } + + bgp = bgp_create (as, name); + listnode_add (bm->bgp, bgp); + bgp_router_id_set(bgp, &router_id_zebra); + *bgp_val = bgp; + + return 0; +} + +/* Delete BGP instance. 
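bgp_get() above is a lookup-or-create: an existing instance is returned when the AS matches, an AS-mismatch error is returned when it does not (with *as overwritten by the existing AS so the caller can report it), and a fresh instance is created otherwise. A standalone sketch of that shape, reduced to the single-instance case, with toy types and error codes of my own:

#include <stdlib.h>

struct inst { unsigned as; };
static struct inst *the_inst;          /* single-instance case */

enum { OK = 0, ERR_AS_MISMATCH = -1, ERR_NOMEM = -2 };

static int
inst_get (struct inst **out, unsigned *as)
{
  if (the_inst != NULL)
    {
      if (the_inst->as != *as)
        {
          *as = the_inst->as;          /* report the real AS back */
          return ERR_AS_MISMATCH;
        }
      *out = the_inst;
      return OK;
    }
  the_inst = calloc (1, sizeof (*the_inst));
  if (the_inst == NULL)
    return ERR_NOMEM;
  the_inst->as = *as;
  *out = the_inst;
  return OK;
}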
*/ +int +bgp_delete (struct bgp *bgp) +{ + struct peer *peer; + struct peer_group *group; + struct listnode *node; + struct listnode *next; + afi_t afi; + int i; + + /* Delete static route. */ + bgp_static_delete (bgp); + + /* Unset redistribution. */ + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (i = 0; i < ZEBRA_ROUTE_MAX; i++) + if (i != ZEBRA_ROUTE_BGP) + bgp_redistribute_unset (bgp, afi, i); + + for (ALL_LIST_ELEMENTS (bgp->peer, node, next, peer)) + peer_delete (peer); + + for (ALL_LIST_ELEMENTS (bgp->group, node, next, group)) + peer_group_delete (group); + + assert (listcount (bgp->rsclient) == 0); + + if (bgp->peer_self) { + peer_delete(bgp->peer_self); + bgp->peer_self = NULL; + } + + /* Remove visibility via the master list - there may however still be + * routes to be processed still referencing the struct bgp. + */ + listnode_delete (bm->bgp, bgp); + + bgp_unlock(bgp); /* initial reference */ + + return 0; +} + +static void bgp_free (struct bgp *); + +void +bgp_lock (struct bgp *bgp) +{ + ++bgp->lock; +} + +void +bgp_unlock(struct bgp *bgp) +{ + assert(bgp->lock > 0); + if (--bgp->lock == 0) + bgp_free (bgp); +} + +static void +bgp_free (struct bgp *bgp) +{ + afi_t afi; + safi_t safi; + + list_delete (bgp->group); + list_delete (bgp->peer); + list_delete (bgp->rsclient); + + if (bgp->name) + free (bgp->name); + + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + if (bgp->route[afi][safi]) + bgp_table_finish (&bgp->route[afi][safi]); + if (bgp->aggregate[afi][safi]) + bgp_table_finish (&bgp->aggregate[afi][safi]) ; + if (bgp->rib[afi][safi]) + bgp_table_finish (&bgp->rib[afi][safi]); + } + XFREE (MTYPE_BGP, bgp); +} + +struct peer * +peer_lookup (struct bgp *bgp, union sockunion *su) +{ + struct peer *peer; + struct listnode *node, *nnode; + + if (bgp != NULL) + { + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + if (sockunion_same (&peer->su, su) + && ! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER)) + return peer; + } + else if (bm->bgp != NULL) + { + struct listnode *bgpnode, *nbgpnode; + + for (ALL_LIST_ELEMENTS (bm->bgp, bgpnode, nbgpnode, bgp)) + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + if (sockunion_same (&peer->su, su) + && ! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER)) + return peer; + } + return NULL; +} + +struct peer * +peer_lookup_with_open (union sockunion *su, as_t remote_as, + struct in_addr *remote_id, int *as) +{ + struct peer *peer; + struct listnode *node; + struct listnode *bgpnode; + struct bgp *bgp; + + if (! bm->bgp) + return NULL; + + for (ALL_LIST_ELEMENTS_RO (bm->bgp, bgpnode, bgp)) + { + for (ALL_LIST_ELEMENTS_RO (bgp->peer, node, peer)) + { + if (sockunion_same (&peer->su, su) + && ! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER)) + { + if (peer->as == remote_as + && peer->remote_id.s_addr == remote_id->s_addr) + return peer; + if (peer->as == remote_as) + *as = 1; + } + } + + for (ALL_LIST_ELEMENTS_RO (bgp->peer, node, peer)) + { + if (sockunion_same (&peer->su, su) + && ! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER)) + { + if (peer->as == remote_as + && peer->remote_id.s_addr == 0) + return peer; + if (peer->as == remote_as) + *as = 1; + } + } + } + return NULL; +} + +/* If peer is configured at least one address family return 1. 
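bgp_delete() above only removes the instance from the master list and drops the initial reference; bgp_free() runs when the last bgp_unlock() brings the count to zero, so queued work that still references the struct bgp stays safe. The same discipline in miniature, on a toy type:

#include <assert.h>
#include <stdlib.h>

struct obj { unsigned lock; };

static void obj_free (struct obj *o) { free (o); }

static void obj_lock (struct obj *o) { ++o->lock; }

static void
obj_unlock (struct obj *o)
{
  assert (o->lock > 0);
  if (--o->lock == 0)
    obj_free (o);                /* freed only on the last release */
}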
*/ +int +peer_active (struct peer *peer) +{ + if (peer->afc[AFI_IP][SAFI_UNICAST] + || peer->afc[AFI_IP][SAFI_MULTICAST] + || peer->afc[AFI_IP][SAFI_MPLS_VPN] + || peer->afc[AFI_IP6][SAFI_UNICAST] + || peer->afc[AFI_IP6][SAFI_MULTICAST]) + return 1; + return 0; +} + +/* If peer is negotiated at least one address family return 1. */ +int +peer_active_nego (struct peer *peer) +{ + if (peer->afc_nego[AFI_IP][SAFI_UNICAST] + || peer->afc_nego[AFI_IP][SAFI_MULTICAST] + || peer->afc_nego[AFI_IP][SAFI_MPLS_VPN] + || peer->afc_nego[AFI_IP6][SAFI_UNICAST] + || peer->afc_nego[AFI_IP6][SAFI_MULTICAST]) + return 1; + return 0; +} + +/* peer_flag_change_type. */ +enum peer_change_type +{ + peer_change_none, + peer_change_reset, + peer_change_reset_in, + peer_change_reset_out, +}; + +static void +peer_change_action (struct peer *peer, afi_t afi, safi_t safi, + enum peer_change_type type) +{ + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return; + + if (type == peer_change_reset) + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + else if (type == peer_change_reset_in) + { + if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV) + || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) + bgp_route_refresh_send (peer, afi, safi, 0, 0, 0); + else + bgp_notify_send (peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + else if (type == peer_change_reset_out) + bgp_announce_route (peer, afi, safi); +} + +struct peer_flag_action +{ + /* Peer's flag. */ + u_int32_t flag; + + /* This flag can be set for peer-group member. */ + u_char not_for_member; + + /* Action when the flag is changed. */ + enum peer_change_type type; + + /* Peer down cause */ + u_char peer_down; +}; + +static const struct peer_flag_action peer_flag_action_list[] = + { + { PEER_FLAG_PASSIVE, 0, peer_change_reset }, + { PEER_FLAG_SHUTDOWN, 0, peer_change_reset }, + { PEER_FLAG_DONT_CAPABILITY, 0, peer_change_none }, + { PEER_FLAG_OVERRIDE_CAPABILITY, 0, peer_change_none }, + { PEER_FLAG_STRICT_CAP_MATCH, 0, peer_change_none }, + { PEER_FLAG_DYNAMIC_CAPABILITY, 0, peer_change_reset }, + { PEER_FLAG_DISABLE_CONNECTED_CHECK, 0, peer_change_reset }, + { 0, 0, 0 } + }; + +static const struct peer_flag_action peer_af_flag_action_list[] = + { + { PEER_FLAG_NEXTHOP_SELF, 1, peer_change_reset_out }, + { PEER_FLAG_SEND_COMMUNITY, 1, peer_change_reset_out }, + { PEER_FLAG_SEND_EXT_COMMUNITY, 1, peer_change_reset_out }, + { PEER_FLAG_SOFT_RECONFIG, 0, peer_change_reset_in }, + { PEER_FLAG_REFLECTOR_CLIENT, 1, peer_change_reset }, + { PEER_FLAG_RSERVER_CLIENT, 1, peer_change_reset }, + { PEER_FLAG_AS_PATH_UNCHANGED, 1, peer_change_reset_out }, + { PEER_FLAG_NEXTHOP_UNCHANGED, 1, peer_change_reset_out }, + { PEER_FLAG_MED_UNCHANGED, 1, peer_change_reset_out }, + { PEER_FLAG_REMOVE_PRIVATE_AS, 1, peer_change_reset_out }, + { PEER_FLAG_ALLOWAS_IN, 0, peer_change_reset_in }, + { PEER_FLAG_ORF_PREFIX_SM, 1, peer_change_reset }, + { PEER_FLAG_ORF_PREFIX_RM, 1, peer_change_reset }, + { PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED, 0, peer_change_reset_out }, + { 0, 0, 0 } + }; + +/* Proper action set. */ +static int +peer_flag_action_set (const struct peer_flag_action *action_list, int size, + struct peer_flag_action *action, u_int32_t flag) +{ + int i; + int found = 0; + int reset_in = 0; + int reset_out = 0; + const struct peer_flag_action *match = NULL; + + /* Check peer's frag action. 
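peer_flag_action_set() folds the actions of every entry the flag mask touches: a mask that hits both an inbound-reset and an outbound-reset flag folds to a full session reset, so changing soft-reconfig (reset-in) together with nexthop-self (reset-out) yields peer_change_reset. A standalone sketch of the fold with toy flag values of my own:

#include <stdio.h>

enum change { NONE, RESET, RESET_IN, RESET_OUT };

struct act { unsigned flag; enum change type; };

static const struct act table[] = {
  { 0x01, RESET_IN  },   /* e.g. soft-reconfig  */
  { 0x02, RESET_OUT },   /* e.g. nexthop-self   */
  { 0x04, RESET     },   /* e.g. rserver-client */
  { 0,    NONE      }
};

static enum change
fold (unsigned mask)
{
  int in = 0, out = 0;
  const struct act *a;
  for (a = table; a->flag != 0; ++a)
    if (a->flag & mask)
      {
        if (a->type == RESET_IN  || a->type == RESET) in  = 1;
        if (a->type == RESET_OUT || a->type == RESET) out = 1;
      }
  if (in && out) return RESET;
  return in ? RESET_IN : out ? RESET_OUT : NONE;
}

int main (void)
{
  /* 0x01|0x02 touches both directions => full session reset */
  printf ("%d\n", fold (0x01 | 0x02) == RESET);
  return 0;
}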
*/ + for (i = 0; i < size; i++) + { + match = &action_list[i]; + + if (match->flag == 0) + break; + + if (match->flag & flag) + { + found = 1; + + if (match->type == peer_change_reset_in) + reset_in = 1; + if (match->type == peer_change_reset_out) + reset_out = 1; + if (match->type == peer_change_reset) + { + reset_in = 1; + reset_out = 1; + } + if (match->not_for_member) + action->not_for_member = 1; + } + } + + /* Set peer clear type. */ + if (reset_in && reset_out) + action->type = peer_change_reset; + else if (reset_in) + action->type = peer_change_reset_in; + else if (reset_out) + action->type = peer_change_reset_out; + else + action->type = peer_change_none; + + return found; +} + +static void +peer_flag_modify_action (struct peer *peer, u_int32_t flag) +{ + if (flag == PEER_FLAG_SHUTDOWN) + { + if (CHECK_FLAG (peer->flags, flag)) + { + if (CHECK_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT)) + peer_nsf_stop (peer); + + UNSET_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW); + if (peer->t_pmax_restart) + { + BGP_TIMER_OFF (peer->t_pmax_restart); + if (BGP_DEBUG (events, EVENTS)) + zlog_debug ("%s Maximum-prefix restart timer cancelled", + peer->host); + } + + if (CHECK_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT)) + peer_nsf_stop (peer); + + bgp_notify_send_with_data(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN, NULL, 0); + } + else + { + peer->v_start = BGP_INIT_START_TIMER; + bgp_peer_disable(peer, NULL); + } + } + else + { + if (flag == PEER_FLAG_DYNAMIC_CAPABILITY) + peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE; + else if (flag == PEER_FLAG_PASSIVE) + peer->last_reset = PEER_DOWN_PASSIVE_CHANGE; + else if (flag == PEER_FLAG_DISABLE_CONNECTED_CHECK) + peer->last_reset = PEER_DOWN_MULTIHOP_CHANGE; + + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } +} + +/* Change specified peer flag. */ +static int +peer_flag_modify (struct peer *peer, u_int32_t flag, int set) +{ + int found; + int size; + struct peer_group *group; + struct listnode *node, *nnode; + struct peer_flag_action action; + + memset (&action, 0, sizeof (struct peer_flag_action)); + size = sizeof peer_flag_action_list / sizeof (struct peer_flag_action); + + found = peer_flag_action_set (peer_flag_action_list, size, &action, flag); + + /* No flag action is found. */ + if (! found) + return BGP_ERR_INVALID_FLAG; + + /* Not for peer-group member. */ + if (action.not_for_member && peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + /* When unset the peer-group member's flag we have to check + peer-group configuration. */ + if (! set && peer_group_active (peer)) + if (CHECK_FLAG (peer->group->conf->flags, flag)) + { + if (flag == PEER_FLAG_SHUTDOWN) + return BGP_ERR_PEER_GROUP_SHUTDOWN; + else + return BGP_ERR_PEER_GROUP_HAS_THE_FLAG; + } + + /* Flag conflict check. */ + if (set + && CHECK_FLAG (peer->flags | flag, PEER_FLAG_STRICT_CAP_MATCH) + && CHECK_FLAG (peer->flags | flag, PEER_FLAG_OVERRIDE_CAPABILITY)) + return BGP_ERR_PEER_FLAG_CONFLICT; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (set && CHECK_FLAG (peer->flags, flag) == flag) + return 0; + if (! set && ! CHECK_FLAG (peer->flags, flag)) + return 0; + } + + if (set) + SET_FLAG (peer->flags, flag); + else + UNSET_FLAG (peer->flags, flag); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (action.type == peer_change_reset) + peer_flag_modify_action (peer, flag); + + return 0; + } + + /* peer-group member updates. 
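Note that the conflict test in peer_flag_modify() is made against (peer->flags | flag): it rejects setting strict-capability-match while override-capability is already set, the reverse, and both at once. In miniature, with toy flag values:

#define STRICT   0x1
#define OVERRIDE 0x2

/* Nonzero when the union of current and requested flags would
 * hold both mutually exclusive capability flags.               */
static int
conflicts (unsigned current, unsigned requested)
{
  unsigned u = current | requested;
  return (u & STRICT) && (u & OVERRIDE);
}
/* conflicts (STRICT, OVERRIDE) == 1,  conflicts (0, STRICT) == 0 */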
*/ + group = peer->group; + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (set && CHECK_FLAG (peer->flags, flag) == flag) + continue; + + if (! set && ! CHECK_FLAG (peer->flags, flag)) + continue; + + if (set) + SET_FLAG (peer->flags, flag); + else + UNSET_FLAG (peer->flags, flag); + + if (action.type == peer_change_reset) + peer_flag_modify_action (peer, flag); + } + return 0; +} + +int +peer_flag_set (struct peer *peer, u_int32_t flag) +{ + return peer_flag_modify (peer, flag, 1); +} + +int +peer_flag_unset (struct peer *peer, u_int32_t flag) +{ + return peer_flag_modify (peer, flag, 0); +} + +static int +peer_is_group_member (struct peer *peer, afi_t afi, safi_t safi) +{ + if (peer->af_group[afi][safi]) + return 1; + return 0; +} + +static int +peer_af_flag_modify (struct peer *peer, afi_t afi, safi_t safi, u_int32_t flag, + int set) +{ + int found; + int size; + struct listnode *node, *nnode; + struct peer_group *group; + struct peer_flag_action action; + + memset (&action, 0, sizeof (struct peer_flag_action)); + size = sizeof peer_af_flag_action_list / sizeof (struct peer_flag_action); + + found = peer_flag_action_set (peer_af_flag_action_list, size, &action, flag); + + /* No flag action is found. */ + if (! found) + return BGP_ERR_INVALID_FLAG; + + /* Adress family must be activated. */ + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + /* Not for peer-group member. */ + if (action.not_for_member && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + /* Spcecial check for reflector client. */ + if (flag & PEER_FLAG_REFLECTOR_CLIENT + && peer_sort (peer) != BGP_PEER_IBGP) + return BGP_ERR_NOT_INTERNAL_PEER; + + /* Spcecial check for remove-private-AS. */ + if (flag & PEER_FLAG_REMOVE_PRIVATE_AS + && peer_sort (peer) == BGP_PEER_IBGP) + return BGP_ERR_REMOVE_PRIVATE_AS; + + /* When unset the peer-group member's flag we have to check + peer-group configuration. */ + if (! set && peer->af_group[afi][safi]) + if (CHECK_FLAG (peer->group->conf->af_flags[afi][safi], flag)) + return BGP_ERR_PEER_GROUP_HAS_THE_FLAG; + + /* When current flag configuration is same as requested one. */ + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (set && CHECK_FLAG (peer->af_flags[afi][safi], flag) == flag) + return 0; + if (! set && ! CHECK_FLAG (peer->af_flags[afi][safi], flag)) + return 0; + } + + if (set) + SET_FLAG (peer->af_flags[afi][safi], flag); + else + UNSET_FLAG (peer->af_flags[afi][safi], flag); + + /* Execute action when peer is established. */ + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP) + && peer->state == bgp_peer_sEstablished) + { + if (! set && flag == PEER_FLAG_SOFT_RECONFIG) + bgp_clear_adj_in (peer, afi, safi); + else + { + if (flag == PEER_FLAG_REFLECTOR_CLIENT) + peer->last_reset = PEER_DOWN_RR_CLIENT_CHANGE; + else if (flag == PEER_FLAG_RSERVER_CLIENT) + peer->last_reset = PEER_DOWN_RS_CLIENT_CHANGE; + else if (flag == PEER_FLAG_ORF_PREFIX_SM) + peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE; + else if (flag == PEER_FLAG_ORF_PREFIX_RM) + peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE; + + peer_change_action (peer, afi, safi, action.type); + } + + } + + /* Peer group member updates. */ + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + group = peer->group; + + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (! peer->af_group[afi][safi]) + continue; + + if (set && CHECK_FLAG (peer->af_flags[afi][safi], flag) == flag) + continue; + + if (! set && ! 
CHECK_FLAG (peer->af_flags[afi][safi], flag)) + continue; + + if (set) + SET_FLAG (peer->af_flags[afi][safi], flag); + else + UNSET_FLAG (peer->af_flags[afi][safi], flag); + + if (peer->state == bgp_peer_sEstablished) + { + if (! set && flag == PEER_FLAG_SOFT_RECONFIG) + bgp_clear_adj_in (peer, afi, safi); + else + { + if (flag == PEER_FLAG_REFLECTOR_CLIENT) + peer->last_reset = PEER_DOWN_RR_CLIENT_CHANGE; + else if (flag == PEER_FLAG_RSERVER_CLIENT) + peer->last_reset = PEER_DOWN_RS_CLIENT_CHANGE; + else if (flag == PEER_FLAG_ORF_PREFIX_SM) + peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE; + else if (flag == PEER_FLAG_ORF_PREFIX_RM) + peer->last_reset = PEER_DOWN_CAPABILITY_CHANGE; + + peer_change_action (peer, afi, safi, action.type); + } + } + } + } + return 0; +} + +int +peer_af_flag_set (struct peer *peer, afi_t afi, safi_t safi, u_int32_t flag) +{ + return peer_af_flag_modify (peer, afi, safi, flag, 1); +} + +int +peer_af_flag_unset (struct peer *peer, afi_t afi, safi_t safi, u_int32_t flag) +{ + return peer_af_flag_modify (peer, afi, safi, flag, 0); +} + +/* EBGP multihop configuration. */ +int +peer_ebgp_multihop_set (struct peer *peer, int ttl) +{ + struct peer_group *group; + struct listnode *node, *nnode; + bgp_session session = peer->session; + + if (peer_sort (peer) == BGP_PEER_IBGP) + return 0; + + peer->ttl = ttl; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (bgp_session_is_active(session) && peer_sort (peer) != BGP_PEER_IBGP) + bgp_session_set_ttl (session, peer->ttl); + } + else + { + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer_sort (peer) == BGP_PEER_IBGP) + continue; + + peer->ttl = group->conf->ttl; + session = peer->session; + + if (bgp_session_is_active(session)) + bgp_session_set_ttl (session, peer->ttl); + } + } + return 0; +} + +int +peer_ebgp_multihop_unset (struct peer *peer) +{ + struct peer_group *group; + struct listnode *node, *nnode; + bgp_session session = peer->session; + + if (peer_sort (peer) == BGP_PEER_IBGP) + return 0; + + if (peer_group_active (peer)) + peer->ttl = peer->group->conf->ttl; + else + peer->ttl = 1; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (bgp_session_is_active(session) && peer_sort (peer) != BGP_PEER_IBGP) + bgp_session_set_ttl (session, peer->ttl); + } + else + { + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer_sort (peer) == BGP_PEER_IBGP) + continue; + + peer->ttl = 1; + session = peer->session; + + if (bgp_session_is_active(session)) + bgp_session_set_ttl (session, peer->ttl); + } + } + + return 0; +} + +/* Neighbor description. */ +int +peer_description_set (struct peer *peer, char *desc) +{ + if (peer->desc) + XFREE (MTYPE_PEER_DESC, peer->desc); + + peer->desc = XSTRDUP (MTYPE_PEER_DESC, desc); + + return 0; +} + +int +peer_description_unset (struct peer *peer) +{ + if (peer->desc) + XFREE (MTYPE_PEER_DESC, peer->desc); + + peer->desc = NULL; + + return 0; +} + +/* Neighbor update-source. */ +int +peer_update_source_if_set (struct peer *peer, const char *ifname) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer->update_if) + { + if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP) + && strcmp (peer->update_if, ifname) == 0) + return 0; + + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + + peer->update_if = XSTRDUP (MTYPE_PEER_UPDATE_SOURCE, ifname); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + /* peer-group member updates. */ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer->update_if) + { + if (strcmp (peer->update_if, ifname) == 0) + continue; + + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + + peer->update_if = XSTRDUP (MTYPE_PEER_UPDATE_SOURCE, ifname); + + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + return 0; +} + +int +peer_update_source_addr_set (struct peer *peer, union sockunion *su) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer->update_source) + { + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP) + && sockunion_cmp (peer->update_source, su) == 0) + return 0; + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + peer->update_source = sockunion_dup (su); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + /* peer-group member updates. */ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer->update_source) + { + if (sockunion_cmp (peer->update_source, su) == 0) + continue; + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + peer->update_source = sockunion_dup (su); + + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + return 0; +} + +int +peer_update_source_unset (struct peer *peer) +{ + union sockunion *su; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP) + && ! peer->update_source + && ! peer->update_if) + return 0; + + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + if (peer_group_active (peer)) + { + group = peer->group; + + if (group->conf->update_source) + { + su = sockunion_dup (group->conf->update_source); + peer->update_source = su; + } + else if (group->conf->update_if) + peer->update_if = + XSTRDUP (MTYPE_PEER_UPDATE_SOURCE, group->conf->update_if); + } + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + /* peer-group member updates. 
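The update-source functions above maintain one invariant: at most one of the interface name and the source address is configured, and setting either form frees the other before the session is reset. A standalone sketch of the invariant (toy state, plain strings; the struct must start zero-initialised):

#include <stdlib.h>
#include <string.h>

struct src { char *ifname; char *addr; };

static void
src_set_ifname (struct src *s, const char *ifname)
{
  free (s->addr);   s->addr = NULL;       /* clear the other form */
  free (s->ifname); s->ifname = strdup (ifname);
}

static void
src_set_addr (struct src *s, const char *addr)
{
  free (s->ifname); s->ifname = NULL;     /* clear the other form */
  free (s->addr);   s->addr = strdup (addr);
}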
*/ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (! peer->update_source && ! peer->update_if) + continue; + + if (peer->update_source) + { + sockunion_free (peer->update_source); + peer->update_source = NULL; + } + + if (peer->update_if) + { + XFREE (MTYPE_PEER_UPDATE_SOURCE, peer->update_if); + peer->update_if = NULL; + } + + peer->last_reset = PEER_DOWN_UPDATE_SOURCE_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + return 0; +} + +int +peer_default_originate_set (struct peer *peer, afi_t afi, safi_t safi, + const char *rmap) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + /* Adress family must be activated. */ + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + /* Default originate can't be used for peer group memeber. */ + if (peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (! CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE) + || (rmap && ! peer->default_rmap[afi][safi].name) + || (rmap && strcmp (rmap, peer->default_rmap[afi][safi].name) != 0)) + { + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE); + + if (rmap) + { + if (peer->default_rmap[afi][safi].name) + free (peer->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].name = strdup (rmap); + peer->default_rmap[afi][safi].map = route_map_lookup_by_name (rmap); + } + } + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (peer->state == bgp_peer_sEstablished && peer->afc_nego[afi][safi]) + bgp_default_originate (peer, afi, safi, 0); + return 0; + } + + /* peer-group member updates. */ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE); + + if (rmap) + { + if (peer->default_rmap[afi][safi].name) + free (peer->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].name = strdup (rmap); + peer->default_rmap[afi][safi].map = route_map_lookup_by_name (rmap); + } + + if (peer->state == bgp_peer_sEstablished && peer->afc_nego[afi][safi]) + bgp_default_originate (peer, afi, safi, 0); + } + return 0; +} + +int +peer_default_originate_unset (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + /* Adress family must be activated. */ + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + /* Default originate can't be used for peer group memeber. */ + if (peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE)) + { + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE); + + if (peer->default_rmap[afi][safi].name) + free (peer->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].name = NULL; + peer->default_rmap[afi][safi].map = NULL; + } + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (peer->state == bgp_peer_sEstablished && peer->afc_nego[afi][safi]) + bgp_default_originate (peer, afi, safi, 1); + return 0; + } + + /* peer-group member updates. 
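peer_default_originate_set() only rewrites the configuration when something actually changed: the flag was not yet set, or a route-map name was supplied that is absent from, or differs from, the stored one. The guard, restated as a standalone predicate:

#include <string.h>

static int
needs_update (int flag_set, const char *stored, const char *requested)
{
  if (! flag_set)
    return 1;                          /* first time set          */
  if (requested && stored == NULL)
    return 1;                          /* map newly supplied      */
  if (requested && strcmp (requested, stored) != 0)
    return 1;                          /* map name changed        */
  return 0;
}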
*/ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE); + + if (peer->default_rmap[afi][safi].name) + free (peer->default_rmap[afi][safi].name); + peer->default_rmap[afi][safi].name = NULL; + peer->default_rmap[afi][safi].map = NULL; + + if (peer->state == bgp_peer_sEstablished && peer->afc_nego[afi][safi]) + bgp_default_originate (peer, afi, safi, 1); + } + return 0; +} + +int +peer_port_set (struct peer *peer, u_int16_t port) +{ + peer->port = port; + return 0; +} + +int +peer_port_unset (struct peer *peer) +{ + peer->port = BGP_PORT_DEFAULT; + return 0; +} + +/* neighbor weight. */ +int +peer_weight_set (struct peer *peer, u_int16_t weight) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + SET_FLAG (peer->config, PEER_CONFIG_WEIGHT); + peer->weight = weight; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + /* peer-group member updates. */ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->weight = group->conf->weight; + } + return 0; +} + +int +peer_weight_unset (struct peer *peer) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + /* Set default weight. */ + if (peer_group_active (peer)) + peer->weight = peer->group->conf->weight; + else + peer->weight = 0; + + UNSET_FLAG (peer->config, PEER_CONFIG_WEIGHT); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + /* peer-group member updates. */ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->weight = 0; + } + return 0; +} + +int +peer_timers_set (struct peer *peer, u_int32_t keepalive, u_int32_t holdtime) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + /* Not for peer group memeber. */ + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + /* keepalive value check. */ + if (keepalive > 65535) + return BGP_ERR_INVALID_VALUE; + + /* Holdtime value check. */ + if (holdtime > 65535) + return BGP_ERR_INVALID_VALUE; + + /* Holdtime value must be either 0 or greater than 3. */ + if (holdtime < 3 && holdtime != 0) + return BGP_ERR_INVALID_VALUE; + + /* Set value to the configuration. */ + SET_FLAG (peer->config, PEER_CONFIG_TIMER); + peer->holdtime = holdtime; + peer->keepalive = (keepalive < holdtime / 3 ? keepalive : holdtime / 3); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + /* peer-group member updates. */ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + SET_FLAG (peer->config, PEER_CONFIG_TIMER); + peer->holdtime = group->conf->holdtime; + peer->keepalive = group->conf->keepalive; + } + return 0; +} + +int +peer_timers_unset (struct peer *peer) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + /* Clear configuration. */ + UNSET_FLAG (peer->config, PEER_CONFIG_TIMER); + peer->keepalive = 0; + peer->holdtime = 0; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + /* peer-group member updates. 
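peer_timers_set() accepts a holdtime of 0 or 3..65535 and clamps the stored keepalive to a third of the holdtime, so holdtime 90 with keepalive 40 stores keepalive 30. The same checks, standalone:

#include <stdint.h>

static int
timers_check (uint32_t keepalive, uint32_t holdtime, uint32_t *ka_out)
{
  if (keepalive > 65535 || holdtime > 65535)
    return -1;                         /* BGP_ERR_INVALID_VALUE */
  if (holdtime < 3 && holdtime != 0)
    return -1;                         /* holdtime is 0 or >= 3 */
  *ka_out = keepalive < holdtime / 3 ? keepalive : holdtime / 3;
  return 0;
}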
*/ + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + UNSET_FLAG (peer->config, PEER_CONFIG_TIMER); + peer->holdtime = 0; + peer->keepalive = 0; + } + + return 0; +} + +int +peer_timers_connect_set (struct peer *peer, u_int32_t connect) +{ + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (connect > 65535) + return BGP_ERR_INVALID_VALUE; + + /* Set value to the configuration. */ + SET_FLAG (peer->config, PEER_CONFIG_CONNECT); + peer->connect = connect; + + /* Set value to timer setting. */ + peer->v_connect = connect; + + return 0; +} + +int +peer_timers_connect_unset (struct peer *peer) +{ + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + /* Clear configuration. */ + UNSET_FLAG (peer->config, PEER_CONFIG_CONNECT); + peer->connect = 0; + + /* Set timer setting to default value. */ + peer->v_connect = BGP_DEFAULT_CONNECT_RETRY; + + return 0; +} + +int +peer_advertise_interval_set (struct peer *peer, u_int32_t routeadv) +{ + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (routeadv > 600) + return BGP_ERR_INVALID_VALUE; + + SET_FLAG (peer->config, PEER_CONFIG_ROUTEADV); + peer->routeadv = routeadv; + peer->v_routeadv = routeadv; + + return 0; +} + +int +peer_advertise_interval_unset (struct peer *peer) +{ + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + UNSET_FLAG (peer->config, PEER_CONFIG_ROUTEADV); + peer->routeadv = 0; + + if (peer_sort (peer) == BGP_PEER_IBGP) + peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; + else + peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + return 0; +} + +/* neighbor interface */ +int +peer_interface_set (struct peer *peer, const char *str) +{ + if (peer->ifname) + free (peer->ifname); + peer->ifname = strdup (str); + + return 0; +} + +int +peer_interface_unset (struct peer *peer) +{ + if (peer->ifname) + free (peer->ifname); + peer->ifname = NULL; + + return 0; +} + +/* Allow-as in. */ +int +peer_allowas_in_set (struct peer *peer, afi_t afi, safi_t safi, int allow_num) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (allow_num < 1 || allow_num > 10) + return BGP_ERR_INVALID_VALUE; + + if (peer->allowas_in[afi][safi] != allow_num) + { + peer->allowas_in[afi][safi] = allow_num; + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ALLOWAS_IN); + peer_change_action (peer, afi, safi, peer_change_reset_in); + } + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (peer->allowas_in[afi][safi] != allow_num) + { + peer->allowas_in[afi][safi] = allow_num; + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ALLOWAS_IN); + peer_change_action (peer, afi, safi, peer_change_reset_in); + } + + } + return 0; +} + +int +peer_allowas_in_unset (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ALLOWAS_IN)) + { + peer->allowas_in[afi][safi] = 0; + peer_af_flag_unset (peer, afi, safi, PEER_FLAG_ALLOWAS_IN); + } + + if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ALLOWAS_IN)) + { + peer->allowas_in[afi][safi] = 0; + peer_af_flag_unset (peer, afi, safi, PEER_FLAG_ALLOWAS_IN); + } + } + return 0; +} + +int +peer_local_as_set (struct peer *peer, as_t as, int no_prepend) +{ + struct bgp *bgp = peer->bgp; + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer_sort (peer) != BGP_PEER_EBGP + && peer_sort (peer) != BGP_PEER_INTERNAL) + return BGP_ERR_LOCAL_AS_ALLOWED_ONLY_FOR_EBGP; + + if (bgp->as == as) + return BGP_ERR_CANNOT_HAVE_LOCAL_AS_SAME_AS; + + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (peer->change_local_as == as && + ((CHECK_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND) && no_prepend) + || (! CHECK_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND) && ! no_prepend))) + return 0; + + peer->change_local_as = as; + if (no_prepend) + SET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + else + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_LOCAL_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->change_local_as = as; + if (no_prepend) + SET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + else + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + + peer->last_reset = PEER_DOWN_LOCAL_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + + return 0; +} + +int +peer_local_as_unset (struct peer *peer) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (! peer->change_local_as) + return 0; + + peer->change_local_as = 0; + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer->last_reset = PEER_DOWN_LOCAL_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return 0; + } + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer->change_local_as = 0; + UNSET_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND); + + peer->last_reset = PEER_DOWN_LOCAL_AS_CHANGE; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + return 0; +} + +/* Set password for authenticating with the peer. */ +int +peer_password_set (struct peer *peer, const char *password) +{ + struct listnode *nn, *nnode; + int len = password ? strlen(password) : 0; + int ret = BGP_SUCCESS; + + if ((len < PEER_PASSWORD_MINLEN) || (len > PEER_PASSWORD_MAXLEN)) + return BGP_ERR_INVALID_VALUE; + + if (peer->password && strcmp (peer->password, password) == 0 + && ! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + if (peer->password) + XFREE (MTYPE_PEER_PASSWORD, peer->password); + + peer->password = XSTRDUP (MTYPE_PEER_PASSWORD, password); + + if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + return BGP_SUCCESS; + } + + for (ALL_LIST_ELEMENTS (peer->group->peer, nn, nnode, peer)) + { + if (peer->password && strcmp (peer->password, password) == 0) + continue; + + if (peer->password) + XFREE (MTYPE_PEER_PASSWORD, peer->password); + + peer->password = XSTRDUP(MTYPE_PEER_PASSWORD, password); + + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + } + + return ret; +} + +int +peer_password_unset (struct peer *peer) +{ + struct listnode *nn, *nnode; + + if (!peer->password + && !CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + if (!CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + if (peer_group_active (peer) + && peer->group->conf->password + && strcmp (peer->group->conf->password, peer->password) == 0) + return BGP_ERR_PEER_GROUP_HAS_THE_FLAG; + + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + + if (peer->password) + XFREE (MTYPE_PEER_PASSWORD, peer->password); + + peer->password = NULL; + return 0; + } + + XFREE (MTYPE_PEER_PASSWORD, peer->password); + peer->password = NULL; + + for (ALL_LIST_ELEMENTS (peer->group->peer, nn, nnode, peer)) + { + if (!peer->password) + continue; + + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_CONFIG_CHANGE); + + XFREE (MTYPE_PEER_PASSWORD, peer->password); + peer->password = NULL; + } + + return 0; +} + +/* Set distribute list to the peer. */ +int +peer_distribute_set (struct peer *peer, afi_t afi, safi_t safi, int direct, + const char *name) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->plist[direct].ref) + return BGP_ERR_PEER_FILTER_CONFLICT; + + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = strdup (name); + filter->dlist[direct].alist = access_list_lookup (afi, name); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = strdup (name); + filter->dlist[direct].alist = access_list_lookup (afi, name); + } + + return 0; +} + +int +peer_distribute_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) +{ + struct bgp_filter *filter; + struct bgp_filter *gfilter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! 
peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + /* apply peer-group filter */ + if (peer->af_group[afi][safi]) + { + gfilter = &peer->group->conf->filter[afi][safi]; + + if (gfilter->dlist[direct].name) + { + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = strdup (gfilter->dlist[direct].name); + filter->dlist[direct].alist = gfilter->dlist[direct].alist; + return 0; + } + } + + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = NULL; + filter->dlist[direct].alist = NULL; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->dlist[direct].name) + free (filter->dlist[direct].name); + filter->dlist[direct].name = NULL; + filter->dlist[direct].alist = NULL; + } + + return 0; +} + +/* Update distribute list. */ +static void +peer_distribute_update (struct access_list *access) +{ + afi_t afi; + safi_t safi; + int direct; + struct listnode *mnode, *mnnode; + struct listnode *node, *nnode; + struct bgp *bgp; + struct peer *peer; + struct peer_group *group; + struct bgp_filter *filter; + + for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) + { + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + filter = &peer->filter[afi][safi]; + + for (direct = FILTER_IN; direct < FILTER_MAX; direct++) + { + if (filter->dlist[direct].name) + filter->dlist[direct].alist = + access_list_lookup (afi, filter->dlist[direct].name); + else + filter->dlist[direct].alist = NULL; + } + } + } + for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group)) + { + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + filter = &group->conf->filter[afi][safi]; + + for (direct = FILTER_IN; direct < FILTER_MAX; direct++) + { + if (filter->dlist[direct].name) + filter->dlist[direct].alist = + access_list_lookup (afi, filter->dlist[direct].name); + else + filter->dlist[direct].alist = NULL; + } + } + } + } +} + +/* Set prefix list to the peer. */ +int +peer_prefix_list_set (struct peer *peer, afi_t afi, safi_t safi, int direct, + const char *name) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + prefix_list_ref ref ; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->dlist[direct].name) + return BGP_ERR_PEER_FILTER_CONFLICT; + + ref = prefix_list_set_ref(&filter->plist[direct].ref, afi, name) ; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! 
peer->af_group[afi][safi]) + continue; + + prefix_list_copy_ref(&filter->plist[direct].ref, ref) ; + } + return 0; +} + +int +peer_prefix_list_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) +{ + struct bgp_filter *filter; + struct bgp_filter *gfilter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + /* apply peer-group filter */ + if (peer->af_group[afi][safi]) + { + gfilter = &peer->group->conf->filter[afi][safi]; + + if (gfilter->plist[direct].ref) + { + prefix_list_copy_ref(&filter->plist[direct].ref, + gfilter->plist[direct].ref) ; + return 0; + } + } + + prefix_list_unset_ref(&filter->plist[direct].ref) ; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + prefix_list_unset_ref(&filter->plist[direct].ref) ; + } + + return 0; +} + +/* Update prefix-list list. */ +static void +peer_prefix_list_update (struct prefix_list *plist) +{ + /* This function used to fix up the addresses of prefix lists whenever + * a prefix list was changed. That is now done by the symbol reference + * mechanism. + * + * This function could have a use in updating a peer when a prefix list + * is changed ? + */ +} + +int +peer_aslist_set (struct peer *peer, afi_t afi, safi_t safi, int direct, + const char *name) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = strdup (name); + filter->aslist[direct].aslist = as_list_lookup (name); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = strdup (name); + filter->aslist[direct].aslist = as_list_lookup (name); + } + return 0; +} + +int +peer_aslist_unset (struct peer *peer,afi_t afi, safi_t safi, int direct) +{ + struct bgp_filter *filter; + struct bgp_filter *gfilter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! 
peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != FILTER_IN && direct != FILTER_OUT) + return BGP_ERR_INVALID_VALUE; + + if (direct == FILTER_OUT && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + /* apply peer-group filter */ + if (peer->af_group[afi][safi]) + { + gfilter = &peer->group->conf->filter[afi][safi]; + + if (gfilter->aslist[direct].name) + { + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = strdup (gfilter->aslist[direct].name); + filter->aslist[direct].aslist = gfilter->aslist[direct].aslist; + return 0; + } + } + + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = NULL; + filter->aslist[direct].aslist = NULL; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->aslist[direct].name) + free (filter->aslist[direct].name); + filter->aslist[direct].name = NULL; + filter->aslist[direct].aslist = NULL; + } + + return 0; +} + +static void +peer_aslist_update (void) +{ + afi_t afi; + safi_t safi; + int direct; + struct listnode *mnode, *mnnode; + struct listnode *node, *nnode; + struct bgp *bgp; + struct peer *peer; + struct peer_group *group; + struct bgp_filter *filter; + + for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) + { + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + filter = &peer->filter[afi][safi]; + + for (direct = FILTER_IN; direct < FILTER_MAX; direct++) + { + if (filter->aslist[direct].name) + filter->aslist[direct].aslist = + as_list_lookup (filter->aslist[direct].name); + else + filter->aslist[direct].aslist = NULL; + } + } + } + for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group)) + { + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + filter = &group->conf->filter[afi][safi]; + + for (direct = FILTER_IN; direct < FILTER_MAX; direct++) + { + if (filter->aslist[direct].name) + filter->aslist[direct].aslist = + as_list_lookup (filter->aslist[direct].name); + else + filter->aslist[direct].aslist = NULL; + } + } + } + } +} + +/* Set route-map to the peer. */ +int +peer_route_map_set (struct peer *peer, afi_t afi, safi_t safi, int direct, + const char *name) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != RMAP_IN && direct != RMAP_OUT && + direct != RMAP_IMPORT && direct != RMAP_EXPORT) + return BGP_ERR_INVALID_VALUE; + + if ( (direct == RMAP_OUT || direct == RMAP_IMPORT) + && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->map[direct].name) + free (filter->map[direct].name); + + filter->map[direct].name = strdup (name); + filter->map[direct].map = route_map_lookup_by_name (name); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! 
peer->af_group[afi][safi]) + continue; + + if (filter->map[direct].name) + free (filter->map[direct].name); + filter->map[direct].name = strdup (name); + filter->map[direct].map = route_map_lookup_by_name (name); + } + return 0; +} + +/* Unset route-map from the peer. */ +int +peer_route_map_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) +{ + struct bgp_filter *filter; + struct bgp_filter *gfilter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (direct != RMAP_IN && direct != RMAP_OUT && + direct != RMAP_IMPORT && direct != RMAP_EXPORT) + return BGP_ERR_INVALID_VALUE; + + if ( (direct == RMAP_OUT || direct == RMAP_IMPORT) + && peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + /* apply peer-group filter */ + if (peer->af_group[afi][safi]) + { + gfilter = &peer->group->conf->filter[afi][safi]; + + if (gfilter->map[direct].name) + { + if (filter->map[direct].name) + free (filter->map[direct].name); + filter->map[direct].name = strdup (gfilter->map[direct].name); + filter->map[direct].map = gfilter->map[direct].map; + return 0; + } + } + + if (filter->map[direct].name) + free (filter->map[direct].name); + filter->map[direct].name = NULL; + filter->map[direct].map = NULL; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->map[direct].name) + free (filter->map[direct].name); + filter->map[direct].name = NULL; + filter->map[direct].map = NULL; + } + return 0; +} + +/* Set unsuppress-map to the peer. */ +int +peer_unsuppress_map_set (struct peer *peer, afi_t afi, safi_t safi, + const char *name) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->usmap.name) + free (filter->usmap.name); + + filter->usmap.name = strdup (name); + filter->usmap.map = route_map_lookup_by_name (name); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! peer->af_group[afi][safi]) + continue; + + if (filter->usmap.name) + free (filter->usmap.name); + filter->usmap.name = strdup (name); + filter->usmap.map = route_map_lookup_by_name (name); + } + return 0; +} + +/* Unset route-map from the peer. */ +int +peer_unsuppress_map_unset (struct peer *peer, afi_t afi, safi_t safi) +{ + struct bgp_filter *filter; + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + if (peer_is_group_member (peer, afi, safi)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + filter = &peer->filter[afi][safi]; + + if (filter->usmap.name) + free (filter->usmap.name); + filter->usmap.name = NULL; + filter->usmap.map = NULL; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + filter = &peer->filter[afi][safi]; + + if (! 
peer->af_group[afi][safi]) + continue; + + if (filter->usmap.name) + free (filter->usmap.name); + filter->usmap.name = NULL; + filter->usmap.map = NULL; + } + return 0; +} + +int +peer_maximum_prefix_set (struct peer *peer, afi_t afi, safi_t safi, + u_int32_t max, u_char threshold, + int warning, u_int16_t restart) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + peer->pmax[afi][safi] = max; + peer->pmax_threshold[afi][safi] = threshold; + peer->pmax_restart[afi][safi] = restart; + if (warning) + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + else + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (! peer->af_group[afi][safi]) + continue; + + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + peer->pmax[afi][safi] = max; + peer->pmax_threshold[afi][safi] = threshold; + peer->pmax_restart[afi][safi] = restart; + if (warning) + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + else + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + } + return 0; +} + +int +peer_maximum_prefix_unset (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (! peer->afc[afi][safi]) + return BGP_ERR_PEER_INACTIVE; + + /* apply peer-group config */ + if (peer->af_group[afi][safi]) + { + if (CHECK_FLAG (peer->group->conf->af_flags[afi][safi], + PEER_FLAG_MAX_PREFIX)) + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + else + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + + if (CHECK_FLAG (peer->group->conf->af_flags[afi][safi], + PEER_FLAG_MAX_PREFIX_WARNING)) + SET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + else + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + + peer->pmax[afi][safi] = peer->group->conf->pmax[afi][safi]; + peer->pmax_threshold[afi][safi] = peer->group->conf->pmax_threshold[afi][safi]; + peer->pmax_restart[afi][safi] = peer->group->conf->pmax_restart[afi][safi]; + return 0; + } + + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + peer->pmax[afi][safi] = 0; + peer->pmax_threshold[afi][safi] = 0; + peer->pmax_restart[afi][safi] = 0; + + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + return 0; + + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + if (! peer->af_group[afi][safi]) + continue; + + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX); + UNSET_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING); + peer->pmax[afi][safi] = 0; + peer->pmax_threshold[afi][safi] = 0; + peer->pmax_restart[afi][safi] = 0; + } + return 0; +} + +int +peer_clear (struct peer *peer) +{ + if (! 
CHECK_FLAG (peer->flags, PEER_FLAG_SHUTDOWN)) + { + if (CHECK_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW)) + { + UNSET_FLAG (peer->sflags, PEER_STATUS_PREFIX_OVERFLOW); + if (peer->t_pmax_restart) + { + BGP_TIMER_OFF (peer->t_pmax_restart); + if (BGP_DEBUG (events, EVENTS)) + zlog_debug ("%s Maximum-prefix restart timer cancelled", + peer->host); + } + + /* Beware: we may still be clearing; if so, the end of + * clearing will enable the peer */ + if (peer->state == bgp_peer_sIdle) + bgp_peer_enable(peer); + + return 0; + } + + peer->v_start = BGP_INIT_START_TIMER; + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_ADMIN_RESET); + } + return 0; +} + +int +peer_clear_soft (struct peer *peer, afi_t afi, safi_t safi, + enum bgp_clear_type stype) +{ + if (peer->state != bgp_peer_sEstablished) + return 0; + + if (! peer->afc[afi][safi]) + return BGP_ERR_AF_UNCONFIGURED; + + if (stype == BGP_CLEAR_SOFT_RSCLIENT) + { + if (! CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + return 0; + bgp_check_local_routes_rsclient (peer, afi, safi); + bgp_soft_reconfig_rsclient (peer, afi, safi); + } + + if (stype == BGP_CLEAR_SOFT_OUT || stype == BGP_CLEAR_SOFT_BOTH) + bgp_announce_route (peer, afi, safi); + + if (stype == BGP_CLEAR_SOFT_IN_ORF_PREFIX) + { + if (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_ADV) + && (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_RCV) + || CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_OLD_RCV))) + { + struct bgp_filter *filter = &peer->filter[afi][safi]; + u_char prefix_type; + + if (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_RCV)) + prefix_type = ORF_TYPE_PREFIX; + else + prefix_type = ORF_TYPE_PREFIX_OLD; + + if (filter->plist[FILTER_IN].ref) + { + if (CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_ORF_PREFIX_SEND)) + bgp_route_refresh_send (peer, afi, safi, + prefix_type, REFRESH_DEFER, 1); + bgp_route_refresh_send (peer, afi, safi, prefix_type, + REFRESH_IMMEDIATE, 0); + } + else + { + if (CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_ORF_PREFIX_SEND)) + bgp_route_refresh_send (peer, afi, safi, + prefix_type, REFRESH_IMMEDIATE, 1); + else + bgp_route_refresh_send (peer, afi, safi, 0, 0, 0); + } + return 0; + } + } + + if (stype == BGP_CLEAR_SOFT_IN || stype == BGP_CLEAR_SOFT_BOTH + || stype == BGP_CLEAR_SOFT_IN_ORF_PREFIX) + { + /* If neighbor has soft reconfiguration inbound flag. Use Adj-RIB-In database. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)) + bgp_soft_reconfig_in (peer, afi, safi); + else + { + /* If neighbor has route refresh capability, send route refresh message to the peer. */ + if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV) + || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) + bgp_route_refresh_send (peer, afi, safi, 0, 0, 0); + else + return BGP_ERR_SOFT_RECONFIG_UNCONFIGURED; + } + } + return 0; +} + +/* Display peer uptime. */ +/* XXX: why does this function return char * when it takes buffer? */ +char * +peer_uptime (time_t uptime2, char *buf, size_t len) +{ + time_t uptime1; + struct tm *tm; + + /* Check buffer length. */ + if (len < BGP_UPTIME_LEN) + { + zlog_warn ("peer_uptime (): buffer shortage %lu", (u_long)len); + /* XXX: should return status instead of buf... */ + snprintf (buf, len, "<error> "); + return buf; + } + + /* If no connection has been made before, print `never'. */ + if (uptime2 == 0) + { + snprintf (buf, len, "never "); + return buf; + } + + /* Get current time. 
*/ + uptime1 = time (NULL); + uptime1 -= uptime2; + tm = gmtime (&uptime1); + + /* Making formatted timer strings. */ +#define ONE_DAY_SECOND 60*60*24 +#define ONE_WEEK_SECOND 60*60*24*7 + + if (uptime1 < ONE_DAY_SECOND) + snprintf (buf, len, "%02d:%02d:%02d", + tm->tm_hour, tm->tm_min, tm->tm_sec); + else if (uptime1 < ONE_WEEK_SECOND) + snprintf (buf, len, "%dd%02dh%02dm", + tm->tm_yday, tm->tm_hour, tm->tm_min); + else + snprintf (buf, len, "%02dw%dd%02dh", + tm->tm_yday/7, tm->tm_yday - ((tm->tm_yday/7) * 7), tm->tm_hour); + return buf; +} + +static void +bgp_config_write_filter (struct vty *vty, struct peer *peer, + afi_t afi, safi_t safi) +{ + struct bgp_filter *filter; + struct bgp_filter *gfilter = NULL; + char *addr; + int in = FILTER_IN; + int out = FILTER_OUT; + + addr = peer->host; + filter = &peer->filter[afi][safi]; + if (peer->af_group[afi][safi]) + gfilter = &peer->group->conf->filter[afi][safi]; + + /* distribute-list. */ + if (filter->dlist[in].name) + if (! gfilter || ! gfilter->dlist[in].name + || strcmp (filter->dlist[in].name, gfilter->dlist[in].name) != 0) + vty_out (vty, " neighbor %s distribute-list %s in%s", addr, + filter->dlist[in].name, VTY_NEWLINE); + if (filter->dlist[out].name && ! gfilter) + vty_out (vty, " neighbor %s distribute-list %s out%s", addr, + filter->dlist[out].name, VTY_NEWLINE); + + /* prefix-list. */ + if ( filter->plist[in].ref && (! gfilter + || (prefix_list_ref_ident(gfilter->plist[in].ref) + != prefix_list_ref_ident(filter->plist[in].ref))) ) + vty_out (vty, " neighbor %s prefix-list %s in%s", addr, + prefix_list_ref_name(filter->plist[in].ref), VTY_NEWLINE); + + if (filter->plist[out].ref && ! gfilter) + vty_out (vty, " neighbor %s prefix-list %s out%s", addr, + prefix_list_ref_name(filter->plist[out].ref), VTY_NEWLINE); + + /* route-map. */ + if (filter->map[RMAP_IN].name) + if (! gfilter || ! gfilter->map[RMAP_IN].name + || strcmp (filter->map[RMAP_IN].name, gfilter->map[RMAP_IN].name) != 0) + vty_out (vty, " neighbor %s route-map %s in%s", addr, + filter->map[RMAP_IN].name, VTY_NEWLINE); + if (filter->map[RMAP_OUT].name && ! gfilter) + vty_out (vty, " neighbor %s route-map %s out%s", addr, + filter->map[RMAP_OUT].name, VTY_NEWLINE); + if (filter->map[RMAP_IMPORT].name && ! gfilter) + vty_out (vty, " neighbor %s route-map %s import%s", addr, + filter->map[RMAP_IMPORT].name, VTY_NEWLINE); + if (filter->map[RMAP_EXPORT].name) + if (! gfilter || ! gfilter->map[RMAP_EXPORT].name + || strcmp (filter->map[RMAP_EXPORT].name, + gfilter->map[RMAP_EXPORT].name) != 0) + vty_out (vty, " neighbor %s route-map %s export%s", addr, + filter->map[RMAP_EXPORT].name, VTY_NEWLINE); + + /* unsuppress-map */ + if (filter->usmap.name && ! gfilter) + vty_out (vty, " neighbor %s unsuppress-map %s%s", addr, + filter->usmap.name, VTY_NEWLINE); + + /* filter-list. */ + if (filter->aslist[in].name) + if (! gfilter || ! gfilter->aslist[in].name + || strcmp (filter->aslist[in].name, gfilter->aslist[in].name) != 0) + vty_out (vty, " neighbor %s filter-list %s in%s", addr, + filter->aslist[in].name, VTY_NEWLINE); + if (filter->aslist[out].name && ! gfilter) + vty_out (vty, " neighbor %s filter-list %s out%s", addr, + filter->aslist[out].name, VTY_NEWLINE); +} + +/* BGP peer configuration display function. 
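/* Editorial example -- not part of this commit.  peer_uptime() requires a
 * caller-supplied buffer of at least BGP_UPTIME_LEN bytes and returns that
 * buffer, so the call can be nested directly in a vty_out() argument list.
 * (peer->uptime is the usual Quagga field recording when the session last
 * came up.)
 */
#if 0
  char timebuf[BGP_UPTIME_LEN];

  vty_out (vty, "  Uptime: %s%s",
           peer_uptime (peer->uptime, timebuf, BGP_UPTIME_LEN), VTY_NEWLINE);
#endif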
*/ +static void +bgp_config_write_peer (struct vty *vty, struct bgp *bgp, + struct peer *peer, afi_t afi, safi_t safi) +{ + struct bgp_filter *filter; + struct peer *g_peer = NULL; + char buf[SU_ADDRSTRLEN]; + char *addr; + + filter = &peer->filter[afi][safi]; + addr = peer->host; + if (peer_group_active (peer)) + g_peer = peer->group->conf; + + /************************************ + ****** Global to the neighbor ****** + ************************************/ + if (afi == AFI_IP && safi == SAFI_UNICAST) + { + /* remote-as. */ + if (! peer_group_active (peer)) + { + if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + vty_out (vty, " neighbor %s peer-group%s", addr, + VTY_NEWLINE); + if (peer->as) + vty_out (vty, " neighbor %s remote-as %u%s", addr, peer->as, + VTY_NEWLINE); + } + else + { + if (! g_peer->as) + vty_out (vty, " neighbor %s remote-as %u%s", addr, peer->as, + VTY_NEWLINE); + if (peer->af_group[AFI_IP][SAFI_UNICAST]) + vty_out (vty, " neighbor %s peer-group %s%s", addr, + peer->group->name, VTY_NEWLINE); + } + + /* local-as. */ + if (peer->change_local_as) + if (! peer_group_active (peer)) + vty_out (vty, " neighbor %s local-as %u%s%s", addr, + peer->change_local_as, + CHECK_FLAG (peer->flags, PEER_FLAG_LOCAL_AS_NO_PREPEND) ? + " no-prepend" : "", VTY_NEWLINE); + + /* Description. */ + if (peer->desc) + vty_out (vty, " neighbor %s description %s%s", addr, peer->desc, + VTY_NEWLINE); + + /* Shutdown. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_SHUTDOWN)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_SHUTDOWN)) + vty_out (vty, " neighbor %s shutdown%s", addr, VTY_NEWLINE); + + /* Password. */ + if (peer->password) + if (!peer_group_active (peer) + || ! g_peer->password + || strcmp (peer->password, g_peer->password) != 0) + vty_out (vty, " neighbor %s password %s%s", addr, peer->password, + VTY_NEWLINE); + + /* BGP port. */ + if (peer->port != BGP_PORT_DEFAULT) + vty_out (vty, " neighbor %s port %d%s", addr, peer->port, + VTY_NEWLINE); + + /* Local interface name. */ + if (peer->ifname) + vty_out (vty, " neighbor %s interface %s%s", addr, peer->ifname, + VTY_NEWLINE); + + /* Passive. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_PASSIVE)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_PASSIVE)) + vty_out (vty, " neighbor %s passive%s", addr, VTY_NEWLINE); + + /* EBGP multihop. */ + if (peer_sort (peer) != BGP_PEER_IBGP && peer->ttl != 1) + if (! peer_group_active (peer) || + g_peer->ttl != peer->ttl) + vty_out (vty, " neighbor %s ebgp-multihop %d%s", addr, peer->ttl, + VTY_NEWLINE); + + /* disable-connected-check. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)) + vty_out (vty, " neighbor %s disable-connected-check%s", addr, VTY_NEWLINE); + + /* Update-source. */ + if (peer->update_if) + if (! peer_group_active (peer) || ! g_peer->update_if + || strcmp (g_peer->update_if, peer->update_if) != 0) + vty_out (vty, " neighbor %s update-source %s%s", addr, + peer->update_if, VTY_NEWLINE); + if (peer->update_source) + if (! peer_group_active (peer) || ! 
g_peer->update_source + || sockunion_cmp (g_peer->update_source, + peer->update_source) != 0) + vty_out (vty, " neighbor %s update-source %s%s", addr, + sockunion2str (peer->update_source, buf, SU_ADDRSTRLEN), + VTY_NEWLINE); + + /* advertisement-interval */ + if (CHECK_FLAG (peer->config, PEER_CONFIG_ROUTEADV)) + vty_out (vty, " neighbor %s advertisement-interval %d%s", + addr, peer->v_routeadv, VTY_NEWLINE); + + /* timers. */ + if (CHECK_FLAG (peer->config, PEER_CONFIG_TIMER) + && ! peer_group_active (peer)) + vty_out (vty, " neighbor %s timers %d %d%s", addr, + peer->keepalive, peer->holdtime, VTY_NEWLINE); + + if (CHECK_FLAG (peer->config, PEER_CONFIG_CONNECT)) + vty_out (vty, " neighbor %s timers connect %d%s", addr, + peer->connect, VTY_NEWLINE); + + /* Default weight. */ + if (CHECK_FLAG (peer->config, PEER_CONFIG_WEIGHT)) + if (! peer_group_active (peer) || + g_peer->weight != peer->weight) + vty_out (vty, " neighbor %s weight %d%s", addr, peer->weight, + VTY_NEWLINE); + + /* Dynamic capability. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_DYNAMIC_CAPABILITY)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_DYNAMIC_CAPABILITY)) + vty_out (vty, " neighbor %s capability dynamic%s", addr, + VTY_NEWLINE); + + /* dont capability negotiation. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_DONT_CAPABILITY)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_DONT_CAPABILITY)) + vty_out (vty, " neighbor %s dont-capability-negotiate%s", addr, + VTY_NEWLINE); + + /* override capability negotiation. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) + vty_out (vty, " neighbor %s override-capability%s", addr, + VTY_NEWLINE); + + /* strict capability negotiation. */ + if (CHECK_FLAG (peer->flags, PEER_FLAG_STRICT_CAP_MATCH)) + if (! peer_group_active (peer) || + ! CHECK_FLAG (g_peer->flags, PEER_FLAG_STRICT_CAP_MATCH)) + vty_out (vty, " neighbor %s strict-capability-match%s", addr, + VTY_NEWLINE); + + if (! peer_group_active (peer)) + { + if (bgp_flag_check (bgp, BGP_FLAG_NO_DEFAULT_IPV4)) + { + if (peer->afc[AFI_IP][SAFI_UNICAST]) + vty_out (vty, " neighbor %s activate%s", addr, VTY_NEWLINE); + } + else + { + if (! peer->afc[AFI_IP][SAFI_UNICAST]) + vty_out (vty, " no neighbor %s activate%s", addr, VTY_NEWLINE); + } + } + } + + + /************************************ + ****** Per AF to the neighbor ****** + ************************************/ + + if (! (afi == AFI_IP && safi == SAFI_UNICAST)) + { + if (peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s peer-group %s%s", addr, + peer->group->name, VTY_NEWLINE); + else + vty_out (vty, " neighbor %s activate%s", addr, VTY_NEWLINE); + } + + /* ORF capability. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_SM) + || CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_RM)) + if (! peer->af_group[afi][safi]) + { + vty_out (vty, " neighbor %s capability orf prefix-list", addr); + + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_SM) + && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_RM)) + vty_out (vty, " both"); + else if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_ORF_PREFIX_SM)) + vty_out (vty, " send"); + else + vty_out (vty, " receive"); + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* Route reflector client. */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_REFLECTOR_CLIENT) + && ! 
peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s route-reflector-client%s", addr, + VTY_NEWLINE); + + /* Nexthop self. */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_NEXTHOP_SELF) + && ! peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s next-hop-self%s", addr, VTY_NEWLINE); + + /* Remove private AS. */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_REMOVE_PRIVATE_AS) + && ! peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s remove-private-AS%s", + addr, VTY_NEWLINE); + + /* send-community print. */ + if (! peer->af_group[afi][safi]) + { + if (bgp_option_check (BGP_OPT_CONFIG_CISCO)) + { + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_COMMUNITY) + && peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_EXT_COMMUNITY)) + vty_out (vty, " neighbor %s send-community both%s", addr, VTY_NEWLINE); + else if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_EXT_COMMUNITY)) + vty_out (vty, " neighbor %s send-community extended%s", + addr, VTY_NEWLINE); + else if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_COMMUNITY)) + vty_out (vty, " neighbor %s send-community%s", addr, VTY_NEWLINE); + } + else + { + if (! peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_COMMUNITY) + && ! peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_EXT_COMMUNITY)) + vty_out (vty, " no neighbor %s send-community both%s", + addr, VTY_NEWLINE); + else if (! peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_EXT_COMMUNITY)) + vty_out (vty, " no neighbor %s send-community extended%s", + addr, VTY_NEWLINE); + else if (! peer_af_flag_check (peer, afi, safi, PEER_FLAG_SEND_COMMUNITY)) + vty_out (vty, " no neighbor %s send-community%s", + addr, VTY_NEWLINE); + } + } + + /* Default information */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_DEFAULT_ORIGINATE) + && ! peer->af_group[afi][safi]) + { + vty_out (vty, " neighbor %s default-originate", addr); + if (peer->default_rmap[afi][safi].name) + vty_out (vty, " route-map %s", peer->default_rmap[afi][safi].name); + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* Soft reconfiguration inbound. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)) + if (! peer->af_group[afi][safi] || + ! CHECK_FLAG (g_peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)) + vty_out (vty, " neighbor %s soft-reconfiguration inbound%s", addr, + VTY_NEWLINE); + + /* maximum-prefix. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX)) + if (! peer->af_group[afi][safi] + || g_peer->pmax[afi][safi] != peer->pmax[afi][safi] + || g_peer->pmax_threshold[afi][safi] != peer->pmax_threshold[afi][safi] + || CHECK_FLAG (g_peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING) + != CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING)) + { + vty_out (vty, " neighbor %s maximum-prefix %ld", addr, peer->pmax[afi][safi]); + if (peer->pmax_threshold[afi][safi] != MAXIMUM_PREFIX_THRESHOLD_DEFAULT) + vty_out (vty, " %d", peer->pmax_threshold[afi][safi]); + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_WARNING)) + vty_out (vty, " warning-only"); + if (peer->pmax_restart[afi][safi]) + vty_out (vty, " restart %d", peer->pmax_restart[afi][safi]); + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* Route server client. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT) + && ! peer->af_group[afi][safi]) + vty_out (vty, " neighbor %s route-server-client%s", addr, VTY_NEWLINE); + + /* Allow AS in. */ + if (peer_af_flag_check (peer, afi, safi, PEER_FLAG_ALLOWAS_IN)) + if (! 
peer_group_active (peer) + || ! peer_af_flag_check (g_peer, afi, safi, PEER_FLAG_ALLOWAS_IN) + || peer->allowas_in[afi][safi] != g_peer->allowas_in[afi][safi]) + { + if (peer->allowas_in[afi][safi] == 3) + vty_out (vty, " neighbor %s allowas-in%s", addr, VTY_NEWLINE); + else + vty_out (vty, " neighbor %s allowas-in %d%s", addr, + peer->allowas_in[afi][safi], VTY_NEWLINE); + } + + /* Filter. */ + bgp_config_write_filter (vty, peer, afi, safi); + + /* attribute-unchanged. */ + if ((CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_AS_PATH_UNCHANGED) + || CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_UNCHANGED) + || CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MED_UNCHANGED)) + && ! peer->af_group[afi][safi]) + { + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_AS_PATH_UNCHANGED) + && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_UNCHANGED) + && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MED_UNCHANGED)) + vty_out (vty, " neighbor %s attribute-unchanged%s", addr, VTY_NEWLINE); + else + vty_out (vty, " neighbor %s attribute-unchanged%s%s%s%s", addr, + (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_AS_PATH_UNCHANGED)) ? + " as-path" : "", + (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_UNCHANGED)) ? + " next-hop" : "", + (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MED_UNCHANGED)) ? + " med" : "", VTY_NEWLINE); + } +} + +/* Display "address-family" configuration header. */ +void +bgp_config_write_family_header (struct vty *vty, afi_t afi, safi_t safi, + int *write) +{ + if (*write) + return; + + if (afi == AFI_IP && safi == SAFI_UNICAST) + return; + + vty_out (vty, "!%s address-family ", VTY_NEWLINE); + + if (afi == AFI_IP) + { + if (safi == SAFI_MULTICAST) + vty_out (vty, "ipv4 multicast"); + else if (safi == SAFI_MPLS_VPN) + vty_out (vty, "vpnv4 unicast"); + } + else if (afi == AFI_IP6) + { + vty_out (vty, "ipv6"); + + if (safi == SAFI_MULTICAST) + vty_out (vty, " multicast"); + } + + vty_out (vty, "%s", VTY_NEWLINE); + + *write = 1; +} + +/* Address family based peer configuration display. */ +static int +bgp_config_write_family (struct vty *vty, struct bgp *bgp, afi_t afi, + safi_t safi) +{ + int write = 0; + struct peer *peer; + struct peer_group *group; + struct listnode *node, *nnode; + + bgp_config_write_network (vty, bgp, afi, safi, &write); + + bgp_config_write_redistribute (vty, bgp, afi, safi, &write); + + for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group)) + { + if (group->conf->afc[afi][safi]) + { + bgp_config_write_family_header (vty, afi, safi, &write); + bgp_config_write_peer (vty, bgp, group->conf, afi, safi); + } + } + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (peer->afc[afi][safi]) + { + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER)) + { + bgp_config_write_family_header (vty, afi, safi, &write); + bgp_config_write_peer (vty, bgp, peer, afi, safi); + } + } + } + if (write) + vty_out (vty, " exit-address-family%s", VTY_NEWLINE); + + return write; +} + +int +bgp_config_write (struct vty *vty) +{ + int write = 0; + struct bgp *bgp; + struct peer_group *group; + struct peer *peer; + struct listnode *node, *nnode; + struct listnode *mnode, *mnnode; + + /* BGP Multiple instance. */ + if (bgp_option_check (BGP_OPT_MULTIPLE_INSTANCE)) + { + vty_out (vty, "bgp multiple-instance%s", VTY_NEWLINE); + write++; + } + + /* BGP Config type. */ + if (bgp_option_check (BGP_OPT_CONFIG_CISCO)) + { + vty_out (vty, "bgp config-type cisco%s", VTY_NEWLINE); + write++; + } + + /* BGP configuration. 
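/* Editorial illustration -- not part of this commit.  For a non-default
 * address family the writers above produce a stanza of this shape (sample
 * output; the neighbour address and route-map name are invented):
 *
 *   !
 *    address-family ipv6
 *    neighbor 2001:db8::1 activate
 *    neighbor 2001:db8::1 route-map RM6-IN in
 *    exit-address-family
 *
 * bgp_config_write_family_header() emits the header at most once, via the
 * *write flag, and only if some peer or peer-group is configured in that
 * AF; " exit-address-family" is written iff the header was.
 */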
*/ + for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) + { + if (write) + vty_out (vty, "!%s", VTY_NEWLINE); + + /* Router bgp ASN */ + vty_out (vty, "router bgp %u", bgp->as); + + if (bgp_option_check (BGP_OPT_MULTIPLE_INSTANCE)) + { + if (bgp->name) + vty_out (vty, " view %s", bgp->name); + } + vty_out (vty, "%s", VTY_NEWLINE); + + /* No Synchronization */ + if (bgp_option_check (BGP_OPT_CONFIG_CISCO)) + vty_out (vty, " no synchronization%s", VTY_NEWLINE); + + /* BGP fast-external-failover. */ + if (CHECK_FLAG (bgp->flags, BGP_FLAG_NO_FAST_EXT_FAILOVER)) + vty_out (vty, " no bgp fast-external-failover%s", VTY_NEWLINE); + + /* BGP router ID. */ + if (CHECK_FLAG (bgp->config, BGP_CONFIG_ROUTER_ID)) + vty_out (vty, " bgp router-id %s%s", safe_inet_ntoa (bgp->router_id), + VTY_NEWLINE); + + /* BGP log-neighbor-changes. */ + if (bgp_flag_check (bgp, BGP_FLAG_LOG_NEIGHBOR_CHANGES)) + vty_out (vty, " bgp log-neighbor-changes%s", VTY_NEWLINE); + + /* BGP configuration. */ + if (bgp_flag_check (bgp, BGP_FLAG_ALWAYS_COMPARE_MED)) + vty_out (vty, " bgp always-compare-med%s", VTY_NEWLINE); + + /* BGP default ipv4-unicast. */ + if (bgp_flag_check (bgp, BGP_FLAG_NO_DEFAULT_IPV4)) + vty_out (vty, " no bgp default ipv4-unicast%s", VTY_NEWLINE); + + /* BGP default local-preference. */ + if (bgp->default_local_pref != BGP_DEFAULT_LOCAL_PREF) + vty_out (vty, " bgp default local-preference %d%s", + bgp->default_local_pref, VTY_NEWLINE); + + /* BGP client-to-client reflection. */ + if (bgp_flag_check (bgp, BGP_FLAG_NO_CLIENT_TO_CLIENT)) + vty_out (vty, " no bgp client-to-client reflection%s", VTY_NEWLINE); + + /* BGP cluster ID. */ + if (CHECK_FLAG (bgp->config, BGP_CONFIG_CLUSTER_ID)) + vty_out (vty, " bgp cluster-id %s%s", safe_inet_ntoa (bgp->cluster_id), + VTY_NEWLINE); + + /* Confederation identifier*/ + if (CHECK_FLAG (bgp->config, BGP_CONFIG_CONFEDERATION)) + vty_out (vty, " bgp confederation identifier %i%s", bgp->confed_id, + VTY_NEWLINE); + + /* Confederation peer */ + if (bgp->confed_peers_cnt > 0) + { + int i; + + vty_out (vty, " bgp confederation peers"); + + for (i = 0; i < bgp->confed_peers_cnt; i++) + vty_out(vty, " %u", bgp->confed_peers[i]); + + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* BGP enforce-first-as. */ + if (bgp_flag_check (bgp, BGP_FLAG_ENFORCE_FIRST_AS)) + vty_out (vty, " bgp enforce-first-as%s", VTY_NEWLINE); + + /* BGP deterministic-med. */ + if (bgp_flag_check (bgp, BGP_FLAG_DETERMINISTIC_MED)) + vty_out (vty, " bgp deterministic-med%s", VTY_NEWLINE); + + /* BGP graceful-restart. */ + if (bgp->stalepath_time != BGP_DEFAULT_STALEPATH_TIME) + vty_out (vty, " bgp graceful-restart stalepath-time %d%s", + bgp->stalepath_time, VTY_NEWLINE); + if (bgp_flag_check (bgp, BGP_FLAG_GRACEFUL_RESTART)) + vty_out (vty, " bgp graceful-restart%s", VTY_NEWLINE); + + /* BGP bestpath method. 
*/ + if (bgp_flag_check (bgp, BGP_FLAG_ASPATH_IGNORE)) + vty_out (vty, " bgp bestpath as-path ignore%s", VTY_NEWLINE); + if (bgp_flag_check (bgp, BGP_FLAG_ASPATH_CONFED)) + vty_out (vty, " bgp bestpath as-path confed%s", VTY_NEWLINE); + if (bgp_flag_check (bgp, BGP_FLAG_COMPARE_ROUTER_ID)) + vty_out (vty, " bgp bestpath compare-routerid%s", VTY_NEWLINE); + if (bgp_flag_check (bgp, BGP_FLAG_MED_CONFED) + || bgp_flag_check (bgp, BGP_FLAG_MED_MISSING_AS_WORST)) + { + vty_out (vty, " bgp bestpath med"); + if (bgp_flag_check (bgp, BGP_FLAG_MED_CONFED)) + vty_out (vty, " confed"); + if (bgp_flag_check (bgp, BGP_FLAG_MED_MISSING_AS_WORST)) + vty_out (vty, " missing-as-worst"); + vty_out (vty, "%s", VTY_NEWLINE); + } + + /* BGP network import check. */ + if (bgp_flag_check (bgp, BGP_FLAG_IMPORT_CHECK)) + vty_out (vty, " bgp network import-check%s", VTY_NEWLINE); + + /* BGP scan interval. */ + bgp_config_write_scan_time (vty); + + /* BGP flag dampening. */ + if (CHECK_FLAG (bgp->af_flags[AFI_IP][SAFI_UNICAST], + BGP_CONFIG_DAMPENING)) + bgp_config_write_damp (vty); + + /* BGP static route configuration. */ + bgp_config_write_network (vty, bgp, AFI_IP, SAFI_UNICAST, &write); + + /* BGP redistribute configuration. */ + bgp_config_write_redistribute (vty, bgp, AFI_IP, SAFI_UNICAST, &write); + + /* BGP timers configuration. */ + if (bgp->default_keepalive != BGP_DEFAULT_KEEPALIVE + && bgp->default_holdtime != BGP_DEFAULT_HOLDTIME) + vty_out (vty, " timers bgp %d %d%s", bgp->default_keepalive, + bgp->default_holdtime, VTY_NEWLINE); + + /* peer-group */ + for (ALL_LIST_ELEMENTS (bgp->group, node, nnode, group)) + { + bgp_config_write_peer (vty, bgp, group->conf, AFI_IP, SAFI_UNICAST); + } + + /* Normal neighbor configuration. */ + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_ACCEPT_PEER)) + bgp_config_write_peer (vty, bgp, peer, AFI_IP, SAFI_UNICAST); + } + + /* Distance configuration. */ + bgp_config_write_distance (vty, bgp); + + /* No auto-summary */ + if (bgp_option_check (BGP_OPT_CONFIG_CISCO)) + vty_out (vty, " no auto-summary%s", VTY_NEWLINE); + + /* IPv4 multicast configuration. */ + write += bgp_config_write_family (vty, bgp, AFI_IP, SAFI_MULTICAST); + + /* IPv4 VPN configuration. */ + write += bgp_config_write_family (vty, bgp, AFI_IP, SAFI_MPLS_VPN); + + /* IPv6 unicast configuration. */ + write += bgp_config_write_family (vty, bgp, AFI_IP6, SAFI_UNICAST); + + /* IPv6 multicast configuration. */ + write += bgp_config_write_family (vty, bgp, AFI_IP6, SAFI_MULTICAST); + + write++; + } + return write; +} + +void +bgp_master_init (void) +{ + memset (&bgp_master, 0, sizeof (struct bgp_master)); + + bm = &bgp_master; + bm->bgp = list_new (); + bm->listen_sockets = list_new (); + bm->port = BGP_PORT_DEFAULT; + bm->master = thread_master_create (); + bm->start_time = time (NULL); +} + + +void +bgp_init (void) +{ + /* peer index */ + bgp_peer_index_init(NULL); + + /* BGP VTY commands installation. */ + bgp_vty_init (); + + /* Init zebra. */ + bgp_zebra_init (); + + /* BGP inits. */ + bgp_attr_init (); + bgp_debug_init (); + bgp_dump_init (); + bgp_route_init (); + bgp_route_map_init (); + bgp_scan_init (); + bgp_mplsvpn_init (); + + /* Access list initialize. */ + access_list_init (); + access_list_add_hook (peer_distribute_update); + access_list_delete_hook (peer_distribute_update); + + /* Filter list initialize. 
*/ + bgp_filter_init (); + as_list_add_hook (peer_aslist_update); + as_list_delete_hook (peer_aslist_update); + + /* Prefix list initialize.*/ + prefix_list_init (); + prefix_list_add_hook (peer_prefix_list_update); + prefix_list_delete_hook (peer_prefix_list_update); + + /* Community list initialize. */ + bgp_clist = community_list_init (); + +#ifdef HAVE_SNMP + bgp_snmp_init (); +#endif /* HAVE_SNMP */ +} + +void +bgp_terminate (int terminating, int retain_mode) +{ + struct bgp *bgp; + struct peer *peer; + struct listnode *node, *nnode; + struct listnode *mnode, *mnnode; + + program_terminating = terminating; + + /* Disable all peers */ + for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + { +fprintf(stderr, ">>> %s:", peer->host) ; + if (retain_mode) + bgp_peer_disable(peer, NULL); + else if (terminating) + peer_flag_set(peer, PEER_FLAG_SHUTDOWN); + else + bgp_notify_send(peer, BGP_NOTIFY_CEASE, + BGP_NOTIFY_CEASE_ADMIN_RESET); +fprintf(stderr, "<<<\n") ; + } + + if (!retain_mode) + { + bgp_cleanup_routes (); + + if (bm->process_main_queue) + { + work_queue_free (bm->process_main_queue); + bm->process_main_queue = NULL; + } + if (bm->process_rsclient_queue) + { + work_queue_free (bm->process_rsclient_queue); + bm->process_rsclient_queue = NULL; + } + } + + /* if no sessions were enabled then need to check here */ + program_terminate_if_all_disabled(); +} + +/* If we are terminating the program, and all sessions are disabled + * then terminate all threads + */ +void +program_terminate_if_all_disabled(void) +{ + struct bgp *bgp; + struct peer *peer; + struct listnode *node, *nnode; + struct listnode *mnode, *mnnode; + + if (!program_terminating) + return; + + /* are there any active sessions remaining? */ + for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + if (bgp_session_is_active(peer->session)) + return; + + /* ask remaining pthreads to die */ + if (qpthreads_enabled && routing_nexus != NULL) + qpn_terminate(routing_nexus); + + if (qpthreads_enabled && bgp_nexus != NULL) + qpn_terminate(bgp_nexus); + + if (cli_nexus != NULL) + qpn_terminate(cli_nexus); +} + diff --git a/lib/mqueue.h b/lib/mqueue.h index d8790246..355aec23 100644 --- a/lib/mqueue.h +++ b/lib/mqueue.h @@ -93,16 +93,16 @@ typedef void mqueue_action(mqueue_block mqb, mqb_flag_t flag) ; enum { mqb_args_size_max = 64 } ; /* maximum size of struct args */ enum { mqb_argv_size_unit = 16 } ; /* allocate argv in these units */ -struct args +struct mqb_args { - char data[mqb_args_size_max] ; /* empty space */ + char bytes[mqb_args_size_max] ; /* empty space */ } ; #define MQB_ARGS_SIZE_OK(s) CONFIRM(sizeof(struct s) <= mqb_args_size_max) struct mqueue_block { - struct args args ; /* user structure */ + struct mqb_args args ; /* user structure */ mqueue_block next ; /* single linked list */ @@ -116,8 +116,12 @@ struct mqueue_block mqb_index_t argv_next ; /* iterator */ } ; -/* mqueue_block structures are malloced. That guarantees maximum alignment. */ -/* To guarantee maximum alignment for "struct args", it must be first item ! */ +/* mqueue_block structures are malloced. That guarantees maximum alignment. + * To guarantee maximum alignment for "struct args", it must be first item ! + * + * (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining + * about first argument of offsetof().) 
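/* Editorial example -- not part of this commit.  A user of the message
 * queue overlays its own argument structure on the "args" member.  The
 * structure must fit within mqb_args_size_max bytes, which the
 * MQB_ARGS_SIZE_OK() macro checks at compile time; and because "args" is
 * the first member of the (malloced, hence maximally aligned)
 * mqueue_block, the overlay is safe.  (struct my_args and its members are
 * invented for the example.)
 */
#if 0
struct my_args
{
  void* ref ;
  int   event ;
} ;
MQB_ARGS_SIZE_OK(my_args) ;

  /* ... in the message constructor or action routine ...              */
  struct my_args* args = (void*)(mqb->args.bytes) ;
  args->event = event ;
#endif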
+ */ typedef struct mqueue_block mqueue_block_t ; CONFIRM(offsetof(mqueue_block_t, args) == 0) ; diff --git a/lib/qpnexus.c b/lib/qpnexus.c index 8a78a70b..cb0bd12c 100644 --- a/lib/qpnexus.c +++ b/lib/qpnexus.c @@ -36,7 +36,13 @@ static void qpn_in_thread_init(qpn_nexus qpn); */ -/* Initialise a nexus -- allocating it if required. +/*============================================================================== + * Initialisation, add hook, free etc. + * + */ + +/*------------------------------------------------------------------------------ + * Initialise a nexus -- allocating it if required. * * If main_thread is set then no new thread will be created * when qpn_exec() is called, instead the finite state machine will be @@ -45,7 +51,7 @@ static void qpn_in_thread_init(qpn_nexus qpn); * * Returns the qpn_nexus. */ -qpn_nexus +extern qpn_nexus qpn_init_new(qpn_nexus qpn, int main_thread) { if (qpn == NULL) @@ -53,16 +59,27 @@ qpn_init_new(qpn_nexus qpn, int main_thread) else memset(qpn, 0, sizeof(struct qpn_nexus)) ; - qpn->selection = qps_selection_init_new(qpn->selection); - qpn->pile = qtimer_pile_init_new(qpn->pile); - qpn->queue = mqueue_init_new(qpn->queue, mqt_signal_unicast); + qpn->selection = qps_selection_init_new(qpn->selection); + qpn->pile = qtimer_pile_init_new(qpn->pile); + qpn->queue = mqueue_init_new(qpn->queue, mqt_signal_unicast); qpn->main_thread = main_thread; - qpn->start = qpn_start; + qpn->start = qpn_start; return qpn; } -/* free timers, selection, message queue and nexus +/*------------------------------------------------------------------------------ + * Add a hook function to the given nexus. + */ +extern void +qpn_add_hook_function(qpn_hook_list list, void* hook) +{ + passert(list->count < qpn_hooks_max) ; + list->hooks[list->count++] = hook ; +} ; + +/*------------------------------------------------------------------------------ + * free timers, selection, message queue and nexus * return NULL */ qpn_nexus @@ -99,24 +116,25 @@ qpn_free(qpn_nexus qpn) return NULL; } -/* If not main thread create new qpthread. - * Execute the state machine */ -void +/*============================================================================== + * Execution of a nexus + */ + +/*------------------------------------------------------------------------------ + * If not main qpthread create new qpthread. + * + * For all qpthreads: start the thread ! + */ +extern void qpn_exec(qpn_nexus qpn) { if (qpn->main_thread) - { - /* Run the state machine in calling thread */ - qpn->start(qpn); - } + qpn->start(qpn); else - { - /* create a qpthread and run the state machine in it */ - qpt_thread_create(qpn->start, qpn, NULL) ; - } -} + qpt_thread_create(qpn->start, qpn, NULL) ; +} ; -/*============================================================================== +/*------------------------------------------------------------------------------ * Pthread routine * * Processes: @@ -145,78 +163,86 @@ qpn_start(void* arg) qpn_nexus qpn = arg; mqueue_block mqb; int actions; - qtime_mono_t now; - qtime_mono_t max_wait; - int i; + qtime_mono_t now ; + qtime_t max_wait ; + unsigned i; + unsigned done ; + unsigned wait ; - /* now in our thread, complete initialisation */ + /* now in our thread, complete initialisation */ qpn_in_thread_init(qpn); + /* Until required to terminate, loop */ + done = 1 ; while (!qpn->terminate) { - now = qt_get_monotonic(); + wait = (done == 0) ; /* may wait this time only if nothing + found to do on the last pass */ - /* Signals are highest priority. 
- * only execute on the main thread */ + /* Signals are highest priority -- only execute for main thread + * + * Restarts "done" for this pass. + */ if (qpn->main_thread) - quagga_sigevent_process (); + done = quagga_sigevent_process() ; + else + done = 0 ; - /* max time to wait in pselect */ - max_wait = QTIME(MAX_PSELECT_TIMOUT); - - /* event hooks, if any. High priority */ - for (i = 0; i < NUM_EVENT_HOOK; ++i) - { - if (qpn->event_hook[i] != NULL) - { - /* first, second and third priority */ - qtime_mono_t event_wait = qpn->event_hook[i](qpn_pri_third); - if (event_wait > 0 && event_wait < max_wait) - max_wait = event_wait; - } - } + /* Foreground hooks, if any. */ + for (i = 0; i < qpn->foreground.count ; ++i) + done |= ((qpn_hook_function*)(qpn->foreground.hooks[i]))() ; /* drain the message queue, will be in waiting for signal state * when it's empty */ - for (;;) + + if (done != 0) + wait = 0 ; /* turn off wait if found something */ + + while (1) { - mqb = mqueue_dequeue(qpn->queue, 1, qpn->mts) ; + mqb = mqueue_dequeue(qpn->queue, wait, qpn->mts) ; if (mqb == NULL) break; mqb_dispatch(mqb, mqb_action); - } - /* Event hooks, if any. All priorities */ - for (i = 0; i < NUM_EVENT_HOOK; ++i) - { - if (qpn->event_hook[i] != NULL) - { - /* first, second third and fourth priority */ - qtime_mono_t event_wait = qpn->event_hook[i](qpn_pri_fourth); - if (event_wait > 0 && event_wait < max_wait) - max_wait = event_wait; - } - } - - /* block for some input, output, signal or timeout */ - actions = qps_pselect(qpn->selection, - qtimer_pile_top_time(qpn->pile, now + max_wait)); - - /* process I/O actions */ - while (actions) - actions = qps_dispatch_next(qpn->selection) ; + done = 1 ; /* done something */ + wait = 0 ; /* turn off wait */ + } ; - mqueue_done_waiting(qpn->queue, qpn->mts); + /* block for some input, output, signal or timeout + * + * wait will be true iff did nothing the last time round the loop, and + * not found anything to be done up to this point either. + */ + if (wait) + max_wait = qtimer_pile_top_wait(qpn->pile, QTIME(MAX_PSELECT_WAIT)) ; + else + max_wait = 0 ; + + actions = qps_pselect(qpn->selection, max_wait) ; + done |= actions ; + + if (wait) + mqueue_done_waiting(qpn->queue, qpn->mts); + + /* process I/O actions */ + while (actions) + actions = qps_dispatch_next(qpn->selection) ; - /* process timers */ + /* process timers */ + now = qt_get_monotonic() ; while (qtimer_pile_dispatch_next(qpn->pile, now)) - { - } - } + done = 1 ; + + /* If nothing done in this pass, see if anything in the background */ + if (done == 0) + for (i = 0; i < qpn->background.count ; ++i) + done |= ((qpn_hook_function*)(qpn->background.hooks[i]))() ; + } ; /* last bit of code to run in this thread */ - if (qpn->in_thread_final) + if (qpn->in_thread_final != NULL) qpn->in_thread_final(); return NULL; diff --git a/lib/qpnexus.h b/lib/qpnexus.h index a6cad148..d5b7c5a6 100644 --- a/lib/qpnexus.h +++ b/lib/qpnexus.h @@ -48,31 +48,27 @@ */ /* maximum time in seconds to sit in a pselect */ -#define MAX_PSELECT_TIMOUT 10 +#define MAX_PSELECT_WAIT 10 /* signal for message queues */ #define SIGMQUEUE SIGUSR2 /* number of event hooks */ -#define NUM_EVENT_HOOK 2 - -/* Work priorities */ -enum qpn_priority -{ - qpn_pri_highest = 1, - - qpn_pri_first = 1, - qpn_pri_second = 2, - qpn_pri_third = 3, - qpn_pri_fourth = 4, - - qpn_pri_lowest = 4, -}; +enum { qpn_hooks_max = 4 } ; /*============================================================================== * Data Structures. 
*/ +typedef int qpn_hook_function(void) ; + +typedef struct qpn_hook_list* qpn_hook_list ; +struct qpn_hook_list +{ + void* hooks[qpn_hooks_max] ; + unsigned count ; +} ; + typedef struct qpn_nexus* qpn_nexus ; struct qpn_nexus @@ -99,30 +95,45 @@ struct qpn_nexus /* qpthread routine, can override */ void* (*start)(void*); - /* in-thread initialize, can override. Called within the thread - * after all other initializion just before thread loop */ + /* in-thread initialise, can override. Called within the thread + * after all other initialisation just before thread loop */ void (*in_thread_init)(void); - /* in-thread finalize, can override. Called within thread + /* in-thread finalise, can override. Called within thread * just before thread dies. Nexus components all exist but * thread loop is no longer executed */ void (*in_thread_final)(void); - /* thread loop events, can override. Called before and after message queue, - * and before I/O and timers. - * Hook should perform all work <= given priority. - * Returns the time to try again, 0 means default to maximum. + /* in-thread queue(s) of events or other work. + * + * The hook function(s) are called in the qpnexus loop, at the top of the + * loop. So in addition to the mqueue, I/O, timers and any background stuff, + * the thread may have other queue(s) of things to be done. + * + * Hook function can process some queue(s) of things to be done. It does not + * have to empty its queues, but it MUST only return 0 if all queues are now + * empty. */ - qtime_mono_t (*event_hook[NUM_EVENT_HOOK])(enum qpn_priority); - + struct qpn_hook_list foreground ; + + /* in-thread background queue(s) of events or other work. + * + * The hook functions are called at the bottom of the qpnexus loop, but only + * when there is absolutely nothing else to do. + * + * The hook function should do some unit of background work (if there is any) + * and return. MUST return 0 iff there is no more work to do. + */ + struct qpn_hook_list background ; }; /*============================================================================== * Functions */ -extern qpn_nexus qpn_init_new(qpn_nexus qtn, int main_thread); -extern void qpn_exec(qpn_nexus qtn); +extern qpn_nexus qpn_init_new(qpn_nexus qpn, int main_thread); +extern void qpn_add_hook_function(qpn_hook_list list, void* hook) ; +extern void qpn_exec(qpn_nexus qpn); extern void qpn_terminate(qpn_nexus qpn); extern qpn_nexus qpn_free(qpn_nexus qpn); diff --git a/lib/qpselect.c b/lib/qpselect.c index 7df59752..d3f8e5ad 100644 --- a/lib/qpselect.c +++ b/lib/qpselect.c @@ -270,13 +270,8 @@ qps_set_signal(qps_selection qps, int signum, sigset_t sigmask) } ; } ; -/* Execute a pselect for the given selection -- subject to the given timeout - * *time*. - * - * The time-out time is an "absolute" time, as measured by qt_get_monotonic(). - * - * A timeout time <= the current qt_get_monotonic() is treated as a zero - * timeout period, and will return immediately from the pselect. +/* Execute a pselect for the given selection -- subject to the given maximum + * time to wait. * * There is no support for an infinite timeout. * @@ -289,7 +284,7 @@ qps_set_signal(qps_selection qps, int signum, sigset_t sigmask) * The qps_dispatch_next() processes the returns from pselect(). 
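/* Editorial example -- not part of this commit.  A nexus client that keeps
 * a work queue of its own registers a foreground hook; per the comments
 * above, the hook does some of that work and returns 0 only once its
 * queues are empty, since the loop uses the result to decide whether it
 * may block in pselect().  (my_work_queue_empty()/my_do_one_item() are
 * invented.)
 */
#if 0
static int
my_foreground_hook(void)
{
  if (my_work_queue_empty())
    return 0 ;                  /* nothing left to do                   */

  my_do_one_item() ;
  return 1 ;                    /* did something -- not yet empty       */
}

  /* at initialisation, before qpn_exec():                              */
  qpn_add_hook_function(&bgp_nexus->foreground, (void*)my_foreground_hook) ;
#endif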
*/ int -qps_pselect(qps_selection qps, qtime_mono_t timeout) +qps_pselect(qps_selection qps, qtime_t max_wait) { struct timespec ts ; qps_mnum_t mnum ; @@ -334,16 +329,15 @@ qps_pselect(qps_selection qps, qtime_mono_t timeout) qps->tried_fd_last = qps->fd_last ; qps->pend_fd = 0 ; - /* Convert timeout time to interval for pselect() */ - timeout -= qt_get_monotonic() ; - if (timeout < 0) - timeout = 0 ; + /* Make sure not trying to do something stupid */ + if (max_wait < 0) + max_wait = 0 ; /* Finally ready for the main event */ n = pselect(qps->fd_last + 1, p_fds[qps_read_mnum], p_fds[qps_write_mnum], p_fds[qps_error_mnum], - qtime2timespec(&ts, timeout), + qtime2timespec(&ts, max_wait), (qps->signum != 0) ? &qps->sigmask : NULL) ; /* If have something, set and return the pending count. */ diff --git a/lib/qtimers.c b/lib/qtimers.c index dcce24b9..0aef52a4 100644 --- a/lib/qtimers.c +++ b/lib/qtimers.c @@ -108,7 +108,9 @@ qtimer_pile_init_new(qtimer_pile qtp) * timers -- invalid heap -- need to properly initialise */ - /* Eclipse flags offsetof(struct qtimer, backlink) as a syntax error :-( */ + /* (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining + * about first argument of offsetof().) + */ typedef struct qtimer qtimer_t ; heap_init_new_backlinked(&qtp->timers, 0, (heap_cmp*)qtimer_cmp, @@ -122,15 +124,18 @@ qtimer_pile_init_new(qtimer_pile qtp) * empty, or the top entry times out after the maximum time, then the maximum * is returned. */ -qtime_mono_t -qtimer_pile_top_time(qtimer_pile qtp, qtime_mono_t max_time) +qtime_t +qtimer_pile_top_wait(qtimer_pile qtp, qtime_t max_wait) { + qtime_t top_wait ; qtimer qtr = heap_top_item(&qtp->timers) ; - if ((qtr == NULL) || (qtr->time >= max_time)) - return max_time ; - else - return qtr->time ; + if (qtr == NULL) + return max_wait ; + + top_wait = qtr->time - qt_get_monotonic() ; + + return (top_wait < max_wait) ? top_wait : max_wait ; } ; /* Dispatch the next timer whose time is <= the given "upto" time. @@ -157,7 +162,6 @@ qtimer_pile_dispatch_next(qtimer_pile qtp, qtime_mono_t upto) qtr->state = qtr_state_unset_pending ; qtr->action(qtr, qtr->timer_info, upto) ; - assert(qtp == qtr->pile); if (qtr->state == qtr_state_unset_pending) qtimer_unset(qtr) ; @@ -372,7 +376,9 @@ qtimer_pile_verify(qtimer_pile qtp) vector_index e ; qtimer qtr ; - /* Eclipse flags offsetof(struct qtimer, backlink) as a syntax error :-( */ + /* (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining + * about first argument of offsetof().) + */ typedef struct qtimer qtimer_t ; assert(th->cmp == (heap_cmp*)qtimer_cmp) ; diff --git a/lib/qtimers.h b/lib/qtimers.h index 3d509acb..0bc3d7a1 100644 --- a/lib/qtimers.h +++ b/lib/qtimers.h @@ -85,8 +85,8 @@ qtimer_pile_init_new(qtimer_pile qtp) ; int qtimer_pile_dispatch_next(qtimer_pile qtp, qtime_mono_t upto) ; -qtime_mono_t -qtimer_pile_top_time(qtimer_pile qtp, qtime_mono_t max_time) ; +qtime_t +qtimer_pile_top_wait(qtimer_pile qtp, qtime_t max_wait) ; qtimer qtimer_pile_ream(qtimer_pile qtp, int free_structure) ; diff --git a/lib/sigevent.c b/lib/sigevent.c index 30e9a3d1..a3d4219c 100644 --- a/lib/sigevent.c +++ b/lib/sigevent.c @@ -16,7 +16,7 @@ * You should have received a copy of the GNU General Public License * along with Quagga; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. 
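/* Editorial note -- not part of this commit.  qps_pselect() now takes a
 * relative maximum wait, not an absolute monotonic deadline, and
 * qtimer_pile_top_time() has become qtimer_pile_top_wait().  A caller
 * migrates like this (names as in qpn_start() above):
 */
#if 0
  /* old: absolute time-out, converted back to an interval internally   */
  actions = qps_pselect(selection,
                        qtimer_pile_top_time(pile, now + max_wait)) ;

  /* new: relative wait, capped by the first timer due to expire        */
  actions = qps_pselect(selection,
                        qtimer_pile_top_wait(pile, QTIME(MAX_PSELECT_WAIT))) ;
#endif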
*/ #include <zebra.h> @@ -41,13 +41,13 @@ struct quagga_sigevent_master_t { struct thread *t; - struct quagga_signal_t *signals; + struct quagga_signal_t *signals; int sigc; - + volatile sig_atomic_t caught; } sigmaster; -/* Generic signal handler +/* Generic signal handler * Schedules signal event thread */ static void @@ -55,24 +55,30 @@ quagga_signal_handler (int signo) { int i; struct quagga_signal_t *sig; - + for (i = 0; i < sigmaster.sigc; i++) { sig = &(sigmaster.signals[i]); - + if (sig->signal == signo) sig->caught = 1; } - + sigmaster.caught = 1; -} +} -/* check if signals have been caught and run appropriate handlers */ +/* check if signals have been caught and run appropriate handlers + * + * Returns: 0 => nothing to do + * -1 => failed + * > 0 => done this many signals + */ int quagga_sigevent_process (void) { struct quagga_signal_t *sig; int i; + int done ; #ifdef SIGEVENT_BLOCK_SIGNALS /* shouldnt need to block signals, but potentially may be needed */ sigset_t newmask, oldmask; @@ -85,7 +91,7 @@ quagga_sigevent_process (void) sigfillset (&newmask); sigdelset (&newmask, SIGTRAP); sigdelset (&newmask, SIGKILL); - + if ( (sigprocmask (SIG_BLOCK, &newmask, &oldmask)) < 0) { zlog_err ("quagga_signal_timer: couldnt block signals!"); @@ -93,13 +99,14 @@ quagga_sigevent_process (void) } #endif /* SIGEVENT_BLOCK_SIGNALS */ + done = 0 ; if (sigmaster.caught > 0) { sigmaster.caught = 0; /* must not read or set sigmaster.caught after here, * race condition with per-sig caught flags if one does */ - + for (i = 0; i < sigmaster.sigc; i++) { sig = &(sigmaster.signals[i]); @@ -108,6 +115,7 @@ quagga_sigevent_process (void) { sig->caught = 0; sig->handler (); + ++done ; } } } @@ -117,7 +125,7 @@ quagga_sigevent_process (void) return -1; #endif /* SIGEVENT_BLOCK_SIGNALS */ - return 0; + return done ; } #ifdef SIGEVENT_SCHEDULE_THREAD @@ -159,7 +167,7 @@ signal_set (int signo) } ret = sigaction (signo, &sig, &osig); - if (ret < 0) + if (ret < 0) return ret; else return 0; @@ -245,13 +253,13 @@ trap_default_signals(void) SIGUSR1, SIGUSR2, #ifdef SIGPOLL - SIGPOLL, + SIGPOLL, #endif #ifdef SIGVTALRM SIGVTALRM, #endif #ifdef SIGSTKFLT - SIGSTKFLT, + SIGSTKFLT, #endif }; static const int ignore_signals[] = { @@ -309,8 +317,8 @@ trap_default_signals(void) } } -void -signal_init (struct thread_master *m, int sigc, +void +signal_init (struct thread_master *m, int sigc, struct quagga_signal_t signals[]) { @@ -320,7 +328,7 @@ signal_init (struct thread_master *m, int sigc, /* First establish some default handlers that can be overridden by the application. */ trap_default_signals(); - + while (i < sigc) { sig = &signals[i]; @@ -332,9 +340,9 @@ signal_init (struct thread_master *m, int sigc, sigmaster.sigc = sigc; sigmaster.signals = signals; -#ifdef SIGEVENT_SCHEDULE_THREAD - sigmaster.t = - thread_add_timer (m, quagga_signal_timer, &sigmaster, +#ifdef SIGEVENT_SCHEDULE_THREAD + sigmaster.t = + thread_add_timer (m, quagga_signal_timer, &sigmaster, QUAGGA_SIGNAL_TIMER_INTERVAL); #endif /* SIGEVENT_SCHEDULE_THREAD */ } diff --git a/lib/stream.c b/lib/stream.c index 14c7c589..b4c16977 100644 --- a/lib/stream.c +++ b/lib/stream.c @@ -998,46 +998,6 @@ stream_flush (struct stream* s, int fd) } /*------------------------------------------------------------------------------ - * Try to write stream contents to the file descriptor -- assuming non-blocking. - * - * Loops if gets EINTR. - * - * If writes everything, resets the stream. - * - * If does not write everything, then would block. 
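/* Editorial note -- not part of this commit.  The new stream_fifo_reset()
 * (below) empties a FIFO by forgetting its contents -- head, tail and
 * count are simply zeroised -- whereas the existing stream_fifo_clean()
 * walks the FIFO and frees the streams it still holds.  Reset is the one
 * to use when the streams are owned, and will be freed, elsewhere:
 */
#if 0
  stream_fifo_reset (fifo) ;    /* streams freed by their owner         */
  stream_fifo_clean (fifo) ;    /* streams freed here, FIFO kept        */
#endif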
- * - * Returns: >= 0 number of bytes left to write - * -1 => some error (not including EINTR, EAGAIN or EWOULDBLOCK) - */ -int -stream_flush_try(struct stream* s, int fd) -{ - int have ; - int ret ; - - STREAM_VERIFY_SANE(s); - - while ((have = (s->endp - s->getp)) != 0) - { - ret = write(fd, s->data + s->getp, have) ; - if (ret > 0) - s->getp += ret ; - else if (ret < 0) - { - ret = errno ; - if ((ret == EAGAIN) || (ret == EWOULDBLOCK)) - return have ; - if (ret != EINTR) - return -1 ; - } ; - } ; - - s->getp = s->endp = 0; - - return 0 ; -} - -/*------------------------------------------------------------------------------ * Transfer contents of stream to given buffer and reset stream. * * Transfers *entire* stream buffer. @@ -1113,6 +1073,13 @@ stream_fifo_head (struct stream_fifo *fifo) } void +stream_fifo_reset (struct stream_fifo *fifo) +{ + fifo->head = fifo->tail = NULL; + fifo->count = 0; +} + +void stream_fifo_clean (struct stream_fifo *fifo) { struct stream *s; diff --git a/lib/stream.h b/lib/stream.h index 094cf0c6..e7303652 100644 --- a/lib/stream.h +++ b/lib/stream.h @@ -224,6 +224,7 @@ extern struct stream_fifo *stream_fifo_new (void); extern void stream_fifo_push (struct stream_fifo *fifo, struct stream *s); extern struct stream *stream_fifo_pop (struct stream_fifo *fifo); extern struct stream *stream_fifo_head (struct stream_fifo *fifo); +extern void stream_fifo_reset (struct stream_fifo *fifo); extern void stream_fifo_clean (struct stream_fifo *fifo); extern void stream_fifo_free (struct stream_fifo *fifo); diff --git a/lib/thread.c b/lib/thread.c index f2b873ac..3df9acf7 100644 --- a/lib/thread.c +++ b/lib/thread.c @@ -31,6 +31,7 @@ #include "command.h" #include "sigevent.h" #include "qpthreads.h" +#include "qtimers.h" /* Recent absolute time of day */ struct timeval recent_time; @@ -47,7 +48,12 @@ static qpt_mutex_t thread_mutex; #define UNLOCK qpt_mutex_unlock(&thread_mutex); static struct hash *cpu_record = NULL; -/* Struct timeval's tv_usec one second value. */ +/* Pointer to qtimer pile to be used, if any */ +static qtimer_pile use_qtimer_pile = NULL ; +static qtimer spare_qtimers = NULL ; +static unsigned used_standard_timer = 0 ; + +/* Struct timeval's tv_usec one second value. */ #define TIMER_SECOND_MICRO 1000000L /* Adjust so that tv_usec is in the range [0,TIMER_SECOND_MICRO). @@ -238,18 +244,51 @@ cpu_record_hash_cmp (const struct cpu_thread_history *a, static void * cpu_record_hash_alloc (struct cpu_thread_history *a) { - struct cpu_thread_history *new; + const char* b ; + const char* e ; + char* n ; + int l ; + struct cpu_thread_history *new ; + + /* Establish start and length of name, removing leading/trailing + * spaces and any enclosing (...) -- recursively. + */ + b = a->funcname ; + e = b + strlen(b) - 1 ; + + while (1) + { + while (*b == ' ') + ++b ; /* strip leading spaces */ + if (*b == '\0') + break ; /* quit if now empty */ + while (*e == ' ') + --e ; /* strip trailing spaces */ + if ((*b != '(') || (*e != ')')) + break ; /* quit if not now (...) 
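/* Editorial illustration -- not part of this commit.  The name clean-up
 * here strips leading/trailing spaces and enclosing "(...)" repeatedly,
 * so all of the following produce the same history key "foo":
 *
 *    "foo"    " foo "    "(foo)"    " ( ( foo ) ) "
 *
 * which lets call sites that stringize the function name differently
 * share a single cpu_thread_history entry.
 */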
*/ + ++b ; + --e ; /* discard ( and ) */ + } ; + + l = (e + 1) - b ; /* length excluding trailing \0 */ + + n = XMALLOC(MTYPE_THREAD_FUNCNAME, l + 1) ; + memcpy(n, b, l) ; + n[l] = '\0' ; + + /* Allocate empty structure and set address and name */ new = XCALLOC (MTYPE_THREAD_STATS, sizeof (struct cpu_thread_history)); - new->func = a->func; - new->funcname = XSTRDUP(MTYPE_THREAD_FUNCNAME, a->funcname); - return new; + new->func = a->func; + new->funcname = n ; + + return new ; } static void cpu_record_hash_free (void *a) { struct cpu_thread_history *hist = a; - char* funcname = miyagi(hist->funcname) ; + void* funcname = miyagi(hist->funcname) ; XFREE (MTYPE_THREAD_FUNCNAME, funcname); XFREE (MTYPE_THREAD_STATS, hist); @@ -497,7 +536,6 @@ thread_add_unuse (struct thread_master *m, struct thread *thread) assert (thread->prev == NULL); assert (thread->type == THREAD_UNUSED); thread_list_add (&m->unuse, thread); - /* XXX: Should we deallocate funcname here? */ } /* Free all unused thread. */ @@ -510,8 +548,13 @@ thread_list_free (struct thread_master *m, struct thread_list *list) for (t = list->head; t; t = next) { next = t->next; - if (t->funcname) - XFREE (MTYPE_THREAD_FUNCNAME, t->funcname); + + if ( (use_qtimer_pile != NULL) + && ( (t->type == THREAD_TIMER || t->type == THREAD_BACKGROUND) ) + && (t->u.qtr != NULL) + ) + qtimer_free(t->u.qtr) ; + XFREE (MTYPE_THREAD, t); list->count--; m->alloc--; @@ -522,6 +565,8 @@ thread_list_free (struct thread_master *m, struct thread_list *list) void thread_master_free (struct thread_master *m) { + qtimer qtr ; + thread_list_free (m, &m->read); thread_list_free (m, &m->write); thread_list_free (m, &m->timer); @@ -540,6 +585,12 @@ thread_master_free (struct thread_master *m) cpu_record = NULL; } UNLOCK + + while ((qtr = spare_qtimers) != NULL) + { + spare_qtimers = (void*)(qtr->pile) ; + qtimer_free(qtr) ; + } ; } /* Thread list is empty or not. */ @@ -570,34 +621,26 @@ thread_timer_remain_second (struct thread *thread) return 0; } -/* Trim blankspace and "()"s */ -static char * -strip_funcname (const char *funcname) -{ - char buff[100]; - char tmp, *ret, *e, *b = buff; +/* Get new cpu history */ - strncpy(buff, funcname, sizeof(buff)); - buff[ sizeof(buff) -1] = '\0'; - e = buff +strlen(buff) -1; +static struct cpu_thread_history* +thread_get_hist(struct thread* thread, const char* funcname) +{ + struct cpu_thread_history tmp ; + struct cpu_thread_history* hist ; - /* Wont work for funcname == "Word (explanation)" */ + tmp.func = thread->func ; + tmp.funcname = funcname ; - while (*b == ' ' || *b == '(') - ++b; - while (*e == ' ' || *e == ')') - --e; - e++; + LOCK + hist = hash_get (cpu_record, &tmp, + (void * (*) (void *))cpu_record_hash_alloc); + UNLOCK - tmp = *e; - *e = '\0'; - ret = XSTRDUP (MTYPE_THREAD_FUNCNAME, b); - *e = tmp; + return hist ; +} ; - return ret; -} - -/* Get new thread. */ +/* Get new thread. 
*/ static struct thread * thread_get (struct thread_master *m, u_char type, int (*func) (struct thread *), void *arg, const char* funcname) @@ -607,23 +650,22 @@ thread_get (struct thread_master *m, u_char type, if (!thread_empty (&m->unuse)) { thread = thread_trim_head (&m->unuse); - if (thread->funcname) - XFREE(MTYPE_THREAD_FUNCNAME, thread->funcname); + memset(thread, 0, sizeof (struct thread)) ; } else { thread = XCALLOC (MTYPE_THREAD, sizeof (struct thread)); m->alloc++; } - thread->type = type; + thread->type = type; thread->add_type = type; - thread->master = m; - thread->func = func; - thread->arg = arg; + thread->master = m; + thread->func = func; + thread->arg = arg; - thread->funcname = strip_funcname(funcname); + thread->hist = thread_get_hist(thread, funcname) ; - return thread; + return thread ; } /* Add new read thread. */ @@ -672,48 +714,190 @@ funcname_thread_add_write (struct thread_master *m, return thread; } +/*============================================================================== + * Timer Threads -- THREAD_TIMER and THREAD_BACKGROUND + * + * Standard Timer Threads are sorted by the "struct timeval sands", and + * processed by thread_timer_process() -- which moves any expired timer + * threads onto the THREAD_READY queue. So, the scheduling of background stuff + * is done by not processing the THREAD_BACKGROUND queue until there is + * nothing else to do. + * + * When using a qtimer_pile: + * + * * THREAD_TIMER threads have an associated qtimer. + * + * When the timer expires, the qtimer is cut from the thread (and put onto + * the spare_qtimers list). The thread is then queued on the THREAD_READY + * queue (as before). + * + * * THREAD_BACKGROUND threads which have a non-zero delay are treated much + * as THREAD_TIMER, except that when the timer expires, the thread is + * queued on the THREAD_BACKGROUND queue. + * + * The THREAD_BACKGROUND queue is visited only when there is nothing else + * to do. + * + * Note that when using a qtimer_pile, and there is an active qtimer associated + * with the thread, the thread will be on the THREAD_TIMER queue -- so that it + * can be collected up and released if required. + * + * NB: when using a qtimer_pile, if there is a qtimer associated with a + * THREAD_TIMER or a THREAD_BACKGROUND thread, then thread->u.qtr points + * at the qtimer. + * + * AND, conversely, if there is no qtimer, then thread->u.ptr == NULL. + */ + +/*------------------------------------------------------------------------------ + * Set use_qtimer_pile ! + */ +extern void +thread_set_qtimer_pile(qtimer_pile pile) +{ + passert(!used_standard_timer) ; + + use_qtimer_pile = pile ; +} ; + +/*------------------------------------------------------------------------------ + * Unset qtimer associated with the given THREAD_TIMER or THREAD_BACKGROUND + * thread -- if any. + * + * Moves any qtimer onto the spare_qtimers list. + */ +static void +thread_qtimer_unset(struct thread* thread) +{ + qtimer qtr ; + assert (thread->type == THREAD_TIMER || thread->type == THREAD_BACKGROUND); + assert (use_qtimer_pile != NULL) ; + + qtr = thread->u.qtr ; + if (qtr != NULL) + { + qtimer_unset(qtr) ; + + qtr->pile = (void*)spare_qtimers ; + spare_qtimers = qtr ; + + thread->u.qtr = NULL ; + } ; +} ; + +/*------------------------------------------------------------------------------ + * The qtimer action function -- when using qtimer pile (!) 
+ * + * Remove thread from the THREAD_TIMER queue and unset the qtimer, place + * thread on the THREAD_READY or the THREAD_BACKGROUND queue as required. + */ +static void +thread_qtimer_dispatch(qtimer qtr, void* timer_info, qtime_mono_t when) +{ + struct thread* thread = timer_info ; + + thread_list_delete (&thread->master->timer, thread) ; + thread_qtimer_unset(thread) ; + + switch (thread->type) + { + case THREAD_TIMER: + thread->type = THREAD_READY; + thread_list_add (&thread->master->ready, thread); + break ; + + case THREAD_BACKGROUND: + thread_list_add (&thread->master->background, thread); + break ; + + default: + zabort("invalid thread type in thread_qtimer_dispatch") ; + } ; +} ; + +/*------------------------------------------------------------------------------ + * For standard timers, return time left on first timer on the given list. + */ +static struct timeval * +thread_timer_wait (struct thread_list *tlist, struct timeval *timer_val) +{ + if (!thread_empty (tlist)) + { + *timer_val = timeval_subtract (tlist->head->u.sands, relative_time); + return timer_val; + } + return NULL; +} + +/*------------------------------------------------------------------------------ + * Add timer of given type -- either standard or qtimer_pile as required. + * + * Timer interval is given as a struct timeval. + */ static struct thread * -funcname_thread_add_timer_timeval (struct thread_master *m, - int (*func) (struct thread *), +funcname_thread_add_timer_timeval(struct thread_master *m, + int (*func) (struct thread *), int type, void *arg, struct timeval *time_relative, const char* funcname) { struct thread *thread; - struct thread_list *list; - struct timeval alarm_time; - struct thread *tt; assert (m != NULL); + assert (time_relative != NULL); assert (type == THREAD_TIMER || type == THREAD_BACKGROUND); - assert (time_relative); - list = ((type == THREAD_TIMER) ? &m->timer : &m->background); thread = thread_get (m, type, func, arg, funcname); - /* Do we need jitter here? */ - quagga_get_relative (NULL); - alarm_time.tv_sec = relative_time.tv_sec + time_relative->tv_sec; - alarm_time.tv_usec = relative_time.tv_usec + time_relative->tv_usec; - thread->u.sands = timeval_adjust(alarm_time); - - /* Sort by timeval. */ - for (tt = list->head; tt; tt = tt->next) - if (timeval_cmp (thread->u.sands, tt->u.sands) <= 0) - break; + if (use_qtimer_pile == NULL) + { + struct thread_list *list; + struct timeval alarm_time; + struct thread *tt; - if (tt) - thread_list_add_before (list, tt, thread); + /* Do we need jitter here? */ + quagga_get_relative (NULL); + alarm_time.tv_sec = relative_time.tv_sec + time_relative->tv_sec; + alarm_time.tv_usec = relative_time.tv_usec + time_relative->tv_usec; + thread->u.sands = timeval_adjust(alarm_time); + + /* Sort by timeval. */ + list = ((type == THREAD_TIMER) ? &m->timer : &m->background); + for (tt = list->head; tt; tt = tt->next) + if (timeval_cmp (thread->u.sands, tt->u.sands) <= 0) + break; + + if (tt) + thread_list_add_before (list, tt, thread); + else + thread_list_add (list, thread); + + used_standard_timer = 1 ; + } else - thread_list_add (list, thread); + { + qtimer qtr = spare_qtimers ; + if (qtr != NULL) + spare_qtimers = (qtimer)(qtr->pile) ; + + qtr = qtimer_init_new(qtr, use_qtimer_pile, NULL, thread) ; + thread->u.qtr = qtr ; + + qtimer_set_interval(qtr, timeval2qtime(time_relative), + thread_qtimer_dispatch) ; + thread_list_add(&m->timer, thread) ; + } ; return thread; } - -/* Add timer event thread. 
*/ +/*------------------------------------------------------------------------------ + * Add a THREAD_TIMER timer -- either standard or qtimer_pile as required. + * + * Timer interval is given in seconds. + */ struct thread * funcname_thread_add_timer (struct thread_master *m, int (*func) (struct thread *), @@ -721,16 +905,18 @@ funcname_thread_add_timer (struct thread_master *m, { struct timeval trel; - assert (m != NULL); - - trel.tv_sec = timer; + trel.tv_sec = timer; trel.tv_usec = 0; return funcname_thread_add_timer_timeval (m, func, THREAD_TIMER, arg, &trel, funcname); } -/* Add timer event thread with "millisecond" resolution */ +/*------------------------------------------------------------------------------ + * Add a THREAD_TIMER timer -- either standard or qtimer_pile as required. + * + * Timer interval is given in milliseconds. + */ struct thread * funcname_thread_add_timer_msec (struct thread_master *m, int (*func) (struct thread *), @@ -738,45 +924,56 @@ funcname_thread_add_timer_msec (struct thread_master *m, { struct timeval trel; - assert (m != NULL); - - trel.tv_sec = timer / 1000; - trel.tv_usec = 1000*(timer % 1000); + trel.tv_sec = timer / 1000 ; + trel.tv_usec = (timer % 1000) * 1000 ; return funcname_thread_add_timer_timeval (m, func, THREAD_TIMER, - arg, &trel, funcname); + arg, &trel, funcname); } -/* Add a background thread, with an optional millisec delay */ +/*------------------------------------------------------------------------------ + * Add a THREAD_BACKGROUND thread -- either standard or qtimer_pile as required. + * + * Timer interval is given in milliseconds. + * + * For qtimer_pile, if the delay is zero, the thread is placed straight onto + * the THREAD_BACKGROUND queue. + */ struct thread * funcname_thread_add_background (struct thread_master *m, int (*func) (struct thread *), void *arg, long delay, const char *funcname) { - struct timeval trel; + if ((delay != 0) || (use_qtimer_pile == NULL)) + { + struct timeval trel; - assert (m != NULL); + trel.tv_sec = delay / 1000; + trel.tv_usec = (delay % 1000) * 1000 ; - if (delay) - { - trel.tv_sec = delay / 1000; - trel.tv_usec = 1000*(delay % 1000); + return funcname_thread_add_timer_timeval (m, func, THREAD_BACKGROUND, + arg, &trel, funcname); } else { - trel.tv_sec = 0; - trel.tv_usec = 0; - } + struct thread* thread ; + + assert (m != NULL); - return funcname_thread_add_timer_timeval (m, func, THREAD_BACKGROUND, - arg, &trel, funcname); + thread = thread_get (m, THREAD_BACKGROUND, func, arg, funcname); + thread_list_add (&m->background, thread) ; + + return thread ; + } ; } +/*----------------------------------------------------------------------------*/ /* Add simple event thread. */ struct thread * funcname_thread_add_event (struct thread_master *m, - int (*func) (struct thread *), void *arg, int val, const char* funcname) + int (*func) (struct thread *), void *arg, int val, + const char* funcname) { struct thread *thread; @@ -789,7 +986,11 @@ funcname_thread_add_event (struct thread_master *m, return thread; } -/* Cancel thread from scheduler. */ +/*------------------------------------------------------------------------------ + * Cancel thread from scheduler. + * + * Note that when using qtimer_pile need to unset any associated qtimer. 
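+ *
+ * For example (a minimal sketch -- "master", "func" and "arg" are
+ * illustrative only):
+ *
+ *    struct thread* t ;
+ *    t = funcname_thread_add_timer(master, func, arg, 10, "func") ;
+ *    ...
+ *    thread_cancel(t) ;   -- removes t from its queue; under qtimer_pile
+ *                            this also returns t's qtimer to spare_qtimers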
+ */
 void
 thread_cancel (struct thread *thread)
 {
@@ -808,6 +1009,8 @@ thread_cancel (struct thread *thread)
       list = &thread->master->write;
       break;
     case THREAD_TIMER:
+      if ((use_qtimer_pile != NULL) && (thread->u.qtr != NULL))
+        thread_qtimer_unset(thread) ;
       list = &thread->master->timer;
       break;
     case THREAD_EVENT:
@@ -817,13 +1020,21 @@ thread_cancel (struct thread *thread)
       list = &thread->master->ready;
       break;
     case THREAD_BACKGROUND:
-      list = &thread->master->background;
+      if ((use_qtimer_pile != NULL) && (thread->u.qtr != NULL))
+        {
+          thread_qtimer_unset(thread) ;
+          list = &thread->master->timer;
+        }
+      else
+        list = &thread->master->background;
       break;
+
     default:
-      return;
-      break;
+      return ;
     }
+
   thread_list_delete (list, thread);
+
   thread->type = THREAD_UNUSED;
   thread_add_unuse (thread->master, thread);
 }
@@ -854,24 +1065,12 @@ thread_cancel_event (struct thread_master *m, void *arg)
   return ret;
 }
-static struct timeval *
-thread_timer_wait (struct thread_list *tlist, struct timeval *timer_val)
-{
-  if (!thread_empty (tlist))
-    {
-      *timer_val = timeval_subtract (tlist->head->u.sands, relative_time);
-      return timer_val;
-    }
-  return NULL;
-}
-
 static struct thread *
 thread_run (struct thread_master *m, struct thread *thread,
             struct thread *fetch)
 {
   *fetch = *thread;
   thread->type = THREAD_UNUSED;
-  thread->funcname = NULL;  /* thread_call will free fetch's copied pointer */
   thread_add_unuse (m, thread);
   return fetch;
 }
@@ -921,7 +1120,11 @@ thread_timer_process (struct thread_list *list, struct timeval *timenow)
   return ready;
 }
-/* Fetch next ready thread. */
+/*------------------------------------------------------------------------------
+ * Fetch next ready thread -- for standard thread handling.
+ *
+ * (This is not used when using qtimer_pile, or qnexus stuff.)
+ */
 struct thread *
 thread_fetch (struct thread_master *m, struct thread *fetch)
 {
@@ -939,8 +1142,7 @@ thread_fetch (struct thread_master *m, struct thread *fetch)
       int num = 0;
       /* Signals are highest priority */
-      if (!qpthreads_enabled)
-        quagga_sigevent_process ();
+      quagga_sigevent_process ();
       /* Normal event are the next highest priority.
*/ - if ((thread = thread_trim_head (&m->event)) != NULL) - return thread_run (m, thread, fetch); + struct thread_list* list ; + struct thread fetch ; + int count = 0 ; - if (priority <= qpn_pri_first) - return NULL; - - /* If there are any ready threads from previous scheduler runs, - * process top of them. - */ - if ((thread = thread_trim_head (&m->ready)) != NULL) - return thread_run (m, thread, fetch); - - if (priority <= qpn_pri_second) - return NULL; - - /* Check foreground timers. */ - quagga_get_relative (NULL); - thread_timer_process (&m->timer, &relative_time); - - if ((thread = thread_trim_head (&m->ready)) != NULL) - return thread_run (m, thread, fetch); + while (1) + { + if (thread_empty(list = &m->event)) + if (thread_empty(list = &m->ready)) + return count ; - if (priority <= qpn_pri_third) - return NULL; + thread_call(thread_run(m, thread_list_delete(list, list->head), &fetch)) ; - /* Background timer/events, lowest priority */ - thread_timer_process (&m->background, &relative_time); + ++count ; + } ; +} ; - if ((thread = thread_trim_head (&m->ready)) != NULL) - return thread_run (m, thread, fetch); +/*------------------------------------------------------------------------------ + * Dispatch first item on the background queue, if any. + * + * This is used when qnexus is managing most things. + * + * Background threads spend their lives being cycled around the background + * queue -- possibly via the timer queue, if a delay is put in before the next + * invocation. + * + * Returns: 1 if dispatched a background thread + * 0 if there are no background threads + */ +extern int +thread_dispatch_background(struct thread_master *m) +{ + struct thread* thread ; + struct thread fetch ; - /* Calculate select wait timer if nothing else to do */ - timer_wait = thread_timer_wait (&m->timer, &timer_val); - timer_wait_bg = thread_timer_wait (&m->background, &timer_val_bg); + if ((thread = thread_trim_head (&m->background)) == NULL) + return 0 ; - if (timer_wait_bg && - (!timer_wait || (timeval_cmp (*timer_wait, *timer_wait_bg) > 0))) - timer_wait = timer_wait_bg; + thread_call(thread_run(m, thread, &fetch)) ; - /* When is the next timer due ? */ - *event_wait = (timer_wait != NULL) - ? timeval2qtime(timer_wait) - : 0; + return 1 ; +} ; - return NULL; -} unsigned long thread_consumed_time (RUSAGE_T *now, RUSAGE_T *start, unsigned long *cputime) @@ -1130,25 +1329,6 @@ thread_call (struct thread *thread) unsigned long realtime, cputime; RUSAGE_T ru; - /* Cache a pointer to the relevant cpu history thread, if the thread - * does not have it yet. 
- * - * Callers submitting 'dummy threads' hence must take care that - * thread->cpu is NULL - */ - if (!thread->hist) - { - struct cpu_thread_history tmp; - - tmp.func = thread->func; - tmp.funcname = thread->funcname; - - LOCK - thread->hist = hash_get (cpu_record, &tmp, - (void * (*) (void *))cpu_record_hash_alloc); - UNLOCK - } - GETRUSAGE (&thread->ru); (*thread->func) (thread); @@ -1157,19 +1337,22 @@ thread_call (struct thread *thread) realtime = thread_consumed_time (&ru, &thread->ru, &cputime); - LOCK - thread->hist->real.total += realtime; - if (thread->hist->real.max < realtime) - thread->hist->real.max = realtime; + if (thread->hist != NULL) + { + LOCK + thread->hist->real.total += realtime; + if (thread->hist->real.max < realtime) + thread->hist->real.max = realtime; #ifdef HAVE_RUSAGE - thread->hist->cpu.total += cputime; - if (thread->hist->cpu.max < cputime) - thread->hist->cpu.max = cputime; + thread->hist->cpu.total += cputime; + if (thread->hist->cpu.max < cputime) + thread->hist->cpu.max = cputime; #endif - ++(thread->hist->total_calls); - thread->hist->types |= (1 << thread->add_type); - UNLOCK + ++(thread->hist->total_calls); + thread->hist->types |= (1 << thread->add_type); + UNLOCK + } ; #ifdef CONSUMED_TIME_CHECK if (realtime > CONSUMED_TIME_CHECK) @@ -1180,13 +1363,12 @@ thread_call (struct thread *thread) * to fix. */ zlog_warn ("SLOW THREAD: task %s (%lx) ran for %lums (cpu time %lums)", - thread->funcname, + (thread->hist != NULL) ? thread->hist->funcname : "??", (unsigned long) thread->func, realtime/1000, cputime/1000); } #endif /* CONSUMED_TIME_CHECK */ - XFREE (MTYPE_THREAD_FUNCNAME, thread->funcname); } /* Execute thread */ @@ -1207,11 +1389,9 @@ funcname_thread_execute (struct thread_master *m, dummy.func = func; dummy.arg = arg; dummy.u.val = val; - dummy.funcname = strip_funcname (funcname); + dummy.hist = thread_get_hist(&dummy, funcname) ; thread_call (&dummy); - XFREE (MTYPE_THREAD_FUNCNAME, dummy.funcname); - return NULL; } diff --git a/lib/thread.h b/lib/thread.h index 1e68007a..fa021486 100644 --- a/lib/thread.h +++ b/lib/thread.h @@ -16,7 +16,7 @@ * You should have received a copy of the GNU General Public License * along with GNU Zebra; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. */ #ifndef _ZEBRA_THREAD_H @@ -25,6 +25,7 @@ #include <sys/resource.h> #include "qtime.h" #include "qpnexus.h" +#include "qtimers.h" struct rusage_t { @@ -68,22 +69,22 @@ struct thread { thread_type type; /* thread type */ thread_type add_type; /* thread type */ - struct thread *next; /* next pointer of the thread */ + struct thread *next; /* next pointer of the thread */ struct thread *prev; /* previous pointer of the thread */ struct thread_master *master; /* pointer to the struct thread_master. */ int (*func) (struct thread *); /* event function */ void *arg; /* event argument */ union { - int val; /* second argument of the event. */ + int val; /* second argument of the event. */ int fd; /* file descriptor in case of read/write. */ - struct timeval sands; /* rest of time sands value. */ + struct timeval sands; /* rest of time sands value. */ + qtimer qtr ; /* pointer to related qtimer */ } u; RUSAGE_T ru; /* Indepth usage info. 
*/ struct cpu_thread_history *hist; /* cache pointer to cpu_history */ - char* funcname; }; -struct cpu_thread_history +struct cpu_thread_history { int (*func)(struct thread *); const char *funcname; @@ -169,8 +170,9 @@ extern struct thread_master *thread_master_create (void); extern void thread_master_free (struct thread_master *); extern void thread_init_r (void); extern void thread_finish (void); +extern void thread_set_qtimer_pile(qtimer_pile pile) ; -extern struct thread *funcname_thread_add_read (struct thread_master *, +extern struct thread *funcname_thread_add_read (struct thread_master *, int (*)(struct thread *), void *, int, const char*); extern struct thread *funcname_thread_add_write (struct thread_master *, @@ -196,8 +198,8 @@ extern struct thread *funcname_thread_execute (struct thread_master *, extern void thread_cancel (struct thread *); extern unsigned int thread_cancel_event (struct thread_master *, void *); extern struct thread *thread_fetch (struct thread_master *, struct thread *); -struct thread * thread_fetch_event (enum qpn_priority,struct thread_master *m, struct thread *fetch, - qtime_mono_t *event_wait); +extern int thread_dispatch(struct thread_master *m) ; +extern int thread_dispatch_background(struct thread_master *m) ; extern void thread_call (struct thread *); extern unsigned long thread_timer_remain_second (struct thread *); extern int thread_should_yield (struct thread *); diff --git a/lib/workqueue.c b/lib/workqueue.c index 7c811edd..6f2cd531 100644 --- a/lib/workqueue.c +++ b/lib/workqueue.c @@ -1,4 +1,4 @@ -/* +/* * Quagga Work Queue Support. * * Copyright (C) 2005 Sun Microsystems, Inc. @@ -18,38 +18,30 @@ * You should have received a copy of the GNU General Public License * along with Quagga; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. 
 */

#include <lib/zebra.h>

#include "thread.h"
#include "memory.h"
#include "workqueue.h"
-#include "linklist.h"
#include "command.h"
#include "log.h"
+#include "linklist.h"

/* master list of work_queues */
static struct list work_queues;

#define WORK_QUEUE_MIN_GRANULARITY 1

-static struct work_queue_item *
-work_queue_item_new (struct work_queue *wq)
-{
-  struct work_queue_item *item;
-  assert (wq);
-
-  item = XCALLOC (MTYPE_WORK_QUEUE_ITEM,
-                  sizeof (struct work_queue_item));
-
-  return item;
-}
-
 static void
-work_queue_item_free (struct work_queue_item *item)
+work_queue_item_free (struct work_queue *wq, struct work_queue_item *item)
 {
-  XFREE (MTYPE_WORK_QUEUE_ITEM, item);
+  /* call private data deletion callback if needed */
+  if (wq->spec.del_item_data != NULL)
+    wq->spec.del_item_data (wq, item) ;
+
+  XFREE (MTYPE_WORK_QUEUE_ITEM, item) ;
   return;
 }

@@ -58,46 +50,40 @@
 struct work_queue *
 work_queue_new (struct thread_master *m, const char *queue_name)
 {
   struct work_queue *new;
-
+
   new = XCALLOC (MTYPE_WORK_QUEUE, sizeof (struct work_queue));

   if (new == NULL)
     return new;
-
-  new->name = XSTRDUP (MTYPE_WORK_QUEUE_NAME, queue_name);
+
+  new->name   = XSTRDUP (MTYPE_WORK_QUEUE_NAME, queue_name);
   new->master = m;
   SET_FLAG (new->flags, WQ_UNPLUGGED);
-
-  if ( (new->items = list_new ()) == NULL)
-    {
-      XFREE (MTYPE_WORK_QUEUE_NAME, new->name);
-      XFREE (MTYPE_WORK_QUEUE, new);
-
-      return NULL;
-    }
-
-  new->items->del = (void (*)(void *)) work_queue_item_free;
-
+
   listnode_add (&work_queues, new);
-
+
   new->cycles.granularity = WORK_QUEUE_MIN_GRANULARITY;

   /* Default values, can be overriden by caller */
   new->spec.hold = WORK_QUEUE_DEFAULT_HOLD;
-
+
   return new;
 }

 void
 work_queue_free (struct work_queue *wq)
 {
+  work_queue_item item ;
+
   if (wq->thread != NULL)
     thread_cancel(wq->thread);
-
-  /* list_delete frees items via callback */
-  list_delete (wq->items);
-  listnode_delete (&work_queues, wq);
-
+
+  while ((item = wq->head) != NULL)
+    {
+      wq->head = item->next ;
+      work_queue_item_free(wq, item) ;
+    } ;
+
+  listnode_delete (&work_queues, wq) ;
+
   XFREE (MTYPE_WORK_QUEUE_NAME, wq->name);
   XFREE (MTYPE_WORK_QUEUE, wq);
   return;
@@ -109,59 +95,151 @@ work_queue_schedule (struct work_queue *wq, unsigned int delay)
   /* if appropriate, schedule work queue thread */
   if ( CHECK_FLAG (wq->flags, WQ_UNPLUGGED)
        && (wq->thread == NULL)
-       && (listcount (wq->items) > 0) )
+       && (wq->head != NULL) )
     {
-      wq->thread = thread_add_background (wq->master, work_queue_run,
+      wq->thread = thread_add_background (wq->master, work_queue_run,
                                           wq, delay);
      return 1;
    }
  else
    return 0;
 }
-
-void
-work_queue_add (struct work_queue *wq, void *data)
+
+/*------------------------------------------------------------------------------
+ * Create new work queue item and place on the end of the given work queue.
+ *
+ * Schedules the work queue if there were no items (unless already scheduled
+ * or plugged).
+ *
+ * Returns the address of the args area in the new item.
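+ *
+ * For example (a minimal sketch -- "wq" and "struct my_args" are
+ * illustrative, not part of this patch):
+ *
+ *    WQ_ARGS_SIZE_OK(my_args) ;      -- compile time check that the args
+ *                                       fit in the wq_args area
+ *
+ *    struct my_args* a = work_queue_item_add(wq) ;
+ *    a->... = ... ;                  -- arguments are built in situ, not in
+ *                                       a separately malloced data block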
+ */ +extern void* +work_queue_item_add (struct work_queue *wq) { - struct work_queue_item *item; - + work_queue_item item ; + assert (wq); - if (!(item = work_queue_item_new (wq))) + item = XCALLOC (MTYPE_WORK_QUEUE_ITEM, sizeof (struct work_queue_item)); + + if (item == NULL) { zlog_err ("%s: unable to get new queue item", __func__); - return; + return NULL ; + } + + item->next = NULL ; + if (wq->head == NULL) + { + assert(wq->list_count == 0) ; + wq->head = item ; + item->prev = NULL ; } - - item->data = data; - listnode_add (wq->items, item); - + else + { + assert((wq->tail != NULL) && (wq->list_count > 0)) ; + wq->tail->next = item ; + item->prev = wq->tail ; + } ; + wq->tail = item ; + + ++wq->list_count ; work_queue_schedule (wq, wq->spec.hold); - - return; + + return work_queue_item_args(item) ; } static void -work_queue_item_remove (struct work_queue *wq, struct listnode *ln) +work_queue_item_remove (struct work_queue *wq, work_queue_item item) { - struct work_queue_item *item = listgetdata (ln); + assert ((wq != NULL) && (item != NULL)) ; + + if (wq->head == item) + { + /* Removing the first item */ + assert(item->prev == NULL) ; + + wq->head = item->next ; - assert (item && item->data); + if (wq->tail == item) + { + /* Removing the only item */ + assert((item->next == NULL) && (wq->list_count == 1)) ; + wq->tail = NULL ; + } + else + { + /* First, but not the only item */ + assert((item->next != NULL) && (wq->list_count > 1)) ; + wq->head->prev = NULL ; + } ; + } + else if (wq->tail == item) + { + /* Removing last, but not only item */ + assert(item->next == NULL) ; + assert((item->prev != NULL) && (wq->list_count > 1)) ; + + wq->tail = item->prev ; + wq->tail->next = NULL ; + } + else + { + /* Removing from somewhere in middle */ + assert(item->next != NULL) ; + assert((item->prev != NULL) && (wq->list_count > 2)) ; + + item->prev->next = item->next ; + item->next->prev = item->prev ; + } ; - /* call private data deletion callback if needed */ - if (wq->spec.del_item_data) - wq->spec.del_item_data (wq, item->data); + --wq->list_count ; + work_queue_item_free (wq, item); - list_delete_node (wq->items, ln); - work_queue_item_free (item); - return; } -static void -work_queue_item_requeue (struct work_queue *wq, struct listnode *ln) +static work_queue_item +work_queue_item_requeue (struct work_queue *wq, work_queue_item item) { - LISTNODE_DETACH (wq->items, ln); - LISTNODE_ATTACH (wq->items, ln); /* attach to end of list */ + work_queue_item next = item->next ; + work_queue_item last = wq->tail ; + + assert(last != NULL) ; + + if (last == item) + { + /* Requeuing last item -- easy ! */ + assert(next == NULL) ; + return item ; + } ; + + assert(next != NULL) ; + + if (wq->head == item) + { + /* Requeuing first, but not only item */ + assert(item->prev == NULL) ; + + wq->head = next ; + next->prev = NULL ; + } + else + { + /* Requeuing something in middle */ + assert(item->prev != NULL) ; + + item->prev->next = item->next ; + item->next->prev = item->prev ; + } ; + + item->next = NULL ; + item->prev = last ; + + last->next = item ; + wq->tail = item ; + + return next ; } DEFUN(show_work_queues, @@ -172,8 +250,8 @@ DEFUN(show_work_queues, { struct listnode *node; struct work_queue *wq; - - vty_out (vty, + + vty_out (vty, "%c %8s %5s %8s %21s%s", ' ', "List","(ms) ","Q. 
Runs","Cycle Counts ", VTY_NEWLINE); @@ -183,24 +261,24 @@ DEFUN(show_work_queues, "Items", "Hold", "Total", - "Best","Gran.","Avg.", - "Name", + "Best","Gran.","Avg.", + "Name", VTY_NEWLINE); - + for (ALL_LIST_ELEMENTS_RO ((&work_queues), node, wq)) { vty_out (vty,"%c %8d %5d %8ld %7d %6d %6u %s%s", (CHECK_FLAG (wq->flags, WQ_UNPLUGGED) ? ' ' : 'P'), - listcount (wq->items), + wq->list_count, wq->spec.hold, wq->runs, wq->cycles.best, wq->cycles.granularity, - (wq->runs) ? + (wq->runs) ? (unsigned int) (wq->cycles.total / wq->runs) : 0, wq->name, VTY_NEWLINE); } - + return CMD_SUCCESS; } @@ -212,9 +290,9 @@ work_queue_plug (struct work_queue *wq) { if (wq->thread) thread_cancel (wq->thread); - + wq->thread = NULL; - + UNSET_FLAG (wq->flags, WQ_UNPLUGGED); } @@ -232,22 +310,21 @@ work_queue_unplug (struct work_queue *wq) /* timer thread to process a work queue * will reschedule itself if required, - * otherwise work_queue_item_add + * otherwise work_queue_item_add */ int work_queue_run (struct thread *thread) { struct work_queue *wq; - struct work_queue_item *item; + work_queue_item next, item ; wq_item_status ret; unsigned int cycles = 0; - struct listnode *node, *nnode; char yielded = 0; wq = THREAD_ARG (thread); wq->thread = NULL; - assert (wq && wq->items); + assert (wq != NULL) ; /* calculate cycle granularity: * list iteration == 1 cycle @@ -258,38 +335,40 @@ work_queue_run (struct thread *thread) * * Best: starts low, can only increase * - * Granularity: starts at WORK_QUEUE_MIN_GRANULARITY, can be decreased - * if we run to end of time slot, can increase otherwise + * Granularity: starts at WORK_QUEUE_MIN_GRANULARITY, can be decreased + * if we run to end of time slot, can increase otherwise * by a small factor. * * We could use just the average and save some work, however we want to be * able to adjust quickly to CPU pressure. Average wont shift much if * daemon has been running a long time. 
*/ - if (wq->cycles.granularity == 0) - wq->cycles.granularity = WORK_QUEUE_MIN_GRANULARITY; + if (wq->cycles.granularity == 0) + wq->cycles.granularity = WORK_QUEUE_MIN_GRANULARITY; - for (ALL_LIST_ELEMENTS (wq->items, node, nnode, item)) + next = wq->head ; + while (next != NULL) { - assert (item && item->data); - + item = next ; + next = item->next ; /* default next item */ + /* dont run items which are past their allowed retries */ if (item->ran > wq->spec.max_retries) { /* run error handler, if any */ - if (wq->spec.errorfunc) - wq->spec.errorfunc (wq, item->data); - work_queue_item_remove (wq, node); + if (wq->spec.errorfunc != NULL) + wq->spec.errorfunc (wq, item); + work_queue_item_remove (wq, item); continue; } /* run and take care of items that want to be retried immediately */ do { - ret = wq->spec.workfunc (wq, item->data); + ret = wq->spec.workfunc (wq, item); item->ran++; } - while ((ret == WQ_RETRY_NOW) + while ((ret == WQ_RETRY_NOW) && (item->ran < wq->spec.max_retries)); switch (ret) @@ -308,21 +387,21 @@ work_queue_run (struct thread *thread) case WQ_REQUEUE: { item->ran--; - work_queue_item_requeue (wq, node); + next = work_queue_item_requeue (wq, item); break; } case WQ_RETRY_NOW: /* a RETRY_NOW that gets here has exceeded max_tries, same as ERROR */ case WQ_ERROR: { - if (wq->spec.errorfunc) + if (wq->spec.errorfunc != NULL) wq->spec.errorfunc (wq, item); } /* fall through here is deliberate */ case WQ_SUCCESS: default: { - work_queue_item_remove (wq, node); + work_queue_item_remove (wq, item); break; } } @@ -331,7 +410,7 @@ work_queue_run (struct thread *thread) cycles++; /* test if we should yield */ - if ( !(cycles % wq->cycles.granularity) + if ( !(cycles % wq->cycles.granularity) && thread_should_yield (thread)) { yielded = 1; @@ -346,15 +425,15 @@ stats: /* we yielded, check whether granularity should be reduced */ if (yielded && (cycles < wq->cycles.granularity)) { - wq->cycles.granularity = ((cycles > 0) ? cycles + wq->cycles.granularity = ((cycles > 0) ? cycles : WORK_QUEUE_MIN_GRANULARITY); } - + if (cycles >= (wq->cycles.granularity)) { if (cycles > wq->cycles.best) wq->cycles.best = cycles; - + /* along with yielded check, provides hysteris for granularity */ if (cycles > (wq->cycles.granularity * WQ_HYSTERIS_FACTOR * 2)) wq->cycles.granularity *= WQ_HYSTERIS_FACTOR; /* quick ramp-up */ @@ -362,7 +441,7 @@ stats: wq->cycles.granularity += WQ_HYSTERIS_FACTOR; } #undef WQ_HYSTERIS_FACTOR - + wq->runs++; wq->cycles.total += cycles; @@ -370,12 +449,12 @@ stats: printf ("%s: cycles %d, new: best %d, worst %d\n", __func__, cycles, wq->cycles.best, wq->cycles.granularity); #endif - + /* Is the queue done yet? If it is, call the completion callback. */ - if (listcount (wq->items) > 0) + if (wq->head != NULL) work_queue_schedule (wq, 0); else if (wq->spec.completion_func) wq->spec.completion_func (wq); - + return 0; } diff --git a/lib/workqueue.h b/lib/workqueue.h index f59499a0..5d2f2da2 100644 --- a/lib/workqueue.h +++ b/lib/workqueue.h @@ -1,4 +1,4 @@ -/* +/* * Quagga Work Queues. * * Copyright (C) 2005 Sun Microsystems, Inc. @@ -18,14 +18,18 @@ * You should have received a copy of the GNU General Public License * along with Quagga; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. 
 */

#ifndef _QUAGGA_WORK_QUEUE_H
#define _QUAGGA_WORK_QUEUE_H

+#ifndef Inline
+#define Inline static inline
+#endif
+
 /* Hold time for the initial schedule of a queue run, in millisec */
-#define WORK_QUEUE_DEFAULT_HOLD 50
+#define WORK_QUEUE_DEFAULT_HOLD 50

 /* action value, for use by item processor and item error handlers */
 typedef enum
@@ -40,12 +44,37 @@ typedef enum
    * the particular item.. */
 } wq_item_status;

+enum { wq_args_size_max = 24 } ;        /* maximum size of union wq_args */
+
+union wq_args
+{
+  void* data ;
+  char  bytes[wq_args_size_max] ;       /* empty space */
+} ;
+
+#define WQ_ARGS_SIZE_OK(s) CONFIRM(sizeof(struct s) <= wq_args_size_max)
+
 /* A single work queue item, unsurprisingly */
+typedef struct work_queue_item* work_queue_item ;
 struct work_queue_item
 {
-  void *data;                           /* opaque data */
+  union wq_args args ;                  /* cast as required */
+
+  struct work_queue_item* next ;        /* the queue itself */
+  struct work_queue_item* prev ;
+
   unsigned short ran;                   /* # of times item has been run */
-};
+} ;
+
+/* work_queue_item structures are malloced.  That guarantees maximum alignment.
+ * To guarantee maximum alignment for "struct args", it must be first item !
+ *
+ * (The typedef is required to stop Eclipse (3.4.2 with CDT 5.0) whining
+ * about first argument of offsetof().)
+ */
+typedef struct work_queue_item work_queue_item_t ;
+CONFIRM(offsetof(work_queue_item_t, args) == 0) ;
+                                        /* so guaranteed max alignment */

 #define WQ_UNPLUGGED    (1 << 0)  /* available for draining */

@@ -57,52 +86,55 @@ struct work_queue
   struct thread_master *master;       /* thread master */
   struct thread *thread;              /* thread, if one is active */
   char *name;                         /* work queue name */
-
+
   /* Specification for this work queue.
    * Public, must be set before use by caller. May be modified at will.
    */
   struct {
     /* optional opaque user data, global to the queue. */
     void *data;
-
+
     /* work function to process items with:
     * First argument is the workqueue queue.
-    * Second argument is the item data
+    * Second argument is the work queue item
     */
-    wq_item_status (*workfunc) (struct work_queue *, void *);
+    wq_item_status (*workfunc) (struct work_queue *, work_queue_item);

    /* error handling function, optional */
-    void (*errorfunc) (struct work_queue *, struct work_queue_item *);
-
+    void (*errorfunc) (struct work_queue *, work_queue_item);
+
    /* callback to delete user specific item data */
-    void (*del_item_data) (struct work_queue *, void *);
-
+    void (*del_item_data) (struct work_queue *, work_queue_item);
+
    /* completion callback, called when queue is emptied, optional */
    void (*completion_func) (struct work_queue *);
-
+
    /* max number of retries to make for item that errors */
-    unsigned int max_retries;
+    unsigned int max_retries;

    unsigned int hold;                 /* hold time for first run, in ms */
   } spec;
-
+
   /* remaining fields should be opaque to users */
-  struct list *items;                 /* queue item list */
-  unsigned long runs;                 /* runs count */
-
+  work_queue_item head ;              /* queue item list */
+  work_queue_item tail ;
+  unsigned        list_count ;
+
+  unsigned long runs;                 /* runs count */
+
   struct {
     unsigned int best;
    unsigned int granularity;
    unsigned long total;
  } cycles;                            /* cycle counts */
-
+
  /* private state */
  u_int16_t flags;                     /* user set flag */
 };

 /* User API */

-/* create a new work queue, of given name.
+/* create a new work queue, of given name.
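 * (for example -- following the pattern in tests/heavy-wq.c; "my_work" and
 *  "my_del" are illustrative names, not part of this patch:
 *
 *     wq = work_queue_new (master, "example queue") ;
 *     wq->spec.workfunc      = &my_work ;
 *     wq->spec.del_item_data = &my_del ;
 *     wq->spec.max_retries   = 3 ;
 *  )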
* user must fill in the spec of the returned work queue before adding * anything to it */ @@ -112,7 +144,10 @@ extern struct work_queue *work_queue_new (struct thread_master *, extern void work_queue_free (struct work_queue *); /* Add the supplied data as an item onto the workqueue */ -extern void work_queue_add (struct work_queue *, void *); +Inline void work_queue_add (struct work_queue *, void *); + +extern void* work_queue_item_add(struct work_queue* wq) ; +Inline void* work_queue_item_args(work_queue_item item) ; /* plug the queue, ie prevent it from being drained / processed */ extern void work_queue_plug (struct work_queue *wq); @@ -122,4 +157,22 @@ extern void work_queue_unplug (struct work_queue *wq); /* Helpers, exported for thread.c and command.c */ extern int work_queue_run (struct thread *); extern struct cmd_element show_work_queues_cmd; + +/*============================================================================== + * The Inline functions + */ + +Inline void work_queue_add (struct work_queue* wq, void* data) +{ + union wq_args* args = work_queue_item_add(wq) ; + args->data = data ; +} + +/* Return pointer to the args area in the given work queue item */ +Inline void* +work_queue_item_args(work_queue_item item) +{ + return &item->args ; +} ; + #endif /* _QUAGGA_WORK_QUEUE_H */ diff --git a/tests/heavy-wq.c b/tests/heavy-wq.c index 4cd499a5..bf3ab85a 100644 --- a/tests/heavy-wq.c +++ b/tests/heavy-wq.c @@ -81,15 +81,15 @@ heavy_wq_add (struct vty *vty, const char *str, int i) } static void -slow_func_err (struct work_queue *wq, struct work_queue_item *item) +slow_func_err (struct work_queue *wq, work_queue_item item) { printf ("%s: running error function\n", __func__); } static void -slow_func_del (struct work_queue *wq, void *data) +slow_func_del (struct work_queue *wq, work_queue_item item) { - struct heavy_wq_node *hn = data; + struct heavy_wq_node *hn = item->args.data; assert (hn && hn->str); printf ("%s: %s\n", __func__, hn->str); XFREE (MTYPE_PREFIX_LIST_STR, hn->str); @@ -98,9 +98,9 @@ slow_func_del (struct work_queue *wq, void *data) } static wq_item_status -slow_func (struct work_queue *wq, void *data) +slow_func (struct work_queue *wq, work_queue_item item) { - struct heavy_wq_node *hn = data; + struct heavy_wq_node *hn = item->args.data; double x = 1; int j; @@ -163,11 +163,11 @@ heavy_wq_init () return -1; } - heavy_wq->spec.workfunc = &slow_func; - heavy_wq->spec.errorfunc = &slow_func_err; + heavy_wq->spec.workfunc = &slow_func; + heavy_wq->spec.errorfunc = &slow_func_err; heavy_wq->spec.del_item_data = &slow_func_del; - heavy_wq->spec.max_retries = 3; - heavy_wq->spec.hold = 1000; + heavy_wq->spec.max_retries = 3; + heavy_wq->spec.hold = 1000; return 0; } diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 12f3fa5a..0677cafd 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -16,7 +16,7 @@ * You should have received a copy of the GNU General Public License * along with GNU Zebra; see the file COPYING. If not, write to the Free * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. + * 02111-1307, USA. */ #include <zebra.h> @@ -52,7 +52,7 @@ int rib_process_hold_time = 10; /* Each route type's string and default distance value. */ static const struct -{ +{ int key; int distance; } route_info[] = @@ -68,7 +68,7 @@ static const struct {ZEBRA_ROUTE_ISIS, 115}, {ZEBRA_ROUTE_BGP, 20 /* IBGP is 200. */} }; - + /* Vector for routing table. 
*/ static vector vrf_vector; @@ -141,7 +141,7 @@ vrf_static_table (afi_t afi, safi_t safi, u_int32_t id) return vrf->stable[afi][safi]; } - + /* Add nexthop to the end of the list. */ static void nexthop_add (struct rib *rib, struct nexthop *nexthop) @@ -226,7 +226,7 @@ nexthop_ipv4_add (struct rib *rib, struct in_addr *ipv4, struct in_addr *src) } static struct nexthop * -nexthop_ipv4_ifindex_add (struct rib *rib, struct in_addr *ipv4, +nexthop_ipv4_ifindex_add (struct rib *rib, struct in_addr *ipv4, struct in_addr *src, unsigned int ifindex) { struct nexthop *nexthop; @@ -338,7 +338,7 @@ nexthop_active_ipv4 (struct rib *rib, struct nexthop *nexthop, int set, while (rn) { route_unlock_node (rn); - + /* If lookup self prefix return immediately. */ if (rn == top) return 0; @@ -354,7 +354,7 @@ nexthop_active_ipv4 (struct rib *rib, struct nexthop *nexthop, int set, /* If there is no selected route or matched route is EGP, go up tree. */ - if (! match + if (! match || match->type == ZEBRA_ROUTE_BGP) { do { @@ -371,7 +371,7 @@ nexthop_active_ipv4 (struct rib *rib, struct nexthop *nexthop, int set, newhop = match->nexthop; if (newhop && nexthop->type == NEXTHOP_TYPE_IPV4) nexthop->ifindex = newhop->ifindex; - + return 1; } else if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_INTERNAL)) @@ -439,7 +439,7 @@ nexthop_active_ipv6 (struct rib *rib, struct nexthop *nexthop, int set, while (rn) { route_unlock_node (rn); - + /* If lookup self prefix return immediately. */ if (rn == top) return 0; @@ -473,7 +473,7 @@ nexthop_active_ipv6 (struct rib *rib, struct nexthop *nexthop, int set, if (newhop && nexthop->type == NEXTHOP_TYPE_IPV6) nexthop->ifindex = newhop->ifindex; - + return 1; } else if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_INTERNAL)) @@ -534,7 +534,7 @@ rib_match_ipv4 (struct in_addr addr) while (rn) { route_unlock_node (rn); - + /* Pick up selected route. */ for (match = rn->info; match; match = match->next) { @@ -546,7 +546,7 @@ rib_match_ipv4 (struct in_addr addr) /* If there is no selected route or matched route is EGP, go up tree. */ - if (! match + if (! match || match->type == ZEBRA_ROUTE_BGP) { do { @@ -607,7 +607,7 @@ rib_lookup_ipv4 (struct prefix_ipv4 *p) if (match->type == ZEBRA_ROUTE_CONNECT) return match; - + for (nexthop = match->nexthop; nexthop; nexthop = nexthop->next) if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)) return match; @@ -665,7 +665,7 @@ rib_lookup_ipv4_route (struct prefix_ipv4 *p, union sockunion * qgate) if (match->type == ZEBRA_ROUTE_CONNECT) return ZEBRA_RIB_FOUND_CONNECTED; - + /* Ok, we have a cood candidate, let's check it's nexthop list... */ for (nexthop = match->nexthop; nexthop; nexthop = nexthop->next) if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)) @@ -716,7 +716,7 @@ rib_match_ipv6 (struct in6_addr *addr) while (rn) { route_unlock_node (rn); - + /* Pick up selected route. */ for (match = rn->info; match; match = match->next) { @@ -728,7 +728,7 @@ rib_match_ipv6 (struct in6_addr *addr) /* If there is no selected route or matched route is EGP, go up tree. */ - if (! match + if (! 
match || match->type == ZEBRA_ROUTE_BGP) { do { @@ -900,7 +900,7 @@ nexthop_active_update (struct route_node *rn, struct rib *rib, int set) return rib->nexthop_active_num; } - + static void rib_install_kernel (struct route_node *rn, struct rib *rib) @@ -980,9 +980,9 @@ rib_process (struct route_node *rn) int installed = 0; struct nexthop *nexthop = NULL; char buf[INET6_ADDRSTRLEN]; - + assert (rn); - + if (IS_ZEBRA_DEBUG_RIB || IS_ZEBRA_DEBUG_RIB_Q) inet_ntop (rn->p.family, &rn->p.u.prefix, buf, INET6_ADDRSTRLEN); @@ -992,14 +992,14 @@ rib_process (struct route_node *rn) * may be passed to rib_unlink() in the middle of iteration. */ next = rib->next; - + /* Currently installed rib. */ if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED)) { assert (fib == NULL); fib = rib; } - + /* Unlock removed routes, so they'll be freed, bar the FIB entry, * which we need to do do further work with below. */ @@ -1014,10 +1014,10 @@ rib_process (struct route_node *rn) } else del = rib; - + continue; } - + /* Skip unreachable nexthop. */ if (! nexthop_active_update (rn, rib, 0)) continue; @@ -1032,14 +1032,14 @@ rib_process (struct route_node *rn) select = rib; continue; } - + /* filter route selection in following order: * - connected beats other types * - lower distance beats higher * - lower metric beats higher for equal distance * - last, hence oldest, route wins tie break. */ - + /* Connected routes. Pick the last connected * route of the set of lowest metric connected routes. */ @@ -1052,18 +1052,18 @@ rib_process (struct route_node *rn) } else if (select->type == ZEBRA_ROUTE_CONNECT) continue; - + /* higher distance loses */ if (rib->distance > select->distance) continue; - + /* lower wins */ if (rib->distance < select->distance) { select = rib; continue; } - + /* metric tie-breaks equal distance */ if (rib->metric <= select->metric) select = rib; @@ -1090,14 +1090,14 @@ rib_process (struct route_node *rn) /* Set real nexthop. */ nexthop_active_update (rn, select, 1); - + if (! RIB_SYSTEM_ROUTE (select)) rib_install_kernel (rn, select); redistribute_add (&rn->p, select); } else if (! RIB_SYSTEM_ROUTE (select)) { - /* Housekeeping code to deal with + /* Housekeeping code to deal with race conditions in kernel with linux netlink reporting interface up before IPv4 or IPv6 protocol is ready to add routes. @@ -1110,7 +1110,7 @@ rib_process (struct route_node *rn) installed = 1; break; } - if (! installed) + if (! installed) rib_install_kernel (rn, select); } goto end; @@ -1167,7 +1167,7 @@ end: } /* Take a list of route_node structs and return 1, if there was a record - * picked from it and processed by rib_process(). Don't process more, + * picked from it and processed by rib_process(). Don't process more, * than one RN record; operate only in the specified sub-queue. */ static unsigned int @@ -1202,9 +1202,9 @@ process_subq (struct list * subq, u_char qindex) * is pointed to the meta queue structure. 
*/ static wq_item_status -meta_queue_process (struct work_queue *dummy, void *data) +meta_queue_process (struct work_queue *dummy, work_queue_item item) { - struct meta_queue * mq = data; + struct meta_queue * mq = item->args.data ; unsigned i; for (i = 0; i < MQ_SIZE; i++) @@ -1271,7 +1271,7 @@ rib_meta_queue_add (struct meta_queue *mq, struct route_node *rn) static void rib_queue_add (struct zebra_t *zebra, struct route_node *rn) { - + if (IS_ZEBRA_DEBUG_RIB_Q) { char buf[INET6_ADDRSTRLEN]; @@ -1289,7 +1289,7 @@ rib_queue_add (struct zebra_t *zebra, struct route_node *rn) * holder, if necessary, then push the work into it in any case. * This semantics was introduced after 0.99.9 release. */ - if (!zebra->ribq->items->count) + if (zebra->ribq->head == NULL) work_queue_add (zebra->ribq, zebra->mq); rib_meta_queue_add (zebra->mq, rn); @@ -1320,7 +1320,7 @@ meta_queue_new (void) static void rib_queue_init (struct zebra_t *zebra) { - if (! (zebra->ribq = work_queue_new (zebra->master, + if (! (zebra->ribq = work_queue_new (zebra->master, "route_node processing"))) { zlog_err ("%s: could not initialise work queue!", __func__); @@ -1328,12 +1328,13 @@ rib_queue_init (struct zebra_t *zebra) } /* fill in the work queue spec */ - zebra->ribq->spec.workfunc = &meta_queue_process; - zebra->ribq->spec.errorfunc = NULL; + zebra->ribq->spec.workfunc = &meta_queue_process; + zebra->ribq->spec.errorfunc = NULL; + zebra->ribq->spec.del_item_data = NULL ; /* XXX: TODO: These should be runtime configurable via vty */ zebra->ribq->spec.max_retries = 3; zebra->ribq->spec.hold = rib_process_hold_time; - + if (!(zebra->mq = meta_queue_new ())) zlog_err ("%s: could not initialise meta queue!", __func__); } @@ -1365,7 +1366,7 @@ rib_queue_init (struct zebra_t *zebra) * state must be preserved as and when the head RIB entry of a * route_node is changed by rib_unlink / rib_link. A small complication, * but saves having to allocate a dedicated object for this. - * + * * Refcounting (aka "locking" throughout the GNU Zebra and Quagga code): * * - route_nodes: refcounted by: @@ -1375,16 +1376,16 @@ rib_queue_init (struct zebra_t *zebra) * - managed by: rib_addqueue, rib_process. * */ - + /* Add RIB to head of the route node. */ static void rib_link (struct route_node *rn, struct rib *rib) { struct rib *head; char buf[INET6_ADDRSTRLEN]; - + assert (rib && rn); - + route_lock_node (rn); /* rn route table reference */ if (IS_ZEBRA_DEBUG_RIB) @@ -1412,8 +1413,8 @@ rib_link (struct route_node *rn, struct rib *rib) static void rib_addnode (struct route_node *rn, struct rib *rib) { - /* RIB node has been un-removed before route-node is processed. - * route_node must hence already be on the queue for processing.. + /* RIB node has been un-removed before route-node is processed. + * route_node must hence already be on the queue for processing.. 
*/ if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) { @@ -1453,7 +1454,7 @@ rib_unlink (struct route_node *rn, struct rib *rib) else { rn->info = rib->next; - + if (rn->info) { if (IS_ZEBRA_DEBUG_RIB) @@ -1489,7 +1490,7 @@ rib_delnode (struct route_node *rn, struct rib *rib) } int -rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, +rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, struct in_addr *gate, struct in_addr *src, unsigned int ifindex, u_int32_t vrf_id, u_int32_t metric, u_char distance) @@ -1527,7 +1528,7 @@ rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, { if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) continue; - + if (rib->type != type) continue; if (rib->type != ZEBRA_ROUTE_CONNECT) @@ -1576,7 +1577,7 @@ rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, if (IS_ZEBRA_DEBUG_RIB) zlog_debug ("%s: calling rib_addnode (%p, %p)", __func__, rn, rib); rib_addnode (rn, rib); - + /* Free implicit route.*/ if (same) { @@ -1584,7 +1585,7 @@ rib_add_ipv4 (int type, int flags, struct prefix_ipv4 *p, zlog_debug ("%s: calling rib_delnode (%p, %p)", __func__, rn, rib); rib_delnode (rn, same); } - + route_unlock_node (rn); return 0; } @@ -1753,7 +1754,7 @@ rib_add_ipv4_multipath (struct prefix_ipv4 *p, struct rib *rib) struct route_node *rn; struct rib *same; struct nexthop *nexthop; - + /* Lookup table. */ table = vrf_table (AFI_IP, SAFI_UNICAST, 0); if (! table) @@ -1767,7 +1768,7 @@ rib_add_ipv4_multipath (struct prefix_ipv4 *p, struct rib *rib) rib->distance = route_info[rib->type].distance; /* iBGP distance is 200. */ - if (rib->type == ZEBRA_ROUTE_BGP + if (rib->type == ZEBRA_ROUTE_BGP && CHECK_FLAG (rib->flags, ZEBRA_FLAG_IBGP)) rib->distance = 200; } @@ -1781,12 +1782,12 @@ rib_add_ipv4_multipath (struct prefix_ipv4 *p, struct rib *rib) { if (CHECK_FLAG (same->status, RIB_ENTRY_REMOVED)) continue; - + if (same->type == rib->type && same->table == rib->table && same->type != ZEBRA_ROUTE_CONNECT) break; } - + /* If this route is kernel route, set FIB flag to the route. */ if (rib->type == ZEBRA_ROUTE_KERNEL || rib->type == ZEBRA_ROUTE_CONNECT) for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next) @@ -1812,7 +1813,7 @@ rib_add_ipv4_multipath (struct prefix_ipv4 *p, struct rib *rib) } rib_delnode (rn, same); } - + route_unlock_node (rn); return 0; } @@ -1842,8 +1843,8 @@ rib_delete_ipv4 (int type, int flags, struct prefix_ipv4 *p, if (IS_ZEBRA_DEBUG_KERNEL && gate) zlog_debug ("rib_delete_ipv4(): route delete %s/%d via %s ifindex %d", inet_ntop (AF_INET, &p->prefix, buf1, INET_ADDRSTRLEN), - p->prefixlen, - inet_ntoa (*gate), + p->prefixlen, + inet_ntoa (*gate), ifindex); /* Lookup route node. */ @@ -1895,7 +1896,7 @@ rib_delete_ipv4 (int type, int flags, struct prefix_ipv4 *p, else if (gate == NULL || ((nexthop = rib->nexthop) && (IPV4_ADDR_SAME (&nexthop->gate.ipv4, gate) || - IPV4_ADDR_SAME (&nexthop->rgate.ipv4, gate)))) + IPV4_ADDR_SAME (&nexthop->rgate.ipv4, gate)))) { same = rib; break; @@ -1936,14 +1937,14 @@ rib_delete_ipv4 (int type, int flags, struct prefix_ipv4 *p, return ZEBRA_ERR_RTNOEXIST; } } - + if (same) rib_delnode (rn, same); - + route_unlock_node (rn); return 0; } - + /* Install static route into rib. 
*/ static void static_install_ipv4 (struct prefix *p, struct static_ipv4 *si) @@ -1963,7 +1964,7 @@ static_install_ipv4 (struct prefix *p, struct static_ipv4 *si) { if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) continue; - + if (rib->type == ZEBRA_ROUTE_STATIC && rib->distance == si->distance) break; } @@ -1991,7 +1992,7 @@ static_install_ipv4 (struct prefix *p, struct static_ipv4 *si) { /* This is new static route. */ rib = XCALLOC (MTYPE_RIB, sizeof (struct rib)); - + rib->type = ZEBRA_ROUTE_STATIC; rib->distance = si->distance; rib->metric = 0; @@ -2048,7 +2049,7 @@ static_uninstall_ipv4 (struct prefix *p, struct static_ipv4 *si) table = vrf_table (AFI_IP, SAFI_UNICAST, 0); if (! table) return; - + /* Lookup existing route with type and distance. */ rn = route_node_lookup (table, p); if (! rn) @@ -2080,7 +2081,7 @@ static_uninstall_ipv4 (struct prefix *p, struct static_ipv4 *si) route_unlock_node (rn); return; } - + /* Check nexthop. */ if (rib->nexthop_num == 1) rib_delnode (rn, rib); @@ -2113,7 +2114,7 @@ static_add_ipv4 (struct prefix *p, struct in_addr *gate, const char *ifname, stable = vrf_static_table (AFI_IP, SAFI_UNICAST, vrf_id); if (! stable) return -1; - + /* Lookup static route prefix. */ rn = route_node_get (stable, p); @@ -2244,7 +2245,7 @@ static_delete_ipv4 (struct prefix *p, struct in_addr *gate, const char *ifname, if (si->next) si->next->prev = si->prev; route_unlock_node (rn); - + /* Free static route configuration. */ if (ifname) XFREE (0, si->gate.ifname); @@ -2255,7 +2256,7 @@ static_delete_ipv4 (struct prefix *p, struct in_addr *gate, const char *ifname, return 1; } - + #ifdef HAVE_IPV6 static int rib_bogus_ipv6 (int type, struct prefix_ipv6 *p, @@ -2300,7 +2301,7 @@ rib_add_ipv6 (int type, int flags, struct prefix_ipv6 *p, /* Set default distance by route type. */ if (!distance) distance = route_info[type].distance; - + if (type == ZEBRA_ROUTE_BGP && CHECK_FLAG (flags, ZEBRA_FLAG_IBGP)) distance = 200; @@ -2336,7 +2337,7 @@ rib_add_ipv6 (int type, int flags, struct prefix_ipv6 *p, /* Allocate new rib structure. */ rib = XCALLOC (MTYPE_RIB, sizeof (struct rib)); - + rib->type = type; rib->distance = distance; rib->flags = flags; @@ -2367,7 +2368,7 @@ rib_add_ipv6 (int type, int flags, struct prefix_ipv6 *p, /* Free implicit route.*/ if (same) rib_delnode (rn, same); - + route_unlock_node (rn); return 0; } @@ -2393,7 +2394,7 @@ rib_delete_ipv6 (int type, int flags, struct prefix_ipv6 *p, table = vrf_table (AFI_IP6, SAFI_UNICAST, 0); if (! table) return 0; - + /* Lookup route node. */ rn = route_node_lookup (table, (struct prefix *) p); if (! rn) @@ -2487,11 +2488,11 @@ rib_delete_ipv6 (int type, int flags, struct prefix_ipv6 *p, if (same) rib_delnode (rn, same); - + route_unlock_node (rn); return 0; } - + /* Install static route into rib. */ static void static_install_ipv6 (struct prefix *p, struct static_ipv6 *si) @@ -2540,7 +2541,7 @@ static_install_ipv6 (struct prefix *p, struct static_ipv6 *si) { /* This is new static route. */ rib = XCALLOC (MTYPE_RIB, sizeof (struct rib)); - + rib->type = ZEBRA_ROUTE_STATIC; rib->distance = si->distance; rib->metric = 0; @@ -2608,7 +2609,7 @@ static_uninstall_ipv6 (struct prefix *p, struct static_ipv6 *si) { if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) continue; - + if (rib->type == ZEBRA_ROUTE_STATIC && rib->distance == si->distance) break; } @@ -2630,7 +2631,7 @@ static_uninstall_ipv6 (struct prefix *p, struct static_ipv6 *si) route_unlock_node (rn); return; } - + /* Check nexthop. 
*/ if (rib->nexthop_num == 1) { @@ -2664,12 +2665,12 @@ static_add_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, stable = vrf_static_table (AFI_IP6, SAFI_UNICAST, vrf_id); if (! stable) return -1; - + if (!gate && (type == STATIC_IPV6_GATEWAY || type == STATIC_IPV6_GATEWAY_IFNAME)) return -1; - - if (!ifname && + + if (!ifname && (type == STATIC_IPV6_GATEWAY_IFNAME || type == STATIC_IPV6_IFNAME)) return -1; @@ -2679,7 +2680,7 @@ static_add_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, /* Do nothing if there is a same static route. */ for (si = rn->info; si; si = si->next) { - if (distance == si->distance + if (distance == si->distance && type == si->type && (! gate || IPV6_ADDR_SAME (gate, &si->ipv6)) && (! ifname || strcmp (ifname, si->ifname) == 0)) @@ -2757,7 +2758,7 @@ static_delete_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, /* Find same static route is the tree */ for (si = rn->info; si; si = si->next) - if (distance == si->distance + if (distance == si->distance && type == si->type && (! gate || IPV6_ADDR_SAME (gate, &si->ipv6)) && (! ifname || strcmp (ifname, si->ifname) == 0)) @@ -2780,7 +2781,7 @@ static_delete_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, rn->info = si->next; if (si->next) si->next->prev = si->prev; - + /* Free static route configuration. */ if (ifname) XFREE (0, si->ifname); @@ -2789,14 +2790,14 @@ static_delete_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate, return 1; } #endif /* HAVE_IPV6 */ - + /* RIB update function. */ void rib_update (void) { struct route_node *rn; struct route_table *table; - + table = vrf_table (AFI_IP, SAFI_UNICAST, 0); if (table) for (rn = route_top (table); rn; rn = route_next (rn)) @@ -2810,7 +2811,7 @@ rib_update (void) rib_queue_add (&zebrad, rn); } - + /* Remove all routes which comes from non main table. */ static void rib_weed_table (struct route_table *table) @@ -2841,7 +2842,7 @@ rib_weed_tables (void) rib_weed_table (vrf_table (AFI_IP, SAFI_UNICAST, 0)); rib_weed_table (vrf_table (AFI_IP6, SAFI_UNICAST, 0)); } - + /* Delete self installed routes after zebra is relaunched. */ static void rib_sweep_table (struct route_table *table) @@ -2860,7 +2861,7 @@ rib_sweep_table (struct route_table *table) if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) continue; - if (rib->type == ZEBRA_ROUTE_KERNEL && + if (rib->type == ZEBRA_ROUTE_KERNEL && CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELFROUTE)) { ret = rib_uninstall_kernel (rn, rib); @@ -2877,7 +2878,7 @@ rib_sweep_route (void) rib_sweep_table (vrf_table (AFI_IP, SAFI_UNICAST, 0)); rib_sweep_table (vrf_table (AFI_IP6, SAFI_UNICAST, 0)); } - + /* Close RIB and clean up kernel routes. */ static void rib_close_table (struct route_table *table) @@ -2902,7 +2903,7 @@ rib_close (void) rib_close_table (vrf_table (AFI_IP, SAFI_UNICAST, 0)); rib_close_table (vrf_table (AFI_IP6, SAFI_UNICAST, 0)); } - + /* Routing information base initialize. */ void rib_init (void) |