summaryrefslogtreecommitdiffstats
path: root/bgpd/bgpd.c
diff options
context:
space:
mode:
authorPaul Jakma <paul.jakma@sun.com>2006-09-14 02:58:49 +0000
committerPaul Jakma <paul.jakma@sun.com>2006-09-14 02:58:49 +0000
commitca058a30b1ea57f83871ab4cf1c9a91ea4064d52 (patch)
treeab38ab59bad607c9b41a093cb8b35bec766f30b3 /bgpd/bgpd.c
parent2815e61ffbbf9c362896f3912d925cf78e125ee1 (diff)
downloadquagga-ca058a30b1ea57f83871ab4cf1c9a91ea4064d52.tar.bz2
quagga-ca058a30b1ea57f83871ab4cf1c9a91ea4064d52.tar.xz
[bgpd] Fix 0.99 shutdown regression, introduce Clearing and Deleted states
2006-09-14 Paul Jakma <paul.jakma@sun.com> * (general) Fix some niggly issues around 'shutdown' and clearing by adding a Clearing FSM wait-state and a hidden 'Deleted' FSM state, to allow deleted peers to 'cool off' and hit 0 references. This introduces a slow memory leak of struct peer, however that's more a testament to the fragility of the reference counting than a bug in this patch, cleanup of reference counting to fix this is to follow. * bgpd.h: Add Clearing, Deleted states and Clearing_Completed and event. * bgp_debug.c: (bgp_status_msg[]) Add strings for Clearing and Deleted. * bgp_fsm.h: Don't allow timer/event threads to set anything for Deleted peers. * bgp_fsm.c: (bgp_timer_set) Add Clearing and Deleted. Deleted needs to stop everything. (bgp_stop) Remove explicit fsm_change_status call, the general framework handles the transition. (bgp_start) Log a warning if a start is attempted on a peer that should stay down, trying to start a peer. (struct .. FSM) Add Clearing_Completed events, has little influence except when in state Clearing to signal wait-state can end. Add Clearing and Deleted states, former is a wait-state, latter is a placeholder state to allow peers to disappear quietly once refcounts settle. (bgp_event) Try reduce verbosity of FSM state-change debug, changes to same state are not interesting (Established->Established) Allow NULL action functions in FSM. * bgp_packet.c: (bgp_write) Use FSM events, rather than trying to twiddle directly with FSM state behind the back of FSM. (bgp_write_notify) ditto. (bgp_read) Remove the vague ACCEPT_PEER peer_unlock, or else this patch crashes, now it leaks instead. * bgp_route.c: (bgp_clear_node_complete) Clearing_Completed event, to end clearing. (bgp_clear_route) See extensive comments. * bgpd.c: (peer_free) should only be called while in Deleted, peer refcounting controls when peer_free is called. bgp_sync_delete should be here, not in peer_delete. (peer_delete) Initiate delete. Transition to Deleted state manually. When removing peer from indices that provide visibility of it, take great care to be idempotent wrt the reference counting of struct peer through those indices. Use bgp_timer_set, rather than replicating. Call to bgp_sync_delete isn't appropriate here, sync can be referenced while shutting down and finishing deletion. (peer_group_bind) Take care to be idempotent wrt list references indexing peers.
Diffstat (limited to 'bgpd/bgpd.c')
-rw-r--r--bgpd/bgpd.c85
1 files changed, 56 insertions, 29 deletions
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 8ed598d2..733b33a6 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -687,6 +687,15 @@ peer_sort (struct peer *peer)
static inline void
peer_free (struct peer *peer)
{
+ assert (peer->status == Deleted);
+
+ /* this /ought/ to have been done already through bgp_stop earlier,
+ * but just to be sure..
+ */
+ bgp_timer_set (peer);
+ BGP_READ_OFF (peer->t_read);
+ BGP_WRITE_OFF (peer->t_write);
+
if (peer->desc)
XFREE (MTYPE_PEER_DESC, peer->desc);
@@ -704,6 +713,7 @@ peer_free (struct peer *peer)
if (peer->clear_node_queue)
work_queue_free (peer->clear_node_queue);
+ bgp_sync_delete (peer);
memset (peer, 0, sizeof (struct peer));
XFREE (MTYPE_BGP_PEER, peer);
@@ -714,7 +724,8 @@ struct peer *
peer_lock (struct peer *peer)
{
assert (peer && (peer->lock >= 0));
-
+ assert (peer->status != Deleted);
+
peer->lock++;
return peer;
@@ -761,18 +772,17 @@ peer_new ()
struct servent *sp;
/* Allocate new peer. */
- peer = XMALLOC (MTYPE_BGP_PEER, sizeof (struct peer));
- memset (peer, 0, sizeof (struct peer));
+ peer = XCALLOC (MTYPE_BGP_PEER, sizeof (struct peer));
/* Set default value. */
peer->fd = -1;
- peer->lock = 1;
peer->v_start = BGP_INIT_START_TIMER;
peer->v_connect = BGP_DEFAULT_CONNECT_RETRY;
peer->v_asorig = BGP_DEFAULT_ASORIGINATE;
peer->status = Idle;
peer->ostatus = Idle;
peer->weight = 0;
+ peer = peer_lock (peer); /* initial reference */
/* Set default flags. */
for (afi = AFI_IP; afi < AFI_MAX; afi++)
@@ -1139,7 +1149,17 @@ peer_nsf_stop (struct peer *peer)
bgp_clear_route_all (peer);
}
-/* Delete peer from confguration. */
+/* Delete peer from confguration.
+ *
+ * The peer is moved to a dead-end "Deleted" neighbour-state, to allow
+ * it to "cool off" and refcounts to hit 0, at which state it is freed.
+ *
+ * This function /should/ take care to be idempotent, to guard against
+ * it being called multiple times through stray events that come in
+ * that happen to result in this function being called again. That
+ * said, getting here for a "Deleted" peer is a bug in the neighbour
+ * FSM.
+ */
int
peer_delete (struct peer *peer)
{
@@ -1149,6 +1169,8 @@ peer_delete (struct peer *peer)
struct bgp *bgp;
struct bgp_filter *filter;
+ assert (peer->status != Deleted);
+
bgp = peer->bgp;
if (CHECK_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT))
@@ -1158,8 +1180,13 @@ peer_delete (struct peer *peer)
relationship. */
if (peer->group)
{
- peer = peer_unlock (peer); /* peer-group reference */
- listnode_delete (peer->group->peer, peer);
+ struct listnode *pn;
+
+ if ((pn = listnode_lookup (peer->group->peer, peer)))
+ {
+ peer = peer_unlock (peer); /* group->peer list reference */
+ list_delete_node (peer->group->peer, pn);
+ }
peer->group = NULL;
}
@@ -1169,29 +1196,25 @@ peer_delete (struct peer *peer)
*/
peer->last_reset = PEER_DOWN_NEIGHBOR_DELETE;
bgp_stop (peer);
- bgp_fsm_change_status (peer, Idle); /* stops all timers */
+ bgp_fsm_change_status (peer, Deleted);
+ bgp_timer_set (peer); /* stops all timers for Deleted */
- /* Stop all timers - should already have been done in bgp_stop */
- BGP_TIMER_OFF (peer->t_start);
- BGP_TIMER_OFF (peer->t_connect);
- BGP_TIMER_OFF (peer->t_holdtime);
- BGP_TIMER_OFF (peer->t_keepalive);
- BGP_TIMER_OFF (peer->t_asorig);
- BGP_TIMER_OFF (peer->t_routeadv);
- BGP_TIMER_OFF (peer->t_pmax_restart);
- BGP_TIMER_OFF (peer->t_gr_restart);
- BGP_TIMER_OFF (peer->t_gr_stale);
-
/* Delete from all peer list. */
if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
{
- peer_unlock (peer); /* bgp peer list reference */
- listnode_delete (bgp->peer, peer);
+ struct listnode *pn;
+
+ if ((pn = listnode_lookup (bgp->peer, peer)))
+ {
+ peer_unlock (peer); /* bgp peer list reference */
+ list_delete_node (bgp->peer, pn);
+ }
- if (peer_rsclient_active (peer))
+ if (peer_rsclient_active (peer)
+ && (pn = listnode_lookup (bgp->rsclient, peer)))
{
peer_unlock (peer); /* rsclient list reference */
- listnode_delete (bgp->rsclient, peer);
+ list_delete_node (bgp->rsclient, pn);
}
}
@@ -1219,8 +1242,6 @@ peer_delete (struct peer *peer)
sockunion_free (peer->su_remote);
peer->su_local = peer->su_remote = NULL;
- bgp_sync_delete (peer);
-
/* Free filter related memory. */
for (afi = AFI_IP; afi < AFI_MAX; afi++)
for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
@@ -1692,7 +1713,7 @@ peer_group_bind (struct bgp *bgp, union sockunion *su,
peer->group = group;
peer->af_group[afi][safi] = 1;
- peer = peer_lock (peer); /* peer-group group->peer reference */
+ peer = peer_lock (peer); /* group->peer list reference */
listnode_add (group->peer, peer);
peer_group2peer_config_copy (group, peer, afi, safi);
@@ -1733,9 +1754,11 @@ peer_group_bind (struct bgp *bgp, union sockunion *su,
{
peer->group = group;
- peer = peer_lock (peer); /* peer-group group->peer reference */
+ peer = peer_lock (peer); /* group->peer list reference */
listnode_add (group->peer, peer);
}
+ else
+ assert (group && peer->group == group);
if (first_member)
{
@@ -1759,13 +1782,16 @@ peer_group_bind (struct bgp *bgp, union sockunion *su,
if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT))
{
+ struct listnode *pn;
+
/* If it's not configured as RSERVER_CLIENT in any other address
family, without being member of a peer_group, remove it from
list bgp->rsclient.*/
- if (! peer_rsclient_active (peer))
+ if (! peer_rsclient_active (peer)
+ && (pn = listnode_lookup (bgp->rsclient, peer)))
{
peer_unlock (peer); /* peer rsclient reference */
- listnode_delete (bgp->rsclient, peer);
+ list_delete_node (bgp->rsclient, pn);
}
bgp_table_finish (peer->rib[afi][safi]);
@@ -1821,6 +1847,7 @@ peer_group_unbind (struct bgp *bgp, struct peer *peer,
if (! peer_group_active (peer))
{
+ assert (listnode_lookup (group->peer, peer));
peer_unlock (peer); /* peer group list reference */
listnode_delete (group->peer, peer);
peer->group = NULL;