summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Lamparter <equinox@diac24.net>2010-02-04 03:00:46 +0100
committerDavid Lamparter <equinox@diac24.net>2010-02-04 03:30:17 +0100
commitf4b356490dc03d3758b50da11b1548254fb56138 (patch)
treeead2e56720973f2f1c7eebe0ae032f2c826bc328
parent7d5fe54553c735a12e3fc40c3d9f0e59c79a33c6 (diff)
parent2442cdc3064f87860972ce6397fbaab52e200a00 (diff)
downloadquagga-f4b356490dc03d3758b50da11b1548254fb56138.tar.bz2
quagga-f4b356490dc03d3758b50da11b1548254fb56138.tar.xz
Merge branch 'patches/sh_bgp-write-cork' into dn42
Conflicts: lib/sockunion.c
-rw-r--r--bgpd/bgp_fsm.c2
-rw-r--r--bgpd/bgp_network.c1
-rw-r--r--bgpd/bgp_packet.c80
-rw-r--r--bgpd/bgpd.c3
-rw-r--r--lib/sockunion.c10
-rw-r--r--lib/sockunion.h1
-rw-r--r--lib/stream.c26
-rw-r--r--lib/stream.h4
-rw-r--r--lib/thread.c4
-rw-r--r--lib/thread.h1
10 files changed, 50 insertions, 82 deletions
diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c
index 487ebddb..e18fc8cd 100644
--- a/bgpd/bgp_fsm.c
+++ b/bgpd/bgp_fsm.c
@@ -704,7 +704,7 @@ bgp_start (struct peer *peer)
peer->fd);
return -1;
}
- BGP_READ_ON (peer->t_read, bgp_read, peer->fd);
+
BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
break;
}
diff --git a/bgpd/bgp_network.c b/bgpd/bgp_network.c
index be01bbd6..83b1142e 100644
--- a/bgpd/bgp_network.c
+++ b/bgpd/bgp_network.c
@@ -150,6 +150,7 @@ bgp_accept (struct thread *thread)
zlog_err ("[Error] BGP socket accept failed (%s)", safe_strerror (errno));
return -1;
}
+ set_nonblocking (bgp_sock);
if (BGP_DEBUG (events, EVENTS))
zlog_debug ("[Event] BGP connection from host %s", inet_sutop (&su, buf));
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
index b29bc1f0..1e2f95e3 100644
--- a/bgpd/bgp_packet.c
+++ b/bgpd/bgp_packet.c
@@ -107,8 +107,7 @@ bgp_connect_check (struct peer *peer)
socklen_t slen;
int ret;
- /* Anyway I have to reset read and write thread. */
- BGP_READ_OFF (peer->t_read);
+ /* Anyway I have to reset write thread. */
BGP_WRITE_OFF (peer->t_write);
/* Check file descriptor. */
@@ -597,7 +596,6 @@ bgp_write (struct thread *thread)
struct stream *s;
int num;
unsigned int count = 0;
- int write_errno;
/* Yes first of all get peer pointer. */
peer = THREAD_ARG (thread);
@@ -610,46 +608,37 @@ bgp_write (struct thread *thread)
return 0;
}
- /* Nonblocking write until TCP output buffer is full. */
- while (1)
+ s = bgp_write_packet (peer);
+ if (!s)
+ return 0; /* nothing to send */
+
+ sockopt_cork (peer->fd, 1);
+
+ /* Nonblocking write until TCP output buffer is full. */
+ do
{
int writenum;
- int val;
-
- s = bgp_write_packet (peer);
- if (! s)
- return 0;
-
- /* XXX: FIXME, the socket should be NONBLOCK from the start
- * status shouldnt need to be toggled on each write
- */
- val = fcntl (peer->fd, F_GETFL, 0);
- fcntl (peer->fd, F_SETFL, val|O_NONBLOCK);
/* Number of bytes to be sent. */
writenum = stream_get_endp (s) - stream_get_getp (s);
/* Call write() system call. */
num = write (peer->fd, STREAM_PNT (s), writenum);
- write_errno = errno;
- fcntl (peer->fd, F_SETFL, val);
- if (num <= 0)
+ if (num < 0)
{
- /* Partial write. */
- if (write_errno == EWOULDBLOCK || write_errno == EAGAIN)
- break;
+ /* write failed either retry needed or error */
+ if (ERRNO_IO_RETRY(errno))
+ break;
BGP_EVENT_ADD (peer, TCP_fatal_error);
return 0;
}
+
if (num != writenum)
{
+ /* Partial write */
stream_forward_getp (s, num);
-
- if (write_errno == EAGAIN)
- break;
-
- continue;
+ break;
}
/* Retrieve BGP packet type. */
@@ -690,13 +679,14 @@ bgp_write (struct thread *thread)
/* OK we send packet so delete it. */
bgp_packet_delete (peer);
-
- if (++count >= BGP_WRITE_PACKET_MAX)
- break;
}
+ while (++count < BGP_WRITE_PACKET_MAX &&
+ (s = bgp_write_packet (peer)) != NULL);
if (bgp_write_proceed (peer))
BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+ else
+ sockopt_cork (peer->fd, 0);
return 0;
}
@@ -705,7 +695,7 @@ bgp_write (struct thread *thread)
static int
bgp_write_notify (struct peer *peer)
{
- int ret;
+ int ret, val;
u_char type;
struct stream *s;
@@ -715,7 +705,10 @@ bgp_write_notify (struct peer *peer)
return 0;
assert (stream_get_endp (s) >= BGP_HEADER_SIZE);
- /* I'm not sure fd is writable. */
+ /* Put socket in blocking mode. */
+ val = fcntl (peer->fd, F_GETFL, 0);
+ fcntl (peer->fd, F_SETFL, val & ~O_NONBLOCK);
+
ret = writen (peer->fd, STREAM_DATA (s), stream_get_endp (s));
if (ret <= 0)
{
@@ -2237,12 +2230,13 @@ bgp_read_packet (struct peer *peer)
return 0;
/* Read packet from fd. */
- nbytes = stream_read_unblock (peer->ibuf, peer->fd, readsize);
+ nbytes = stream_read_try (peer->ibuf, peer->fd, readsize);
/* If read byte is smaller than zero then error occured. */
if (nbytes < 0)
{
- if (errno == EAGAIN)
+ /* Transient error should retry */
+ if (nbytes == -2)
return -1;
plog_err (peer->log, "%s [Error] bgp_read_packet error: %s",
@@ -2319,22 +2313,14 @@ bgp_read (struct thread *thread)
peer = THREAD_ARG (thread);
peer->t_read = NULL;
- /* For non-blocking IO check. */
- if (peer->status == Connect)
- {
- bgp_connect_check (peer);
- goto done;
- }
- else
+ if (peer->fd < 0)
{
- if (peer->fd < 0)
- {
- zlog_err ("bgp_read peer's fd is negative value %d", peer->fd);
- return -1;
- }
- BGP_READ_ON (peer->t_read, bgp_read, peer->fd);
+ zlog_err ("bgp_read peer's fd is negative value %d", peer->fd);
+ return -1;
}
+ BGP_READ_ON (peer->t_read, bgp_read, peer->fd);
+
/* Read packet header to determine type of the packet */
if (peer->packet_size == 0)
peer->packet_size = BGP_HEADER_SIZE;
diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c
index 882fe37c..a03025f6 100644
--- a/bgpd/bgpd.c
+++ b/bgpd/bgpd.c
@@ -2257,6 +2257,9 @@ peer_change_action (struct peer *peer, afi_t afi, safi_t safi,
if (CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP))
return;
+ if (peer->status != Established)
+ return;
+
if (type == peer_change_reset)
bgp_notify_send (peer, BGP_NOTIFY_CEASE,
BGP_NOTIFY_CEASE_CONFIG_CHANGE);
diff --git a/lib/sockunion.c b/lib/sockunion.c
index d1f6d80b..5de3bcfc 100644
--- a/lib/sockunion.c
+++ b/lib/sockunion.c
@@ -551,6 +551,16 @@ sockopt_v6only (int family, int sock)
return 0;
}
+int
+sockopt_cork (int sock, int onoff)
+{
+#ifdef TCP_CORK
+ return setsockopt (sock, IPPROTO_TCP, TCP_CORK, &onoff, sizeof(onoff));
+#else
+ return 0;
+#endif
+}
+
/* If same family and same prefix return 1. */
int
sockunion_same (union sockunion *su1, union sockunion *su2)
diff --git a/lib/sockunion.h b/lib/sockunion.h
index db145cf2..2e8ce2d0 100644
--- a/lib/sockunion.h
+++ b/lib/sockunion.h
@@ -103,6 +103,7 @@ extern int sockopt_v6only (int family, int sock);
extern int sockunion_bind (int sock, union sockunion *,
unsigned short, union sockunion *);
extern int sockopt_ttl (int family, int sock, int ttl);
+extern int sockopt_cork (int sock, int onoff);
extern int sockunion_socket (union sockunion *su);
extern const char *inet_sutop (union sockunion *su, char *str);
extern enum connect_result sockunion_connect (int fd, union sockunion *su,
diff --git a/lib/stream.c b/lib/stream.c
index 983330ff..bff74d7b 100644
--- a/lib/stream.c
+++ b/lib/stream.c
@@ -711,32 +711,6 @@ stream_read (struct stream *s, int fd, size_t size)
return nbytes;
}
-/* Read size from fd. */
-int
-stream_read_unblock (struct stream *s, int fd, size_t size)
-{
- int nbytes;
- int val;
-
- STREAM_VERIFY_SANE(s);
-
- if (STREAM_WRITEABLE (s) < size)
- {
- STREAM_BOUND_WARN (s, "put");
- return 0;
- }
-
- val = fcntl (fd, F_GETFL, 0);
- fcntl (fd, F_SETFL, val|O_NONBLOCK);
- nbytes = read (fd, s->data + s->endp, size);
- fcntl (fd, F_SETFL, val);
-
- if (nbytes > 0)
- s->endp += nbytes;
-
- return nbytes;
-}
-
ssize_t
stream_read_try(struct stream *s, int fd, size_t size)
{
diff --git a/lib/stream.h b/lib/stream.h
index 3e4ba7b4..486a3f93 100644
--- a/lib/stream.h
+++ b/lib/stream.h
@@ -181,10 +181,6 @@ extern u_int32_t stream_get_ipv4 (struct stream *);
Use stream_read_try instead. */
extern int stream_read (struct stream *, int, size_t);
-/* Deprecated: all file descriptors should already be non-blocking.
- Will be removed. Use stream_read_try instead. */
-extern int stream_read_unblock (struct stream *, int, size_t);
-
/* Read up to size bytes into the stream.
Return code:
>0: number of bytes read
diff --git a/lib/thread.c b/lib/thread.c
index 70f8defa..1e714417 100644
--- a/lib/thread.c
+++ b/lib/thread.c
@@ -958,7 +958,6 @@ thread_fetch (struct thread_master *m, struct thread *fetch)
struct thread *thread;
fd_set readfd;
fd_set writefd;
- fd_set exceptfd;
struct timeval timer_val;
struct timeval timer_val_bg;
struct timeval *timer_wait;
@@ -984,7 +983,6 @@ thread_fetch (struct thread_master *m, struct thread *fetch)
/* Structure copy. */
readfd = m->readfd;
writefd = m->writefd;
- exceptfd = m->exceptfd;
/* Calculate select wait timer if nothing else to do */
quagga_get_relative (NULL);
@@ -995,7 +993,7 @@ thread_fetch (struct thread_master *m, struct thread *fetch)
(!timer_wait || (timeval_cmp (*timer_wait, *timer_wait_bg) > 0)))
timer_wait = timer_wait_bg;
- num = select (FD_SETSIZE, &readfd, &writefd, &exceptfd, timer_wait);
+ num = select (FD_SETSIZE, &readfd, &writefd, NULL, timer_wait);
/* Signals should get quick treatment */
if (num < 0)
diff --git a/lib/thread.h b/lib/thread.h
index e5b1cb35..a6aed173 100644
--- a/lib/thread.h
+++ b/lib/thread.h
@@ -53,7 +53,6 @@ struct thread_master
struct thread_list background;
fd_set readfd;
fd_set writefd;
- fd_set exceptfd;
unsigned long alloc;
};