aboutsummaryrefslogtreecommitdiffstats
path: root/main/gdnsd/pidrace-fix.patch
blob: 2c9e62dcfed860f924de7afda2425315632f0ace (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
diff --git a/gdnsd/libgdnsd/libdmn/dmn.h b/gdnsd/libgdnsd/libdmn/dmn.h
index ff4d6a5..6ddfeef 100644
--- a/gdnsd/libgdnsd/libdmn/dmn.h
+++ b/gdnsd/libgdnsd/libdmn/dmn.h
@@ -25,6 +25,7 @@
 #include <errno.h>
 #include <stdarg.h>
 #include <syslog.h>
+#include <sys/types.h>
 
 // gcc function attributes
 #if defined __GNUC__ && __GNUC__ >= 3 // gcc 3.0+
@@ -53,15 +54,17 @@
 
 // Attempt to daemonize the current process using "pidfile"
 //  as the pidfile pathname.  logname is used as the daemon's
-//  official name for openlog() purposes.
+//  official name for openlog() purposes.  If "restart" is
+//  true, attempt unracy shutdown of any previous instance and
+//  take over.
 DMN_F_NONNULL
-void dmn_daemonize(const char* logname, const char* pidfile);
+void dmn_daemonize(const char* logname, const char* pidfile, const bool restart);
 
 // Check the status of a daemon using "pidfile".  Return value
 //  of zero means not running, otherwise the return value is
 //  the pid of the running daemon
 DMN_F_NONNULL
-int dmn_status(const char* pidfile);
+pid_t dmn_status(const char* pidfile);
 
 // Attempt to stop any running daemon using "pidfile".  This function
 //  will make several attempts (with an increasing delay) to terminate
@@ -69,7 +72,7 @@ int dmn_status(const char* pidfile);
 // retval == 0 means daemon was not running, or was successfully killed.
 // retval != 0 means daemon is still running (and the pid is the retval)
 DMN_F_NONNULL
-int dmn_stop(const char* pidfile);
+pid_t dmn_stop(const char* pidfile);
 
 // Send an aribtrary signal to a running daemon using "pidfile".
 DMN_F_NONNULL
diff --git a/gdnsd/libgdnsd/libdmn/dmn_daemon.c b/gdnsd/libgdnsd/libdmn/dmn_daemon.c
index 4d77844..bf74959 100644
--- a/gdnsd/libgdnsd/libdmn/dmn_daemon.c
+++ b/gdnsd/libgdnsd/libdmn/dmn_daemon.c
@@ -35,89 +35,103 @@
 
 #include "dmn.h"
 
-static bool dmn_daemonized = false;
+// this simply trusts fcntl pid info
+//  on the lock we hold for daemon lifetime
+//  to inform us of a running daemon's pid.
+// the string pid stored in the file is just
+//  for reference for humans or other tools.
 
+static bool dmn_daemonized = false;
 static const size_t pblen = 22;
 
-static int check_pidfile(const char* pidfile) {
+static pid_t check_pidfile(const char* pidfile) {
     dmn_assert(pidfile);
 
-    char pidbuf[pblen];
-
     const int pidfd = open(pidfile, O_RDONLY);
     if(pidfd == -1) {
         if (errno == ENOENT) return 0;
         else dmn_log_fatal("open() of pidfile '%s' failed: %s", pidfile, dmn_strerror(errno));
     }
 
-    const int readrv = read(pidfd, pidbuf, (size_t) 15);
-    if(readrv == -1) {
-        close(pidfd);
-        dmn_log_fatal("read() from pidfile '%s' failed: %s", pidfile, dmn_strerror(errno));
-    }
+    struct flock pidlock_info;
+    memset(&pidlock_info, 0, sizeof(struct flock));
+    pidlock_info.l_type = F_WRLCK;
+    pidlock_info.l_whence = SEEK_SET;
+
+    // should not fail unless something's horribly wrong
+    if(fcntl(pidfd, F_GETLK, &pidlock_info))
+        dmn_log_fatal("bug: fcntl(%s, F_GETLK) failed: %s", pidfile, dmn_strerror(errno));
 
     close(pidfd);
 
-    if(readrv == 0) {
-        dmn_log_info("empty pidfile '%s', wiping it out", pidfile);
-        unlink(pidfile);
+    if(pidlock_info.l_type == F_UNLCK) {
+        dmn_log_debug("Found stale pidfile at %s, ignoring", pidfile);
         return 0;
     }
-    pidbuf[readrv] = '\0';
 
-    errno = 0;
-    const int pidnum = strtol(pidbuf, NULL, 10);
-    if(errno) {
-        dmn_log_info("wiping out pidfile '%s': %s", pidfile, dmn_strerror(errno));
-        unlink(pidfile);
-        return 0;
-    }
+    return pidlock_info.l_pid;
+}
 
-    if(pidnum <= 0) {
-        dmn_log_info("invalid pid found in pidfile in '%s', wiping it out", pidfile);
-        unlink(pidfile);
-        return 0;
-    }
+static bool pidrace_inner(const pid_t pid, const int pidfd) {
+    bool rv = true; // cannot get lock
 
-    if(kill(pidnum, 0)) {
-        dmn_log_info("Found stale pidfile for pid %i in %s, wiping it out", pidnum, pidfile);
-        unlink(pidfile);
-        return 0;
+    char pidbuf[pblen];
+    const ssize_t pidlen = snprintf(pidbuf, pblen, "%li\n", (long)pid);
+    if(pidlen < 2)
+        dmn_log_fatal("snprintf() for pidfile failed");
+
+    struct flock pidlock_set;
+    memset(&pidlock_set, 0, sizeof(struct flock));
+    pidlock_set.l_type = F_WRLCK;
+    pidlock_set.l_whence = SEEK_SET;
+    if(fcntl(pidfd, F_SETLK, &pidlock_set)) {
+        if(errno != EAGAIN && errno != EACCES)
+            dmn_log_fatal("bug? fcntl(pidfile, F_SETLK) failed: %s", dmn_strerror(errno));
+    }
+    else {
+        rv = false; // got lock
+        if(ftruncate(pidfd, 0))
+            dmn_log_fatal("truncating pidfile failed: %s", dmn_strerror(errno));
+        if(write(pidfd, pidbuf, (size_t) pidlen) != pidlen)
+            dmn_log_fatal("writing to pidfile failed: %s", dmn_strerror(errno));
     }
 
-    return pidnum;
+    return rv;
 }
 
-static long make_pidfile(const char* pidfile) {
+static pid_t startup_pidrace(const char* pidfile, const bool restart) {
     dmn_assert(pidfile);
 
-    long pid = (long)getpid();
-    char pidbuf[pblen];
-
-    int pidfd = open(pidfile, O_WRONLY | O_CREAT | O_EXCL, 0666);
-    if(pidfd == -1) dmn_log_fatal("creation of new pidfile %s failed: %s", pidfile, dmn_strerror(errno));
-
-    const ssize_t pidlen = snprintf(pidbuf, pblen, "%li\n", pid);
-    if(pidlen < 2) {
-        close(pidfd);
-        unlink(pidfile);
-        dmn_log_fatal("snprintf() for pidfile failed");
-    }
-    if(write(pidfd, pidbuf, (size_t) pidlen) != pidlen) {
-        close(pidfd);
-        unlink(pidfile);
-        dmn_log_fatal("writing to new pidfile %s failed: %s", pidfile, dmn_strerror(errno));
+    pid_t pid = getpid();
+
+    int pidfd = open(pidfile, O_WRONLY | O_CREAT, 0666);
+    if(pidfd == -1) dmn_log_fatal("open(%s, O_WRONLY|O_CREAT) failed: %s", pidfile, dmn_strerror(errno));
+
+    if(restart) {
+        struct timeval tv;
+        unsigned tries = 1;
+        unsigned maxtries = 10;
+        while(tries++ <= maxtries) {
+            const pid_t old_pid = check_pidfile(pidfile);
+            if(old_pid && !kill(old_pid, SIGTERM)) {
+                tv.tv_sec = 0;
+                tv.tv_usec = 100000 * tries;
+                select(0, NULL, NULL, NULL, &tv);
+            }
+            if(!pidrace_inner(pid, pidfd))
+                return pid;
+        }
+        dmn_log_fatal("Could not restart: failed to shut down previous instance and acquire pidfile lock");
     }
-    if(close(pidfd) == -1) {
-        unlink(pidfile);
-        dmn_log_fatal("closing new pidfile %s failed: %s", pidfile, dmn_strerror(errno));
+    else if(pidrace_inner(pid, pidfd)) {
+        dmn_log_fatal("Could not start: lost startup race to another daemon");
     }
 
     return pid;
 }
 
-static int fork_and_exit(void) {
-    const int mypid = fork();
+static pid_t fork_and_exit(void) {
+    const pid_t mypid = fork();
     if (mypid == -1)     // parent: failure
         return 0;
     else if (mypid != 0) // parent: success
@@ -126,12 +140,12 @@ static int fork_and_exit(void) {
         return 1;
 }
 
-void dmn_daemonize(const char* logname, const char* pidfile) {
+void dmn_daemonize(const char* logname, const char* pidfile, const bool restart) {
     dmn_assert(pidfile);
 
-    const int oldpid = check_pidfile(pidfile);
-    if(oldpid)
-        dmn_log_fatal("I am already running at pid %i in %s, failing", oldpid, pidfile);
+    const pid_t oldpid = check_pidfile(pidfile);
+    if(oldpid && !restart)
+        dmn_log_fatal("I am already running at pid %li in %s, failing", (long)oldpid, pidfile);
 
     if(!fork_and_exit()) dmn_log_fatal("fork() failed: %s", dmn_strerror(errno));
 
@@ -156,41 +170,40 @@ void dmn_daemonize(const char* logname, const char* pidfile) {
 
     umask(022);
 
-    long pid = make_pidfile(pidfile);
+    const pid_t pid = startup_pidrace(pidfile, restart);
 
     if(!freopen("/dev/null", "r", stdin))
         dmn_log_fatal("Cannot open /dev/null: %s", dmn_strerror(errno));
     if(!freopen("/dev/null", "w", stdout))
         dmn_log_fatal("Cannot open /dev/null: %s", dmn_strerror(errno));
 
-    dmn_log_info("Daemonizing at pid %li ...", pid);
+    dmn_log_info("Daemonizing at pid %li ...", (long)pid);
 
     if(!freopen("/dev/null", "r+", stderr))
         dmn_log_fatal("Cannot open /dev/null: %s", dmn_strerror(errno));
     openlog(logname, LOG_NDELAY|LOG_PID, LOG_DAEMON);
     dmn_daemonized = true;
-    dmn_log_info("Daemonized succesfully, pid is %li", pid);
+    dmn_log_info("Daemonized succesfully, pid is %li", (long)pid);
 }
 
-int dmn_status(const char* pidfile) { dmn_assert(pidfile); return check_pidfile(pidfile); }
+pid_t dmn_status(const char* pidfile) { dmn_assert(pidfile); return check_pidfile(pidfile); }
 
-int dmn_stop(const char* pidfile) {
+pid_t dmn_stop(const char* pidfile) {
     dmn_assert(pidfile);
 
-    const int pid = check_pidfile(pidfile);
+    const pid_t pid = check_pidfile(pidfile);
     if(!pid) {
         dmn_log_info("Did not find a running daemon to stop!");
         return 0;
     }
 
-    struct timeval tv;
-
     // This will basically do a kill/sleep
     //  loop for a total of 10 attempts over
     //  the course of 5.5 seconds before giving
     //  up, with the sleep delay increasing from
     //  100ms at the start up to 1s at the end.
 
+    struct timeval tv;
     unsigned tries = 1;
     unsigned maxtries = 10;
     while(tries++ <= maxtries && !kill(pid, SIGTERM)) {
@@ -200,23 +213,21 @@ int dmn_stop(const char* pidfile) {
     }
 
     if(!kill(pid, 0)) {
-        dmn_log_err("Cannot stop daemon at pid %i", pid);
+        dmn_log_err("Cannot stop daemon at pid %li", (long)pid);
         return pid;
     }
 
-    unlink(pidfile);
-
     return 0;
 }
 
 void dmn_signal(const char* pidfile, int sig) {
     dmn_assert(pidfile);
 
-    const int pid = check_pidfile(pidfile);
+    const pid_t pid = check_pidfile(pidfile);
     if(!pid)
         dmn_log_err("Did not find a running daemon to signal!");
     else if(kill(pid, sig))
-        dmn_log_err("Cannot signal daemon at pid %i", pid);
+        dmn_log_err("Cannot signal daemon at pid %li", (long)pid);
 }
 
 bool dmn_is_daemonized(void) { return dmn_daemonized; }
diff --git a/gdnsd/main.c b/gdnsd/main.c
index 50a0aa8..677edef 100644
--- a/gdnsd/main.c
+++ b/gdnsd/main.c
@@ -211,7 +211,7 @@ typedef enum {
     ACT_START,
     ACT_STOP,
     ACT_RESTART,
-    ACT_CRESTART,
+    ACT_CRESTART, // downgrades to ACT_RESTART after checking...
     ACT_STATUS,
     ACT_LPE_ON,
     ACT_LPE_OFF,
@@ -287,7 +287,7 @@ int main(int argc, char** argv) {
     free(conf_arg);
 
     // Check if we're already running...
-    const int oldpid = dmn_status(gconfig.pidfile);
+    const pid_t oldpid = dmn_status(gconfig.pidfile);
 
     // Take action
     if(action == ACT_STATUS) {
@@ -295,7 +295,7 @@ int main(int argc, char** argv) {
             log_info("status: not running");
             exit(3);
         }
-        log_info("status: running at pid %i", oldpid);
+        log_info("status: running at pid %li", (long)oldpid);
         exit(0);
     }
 
@@ -334,12 +334,11 @@ int main(int argc, char** argv) {
         exit(0);
     }
 
-    if(action == ACT_RESTART) {
-        log_info("Attempting to stop the running daemon instance for restart...");
-        if(dmn_stop(gconfig.pidfile))
-            log_fatal("...Running daemon failed to stop, cannot continue with restart...");
-        log_info("...Previous daemon successfully shut down (or was not up), this instance coming online");
-    }
+    // from here out, all actions are attempting startup...
+    dmn_assert(action == ACT_STARTFG
+            || action == ACT_START
+            || action == ACT_RESTART
+    );
 
     const bool started_as_root = !geteuid();
 
@@ -413,7 +412,7 @@ int main(int argc, char** argv) {
         // so that the daemonization fork+exit pairs don't
         //   execute the plugins' exit handlers
         skip_plugins_cleanup = true;
-        dmn_daemonize(PACKAGE_NAME, gconfig.pidfile);
+        dmn_daemonize(PACKAGE_NAME, gconfig.pidfile, (action == ACT_RESTART));
         skip_plugins_cleanup = false;
     }