diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index daeecd5191196..a810829b24f7f 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -67,6 +67,8 @@ #define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC) +#define NOTIFY_SNDBUF_SIZE (8*1024*1024) + /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room + * for a bit of additional metadata. */ #define DEFAULT_LINE_MAX (48*1024) @@ -1556,6 +1558,126 @@ static int server_open_hostname(Server *s) { return 0; } +static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) { + Server *s = userdata; + int r; + + assert(s); + assert(s->notify_event_source == es); + assert(s->notify_fd == fd); + + if (revents != EPOLLOUT) { + log_error("Invalid events on notify file descriptor."); + return -EINVAL; + } + + /* The $NOTIFY_SOCKET is writable again, now send exactly one + * message on it. Either it's the initial READY=1 event or an + * stdout stream event. If there's nothing to write anymore, + * turn our event source off. The next time there's something + * to send it will be turned on again. */ + + if (!s->sent_notify_ready) { + static const char p[] = + "READY=1\n" + "STATUS=Processing requests..."; + ssize_t l; + + l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT); + if (l < 0) { + if (errno == EAGAIN) + return 0; + + return log_error_errno(errno, "Failed to send READY=1 notification message: %m"); + } + + s->sent_notify_ready = true; + log_debug("Sent READY=1 notification."); + + } else if (s->stdout_streams_notify_queue) + /* Dispatch one stream notification event */ + stdout_stream_send_notify(s->stdout_streams_notify_queue); + + /* Leave us enabled if there's still more to to do. */ + if (s->stdout_streams_notify_queue) + return 0; + + /* There was nothing to do anymore, let's turn ourselves off. */ + r = sd_event_source_set_enabled(es, SD_EVENT_OFF); + if (r < 0) + return log_error_errno(r, "Failed to turn off notify event source: %m"); + + return 0; +} + +static int server_connect_notify(Server *s) { + union sockaddr_union sa = { + .un.sun_family = AF_UNIX, + }; + const char *e; + int r; + + assert(s); + assert(s->notify_fd < 0); + assert(!s->notify_event_source); + + /* + So here's the problem: we'd like to send notification + messages to PID 1, but we cannot do that via sd_notify(), + since that's synchronous, and we might end up blocking on + it. Specifically: given that PID 1 might block on + dbus-daemon during IPC, and dbus-daemon is logging to us, + and might hence block on us, we might end up in a deadlock + if we block on sending PID 1 notification messages -- by + generating a full blocking circle. To avoid this, let's + create a non-blocking socket, and connect it to the + notification socket, and then wait for POLLOUT before we + send anything. This should efficiently avoid any deadlocks, + as we'll never block on PID 1, hence PID 1 can safely block + on dbus-daemon which can safely block on us again. + + Don't think that this issue is real? It is, see: + https://github.com/systemd/systemd/issues/1505 + */ + + e = getenv("NOTIFY_SOCKET"); + if (!e) + return 0; + + if ((e[0] != '@' && e[0] != '/') || e[1] == 0) { + log_error("NOTIFY_SOCKET set to an invalid value: %s", e); + return -EINVAL; + } + + if (strlen(e) > sizeof(sa.un.sun_path)) { + log_error("NOTIFY_SOCKET path too long: %s", e); + return -EINVAL; + } + + s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (s->notify_fd < 0) + return log_error_errno(errno, "Failed to create notify socket: %m"); + + (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE); + + strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path)); + if (sa.un.sun_path[0] == '@') + sa.un.sun_path[0] = 0; + + r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e)); + if (r < 0) + return log_error_errno(errno, "Failed to connect to notify socket: %m"); + + r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s); + if (r < 0) + return log_error_errno(r, "Failed to watch notification socket: %m"); + + /* This should fire pretty soon, which we'll use to send the + * READY=1 event. */ + + return 0; +} + int server_init(Server *s) { _cleanup_fdset_free_ FDSet *fds = NULL; int n, r, fd; @@ -1563,7 +1685,7 @@ int server_init(Server *s) { assert(s); zero(*s); - s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1; + s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1; s->compress = true; s->seal = true; @@ -1611,8 +1733,6 @@ int server_init(Server *s) { if (r < 0) return log_error_errno(r, "Failed to create event loop: %m"); - sd_event_set_watchdog(s->event, true); - n = sd_listen_fds(true); if (n < 0) return log_error_errno(n, "Failed to read listening file descriptors from environment: %m"); @@ -1718,6 +1838,8 @@ int server_init(Server *s) { server_cache_boot_id(s); server_cache_machine_id(s); + (void) server_connect_notify(s); + r = system_journal_open(s, false); if (r < 0) return r; @@ -1770,6 +1892,7 @@ void server_done(Server *s) { sd_event_source_unref(s->sigterm_event_source); sd_event_source_unref(s->sigint_event_source); sd_event_source_unref(s->hostname_event_source); + sd_event_source_unref(s->notify_event_source); sd_event_unref(s->event); safe_close(s->syslog_fd); @@ -1778,6 +1901,7 @@ void server_done(Server *s) { safe_close(s->dev_kmsg_fd); safe_close(s->audit_fd); safe_close(s->hostname_fd); + safe_close(s->notify_fd); if (s->rate_limit) journal_rate_limit_free(s->rate_limit); diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h index b294107788033..e59ff35e2280e 100644 --- a/src/journal/journald-server.h +++ b/src/journal/journald-server.h @@ -27,12 +27,15 @@ #include #include +typedef struct Server Server; + #include "sd-event.h" #include "journal-file.h" #include "hashmap.h" #include "util.h" #include "audit.h" #include "journald-rate-limit.h" +#include "journald-stream.h" #include "list.h" typedef enum Storage { @@ -52,15 +55,14 @@ typedef enum SplitMode { _SPLIT_INVALID = -1 } SplitMode; -typedef struct StdoutStream StdoutStream; - -typedef struct Server { +struct Server { int syslog_fd; int native_fd; int stdout_fd; int dev_kmsg_fd; int audit_fd; int hostname_fd; + int notify_fd; sd_event *event; @@ -75,6 +77,7 @@ typedef struct Server { sd_event_source *sigterm_event_source; sd_event_source *sigint_event_source; sd_event_source *hostname_event_source; + sd_event_source *notify_event_source; JournalFile *runtime_journal; JournalFile *system_journal; @@ -114,6 +117,7 @@ typedef struct Server { usec_t oldest_file_usec; LIST_HEAD(StdoutStream, stdout_streams); + LIST_HEAD(StdoutStream, stdout_streams_notify_queue); unsigned n_stdout_streams; char *tty_path; @@ -135,6 +139,7 @@ typedef struct Server { struct udev *udev; + bool sent_notify_ready; bool sync_scheduled; char machine_id_field[sizeof("_MACHINE_ID=") + 32]; @@ -145,7 +150,7 @@ typedef struct Server { char *cgroup_root; size_t line_max; -} Server; +}; #define N_IOVEC_META_FIELDS 20 #define N_IOVEC_KERNEL_FIELDS 64 diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c index 4d6b7ad189de7..9118d1a3189d9 100644 --- a/src/journal/journald-stream.c +++ b/src/journal/journald-stream.c @@ -80,6 +80,7 @@ struct StdoutStream { bool forward_to_console:1; bool fdstore:1; + bool in_notify_queue:1; char *buffer; size_t length; @@ -90,6 +91,7 @@ struct StdoutStream { char *state_file; LIST_FIELDS(StdoutStream, stdout_stream); + LIST_FIELDS(StdoutStream, stdout_stream_notify_queue); char id_field[sizeof("_STREAM_ID=")-1 + SD_ID128_STRING_MAX]; }; @@ -102,6 +104,9 @@ void stdout_stream_free(StdoutStream *s) { assert(s->server->n_stdout_streams > 0); s->server->n_stdout_streams --; LIST_REMOVE(stdout_stream, s->server->stdout_streams, s); + + if (s->in_notify_queue) + LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s); } if (s->event_source) { @@ -126,7 +131,7 @@ static void stdout_stream_destroy(StdoutStream *s) { return; if (s->state_file) - unlink(s->state_file); + (void) unlink(s->state_file); stdout_stream_free(s); } @@ -210,11 +215,15 @@ static int stdout_stream_save(StdoutStream *s) { free(temp_path); temp_path = NULL; - /* Store the connection fd in PID 1, so that we get it passed - * in again on next start */ - if (!s->fdstore) { - sd_pid_notify_with_fds(0, false, "FDSTORE=1", &s->fd, 1); - s->fdstore = true; + if (!s->fdstore && !s->in_notify_queue) { + LIST_PREPEND(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s); + s->in_notify_queue = true; + + if (s->server->notify_event_source) { + r = sd_event_source_set_enabled(s->server->notify_event_source, SD_EVENT_ON); + if (r < 0) + log_warning_errno(r, "Failed to enable notify event source: %m"); + } } finish: @@ -801,3 +810,50 @@ int server_open_stdout_socket(Server *s, FDSet *fds) { return 0; } + +void stdout_stream_send_notify(StdoutStream *s) { + struct iovec iovec = { + .iov_base = (char*) "FDSTORE=1", + .iov_len = strlen("FDSTORE=1"), + }; + struct msghdr msghdr = { + .msg_iov = &iovec, + .msg_iovlen = 1, + }; + struct cmsghdr *cmsg; + ssize_t l; + + assert(s); + assert(!s->fdstore); + assert(s->in_notify_queue); + assert(s->server); + assert(s->server->notify_fd >= 0); + + /* Store the connection fd in PID 1, so that we get it passed + * in again on next start */ + + msghdr.msg_controllen = CMSG_SPACE(sizeof(int)); + msghdr.msg_control = alloca0(msghdr.msg_controllen); + + cmsg = CMSG_FIRSTHDR(&msghdr); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + + memcpy(CMSG_DATA(cmsg), &s->fd, sizeof(int)); + + l = sendmsg(s->server->notify_fd, &msghdr, MSG_DONTWAIT|MSG_NOSIGNAL); + if (l < 0) { + if (errno == EAGAIN) + return; + + log_error_errno(errno, "Failed to send stream file descriptor to service manager: %m"); + } else { + log_debug("Successfully sent stream file descriptor to service manager."); + s->fdstore = 1; + } + + LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s); + s->in_notify_queue = false; + +} diff --git a/src/journal/journald-stream.h b/src/journal/journald-stream.h index 94bf955d782e3..fd8c94fb65197 100644 --- a/src/journal/journald-stream.h +++ b/src/journal/journald-stream.h @@ -21,9 +21,12 @@ along with systemd; If not, see . ***/ +typedef struct StdoutStream StdoutStream; + #include "fdset.h" #include "journald-server.h" int server_open_stdout_socket(Server *s, FDSet *fds); void stdout_stream_free(StdoutStream *s); +void stdout_stream_send_notify(StdoutStream *s); diff --git a/src/journal/journald.c b/src/journal/journald.c index 15bbcbe3def52..b7ba2b6ec54bd 100644 --- a/src/journal/journald.c +++ b/src/journal/journald.c @@ -64,10 +64,6 @@ int main(int argc, char *argv[]) { log_debug("systemd-journald running as pid "PID_FMT, getpid()); server_driver_message(&server, SD_MESSAGE_JOURNAL_START, "Journal started"); - sd_notify(false, - "READY=1\n" - "STATUS=Processing requests..."); - for (;;) { usec_t t = USEC_INFINITY, n; @@ -120,10 +116,6 @@ int main(int argc, char *argv[]) { server_driver_message(&server, SD_MESSAGE_JOURNAL_STOP, "Journal stopped"); finish: - sd_notify(false, - "STOPPING=1\n" - "STATUS=Shutting down..."); - server_done(&server); return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/units/systemd-journald.service.in b/units/systemd-journald.service.in index 0d1ea61fe8a8d..c94c0bfba19b1 100644 --- a/units/systemd-journald.service.in +++ b/units/systemd-journald.service.in @@ -22,7 +22,6 @@ RestartSec=0 StandardOutput=null FileDescriptorStoreMax=4224 CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE CAP_SYSLOG CAP_AUDIT_CONTROL CAP_AUDIT_READ CAP_CHOWN CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SETUID CAP_SETGID CAP_MAC_OVERRIDE -WatchdogSec=3min # Increase the default a bit in order to allow many simultaneous # services being run since we keep one fd open per service. Also, when