From 892f2ded6f94dfc0527e27357a1b48b8811429a3 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 3 Feb 2017 14:17:34 +0100 Subject: [PATCH] fix systemd-notify when using a different PID namespace The current support of systemd-notify has a race condition as the message send to the systemd notify socket might be dropped if the sender process is not running by the time systemd checks for the sender of the datagram. A proper fix of this in systemd would require changes to the kernel to maintain the cgroup of the sender process when it is dead (but it is not probably going to happen...) Generally, the solution to this issue is to specify the PID in the message itself so that systemd has not to guess the sender, but this wouldn't work when running in a PID namespace as the container will pass the PID known in its namespace (something like PID=1,2,3..) and systemd running on the host is not able to map it to the runc service. The proposed solution is to have a proxy in runc that forwards the messages to the host systemd. Example of this issue: https://github.com/projectatomic/atomic-system-containers/pull/24 Signed-off-by: Giuseppe Scrivano --- notify_socket.go | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ restore.go | 9 +++++- signals.go | 18 +++++++++-- utils.go | 4 --- utils_linux.go | 26 +++++++++++----- 5 files changed, 122 insertions(+), 16 deletions(-) create mode 100644 notify_socket.go diff --git a/notify_socket.go b/notify_socket.go new file mode 100644 index 00000000000..5e174e0badd --- /dev/null +++ b/notify_socket.go @@ -0,0 +1,81 @@ +// +build linux + +package main + +import ( + "fmt" + "net" + "path/filepath" + + "github.com/Sirupsen/logrus" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/urfave/cli" +) + +type notifySocket struct { + socket *net.UnixConn + host string + socketPath string +} + +func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *notifySocket { + if notifySocketHost == "" { + return nil + } + + root := filepath.Join(context.GlobalString("root"), id) + path := filepath.Join(root, "notify.sock") + + notifySocket := ¬ifySocket{ + socket: nil, + host: notifySocketHost, + socketPath: path, + } + + return notifySocket +} + +func (ns *notifySocket) Close() error { + return ns.socket.Close() +} + +// If systemd is supporting sd_notify protocol, this function will add support +// for sd_notify protocol from within the container. +func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) { + mount := specs.Mount{Destination: s.host, Type: "bind", Source: s.socketPath, Options: []string{"bind"}} + spec.Mounts = append(spec.Mounts, mount) + spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host)) +} + +func (s *notifySocket) setupSocket() error { + addr := net.UnixAddr{ + Name: s.socketPath, + Net: "unixgram", + } + + socket, err := net.ListenUnixgram("unixgram", &addr) + if err != nil { + return err + } + + s.socket = socket + return nil +} + +func (notifySocket *notifySocket) run() { + buf := make([]byte, 512) + notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"} + client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr) + if err != nil { + logrus.Error(err) + return + } + for { + r, err := notifySocket.socket.Read(buf) + if err != nil { + break + } + + client.Write(buf[0:r]) + } +} diff --git a/restore.go b/restore.go index 91b1efe56c5..a238205a00d 100644 --- a/restore.go +++ b/restore.go @@ -165,7 +165,14 @@ func restoreContainer(context *cli.Context, spec *specs.Spec, config *configs.Co if err != nil { return -1, err } - handler := newSignalHandler(!context.Bool("no-subreaper")) + + notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id) + if notifySocket != nil { + notifySocket.setupSpec(context, spec) + notifySocket.setupSocket() + } + + handler := newSignalHandler(!context.Bool("no-subreaper"), notifySocket) if err := container.Restore(process, options); err != nil { return -1, err } diff --git a/signals.go b/signals.go index f3dcf1790fc..0171da375a6 100644 --- a/signals.go +++ b/signals.go @@ -17,7 +17,9 @@ const signalBufferSize = 2048 // newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals // while still forwarding all other signals to the process. -func newSignalHandler(enableSubreaper bool) *signalHandler { +// If notifySocket is present, use it to read systemd notifications from the container and +// forward them to notifySocketHost. +func newSignalHandler(enableSubreaper bool, notifySocket *notifySocket) *signalHandler { if enableSubreaper { // set us as the subreaper before registering the signal handler for the container if err := system.SetSubreaper(1); err != nil { @@ -30,7 +32,8 @@ func newSignalHandler(enableSubreaper bool) *signalHandler { // handle all signals for the process. signal.Notify(s) return &signalHandler{ - signals: s, + signals: s, + notifySocket: notifySocket, } } @@ -42,7 +45,8 @@ type exit struct { } type signalHandler struct { - signals chan os.Signal + signals chan os.Signal + notifySocket *notifySocket } // forward handles the main signal event loop forwarding, resizing, or reaping depending @@ -54,6 +58,11 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty) (int, e if err != nil { return -1, err } + + if h.notifySocket != nil { + go h.notifySocket.run() + } + // perform the initial tty resize. tty.resize() for s := range h.signals { @@ -75,6 +84,9 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty) (int, e // status because we must ensure that any of the go specific process // fun such as flushing pipes are complete before we return. process.Wait() + if h.notifySocket != nil { + h.notifySocket.Close() + } return e.status, nil } } diff --git a/utils.go b/utils.go index ce610440d43..cb199555a1f 100644 --- a/utils.go +++ b/utils.go @@ -58,10 +58,6 @@ func setupSpec(context *cli.Context) (*specs.Spec, error) { if err != nil { return nil, err } - notifySocket := os.Getenv("NOTIFY_SOCKET") - if notifySocket != "" { - setupSdNotify(spec, notifySocket) - } if os.Geteuid() != 0 { return nil, fmt.Errorf("runc should be run as root") } diff --git a/utils_linux.go b/utils_linux.go index 867c2d7adbb..b8c105a9e4a 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -95,13 +95,6 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { return lp, nil } -// If systemd is supporting sd_notify protocol, this function will add support -// for sd_notify protocol from within the container. -func setupSdNotify(spec *specs.Spec, notifySocket string) { - spec.Mounts = append(spec.Mounts, specs.Mount{Destination: notifySocket, Type: "bind", Source: notifySocket, Options: []string{"bind"}}) - spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket)) -} - func destroy(container libcontainer.Container) { if err := container.Destroy(); err != nil { logrus.Error(err) @@ -184,6 +177,7 @@ type runner struct { consoleSocket string container libcontainer.Container create bool + notifySocket *notifySocket } func (r *runner) terminalinfo() *libcontainer.TerminalInfo { @@ -225,6 +219,10 @@ func (r *runner) run(config *specs.Process) (int, error) { r.destroy() return -1, fmt.Errorf("cannot use console socket if runc will not detach or allocate tty") } + if detach && r.notifySocket != nil { + r.destroy() + return -1, fmt.Errorf("cannot detach when using NOTIFY_SOCKET") + } startFn := r.container.Start if !r.create { @@ -233,7 +231,7 @@ func (r *runner) run(config *specs.Process) (int, error) { // Setting up IO is a two stage process. We need to modify process to deal // with detaching containers, and then we get a tty after the container has // started. - handler := newSignalHandler(r.enableSubreaper) + handler := newSignalHandler(r.enableSubreaper, r.notifySocket) tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach) if err != nil { r.destroy() @@ -336,10 +334,21 @@ func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, e if id == "" { return -1, errEmptyID } + + notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id) + if notifySocket != nil { + notifySocket.setupSpec(context, spec) + } + container, err := createContainer(context, id, spec) if err != nil { return -1, err } + + if notifySocket != nil { + notifySocket.setupSocket() + } + // Support on-demand socket activation by passing file descriptors into the container init process. listenFDs := []*os.File{} if os.Getenv("LISTEN_FDS") != "" { @@ -350,6 +359,7 @@ func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, e shouldDestroy: true, container: container, listenFDs: listenFDs, + notifySocket: notifySocket, consoleSocket: context.String("console-socket"), detach: context.Bool("detach"), pidFile: context.String("pid-file"),