Skip to content

Commit f1d0dd8

Browse files
committed
runc create/run/exec: show fatal errors from init
In case early stage of runc init (nsenter) fails for some reason, it logs error(s) with FATAL log level, via bail(). The runc init log is read by a parent (runc create/run/exec) and is logged via normal logrus mechanism, which is all fine and dandy, except when `runc init` fails, we return the error from the parent (which is usually not too helpful, for example): runc run failed: unable to start container process: can't get final child's PID from pipe: EOF Now, the actual underlying error is from runc init and it was logged earlier; here's how full runc output looks like: FATA[0000] nsexec-1[3247792]: failed to unshare remaining namespaces: No space left on device FATA[0000] nsexec-0[3247790]: failed to sync with stage-1: next state ERRO[0000] runc run failed: unable to start container process: can't get final child's PID from pipe: EOF The problem is, upper level runtimes tend to ignore everything except the last line from runc, and thus error reported by e.g. docker is not very helpful. This patch tries to improve the situation by collecting FATAL errors from runc init and appending those to the error returned (instead of logging). With it, the above error will look like this: ERRO[0000] runc run failed: unable to start container process: can't get final child's PID from pipe: EOF; runc init error(s): nsexec-1[141549]: failed to unshare remaining namespaces: No space left on device; nsexec-0[141547]: failed to sync with stage-1: next state Yes, it is long and ugly, but at least the upper level runtime will report it. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com> (cherry picked from commit f944cce) Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
1 parent 4615662 commit f1d0dd8

File tree

2 files changed

+40
-16
lines changed

2 files changed

+40
-16
lines changed

libcontainer/container_linux.go

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,6 @@ func (c *Container) start(process *Process) (retErr error) {
338338
// We do not need the cloned binaries once the process is spawned.
339339
defer process.closeClonedExes()
340340

341-
logsDone := parent.forwardChildLogs()
342-
343341
// Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
344342
// to make sure we don't leak any files into "runc init". Any files to be
345343
// passed to "runc init" through ExtraFiles will get dup2'd by the Go
@@ -349,21 +347,28 @@ func (c *Container) start(process *Process) (retErr error) {
349347
if err := utils.CloseExecFrom(3); err != nil {
350348
return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
351349
}
352-
if err := parent.start(); err != nil {
353-
return fmt.Errorf("unable to start container process: %w", err)
354-
}
355350

356-
if logsDone != nil {
351+
if logsDone := parent.forwardChildLogs(); logsDone != nil {
357352
defer func() {
358353
// Wait for log forwarder to finish. This depends on
359354
// runc init closing the _LIBCONTAINER_LOGPIPE log fd.
360355
err := <-logsDone
361-
if err != nil && retErr == nil {
362-
retErr = fmt.Errorf("unable to forward init logs: %w", err)
356+
if err != nil {
357+
// runc init errors are important; make sure retErr has them.
358+
err = fmt.Errorf("runc init error(s): %w", err)
359+
if retErr != nil {
360+
retErr = fmt.Errorf("%w; %w", retErr, err)
361+
} else {
362+
retErr = err
363+
}
363364
}
364365
}()
365366
}
366367

368+
if err := parent.start(); err != nil {
369+
return fmt.Errorf("unable to start container process: %w", err)
370+
}
371+
367372
if process.Init {
368373
c.fifo.Close()
369374
if c.config.HasHook(configs.Poststart) {

libcontainer/logs/logs.go

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@ package logs
22

33
import (
44
"bufio"
5+
"bytes"
56
"encoding/json"
7+
"errors"
68
"io"
79

810
"github.com/sirupsen/logrus"
911
)
1012

13+
var fatalsSep = []byte("; ")
14+
1115
func ForwardLogs(logPipe io.ReadCloser) chan error {
1216
done := make(chan error, 1)
1317
s := bufio.NewScanner(logPipe)
@@ -23,24 +27,33 @@ func ForwardLogs(logPipe io.ReadCloser) chan error {
2327
}
2428

2529
go func() {
30+
fatals := []byte{}
2631
for s.Scan() {
27-
processEntry(s.Bytes(), logger)
32+
fatals = processEntry(s.Bytes(), logger, fatals)
33+
}
34+
if err := s.Err(); err != nil {
35+
logrus.Errorf("error reading from log source: %v", err)
2836
}
2937
if err := logPipe.Close(); err != nil {
3038
logrus.Errorf("error closing log source: %v", err)
3139
}
32-
// The only error we want to return is when reading from
33-
// logPipe has failed.
34-
done <- s.Err()
40+
// The only error we return is fatal messages from runc init.
41+
var err error
42+
if len(fatals) > 0 {
43+
err = errors.New(string(bytes.TrimSuffix(fatals, fatalsSep)))
44+
}
45+
done <- err
3546
close(done)
3647
}()
3748

3849
return done
3950
}
4051

41-
func processEntry(text []byte, logger *logrus.Logger) {
52+
// processEntry parses the error and either logs it via the standard logger or,
53+
// if this is a fatal error, appends its text to fatals.
54+
func processEntry(text []byte, logger *logrus.Logger, fatals []byte) []byte {
4255
if len(text) == 0 {
43-
return
56+
return fatals
4457
}
4558

4659
var jl struct {
@@ -49,8 +62,14 @@ func processEntry(text []byte, logger *logrus.Logger) {
4962
}
5063
if err := json.Unmarshal(text, &jl); err != nil {
5164
logrus.Errorf("failed to decode %q to json: %v", text, err)
52-
return
65+
return fatals
5366
}
5467

55-
logger.Log(jl.Level, jl.Msg)
68+
if jl.Level == logrus.FatalLevel {
69+
fatals = append(fatals, jl.Msg...)
70+
fatals = append(fatals, fatalsSep...)
71+
} else {
72+
logger.Log(jl.Level, jl.Msg)
73+
}
74+
return fatals
5675
}

0 commit comments

Comments
 (0)