From 0994249a5ec4e363bfcf9af58a87a722e9a3a31b Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Tue, 26 Dec 2023 23:53:07 +1100
Subject: [PATCH 2/8] init: verify after chdir that cwd is inside the container

If a file descriptor of a directory in the host's mount namespace is
leaked to runc init, a malicious config.json could use /proc/self/fd/...
as a working directory to allow for host filesystem access after the
container runs. This can also be exploited by a container process if it
knows that an administrator will use "runc exec --cwd" and the target
--cwd (the attacker can change that cwd to be a symlink pointing to
/proc/self/fd/... and wait for the process to exec and then snoop on
/proc/$pid/cwd to get access to the host). The former issue can lead to
a critical vulnerability in Docker and Kubernetes, while the latter is a
container breakout.

We can (ab)use the fact that getcwd(2) on Linux detects this exact case,
and getcwd(3) and Go's Getwd() return an error as a result. Thus, if we
just do os.Getwd() after chdir we can easily detect this case and error
out.

In runc 1.1, a /sys/fs/cgroup handle happens to be leaked to "runc
init", making this exploitable. On runc main it just so happens that the
leaked /sys/fs/cgroup gets clobbered and thus this is only consistently
exploitable for runc 1.1.

Fixes: GHSA-xr7r-f8xq-vfvv CVE-2024-21626
Co-developed-by: lifubang <lifubang@acmcoder.com>
Signed-off-by: lifubang <lifubang@acmcoder.com>
[refactored the implementation and added more comments]
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
 libcontainer/init_linux.go               | 31 ++++++++++++++++++++++++
 libcontainer/integration/seccomp_test.go | 20 +++++++--------
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
index 5b88c71fc83a..d9f18139f54b 100644
--- a/libcontainer/init_linux.go
+++ b/libcontainer/init_linux.go
@@ -8,6 +8,7 @@ import (
 	"io"
 	"net"
 	"os"
+	"path/filepath"
 	"strings"
 	"unsafe"
 
@@ -135,6 +136,32 @@ func populateProcessEnvironment(env []string) error {
 	return nil
 }
 
+// verifyCwd ensures that the current directory is actually inside the mount
+// namespace root of the current process.
+func verifyCwd() error {
+	// getcwd(2) on Linux detects if cwd is outside of the rootfs of the
+	// current mount namespace root, and in that case prefixes "(unreachable)"
+	// to the returned string. glibc's getcwd(3) and Go's Getwd() both detect
+	// when this happens and return ENOENT rather than returning a non-absolute
+	// path. In both cases we can therefore easily detect if we have an invalid
+	// cwd by checking the return value of getcwd(3). See getcwd(3) for more
+	// details, and CVE-2024-21626 for the security issue that motivated this
+	// check.
+	//
+	// We have to use unix.Getwd() here because os.Getwd() has a workaround for
+	// $PWD which involves doing stat(.), which can fail if the current
+	// directory is inaccessible to the container process.
+	if wd, err := unix.Getwd(); errors.Is(err, unix.ENOENT) {
+		return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected")
+	} else if err != nil {
+		return fmt.Errorf("failed to verify if current working directory is safe: %w", err)
+	} else if !filepath.IsAbs(wd) {
+		// We shouldn't ever hit this, but check just in case.
+		return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd)
+	}
+	return nil
+}
+
 // finalizeNamespace drops the caps, sets the correct user
 // and working dir, and closes any leaked file descriptors
 // before executing the command inside the namespace
@@ -193,6 +220,10 @@ func finalizeNamespace(config *initConfig) error {
 			return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
 		}
 	}
+	// Make sure our final working directory is inside the container.
+	if err := verifyCwd(); err != nil {
+		return err
+	}
 	if err := system.ClearKeepCaps(); err != nil {
 		return fmt.Errorf("unable to clear keep caps: %w", err)
 	}
diff --git a/libcontainer/integration/seccomp_test.go b/libcontainer/integration/seccomp_test.go
index 31092a0a5d21..ecdfa7957df1 100644
--- a/libcontainer/integration/seccomp_test.go
+++ b/libcontainer/integration/seccomp_test.go
@@ -13,7 +13,7 @@ import (
 	libseccomp "github.com/seccomp/libseccomp-golang"
 )
 
-func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
+func TestSeccompDenySyslogWithErrno(t *testing.T) {
 	if testing.Short() {
 		return
 	}
@@ -25,7 +25,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
 		DefaultAction: configs.Allow,
 		Syscalls: []*configs.Syscall{
 			{
-				Name:     "getcwd",
+				Name:     "syslog",
 				Action:   configs.Errno,
 				ErrnoRet: &errnoRet,
 			},
@@ -39,7 +39,7 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
 	buffers := newStdBuffers()
 	pwd := &libcontainer.Process{
 		Cwd:    "/",
-		Args:   []string{"pwd"},
+		Args:   []string{"dmesg"},
 		Env:    standardEnvironment,
 		Stdin:  buffers.Stdin,
 		Stdout: buffers.Stdout,
@@ -65,17 +65,17 @@ func TestSeccompDenyGetcwdWithErrno(t *testing.T) {
 	}
 
 	if exitCode == 0 {
-		t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
+		t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
 	}
 
-	expected := "pwd: getcwd: No such process"
+	expected := "dmesg: klogctl: No such process"
 	actual := strings.Trim(buffers.Stderr.String(), "\n")
 	if actual != expected {
 		t.Fatalf("Expected output %s but got %s\n", expected, actual)
 	}
 }
 
-func TestSeccompDenyGetcwd(t *testing.T) {
+func TestSeccompDenySyslog(t *testing.T) {
 	if testing.Short() {
 		return
 	}
@@ -85,7 +85,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
 		DefaultAction: configs.Allow,
 		Syscalls: []*configs.Syscall{
 			{
-				Name:   "getcwd",
+				Name:   "syslog",
 				Action: configs.Errno,
 			},
 		},
@@ -98,7 +98,7 @@ func TestSeccompDenyGetcwd(t *testing.T) {
 	buffers := newStdBuffers()
 	pwd := &libcontainer.Process{
 		Cwd:    "/",
-		Args:   []string{"pwd"},
+		Args:   []string{"dmesg"},
 		Env:    standardEnvironment,
 		Stdin:  buffers.Stdin,
 		Stdout: buffers.Stdout,
@@ -124,10 +124,10 @@ func TestSeccompDenyGetcwd(t *testing.T) {
 	}
 
 	if exitCode == 0 {
-		t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
+		t.Fatalf("dmesg should fail with negative exit code, instead got %d!", exitCode)
 	}
 
-	expected := "pwd: getcwd: Operation not permitted"
+	expected := "dmesg: klogctl: Operation not permitted"
 	actual := strings.Trim(buffers.Stderr.String(), "\n")
 	if actual != expected {
 		t.Fatalf("Expected output %s but got %s\n", expected, actual)
-- 
2.43.0