From 0d0d6a75c01f1b073fddaa9e26a909ee171cde49 Mon Sep 17 00:00:00 2001 From: Cameron Nemo Date: Mon, 11 Feb 2019 17:53:28 -0800 Subject: [PATCH] lxc: fix CVE-2019-5736 --- srcpkgs/lxc/patches/CVE-2019-5736.patch | 399 ++++++++++++++++++++++++ srcpkgs/lxc/template | 2 +- 2 files changed, 400 insertions(+), 1 deletion(-) create mode 100644 srcpkgs/lxc/patches/CVE-2019-5736.patch diff --git a/srcpkgs/lxc/patches/CVE-2019-5736.patch b/srcpkgs/lxc/patches/CVE-2019-5736.patch new file mode 100644 index 0000000000..cb105e2912 --- /dev/null +++ b/srcpkgs/lxc/patches/CVE-2019-5736.patch @@ -0,0 +1,399 @@ +From 6400238d08cdf1ca20d49bafb85f4e224348bf9d Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Sat, 26 Jan 2019 01:19:29 +0100 +Subject: [PATCH] CVE-2019-5736 (runC): rexec callers as memfd +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Adam Iwaniuk and Borys Popławski discovered that an attacker can compromise the +runC host binary from inside a privileged runC container. As a result, this +could be exploited to gain root access on the host. runC is used as the default +runtime for containers with Docker, containerd, Podman, and CRI-O. + +The attack can be made when attaching to a running container or when starting a +container running a specially crafted image. For example, when runC attaches +to a container the attacker can trick it into executing itself. This could be +done by replacing the target binary inside the container with a custom binary +pointing back at the runC binary itself. As an example, if the target binary +was /bin/bash, this could be replaced with an executable script specifying the +interpreter path #!/proc/self/exe (/proc/self/exe is a symbolic link created +by the kernel for every process which points to the binary that was executed +for that process). 
As such when /bin/bash is executed inside the container, +instead the target of /proc/self/exe will be executed - which will point to the +runC binary on the host. The attacker can then proceed to write to the target +of /proc/self/exe to try and overwrite the runC binary on the host. However in +general, this will not succeed as the kernel will not permit it to be +overwritten whilst runC is executing. To overcome this, the attacker can +instead open a file descriptor to /proc/self/exe using the O_PATH flag and then +proceed to reopen the binary as O_WRONLY through /proc/self/fd/<nr> and try to +write to it in a busy loop from a separate process. Ultimately it will succeed +when the runC binary exits. After this the runC binary is compromised and can +be used to attack other containers or the host itself. + +This attack is only possible with privileged containers since it requires root +privilege on the host to overwrite the runC binary. Unprivileged containers +with a non-identity ID mapping do not have the permission to write to the host +binary and therefore are unaffected by this attack. + +LXC is also impacted in a similar manner by this vulnerability, however as the +LXC project considers privileged containers to be unsafe no CVE has been +assigned for this issue for LXC. Quoting from the +https://linuxcontainers.org/lxc/security/ project's Security information page: + +"As privileged containers are considered unsafe, we typically will not consider +new container escape exploits to be security issues worthy of a CVE and quick +fix. We will however try to mitigate those issues so that accidental damage to +the host is prevented." + +To prevent this attack, LXC has been patched to create a temporary copy of the +calling binary itself when it starts or attaches to containers. 
To do this LXC +creates an anonymous, in-memory file using the memfd_create() system call and +copies itself into the temporary in-memory file, which is then sealed to +prevent further modifications. LXC then executes this sealed, in-memory file +instead of the original on-disk binary. Any compromising write operations from +a privileged container to the host LXC binary will then write to the temporary +in-memory binary and not to the host binary on-disk, preserving the integrity +of the host LXC binary. Also as the temporary, in-memory LXC binary is sealed, +writes to this will also fail. + +Note: memfd_create() was added to the Linux kernel in the 3.17 release. + +Signed-off-by: Christian Brauner +Co-Developed-by: Alesa Sarai +Acked-by: Serge Hallyn +Signed-off-by: Christian Brauner +--- + configure.ac | 12 +++ + src/lxc/Makefile.am | 4 + + src/lxc/file_utils.c | 41 ++++++++- + src/lxc/file_utils.h | 1 + + src/lxc/rexec.c | 181 +++++++++++++++++++++++++++++++++++++ + src/lxc/syscall_wrappers.h | 14 +++ + 6 files changed, 252 insertions(+), 1 deletion(-) + create mode 100644 src/lxc/rexec.c + +diff --git a/configure.ac b/configure.ac +index 8313b18d1..d43dabc0d 100644 +--- configure.ac ++++ configure.ac +@@ -746,6 +746,17 @@ AM_COND_IF([ENABLE_DLOG], + ]) + ]) + ++AC_ARG_ENABLE([memfd-rexec], ++ [AC_HELP_STRING([--enable-memfd-rexec], [enforce liblxc as a memfd to protect against certain symlink attacks [default=yes]])], ++ [], [enable_memfd_rexec=yes]) ++AM_CONDITIONAL([ENFORCE_MEMFD_REXEC], [test "x$enable_memfd_rexec" = "xyes"]) ++if test "x$enable_memfd_rexec" = "xyes"; then ++ AC_DEFINE([ENFORCE_MEMFD_REXEC], 1, [Rexec liblxc as memfd]) ++ AC_MSG_RESULT([yes]) ++else ++ AC_MSG_RESULT([no]) ++fi ++ + # Files requiring some variable expansion + AC_CONFIG_FILES([ + Makefile +@@ -974,6 +985,7 @@ Security features: + - Linux capabilities: $enable_capabilities + - seccomp: $enable_seccomp + - SELinux: $enable_selinux ++ - memfd rexec: $enable_memfd_rexec + + PAM: + 
- PAM module: $enable_pam +diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am +index 6ba9ecad2..dc8aa3d77 100644 +--- src/lxc/Makefile.am ++++ src/lxc/Makefile.am +@@ -177,6 +177,10 @@ if !HAVE_STRLCAT + liblxc_la_SOURCES += ../include/strlcat.c ../include/strlcat.h + endif + ++if ENFORCE_MEMFD_REXEC ++liblxc_la_SOURCES += rexec.c ++endif ++ + AM_CFLAGS = -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ + -DLXCPATH=\"$(LXCPATH)\" \ + -DLXC_GLOBAL_CONF=\"$(LXC_GLOBAL_CONF)\" \ +diff --git a/src/lxc/file_utils.c b/src/lxc/file_utils.c +index f89aa638d..930fd738a 100644 +--- src/lxc/file_utils.c ++++ src/lxc/file_utils.c +@@ -31,7 +31,7 @@ + #include "config.h" + #include "file_utils.h" + #include "macro.h" +-#include "string.h" ++#include "string_utils.h" + + int lxc_write_to_file(const char *filename, const void *buf, size_t count, + bool add_newline, mode_t mode) +@@ -327,3 +327,42 @@ ssize_t lxc_sendfile_nointr(int out_fd, int in_fd, off_t *offset, size_t count) + + return ret; + } ++ ++char *file_to_buf(char *path, size_t *length) ++{ ++ int fd; ++ char buf[PATH_MAX]; ++ char *copy = NULL; ++ ++ if (!length) ++ return NULL; ++ ++ fd = open(path, O_RDONLY | O_CLOEXEC); ++ if (fd < 0) ++ return NULL; ++ ++ *length = 0; ++ for (;;) { ++ int n; ++ char *old = copy; ++ ++ n = lxc_read_nointr(fd, buf, sizeof(buf)); ++ if (n < 0) ++ goto on_error; ++ if (!n) ++ break; ++ ++ copy = must_realloc(old, (*length + n) * sizeof(*old)); ++ memcpy(copy + *length, buf, n); ++ *length += n; ++ } ++ ++ close(fd); ++ return copy; ++ ++on_error: ++ close(fd); ++ free(copy); ++ ++ return NULL; ++} +diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h +index 6361557a0..518a61af3 100644 +--- src/lxc/file_utils.h ++++ src/lxc/file_utils.h +@@ -55,5 +55,6 @@ extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val); + extern FILE *fopen_cloexec(const char *path, const char *mode); + extern ssize_t lxc_sendfile_nointr(int out_fd, int in_fd, off_t *offset, + size_t count); 
++extern char *file_to_buf(char *path, size_t *length); + + #endif /* __LXC_FILE_UTILS_H */ +diff --git a/src/lxc/rexec.c b/src/lxc/rexec.c +new file mode 100644 +index 000000000..396bd617f +--- /dev/null ++++ src/lxc/rexec.c +@@ -0,0 +1,181 @@ ++/* liblxcapi ++ * ++ * Copyright © 2019 Christian Brauner . ++ * Copyright © 2019 Canonical Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
++ */ ++ ++#ifndef _GNU_SOURCE ++#define _GNU_SOURCE 1 ++#endif ++#include ++#include ++#include ++#include ++ ++#include "config.h" ++#include "file_utils.h" ++#include "raw_syscalls.h" ++#include "string_utils.h" ++#include "syscall_wrappers.h" ++ ++#define LXC_MEMFD_REXEC_SEALS \ ++ (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE) ++ ++static int push_vargs(char *data, int data_length, char ***output) ++{ ++ int num = 0; ++ char *cur = data; ++ ++ if (!data || *output) ++ return -1; ++ ++ *output = must_realloc(NULL, sizeof(**output)); ++ ++ while (cur < data + data_length) { ++ num++; ++ *output = must_realloc(*output, (num + 1) * sizeof(**output)); ++ ++ (*output)[num - 1] = cur; ++ cur += strlen(cur) + 1; ++ } ++ (*output)[num] = NULL; ++ return num; ++} ++ ++static int parse_exec_params(char ***argv, char ***envp) ++{ ++ int ret; ++ char *cmdline = NULL, *env = NULL; ++ size_t cmdline_size, env_size; ++ ++ cmdline = file_to_buf("/proc/self/cmdline", &cmdline_size); ++ if (!cmdline) ++ goto on_error; ++ ++ env = file_to_buf("/proc/self/environ", &env_size); ++ if (!env) ++ goto on_error; ++ ++ ret = push_vargs(cmdline, cmdline_size, argv); ++ if (ret <= 0) ++ goto on_error; ++ ++ ret = push_vargs(env, env_size, envp); ++ if (ret <= 0) ++ goto on_error; ++ ++ return 0; ++ ++on_error: ++ free(env); ++ free(cmdline); ++ ++ return -1; ++} ++ ++static int is_memfd(void) ++{ ++ int fd, saved_errno, seals; ++ ++ fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC); ++ if (fd < 0) ++ return -ENOTRECOVERABLE; ++ ++ seals = fcntl(fd, F_GET_SEALS); ++ saved_errno = errno; ++ close(fd); ++ errno = saved_errno; ++ if (seals < 0) ++ return -EINVAL; ++ ++ return seals == LXC_MEMFD_REXEC_SEALS; ++} ++ ++static void lxc_rexec_as_memfd(char **argv, char **envp, const char *memfd_name) ++{ ++ int saved_errno; ++ ssize_t bytes_sent; ++ int fd = -1, memfd = -1; ++ ++ memfd = memfd_create(memfd_name, MFD_ALLOW_SEALING | MFD_CLOEXEC); ++ if (memfd < 0) ++ return; ++ ++ fd = 
open("/proc/self/exe", O_RDONLY | O_CLOEXEC); ++ if (fd < 0) ++ goto on_error; ++ ++ /* sendfile() handles up to 2GB. */ ++ bytes_sent = lxc_sendfile_nointr(memfd, fd, NULL, LXC_SENDFILE_MAX); ++ saved_errno = errno; ++ close(fd); ++ errno = saved_errno; ++ if (bytes_sent < 0) ++ goto on_error; ++ ++ if (fcntl(memfd, F_ADD_SEALS, LXC_MEMFD_REXEC_SEALS)) ++ goto on_error; ++ ++ fexecve(memfd, argv, envp); ++ ++on_error: ++ saved_errno = errno; ++ close(memfd); ++ errno = saved_errno; ++} ++ ++static int lxc_rexec(const char *memfd_name) ++{ ++ int ret; ++ char **argv = NULL, **envp = NULL; ++ ++ ret = is_memfd(); ++ if (ret < 0 && ret == -ENOTRECOVERABLE) { ++ fprintf(stderr, ++ "%s - Failed to determine whether this is a memfd\n", ++ strerror(errno)); ++ return -1; ++ } else if (ret > 0) { ++ return 0; ++ } ++ ++ ret = parse_exec_params(&argv, &envp); ++ if (ret < 0) { ++ fprintf(stderr, ++ "%s - Failed to parse command line parameters\n", ++ strerror(errno)); ++ return -1; ++ } ++ ++ lxc_rexec_as_memfd(argv, envp, memfd_name); ++ fprintf(stderr, "%s - Failed to rexec as memfd\n", strerror(errno)); ++ return -1; ++} ++ ++/** ++ * This function will copy any binary that calls liblxc into a memory file and ++ * will use the memfd to rexecute the binary. This is done to prevent attacks ++ * through the /proc/self/exe symlink to corrupt the host binary when host and ++ * container are in the same user namespace or have set up an identity id ++ * mapping: CVE-2019-5736. 
++ */ ++__attribute__((constructor)) static void liblxc_rexec(void) ++{ ++ if (lxc_rexec("liblxc")) { ++ fprintf(stderr, "Failed to re-execute liblxc via memory file descriptor\n"); ++ _exit(EXIT_FAILURE); ++ } ++} +diff --git a/src/lxc/syscall_wrappers.h b/src/lxc/syscall_wrappers.h +index 42d94db28..dca4d1571 100644 +--- src/lxc/syscall_wrappers.h ++++ src/lxc/syscall_wrappers.h +@@ -58,6 +58,20 @@ static inline long __keyctl(int cmd, unsigned long arg2, unsigned long arg3, + #define keyctl __keyctl + #endif + ++#ifndef F_LINUX_SPECIFIC_BASE ++#define F_LINUX_SPECIFIC_BASE 1024 ++#endif ++#ifndef F_ADD_SEALS ++#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) ++#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) ++#endif ++#ifndef F_SEAL_SEAL ++#define F_SEAL_SEAL 0x0001 ++#define F_SEAL_SHRINK 0x0002 ++#define F_SEAL_GROW 0x0004 ++#define F_SEAL_WRITE 0x0008 ++#endif ++ + #ifndef HAVE_MEMFD_CREATE + static inline int memfd_create(const char *name, unsigned int flags) { + #ifndef __NR_memfd_create diff --git a/srcpkgs/lxc/template b/srcpkgs/lxc/template index c81a5d995d..0a61df9586 100644 --- a/srcpkgs/lxc/template +++ b/srcpkgs/lxc/template @@ -3,7 +3,7 @@ _desc="Linux Containers" pkgname=lxc version=3.0.3 -revision=2 +revision=3 build_style=gnu-configure configure_args="--enable-doc --enable-seccomp --enable-capabilities --enable-apparmor --with-distro=none