kernel-hardening - Re: [PATCH v11 06/12] seccomp: add system call filtering using BPF

Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAGXu5jKJ7pYeULM=CvifpQ8jUoyH6Ska2Znx9tGjhz0tn5kZmQ@mail.gmail.com>
Date: Mon, 27 Feb 2012 10:55:10 -0800
From: Kees Cook <keescook@...omium.org>
To: Eric Paris <eparis@...hat.com>
Cc: Will Drewry <wad@...omium.org>, Kees Cook <kees@...ntu.com>, linux-kernel@...r.kernel.org, 
	linux-arch@...r.kernel.org, linux-doc@...r.kernel.org, 
	kernel-hardening@...ts.openwall.com, netdev@...r.kernel.org, x86@...nel.org, 
	arnd@...db.de, davem@...emloft.net, hpa@...or.com, mingo@...hat.com, 
	oleg@...hat.com, peterz@...radead.org, rdunlap@...otime.net, 
	mcgrathr@...omium.org, tglx@...utronix.de, luto@....edu, 
	serge.hallyn@...onical.com, djm@...drot.org, scarybeasts@...il.com, 
	indan@....nu, pmoore@...hat.com, akpm@...ux-foundation.org, corbet@....net, 
	eric.dumazet@...il.com, markus@...omium.org, coreyb@...ux.vnet.ibm.com
Subject: Re: [PATCH v11 06/12] seccomp: add system call filtering using BPF

On Mon, Feb 27, 2012 at 8:49 AM, Eric Paris <eparis@...hat.com> wrote:
> On Mon, 2012-02-27 at 10:23 -0600, Will Drewry wrote:
>> On Sun, Feb 26, 2012 at 2:28 PM, Kees Cook <kees@...ntu.com> wrote:
>> > On Fri, Feb 24, 2012 at 09:21:45PM -0600, Will Drewry wrote:
>> >> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
>> >> index e8d76c5..25e8296 100644
>> >> --- a/kernel/seccomp.c
>> >> +++ b/kernel/seccomp.c
>> >> [...]
>> >> +static void seccomp_filter_log_failure(int syscall)
>> >> +{
>> >> +     int compat = 0;
>> >> +#ifdef CONFIG_COMPAT
>> >> +     compat = is_compat_task();
>> >> +#endif
>> >> +     pr_info("%s[%d]: %ssystem call %d blocked at 0x%lx\n",
>> >> +             current->comm, task_pid_nr(current),
>> >> +             (compat ? "compat " : ""),
>> >> +             syscall, KSTK_EIP(current));
>> >> +}
>> >> [...]
>> >> +#ifdef CONFIG_SECCOMP_FILTER
>> >> +     case SECCOMP_MODE_FILTER:
>> >> +             if (seccomp_run_filters(this_syscall) == SECCOMP_RET_ALLOW)
>> >> +                     return;
>> >> +             seccomp_filter_log_failure(this_syscall);
>> >> +             exit_code = SIGSYS;
>> >> +             break;
>> >> +#endif
>> >>       default:
>> >>               BUG();
>> >>       }
>> >> @@ -56,7 +324,7 @@ void __secure_computing(int this_syscall)
>> >>       dump_stack();
>> >>  #endif
>> >>       audit_seccomp(this_syscall);
>> >> -     do_exit(SIGKILL);
>> >> +     do_exit(exit_code);
>> >>  }
>> >
>> > I think the seccomp_filter_log_failure() use is redundant with the
>> > audit_seccomp call.  Here's a possible reorganization of the logging...
>>
>> Cool - a comment below:
>>
>> > From: Kees Cook <keescook@...omium.org>
>> > Date: Sun, 26 Feb 2012 11:56:12 -0800
>> > Subject: [PATCH] seccomp: improve audit logging details
>> >
>> > This consolidates the seccomp filter error logging path and adds more
>> > details to the audit log.
>> >
>> > Signed-off-by: Kees Cook <keescook@...omium.org>
>> > ---
>> >  include/linux/audit.h |    8 ++++----
>> >  kernel/auditsc.c      |    9 +++++++--
>> >  kernel/seccomp.c      |   15 +--------------
>> >  3 files changed, 12 insertions(+), 20 deletions(-)
>> >
>> > diff --git a/include/linux/audit.h b/include/linux/audit.h
>> > index 9ff7a2c..5aa6cfc 100644
>> > --- a/include/linux/audit.h
>> > +++ b/include/linux/audit.h
>> > @@ -463,7 +463,7 @@ extern void audit_putname(const char *name);
>> >  extern void __audit_inode(const char *name, const struct dentry *dentry);
>> >  extern void __audit_inode_child(const struct dentry *dentry,
>> >                                const struct inode *parent);
>> > -extern void __audit_seccomp(unsigned long syscall);
>> > +extern void __audit_seccomp(unsigned long syscall, long signr);
>> >  extern void __audit_ptrace(struct task_struct *t);
>> >
>> >  static inline int audit_dummy_context(void)
>> > @@ -508,10 +508,10 @@ static inline void audit_inode_child(const struct dentry *dentry,
>> >  }
>> >  void audit_core_dumps(long signr);
>> >
>> > -static inline void audit_seccomp(unsigned long syscall)
>> > +static inline void audit_seccomp(unsigned long syscall, long signr)
>> >  {
>> >        if (unlikely(!audit_dummy_context()))
>> > -               __audit_seccomp(syscall);
>> > +               __audit_seccomp(syscall, signr);
>> >  }
>> >
>> >  static inline void audit_ptrace(struct task_struct *t)
>> > @@ -634,7 +634,7 @@ extern int audit_signals;
>> >  #define audit_inode(n,d) do { (void)(d); } while (0)
>> >  #define audit_inode_child(i,p) do { ; } while (0)
>> >  #define audit_core_dumps(i) do { ; } while (0)
>> > -#define audit_seccomp(i) do { ; } while (0)
>> > +#define audit_seccomp(i,s) do { ; } while (0)
>> >  #define auditsc_get_stamp(c,t,s) (0)
>> >  #define audit_get_loginuid(t) (-1)
>> >  #define audit_get_sessionid(t) (-1)
>> > diff --git a/kernel/auditsc.c b/kernel/auditsc.c
>> > index af1de0f..74652fe 100644
>> > --- a/kernel/auditsc.c
>> > +++ b/kernel/auditsc.c
>> > @@ -67,6 +67,7 @@
>> >  #include <linux/syscalls.h>
>> >  #include <linux/capability.h>
>> >  #include <linux/fs_struct.h>
>> > +#include <linux/compat.h>
>> >
>> >  #include "audit.h"
>> >
>> > @@ -2710,13 +2711,17 @@ void audit_core_dumps(long signr)
>> >        audit_log_end(ab);
>> >  }
>> >
>> > -void __audit_seccomp(unsigned long syscall)
>> > +void __audit_seccomp(unsigned long syscall, long signr)
>> >  {
>> >        struct audit_buffer *ab;
>> >
>> >        ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
>> > -       audit_log_abend(ab, "seccomp", SIGKILL);
>> > +       audit_log_abend(ab, "seccomp", signr);
>> >        audit_log_format(ab, " syscall=%ld", syscall);
>> > +#ifdef CONFIG_COMPAT
>> > +       audit_log_format(ab, " compat=%d", is_compat_task());
>> > +#endif
>>
>> Should this just use syscall_get_arch to get the AUDIT_ARCH now? :)
>
> I'm waffling on this one, but I'm leaning towards not including compat
> at all.  If you include it, yes, you should use the generic function.
>
> If you have CONFIG_AUDITSC and started audit you are going to get this,
> along with a0-a4, in a separate but associated audit record.  Thus you
> get all the interesting/relevant info.  Without CONFIG_AUDITSC and
> running auditd you get compat, but nothing else.  Why is compat so
> interesting?

You mean as used in audit_log_exit() ? It looks like that depends on a
lot of state cached in __audit_syscall_entry() and finally triggered
in __audit_syscall_exit() (and ..._free()). I don't think this is
really want seccomp wants to be involved in.

By CONFIG_AUDITSC, you mean CONFIG_AUDITSYSCALL? Without that set,
audit_seccomp is a no-op.

The reason compat needs to be reported (or rather, arch) is because
just reporting syscall is ambiguous. It either needs arch or compat to
distinguish it.

> This patch would duplicate the arch=field from that record (calling it
> compat).  So if we are going to duplicate it in another record, we
> should at least call it the same thing (arch=%x)

Right, I agree with Will, this should be arch=%x via
syscall_get_arch() if it's going to happen here.

> My current thinking, and I'm not settled would be to include syscall,
> a0-a4 and arch in the record only if !CONFIG_AUDITSC.  (ip doesn't show
> up elsewhere, so that makes sense here)
>
> It might be annoying to have to find the info in the right record, but
> if you use the auparse audit library tools, it should 'Just Work'...

Given that this is more about logging an abend-like condition, I don't
think it should need to depend on having all syscall auditing enabled
for the process just to get the arch. It really feels like a distinct
condition. But maybe I'm misunderstanding something about how
auditsc.c does its work.

>> > +       audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current));
>> >        audit_log_end(ab);
>> >  }
>> >
>> > diff --git a/kernel/seccomp.c b/kernel/seccomp.c
>> > index 5aabc3c..40af83f 100644
>> > --- a/kernel/seccomp.c
>> > +++ b/kernel/seccomp.c
>> > @@ -57,18 +57,6 @@ struct seccomp_filter {
>> >        struct sock_filter insns[];
>> >  };
>> >
>> > -static void seccomp_filter_log_failure(int syscall)
>> > -{
>> > -       int compat = 0;
>> > -#ifdef CONFIG_COMPAT
>> > -       compat = is_compat_task();
>> > -#endif
>> > -       pr_info("%s[%d]: %ssystem call %d blocked at 0x%lx\n",
>> > -               current->comm, task_pid_nr(current),
>> > -               (compat ? "compat " : ""),
>> > -               syscall, KSTK_EIP(current));
>> > -}
>> > -
>> >  /**
>> >  * get_u32 - returns a u32 offset into data
>> >  * @data: a unsigned 64 bit value
>> > @@ -378,7 +366,6 @@ int __secure_computing_int(int this_syscall)
>> >                default:
>> >                        break;
>> >                }
>> > -               seccomp_filter_log_failure(this_syscall);
>> >                exit_code = SIGSYS;
>> >                break;
>> >        }
>> > @@ -390,7 +377,7 @@ int __secure_computing_int(int this_syscall)
>> >  #ifdef SECCOMP_DEBUG
>> >        dump_stack();
>> >  #endif
>> > -       audit_seccomp(this_syscall);
>> > +       audit_seccomp(this_syscall, exit_code);
>> >        do_exit(exit_code);
>> >        return -1;      /* never reached */
>> >  }
>> > --
>> > 1.7.0.4
>>
>> I'll pull this into the series if that's okay with you?

Let me send a modified version that doesn't include arch, just to
avoid that can of worms for the moment. A separate patch can add that
later, along with all the get_audit_arch() routines for the other
architectures. My original intent was to avoid the duplication between
pr_info() and audit_seccomp(). :)

-Kees

-- 
Kees Cook
ChromeOS Security
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.