Openwall GNU/*/Linux - a small security-enhanced Linux distro for servers
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Wed, 15 Mar 2017 10:43:10 -0700
From: Thomas Garnier <thgarnie@...gle.com>
To: "H. Peter Anvin" <hpa@...or.com>
Cc: Andy Lutomirski <luto@...capital.net>, Ingo Molnar <mingo@...nel.org>, 
	Martin Schwidefsky <schwidefsky@...ibm.com>, Heiko Carstens <heiko.carstens@...ibm.com>, 
	David Howells <dhowells@...hat.com>, Arnd Bergmann <arnd@...db.de>, Al Viro <viro@...iv.linux.org.uk>, 
	Dave Hansen <dave.hansen@...el.com>, René Nyffenegger <mail@...enyffenegger.ch>, 
	Andrew Morton <akpm@...ux-foundation.org>, Kees Cook <keescook@...omium.org>, 
	"Paul E . McKenney" <paulmck@...ux.vnet.ibm.com>, Andy Lutomirski <luto@...nel.org>, 
	Ard Biesheuvel <ard.biesheuvel@...aro.org>, Nicolas Pitre <nicolas.pitre@...aro.org>, 
	Petr Mladek <pmladek@...e.com>, Sebastian Andrzej Siewior <bigeasy@...utronix.de>, 
	Sergey Senozhatsky <sergey.senozhatsky@...il.com>, Helge Deller <deller@....de>, 
	Rik van Riel <riel@...hat.com>, John Stultz <john.stultz@...aro.org>, 
	Thomas Gleixner <tglx@...utronix.de>, Oleg Nesterov <oleg@...hat.com>, 
	Stephen Smalley <sds@...ho.nsa.gov>, Pavel Tikhomirov <ptikhomirov@...tuozzo.com>, 
	Frederic Weisbecker <fweisbec@...il.com>, Stanislav Kinsburskiy <skinsbursky@...tuozzo.com>, 
	Ingo Molnar <mingo@...hat.com>, Paolo Bonzini <pbonzini@...hat.com>, 
	Dmitry Safonov <dsafonov@...tuozzo.com>, Borislav Petkov <bp@...en8.de>, 
	Josh Poimboeuf <jpoimboe@...hat.com>, Brian Gerst <brgerst@...il.com>, Jan Beulich <JBeulich@...e.com>, 
	Christian Borntraeger <borntraeger@...ibm.com>, Fenghua Yu <fenghua.yu@...el.com>, 
	He Chen <he.chen@...ux.intel.com>, Russell King <linux@...linux.org.uk>, 
	Vladimir Murzin <vladimir.murzin@....com>, Will Deacon <will.deacon@....com>, 
	Catalin Marinas <catalin.marinas@....com>, Mark Rutland <mark.rutland@....com>, 
	James Morse <james.morse@....com>, "David A . Long" <dave.long@...aro.org>, 
	Pratyush Anand <panand@...hat.com>, Laura Abbott <labbott@...hat.com>, 
	Andre Przywara <andre.przywara@....com>, Chris Metcalf <cmetcalf@...lanox.com>, 
	linux-s390 <linux-s390@...r.kernel.org>, LKML <linux-kernel@...r.kernel.org>, 
	Linux API <linux-api@...r.kernel.org>, "the arch/x86 maintainers" <x86@...nel.org>, 
	"linux-arm-kernel@...ts.infradead.org" <linux-arm-kernel@...ts.infradead.org>, 
	Kernel Hardening <kernel-hardening@...ts.openwall.com>
Subject: Re: [PATCH v3 2/4] x86/syscalls: Specific usage of verify_pre_usermode_state

Thanks for the feedback. I will look into inlining by default (checking
the code size on different architectures). In the meantime, here is the
updated patch for x86:
===========

Implement specific usage of verify_pre_usermode_state for user-mode
returns for x86.
---
Based on next-20170308
---
 arch/x86/Kconfig                        |  1 +
 arch/x86/entry/common.c                 |  3 +++
 arch/x86/entry/entry_64.S               |  8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 11 +++++++++++
 arch/x86/include/asm/processor.h        | 11 -----------
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 005df7c825f5..6d48e18e6f09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,6 +63,7 @@ config X86
  select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
  select ARCH_MIGHT_HAVE_PC_PARPORT
  select ARCH_MIGHT_HAVE_PC_SERIO
+ select ARCH_NO_SYSCALL_VERIFY_PRE_USERMODE_STATE
  select ARCH_SUPPORTS_ATOMIC_RMW
  select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
  select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 370c42c7f046..525edbb77f03 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -22,6 +22,7 @@
 #include <linux/context_tracking.h>
 #include <linux/user-return-notifier.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>

 #include <asm/desc.h>
 #include <asm/traps.h>
@@ -180,6 +181,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
  struct thread_info *ti = current_thread_info();
  u32 cached_flags;

+ verify_pre_usermode_state();
+
  if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
  local_irq_disable();

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d2b2a2948ffe..c079b010205c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -218,6 +218,14 @@ entry_SYSCALL_64_fastpath:
  testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
  jnz 1f

+ /*
+ * If address limit is not based on user-mode, jump to slow path for
+ * additional security checks.
+ */
+ movq $TASK_SIZE_MAX, %rcx
+ cmp %rcx, TASK_addr_limit(%r11)
+ jnz 1f
+
  LOCKDEP_SYS_EXIT
  TRACE_IRQS_ON /* user mode is traced as IRQs on */
  movq RIP(%rsp), %rcx
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..0fbbb79d058c 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,4 +76,15 @@ typedef struct { pteval_t pte; } pte_t;

 #define EARLY_DYNAMIC_PAGE_TABLES 64

+/*
+ * User space process size. 47bits minus one guard page.  The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously.  We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
+ */
+#define TASK_SIZE_MAX ((_AC(1, UL) << 47) - PAGE_SIZE)
+
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..9bc99d37133e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -829,17 +829,6 @@ static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)

 #else
-/*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
- */
-#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
-
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-- 
2.12.0.367.g23dc2f6d3c-goog


On Tue, Mar 14, 2017 at 10:53 AM, H. Peter Anvin <hpa@...or.com> wrote:
> On 03/14/17 09:51, Thomas Garnier wrote:
>>>
>>> I wanted to comment on that thing: why on earth isn't
>>> verify_pre_usermode_state() an inline?  Making it an out-of-line
>>> function adds pointless extra overhead to the C code when we are talking
>>> about a few instructions.
>>
>> Because outside of the arch-specific implementations it is called by each
>> syscall handler. Inlining it will increase the code size a lot.
>>
>
> Don't assume that.  On a lot of architectures a function call can be
> more expensive than a simple compare and branch, because the compiler
> has to assume a whole bunch of registers are lost at that point.
>
> Either way, don't penalize the common architectures for it.  Not okay.
>
>         -hpa
>



-- 
Thomas

Powered by blists - more mailing lists

Your e-mail address:

Powered by Openwall GNU/*/Linux - Powered by OpenVZ