kernel-hardening - Re: [PATCH v1 01/27] x86/crypto: Adapt assembly for PIE support

Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKv+Gu9XMnNA0UoGfFMQmC9=Ryh6dcOduxH+tq49bcdvBwhyQw@mail.gmail.com>
Date: Fri, 20 Oct 2017 09:28:00 +0100
From: Ard Biesheuvel <ard.biesheuvel@...aro.org>
To: Ingo Molnar <mingo@...nel.org>
Cc: Thomas Garnier <thgarnie@...gle.com>, Herbert Xu <herbert@...dor.apana.org.au>, 
	"David S . Miller" <davem@...emloft.net>, Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>, 
	"H . Peter Anvin" <hpa@...or.com>, Peter Zijlstra <peterz@...radead.org>, 
	Josh Poimboeuf <jpoimboe@...hat.com>, Arnd Bergmann <arnd@...db.de>, Kees Cook <keescook@...omium.org>, 
	Andrey Ryabinin <aryabinin@...tuozzo.com>, Matthias Kaehlcke <mka@...omium.org>, 
	Tom Lendacky <thomas.lendacky@....com>, Andy Lutomirski <luto@...nel.org>, 
	"Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>, Borislav Petkov <bp@...e.de>, 
	"Rafael J . Wysocki" <rjw@...ysocki.net>, Len Brown <len.brown@...el.com>, Pavel Machek <pavel@....cz>, 
	Juergen Gross <jgross@...e.com>, Chris Wright <chrisw@...s-sol.org>, 
	Alok Kataria <akataria@...are.com>, Rusty Russell <rusty@...tcorp.com.au>, Tejun Heo <tj@...nel.org>, 
	Christoph Lameter <cl@...ux.com>, Boris Ostrovsky <boris.ostrovsky@...cle.com>, 
	Paul Gortmaker <paul.gortmaker@...driver.com>, Andrew Morton <akpm@...ux-foundation.org>, 
	Alexey Dobriyan <adobriyan@...il.com>, "Paul E . McKenney" <paulmck@...ux.vnet.ibm.com>, 
	Nicolas Pitre <nicolas.pitre@...aro.org>, Borislav Petkov <bp@...en8.de>, 
	"Luis R . Rodriguez" <mcgrof@...nel.org>, Greg Kroah-Hartman <gregkh@...uxfoundation.org>, 
	Christopher Li <sparse@...isli.org>, Steven Rostedt <rostedt@...dmis.org>, 
	Jason Baron <jbaron@...mai.com>, Mika Westerberg <mika.westerberg@...ux.intel.com>, 
	Dou Liyang <douly.fnst@...fujitsu.com>, 
	"Rafael J . Wysocki" <rafael.j.wysocki@...el.com>, Lukas Wunner <lukas@...ner.de>, 
	Masahiro Yamada <yamada.masahiro@...ionext.com>, Alexei Starovoitov <ast@...nel.org>, 
	Daniel Borkmann <daniel@...earbox.net>, Markus Trippelsdorf <markus@...ppelsdorf.de>, 
	Paolo Bonzini <pbonzini@...hat.com>, Radim Krčmář <rkrcmar@...hat.com>, 
	Joerg Roedel <joro@...tes.org>, Rik van Riel <riel@...hat.com>, David Howells <dhowells@...hat.com>, 
	Waiman Long <longman@...hat.com>, Kyle Huey <me@...ehuey.com>, Jonathan Corbet <corbet@....net>, 
	Michal Hocko <mhocko@...e.com>, Peter Foley <pefoley2@...oley.com>, Paul Bolle <pebolle@...cali.nl>, 
	Jiri Kosina <jkosina@...e.cz>, "H . J . Lu" <hjl.tools@...il.com>, Rob Landley <rob@...dley.net>, 
	Baoquan He <bhe@...hat.com>, Jan H . Schönherr <jschoenh@...zon.de>, 
	Daniel Micay <danielmicay@...il.com>, "x86@...nel.org" <x86@...nel.org>, 
	"linux-crypto@...r.kernel.org" <linux-crypto@...r.kernel.org>, 
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>, linux-pm <linux-pm@...r.kernel.org>, 
	virtualization@...ts.linux-foundation.org, 
	xen-devel <xen-devel@...ts.xenproject.org>, 
	"linux-arch@...r.kernel.org" <linux-arch@...r.kernel.org>, Linux-Sparse <linux-sparse@...r.kernel.org>, 
	KVM devel mailing list <kvm@...r.kernel.org>, Linux Doc Mailing List <linux-doc@...r.kernel.org>, 
	Kernel Hardening <kernel-hardening@...ts.openwall.com>
Subject: Re: [PATCH v1 01/27] x86/crypto: Adapt assembly for PIE support

On 20 October 2017 at 09:24, Ingo Molnar <mingo@...nel.org> wrote:
>
> * Thomas Garnier <thgarnie@...gle.com> wrote:
>
>> Change the assembly code to use only relative references of symbols for the
>> kernel to be PIE compatible.
>>
>> Position Independent Executable (PIE) support will allow extending the
>> KASLR randomization range below the -2G memory limit.
>
>> diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
>> index 8739cf7795de..86fa068e5e81 100644
>> --- a/arch/x86/crypto/aes-x86_64-asm_64.S
>> +++ b/arch/x86/crypto/aes-x86_64-asm_64.S
>> @@ -48,8 +48,12 @@
>>  #define R10  %r10
>>  #define R11  %r11
>>
>> +/* Hold global for PIE support */
>> +#define RBASE        %r12
>> +
>>  #define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
>>       ENTRY(FUNC);                    \
>> +     pushq   RBASE;                  \
>>       movq    r1,r2;                  \
>>       leaq    KEY+48(r8),r9;          \
>>       movq    r10,r11;                \
>> @@ -74,54 +78,63 @@
>>       movl    r6 ## E,4(r9);          \
>>       movl    r7 ## E,8(r9);          \
>>       movl    r8 ## E,12(r9);         \
>> +     popq    RBASE;                  \
>>       ret;                            \
>>       ENDPROC(FUNC);
>>
>> +#define round_mov(tab_off, reg_i, reg_o) \
>> +     leaq    tab_off(%rip), RBASE; \
>> +     movl    (RBASE,reg_i,4), reg_o;
>> +
>> +#define round_xor(tab_off, reg_i, reg_o) \
>> +     leaq    tab_off(%rip), RBASE; \
>> +     xorl    (RBASE,reg_i,4), reg_o;
>> +
>>  #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
>>       movzbl  r2 ## H,r5 ## E;        \
>>       movzbl  r2 ## L,r6 ## E;        \
>> -     movl    TAB+1024(,r5,4),r5 ## E;\
>> +     round_mov(TAB+1024, r5, r5 ## E)\
>>       movw    r4 ## X,r2 ## X;        \
>> -     movl    TAB(,r6,4),r6 ## E;     \
>> +     round_mov(TAB, r6, r6 ## E)     \
>>       roll    $16,r2 ## E;            \
>>       shrl    $16,r4 ## E;            \
>>       movzbl  r4 ## L,r7 ## E;        \
>>       movzbl  r4 ## H,r4 ## E;        \
>>       xorl    OFFSET(r8),ra ## E;     \
>>       xorl    OFFSET+4(r8),rb ## E;   \
>> -     xorl    TAB+3072(,r4,4),r5 ## E;\
>> -     xorl    TAB+2048(,r7,4),r6 ## E;\
>> +     round_xor(TAB+3072, r4, r5 ## E)\
>> +     round_xor(TAB+2048, r7, r6 ## E)\
>>       movzbl  r1 ## L,r7 ## E;        \
>>       movzbl  r1 ## H,r4 ## E;        \
>> -     movl    TAB+1024(,r4,4),r4 ## E;\
>> +     round_mov(TAB+1024, r4, r4 ## E)\
>>       movw    r3 ## X,r1 ## X;        \
>>       roll    $16,r1 ## E;            \
>>       shrl    $16,r3 ## E;            \
>> -     xorl    TAB(,r7,4),r5 ## E;     \
>> +     round_xor(TAB, r7, r5 ## E)     \
>>       movzbl  r3 ## L,r7 ## E;        \
>>       movzbl  r3 ## H,r3 ## E;        \
>> -     xorl    TAB+3072(,r3,4),r4 ## E;\
>> -     xorl    TAB+2048(,r7,4),r5 ## E;\
>> +     round_xor(TAB+3072, r3, r4 ## E)\
>> +     round_xor(TAB+2048, r7, r5 ## E)\
>>       movzbl  r1 ## L,r7 ## E;        \
>>       movzbl  r1 ## H,r3 ## E;        \
>>       shrl    $16,r1 ## E;            \
>> -     xorl    TAB+3072(,r3,4),r6 ## E;\
>> -     movl    TAB+2048(,r7,4),r3 ## E;\
>> +     round_xor(TAB+3072, r3, r6 ## E)\
>> +     round_mov(TAB+2048, r7, r3 ## E)\
>>       movzbl  r1 ## L,r7 ## E;        \
>>       movzbl  r1 ## H,r1 ## E;        \
>> -     xorl    TAB+1024(,r1,4),r6 ## E;\
>> -     xorl    TAB(,r7,4),r3 ## E;     \
>> +     round_xor(TAB+1024, r1, r6 ## E)\
>> +     round_xor(TAB, r7, r3 ## E)     \
>>       movzbl  r2 ## H,r1 ## E;        \
>>       movzbl  r2 ## L,r7 ## E;        \
>>       shrl    $16,r2 ## E;            \
>> -     xorl    TAB+3072(,r1,4),r3 ## E;\
>> -     xorl    TAB+2048(,r7,4),r4 ## E;\
>> +     round_xor(TAB+3072, r1, r3 ## E)\
>> +     round_xor(TAB+2048, r7, r4 ## E)\
>>       movzbl  r2 ## H,r1 ## E;        \
>>       movzbl  r2 ## L,r2 ## E;        \
>>       xorl    OFFSET+8(r8),rc ## E;   \
>>       xorl    OFFSET+12(r8),rd ## E;  \
>> -     xorl    TAB+1024(,r1,4),r3 ## E;\
>> -     xorl    TAB(,r2,4),r4 ## E;
>> +     round_xor(TAB+1024, r1, r3 ## E)\
>> +     round_xor(TAB, r2, r4 ## E)
>
> This appears to be adding unconditional overhead to a function that was moved to
> assembly to improve its performance.
>

I did some benchmarking on this code a while ago and, interestingly,
it was slower than the generic C implementation (on a Pentium E2200),
so we may want to consider whether we still need this driver in the
first place.
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.