Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 272284
b: refs/heads/master
c: e827bb0
h: refs/heads/master
v: v3
  • Loading branch information
Jussi Kivilinna authored and Herbert Xu committed Oct 21, 2011
1 parent a716876 commit 1f643e8
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 101 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: fad8fa4782fde8afffc16b2b907b7f5bdbf03133
refs/heads/master: e827bb09c815955d5d5f0ddf98483a7efd04f55b
198 changes: 98 additions & 100 deletions trunk/arch/x86/crypto/blowfish-x86_64-asm_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -56,50 +56,41 @@

#define RT0 %rbp
#define RT1 %rsi
#define RT2 %r8
#define RT3 %r9

#define RT0d %ebp
#define RT1d %esi
#define RT2d %r8d
#define RT3d %r9d

#define RK0 %r8
#define RK1 %r9
#define RK2 %r10
#define RK3 %r11

#define RK0d %r8d
#define RK1d %r9d
#define RK2d %r10d
#define RK3d %r11d

#define RKEY %r12
#define RKEY %r10

/***********************************************************************
* 1-way blowfish
***********************************************************************/
/*
 * F(x, k): one Feistel step on the 64-bit register x, using k as scratch.
 *
 * Parameters (register macro names, combined by token pasting):
 *   x - 64-bit data register; x##bh and x##bl must name its byte
 *       sub-registers for bits 8..15 and bits 0..7 (those names are defined
 *       outside this excerpt - confirm).
 *   k - 64-bit scratch register; k##d must name its 32-bit view
 *       (e.g. RK0 -> RK0d).
 *
 * Operation, with b3..b0 the bytes of the low 32 bits of x (b3 = bits 24..31):
 *   1. x is rotated right by 16 so b3/b2 become addressable as bh/bl, the two
 *      bytes are extracted, then x is rotated back.
 *   2. k##d = ((s0[b3] + s1[b2]) ^ s2[b1]) + s3[b0], all in 32-bit arithmetic.
 *      Each table is read at sN(CTX, index, 4), i.e. 4-byte entries.
 *   3. x is rotated by 32, swapping its two 32-bit halves.
 *   4. k is XORed into x. The 32-bit writes to k##d zero the upper half of k,
 *      so only the (new) low 32 bits of x change.
 *
 * s0..s3 and CTX are defined outside this excerpt; presumably they are the
 * S-box offsets inside the cipher context and the context pointer - confirm.
 *
 * Clobbers: RT0, RT1, k, flags.
 */
#define F(x, k) \
rorq $16, x; \
movzbl x ## bh, RT0d; \
movzbl x ## bl, RT1d; \
rolq $16, x; \
movl s0(CTX,RT0,4), k ## d; \
addl s1(CTX,RT1,4), k ## d; \
movzbl x ## bh, RT0d; \
movzbl x ## bl, RT1d; \
rolq $32, x; \
xorl s2(CTX,RT0,4), k ## d; \
addl s3(CTX,RT1,4), k ## d; \
xorq k, x;
/*
 * F(): one Feistel step on RX0 (no arguments; the data register is fixed).
 *
 * With b3..b0 the bytes of the low 32 bits of RX0 (b3 = bits 24..31):
 *   1. RX0 is rotated right by 16 so b3/b2 can be read through RX0bh/RX0bl,
 *      then rotated back so b1/b0 can be read the same way.
 *   2. RT0d = ((s0[b3] + s1[b2]) ^ s2[b1]) + s3[b0], all in 32-bit arithmetic.
 *      Each table is read at sN(CTX, index, 4), i.e. 4-byte entries.
 *      RT0 first holds the s0 index and is then reused as the accumulator.
 *   3. RX0 is rotated by 32, swapping its two 32-bit halves. The rotate is
 *      placed before the last two table operations; those only touch RT0d,
 *      so the order does not affect the result.
 *   4. RT0 is XORed into RX0. The 32-bit writes to RT0d zero the upper half
 *      of RT0, so only the (new) low 32 bits of RX0 change.
 *
 * RX0, RX0bh, RX0bl, CTX and s0..s3 are defined outside this excerpt;
 * presumably the S-box offsets are relative to the cipher context - confirm.
 *
 * Clobbers: RT0, RT1, RT2 (%rbp, %rsi, %r8 per the defines above), flags.
 */
#define F() \
rorq $16, RX0; \
movzbl RX0bh, RT0d; \
movzbl RX0bl, RT1d; \
rolq $16, RX0; \
movl s0(CTX,RT0,4), RT0d; \
addl s1(CTX,RT1,4), RT0d; \
movzbl RX0bh, RT1d; \
movzbl RX0bl, RT2d; \
rolq $32, RX0; \
xorl s2(CTX,RT1,4), RT0d; \
addl s3(CTX,RT2,4), RT0d; \
xorq RT0, RX0;

/*
 * add_roundkey_enc(n): XOR 64 bits of key material into RX0.
 *
 * Reads the quadword at p + 4*n from CTX. Entries are addressed with a
 * 4-byte stride but fetched 8 bytes at a time, so one invocation applies
 * entries n and n+1 together: on little-endian x86, entry n is XORed into
 * bits 0..31 of RX0 and entry n+1 into bits 32..63.
 *
 * p and CTX are defined outside this excerpt; p is presumably the offset of
 * the round-key (P) array inside the cipher context - confirm.
 *
 * Clobbers: flags.
 */
#define add_roundkey_enc(n) \
xorq p+4*(n)(CTX), RX0;

#define round_enc(n) \
add_roundkey_enc(n); \
\
F(RX0, RK0); \
F(RX0, RK0);

#define round_final_enc(n) \
xorq p+4*(n)(CTX), RX0;
F(); \
F();

#define add_roundkey_dec(n) \
movq p+4*(n-1)(CTX), RT0; \
Expand All @@ -109,8 +100,8 @@
#define round_dec(n) \
add_roundkey_dec(n); \
\
F(RX0, RK0); \
F(RX0, RK0); \
F(); \
F(); \

#define read_block() \
movq (RIO), RX0; \
Expand All @@ -130,16 +121,15 @@
.type __blowfish_enc_blk,@function;

__blowfish_enc_blk:
// input:
// %rdi: ctx, CTX
// %rsi: dst
// %rdx: src
// %rcx: bool xor
pushq %rbp;
pushq %rbx;

pushq %rsi;
pushq %rcx;
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
movq %rbp, %r11;

movq %rsi, %r10;
movq %rdx, RIO;

read_block();
Expand All @@ -154,38 +144,31 @@ __blowfish_enc_blk:
round_enc(14);
add_roundkey_enc(16);

popq %rbp;
popq RIO;
movq %r11, %rbp;

test %bpl, %bpl;
movq %r10, RIO;
test %cl, %cl;
jnz __enc_xor;

write_block();

__enc_ret:
popq %rbx;
popq %rbp;

ret;

__enc_xor:
xor_block();

jmp __enc_ret;
ret;

.align 8
.global blowfish_dec_blk
.type blowfish_dec_blk,@function;

blowfish_dec_blk:
// input:
// %rdi: ctx, CTX
// %rsi: dst
// %rdx: src
pushq %rbp;
pushq %rbx;

pushq %rsi;
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
movq %rbp, %r11;

movq %rsi, %r10;
movq %rdx, RIO;

read_block();
Expand All @@ -200,17 +183,33 @@ blowfish_dec_blk:
round_dec(3);
add_roundkey_dec(1);

popq RIO;
movq %r10, RIO;
write_block();

popq %rbx;
popq %rbp;
movq %r11, %rbp;

ret;

/**********************************************************************
4-way blowfish, four blocks parallel
**********************************************************************/

/*
 * F4(x): F() variant for the 4-way code path; one Feistel step on register x.
 *
 * Slower when used alone/1-way, but faster when used parallel/4-way
 * (tested on AMD Phenom II & Intel Xeon E7330).
 *
 * Parameter:
 *   x - 64-bit data register macro name; x##bh and x##bl must name its byte
 *       sub-registers for bits 8..15 and bits 0..7 (those names are defined
 *       outside this excerpt - confirm).
 *
 * With b3..b0 the bytes of the low 32 bits of x (b3 = bits 24..31):
 *   1. b1/b0 are extracted into RT1d/RT3d.
 *   2. x is rotated right by 16 and b3/b2 are extracted into RT0d/RT2d.
 *   3. x is rotated right by another 16. The two rotates add up to 32, so
 *      the 32-bit halves of x end up swapped.
 *   4. RT0d = ((s0[b3] + s1[b2]) ^ s2[b1]) + s3[b0], all in 32-bit arithmetic.
 *      Each table is read at sN(CTX, index, 4), i.e. 4-byte entries.
 *   5. RT0 is XORed into x. The 32-bit writes to RT0d zero the upper half of
 *      RT0, so only the (new) low 32 bits of x change.
 *
 * All four byte indices are extracted before any table load is issued,
 * which costs one more temporary than F().
 *
 * Clobbers: RT0, RT1, RT2, RT3 (%rbp, %rsi, %r8, %r9 per the defines above),
 * flags.
 */
#define F4(x) \
movzbl x ## bh, RT1d; \
movzbl x ## bl, RT3d; \
rorq $16, x; \
movzbl x ## bh, RT0d; \
movzbl x ## bl, RT2d; \
rorq $16, x; \
movl s0(CTX,RT0,4), RT0d; \
addl s1(CTX,RT2,4), RT0d; \
xorl s2(CTX,RT1,4), RT0d; \
addl s3(CTX,RT3,4), RT0d; \
xorq RT0, x;

#define add_preloaded_roundkey4() \
xorq RKEY, RX0; \
xorq RKEY, RX1; \
Expand All @@ -227,15 +226,15 @@ blowfish_dec_blk:
#define round_enc4(n) \
add_roundkey_enc4(n); \
\
F(RX0, RK0); \
F(RX1, RK1); \
F(RX2, RK2); \
F(RX3, RK3); \
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3); \
\
F(RX0, RK0); \
F(RX1, RK1); \
F(RX2, RK2); \
F(RX3, RK3);
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3);

#define preload_roundkey_dec(n) \
movq p+4*((n)-1)(CTX), RKEY; \
Expand All @@ -248,15 +247,15 @@ blowfish_dec_blk:
#define round_dec4(n) \
add_roundkey_dec4(n); \
\
F(RX0, RK0); \
F(RX1, RK1); \
F(RX2, RK2); \
F(RX3, RK3); \
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3); \
\
F(RX0, RK0); \
F(RX1, RK1); \
F(RX2, RK2); \
F(RX3, RK3);
F4(RX0); \
F4(RX1); \
F4(RX2); \
F4(RX3);

#define read_block4() \
movq (RIO), RX0; \
Expand Down Expand Up @@ -306,18 +305,19 @@ blowfish_dec_blk:
.type __blowfish_enc_blk_4way,@function;

__blowfish_enc_blk_4way:
// input:
// %rdi: ctx, CTX
// %rsi: dst
// %rdx: src
// %rcx: bool xor
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
pushq %rbp;
pushq %rbx;
pushq RKEY;
pushq %rcx;

preload_roundkey_enc(0);

pushq %rsi;
pushq %rcx;
movq %rsi, %r11;
movq %rdx, RIO;

read_block4();
Expand All @@ -333,40 +333,39 @@ __blowfish_enc_blk_4way:
add_preloaded_roundkey4();

popq %rbp;
popq RIO;
movq %r11, RIO;

test %bpl, %bpl;
jnz __enc_xor4;

write_block4();

__enc_ret4:
popq RKEY;
popq %rbx;
popq %rbp;

ret;

__enc_xor4:
xor_block4();

jmp __enc_ret4;
popq %rbx;
popq %rbp;
ret;

.align 8
.global blowfish_dec_blk_4way
.type blowfish_dec_blk_4way,@function;

blowfish_dec_blk_4way:
// input:
// %rdi: ctx, CTX
// %rsi: dst
// %rdx: src
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
pushq %rbp;
pushq %rbx;
pushq RKEY;
preload_roundkey_dec(17);

pushq %rsi;
movq %rsi, %r11;
movq %rdx, RIO;

read_block4();
Expand All @@ -381,10 +380,9 @@ blowfish_dec_blk_4way:
round_dec4(3);
add_preloaded_roundkey4();

popq RIO;
movq %r11, RIO;
write_block4();

popq RKEY;
popq %rbx;
popq %rbp;

Expand Down

0 comments on commit 1f643e8

Please sign in to comment.