---
r: 323998
b: refs/heads/master
c: 841e360
h: refs/heads/master
v: v3
Suresh Siddha authored and H. Peter Anvin committed Sep 18, 2012
1 parent 8e07788 commit 3eebb15
Showing 4 changed files with 30 additions and 143 deletions.
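Note on the change as a whole: every xor routine below previously open-coded its FPU context handling (disable preemption, save %cr0, execute clts, spill %xmm0-%xmm3 or %ymm0-%ymm3 to an on-stack buffer) and undid all of it on the way out. This commit replaces that boilerplate with the generic kernel_fpu_begin()/kernel_fpu_end() helpers declared in <asm/i387.h>; xor_64.h gains the include explicitly below, and xor_avx.h already had it. The [refs] hunk is only this mirror's branch-pointer bookkeeping. A minimal sketch of the shape each routine takes after the patch (illustrative only; xor_sse_example is not a function from this diff):

	#include <asm/i387.h>	/* kernel_fpu_begin()/kernel_fpu_end() */

	/* Illustrative sketch, not part of the commit: the common shape of
	 * the converted xor_sse_*()/xor_avx_*() routines. */
	static void xor_sse_example(unsigned long bytes, unsigned long *p1,
				    unsigned long *p2)
	{
		unsigned long lines = bytes >> 8;	/* SSE paths: 256 bytes per pass */

		kernel_fpu_begin();	/* preemption off, FPU made usable by the kernel */

		/* ... inline-asm XOR loop over 'lines' 256-byte passes ... */

		kernel_fpu_end();	/* hand FPU state handling back */
	}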
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
-refs/heads/master: 9c1c3fac53378c9782c18f80107965578d7b7167
+refs/heads/master: 841e3604d35aa70d399146abdc526d8c89a2c2f5
56 changes: 8 additions & 48 deletions trunk/arch/x86/include/asm/xor_32.h
@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
* Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
*/

-#define XMMS_SAVE \
-do { \
-preempt_disable(); \
-cr0 = read_cr0(); \
-clts(); \
-asm volatile( \
-"movups %%xmm0,(%0) ;\n\t" \
-"movups %%xmm1,0x10(%0) ;\n\t" \
-"movups %%xmm2,0x20(%0) ;\n\t" \
-"movups %%xmm3,0x30(%0) ;\n\t" \
-: \
-: "r" (xmm_save) \
-: "memory"); \
-} while (0)
-
-#define XMMS_RESTORE \
-do { \
-asm volatile( \
-"sfence ;\n\t" \
-"movups (%0),%%xmm0 ;\n\t" \
-"movups 0x10(%0),%%xmm1 ;\n\t" \
-"movups 0x20(%0),%%xmm2 ;\n\t" \
-"movups 0x30(%0),%%xmm3 ;\n\t" \
-: \
-: "r" (xmm_save) \
-: "memory"); \
-write_cr0(cr0); \
-preempt_enable(); \
-} while (0)
-
-#define ALIGN16 __attribute__((aligned(16)))
-
#define OFFS(x) "16*("#x")"
#define PF_OFFS(x) "256+16*("#x")"
#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
@@ -587,10 +555,8 @@ static void
xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
unsigned long lines = bytes >> 8;
-char xmm_save[16*4] ALIGN16;
-int cr0;

-XMMS_SAVE;
+kernel_fpu_begin();

asm volatile(
#undef BLOCK
@@ -633,18 +599,16 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
:
: "memory");

-XMMS_RESTORE;
+kernel_fpu_end();
}

static void
xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3)
{
unsigned long lines = bytes >> 8;
-char xmm_save[16*4] ALIGN16;
-int cr0;

-XMMS_SAVE;
+kernel_fpu_begin();

asm volatile(
#undef BLOCK
@@ -694,18 +658,16 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
:
: "memory" );

-XMMS_RESTORE;
+kernel_fpu_end();
}

static void
xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4)
{
unsigned long lines = bytes >> 8;
-char xmm_save[16*4] ALIGN16;
-int cr0;

-XMMS_SAVE;
+kernel_fpu_begin();

asm volatile(
#undef BLOCK
@@ -762,18 +724,16 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
:
: "memory" );

-XMMS_RESTORE;
+kernel_fpu_end();
}

static void
xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
unsigned long lines = bytes >> 8;
-char xmm_save[16*4] ALIGN16;
-int cr0;

-XMMS_SAVE;
+kernel_fpu_begin();

/* Make sure GCC forgets anything it knows about p4 or p5,
such that it won't pass to the asm volatile below a
@@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
like assuming they have some legal value. */
asm("" : "=r" (p4), "=r" (p5));

-XMMS_RESTORE;
+kernel_fpu_end();
}

static struct xor_block_template xor_block_pIII_sse = {
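Note on xor_32.h: the deleted XMMS_SAVE boiled down to preempt_disable(), cr0 = read_cr0(), clts(), then a movups spill of %xmm0-%xmm3, and XMMS_RESTORE undid the same steps in reverse, so kernel_fpu_begin()/kernel_fpu_end() substitutes for the whole sequence (ALIGN16 and the xmm_save buffers existed only for that spill, which is why they vanish too). A hedged usage sketch, assuming the usual i387 convention of this era that FPU-based paths are chosen only where the FPU is usable; the guard below is illustrative, not something this commit adds:

	/* Pick the SSE path only when the FPU may be used in the current
	 * context; otherwise fall back to the generic integer routine
	 * from include/asm-generic/xor.h. */
	if (irq_fpu_usable())
		xor_sse_2(bytes, p1, p2);	/* calls kernel_fpu_begin() itself now */
	else
		xor_8regs_2(bytes, p1, p2);	/* integer fallback */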
61 changes: 9 additions & 52 deletions trunk/arch/x86/include/asm/xor_64.h
@@ -34,41 +34,7 @@
* no advantages to be gotten from x86-64 here anyways.
*/

-typedef struct {
-unsigned long a, b;
-} __attribute__((aligned(16))) xmm_store_t;
-
-/* Doesn't use gcc to save the XMM registers, because there is no easy way to
-tell it to do a clts before the register saving. */
-#define XMMS_SAVE \
-do { \
-preempt_disable(); \
-asm volatile( \
-"movq %%cr0,%0 ;\n\t" \
-"clts ;\n\t" \
-"movups %%xmm0,(%1) ;\n\t" \
-"movups %%xmm1,0x10(%1) ;\n\t" \
-"movups %%xmm2,0x20(%1) ;\n\t" \
-"movups %%xmm3,0x30(%1) ;\n\t" \
-: "=&r" (cr0) \
-: "r" (xmm_save) \
-: "memory"); \
-} while (0)
-
-#define XMMS_RESTORE \
-do { \
-asm volatile( \
-"sfence ;\n\t" \
-"movups (%1),%%xmm0 ;\n\t" \
-"movups 0x10(%1),%%xmm1 ;\n\t" \
-"movups 0x20(%1),%%xmm2 ;\n\t" \
-"movups 0x30(%1),%%xmm3 ;\n\t" \
-"movq %0,%%cr0 ;\n\t" \
-: \
-: "r" (cr0), "r" (xmm_save) \
-: "memory"); \
-preempt_enable(); \
-} while (0)
+#include <asm/i387.h>

#define OFFS(x) "16*("#x")"
#define PF_OFFS(x) "256+16*("#x")"
@@ -91,10 +57,8 @@ static void
xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
unsigned int lines = bytes >> 8;
-unsigned long cr0;
-xmm_store_t xmm_save[4];

-XMMS_SAVE;
+kernel_fpu_begin();

asm volatile(
#undef BLOCK
@@ -135,19 +99,16 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
: [inc] "r" (256UL)
: "memory");

-XMMS_RESTORE;
+kernel_fpu_end();
}

static void
xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3)
{
unsigned int lines = bytes >> 8;
-xmm_store_t xmm_save[4];
-unsigned long cr0;

-XMMS_SAVE;
-
+kernel_fpu_begin();
asm volatile(
#undef BLOCK
#define BLOCK(i) \
@@ -194,18 +155,16 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
[p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
: [inc] "r" (256UL)
: "memory");
-XMMS_RESTORE;
+kernel_fpu_end();
}

static void
xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4)
{
unsigned int lines = bytes >> 8;
-xmm_store_t xmm_save[4];
-unsigned long cr0;

-XMMS_SAVE;
+kernel_fpu_begin();

asm volatile(
#undef BLOCK
@@ -261,18 +220,16 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: [inc] "r" (256UL)
: "memory" );

-XMMS_RESTORE;
+kernel_fpu_end();
}

static void
xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
unsigned int lines = bytes >> 8;
-xmm_store_t xmm_save[4];
-unsigned long cr0;

-XMMS_SAVE;
+kernel_fpu_begin();

asm volatile(
#undef BLOCK
@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: [inc] "r" (256UL)
: "memory");

-XMMS_RESTORE;
+kernel_fpu_end();
}

static struct xor_block_template xor_block_sse = {
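Note on xor_64.h: the 64-bit XMMS_SAVE read %cr0 and ran clts inside the asm itself (hence the deleted remark about gcc and clts ordering), and the 16-byte-aligned xmm_store_t existed only to hold the spilled registers; both disappear, and the added #include <asm/i387.h> supplies the helper declarations instead. The loop accounting is untouched, as a small worked example shows (buffer size illustrative):

	/* Each pass advances the pointers by the [inc] operand, 256UL,
	 * so a 4 KiB buffer takes bytes >> 8 = 16 passes. */
	unsigned long bytes = 4096;
	unsigned int lines = bytes >> 8;	/* 16 passes of 256 bytes each */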
54 changes: 12 additions & 42 deletions trunk/arch/x86/include/asm/xor_avx.h
@@ -20,32 +20,6 @@
#include <linux/compiler.h>
#include <asm/i387.h>

-#define ALIGN32 __aligned(32)
-
-#define YMM_SAVED_REGS 4
-
-#define YMMS_SAVE \
-do { \
-preempt_disable(); \
-cr0 = read_cr0(); \
-clts(); \
-asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
-asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
-asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
-asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
-} while (0);
-
-#define YMMS_RESTORE \
-do { \
-asm volatile("sfence" : : : "memory"); \
-asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
-asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
-asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
-asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
-write_cr0(cr0); \
-preempt_enable(); \
-} while (0);
-
#define BLOCK4(i) \
BLOCK(32 * i, 0) \
BLOCK(32 * (i + 1), 1) \
@@ -60,10 +34,9 @@ do { \

static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
-unsigned long cr0, lines = bytes >> 9;
-char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+unsigned long lines = bytes >> 9;

-YMMS_SAVE
+kernel_fpu_begin();

while (lines--) {
#undef BLOCK
@@ -82,16 +55,15 @@ do { \
p1 = (unsigned long *)((uintptr_t)p1 + 512);
}

-YMMS_RESTORE
+kernel_fpu_end();
}

static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
unsigned long *p2)
{
-unsigned long cr0, lines = bytes >> 9;
-char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+unsigned long lines = bytes >> 9;

-YMMS_SAVE
+kernel_fpu_begin();

while (lines--) {
#undef BLOCK
@@ -113,16 +85,15 @@ do { \
p2 = (unsigned long *)((uintptr_t)p2 + 512);
}

-YMMS_RESTORE
+kernel_fpu_end();
}

static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
unsigned long *p2, unsigned long *p3)
{
-unsigned long cr0, lines = bytes >> 9;
-char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+unsigned long lines = bytes >> 9;

-YMMS_SAVE
+kernel_fpu_begin();

while (lines--) {
#undef BLOCK
@@ -147,16 +118,15 @@ do { \
p3 = (unsigned long *)((uintptr_t)p3 + 512);
}

-YMMS_RESTORE
+kernel_fpu_end();
}

static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
-unsigned long cr0, lines = bytes >> 9;
-char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+unsigned long lines = bytes >> 9;

-YMMS_SAVE
+kernel_fpu_begin();

while (lines--) {
#undef BLOCK
@@ -184,7 +154,7 @@ do { \
p4 = (unsigned long *)((uintptr_t)p4 + 512);
}

-YMMS_RESTORE
+kernel_fpu_end();
}

static struct xor_block_template xor_block_avx = {
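Note on xor_avx.h: these routines shift by 9 rather than 8 because each pass covers 512 bytes, which matches the "+ 512" pointer advances in the loops above. They are published through the same xor_block_template mechanism as the SSE variants; the diff truncates that initializer, but a plausible shape, assuming the field names of the generic raid xor template (do_2 through do_5), is:

	/* Hedged sketch of the truncated registration; field names assume
	 * the generic struct xor_block_template used by the raid xor code. */
	static struct xor_block_template xor_block_avx = {
		.name = "avx",
		.do_2 = xor_avx_2,
		.do_3 = xor_avx_3,
		.do_4 = xor_avx_4,
		.do_5 = xor_avx_5,
	};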
