From 37d8425d78842feb02d19423e4d73e7c59f11914 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 May 2011 15:29:18 -0700 Subject: [PATCH] --- yaml --- r: 245520 b: refs/heads/master c: 2f19e06ac30771c7cb96fd61d8aeacfa74dac21c h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/arch/x86/lib/memset_64.S | 54 ++++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/[refs] b/[refs] index 9ab68ad84461..68f9fc1f4119 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 057e05c1d6440117875f455e59da8691e08f65d5 +refs/heads/master: 2f19e06ac30771c7cb96fd61d8aeacfa74dac21c diff --git a/trunk/arch/x86/lib/memset_64.S b/trunk/arch/x86/lib/memset_64.S index 09d344269652..79bd454b78a3 100644 --- a/trunk/arch/x86/lib/memset_64.S +++ b/trunk/arch/x86/lib/memset_64.S @@ -2,9 +2,13 @@ #include #include +#include +#include /* - * ISO C memset - set a memory block to a byte value. + * ISO C memset - set a memory block to a byte value. This function uses fast + * string to get better performance than the original function. The code is + * simpler and shorter than the orignal function as well. * * rdi destination * rsi value (char) @@ -31,6 +35,28 @@ .Lmemset_e: .previous +/* + * ISO C memset - set a memory block to a byte value. This function uses + * enhanced rep stosb to override the fast string function. + * The code is simpler and shorter than the fast string function as well. + * + * rdi destination + * rsi value (char) + * rdx count (bytes) + * + * rax original destination + */ + .section .altinstr_replacement, "ax", @progbits +.Lmemset_c_e: + movq %rdi,%r9 + movb %sil,%al + movl %edx,%ecx + rep stosb + movq %r9,%rax + ret +.Lmemset_e_e: + .previous + ENTRY(memset) ENTRY(__memset) CFI_STARTPROC @@ -112,16 +138,20 @@ ENTRY(__memset) ENDPROC(memset) ENDPROC(__memset) - /* Some CPUs run faster using the string instructions. - It is also a lot simpler. Use this when possible */ - -#include - + /* Some CPUs support enhanced REP MOVSB/STOSB feature. + * It is recommended to use this when possible. + * + * If enhanced REP MOVSB/STOSB feature is not available, use fast string + * instructions. + * + * Otherwise, use original memset function. + * + * In .altinstructions section, ERMS feature is placed after REG_GOOD + * feature to implement the right patch order. + */ .section .altinstructions,"a" - .align 8 - .quad memset - .quad .Lmemset_c - .word X86_FEATURE_REP_GOOD - .byte .Lfinal - memset - .byte .Lmemset_e - .Lmemset_c + altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ + .Lfinal-memset,.Lmemset_e-.Lmemset_c + altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ + .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e .previous