Skip to content

Commit

Permalink
include/uapi/linux/byteorder, swab: force inlining of some byteswap o…
Browse files Browse the repository at this point in the history
…perations

Sometimes gcc mysteriously doesn't inline
very small functions we expect to be inlined. See

    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122

With this .config:
http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os,
the following functions get deinlined many times.
Examples of disassembly:

<get_unaligned_be16> (12 copies, 51 calls):
       66 8b 07                mov    (%rdi),%ax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       86 e0                   xchg   %ah,%al
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be32> (12 copies, 135 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be64> (2 copies, 20 calls):
       48 8b 07                mov    (%rdi),%rax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       48 0f c8                bswap  %rax
       5d                      pop    %rbp
       c3                      retq

<__swab16p> (16 copies, 146 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       86 e0                   xchg   %ah,%al
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab32p> (43 copies, ~560 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       0f c8                   bswap  %eax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab64p> (21 copies, 119 calls):
       55                      push   %rbp
       48 89 f8                mov    %rdi,%rax
       48 0f c8                bswap  %rax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab32s> (6 copies, 47 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       89 07                   mov    %eax,(%rdi)
       5d                      pop    %rbp
       c3                      retq

This patch fixes this via s/inline/__always_inline/.
Code size decrease after the patch is ~4.5k:

    text     data      bss       dec     hex filename
92202377 20826112 36417536 149446025 8e85d89 vmlinux
92197848 20826112 36417536 149441496 8e84bd8 vmlinux5_swap_after

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Denys Vlasenko authored and Linus Torvalds committed Mar 17, 2016
1 parent a644fdf commit bc27fb6
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 29 deletions.
24 changes: 12 additions & 12 deletions include/uapi/linux/byteorder/big_endian.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,51 +40,51 @@
#define __cpu_to_be16(x) ((__force __be16)(__u16)(x))
#define __be16_to_cpu(x) ((__force __u16)(__be16)(x))

static inline __le64 __cpu_to_le64p(const __u64 *p)
static __always_inline __le64 __cpu_to_le64p(const __u64 *p)
{
return (__force __le64)__swab64p(p);
}
static inline __u64 __le64_to_cpup(const __le64 *p)
static __always_inline __u64 __le64_to_cpup(const __le64 *p)
{
return __swab64p((__u64 *)p);
}
static inline __le32 __cpu_to_le32p(const __u32 *p)
static __always_inline __le32 __cpu_to_le32p(const __u32 *p)
{
return (__force __le32)__swab32p(p);
}
static inline __u32 __le32_to_cpup(const __le32 *p)
static __always_inline __u32 __le32_to_cpup(const __le32 *p)
{
return __swab32p((__u32 *)p);
}
static inline __le16 __cpu_to_le16p(const __u16 *p)
static __always_inline __le16 __cpu_to_le16p(const __u16 *p)
{
return (__force __le16)__swab16p(p);
}
static inline __u16 __le16_to_cpup(const __le16 *p)
static __always_inline __u16 __le16_to_cpup(const __le16 *p)
{
return __swab16p((__u16 *)p);
}
static inline __be64 __cpu_to_be64p(const __u64 *p)
static __always_inline __be64 __cpu_to_be64p(const __u64 *p)
{
return (__force __be64)*p;
}
static inline __u64 __be64_to_cpup(const __be64 *p)
static __always_inline __u64 __be64_to_cpup(const __be64 *p)
{
return (__force __u64)*p;
}
static inline __be32 __cpu_to_be32p(const __u32 *p)
static __always_inline __be32 __cpu_to_be32p(const __u32 *p)
{
return (__force __be32)*p;
}
static inline __u32 __be32_to_cpup(const __be32 *p)
static __always_inline __u32 __be32_to_cpup(const __be32 *p)
{
return (__force __u32)*p;
}
static inline __be16 __cpu_to_be16p(const __u16 *p)
static __always_inline __be16 __cpu_to_be16p(const __u16 *p)
{
return (__force __be16)*p;
}
static inline __u16 __be16_to_cpup(const __be16 *p)
static __always_inline __u16 __be16_to_cpup(const __be16 *p)
{
return (__force __u16)*p;
}
Expand Down
24 changes: 12 additions & 12 deletions include/uapi/linux/byteorder/little_endian.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,51 +40,51 @@
#define __cpu_to_be16(x) ((__force __be16)__swab16((x)))
#define __be16_to_cpu(x) __swab16((__force __u16)(__be16)(x))

static inline __le64 __cpu_to_le64p(const __u64 *p)
static __always_inline __le64 __cpu_to_le64p(const __u64 *p)
{
return (__force __le64)*p;
}
static inline __u64 __le64_to_cpup(const __le64 *p)
static __always_inline __u64 __le64_to_cpup(const __le64 *p)
{
return (__force __u64)*p;
}
static inline __le32 __cpu_to_le32p(const __u32 *p)
static __always_inline __le32 __cpu_to_le32p(const __u32 *p)
{
return (__force __le32)*p;
}
static inline __u32 __le32_to_cpup(const __le32 *p)
static __always_inline __u32 __le32_to_cpup(const __le32 *p)
{
return (__force __u32)*p;
}
static inline __le16 __cpu_to_le16p(const __u16 *p)
static __always_inline __le16 __cpu_to_le16p(const __u16 *p)
{
return (__force __le16)*p;
}
static inline __u16 __le16_to_cpup(const __le16 *p)
static __always_inline __u16 __le16_to_cpup(const __le16 *p)
{
return (__force __u16)*p;
}
static inline __be64 __cpu_to_be64p(const __u64 *p)
static __always_inline __be64 __cpu_to_be64p(const __u64 *p)
{
return (__force __be64)__swab64p(p);
}
static inline __u64 __be64_to_cpup(const __be64 *p)
static __always_inline __u64 __be64_to_cpup(const __be64 *p)
{
return __swab64p((__u64 *)p);
}
static inline __be32 __cpu_to_be32p(const __u32 *p)
static __always_inline __be32 __cpu_to_be32p(const __u32 *p)
{
return (__force __be32)__swab32p(p);
}
static inline __u32 __be32_to_cpup(const __be32 *p)
static __always_inline __u32 __be32_to_cpup(const __be32 *p)
{
return __swab32p((__u32 *)p);
}
static inline __be16 __cpu_to_be16p(const __u16 *p)
static __always_inline __be16 __cpu_to_be16p(const __u16 *p)
{
return (__force __be16)__swab16p(p);
}
static inline __u16 __be16_to_cpup(const __be16 *p)
static __always_inline __u16 __be16_to_cpup(const __be16 *p)
{
return __swab16p((__u16 *)p);
}
Expand Down
10 changes: 5 additions & 5 deletions include/uapi/linux/swab.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val)
* __swab16p - return a byteswapped 16-bit value from a pointer
* @p: pointer to a naturally-aligned 16-bit value
*/
static inline __u16 __swab16p(const __u16 *p)
static __always_inline __u16 __swab16p(const __u16 *p)
{
#ifdef __arch_swab16p
return __arch_swab16p(p);
Expand All @@ -164,7 +164,7 @@ static inline __u16 __swab16p(const __u16 *p)
* __swab32p - return a byteswapped 32-bit value from a pointer
* @p: pointer to a naturally-aligned 32-bit value
*/
static inline __u32 __swab32p(const __u32 *p)
static __always_inline __u32 __swab32p(const __u32 *p)
{
#ifdef __arch_swab32p
return __arch_swab32p(p);
Expand All @@ -177,7 +177,7 @@ static inline __u32 __swab32p(const __u32 *p)
* __swab64p - return a byteswapped 64-bit value from a pointer
* @p: pointer to a naturally-aligned 64-bit value
*/
static inline __u64 __swab64p(const __u64 *p)
static __always_inline __u64 __swab64p(const __u64 *p)
{
#ifdef __arch_swab64p
return __arch_swab64p(p);
Expand Down Expand Up @@ -232,7 +232,7 @@ static inline void __swab16s(__u16 *p)
* __swab32s - byteswap a 32-bit value in-place
* @p: pointer to a naturally-aligned 32-bit value
*/
static inline void __swab32s(__u32 *p)
static __always_inline void __swab32s(__u32 *p)
{
#ifdef __arch_swab32s
__arch_swab32s(p);
Expand All @@ -245,7 +245,7 @@ static inline void __swab32s(__u32 *p)
* __swab64s - byteswap a 64-bit value in-place
* @p: pointer to a naturally-aligned 64-bit value
*/
static inline void __swab64s(__u64 *p)
static __always_inline void __swab64s(__u64 *p)
{
#ifdef __arch_swab64s
__arch_swab64s(p);
Expand Down

0 comments on commit bc27fb6

Please sign in to comment.