-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
net: filter: BPF 'JIT' compiler for PPC64
An implementation of a code generator for BPF programs to speed up packet filtering on PPC64, inspired by Eric Dumazet's x86-64 version. Filter code is generated as an ABI-compliant function in module_alloc()'d mem with stackframe & prologue/epilogue generated if required (simple filters don't need anything more than an li/blr). The filter's local variables, M[], live in registers. Supports all BPF opcodes, although "complicated" loads from negative packet offsets (e.g. SKF_LL_OFF) are not yet supported. There are a couple of further optimisations left for future work; many-pass assembly with branch-reach reduction and a register allocator to push M[] variables into volatile registers would improve the code quality further. This currently supports big-endian 64-bit PowerPC only (but is fairly simple to port to PPC32 or LE!). Enabled in the same way as x86-64: echo 1 > /proc/sys/net/core/bpf_jit_enable Or, enabled with extra debug output: echo 2 > /proc/sys/net/core/bpf_jit_enable Signed-off-by: Matt Evans <matt@ozlabs.org> Acked-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
- Loading branch information
Matt Evans
authored and
David S. Miller
committed
Jul 21, 2011
1 parent
3aeb7d2
commit 0ca87f0
Showing
7 changed files
with
1,106 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# | ||
# Arch-specific network modules | ||
# | ||
obj-$(CONFIG_BPF_JIT) += bpf_jit_64.o bpf_jit_comp.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
/* bpf_jit.h: BPF JIT compiler for PPC64 | ||
* | ||
* Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation | ||
* | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU General Public License | ||
* as published by the Free Software Foundation; version 2 | ||
* of the License. | ||
*/ | ||
#ifndef _BPF_JIT_H | ||
#define _BPF_JIT_H | ||
|
||
/*
 * Stack frame sizes, in bytes.
 *
 * BPF_PPC_STACKFRAME is the sum of the three parts below it.
 * NOTE(review): 18*8 presumably covers saving r14-r31 (18 registers of
 * 8 bytes each, matching the register-usage table) -- confirm against the
 * prologue generator.
 */
#define BPF_PPC_STACK_LOCALS	32
#define BPF_PPC_STACK_BASIC	(48 + 64)
#define BPF_PPC_STACK_SAVE	(18 * 8)
#define BPF_PPC_STACKFRAME	(BPF_PPC_STACK_BASIC + \
				 BPF_PPC_STACK_LOCALS + \
				 BPF_PPC_STACK_SAVE)
/* Frame size for the slow path; same value as BPF_PPC_STACK_BASIC */
#define BPF_PPC_SLOWPATH_FRAME	(48 + 64)
|
||
/* | ||
* Generated code register usage: | ||
* | ||
* As normal PPC C ABI (e.g. r1=sp, r2=TOC), with: | ||
* | ||
* skb r3 (Entry parameter) | ||
* A register r4 | ||
* X register r5 | ||
* addr param r6 | ||
* r7-r10 scratch | ||
* skb->data r14 | ||
* skb headlen r15 (skb->len - skb->data_len) | ||
* m[0] r16 | ||
* m[...] ... | ||
* m[15] r31 | ||
*/ | ||
/* Register numbers used by generated code (see the usage table above) */
#define r_skb		3	/* skb, the entry parameter */
#define r_ret		3	/* same register number as r_skb */
#define r_A		4	/* BPF A register */
#define r_X		5	/* BPF X register */
#define r_addr		6	/* address parameter */
#define r_scratch1	7	/* first of the scratch registers */
#define r_D		14	/* skb->data */
#define r_HL		15	/* skb headlen (skb->len - skb->data_len) */
#define r_M		16	/* m[0]; m[n] lives in register r_M + n */
|
||
#ifndef __ASSEMBLY__ | ||
|
||
/* | ||
* Assembly helpers from arch/powerpc/net/bpf_jit.S: | ||
*/ | ||
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; | ||
|
||
#define FUNCTION_DESCR_SIZE 24 | ||
|
||
/*
 * Helpers that split a value into 16-bit immediates:
 *
 *   IMM_H(i)	the value shifted right by 16 bits (not masked).
 *   IMM_HA(i)	IMM_H(i) plus one when bit 15 of the value is set. Use it
 *		when the low half is fed to a sign-extending instruction
 *		(e.g. LD, ADDI): a "negative" low half is cancelled out by
 *		the extra one in the top half (i.e. 0xffff + 1 = 0x(1)0000).
 *   IMM_L(i)	the low 16 bits of the value.
 *
 * Note that IMM_HA() evaluates its argument twice.
 */
#define IMM_H(i)	((uintptr_t)(i) >> 16)
#define IMM_HA(i)	(IMM_H(i) + (((uintptr_t)(i) >> 15) & 1))
#define IMM_L(i)	((uintptr_t)(i) & 0xffff)
|
||
/*
 * PLANT_INSTR(d, idx, instr): store instr in d[idx] when d is non-null, then
 * increment idx. idx advances even when nothing is stored, and instr is not
 * evaluated at all in that case.
 */
#define PLANT_INSTR(d, idx, instr) \
	do { \
		if (d) \
			(d)[idx] = instr; \
		idx++; \
	} while (0)

/*
 * EMIT(instr): plant one instruction using the 'image' buffer and 'ctx'
 * context that are in scope where the macro is expanded.
 */
#define EMIT(instr)	PLANT_INSTR(image, ctx->idx, instr)
|
||
/*
 * Basic single-instruction emitters. Each one ORs an opcode template
 * (PPC_INST_*) with its register fields and, where present, a 16-bit
 * immediate, and hands the result to EMIT().
 *
 * PPC_MR, PPC_LI and PPC_LIS are aliases built on PPC_OR, PPC_ADDI and
 * PPC_ADDIS respectively (the latter two with RA = 0).
 */
#define PPC_NOP()		EMIT(PPC_INST_NOP)
#define PPC_BLR()		EMIT(PPC_INST_BLR)
#define PPC_BLRL()		EMIT(PPC_INST_BLRL)
#define PPC_MTLR(r)		EMIT(PPC_INST_MTLR | __PPC_RT(r))
#define PPC_ADDI(d, a, i)	EMIT(PPC_INST_ADDI | __PPC_RT(d) | \
				     __PPC_RA(a) | IMM_L(i))
#define PPC_MR(d, a)		PPC_OR(d, a, a)
#define PPC_LI(r, i)		PPC_ADDI(r, 0, i)
/* addis writes its result to RT, exactly like addi above */
#define PPC_ADDIS(d, a, i)	EMIT(PPC_INST_ADDIS | __PPC_RT(d) | \
				     __PPC_RA(a) | IMM_L(i))
#define PPC_LIS(r, i)		PPC_ADDIS(r, 0, i)
/* The displacement is masked with 0xfffc, i.e. its low two bits are dropped */
#define PPC_STD(r, base, i)	EMIT(PPC_INST_STD | __PPC_RS(r) | \
				     __PPC_RA(base) | ((i) & 0xfffc))
|
||
/* Load emitters: target register r, base register, 16-bit displacement i */
#define PPC_LD(r, base, i) \
	EMIT(PPC_INST_LD | __PPC_RT(r) | __PPC_RA(base) | IMM_L(i))
#define PPC_LWZ(r, base, i) \
	EMIT(PPC_INST_LWZ | __PPC_RT(r) | __PPC_RA(base) | IMM_L(i))
#define PPC_LHZ(r, base, i) \
	EMIT(PPC_INST_LHZ | __PPC_RT(r) | __PPC_RA(base) | IMM_L(i))

/*
 * Convenience helpers for the above with 'far' offsets.
 *
 * When i is below 32768 a single load is emitted. Otherwise an ADDIS first
 * puts base + (IMM_HA(i) << 16) in r, and the load then uses r as its base
 * with the low half of i. r is therefore used as a temporary in the far
 * case, and i is evaluated more than once.
 */
#define PPC_LD_OFFS(r, base, i) \
	do { \
		if ((i) < 32768) { \
			PPC_LD(r, base, i); \
		} else { \
			PPC_ADDIS(r, base, IMM_HA(i)); \
			PPC_LD(r, r, IMM_L(i)); \
		} \
	} while (0)

#define PPC_LWZ_OFFS(r, base, i) \
	do { \
		if ((i) < 32768) { \
			PPC_LWZ(r, base, i); \
		} else { \
			PPC_ADDIS(r, base, IMM_HA(i)); \
			PPC_LWZ(r, r, IMM_L(i)); \
		} \
	} while (0)

#define PPC_LHZ_OFFS(r, base, i) \
	do { \
		if ((i) < 32768) { \
			PPC_LHZ(r, base, i); \
		} else { \
			PPC_ADDIS(r, base, IMM_HA(i)); \
			PPC_LHZ(r, r, IMM_L(i)); \
		} \
	} while (0)
|
||
/*
 * Compare emitters. The immediate forms take register a and the low 16 bits
 * of i; PPC_CMPLW takes two registers. No condition-register field is set
 * here, so whatever the PPC_INST_* template encodes is used.
 */
#define PPC_CMPWI(a, i) \
	EMIT(PPC_INST_CMPWI | __PPC_RA(a) | IMM_L(i))
#define PPC_CMPDI(a, i) \
	EMIT(PPC_INST_CMPDI | __PPC_RA(a) | IMM_L(i))
#define PPC_CMPLWI(a, i) \
	EMIT(PPC_INST_CMPLWI | __PPC_RA(a) | IMM_L(i))
#define PPC_CMPLW(a, b) \
	EMIT(PPC_INST_CMPLW | __PPC_RA(a) | __PPC_RB(b))
|
||
/*
 * Arithmetic emitters: the destination d goes in the RT field.
 *
 * PPC_SUB deliberately places a in RB and b in RA, the reverse of the other
 * three-register forms (NOTE(review): presumably because PPC_INST_SUB is a
 * subtract-from encoding, giving d = a - b -- confirm against the opcode
 * definition).
 */
#define PPC_SUB(d, a, b) \
	EMIT(PPC_INST_SUB | __PPC_RT(d) | __PPC_RB(a) | __PPC_RA(b))
#define PPC_ADD(d, a, b) \
	EMIT(PPC_INST_ADD | __PPC_RT(d) | __PPC_RA(a) | __PPC_RB(b))
#define PPC_MUL(d, a, b) \
	EMIT(PPC_INST_MULLW | __PPC_RT(d) | __PPC_RA(a) | __PPC_RB(b))
#define PPC_MULHWU(d, a, b) \
	EMIT(PPC_INST_MULHWU | __PPC_RT(d) | __PPC_RA(a) | __PPC_RB(b))
#define PPC_MULI(d, a, i) \
	EMIT(PPC_INST_MULLI | __PPC_RT(d) | __PPC_RA(a) | IMM_L(i))
#define PPC_DIVWU(d, a, b) \
	EMIT(PPC_INST_DIVWU | __PPC_RT(d) | __PPC_RA(a) | __PPC_RB(b))

/*
 * Logical and shift-by-register emitters: here the destination d goes in
 * the RA field and the first source a in the RS field.
 */
#define PPC_AND(d, a, b) \
	EMIT(PPC_INST_AND | __PPC_RA(d) | __PPC_RS(a) | __PPC_RB(b))
#define PPC_ANDI(d, a, i) \
	EMIT(PPC_INST_ANDI | __PPC_RA(d) | __PPC_RS(a) | IMM_L(i))
#define PPC_AND_DOT(d, a, b) \
	EMIT(PPC_INST_ANDDOT | __PPC_RA(d) | __PPC_RS(a) | __PPC_RB(b))
#define PPC_OR(d, a, b) \
	EMIT(PPC_INST_OR | __PPC_RA(d) | __PPC_RS(a) | __PPC_RB(b))
#define PPC_ORI(d, a, i) \
	EMIT(PPC_INST_ORI | __PPC_RA(d) | __PPC_RS(a) | IMM_L(i))
#define PPC_ORIS(d, a, i) \
	EMIT(PPC_INST_ORIS | __PPC_RA(d) | __PPC_RS(a) | IMM_L(i))
#define PPC_SLW(d, a, s) \
	EMIT(PPC_INST_SLW | __PPC_RA(d) | __PPC_RS(a) | __PPC_RB(s))
#define PPC_SRW(d, a, s) \
	EMIT(PPC_INST_SRW | __PPC_RA(d) | __PPC_RS(a) | __PPC_RB(s))
/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
#define PPC_SLWI(d, a, i)	EMIT(PPC_INST_RLWINM | __PPC_RA(d) | \
				     __PPC_RS(a) | __PPC_SH(i) | \
				     __PPC_MB(0) | __PPC_ME(31 - (i)))
/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
#define PPC_SRWI(d, a, i)	EMIT(PPC_INST_RLWINM | __PPC_RA(d) | \
				     __PPC_RS(a) | __PPC_SH(32 - (i)) | \
				     __PPC_MB(i) | __PPC_ME(31))
/*
 * sldi = rldicr Rx, Ry, n, 63-n
 *
 * rldicr is an MD-form instruction: both the shift count (sh) and the mask
 * end (me = 63-n) are 6-bit values whose bit 5 is stored apart from the low
 * five bits. The low five bits go through __PPC_SH()/__PPC_MB(); bit 5 of sh
 * lands at instruction bit value 0x2 and bit 5 of me at 0x20. Encoding the
 * me bit is what makes shift counts below 32 (me >= 32) come out right; for
 * n >= 32 that bit is zero and the encoding is unchanged.
 * (Assumes __PPC_SH()/__PPC_MB() keep only the low five bits of their
 * argument.) Valid for 0 <= n <= 63.
 */
#define PPC_SLDI(d, a, i)	EMIT(PPC_INST_RLDICR | __PPC_RA(d) | \
				     __PPC_RS(a) | __PPC_SH(i) | \
				     (((i) & 0x20) >> 4) | \
				     __PPC_MB(63 - (i)) | \
				     ((63 - (i)) & 0x20))
#define PPC_NEG(d, a)		EMIT(PPC_INST_NEG | __PPC_RT(d) | __PPC_RA(a))
|
||
/*
 * Branch emitters. 'dest' is compared against ctx->idx * 4, i.e. the
 * position of the instruction being emitted scaled by four, and the
 * difference is masked into the branch displacement field.
 */

/* Long jump (unconditional 'branch'); displacement mask 0x03fffffc */
#define PPC_JMP(dest) \
	EMIT(PPC_INST_BRANCH | (((dest) - (ctx->idx * 4)) & 0x03fffffc))

/*
 * Short conditional branch; displacement mask 0xfffc.
 * "cond" here covers the BO:BI fields (10 bits, placed at bit 16).
 */
#define PPC_BCC_SHORT(cond, dest) \
	EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | \
	     (((dest) - (ctx->idx * 4)) & 0xfffc))
/*
 * PPC_LI32(d, i): load a 32-bit immediate into d.
 *
 * Always emits LI with the low 16 bits; when the value, taken as an unsigned
 * 32-bit quantity, is 32768 or more, an ADDIS with IMM_HA(i) follows to add
 * the upper half (adjusted for the LI sign extension).
 */
#define PPC_LI32(d, i) \
	do { \
		PPC_LI(d, IMM_L(i)); \
		if ((u32)(uintptr_t)(i) >= 32768) \
			PPC_ADDIS(d, d, IMM_HA(i)); \
	} while (0)

/*
 * PPC_LI64(d, i): load a 64-bit immediate into d.
 *
 * Values with no bits set in the upper 32 bits go through PPC_LI32().
 * Otherwise the upper word is built with LIS (bits 48-63) and ORI
 * (bits 32-47), shifted up by 32 with SLDI, and the lower word is OR-ed in
 * with ORIS (bits 16-31) and ORI (bits 0-15). The OR steps are skipped for
 * 16-bit pieces that are zero.
 *
 * No semicolon follows the closing while (0), so the macro can be used as a
 * single statement, including in an unbraced if/else.
 */
#define PPC_LI64(d, i) \
	do { \
		if (!((uintptr_t)(i) & 0xffffffff00000000ULL)) { \
			PPC_LI32(d, i); \
		} else { \
			PPC_LIS(d, ((uintptr_t)(i) >> 48)); \
			if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \
				PPC_ORI(d, d, \
					((uintptr_t)(i) >> 32) & 0xffff); \
			PPC_SLDI(d, d, 32); \
			if ((uintptr_t)(i) & 0x00000000ffff0000ULL) \
				PPC_ORIS(d, d, \
					 ((uintptr_t)(i) >> 16) & 0xffff); \
			if ((uintptr_t)(i) & 0x000000000000ffffULL) \
				PPC_ORI(d, d, (uintptr_t)(i) & 0xffff); \
		} \
	} while (0)
|
||
/*
 * Return true when offset lies in [-32768, 32767], i.e. it fits in a signed
 * 16-bit field.
 */
static inline bool is_nearbranch(int offset)
{
	const int reach = 32768;

	if (offset >= reach)
		return false;

	return offset >= -reach;
}
|
||
/*
 * PPC_BCC(cond, dest): conditional branch to 'dest' that always occupies
 * two instruction slots.
 *
 * If the target is within short-branch reach, a short conditional branch is
 * emitted followed by a NOP. Otherwise the condition is inverted (by
 * flipping the 'T or F' bit, COND_CMP_TRUE) to hop over an unconditional
 * long jump to 'dest'.
 *
 * Padding the short case with a NOP keeps the code size the same from pass
 * to pass. If the size differed with branch reach, code would move between
 * passes and several passes would be needed to converge on a stable state.
 *
 * 'cond' is parenthesized before the XOR so that an expression argument
 * (e.g. A | B) is inverted as a whole.
 */
#define PPC_BCC(cond, dest) \
	do { \
		if (is_nearbranch((dest) - (ctx->idx * 4))) { \
			PPC_BCC_SHORT(cond, dest); \
			PPC_NOP(); \
		} else { \
			/* Flip the 'T or F' bit to invert comparison */ \
			PPC_BCC_SHORT((cond) ^ COND_CMP_TRUE, \
				      (ctx->idx + 2) * 4); \
			PPC_JMP(dest); \
		} \
	} while (0)
|
||
/*
 * Branch conditions are built from two parts: a bit of cr0 to test ...
 */
#define CR0_LT		0
#define CR0_GT		1
#define CR0_EQ		2
/* ... and BO[3], which selects branching when that bit is true or false */
#define COND_CMP_TRUE	0x100
#define COND_CMP_FALSE	0x000
/*
 * Together they make all required comparisons; GE and NE are the
 * "bit is false" forms of LT and EQ.
 */
#define COND_GT		(CR0_GT | COND_CMP_TRUE)
#define COND_GE		(CR0_LT | COND_CMP_FALSE)
#define COND_EQ		(CR0_EQ | COND_CMP_TRUE)
#define COND_NE		(CR0_EQ | COND_CMP_FALSE)
#define COND_LT		(CR0_LT | COND_CMP_TRUE)
|
||
/*
 * Feature flags: the low 16 bits (SEEN_MEM_MSK) hold one bit per mem[n]
 * slot, and the flags above them record other features.
 */
#define SEEN_DATAREF	0x10000	/* might call external helpers */
#define SEEN_XREG	0x20000	/* X reg is used */
/* SEEN_MEM+(1<<n) = use mem[n] for temporary storage */
#define SEEN_MEM	0x40000
#define SEEN_MEM_MSK	0x0ffff
|
||
/* State carried through code generation */
struct codegen_context {
	/* NOTE(review): presumably a mask of the SEEN_* flags -- confirm */
	unsigned int seen;

	/*
	 * Instruction index. EMIT() advances a field of this name through
	 * 'ctx', and the branch macros scale it by four.
	 */
	unsigned int idx;

	/* bpf index of first RET #0 instruction (if any) */
	int pc_ret0;
};
|
||
#endif | ||
|
||
#endif |
Oops, something went wrong.