bpf: Add x86-64 JIT support for bpf_addr_space_cast instruction.

LLVM generates bpf_addr_space_cast instruction while translating
pointers between native (zero) address space and
__attribute__((address_space(N))).
The addr_space=1 is reserved as bpf_arena address space.

rY = addr_space_cast(rX, 0, 1) is processed by the verifier and
converted to a normal 32-bit move: wY = wX

rY = addr_space_cast(rX, 1, 0) has to be converted by JIT:

aux_reg = upper_32_bits of arena->user_vm_start
aux_reg <<= 32
wY = wX // 32-bit move clears upper 32 bits of dst register
if (wY) // if not zero add upper bits of user_vm_start
  rY |= aux_reg

JIT can do it more efficiently:

mov dst_reg32, src_reg32  // 32-bit move
shl dst_reg, 32
or dst_reg, user_vm_start >> 32  // upper 32 bits of user_vm_start
rol dst_reg, 32
xor r11, r11
test dst_reg32, dst_reg32 // check if lower 32 bits are zero
cmove r11, dst_reg	  // if so, set dst_reg to zero
			  // Intel swapped src/dst register encoding in CMOVcc

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/bpf/20240308010812.89848-5-alexei.starovoitov@gmail.com
This commit is contained in:
Alexei Starovoitov 2024-03-07 17:08:02 -08:00 committed by Andrii Nakryiko
parent 2fe99eb0cc
commit 142fd4d2dc
3 changed files with 47 additions and 1 deletions

View File

@ -1276,13 +1276,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
u64 arena_vm_start;
u64 arena_vm_start, user_vm_start;
int i, excnt = 0;
int ilen, proglen = 0;
u8 *prog = temp;
int err;
arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
detect_reg_usage(insn, insn_cnt, callee_regs_used,
&tail_call_seen);
@ -1350,6 +1351,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
break;
case BPF_ALU64 | BPF_MOV | BPF_X:
if (insn->off == BPF_ADDR_SPACE_CAST &&
insn->imm == 1U << 16) {
if (dst_reg != src_reg)
/* 32-bit mov */
emit_mov_reg(&prog, false, dst_reg, src_reg);
/* shl dst_reg, 32 */
maybe_emit_1mod(&prog, dst_reg, true);
EMIT3(0xC1, add_1reg(0xE0, dst_reg), 32);
/* or dst_reg, user_vm_start */
maybe_emit_1mod(&prog, dst_reg, true);
if (is_axreg(dst_reg))
EMIT1_off32(0x0D, user_vm_start >> 32);
else
EMIT2_off32(0x81, add_1reg(0xC8, dst_reg), user_vm_start >> 32);
/* rol dst_reg, 32 */
maybe_emit_1mod(&prog, dst_reg, true);
EMIT3(0xC1, add_1reg(0xC0, dst_reg), 32);
/* xor r11, r11 */
EMIT3(0x4D, 0x31, 0xDB);
/* test dst_reg32, dst_reg32; check if lower 32-bit are zero */
maybe_emit_mod(&prog, dst_reg, dst_reg, false);
EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
/* cmove r11, dst_reg; if so, set dst_reg to zero */
/* WARNING: Intel swapped src/dst register encoding in CMOVcc !!! */
maybe_emit_mod(&prog, AUX_REG, dst_reg, true);
EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg));
break;
}
fallthrough;
case BPF_ALU | BPF_MOV | BPF_X:
if (insn->off == 0)
emit_mov_reg(&prog,
@ -3432,6 +3467,11 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
}
}
/*
 * Report that this JIT supports bpf_arena: do_jit() emits native code for
 * the addr_space_cast(rX, 1, 0) form of BPF_ALU64 | BPF_MOV | BPF_X.
 * This non-weak definition takes the place of the __weak default, which
 * returns false.
 */
bool bpf_jit_supports_arena(void)
{
return true;
}
bool bpf_jit_supports_ptr_xchg(void)
{
return true;

View File

@ -962,6 +962,7 @@ bool bpf_jit_supports_kfunc_call(void);
bool bpf_jit_supports_far_kfunc_call(void);
bool bpf_jit_supports_exceptions(void);
bool bpf_jit_supports_ptr_xchg(void);
bool bpf_jit_supports_arena(void);
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
bool bpf_helper_changes_pkt_data(void *func);

View File

@ -2932,6 +2932,11 @@ bool __weak bpf_jit_supports_far_kfunc_call(void)
return false;
}
/*
 * Default answer to "does the JIT support bpf_arena?": no.
 * Declared __weak so that a JIT backend which implements the arena
 * address space cast can provide its own definition returning true.
 */
bool __weak bpf_jit_supports_arena(void)
{
return false;
}
/* Return TRUE if the JIT backend satisfies the following two conditions:
* 1) JIT backend supports atomic_xchg() on pointer-sized words.
* 2) Under the specific arch, the implementation of xchg() is the same