btf: expose BTF info through sysfs

Make .BTF section allocated and expose its contents through sysfs.

/sys/kernel/btf directory is created to contain all the BTFs present
inside kernel. Currently there is only kernel's main BTF, represented as
/sys/kernel/btf/kernel file. Once kernel modules' BTFs are supported,
each module will expose its BTF as /sys/kernel/btf/<module-name> file.

Current approach relies on a few pieces coming together:
1. pahole is used to take almost final vmlinux image (modulo .BTF and
   kallsyms) and generate .BTF section by converting DWARF info into
   BTF. This section is not allocated and not mapped to any segment,
   though, so is not yet accessible from inside kernel at runtime.
2. objcopy dumps .BTF contents into binary file and subsequently
   converts binary file into linkable object file with automatically
   generated symbols _binary__btf_kernel_bin_start and
   _binary__btf_kernel_bin_end, pointing to start and end, respectively,
   of BTF raw data.
3. final vmlinux image is generated by linking this object file (and
   kallsyms, if necessary). sysfs_btf.c then creates
   /sys/kernel/btf/kernel file and exposes embedded BTF contents through
   it. This allows, e.g., libbpf and bpftool to access BTF info at
   well-known location, without resorting to searching for vmlinux image
   on disk (location of which is not standardized and vmlinux image
   might not be even available in some scenarios, e.g., inside qemu
   during testing).

Alternative approach using .incbin assembler directive to embed BTF
contents directly was attempted but didn't work, because sysfs_btf.o is
not re-compiled during link-vmlinux.sh stage. This is required, though,
to update embedded BTF data (initially empty data is embedded, then
pahole generates BTF info and we need to regenerate sysfs_btf.o with
updated contents, but it's too late at that point).

If BTF couldn't be generated due to missing or too old pahole,
sysfs_btf.c handles that gracefully by detecting that
_binary__btf_kernel_bin_start (weak symbol) is 0 and not creating
/sys/kernel/btf at all.

v2->v3:
- added Documentation/ABI/testing/sysfs-kernel-btf (Greg K-H);
- created proper kobject (btf_kobj) for btf directory (Greg K-H);
- undo v2 change of reusing vmlinux, as it causes extra kallsyms pass
  due to initially missing _binary__btf_kernel_bin_{start/end} symbols;

v1->v2:
- allow kallsyms stage to re-use vmlinux generated by gen_btf();

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Andrii Nakryiko 2019-08-12 11:39:47 -07:00 committed by Daniel Borkmann
parent a664a83457
commit 341dfcf8d7
4 changed files with 104 additions and 19 deletions

View File

@ -0,0 +1,17 @@
What: /sys/kernel/btf
Date: Aug 2019
KernelVersion: 5.5
Contact: bpf@vger.kernel.org
Description:
Contains BTF type information and related data for kernel and
kernel modules.
What: /sys/kernel/btf/kernel
Date: Aug 2019
KernelVersion: 5.5
Contact: bpf@vger.kernel.org
Description:
Read-only binary attribute exposing kernel's own BTF type
information with description of all internal kernel types. See
Documentation/bpf/btf.rst for detailed description of format
itself.

View File

@ -22,3 +22,6 @@ obj-$(CONFIG_CGROUP_BPF) += cgroup.o
ifeq ($(CONFIG_INET),y) ifeq ($(CONFIG_INET),y)
obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
endif endif
ifeq ($(CONFIG_SYSFS),y)
obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o
endif

51
kernel/bpf/sysfs_btf.c Normal file
View File

@ -0,0 +1,51 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Provide kernel BTF information for introspection and use by eBPF tools.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kobject.h>
#include <linux/init.h>
#include <linux/sysfs.h>
/* See scripts/link-vmlinux.sh, gen_btf() func for details */
extern char __weak _binary__btf_kernel_bin_start[];
extern char __weak _binary__btf_kernel_bin_end[];
/*
 * ->read callback of the "kernel" binary sysfs attribute.
 *
 * Copies @len bytes of the raw BTF blob, starting @off bytes past
 * _binary__btf_kernel_bin_start, into @buf and reports @len bytes as read.
 * @file, @kobj and @bin_attr are not used.
 *
 * NOTE(review): no range check is performed here; presumably the sysfs core
 * clamps @off/@len against the attribute's size before calling -- confirm.
 */
static ssize_t
btf_kernel_read(struct file *file, struct kobject *kobj,
		struct bin_attribute *bin_attr,
		char *buf, loff_t off, size_t len)
{
	const char *src = _binary__btf_kernel_bin_start + off;

	memcpy(buf, src, len);

	return len;
}
/*
 * World-readable (0444) binary attribute named "kernel", served by
 * btf_kernel_read(). Its .size is not known at compile time and is filled
 * in by btf_kernel_init() from the start/end symbols of the embedded BTF
 * data before the file is created.
 */
static struct bin_attribute bin_attr_btf_kernel __ro_after_init = {
	.attr = {
		.name = "kernel",
		.mode = 0444,
	},
	.read = btf_kernel_read,
};
/* kobject for the "btf" directory created under kernel_kobj; NULL if not created */
static struct kobject *btf_kobj;

/*
 * Create the "btf" directory under kernel_kobj and the "kernel" binary file
 * inside it, exposing the BTF data embedded between
 * _binary__btf_kernel_bin_start and _binary__btf_kernel_bin_end.
 *
 * Returns 0 on success, or when no BTF data was linked in (the weak start
 * symbol resolves to NULL), in which case nothing is created. Returns
 * -ENOMEM if the directory kobject cannot be created, or the error from
 * sysfs_create_bin_file() otherwise.
 */
static int __init btf_kernel_init(void)
{
	/* Weak symbol is NULL when BTF generation was skipped at link time. */
	if (!_binary__btf_kernel_bin_start)
		return 0;

	/*
	 * kobject_create_and_add() reports failure by returning NULL, not an
	 * ERR_PTR(), so IS_ERR() would never catch it.
	 */
	btf_kobj = kobject_create_and_add("btf", kernel_kobj);
	if (!btf_kobj)
		return -ENOMEM;

	/* Size of the blob is only known after the final link. */
	bin_attr_btf_kernel.size = _binary__btf_kernel_bin_end -
				   _binary__btf_kernel_bin_start;

	/*
	 * NOTE(review): if creating the file fails, the "btf" directory is
	 * left in place (no kobject_put()); consider cleaning it up.
	 */
	return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_kernel);
}
subsys_initcall(btf_kernel_init);

View File

@ -56,8 +56,8 @@ modpost_link()
} }
# Link of vmlinux # Link of vmlinux
# ${1} - optional extra .o files # ${1} - output file
# ${2} - output file # ${@:2} - optional extra .o files
vmlinux_link() vmlinux_link()
{ {
local lds="${objtree}/${KBUILD_LDS}" local lds="${objtree}/${KBUILD_LDS}"
@ -70,9 +70,9 @@ vmlinux_link()
--start-group \ --start-group \
${KBUILD_VMLINUX_LIBS} \ ${KBUILD_VMLINUX_LIBS} \
--end-group \ --end-group \
${1}" ${@:2}"
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${1} \
-T ${lds} ${objects} -T ${lds} ${objects}
else else
objects="-Wl,--whole-archive \ objects="-Wl,--whole-archive \
@ -81,9 +81,9 @@ vmlinux_link()
-Wl,--start-group \ -Wl,--start-group \
${KBUILD_VMLINUX_LIBS} \ ${KBUILD_VMLINUX_LIBS} \
-Wl,--end-group \ -Wl,--end-group \
${1}" ${@:2}"
${CC} ${CFLAGS_vmlinux} -o ${2} \ ${CC} ${CFLAGS_vmlinux} -o ${1} \
-Wl,-T,${lds} \ -Wl,-T,${lds} \
${objects} \ ${objects} \
-lutil -lrt -lpthread -lutil -lrt -lpthread
@ -92,23 +92,34 @@ vmlinux_link()
} }
# generate .BTF typeinfo from DWARF debuginfo # generate .BTF typeinfo from DWARF debuginfo
# ${1} - vmlinux image
# ${2} - file to dump raw BTF data into
gen_btf() gen_btf()
{ {
local pahole_ver; local pahole_ver
local bin_arch
if ! [ -x "$(command -v ${PAHOLE})" ]; then if ! [ -x "$(command -v ${PAHOLE})" ]; then
info "BTF" "${1}: pahole (${PAHOLE}) is not available" info "BTF" "${1}: pahole (${PAHOLE}) is not available"
return 0 return 1
fi fi
pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/')
if [ "${pahole_ver}" -lt "113" ]; then if [ "${pahole_ver}" -lt "113" ]; then
info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13" info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13"
return 0 return 1
fi fi
info "BTF" ${1} info "BTF" ${2}
vmlinux_link ${1}
LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
# dump .BTF section into raw binary file to link with final vmlinux
bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \
cut -d, -f1 | cut -d' ' -f2)
${OBJCOPY} --dump-section .BTF=.btf.kernel.bin ${1} 2>/dev/null
${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \
--rename-section .data=.BTF .btf.kernel.bin ${2}
} }
# Create ${2} .o file with all symbols from the ${1} object file # Create ${2} .o file with all symbols from the ${1} object file
@ -153,6 +164,7 @@ sortextable()
# Delete output files in case of error # Delete output files in case of error
cleanup() cleanup()
{ {
rm -f .btf.*
rm -f .tmp_System.map rm -f .tmp_System.map
rm -f .tmp_kallsyms* rm -f .tmp_kallsyms*
rm -f .tmp_vmlinux* rm -f .tmp_vmlinux*
@ -215,6 +227,13 @@ ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
info MODINFO modules.builtin.modinfo info MODINFO modules.builtin.modinfo
${OBJCOPY} -j .modinfo -O binary vmlinux.o modules.builtin.modinfo ${OBJCOPY} -j .modinfo -O binary vmlinux.o modules.builtin.modinfo
btf_kernel_bin_o=""
if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
if gen_btf .tmp_vmlinux.btf .btf.kernel.bin.o ; then
btf_kernel_bin_o=.btf.kernel.bin.o
fi
fi
kallsymso="" kallsymso=""
kallsyms_vmlinux="" kallsyms_vmlinux=""
if [ -n "${CONFIG_KALLSYMS}" ]; then if [ -n "${CONFIG_KALLSYMS}" ]; then
@ -246,11 +265,11 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then
kallsyms_vmlinux=.tmp_vmlinux2 kallsyms_vmlinux=.tmp_vmlinux2
# step 1 # step 1
vmlinux_link "" .tmp_vmlinux1 vmlinux_link .tmp_vmlinux1 ${btf_kernel_bin_o}
kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o
# step 2 # step 2
vmlinux_link .tmp_kallsyms1.o .tmp_vmlinux2 vmlinux_link .tmp_vmlinux2 .tmp_kallsyms1.o ${btf_kernel_bin_o}
kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o
# step 3 # step 3
@ -261,18 +280,13 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then
kallsymso=.tmp_kallsyms3.o kallsymso=.tmp_kallsyms3.o
kallsyms_vmlinux=.tmp_vmlinux3 kallsyms_vmlinux=.tmp_vmlinux3
vmlinux_link .tmp_kallsyms2.o .tmp_vmlinux3 vmlinux_link .tmp_vmlinux3 .tmp_kallsyms2.o ${btf_kernel_bin_o}
kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o
fi fi
fi fi
info LD vmlinux info LD vmlinux
vmlinux_link "${kallsymso}" vmlinux vmlinux_link vmlinux "${kallsymso}" "${btf_kernel_bin_o}"
if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
gen_btf vmlinux
fi
if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then
info SORTEX vmlinux info SORTEX vmlinux