Skip to content

Commit

Permalink
Merge branch 'Make uprobe attachment APK aware'
Browse files Browse the repository at this point in the history
Daniel Müller says:

====================

On Android, APKs (android packages; zip packages with somewhat
prescriptive contents) are first class citizens in the system: the
shared objects contained in them don't exist in unpacked form on the
file system. Rather, they are mmaped directly from within the archive
and the archive is also what the kernel is aware of.

For users, that complicates the process of attaching a uprobe to a
function contained in a shared object in one such APK: they'd have to
find the byte offset of said function from the beginning of the archive.
That is cumbersome to do manually and can be fragile, because various
changes could invalidate said offset.

That is why for uprobes inside ELF files (not inside an APK), commit
d112c9ce249b ("libbpf: Support function name-based attach uprobes") added
support for attaching to symbols by name. On Android, that mechanism
currently does not work, because this logic is not APK aware.

This patch set introduces first class support for attaching uprobes to
functions inside ELF objects contained in APKs via function names. We
add support for recognizing the following syntax for a binary path:
  <archive>!/<binary-in-archive>

  (e.g., /system/app/test-app.apk!/lib/arm64-v8a/libc++.so)

This syntax is common in the Android ecosystem and used by tools such
as simpleperf. It is also what is being proposed for bcc [0].

If the user provides such a binary path, we find <binary-in-archive>
(lib/arm64-v8a/libc++.so in the example) inside of <archive>
(/system/app/test-app.apk). We perform the regular ELF offset search
inside the binary and add that to the offset within the archive itself,
to retrieve the offset at which to attach the uprobe.

[0] https://github.com/iovisor/bcc/pull/4440

Changelog
---------
v3->v4:
- use ERR_PTR instead of libbpf_err_ptr() in zip_archive_open()
- eliminated err variable from elf_find_func_offset_from_archive()

v2->v3:
- adjusted zip_archive_open() to report errno
- fixed provided libbpf_strlcpy() buffer size argument
- adjusted find_cd() to handle errors better
- use fewer local variables in get_entry_at_offset()

v1->v2:
- removed unaligned_* types
- switched to using __u32 and __u16
- switched to using errno constants instead of hard-coded negative values
- added another pr_debug() message
- shortened central_directory_* to cd_*
- inlined cd_file_header_at_offset() function
- bunch of syntactical changes
====================

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
  • Loading branch information
Andrii Nakryiko committed Mar 2, 2023
2 parents db52b58 + c44fd84 commit b1d462b
Show file tree
Hide file tree
Showing 4 changed files with 495 additions and 28 deletions.
2 changes: 1 addition & 1 deletion tools/lib/bpf/Build
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
usdt.o
usdt.o zip.o
146 changes: 119 additions & 27 deletions tools/lib/bpf/libbpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "zip.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
Expand Down Expand Up @@ -10530,32 +10531,19 @@ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
return NULL;
}

/* Find offset of function name in object specified by path. "name" matches
* symbol name or name@@LIB for library functions.
/* Find offset of function name in the provided ELF object. "binary_path" is
* the path to the ELF binary represented by "elf", and only used for error
* reporting matters. "name" matches symbol name or name@@LIB for library
* functions.
*/
static long elf_find_func_offset(const char *binary_path, const char *name)
static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
{
int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
bool is_shared_lib, is_name_qualified;
char errmsg[STRERR_BUFSIZE];
long ret = -ENOENT;
size_t name_len;
GElf_Ehdr ehdr;
Elf *elf;

fd = open(binary_path, O_RDONLY | O_CLOEXEC);
if (fd < 0) {
ret = -errno;
pr_warn("failed to open %s: %s\n", binary_path,
libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
return ret;
}
elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
if (!elf) {
pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
close(fd);
return -LIBBPF_ERRNO__FORMAT;
}
if (!gelf_getehdr(elf, &ehdr)) {
pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
ret = -LIBBPF_ERRNO__FORMAT;
Expand All @@ -10568,7 +10556,7 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
/* Does name specify "@@LIB"? */
is_name_qualified = strstr(name, "@@") != NULL;

/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
* a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
* linked binary may not have SHT_DYNSYM, so absence of a section should not be
* reported as a warning/error.
Expand Down Expand Up @@ -10681,11 +10669,101 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
}
}
out:
return ret;
}

/* Resolve the offset of function "name" inside the ELF file located at
 * "binary_path". "name" matches a symbol name or name@@LIB for library
 * functions.
 *
 * The file is opened read-only, handed to libelf, and the actual symbol
 * lookup is delegated to elf_find_func_offset(). Returns whatever
 * elf_find_func_offset() returns on success of the setup steps, -errno if
 * the file cannot be opened, or -LIBBPF_ERRNO__FORMAT if libelf cannot
 * create a descriptor for it.
 */
static long elf_find_func_offset_from_file(const char *binary_path, const char *name)
{
	char errmsg[STRERR_BUFSIZE];
	long offset;
	Elf *elf;
	int fd;

	fd = open(binary_path, O_RDONLY | O_CLOEXEC);
	if (fd < 0) {
		/* Capture errno before any further call can overwrite it. */
		offset = -errno;
		pr_warn("failed to open %s: %s\n", binary_path,
			libbpf_strerror_r(offset, errmsg, sizeof(errmsg)));
		return offset;
	}

	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
	if (elf) {
		offset = elf_find_func_offset(elf, binary_path, name);
		elf_end(elf);
	} else {
		pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
		offset = -LIBBPF_ERRNO__FORMAT;
	}

	/* Both the success and the elf_begin() failure path release the fd here. */
	close(fd);
	return offset;
}

/* Find offset of function name in archive specified by path. Currently
* supported are .zip files that do not compress their contents, as used on
* Android in the form of APKs, for example. "file_name" is the name of the ELF
* file inside the archive. "func_name" matches symbol name or name@@LIB for
* library functions.
*
* An overview of the APK format specifically provided here:
* https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
*/
static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
const char *func_name)
{
struct zip_archive *archive;
struct zip_entry entry;
long ret;
Elf *elf;

archive = zip_archive_open(archive_path);
if (IS_ERR(archive)) {
ret = PTR_ERR(archive);
pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
return ret;
}

ret = zip_archive_find_entry(archive, file_name, &entry);
if (ret) {
pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
archive_path, ret);
goto out;
}
pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
(unsigned long)entry.data_offset);

if (entry.compression) {
pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
archive_path);
ret = -LIBBPF_ERRNO__FORMAT;
goto out;
}

elf = elf_memory((void *)entry.data, entry.data_length);
if (!elf) {
pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
elf_errmsg(-1));
ret = -LIBBPF_ERRNO__LIBELF;
goto out;
}

ret = elf_find_func_offset(elf, file_name, func_name);
if (ret > 0) {
pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
func_name, file_name, archive_path, entry.data_offset, ret,
ret + entry.data_offset);
ret += entry.data_offset;
}
elf_end(elf);

out:
zip_archive_close(archive);
return ret;
}

static const char *arch_specific_lib_paths(void)
{
/*
Expand Down Expand Up @@ -10771,9 +10849,10 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
const char *binary_path, size_t func_offset,
const struct bpf_uprobe_opts *opts)
{
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
const char *archive_path = NULL, *archive_sep = NULL;
char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
char full_binary_path[PATH_MAX];
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char full_path[PATH_MAX];
struct bpf_link *link;
size_t ref_ctr_off;
int pfd, err;
Expand All @@ -10790,21 +10869,34 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
if (!binary_path)
return libbpf_err_ptr(-EINVAL);

if (!strchr(binary_path, '/')) {
err = resolve_full_path(binary_path, full_binary_path,
sizeof(full_binary_path));
/* Check if "binary_path" refers to an archive. */
archive_sep = strstr(binary_path, "!/");
if (archive_sep) {
full_path[0] = '\0';
libbpf_strlcpy(full_path, binary_path,
min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
archive_path = full_path;
binary_path = archive_sep + 2;
} else if (!strchr(binary_path, '/')) {
err = resolve_full_path(binary_path, full_path, sizeof(full_path));
if (err) {
pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
prog->name, binary_path, err);
return libbpf_err_ptr(err);
}
binary_path = full_binary_path;
binary_path = full_path;
}
func_name = OPTS_GET(opts, func_name, NULL);
if (func_name) {
long sym_off;

sym_off = elf_find_func_offset(binary_path, func_name);
if (archive_path) {
sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
func_name);
binary_path = archive_path;
} else {
sym_off = elf_find_func_offset_from_file(binary_path, func_name);
}
if (sym_off < 0)
return libbpf_err_ptr(sym_off);
func_offset += sym_off;
Expand Down
Loading

0 comments on commit b1d462b

Please sign in to comment.