
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/IBK2MJ

--------------------------------

In order to use TrIO, we should provide some basic tools. These are
mainly about how to prepare the trace for TrIO. Users who want to use
TrIO in the container on-demand loading scenario can use these scripts
and tools.

Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
---
 tools/trio/bpf/iotracker/Makefile            | 100 +++++
 tools/trio/bpf/iotracker/iotracker.bpf.c     |  54 +++
 tools/trio/bpf/iotracker/iotracker.c         |  57 +++
 tools/trio/bpf/rio_tracker_mod/Makefile      |   9 +
 tools/trio/bpf/rio_tracker_mod/rio_tracker.c | 376 +++++++++++++++++++
 tools/trio/scripts/trace_parser.py           | 287 ++++++++++++++
 6 files changed, 883 insertions(+)
 create mode 100644 tools/trio/bpf/iotracker/Makefile
 create mode 100644 tools/trio/bpf/iotracker/iotracker.bpf.c
 create mode 100644 tools/trio/bpf/iotracker/iotracker.c
 create mode 100644 tools/trio/bpf/rio_tracker_mod/Makefile
 create mode 100644 tools/trio/bpf/rio_tracker_mod/rio_tracker.c
 create mode 100644 tools/trio/scripts/trace_parser.py

diff --git a/tools/trio/bpf/iotracker/Makefile b/tools/trio/bpf/iotracker/Makefile
new file mode 100644
index 000000000000..f5c279c62224
--- /dev/null
+++ b/tools/trio/bpf/iotracker/Makefile
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+include ../../../scripts/Makefile.include
+
+OUTPUT ?= $(abspath .output)
+# The default OUTPUT has no trailing slash, so sub-paths are joined with an explicit '/'.
+
+BPFTOOL_OUTPUT := $(OUTPUT)/bpftool/
+DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool
+BPFTOOL ?= $(DEFAULT_BPFTOOL)
+LIBBPF_SRC := $(abspath ../../../lib/bpf)
+BPFOBJ_OUTPUT := $(OUTPUT)/libbpf/
+BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
+BPF_DESTDIR := $(BPFOBJ_OUTPUT)
+BPF_INCLUDE := $(BPF_DESTDIR)/include
+INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../../include/uapi)
+CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS)
+CFLAGS += $(EXTRA_CFLAGS)
+LDFLAGS += $(EXTRA_LDFLAGS)
+LDLIBS += -lelf -lz
+ifeq ($(shell uname -m), x86_64)
+    ARCH_FLAG := __TARGET_ARCH_x86
+else
+    ARCH_FLAG := __TARGET_ARCH_arm64
+endif
+
+# Try to detect best kernel BTF source
+KERNEL_REL := $(shell uname -r)
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
+	$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+	../../../../vmlinux /sys/kernel/btf/vmlinux \
+	/boot/vmlinux-$(KERNEL_REL)
+VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
+	$(wildcard $(VMLINUX_BTF_PATHS))))
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+MAKEFLAGS += --no-print-directory
+submake_extras := feature_display=0
+endif
+
+.DELETE_ON_ERROR:
+
+.PHONY: all clean iotracker libbpf_hdrs
+all: iotracker
+
+iotracker: $(OUTPUT)/iotracker
+
+clean:
+	$(call QUIET_CLEAN, iotracker)
+	$(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT)
+	$(Q)$(RM) $(OUTPUT)/*.o $(OUTPUT)/*.d
+	$(Q)$(RM) $(OUTPUT)/*.skel.h $(OUTPUT)/vmlinux.h
+	$(Q)$(RM) $(OUTPUT)/iotracker
+	$(Q)$(RM) -r .output
+
+libbpf_hdrs: $(BPFOBJ)
+
+$(OUTPUT)/iotracker: $(OUTPUT)/iotracker.o $(BPFOBJ)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/iotracker.o: $(OUTPUT)/iotracker.skel.h \
+	$(OUTPUT)/iotracker.bpf.o | libbpf_hdrs
+
+$(OUTPUT)/iotracker.bpf.o: $(OUTPUT)/vmlinux.h | libbpf_hdrs
+
+$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
+	$(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
+
+
+$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
+	$(QUIET_GEN)$(CLANG) -g -O2 --target=bpf -D$(ARCH_FLAG) $(INCLUDES) \
+		-c $(filter %.c,$^) -o $@ && \
+	$(LLVM_STRIP) -g $@
+
+$(OUTPUT)/%.o: %.c | $(OUTPUT)
+	$(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
+
+$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
+	$(QUIET_MKDIR)mkdir -p $@
+
+$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
+	$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
+		echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \
+			"specify its location." >&2; \
+		exit 1;\
+	fi
+	$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
+else
+	$(Q)cp "$(VMLINUX_H)" $@
+endif
+
+$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \
+		DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers
+
+$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C ../../../bpf/bpftool OUTPUT=$(BPFTOOL_OUTPUT) bootstrap
diff --git a/tools/trio/bpf/iotracker/iotracker.bpf.c b/tools/trio/bpf/iotracker/iotracker.bpf.c
new file mode 100644
index 000000000000..d032fb52c059
--- /dev/null
+++ b/tools/trio/bpf/iotracker/iotracker.bpf.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2025 Huawei Technologies Co., Ltd
+ */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+
+/* Event tags; keep in sync with the enum in rio_tracker.c */
+enum {
+	TAG_READ = 0,
+	TAG_PRE_FAULT = 1,
+	TAG_POST_FAULT = 2
+};
+
+/* kfunc registered by the rio_tracker kernel module */
+extern void bpf_tracker_rio(unsigned long addr1, unsigned long addr2, int tag) __ksym;
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
+
+SEC("kprobe/erofs_file_read_iter")
+int BPF_KPROBE(erofs_file_read_iter_entry, struct kiocb *iocb,
+	       struct iov_iter *to)
+{
+	struct kiocb *_iocb;
+	struct iov_iter *_to;
+
+	bpf_core_read(&_iocb, sizeof(struct kiocb *), &iocb);
+	bpf_core_read(&_to, sizeof(struct iov_iter *), &to);
+
+	bpf_tracker_rio((unsigned long)_iocb, (unsigned long)_to, TAG_READ);
+	return 0;
+}
+
+SEC("kprobe/filemap_fault")
+int BPF_KPROBE(filemap_fault_entry, struct vm_fault *vmf)
+{
+	struct vm_fault *_vmf;
+
+	bpf_core_read(&_vmf, sizeof(struct vm_fault *), &vmf);
+	bpf_tracker_rio((unsigned long)_vmf, 0, TAG_PRE_FAULT);
+	return 0;
+}
+
+SEC("kprobe/finish_fault")
+int BPF_KPROBE(finish_fault_entry, struct vm_fault *vmf)
+{
+	struct vm_fault *_vmf;
+
+	bpf_core_read(&_vmf, sizeof(struct vm_fault *), &vmf);
+	bpf_tracker_rio((unsigned long)_vmf, 0, TAG_POST_FAULT);
+	return 0;
+}
diff --git a/tools/trio/bpf/iotracker/iotracker.c b/tools/trio/bpf/iotracker/iotracker.c
new file mode 100644
index 000000000000..a9af6087793a
--- /dev/null
+++ b/tools/trio/bpf/iotracker/iotracker.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2025 Huawei Technologies Co., Ltd
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <bpf/libbpf.h>
+#include "iotracker.skel.h"
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+	return vfprintf(stderr, format, args);
+}
+
+int main(int argc, char **argv)
+{
+	struct iotracker_bpf *skel;
+	int err;
+
+	/* Set up libbpf errors and debug info callback */
+	libbpf_set_print(libbpf_print_fn);
+
+	/* Open BPF application */
+	skel = iotracker_bpf__open();
+	if (!skel) {
+		fprintf(stderr, "Failed to open BPF skeleton\n");
+		return 1;
+	}
+
+	/* Load & verify BPF programs */
+	err = iotracker_bpf__load(skel);
+	if (err) {
+		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
+		goto cleanup;
+	}
+
+	/* Attach the kprobe handlers */
+	err = iotracker_bpf__attach(skel);
+	if (err) {
+		fprintf(stderr, "Failed to attach BPF skeleton\n");
+		goto cleanup;
+	}
+
+	printf("Successfully started! Please run `sudo cat /sys/kernel/debug/tracing/trace_pipe`"
+	       " to see output of the BPF programs.\n");
+
+	for (;;) {
+		/* stay alive so the probes remain attached; print a heartbeat */
+		fprintf(stderr, ".");
+		sleep(1);
+	}
+
+cleanup:
+	iotracker_bpf__destroy(skel);
+	return -err;
+}
diff --git a/tools/trio/bpf/rio_tracker_mod/Makefile b/tools/trio/bpf/rio_tracker_mod/Makefile
new file mode 100644
index 000000000000..22942d7124c7
--- /dev/null
+++ b/tools/trio/bpf/rio_tracker_mod/Makefile
@@ -0,0 +1,9 @@
+PWD = $(shell pwd)
+KVERS =$(shell uname -r)
+KERNDIR =/lib/modules/${KVERS}/build/
+obj-m += rio_tracker.o
+build: kernel_modules
+kernel_modules:
+	$(MAKE) -C $(KERNDIR) M=$(PWD) modules
+clean:
+	$(MAKE) -C $(KERNDIR) M=$(PWD) clean
diff --git a/tools/trio/bpf/rio_tracker_mod/rio_tracker.c b/tools/trio/bpf/rio_tracker_mod/rio_tracker.c
new file mode 100644
index 000000000000..656805e4bcba
--- /dev/null
+++ b/tools/trio/bpf/rio_tracker_mod/rio_tracker.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2025 Huawei Technologies Co., Ltd
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
+
+#include <linux/dcache.h>
+#include <linux/string.h>
+#include <asm/current.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/mm.h>
+#include <linux/uio.h>
+#include <linux/err.h>
+#include <linux/nsproxy.h>
+#include <linux/utsname.h>
+#include <linux/printk.h>
+
+/* 20MB by default, change it as needed */
+static uint32_t tracker_buffer_size = 20971520;
+module_param(tracker_buffer_size, uint, 0444);
+
+static char *tracker_output = "/";
+module_param(tracker_output, charp, 0444);
+MODULE_PARM_DESC(tracker_output, "Must be set by the user.");
+
+struct rio_tracker_mgr {
+	bool enable;
+	struct kobject *object;
+	char *host_ns;
+
+	/* buffer for trace */
+	spinlock_t lock;
+	char *data;
+	uint32_t pos;
+};
+
+enum {
+	TAG_READ = 0,
+	TAG_PRE_FAULT = 1,
+	TAG_POST_FAULT = 2
+};
+
+static struct rio_tracker_mgr rtracker = {0};
+
+static ssize_t enable_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	return sprintf(buf, "%d\n", rtracker.enable);
+}
+
+static ssize_t enable_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	ssize_t ret;
+	int value;
+
+	ret = kstrtoint(buf, 10, &value);
+	if (ret < 0) {
+		pr_err("store attr failed\n");
+		return -EINVAL;
+	}
+
+	if (0 != value && 1 != value)
+		return -EINVAL;
+
+	rtracker.enable = value;
+	return count;
+}
+
+/* Write the collected records to tracker_output, truncating the old file. */
+static void _dump_trace(void)
+{
+	struct file *filp;
+	void *buffer;
+	uint32_t len;
+	ssize_t ret;
+	loff_t pos;
+
+	/* snapshot the length under the lock so it matches the terminator */
+	spin_lock(&rtracker.lock);
+	len = rtracker.pos;
+	rtracker.data[len] = '\0';
+	buffer = rtracker.data;
+	spin_unlock(&rtracker.lock);
+
+	filp = filp_open(tracker_output, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	if (IS_ERR(filp)) {
+		pr_warn("dump failed, file(%s) open failed, err:%ld\n",
+			tracker_output, PTR_ERR(filp));
+		return;
+	}
+
+	pos = 0;
+	ret = kernel_write(filp, buffer, len, &pos);
+	if (ret < 0)
+		pr_warn("dump failed, file(%s) write failed, err:%ld, len:%u\n",
+			tracker_output, ret, len);
+	else
+		pr_info("dump to %s %ld bytes successfully!\n",
+			tracker_output, ret);
+	filp_close(filp, NULL);
+}
+
+static ssize_t dump_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	_dump_trace();
+	return count;
+}
+
+static ssize_t reset_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	spin_lock(&rtracker.lock);
+	rtracker.pos = 0;
+	spin_unlock(&rtracker.lock);
+	return count;
+}
+
+static ssize_t host_ns_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	return sprintf(buf, "%s\n", rtracker.host_ns);
+}
+
+static ssize_t host_ns_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	/* drop the newline appended by "echo" so the prefix match can succeed */
+	char *new_prefix = kstrndup(buf, strcspn(buf, "\n"), GFP_KERNEL);
+
+	if (!new_prefix)
+		return -ENOMEM;
+
+	swap(rtracker.host_ns, new_prefix);
+	kfree(new_prefix);
+	return count;
+}
+
+static struct kobj_attribute enable_attr =
+	__ATTR(enable, 0664, enable_show, enable_store);
+static struct kobj_attribute dump_attr =
+	__ATTR(dump, 0200, NULL, dump_store);
+static struct kobj_attribute reset_attr =
+	__ATTR(reset, 0200, NULL, reset_store);
+static struct kobj_attribute host_ns_attr =
+	__ATTR(host_ns, 0664, host_ns_show, host_ns_store);
+
+static struct attribute *tracker_kobj_attrs[] = {
+	&enable_attr.attr,
+	&dump_attr.attr,
+	&reset_attr.attr,
+	&host_ns_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group tracker_attr_group = {
+	.attrs = tracker_kobj_attrs,
+};
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+"Global functions as their definitions will be in rio_tracker.ko BTF");
+
+static inline bool _target_process(const char *name)
+{
+	if (!rtracker.host_ns)
+		return false;
+
+	return !!str_has_prefix(name, rtracker.host_ns);
+}
+
+/* Append one "path,ino,offset,length" record for @file to the trace buffer. */
+static void mark_rio(struct file *file, unsigned long off, unsigned long len)
+{
+	const struct path *path = (const struct path *)&(file->f_path);
+	char buff[256] = {0};
+	char *ret_path = NULL;
+	int written;
+
+	/* only track regular file */
+	if (!S_ISREG(file_inode(file)->i_mode))
+		return;
+
+	ret_path = d_path(path, buff, sizeof(buff));
+	if (IS_ERR(ret_path)) {
+		pr_err("get fpath failed, ret:%ld\n", PTR_ERR(ret_path));
+		return;
+	}
+
+	spin_lock(&rtracker.lock);
+	if (rtracker.pos >= tracker_buffer_size) {
+		spin_unlock(&rtracker.lock);
+		pr_err("tracker buffer is not enough, please enlarge it!\n");
+		return;
+	}
+
+	/* fill each trace item */
+	written = snprintf(rtracker.data + rtracker.pos,
+		tracker_buffer_size - rtracker.pos, "%s,%lu,%lu,%lu\n",
+		ret_path, file_inode(file)->i_ino, off, len);
+	/* snprintf() returning >= the size passed in means the record was cut */
+	if (written >= 0 && written < tracker_buffer_size - rtracker.pos) {
+		rtracker.pos += written;
+	} else {
+		pr_warn("trace data append failed for path:%s, off:%lu, len:%lu\n",
+			ret_path, off, len);
+	}
+	spin_unlock(&rtracker.lock);
+}
+
+void bpf_tracker_rio_read(unsigned long addr1, unsigned long addr2)
+{
+	struct kiocb *iocb = (struct kiocb *)addr1;
+	struct iov_iter *to = (struct iov_iter *)addr2;
+	struct file *filp;
+	size_t size, count;
+	loff_t foff;
+
+	filp = iocb->ki_filp;
+	count = iov_iter_count(to);
+	foff = (iocb->ki_pos >> PAGE_SHIFT) << PAGE_SHIFT;
+	size = ((count >> PAGE_SHIFT) + 1) << PAGE_SHIFT;
+	mark_rio(filp, foff, size);
+}
+
+void bpf_tracker_rio_pre_fault(unsigned long addr)
+{
+	struct vm_fault *vmf = (struct vm_fault *)addr;
+	struct file *file = vmf->vma->vm_file;
+	struct inode *inode;
+	struct address_space *mapping;
+	struct folio *folio;
+	pgoff_t max_idx, index;
+	loff_t off;
+	size_t len;
+
+	if (!file)
+		return;
+
+	mapping = file->f_mapping;
+	inode = mapping->host;
+	index = vmf->pgoff;
+	max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+	if (index >= max_idx)
+		return;
+	folio = filemap_get_folio(mapping, index);
+	if (IS_ERR(folio))
+		return;
+	off = folio_pos(folio);
+	len = folio_size(folio);
+	folio_put(folio);
+
+	mark_rio(file, off, len);
+}
+
+void bpf_tracker_rio_post_fault(unsigned long addr)
+{
+	struct vm_fault *vmf = (struct vm_fault *)addr;
+	struct vm_area_struct *vma = vmf->vma;
+	bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) &&
+		!(vma->vm_flags & VM_SHARED);
+	struct file *file = vmf->vma->vm_file;
+	struct folio *folio;
+
+	/* only track faults caused by reads */
+	if (is_cow)
+		return;
+
+	if (!file)
+		return;
+
+	folio = page_folio(vmf->page);
+	mark_rio(file, folio_pos(folio), folio_size(folio));
+}
+
+__bpf_kfunc void bpf_tracker_rio(unsigned long addr1, unsigned long addr2,
+		int tag)
+{
+	if (!rtracker.enable)
+		return;
+
+	/* only track the matched UTS namespace */
+	if (!_target_process(current->nsproxy->uts_ns->name.nodename))
+		return;
+
+	switch (tag) {
+	case TAG_READ:
+		bpf_tracker_rio_read(addr1, addr2);
+		break;
+	case TAG_PRE_FAULT:
+		bpf_tracker_rio_pre_fault(addr1);
+		break;
+	case TAG_POST_FAULT:
+		bpf_tracker_rio_post_fault(addr1);
+		break;
+	}
+}
+
+__diag_pop();
+
+BTF_SET8_START(bpf_rio_tracker_ids)
+BTF_ID_FLAGS(func, bpf_tracker_rio)
+BTF_SET8_END(bpf_rio_tracker_ids)
+
+static const struct btf_kfunc_id_set kfuncs_set = {
+	.owner = THIS_MODULE,
+	.set = &bpf_rio_tracker_ids,
+};
+
+static __init int rio_tracker_init(void)
+{
+	struct file *filp = filp_open(tracker_output, O_RDWR | O_CREAT, 0644);
+	int ret;
+
+	if (IS_ERR(filp)) {
+		pr_err("rio tracker parameter error, %s is invalid, err:%ld\n",
+			tracker_output, PTR_ERR(filp));
+		return -EINVAL;
+	}
+	filp_close(filp, NULL);
+
+	/* set up the buffer and lock before the sysfs files can reach them */
+	rtracker.enable = false;
+	rtracker.pos = 0;
+	spin_lock_init(&rtracker.lock);
+	rtracker.data = vmalloc(tracker_buffer_size + 1);
+	if (!rtracker.data)
+		return -ENOMEM;
+
+	rtracker.object = kobject_create_and_add("rio_tracker", kernel_kobj);
+	ret = sysfs_create_group(rtracker.object, &tracker_attr_group);
+	if (ret < 0) {
+		pr_err("rio tracker init failed, sysfs kobject create failed\n");
+		goto cleanup;
+	}
+
+	/* register self-defined bpf helper */
+	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &kfuncs_set);
+	if (ret) {
+		pr_err("register btf kfunc error with retcode:%d\n", ret);
+		goto cleanup;
+	}
+
+	pr_info("rio tracker init success!\n");
+	return 0;
+
+cleanup:
+	/* remove the sysfs entries first so nothing can touch the freed buffer */
+	kobject_put(rtracker.object);
+	vfree(rtracker.data);
+	return ret;
+}
+
+static __exit void rio_tracker_exit(void)
+{
+	/* take the sysfs files down before the state they use is released */
+	kobject_put(rtracker.object);
+	_dump_trace();
+	kfree(rtracker.host_ns);
+	vfree(rtracker.data);
+	pr_info("rio tracker exit\n");
+}
+
+module_init(rio_tracker_init);
+module_exit(rio_tracker_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Runtime io Tracker module!");
diff --git a/tools/trio/scripts/trace_parser.py b/tools/trio/scripts/trace_parser.py
new file mode 100644
index 000000000000..5cffc70ad8c4
--- /dev/null
+++ b/tools/trio/scripts/trace_parser.py
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
"""Turn a raw I/O trace into TrIO trace metadata and a packed data file.

Each input line has the form ``path,ino,offset,length``.  The ranges of every
file are merged, the referenced bytes are copied (page aligned) into
``trace.data`` and their layout is described in ``trace.meta``; both are then
re-emitted under a name equal to the SHA-256 of their content.
"""
import os
import sys
import json
import argparse
import shutil
import hashlib


PATH_IO_DELIMIT = "|"
PATH_PATH_DELIMIT = "@"
IO_IO_DELIMIT = "+"
VAR_DELIMIT = ","

# Block size used when hashing the (potentially large) data file.
HASH_CHUNK_SIZE = 1 << 20


class TraceParser(object):
    """Parse a trace file and build the TrIO artefacts below ``output_dir``."""

    def __init__(self, trace_file, output_dir, rootfs):
        self.output = os.path.join(output_dir, "trace.json")
        self.meta = os.path.join(output_dir, "trace.meta")
        self.data = os.path.join(output_dir, "trace.data")
        self.src_file = trace_file
        self.parent = rootfs
        # path -> sorted list of merged (begin, end) byte ranges
        self.trace_map = {}
        # path -> inode number taken from the first record of that path
        self.fpath_map = {}
        self.total_bytes = 0

    @staticmethod
    def get_page_size():
        """Return the system page size, falling back to 4096 on failure."""
        try:
            return int(os.sysconf("SC_PAGE_SIZE"))
        except Exception as e:
            print("Exception:%s, set default pagesize is 4096" % str(e))
            return 4096

    @staticmethod
    def merge_partition(entry, items):
        """Merge the ``(begin, end)`` range ``entry`` into ``items``.

        ``items`` is expected to be sorted by start offset and free of
        overlaps, which is what earlier calls of this function produce.
        Ranges that overlap or touch ``entry`` are folded into it.  A new
        list is returned; ``items`` itself is left untouched.
        """
        begin, end = entry
        merged = []
        placed = False
        for cur_begin, cur_end in items:
            if cur_end < begin:
                # entirely before the new range
                merged.append((cur_begin, cur_end))
            elif end < cur_begin:
                # entirely after it: emit the new range once, then the rest
                if not placed:
                    merged.append((begin, end))
                    placed = True
                merged.append((cur_begin, cur_end))
            else:
                # overlapping or adjacent: absorb into the new range
                begin = min(begin, cur_begin)
                end = max(end, cur_end)
        if not placed:
            merged.append((begin, end))
        return merged

    def in_blacklist(self, path):
        """Return True for paths on memory-backed file systems."""
        return path.startswith(("/tmp/", "/proc/", "/sys/"))

    def _real_path(self, filepath):
        """Map a traced path to its location below the rootfs, if one is set."""
        if not self.parent:
            return filepath
        return "%s/%s" % (self.parent, filepath)

    def parse_trace(self):
        """Read the trace file and fill ``trace_map`` / ``fpath_map``.

        Blank lines are ignored and malformed lines are reported and skipped.
        Records for missing, non-regular or blacklisted files are dropped,
        and ranges are clamped to the current file size.
        """
        with open(self.src_file, 'r') as f:
            for lineno, line in enumerate(f, 1):
                if not line.strip():
                    continue
                # the three numeric fields come last, so split from the right
                # to keep commas that are part of the path
                fields = line.rstrip("\n").rsplit(",", 3)
                try:
                    filepath = fields[0].strip()
                    ino, off, length = (int(v.strip()) for v in fields[1:])
                except ValueError:
                    print("Skip malformed trace line %d" % lineno)
                    continue

                real_path = self._real_path(filepath)
                if not os.path.isfile(real_path):
                    continue

                if self.in_blacklist(filepath):
                    print("Path %s in blacklist should be skip!" % filepath)
                    continue

                # verify file io
                size = int(os.path.getsize(real_path))
                if off >= size:
                    continue
                end = min(off + length, size)

                if filepath not in self.trace_map:
                    self.trace_map[filepath] = [(off, end)]
                    self.fpath_map[filepath] = ino
                else:
                    self.trace_map[filepath] = TraceParser.merge_partition(
                        (off, end), self.trace_map[filepath])

        self.total_bytes = sum(end - begin
                               for ranges in self.trace_map.values()
                               for begin, end in ranges)

    def trans_data(self):
        """Copy ``trace.data`` to a sibling file named after its SHA-256."""
        try:
            digest = hashlib.sha256()
            with open(self.data, 'rb') as f:
                # hash in chunks so a large data file is never fully in memory
                for chunk in iter(lambda: f.read(HASH_CHUNK_SIZE), b""):
                    digest.update(chunk)
            file_hash = os.path.join(os.path.dirname(self.data),
                                     digest.hexdigest())
            shutil.copyfile(self.data, file_hash)
            print("trace data:%s" % file_hash)
        except Exception as e:
            raise Exception("trans data exception:%s" % str(e))

    def trans_meta(self):
        """Flatten ``trace.meta`` into the compact text form.

        The result is ``ino|off,len,foff+off,len,foff@ino|...`` and is written
        to a sibling file named after the SHA-256 of that text.
        """
        try:
            with open(self.meta) as f:
                data = json.load(f)

            parts = []
            for entry in data["entries"]:
                ios_str = IO_IO_DELIMIT.join(
                    "%d%s%d%s%d" % (io[0], VAR_DELIMIT, io[1], VAR_DELIMIT, io[2])
                    for io in entry["io"])
                parts.append("%d%s%s" % (entry["ino"], PATH_IO_DELIMIT, ios_str))
            content = PATH_PATH_DELIMIT.join(parts)

            # save file
            sha256 = hashlib.sha256(content.encode()).hexdigest()
            sha256 = os.path.join(os.path.dirname(self.meta), sha256)
            with open(sha256, 'w') as f:
                f.write(content)
            print("trace meta:%s" % sha256)
        except Exception as e:
            raise Exception("trans meta exception:%s" % str(e))

    @staticmethod
    def dump_map(map, path):
        """Serialise ``map`` as JSON into ``path``."""
        with open(path, 'w') as fd:
            fd.write(json.dumps(map))

    def dump_trace(self):
        """Write the merged per-file ranges to ``trace.json``."""
        TraceParser.dump_map(self.trace_map, self.output)

    def generate_data(self):
        """Pack all traced ranges into ``trace.data`` and describe them in
        ``trace.meta``.

        Every range is followed by zero padding up to the next page boundary,
        so each recorded data offset is page aligned.
        """
        page_size = TraceParser.get_page_size()
        trace_meta = {
            "version": 1,
            "entries": []
        }
        foff = 0
        with open(self.data, "wb") as out:
            for path, ranges in self.trace_map.items():
                real_path = self._real_path(path)
                if not os.path.exists(real_path):
                    continue
                entry = {
                    "name": path,
                    "ino": self.fpath_map[path],
                    "io": []
                }
                with open(real_path, "rb") as src:
                    for begin, end in ranges:
                        length = end - begin
                        src.seek(begin, 0)
                        data = src.read(length)
                        out.write(data)
                        entry["io"].append((begin, length, foff))
                        # zero-fill a short read plus the gap up to the next
                        # page boundary so later offsets stay consistent
                        pad_len = -length % page_size
                        out.write(b"\x00" * (length - len(data) + pad_len))
                        foff += length + pad_len
                trace_meta["entries"].append(entry)
        TraceParser.dump_map(trace_meta, self.meta)


def main(argv):
    """Command-line entry point; returns 0 on success and -1 on failure."""
    arg_parser = argparse.ArgumentParser('container trace parser')
    arg_parser.add_argument('--trace_file',
                            required=True,
                            type=str,
                            help='trace source')
    arg_parser.add_argument('--output_dir',
                            required=True,
                            type=str,
                            help='output directory')
    arg_parser.add_argument('--root',
                            required=True,
                            type=str,
                            help='root of filter')
    try:
        args = arg_parser.parse_args(argv)
        trace_file = args.trace_file
        output_dir = args.output_dir
        rootfs = args.root
        if not os.path.exists(trace_file) or not os.path.exists(output_dir) \
                or not os.path.exists(rootfs):
            print("Please input the valid path")
            return -1
        trace_parser = TraceParser(trace_file, output_dir, rootfs)
        trace_parser.parse_trace()
        trace_parser.dump_trace()  # metadata to json
        trace_parser.generate_data()
        trace_parser.trans_meta()
        trace_parser.trans_data()

        return 0
    except Exception as e:
        print("page cache build exception:%s" % str(e))
        return -1


if __name__ == '__main__':
    try:
        ret = main(sys.argv[1:])
    except Exception as main_e:
        print(str(main_e))
        ret = -1
    sys.exit(ret)