hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8613 -------------------------------- This is a userspace tool for MFS that provides a programmable strategy framework. Some basic strategies have been added under test/, and more flexible strategies can be written on top of this framework. Signed-off-by: Hongbo Li <lihongbo22@huawei.com> --- tools/mfs/maio-utils/CMakeLists.txt | 101 ++++ tools/mfs/maio-utils/README.md | 17 + tools/mfs/maio-utils/build/build.sh | 91 +++ .../mfs/maio-utils/include/client/fs_client.h | 17 + tools/mfs/maio-utils/include/inner/inner.h | 37 ++ tools/mfs/maio-utils/include/loader/loader.h | 10 + tools/mfs/maio-utils/include/maio.h | 53 ++ .../maio-utils/include/parser/req_parser.h | 13 + tools/mfs/maio-utils/include/policy/policy.h | 17 + tools/mfs/maio-utils/include/uapi/mfs.h | 62 ++ tools/mfs/maio-utils/src/CMakeLists.txt | 42 ++ .../mfs/maio-utils/src/client/CMakeLists.txt | 17 + tools/mfs/maio-utils/src/client/fs_client.c | 359 ++++++++++++ .../mfs/maio-utils/src/loader/CMakeLists.txt | 18 + tools/mfs/maio-utils/src/loader/loader.c | 144 +++++ tools/mfs/maio-utils/src/main.c | 156 +++++ .../mfs/maio-utils/src/parser/CMakeLists.txt | 16 + tools/mfs/maio-utils/src/parser/req_parser.c | 313 ++++++++++ .../mfs/maio-utils/src/policy/CMakeLists.txt | 16 + tools/mfs/maio-utils/src/policy/policy.c | 114 ++++ tools/mfs/maio-utils/test/Makefile | 26 + tools/mfs/maio-utils/test/README.md | 61 ++ tools/mfs/maio-utils/test/strategy_demo.c | 45 ++ .../mfs/maio-utils/test/strategy_generate.cpp | 296 ++++++++++ .../mfs/maio-utils/test/strategy_prefetch.cpp | 150 +++++ tools/mfs/maio-utils/test/strategy_prefetch.h | 21 + tools/mfs/maio-utils/test/strategy_replay.cpp | 540 ++++++++++++++++++ tools/mfs/maio-utils/test/strategy_template.h | 21 + tools/mfs/maio-utils/test/strategy_toend.c | 46 ++ tools/mfs/maio-utils/test/trace_analyse.py | 87 +++ tools/mfs/maio-utils/test/trace_gen.py | 116 ++++ 31 files changed, 3022 
insertions(+) create mode 100644 tools/mfs/maio-utils/CMakeLists.txt create mode 100644 tools/mfs/maio-utils/README.md create mode 100644 tools/mfs/maio-utils/build/build.sh create mode 100644 tools/mfs/maio-utils/include/client/fs_client.h create mode 100644 tools/mfs/maio-utils/include/inner/inner.h create mode 100644 tools/mfs/maio-utils/include/loader/loader.h create mode 100644 tools/mfs/maio-utils/include/maio.h create mode 100644 tools/mfs/maio-utils/include/parser/req_parser.h create mode 100644 tools/mfs/maio-utils/include/policy/policy.h create mode 100644 tools/mfs/maio-utils/include/uapi/mfs.h create mode 100644 tools/mfs/maio-utils/src/CMakeLists.txt create mode 100644 tools/mfs/maio-utils/src/client/CMakeLists.txt create mode 100644 tools/mfs/maio-utils/src/client/fs_client.c create mode 100644 tools/mfs/maio-utils/src/loader/CMakeLists.txt create mode 100644 tools/mfs/maio-utils/src/loader/loader.c create mode 100644 tools/mfs/maio-utils/src/main.c create mode 100644 tools/mfs/maio-utils/src/parser/CMakeLists.txt create mode 100644 tools/mfs/maio-utils/src/parser/req_parser.c create mode 100644 tools/mfs/maio-utils/src/policy/CMakeLists.txt create mode 100644 tools/mfs/maio-utils/src/policy/policy.c create mode 100644 tools/mfs/maio-utils/test/Makefile create mode 100644 tools/mfs/maio-utils/test/README.md create mode 100644 tools/mfs/maio-utils/test/strategy_demo.c create mode 100644 tools/mfs/maio-utils/test/strategy_generate.cpp create mode 100644 tools/mfs/maio-utils/test/strategy_prefetch.cpp create mode 100644 tools/mfs/maio-utils/test/strategy_prefetch.h create mode 100644 tools/mfs/maio-utils/test/strategy_replay.cpp create mode 100644 tools/mfs/maio-utils/test/strategy_template.h create mode 100644 tools/mfs/maio-utils/test/strategy_toend.c create mode 100644 tools/mfs/maio-utils/test/trace_analyse.py create mode 100644 tools/mfs/maio-utils/test/trace_gen.py diff --git a/tools/mfs/maio-utils/CMakeLists.txt 
b/tools/mfs/maio-utils/CMakeLists.txt new file mode 100644 index 000000000000..a13fe2bf1d7f --- /dev/null +++ b/tools/mfs/maio-utils/CMakeLists.txt @@ -0,0 +1,101 @@ +#******************************************************************************* +#******************************************************************************* + +cmake_minimum_required(VERSION 3.0) +cmake_policy(SET CMP0057 NEW) + +project(maio) + +#******************************************************************************* +#******************************************************************************* + +add_definitions(-D__LINUX_USR__) +add_definitions(-D__USE_GNU) +add_definitions(-D__POSIX_SOURCE) +add_definitions(-D_GNU_SOURCE) +add_definitions(-D_FILE_OFFSET_BITS=64) + +#******************************************************************************* +#******************************************************************************* + +include(TestBigEndian) + +TEST_BIG_ENDIAN(BIGENDIAN) + if (${BIGENDIAN}) + message(">>> Big Endian") + add_definitions(-D_CMAKE_BIGENDIAN) + else() + message(">>> Little Endian") + endif(${BIGENDIAN}) + +#******************************************************************************* +#******************************************************************************* + +EXECUTE_PROCESS(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCH) +if(${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "aarch64") + message(STATUS "Architecture: ${ARCH}") +else() + message(STATUS "Invalid Architecture: ${ARCH}") + return() +endif() + +#******************************************************************************* +#******************************************************************************* + +set(BASE_FLAGS "-g -Wall -Werror -fno-strict-overflow -rdynamic") +set(SP_C_FLAGS "-std=gnu99") +set(SP_C++_FLAGS "-std=c++11") +set(SP_RELEASE_FLAGS "-O2") +set(SP_DEBUG_FLAGS "-O0") +set(SEC_REQ_FLAGS "-fstack-protector-all -fPIE -pie -fPIC -Wl,-z,relro,-z,now,-z,noexecstack") 
+set(SEC_SUG_BASE_FLAGS "-Wformat=2") +set(SEC_SUG_SP_C_FLAGS "-Wshadow") + +set(C_FLAGS "${BASE_FLAGS} ${SP_C_FLAGS}") +set(C++_FLAGS "${BASE_FLAGS} ${SP_C++_FLAGS}") +set(SEC_C_FLAGS "${SEC_REQ_FLAGS} ${SEC_SUG_BASE_FLAGS} ${SEC_SUG_SP_C_FLAGS}") +set(SEC_C++_FLAGS "${SEC_REQ_FLAGS} ${SEC_SUG_BASE_FLAGS}") +set(DEPC_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +if(CMAKE_BUILD_TYPE MATCHES "Release") + message(">>> Release Mode") + set(CMAKE_C_FLAGS "${C_FLAGS} ${SP_RELEASE_FLAGS} ${SEC_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${C++_FLAGS} ${SP_RELEASE_FLAGS} ${SEC_C++_FLAGS} ${DEPC_FLAGS}") +else() + message(">>> Debug Mode") + add_definitions(-D_DEBUG) + set(CMAKE_C_FLAGS "${C_FLAGS} ${SP_DEBUG_FLAGS} ${SEC_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${C++_FLAGS} ${SP_DEBUG_FLAGS} ${SEC_C++_FLAGS} ${DEPC_FLAGS}") + set(COVERAGE_FLAGS "-coverage") + set(COVERAGE_LINK "gcov") +endif() + +message(STATUS ">>> Source Dir: " ${PROJECT_SOURCE_DIR}) +message(STATUS ">>> Binary Dir: " ${PROJECT_BINARY_DIR}) +message(STATUS ">>> C_FLAGS : " ${CMAKE_C_FLAGS}) +message(STATUS ">>> CXX_FLAGS : " ${CMAKE_CXX_FLAGS}) + +#******************************************************************************* +#******************************************************************************* + +set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) + +#******************************************************************************* +#******************************************************************************* + +if(DEFINED THIRD_PARTY) + message(">>> third_party: ${THIRD_PARTY}") +else() + set(THIRD_PARTY ${PROJECT_SOURCE_DIR}/lib) +endif() + +set(INFRA_DIR ${PROJECT_SOURCE_DIR}/infrastructure) +set(SECUREC_DIR ${THIRD_PARTY}/securec) +set(ZLOG_DIR ${THIRD_PARTY}/zlog) +set(LIBNUMA_DIR ${THIRD_PARTY}/libnuma) + +#******************************************************************************* +#******************************************************************************* + 
+add_subdirectory(infrastructure) +add_subdirectory(src) diff --git a/tools/mfs/maio-utils/README.md b/tools/mfs/maio-utils/README.md new file mode 100644 index 000000000000..40477bebf130 --- /dev/null +++ b/tools/mfs/maio-utils/README.md @@ -0,0 +1,17 @@ +# maio-utils + +#### How to build + +1. Prepare the third-party dependencies (run from the maio-utils directory): +``` +git clone -b threadpool_bind https://github.com/hb-lee/infrastructure.git infrastructure +git clone -b 1.2.18 https://github.com/HardySimpson/zlog.git thirdparty/zlog +git clone https://github.com/chriszt/securec.git thirdparty/securec +git clone -b v2.0.19 https://github.com/numactl/numactl.git thirdparty/libnuma +``` +2. cd build +3. bash build.sh prepare # only needed once +4. bash build.sh build [debug|release] # defaults to debug + +#### How to run +Run with `./maio-utils -m ${MNTPOINT}`, where ${MNTPOINT} is the MFS mount point. To use a custom strategy, run `./maio-utils -m ${MNTPOINT} -s ${STRATEGYLIB}`, where ${STRATEGYLIB} is your strategy library. Some examples are provided in the `test` directory. 
diff --git a/tools/mfs/maio-utils/build/build.sh b/tools/mfs/maio-utils/build/build.sh
new file mode 100644
index 000000000000..91b49d3d6bf8
--- /dev/null
+++ b/tools/mfs/maio-utils/build/build.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+# Helper script for maio-utils:
+#   build.sh prepare           build the third-party libraries and stage them
+#                              under ${Lib_PATH}
+#   build.sh build [release]   configure with CMake and build (Debug unless
+#                              "release" is given)
+
+LOCAL_PATH="$(readlink -e "$(dirname "$0")")"
+PRJ_PATH="$(readlink -e "${LOCAL_PATH}/..")"
+THIRD_SUB="${PRJ_PATH}/thirdparty"
+Lib_PATH="${PRJ_PATH}/lib"
+
+ARCH=$(uname -m)
+
+# Build zlog from ${THIRD_SUB}/zlog and copy its include/ and lib/ trees
+# into ${Lib_PATH}/zlog.
+function build_zlog() {
+    local zlog_dir="${THIRD_SUB}/zlog"
+    local build_path="${zlog_dir}/build"
+    # -p: do not fail when "prepare" is run a second time
+    mkdir -p "${build_path}"
+    mkdir -p "${Lib_PATH}/zlog"
+    # never run make in the wrong directory if the checkout is missing
+    cd "${zlog_dir}" || return 1
+    make PREFIX="$(pwd)/build/zlog"
+    make PREFIX="$(pwd)/build/zlog" install
+    cp -rdp "${build_path}/zlog/include" "${Lib_PATH}/zlog"
+    cp -rdp "${build_path}/zlog/lib" "${Lib_PATH}/zlog"
+    cd -
+}
+
+# Build libsecurec.so with the makefile matching ${ARCH} and copy the headers
+# and the shared library into ${Lib_PATH}/securec.
+function build_securec() {
+    local securec_dir="${THIRD_SUB}/securec"
+    local securec_lib="${securec_dir}/build/securec/lib"
+    mkdir -p "${securec_lib}"
+    mkdir -p "${Lib_PATH}/securec"
+    cd "${securec_dir}" || return 1
+    if [ "${ARCH}" = "aarch64" ]; then
+        make -f aarch64-so.mk
+    else
+        make -f x86-so.mk
+    fi
+    cp -rdp include build/securec
+    cp libsecurec.so build/securec/lib
+    cp -rdp build/securec/include "${Lib_PATH}/securec"
+    cp -rdp build/securec/lib "${Lib_PATH}/securec"
+    cd -
+}
+
+# Configure, build and install numactl into its local build/ prefix, then
+# copy include/ and lib/ into ${Lib_PATH}/libnuma.
+function build_libnuma() {
+    local libnuma_dir="${THIRD_SUB}/libnuma"
+    local build_path="${libnuma_dir}/build"
+    mkdir -p "${build_path}"
+    mkdir -p "${Lib_PATH}/libnuma"
+    cd "${libnuma_dir}" || return 1
+    ./autogen.sh
+    ./configure --prefix="$(pwd)/build"
+    make -j4
+    make install
+    cp -rdp build/include "${Lib_PATH}/libnuma"
+    cp -rdp build/lib "${Lib_PATH}/libnuma"
+    cd -
+}
+
+# Build every third-party library this script knows how to build.
+# The former build_jsoncpp call was dropped: no such function is defined in
+# this script, so it only produced a "command not found" error.
+function prepare_thirdparty() {
+    build_zlog
+    build_securec
+    build_libnuma
+}
+
+# Run CMake in the current directory and build.
+#   $1: "release" selects a Release build, anything else selects Debug
+function build_program() {
+    local BUILD_TYPE="$1"
+
+    set -x
+    if [ "${BUILD_TYPE}" == "release" ]; then
+        cmake .. -DCMAKE_BUILD_TYPE=Release
+    else
+        cmake .. 
-DCMAKE_BUILD_TYPE=Debug + fi + set +x + + make clean + make VERBOSE=1 -j +} + +function main() { + local ACTION="$1" + local PARAMETER="$2" + + if [ "${ACTION}" == "prepare" ]; then + prepare_thirdparty + fi + if [ "$ACTION" == "build" ]; then + build_program $PARAMETER + fi +} + +main $@ +exit $? diff --git a/tools/mfs/maio-utils/include/client/fs_client.h b/tools/mfs/maio-utils/include/client/fs_client.h new file mode 100644 index 000000000000..df3a086e317a --- /dev/null +++ b/tools/mfs/maio-utils/include/client/fs_client.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#ifndef _FS_CLIENT_H_ +#define _FS_CLIENT_H_ + +#include <stdint.h> + +int fs_evict_by_path(const char *path, uint64_t off, uint64_t len); +int fs_load_by_path(const char *path, uint64_t off, uint64_t len); +int fs_load_by_fd(int fd, uint64_t off, uint64_t len); +int fs_sync_by_path(const char *path, void *buf, uint64_t off, uint64_t len); + +int fs_client_init(const char *source); +void fs_client_exit(void); + +#endif diff --git a/tools/mfs/maio-utils/include/inner/inner.h b/tools/mfs/maio-utils/include/inner/inner.h new file mode 100644 index 000000000000..fbb87e13b1bb --- /dev/null +++ b/tools/mfs/maio-utils/include/inner/inner.h @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#ifndef _INNER_H_ +#define _INNER_H_ + +#include <time.h> +#include <stdint.h> + +static inline uint64_t _get_ts(void) +{ + struct timespec now = {0}; + (void)clock_gettime(CLOCK_MONOTONIC_COARSE, &now); + return (uint64_t)now.tv_sec; +} + +/* io context for loader and writeback */ +struct io_context { + uint32_t id; /* msg id */ + uint64_t off; /* io offset in fd */ + uint64_t len; /* io length */ + uint16_t numaid; /* numa node of this io, -1 means non-bind */ + int fd; /* io file handle */ + char *path; /* fullpath of this io */ + void *buf; /* flighting buffer for writeback */ + + uint64_t 
seq; + + void *private; + void (*end_io)(int ret, void *private); +}; + +void loader_update_curseq(uint64_t seq); +int loader_evict_submit(struct io_context *ioctx); +int loader_io_submit(struct io_context *ioctx); + +#endif diff --git a/tools/mfs/maio-utils/include/loader/loader.h b/tools/mfs/maio-utils/include/loader/loader.h new file mode 100644 index 000000000000..7bf0f163b292 --- /dev/null +++ b/tools/mfs/maio-utils/include/loader/loader.h @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#ifndef _LOADER_H_ +#define _LOADER_H_ + +int loader_init(void); +void loader_exit(void); + +#endif diff --git a/tools/mfs/maio-utils/include/maio.h b/tools/mfs/maio-utils/include/maio.h new file mode 100644 index 000000000000..a508d5b1fa27 --- /dev/null +++ b/tools/mfs/maio-utils/include/maio.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#ifndef _MAIO_H_ +#define _MAIO_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +struct maio_entry { + char *fpath; + uint64_t toff; + uint64_t tlen; + uint8_t tnuma; + uint64_t seq; +}; + +struct maio { + /* IN */ + int fd; + uint64_t off; + uint64_t len; + int pid; + uint8_t op; + uint64_t ts; + uint32_t cksz; + + /* OUT */ + uint16_t flags; +#define MAIO_WITH_SEQ 0x0001 + uint64_t curseq; + struct maio_entry entries[]; +}; + +struct maio_operation { + int max_io; + int (*init) (void); + void (*exit) (void); + int (*load) (struct maio **io); + int (*evict) (struct maio **io); +}; + +char *_get_fullpath(char *fpath, int fd); +void maio_preload(struct maio *maio, int nio); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/mfs/maio-utils/include/parser/req_parser.h b/tools/mfs/maio-utils/include/parser/req_parser.h new file mode 100644 index 000000000000..bcb23474b58b --- /dev/null +++ b/tools/mfs/maio-utils/include/parser/req_parser.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: 
GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#ifndef __REQ_PARSER_H__ +#define __REQ_PARSER_H__ + +#include <stdint.h> + +int parser_init(uint32_t fs_mode, const char *mntpoint); +void parser_destory(void); +int maio_parse_req(void *buf, int size); + +#endif diff --git a/tools/mfs/maio-utils/include/policy/policy.h b/tools/mfs/maio-utils/include/policy/policy.h new file mode 100644 index 000000000000..954d25edc894 --- /dev/null +++ b/tools/mfs/maio-utils/include/policy/policy.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#ifndef _POLICY_H_ +#define _POLICY_H_ + +#include "maio.h" + +int policy_max_io(void); +int policy_load(struct maio **io); +int policy_evict(struct maio **io); +int policy_register(const char *path); +void policy_unregister(void); +int policy_init(void); +void policy_exit(void); + +#endif diff --git a/tools/mfs/maio-utils/include/uapi/mfs.h b/tools/mfs/maio-utils/include/uapi/mfs.h new file mode 100644 index 000000000000..a7d5882b5e50 --- /dev/null +++ b/tools/mfs/maio-utils/include/uapi/mfs.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_MFS_H +#define _UAPI_LINUX_MFS_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +enum mfs_opcode { + MFS_OP_READ = 0, + MFS_OP_FAULT, + MFS_OP_FAROUND, +}; + +enum { + MFS_MODE_NONE = 0, + MFS_MODE_LOCAL, + MFS_MODE_REMOTE, +}; + +struct mfs_ioc_ra { + __u64 off; + __u64 len; +}; + +struct mfs_ioc_done { + __u32 id; + __u32 ret; +}; + +struct mfs_ioc_rpath { + __u16 max; + __u16 len; + __u8 d[]; +}; + +#define MFS_IOC_RA _IOW(0xbc, 1, struct mfs_ioc_ra) +#define MFS_IOC_DONE _IOW(0xbc, 2, struct mfs_ioc_done) +#define MFS_IOC_RPATH _IOWR(0xbc, 3, struct mfs_ioc_rpath) + +struct mfs_ioc_fsinfo { + __u8 mode; /* 0: none, 1: local, 2: remote */ +}; + +#define MFS_IOC_FSINFO _IOR(0xbd, 1, struct mfs_ioc_fsinfo) + +struct mfs_msg { + __u8 version; + __u8 opcode; + 
__u16 len; + __u32 fd; + __u32 id; + __u8 data[]; +}; + +struct mfs_read { + __u64 off; + __u64 len; + __s32 pid; +}; + +#endif /* _UAPI_LINUX_MFS_H */ diff --git a/tools/mfs/maio-utils/src/CMakeLists.txt b/tools/mfs/maio-utils/src/CMakeLists.txt new file mode 100644 index 000000000000..fd053a8bd081 --- /dev/null +++ b/tools/mfs/maio-utils/src/CMakeLists.txt @@ -0,0 +1,42 @@ +#******************************************************************************* +#******************************************************************************* + +include_directories(${PROJECT_SOURCE_DIR}/include) +include_directories(${PROJECT_SOURCE_DIR}/include/inner) +include_directories(${PROJECT_SOURCE_DIR}/include/uapi) +include_directories(${PROJECT_SOURCE_DIR}/include/client) +include_directories(${PROJECT_SOURCE_DIR}/include/loader) +include_directories(${PROJECT_SOURCE_DIR}/include/policy) +include_directories(${PROJECT_SOURCE_DIR}/include/monitor) +include_directories(${PROJECT_SOURCE_DIR}/include/parser) + +include_directories(${INFRA_DIR}/include) +include_directories(${SECUREC_DIR}/include) +include_directories(${ZLOG_DIR}/include) +include_directories(${LIBNUMA_DIR}/include) + +#******************************************************************************* +#******************************************************************************* + +link_directories(${PROJECT_BINARY_DIR}/lib) +link_directories(${SECUREC_DIR}/lib) +link_directories(${ZLOG_DIR}/lib) +link_directories(${LIBNUMA_DIR}/lib) + +#******************************************************************************* +#******************************************************************************* + +add_subdirectory(client) +add_subdirectory(loader) +add_subdirectory(policy) +add_subdirectory(parser) + +add_executable(maio-utils main.c) +target_link_libraries(maio-utils "-Wl,--whole-archive" + client + loader + policy + parser + "-Wl,--no-whole-archive" + infra + dl) diff --git 
a/tools/mfs/maio-utils/src/client/CMakeLists.txt b/tools/mfs/maio-utils/src/client/CMakeLists.txt new file mode 100644 index 000000000000..ca1c91fd3057 --- /dev/null +++ b/tools/mfs/maio-utils/src/client/CMakeLists.txt @@ -0,0 +1,17 @@ +#******************************************************************************* +#******************************************************************************* + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +#******************************************************************************* +#******************************************************************************* + +#aux_source_directory(. CLIENT_SRC) +set(CLIENT_SRC fs_client.c) + +#******************************************************************************* +#******************************************************************************* + +add_library(client ${CLIENT_SRC}) +target_link_libraries(client infra + securec) diff --git a/tools/mfs/maio-utils/src/client/fs_client.c b/tools/mfs/maio-utils/src/client/fs_client.c new file mode 100644 index 000000000000..94ef1b70be97 --- /dev/null +++ b/tools/mfs/maio-utils/src/client/fs_client.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#include "fs_client.h" +#include "mfs.h" +#include "inner.h" +#include "hashmap.h" +#include "hashfunc.h" +#include "stimer.h" +#include "atomic.h" +#include "log.h" +#include "list.h" +#include "spinlock.h" +#include "sysdef.h" +#include "securec.h" + +#include <fcntl.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <sys/ioctl.h> + +#define TIMER_CYCLE 10000 /* 10s */ +#define FD_TMOUT 3600 /* 60s */ + +struct fs_client { + int rootfd; + hashmap_t *map; + stimer_t *timer; + list_head_t head; + spinlock_t lock; + uint64_t tmout; +}; + +struct fs_client g_client = {0}; + +typedef struct { + uint32_t plen; + char *path; +} fdkey_t; + +typedef struct { + fdkey_t key; + int fd; + int ref; + uint64_t 
ts; + list_head_t link; + hashlink_t hash; +} fd_t; + +static inline int fd_cmp(void *first, void *second) +{ + fdkey_t *key1 = (fdkey_t *)first; + fdkey_t *key2 = (fdkey_t *)second; + + if (key1->plen != key2->plen) + return -1; + + return strcmp(key1->path, key2->path); +} + +static inline uint32_t fd_hash(void *args) +{ + fdkey_t *key = (fdkey_t *)args; + uint64_t hash = hashstr(key->path, key->plen); + + return (uint32_t)hash; +} + +static inline void fd_inc(void *args, hashlink_t *link) +{ + fd_t *f = container_of(link, fd_t, hash); + int ref = atomic_s32_inc(&f->ref); + if (ref <= 0) { + log_error("fd(%d) for path(%s) with invalid ref(%d)", + f->fd, f->key.path, ref); + sys_assert(0); + } +} + +static inline int fd_dec(void *args, hashlink_t *link) +{ + fd_t *f = container_of(link, fd_t, hash); + int ref = atomic_s32_dec(&f->ref); + + if (ref < 0) { + log_error("fd(%d) for path(%s) with invalid ref(%d)", + f->fd, f->key.path, ref); + sys_assert(0); + } + /* update access timestamp for the last one */ + if (ref == 0) + f->ts = _get_ts(); + return -1; +} + +static inline int fd_expire(void *args, hashlink_t *link) +{ + fd_t *f = container_of(link, fd_t, hash); + int ref = atomic_s32_fetch(&f->ref); + uint64_t cur; + + if (ref > 0) + return -1; + if (ref < 0) { + log_error("fd(%d) for path(%s) with invalid ref(%d)", + f->fd, f->key.path, ref); + sys_assert(0); + } + cur = _get_ts(); + return ((cur - f->ts) >= g_client.tmout) ? 
0 : -1; +} + +static fd_t *fd_alloc(const char *path) +{ + fd_t *f = (fd_t *)malloc(sizeof(fd_t)); + if (!f) { + log_error("alloc fd_t fail"); + return NULL; + } + + f->key.path = strdup(path); + if (!f->key.path) { + log_error("strdup(%s) fail", path); + free(f); + return NULL; + } + + f->key.plen = strlen(path) + 1; + f->fd = -1; + f->ref = 1; + f->hash.key = &f->key; + list_init(&f->link); + return f; +} + +static void fd_free(fd_t *f) +{ + if (f->fd > 0) { + int ret = close(f->fd); + if (ret != 0) { + log_error("close(%d) fail, err(%s)", f->fd, strerror(errno)); + } + f->fd = -1; + } + + if (f->key.path) { + free(f->key.path); + f->key.path = NULL; + } + list_del(&f->link); + free(f); +} + +static int fd_fetch(const char *path, fd_t **f_res, int dirfd) +{ + fdkey_t key; + key.plen = strlen(path) + 1; + key.path = (char *)path; + + /* fd in cache */ + hashlink_t *data = NULL; + if (EEXIST == hashmap_search(g_client.map, &key, &data, NULL, fd_inc)) { + *f_res = container_of(data, fd_t, hash); + (*f_res)->ts = _get_ts(); + return 0; + } + + /* new fd */ + fd_t *f = fd_alloc(path); + if (!f) + return -ENOMEM; + + /* open file */ + if (dirfd > 0) + f->fd = openat(dirfd, path, O_RDONLY, S_IRUSR | S_IRGRP); + else + f->fd = open(path, O_RDONLY, S_IRUSR | S_IRGRP); + if (f->fd < 0) { + if (errno != ENOENT) + log_error("open(%d,%s) fail, err(%s)", dirfd, path, strerror(errno)); + fd_free(f); + return errno; + } + + /* add to fd cache */ + hashlink_t *old = NULL; + if (EEXIST == hashmap_insert(g_client.map, &f->hash, &old, NULL, fd_inc)) { + fd_free(f); + f = container_of(old, fd_t, hash); + } + + spinlock_lock(&g_client.lock); + list_add_tail(&f->link, &g_client.head); + spinlock_unlock(&g_client.lock); + f->ts = _get_ts(); + *f_res = f; + return 0; +} + +static inline void fd_restore(fd_t *f) +{ + (void)hashmap_protect(g_client.map, &f->key, NULL, fd_dec); +} + +static void _fd_gc(void *args) +{ + list_head_t head, *curr, *next; + hashlink_t *data = NULL; + + 
list_init(&head); + spinlock_lock(&g_client.lock); + list_splice(&g_client.head, &head); + spinlock_unlock(&g_client.lock); + + list_foreach_safe(curr, next, &head) + { + fd_t *f = container_of(curr, fd_t, link); + if (atomic_s32_fetch(&f->ref) != 0) + continue; + /* try to destroy */ + if (0 != hashmap_delete(g_client.map, &f->key, &data, NULL, fd_expire)) + continue; + list_del(&f->link); + log_info("path:%s with fd:%d is expire.", f->key.path, f->fd); + fd_free(f); + } + + spinlock_lock(&g_client.lock); + list_splice(&head, &g_client.head); + spinlock_unlock(&g_client.lock); +} + +int fs_evict_by_path(const char *path, uint64_t off, uint64_t len) +{ + int ret; + fd_t *f; + uint64_t step = 4096 * 4; + + ret = fd_fetch(path, &f, 0); + if (ret) + return ret; + + while (len) { + if (step > len) + step = len; + ret = posix_fadvise(f->fd, off, len, POSIX_FADV_DONTNEED); + if (ret < 0) + break; + len -= step; + off += step; + } + + fd_restore(f); + return ret; +} + +int fs_load_by_path(const char *path, uint64_t off, uint64_t len) +{ + int ret; + fd_t *f; + uint64_t step = 16 * 4096; + char *buf = NULL; + + ret = fd_fetch(path, &f, 0); + if (ret) + return ret; + + buf = (char *)malloc(step); + + while (len) { + if (step > len) + step = len; + ret = pread(f->fd, buf, step, off); + if (ret <= 0) + break; + len -= step; + off += step; + } + free(buf); + fd_restore(f); + return ret; +} + +int fs_load_by_fd(int fd, uint64_t off, uint64_t len) +{ + struct mfs_ioc_ra ra; + + ra.off = off; + ra.len = len; + return ioctl(fd, MFS_IOC_RA, &ra); +} + +int fs_sync_by_path(const char *path, void *buf, uint64_t off, uint64_t len) +{ + ssize_t rsize = 0, ret; + uint64_t pos = 0; + fd_t *f; + + ret = fd_fetch(path + 1, &f, g_client.rootfd); + if (ret) + return ret; + + do { + ret = pread(f->fd, buf + pos, len - pos, off + pos); + if (ret == 0) + break; + if (ret < 0) { + log_error("failed to read:%s", strerror(errno)); + fd_restore(f); + return -1; + } + pos += ret; + rsize += ret; + } 
while (rsize < len); + fd_restore(f); + return 0; +} + +int fs_client_init(const char *source) +{ + int ret; + + g_client.rootfd = -1; + if (source) { + g_client.rootfd = open(source, O_RDONLY | O_DIRECTORY); + if (g_client.rootfd <= 0) { + log_error("open %s failed", source); + return -1; + } + } + ret = hashmap_create(1024, fd_cmp, fd_hash, &g_client.map); + if (ret) { + log_error("client fd map alloc failed"); + close(g_client.rootfd); + return ret; + } + + g_client.timer = stimer_create("LfsGc", TIMER_CYCLE, NULL, _fd_gc); + if (!g_client.timer) { + log_error("timer create failed"); + hashmap_destroy(g_client.map, NULL, NULL); + close(g_client.rootfd); + return -1; + } + list_init(&g_client.head); + spinlock_init(&g_client.lock); + g_client.tmout = FD_TMOUT; + return 0; +} + +void fs_client_exit(void) +{ + spinlock_destroy(&g_client.lock); + stimer_destroy(g_client.timer); + hashmap_destroy(g_client.map, NULL, NULL); + close(g_client.rootfd); +} + diff --git a/tools/mfs/maio-utils/src/loader/CMakeLists.txt b/tools/mfs/maio-utils/src/loader/CMakeLists.txt new file mode 100644 index 000000000000..08e1b637be26 --- /dev/null +++ b/tools/mfs/maio-utils/src/loader/CMakeLists.txt @@ -0,0 +1,18 @@ +#******************************************************************************* +#******************************************************************************* + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +#******************************************************************************* +#******************************************************************************* + +aux_source_directory(. 
LOADER_SRC) + +#******************************************************************************* +#******************************************************************************* + +add_library(loader ${LOADER_SRC}) +target_link_libraries(loader infra + numa + securec) + diff --git a/tools/mfs/maio-utils/src/loader/loader.c b/tools/mfs/maio-utils/src/loader/loader.c new file mode 100644 index 000000000000..f3460735c94e --- /dev/null +++ b/tools/mfs/maio-utils/src/loader/loader.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#include "loader.h" + +#include "fs_client.h" +#include "threadpool.h" +#include "log.h" +#include "atomic.h" +#include "inner.h" +#include "sysdef.h" + +#include "securec.h" +#include "numa.h" +#include <errno.h> + +struct loader_mgr { + int numa_num; + threadpool_t **numa_thp; + uint64_t curseq; +}; + +struct loader_mgr g_local_mgr; + +static threadpool_t *choose_worker_pool(struct io_context *ioctx) +{ + static uint64_t cid; + int nid; + + if (ioctx->numaid == (uint16_t)-1) + nid = cid++ % g_local_mgr.numa_num; + else + nid = ioctx->numaid % g_local_mgr.numa_num; + return g_local_mgr.numa_thp[nid]; +} + +static void handle_io(void *args) +{ + struct io_context *ioctx = (struct io_context *)args; + int ret; + + if (g_local_mgr.curseq > ioctx->seq) { + if (ioctx->end_io) + ioctx->end_io(0, ioctx->private); + return; + } + ret = ioctx->path ? 
fs_load_by_path(ioctx->path, ioctx->off, ioctx->len)
+			: fs_load_by_fd(ioctx->fd, ioctx->off, ioctx->len);
+	if (ioctx->end_io)
+		ioctx->end_io(ret, ioctx->private);
+}
+
+/*
+ * Queue a load request on the worker pool chosen for @ioctx.
+ * Always returns 0; the result of the load is reported through
+ * ioctx->end_io.
+ */
+int loader_io_submit(struct io_context *ioctx)
+{
+	threadpool_t *worker_pools = choose_worker_pool(ioctx);
+
+	threadpool_submit(worker_pools, ioctx, handle_io);
+	return 0;
+}
+
+/*
+ * Raise the current sequence number to @seq. The value only moves forward:
+ * the CAS loop gives up as soon as the stored value is >= @seq.
+ */
+void loader_update_curseq(uint64_t seq)
+{
+	uint64_t curseq = atomic_u64_fetch(&g_local_mgr.curseq);
+	uint64_t old;
+
+	while (seq > curseq) {
+		if (atomic_u64_cas(&g_local_mgr.curseq, curseq, seq, &old))
+			break;
+		/* lost the race: retry against the value that is stored now */
+		curseq = old;
+	}
+}
+
+/*
+ * Worker: evict the range described by @args (a struct io_context) and
+ * report the result through end_io. Requests without a path complete with 0.
+ */
+static void handle_evict(void *args)
+{
+	struct io_context *ioctx = (struct io_context *)args;
+	int ret;
+
+	ret = ioctx->path ? fs_evict_by_path(ioctx->path, ioctx->off, ioctx->len) : 0;
+	if (ioctx->end_io)
+		ioctx->end_io(ret, ioctx->private);
+}
+
+/*
+ * Queue an evict request on the worker pool chosen for @ioctx.
+ * Always returns 0; the result is reported through ioctx->end_io.
+ */
+int loader_evict_submit(struct io_context *ioctx)
+{
+	threadpool_t *worker_pools = choose_worker_pool(ioctx);
+
+	threadpool_submit(worker_pools, ioctx, handle_evict);
+	return 0;
+}
+
+/* Destroy every worker pool that was created and free the pool array. */
+static void _free_loader(void)
+{
+	int i;
+
+	if (!g_local_mgr.numa_thp)
+		return;
+
+	for (i = 0; i < g_local_mgr.numa_num; i++) {
+		/* slots stay NULL when loader_init() failed part-way through */
+		if (g_local_mgr.numa_thp[i])
+			threadpool_destroy(g_local_mgr.numa_thp[i]);
+	}
+	free(g_local_mgr.numa_thp);
+	g_local_mgr.numa_thp = NULL;
+}
+
+/*
+ * Create one worker pool ("Loader-<i>", 8 workers) per NUMA node.
+ * Returns 0 on success, -1 when NUMA is unavailable or an allocation fails;
+ * pools created before a failure are released again.
+ */
+int loader_init(void)
+{
+	int i, ret, wnum = 8;
+	char name[THD_NAME + 1] = {0};
+
+	if (numa_available() < 0) {
+		log_error("Current system does not support NUMA api");
+		return -1;
+	}
+
+	g_local_mgr.numa_num = numa_max_node() + 1;
+	g_local_mgr.curseq = 0;
+	ret = 0;
+	do {
+		g_local_mgr.numa_thp =
+			calloc(g_local_mgr.numa_num, sizeof(threadpool_t *));
+		if (!g_local_mgr.numa_thp) {
+			log_error("alloc for numa threadpool failed");
+			ret = -1;
+			break;
+		}
+		for (i = 0; i < g_local_mgr.numa_num; i++) {
+			sprintf_s(name, sizeof(name), "Loader-%d", i);
+			g_local_mgr.numa_thp[i] = threadpool_create(name, wnum, i);
+			if (!g_local_mgr.numa_thp[i]) {
+				log_error("alloc Loader-%d failed", i);
+				ret = -1;
+				
break; + } + } + } while (0); + + if (ret != 0) + _free_loader(); + return ret; +} + +void loader_exit(void) +{ + _free_loader(); +} diff --git a/tools/mfs/maio-utils/src/main.c b/tools/mfs/maio-utils/src/main.c new file mode 100644 index 000000000000..ac5473d00011 --- /dev/null +++ b/tools/mfs/maio-utils/src/main.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <getopt.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> + +#include "loader.h" +#include "log.h" +#include "maio.h" +#include "policy.h" +#include "req_parser.h" +#include "fs_client.h" +#include "mfs.h" +#include "securec.h" + +#include <sys/ioctl.h> +#include <sys/statfs.h> + +#define MAX_BUF_SIZE 1024 +#define MFS_SUPER_MAGIC 0x85428370 + +static int read_req(int fd) +{ + char *buf; + int ret; + + buf = calloc(MAX_BUF_SIZE, sizeof(char)); + if (!buf) { + log_error("failed to alloc buf\n"); + return -1; + } + ret = read(fd, buf, MAX_BUF_SIZE); + if (ret <= 0) { + if (ret < 0) + log_error("failed to read, ret:%d\n", ret); + return -1; + } + return maio_parse_req(buf, ret); +} + +static void dev_monitor(int dfd) +{ + struct pollfd pfd; + int ret; + + pfd.fd = dfd; + pfd.events = POLLIN; + while (1) { + ret = poll(&pfd, 1, -1); + if (ret < 0) { + log_error("poll failed\n"); + return; + } + if (ret == 0 || !(pfd.revents & POLLIN)) { + log_error("poll events error, ret:%d, revents:%x\n", ret, pfd.revents); + continue; + } + while (!read_req(pfd.fd)) {} + } +} + +int main(int argc, char *argv[]) +{ + const char *mntpoint = NULL, *strategylib = NULL; + struct mfs_ioc_fsinfo fsinfo = {0}; + int dfd, ret, opt; + struct statfs buf; + char devname[10]; + + while ((opt = getopt(argc, argv, "m:s:")) != -1) { + switch (opt) { + case 'm': + mntpoint = optarg; + break; + case 's': + strategylib = optarg; + break; + default: + fprintf(stderr, "Usage: %s -m ${mnt} [-s 
${strategylib}]\n", argv[0]); + return -EINVAL; + } + } + if (!mntpoint) { + fprintf(stderr, "mount point should specify\n"); + fprintf(stderr, "Usage: %s -m ${mnt} [-s ${strategylib}]\n", argv[0]); + return -1; + } + + ret = statfs(mntpoint, &buf); + if (ret) { + log_error("statfs %s failed, errstr:%s\n", mntpoint, strerror(errno)); + return -1; + } + if (buf.f_type != MFS_SUPER_MAGIC) { + log_error("fstype(%x) is invalid, please check the mountpoint", buf.f_type); + return -1; + } + + /* the device node name is built from f_spare[0] of the mount's statfs */ + sprintf_s(devname, sizeof(devname), "/dev/mfs%ld", buf.f_spare[0]); + /* Open with O_CLOEXEC to avoid potential fd leaks */ + dfd = open(devname, O_RDWR | O_CLOEXEC); + if (dfd < 0) { + log_error("open %s failed errstr:%s\n", devname, strerror(errno)); + return -1; + } + + ret = ioctl(dfd, MFS_IOC_FSINFO, (unsigned long)&fsinfo); + if (ret < 0) { + log_error("ioctl get fsinfo failed, ret:%d\n", ret); + goto close_fd; + } + if (fsinfo.mode != 1) { + log_error("fs running mode(%d) is not supported", fsinfo.mode); + /* ret is still 0 from the ioctl; do not exit with success */ + ret = -1; + goto close_fd; + } + ret = loader_init(); + if (ret) { + log_error("loader init failed"); + goto close_fd; + } + ret = fs_client_init(NULL); + if (ret) { + log_error("clients init failed"); + goto free_loader; + } + ret = parser_init(fsinfo.mode, mntpoint); + if (ret) { + log_error("parser init failed"); + goto free_client; + } + ret = policy_init(); + if (ret) { + log_error("policy init failed"); + goto free_parser; + } + ret = policy_register(strategylib); + if (ret) { + log_info("register new policy failed, use default policy"); + } + + /* blocks until poll() on the device fails */ + dev_monitor(dfd); + /* teardown runs in reverse order of initialisation */ +free_parser: + parser_destory(); +free_client: + fs_client_exit(); +free_loader: + loader_exit(); +close_fd: + close(dfd); + return ret; +} diff --git a/tools/mfs/maio-utils/src/parser/CMakeLists.txt b/tools/mfs/maio-utils/src/parser/CMakeLists.txt new file mode 100644 index 000000000000..72e5849da8e4 --- /dev/null +++ b/tools/mfs/maio-utils/src/parser/CMakeLists.txt @@ -0,0 +1,16 @@ 
+#******************************************************************************* +#******************************************************************************* + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +#******************************************************************************* +#******************************************************************************* + +aux_source_directory(. PARSER_SRC) + +#******************************************************************************* +#******************************************************************************* + +add_library(parser ${PARSER_SRC}) +target_link_libraries(parser infra + securec) diff --git a/tools/mfs/maio-utils/src/parser/req_parser.c b/tools/mfs/maio-utils/src/parser/req_parser.c new file mode 100644 index 000000000000..137eb8d8cb3f --- /dev/null +++ b/tools/mfs/maio-utils/src/parser/req_parser.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ + +#include "inner.h" +#include "log.h" +#include "mfs.h" +#include "policy.h" +#include "securec.h" +#include "threadpool.h" + +#include <stdbool.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +/* Parser-wide state; every field is filled in by parser_init(). */ +struct parser_mgr { + uint32_t fs_mode; /* mode passed to parser_init(); eviction only runs for MFS_MODE_LOCAL */ + uint32_t chunk_size; /* alignment unit for request offsets, set to 4096 */ + const char *mntpoint; /* strdup'ed copy of the mount point, used as a path prefix */ + threadpool_t *parser; /* "ReqParser" pool that runs _parse_one_req() */ +}; + +struct parser_mgr g_parser_mgr; + +#define PARSER_NUM 8 /* worker count of the "ReqParser" pool */ +#define MAX_PATH_SIZE 1024 /* bytes reserved after struct mfs_ioc_rpath for the returned path */ +/* Alignment helpers; the mask trick assumes y is a power of two (parser_init() checks chunk_size). */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) + +bool is_power_of_2(unsigned long n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + +/* end_io callback: releases the context together with the path and buf it carries. */ +void ctx_cb(int ret, void *args) +{ + struct io_context *ctx = (struct io_context *)args; + + if (ctx->path) + free(ctx->path); + if (ctx->buf) + free(ctx->buf); + free(ctx); +} + +/* Build an io_context from @entry and queue it on the loader; ctx_cb() frees it on completion. */ +void _submit_local_io(int fd, struct maio_entry *entry) +{ + struct io_context *ioctx; + + ioctx = calloc(1, sizeof(struct io_context)); + if (!ioctx) { + log_error("ioctx alloc failed"); + return; + } + ioctx->off = entry->toff; + ioctx->len = entry->tlen; + ioctx->numaid = entry->tnuma; + ioctx->fd = fd; + ioctx->path = entry->fpath; + ioctx->seq = entry->seq; + ioctx->private = ioctx; + ioctx->end_io = ctx_cb; + loader_io_submit(ioctx); +} + +/* + * Return a malloc'ed "<mntpoint><path>" for @fd, where <path> comes from + * MFS_IOC_RPATH, or NULL on failure. The caller frees the result. + */ +char *get_file_path(int fd) +{ + struct mfs_ioc_rpath *rp; + char *path; + size_t len; + int ret; + + /* one spare byte so the terminator fits even if the ioctl reports len == max */ + rp = malloc(sizeof(struct mfs_ioc_rpath) + MAX_PATH_SIZE + 1); + if (!rp) { + log_error("rpath alloc failed"); + return NULL; + } + rp->max = MAX_PATH_SIZE; + ret = ioctl(fd, MFS_IOC_RPATH, (unsigned long)rp); + if (ret) { + log_error("realpath failed for fd:%d, ret:%d", fd, ret); + free(rp); + return NULL; + } + rp->d[rp->len] = '\0'; + + /* +1 for the terminating NUL */ + len = strlen(g_parser_mgr.mntpoint) + rp->len + 1; + path = malloc(len); + if (!path) { + log_error("malloc path %s %s failed", g_parser_mgr.mntpoint, (const char *)rp->d); + free(rp); + return NULL; + } + snprintf(path, len, "%s%s", g_parser_mgr.mntpoint, (const char *)rp->d); + free(rp); + return path; +} + +/* Return @fpath when given, otherwise a strdup'ed path for @fd from MFS_IOC_RPATH (NULL on failure). */ +char *_get_fullpath(char *fpath, int fd) +{ + struct mfs_ioc_rpath *rp; + char *path; + int ret; + + if (fpath) + return fpath; + /* one spare byte so the terminator fits even if the ioctl reports len == max */ + rp = malloc(sizeof(struct mfs_ioc_rpath) + MAX_PATH_SIZE + 1); + if (!rp) { + log_error("rpath alloc failed"); + return NULL; + } + rp->max = MAX_PATH_SIZE; + ret = ioctl(fd, MFS_IOC_RPATH, (unsigned long)rp); + if (ret) { + log_error("realpath failed for fd:%d, ret:%d", fd, ret); + free(rp); + return NULL; + } + rp->d[rp->len] = '\0'; + path = strdup((const char *)rp->d); + if (!path) + log_error("strdup path(%s) failed", (const 
char *)rp->d); + free(rp); + return path; +} + +/* Submit the first @nio entries of @maio as loads, passing fd 0 to _submit_local_io(). */ +void maio_preload(struct maio *maio, int nio) +{ + struct maio_entry *entry; + + for (int i = 0; i < nio; ++i) { + entry = &maio->entries[i]; + _submit_local_io(0, entry); + } +} + +/* Placeholder: always returns (uint8_t)-1 regardless of @pid. */ +uint8_t pid_to_numaid(int pid) +{ + return (uint8_t)-1; +} + +/* Submit the first @nio entries of @maio as loads against maio->fd; @id is not used. */ +void _process_read_io(uint32_t id, struct maio *maio, int nio) +{ + struct maio_entry *entry; + int i; + + /* process the successor io */ + for (i = 0; i < nio; i++) { + entry = &maio->entries[i]; + _submit_local_io(maio->fd, entry); + } +} + +/* Build an io_context from @entry and queue it for eviction; ctx_cb() frees it on completion. */ +void _submit_evict(struct maio_entry *entry) +{ + struct io_context *ioctx; + + ioctx = calloc(1, sizeof(struct io_context)); + if (!ioctx) { + log_error("ioctx alloc failed"); + return; + } + ioctx->off = entry->toff; + ioctx->len = entry->tlen; + ioctx->numaid = -1; + ioctx->path = entry->fpath; + ioctx->private = ioctx; + ioctx->end_io = ctx_cb; + loader_evict_submit(ioctx); +} + +/* + * Ask the policy which ranges to evict for the read described by @msg and + * submit them. Only runs in MFS_MODE_LOCAL. The maio is allocated without + * entry slots, so a policy that returns entries has to replace *io itself. + */ +void _process_evict(struct mfs_msg *msg) +{ + struct maio_entry *entry; + struct mfs_read *read; + uint64_t soff, eoff; + struct maio *maio; + int ret; + + if (g_parser_mgr.fs_mode != MFS_MODE_LOCAL) + return; + + maio = calloc(1, sizeof(struct maio)); + if (!maio) { + log_error("failed to alloc maio"); + return; + } + + read = (void *)msg->data; + maio->cksz = g_parser_mgr.chunk_size; + soff = round_down(read->off, maio->cksz); + eoff = round_up(read->off + read->len, maio->cksz); + maio->fd = msg->fd; + /* len is measured from the aligned start, so off must be the aligned start too (as in _parse_read_req) */ + maio->off = soff; + maio->len = eoff - soff; + maio->pid = read->pid; + maio->op = msg->opcode; + + ret = policy_evict(&maio); + if (ret <= 0) + goto out; + + for (int i = 0; i < ret; ++i) { + entry = &maio->entries[i]; + _submit_evict(entry); + } +out: + free(maio); +} + +void _parse_read_req(struct mfs_msg *msg) +{ + struct mfs_read *read; + struct maio *maio; + uint64_t soff, eoff, ts; + int ret, io_num = policy_max_io(); + + _process_evict(msg); + + /* alloc maio slots */ + maio = calloc(1, sizeof(struct maio) + io_num * sizeof(struct maio_entry)); + 
if (!maio) { + log_error("failed to alloc maio, num:%d", io_num); + return; + } + read = (void *)msg->data; + maio->cksz = g_parser_mgr.chunk_size; + soff = round_down(read->off, maio->cksz); + eoff = round_up(read->off + read->len, maio->cksz); + ts = _get_ts(); + maio->fd = msg->fd; + maio->off = soff; + maio->len = eoff - soff; + maio->pid = read->pid; + maio->op = msg->opcode; + maio->ts = ts; + + /* fill slots from policy */ + ret = policy_load(&maio); + if (ret < 0) { + char *path = _get_fullpath(NULL, maio->fd); + if (path) { + log_warn("io on path:%s%s offset:%lu length:%lu dropped", + g_parser_mgr.mntpoint, path, read->off, read->len); + free(path); + } + free(maio); + return; + } + + if (maio->flags & MAIO_WITH_SEQ) + loader_update_curseq(maio->curseq); + + /* keep the input parameter */ + maio->fd = msg->fd; + maio->off = soff; + maio->len = eoff - soff; + maio->cksz = g_parser_mgr.chunk_size; + maio->pid = read->pid; + maio->op = msg->opcode; + maio->ts = ts; + + /* process each maio in slots */ + _process_read_io(msg->id, maio, ret); + free(maio); +} + +/* Pool callback: handles READ and FAULT messages, then frees @buf. */ +void _parse_one_req(void *buf) +{ + struct mfs_msg *msg = (struct mfs_msg *)buf; + + if (msg->opcode == MFS_OP_READ || msg->opcode == MFS_OP_FAULT) + _parse_read_req(msg); + + free(buf); +} + +/* + * Validate the message in @buf (@size bytes were read) and queue it. + * Returns 0 when queued; the parser then owns and frees @buf. + * Returns -1 when rejected; @buf stays with the caller. + */ +int maio_parse_req(void *buf, int size) +{ + struct mfs_msg *msg = (struct mfs_msg *)buf; + + /* msg->len is only meaningful once a whole header has been read */ + if (size < 0 || (size_t)size < sizeof(*msg) || msg->len != size) + return -1; + + threadpool_submit(g_parser_mgr.parser, buf, _parse_one_req); + return 0; +} + +/* Record @fs_mode and a private copy of @mntpoint, then create the parser pool. Returns 0 or -1. */ +int parser_init(uint32_t fs_mode, const char *mntpoint) +{ + g_parser_mgr.fs_mode = fs_mode; + g_parser_mgr.chunk_size = 4096; + if (!is_power_of_2(g_parser_mgr.chunk_size)) { + log_error("chunk size must be power of 2"); + return -1; + } + g_parser_mgr.mntpoint = strdup(mntpoint); + if (!g_parser_mgr.mntpoint) { + log_error("failed to dup mntpoint"); + return -1; + } + g_parser_mgr.parser = threadpool_create("ReqParser", PARSER_NUM, -1); + if (!g_parser_mgr.parser) { + log_error("failed to alloc parser threadpool"); + free((void *)g_parser_mgr.mntpoint); + g_parser_mgr.mntpoint = NULL; + return -1; + } + + return 0; +} + +void parser_destory(void) +{ + 
threadpool_destroy(g_parser_mgr.parser); + /* mntpoint was strdup'ed by parser_init() */ + free((void *)g_parser_mgr.mntpoint); + g_parser_mgr.mntpoint = NULL; +} diff --git a/tools/mfs/maio-utils/src/policy/CMakeLists.txt b/tools/mfs/maio-utils/src/policy/CMakeLists.txt new file mode 100644 index 000000000000..714023475e07 --- /dev/null +++ b/tools/mfs/maio-utils/src/policy/CMakeLists.txt @@ -0,0 +1,16 @@ +#******************************************************************************* +#******************************************************************************* + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +#******************************************************************************* +#******************************************************************************* + +aux_source_directory(. POLICY_SRC) + +#******************************************************************************* +#******************************************************************************* + +add_library(policy ${POLICY_SRC}) +target_link_libraries(policy infra + securec) diff --git a/tools/mfs/maio-utils/src/policy/policy.c b/tools/mfs/maio-utils/src/policy/policy.c new file mode 100644 index 000000000000..c77ab31752be --- /dev/null +++ b/tools/mfs/maio-utils/src/policy/policy.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#include "policy.h" + +#include "maio.h" +#include "log.h" +#include "securec.h" +#include <dlfcn.h> + +/* Signature of the "register_strategy" symbol looked up in a strategy library. */ +typedef struct maio_operation* (*strategy_init)(void); + +/* Currently loaded strategy library (NULL when none) and the operations in use. */ +struct policy_mgr { + void *handle; + struct maio_operation *strategy; +}; + +struct policy_mgr g_policy; + +/* Built-in strategy: every hook succeeds and schedules no io. */ +static int default_init(void) +{ + return 0; +} + +static void default_exit(void) +{ +} + +static int default_load(struct maio **io) +{ + return 0; +} + +static int default_evict(struct maio **io) +{ + return 0; +} + +struct maio_operation default_ops = { + .max_io = 1, + .init = default_init, + .exit = default_exit, + .load = default_load, + .evict = default_evict, +}; + +static void set_default_strategy(void) +{ + g_policy.strategy = 
&default_ops; +} + +/* Number of entry slots the active strategy may fill per request. */ +int policy_max_io(void) +{ + return g_policy.strategy->max_io; +} + +/* return the filled num of io */ +int policy_load(struct maio **io) +{ + return g_policy.strategy->load(io); +} + +/* Forward to the active strategy's evict hook and return its result. */ +int policy_evict(struct maio **io) +{ + return g_policy.strategy->evict(io); +} + +/* + * Load the strategy library at @path and make it the active strategy. + * Returns 0 on success. On any failure returns -1, closes the library and + * leaves the previously active strategy in place, so the caller really + * does keep running with the default policy. + */ +int policy_register(const char *path) +{ + struct maio_operation *ops; + strategy_init sinit; + + if (!path || strlen(path) == 0) + return -1; + + g_policy.handle = dlopen(path, RTLD_LAZY); + if (!g_policy.handle) { + log_error("dlopen failed:%s", dlerror()); + return -1; + } + + sinit = dlsym(g_policy.handle, "register_strategy"); + if (!sinit) { + log_error("dlsym failed:%s", dlerror()); + dlclose(g_policy.handle); + g_policy.handle = NULL; + return -1; + } + /* install the new operations only after init() has succeeded */ + ops = sinit(); + if (!ops || ops->init()) { + log_error("strategy:%s init failed", path); + dlclose(g_policy.handle); + g_policy.handle = NULL; + return -1; + } + g_policy.strategy = ops; + log_info("register strategy:%s done.", path); + return 0; +} + +/* Run the loaded strategy's exit hook, close its library and fall back to the default strategy. */ +void policy_unregister(void) +{ + if (g_policy.handle) { + g_policy.strategy->exit(); + dlclose(g_policy.handle); + g_policy.handle = NULL; + } + set_default_strategy(); +} + +int policy_init(void) +{ + g_policy.handle = NULL; + set_default_strategy(); + return 0; +} + +void policy_exit(void) +{ +} diff --git a/tools/mfs/maio-utils/test/Makefile b/tools/mfs/maio-utils/test/Makefile new file mode 100644 index 000000000000..6c87247aafa0 --- /dev/null +++ b/tools/mfs/maio-utils/test/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 + +CC := gcc +CXX := g++ +CFLAGS := -fPIC -Wall -Werror -O2 +CPPFLAGS := -fPIC -Wall -Werror -O2 -std=c++11 -g +LDFLAGS := -shared + +CFLAGS += $(addprefix -I,../include) +CPPFLAGS += $(addprefix -I,../include) + +C_SRCS := $(wildcard *.c) +CPP_SRCS := $(wildcard *.cpp) +TARGETS := $(C_SRCS:.c=.so) $(CPP_SRCS:.cpp=.so) + +all: $(TARGETS) + +%.so: %.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + +# link C++ strategies with the C++ driver so the .so depends on libstdc++ +%.so: %.cpp + $(CXX) $(CPPFLAGS) $(LDFLAGS) -o $@ $< + +clean: + rm -f $(TARGETS) + +.PHONY: all clean diff --git a/tools/mfs/maio-utils/test/README.md b/tools/mfs/maio-utils/test/README.md new file mode 
100644 index 000000000000..a7b7980de933 --- /dev/null +++ b/tools/mfs/maio-utils/test/README.md @@ -0,0 +1,61 @@ +Here we provide several strategy libraries for common use cases. + +- strategy_demo +A simple strategy library that shows how to write a strategy. + +- strategy_prefetch +A strategy library that prefetches data. + +- strategy_toend +A readahead strategy until the end. + +- strategy_generate +A tool that uses maio-utils to generate the io trace. + +- strategy_replay +A strategy that replays the io trace. + +### How to build + +Use `make -j` to build the libraries. + +### How to use + +Pass the library path as the value of the `-s` parameter when running maio-utils. The following shows how to use the advanced replay strategy: + +- generate the io trace + +> assume you have mounted the mfs at /mnt/mfs + +This is the tracing period. + +``` +export MAIO_TRACE_FILE=trace.txt +export MAIO_MNTPOINT=/mnt/mfs # /mnt/mfs is the MFS mountpoint +./bin/maio-utils -m /mnt/mfs -s /path/to/maio-utils/test/strategy_generate.so +``` +Then trigger the io. When it finishes, you will get a trace file with content like: + +``` +xxx/model-00001-of-000008.safetensors 0 32768 0 +xxx/model-00001-of-000008.safetensors 32768 32768 0 +xxx/model-00001-of-000008.safetensors 65536 32768 0 +xxx/model-00001-of-000008.safetensors 98304 32768 0 +xxx/model-00001-of-000008.safetensors 131072 32768 0 +... + +``` + +The format is "<path> <offset> <length> <npuid>". + +- replay the io trace + +> assume you have mounted the mfs at /mnt/mfs and begin to launch the model. + +This is the running period. 
+ +``` +export MAIO_TRACE_FILE=trace.txt +export MAIO_MNTPOINT=/mnt/mfs # /mnt/mfs is the MFS mountpoint +./bin/maio-utils -m /mnt/mfs -s /path/to/maio-utils/test/strategy_replay.so +``` diff --git a/tools/mfs/maio-utils/test/strategy_demo.c b/tools/mfs/maio-utils/test/strategy_demo.c new file mode 100644 index 000000000000..82a5cd178b03 --- /dev/null +++ b/tools/mfs/maio-utils/test/strategy_demo.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#include "maio.h" +#include <stddef.h> + +int demo_init(void) +{ + return 0; +} + +void demo_exit(void) +{ +} + +int demo_load(struct maio **io) +{ + struct maio_entry *entry; + + entry = (*io)->entries; + entry[0].toff = (*io)->off + (*io)->len; + entry[0].tlen = (*io)->cksz; + entry[0].fpath = NULL; + entry[0].tnuma = -1; + return 1; +} + +int demo_evict(struct maio **io) +{ + ((void)io); + return 0; +} + +const struct maio_operation demo_strategy = { + .max_io = 1, + .init = demo_init, + .exit = demo_exit, + .load = demo_load, + .evict = demo_evict, +}; + +struct maio_operation *register_strategy(void) +{ + return (struct maio_operation *)&demo_strategy; +} diff --git a/tools/mfs/maio-utils/test/strategy_generate.cpp b/tools/mfs/maio-utils/test/strategy_generate.cpp new file mode 100644 index 000000000000..929be30e6a42 --- /dev/null +++ b/tools/mfs/maio-utils/test/strategy_generate.cpp @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#include "maio.h" +#include "strategy_template.h" + +#include <array> +#include <atomic> +#include <cstring> +#include <condition_variable> +#include <fstream> +#include <sstream> +#include <iostream> +#include <mutex> +#include <queue> +#include <thread> +#include <unordered_map> +#include <vector> +#include <unistd.h> + +using namespace std; + +#define MAIO_MNTPOINT "MAIO_MNTPOINT" +#define MAIO_TRACE_FILE "MAIO_TRACE_FILE" +#define CACHEDIR "cachedir" + 
+#define MAXLINE 1024 + +/* Data struct definition */ +template<typename T> +class SafeQueue { +private: + queue<T> data_queue; + mutex mtx; + condition_variable cv; + +public: + // 添加数据到队列 + void push(T item) { + lock_guard<mutex> lock(mtx); + data_queue.push(move(item)); + cv.notify_one(); + } + + // 从队列取出数据(阻塞直到有数据) + T pop() { + unique_lock<mutex> lock(mtx); + cv.wait(lock, [this]{ return !data_queue.empty(); }); + T item = move(data_queue.front()); + data_queue.pop(); + return item; + } + + // 检查队列是否为空 + bool empty() { + lock_guard<mutex> lock(mtx); + return data_queue.empty(); + } +}; + +struct io { + string name; + uint64_t off; + uint64_t len; + uint8_t npu; + + io(const string& _name, uint64_t _off, uint64_t _len, uint8_t _npu) + : name(_name), off(_off), len(_len), npu(_npu) {} +}; + +/* Global variable definition */ +unordered_map<int, char*> fd_map; /* map fd to path */ +mutex fd_mutex; +string cachedir; + +vector<io> iovec; /* io traces */ +mutex iovec_mutex; +SafeQueue<vector<io>> iovec_queue; + +ofstream tfile; /* trace file */ + +thread writer; + +unordered_map<int, int> process_npu_map; +mutex pnpu_mutex; + +atomic<int> npu_smi_enable(-1); + +/* fd_map helper function */ +char* fd_path_find(int iofd) { + lock_guard<mutex> lock(fd_mutex); + auto it = fd_map.find(iofd); + if (it != fd_map.end()) { + return it->second; + } else { + char *path = _get_fullpath(NULL, iofd); + fd_map.insert({iofd, path}); + return path; + } +} + +/* iovec helper function */ +void io_insert_data(string name, uint64_t off, uint64_t len, uint8_t npu) +{ + lock_guard<mutex> lock(iovec_mutex); + iovec.emplace_back(name, off, len, npu); + + vector<io> copy_iovec = move(iovec); + iovec.clear(); + + iovec_queue.push(move(copy_iovec)); +} + +void get_mount_option(const char* mntpoint, const char* option) +{ + ifstream mounts("/proc/mounts"); + string line, target_mp; + + if (!mounts.is_open()) { + cerr << "Error: could not open /proc/mounts" << endl; + return; + } + + while 
(getline(mounts, line)) { + istringstream iss(line); + string device, mp, fs_type, options; + + if (!(iss >> device >> mp >> fs_type >> options)) + continue; + + if (mp.back() != '/') + mp += '/'; + target_mp = mntpoint; + if (target_mp.back() != '/') + target_mp += '/'; + + if (target_mp == mp) { + istringstream opt_stream(options); + string opt; + + while (getline(opt_stream, opt, ',')) { + if (opt.find(option) == 0) { + if (opt[strlen(option)] == '=') { + cachedir = opt.substr(strlen(option) + 1); + } + } + } + break; + } + } +} + +/* Writer loop: pops batches from iovec_queue and appends one line per record to tfile. Never returns. */ +void writer_thread() { + while (true) { + vector<io> iovec_to_write = iovec_queue.pop(); + + for (const auto &item : iovec_to_write) + tfile << cachedir << item.name << " " << item.off << " " << item.len << " " << (unsigned int)item.npu << endl; + tfile.flush(); + } +} + +/* Run @cmd through popen() and return everything it wrote to stdout; throws if popen() fails. */ +string exec_cmd(const char* cmd) { + array<char, 128> buffer; + string result; + unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose); + + if (!pipe) { + throw std::runtime_error("popen() failed!"); + } + while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + result += buffer.data(); + } + return result; +} + +/* True when @str is non-empty and consists of decimal digits only. */ +bool isNumber(const std::string& str) { + return !str.empty() && str.find_first_not_of("0123456789") == std::string::npos; +} + +/* Parse the text in @npuinfo and record process id -> NPU id for every line whose 2nd and 5th tokens are numeric. */ +void get_npu_info(string &npuinfo) +{ + istringstream iss(npuinfo); + string line; + + while (getline(iss, line)) { + istringstream lineStream(line); + string token; + vector<string> tokens; + while (getline(lineStream, token, ' ')) { + if (!token.empty()) { + tokens.push_back(token); + } + } + /* tokens[4] is read below, so at least five tokens are required */ + if (tokens.size() >= 5) { + if (!isNumber(tokens[1]) || !isNumber(tokens[4])) + continue; + int npuId = stoi(tokens[1]); + int processId = stoi(tokens[4]); + process_npu_map[processId] = npuId; + } + } +} + +int process_npu_find(int processid) { + lock_guard<mutex> lock(pnpu_mutex); + auto it = process_npu_map.find(processid); + + if (npu_smi_enable.load() == -1) { + if (exec_cmd("command -v npu-smi").empty()) + 
npu_smi_enable.store(0); + else + npu_smi_enable.store(1); + } + + if (npu_smi_enable.load() == 0) + return 0; + + if (it != process_npu_map.end()) { + return it->second; + } else { + string result = exec_cmd("npu-smi info"); + get_npu_info(result); + auto it = process_npu_map.find(processid); + if (it != process_npu_map.end()) + return it->second; + else + return 0; + } +} + +int strategy_init(void) +{ + char *tfilename = getenv(MAIO_TRACE_FILE); + char *mntpoint = getenv(MAIO_MNTPOINT); + + if(!tfilename || !mntpoint) { + printf("no MAIO_TRACE_FILE or MAIO_MNTPOINT env variable\n"); + return -1; + } + get_mount_option(mntpoint, CACHEDIR); + string stfilename = tfilename; + tfile.open(stfilename, ios::trunc); + writer = thread(writer_thread); + if (!tfile.is_open()) { + cerr << "Error: could not open" << stfilename << endl; + return -1; + } + return 0; +} + +void strategy_exit(void) +{ + lock_guard<mutex> lock(iovec_mutex); + vector<io> copy_iovec = move(iovec); + iovec.clear(); + iovec_queue.push(move(copy_iovec)); + + tfile.close(); + writer.join(); +} + +int strategy_load(struct maio **io) +{ + char *path; + string pathStr; + int ret = 0; + + if (!tfile.is_open()) + return -1; + + path = fd_path_find((*io)->fd); + pathStr.assign(path); + io_insert_data(pathStr, (*io)->off, (*io)->len, process_npu_find((*io)->pid)); + + return ret; +} + +int strategy_evict(struct maio **io) +{ + ((void)io); + return 0; +} + +const struct maio_operation maio_strategy = { + .max_io = 1, + .init = strategy_init, + .exit = strategy_exit, + .load = strategy_load, + .evict = strategy_evict, +}; + +struct maio_operation *register_strategy() +{ + return (struct maio_operation *)&maio_strategy; +} diff --git a/tools/mfs/maio-utils/test/strategy_prefetch.cpp b/tools/mfs/maio-utils/test/strategy_prefetch.cpp new file mode 100644 index 000000000000..9ba205931cdc --- /dev/null +++ b/tools/mfs/maio-utils/test/strategy_prefetch.cpp @@ -0,0 +1,150 @@ +#include "strategy_prefetch.h" +#include 
"maio.h" +#include <iostream> +#include <map> +#include <cstring> +#include <string> +#include <stdint.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <dirent.h> +#include <fcntl.h> +#include <unistd.h> + +using namespace std; + +struct prefetch_mgr { + map<string, uint64_t> files; +}; + +struct prefetch_mgr g_prefetch; + +int get_files(const char *parent) +{ + DIR *dir; + struct dirent *entry; + string filepath; + struct stat buf; + + if ((dir = opendir(parent)) == NULL) { + cout<<"opening directory failed"<<endl; + return -1; + } + + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + + filepath = string(parent) + "/" + string(entry->d_name); + if (stat(filepath.c_str(), &buf) == -1) { + cout<<"stat path:"<<filepath<<" failed"<<endl; + continue; + } + if (S_ISDIR(buf.st_mode)) + continue; + cout<<"Find file:"<<filepath<<" size:"<<buf.st_size<<endl; + g_prefetch.files.insert(pair<string, uint64_t>(filepath, buf.st_size)); + } + + map<string, uint64_t>::iterator it; + for (it = g_prefetch.files.begin(); it != g_prefetch.files.end(); ++it) { + cout<<"path:"<<it->first<<" size:"<<it->second<<endl; + } + return 0; +} + +int prefetch_init(void) +{ + char *parent = getenv("MODEL_WEIGHT_DIR"); + if (!parent) { + cout<<"please set the env MODEL_WEIGHT_DIR"<<endl; + return -1; + } + + if (get_files(parent) != 0) { + cout<<"get all files failed"<<endl; + return -1; + } + return 0; +} + +void prefetch_exit(void) +{ +} + +struct thread_ctx { + char *path; + uint64_t off; + uint64_t len; +}; + +void *fault(void *arg) +{ + struct thread_ctx *ctx = (struct thread_ctx *)arg; + int fd = open(ctx->path, O_RDONLY); + if (fd < 0) { + cout<<"open file:%s"<<ctx->path<<" failed"<<endl; + free(ctx); + return NULL; + } + + cout<<"FAULT path:"<<ctx->path<<" fd:"<<fd<<" off:"<<ctx->off<<" len:"<<ctx->len<<endl; + void *addr = mmap(NULL, ctx->len, PROT_READ, MAP_SHARED, fd, 0); + uint64_t idx; + char 
tmp, total, *buffer = (char *)addr; + /* mmap() fails e.g. for an empty file; do not walk an invalid mapping */ + if (addr == MAP_FAILED) { + cout<<"mmap path:"<<ctx->path<<" failed"<<endl; + close(fd); + free(ctx->path); + free(ctx); + return NULL; + } + total = 'B'; + for (idx = 0; idx < ctx->len; idx += 4096) { + tmp = buffer[idx]; + total += tmp; + } + cout<<"Fault calculate total:"<<total<<endl; + munmap(addr, ctx->len); + close(fd); + /* path was strdup'ed by prefetch_load() */ + free(ctx->path); + free(ctx); + return NULL; +} + +/* Start one detached fault() thread per collected file, then forget the file list. Always returns 0 (no io entries filled). */ +int prefetch_load(struct maio **io) +{ + map<string, uint64_t>::iterator it; + struct thread_ctx *ctx; + pthread_t t0; + int ret; + + for (it = g_prefetch.files.begin(); it != g_prefetch.files.end(); ++it) { + ctx = (struct thread_ctx *)malloc(sizeof(struct thread_ctx)); + if (!ctx) { + cout<<"malloc ctx failed"<<endl; + continue; + } + ctx->path = strdup(it->first.c_str()); + if (!ctx->path) { + cout<<"strdup path failed"<<endl; + free(ctx); + continue; + } + ctx->off = 0; + ctx->len = it->second; + ret = pthread_create(&t0, NULL, fault, ctx); + if (ret == 0) { + pthread_detach(t0); + } else { + /* no thread took ownership of ctx, release it here */ + cout<<"create fault thread failed, ret:"<<ret<<endl; + free(ctx->path); + free(ctx); + } + } + g_prefetch.files.clear(); + + return 0; +} + +int prefetch_evict(struct maio **io) +{ + ((void)io); + return 0; +} + +const struct maio_operation prefetch_strategy = { + .max_io = 1, + .init = prefetch_init, + .exit = prefetch_exit, + .load = prefetch_load, + .evict = prefetch_evict, +}; + +struct maio_operation *register_strategy() +{ + return (struct maio_operation *)&prefetch_strategy; +} + diff --git a/tools/mfs/maio-utils/test/strategy_prefetch.h b/tools/mfs/maio-utils/test/strategy_prefetch.h new file mode 100644 index 000000000000..e487e38b3d8b --- /dev/null +++ b/tools/mfs/maio-utils/test/strategy_prefetch.h @@ -0,0 +1,21 @@ +#ifndef STRATEGY_PREFETCH_H +#define STRATEGY_PREFETCH_H + +#ifdef __cplusplus +extern "C" { +#endif + +int prefetch_init(void); +void prefetch_exit(void); +/* must match the definition in strategy_prefetch.cpp, which takes struct maio ** */ +int prefetch_load(struct maio **io); +int prefetch_evict(struct maio **io); + +extern const struct maio_operation prefetch_strategy; + +struct maio_operation *register_strategy(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/mfs/maio-utils/test/strategy_replay.cpp b/tools/mfs/maio-utils/test/strategy_replay.cpp new file mode 100644 index 000000000000..85c1c7defedd --- /dev/null +++ 
b/tools/mfs/maio-utils/test/strategy_replay.cpp @@ -0,0 +1,540 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#include "maio.h" +#include "strategy_template.h" + +#include <atomic> +#include <array> +#include <cstring> +#include <fstream> +#include <iostream> +#include <map> +#include <mutex> +#include <sstream> +#include <string> +#include <thread> +#include <unordered_map> +#include <vector> +#include <pthread.h> +#include <fcntl.h> +#include <unistd.h> + +using namespace std; + +#define MAIO_MNTPOINT "MAIO_MNTPOINT" +#define MAIO_TRACE_FILE "MAIO_TRACE_FILE" +#define CACHEDIR "cachedir" +#define MAXLINE 1024 + +/* Data structure definition */ +enum { + IO_NONE = 0, + IO_LOADED, + IO_EVICTED, +}; +/* One trace record: file name, offset, length and NPU id; flag starts as IO_NONE. */ +struct io { + string name; + uint64_t off; + uint64_t len; + uint8_t npu; + int flag; + + explicit io(const string& _name, uint64_t _off, uint64_t _len, uint8_t _npu) + : name(_name), off(_off), len(_len), npu(_npu), flag(IO_NONE) {} +}; + +/* Map key for a trace record; operator< orders by name, then off, then len and never compares npu. */ +struct ioKey { + string name; + uint64_t off; + uint64_t len; + uint8_t npu; + + ioKey(const string& _name, uint64_t _off, uint64_t _len, uint8_t _npu) + : name(_name), off(_off), len(_len), npu(_npu) {} + + bool operator<(const ioKey &other) const { + return name < other.name || (name == other.name && off < other.off) || + (name == other.name && off == other.off && len < other.len); + } +}; + +vector<io> iovec; /* io traces */ +map<ioKey, int> io_to_index; /* presumably record -> position in iovec; confirm against the trace loader */ +unordered_map<int, char*> fd_map; /* fd -> path string cached by fd_path_find() */ +mutex fd_mutex; /* taken by fd_path_find() around fd_map */ +string cachedir; /* value of the cachedir= mount option, set by get_mount_option() */ + +unordered_map<int, int> npu_numa_map; /* NPU id -> NUMA node, filled by build_npu_map_map() */ + +unordered_map<int, int> process_npu_map; /* process id -> NPU id, filled by get_npu_info() */ +mutex pnpu_mutex; + +/* for evict */ +#define EVICT_DIST 10000 +#define EVICT_IO_MAX 5000 +thread_local size_t evict_idx = 0; + +atomic<int> npu_smi_enable(-1); /* -1: not probed yet, 0: npu-smi not found, 1: npu-smi available */ +atomic<uint64_t> memory_usage(0); + +uint64_t memory_limit; +uint64_t memory_limit_bound; +string usage_in_bytes_path = "memory.usage_in_bytes"; +string limit_in_bytes_path = "memory.limit_in_bytes"; +string 
cgroup_mem=""; + +string get_cgroup_path() +{ + fstream cgroup_file("/proc/self/cgroup"); + if (!cgroup_file.is_open()) { + cerr << "Failed to open /proc/self/cgroup" << endl; + return ""; + } + + string line; + string cgroup_path; + while (getline(cgroup_file, line)) { + istringstream iss(line); + string hierarchy, controller, path; + if (!getline(iss, hierarchy, ':')) + continue; + if (!getline(iss, controller, ':')) + continue; + if (!getline(iss, path, ':')) + continue; + if (controller == "memory") { + cgroup_path = path; + break; + } + } + cgroup_file.close(); + return cgroup_path; +} + +/* read from /sys/fs/cgroup/memory/memory.[usage_in_bytes|limit_in_bytes] */ +uint64_t read_cgroup_memory(const string &file_path) +{ + if (cgroup_mem == "") + return -1; + string whole_path = "/sys/fs/cgroup/memory"+cgroup_mem+"/"+file_path; + ifstream cgroup_file(whole_path); + + if(!cgroup_file.is_open()) { + cerr << "Failed to open " << file_path << endl; + return -1; + } + + string line; + while(getline(cgroup_file, line)) { + try { + return stoll(line); + } catch (...) 
{ + cerr << "Failed to parse line" << line << endl; + } + } + + cgroup_file.close(); + return -1; +} + +/* mount helper */ +void get_mount_option(const char* mntpoint, const char* option) +{ + ifstream mounts("/proc/mounts"); + string line, target_mp; + + if (!mounts.is_open()) { + cerr << "Error: could not open /proc/mounts" << endl; + return; + } + + while (getline(mounts, line)) { + istringstream iss(line); + string device, mp, fs_type, options; + + if (!(iss >> device >> mp >> fs_type >> options)) + continue; + + if (mp.back() != '/') + mp += '/'; + target_mp = mntpoint; + if (target_mp.back() != '/') + target_mp += '/'; + + if (target_mp == mp) { + istringstream opt_stream(options); + string opt; + + while (getline(opt_stream, opt, ',')) { + if (opt.find(option) == 0) { + if (opt[strlen(option)] == '=') { + cachedir = opt.substr(strlen(option) + 1); + } + } + } + break; + } + } +} + +/* fd_map helper function */ +char* fd_path_find(int iofd) { + lock_guard<mutex> lock(fd_mutex); + auto it = fd_map.find(iofd); + if (it != fd_map.end()) { + return it->second; + } else { + char *path = _get_fullpath(NULL, iofd); + size_t len = strlen(path) + cachedir.length() + 1; + char *realpath = (char*)malloc(len); + snprintf(realpath, len, "%s%s", cachedir.c_str(), path); + fd_map.insert({iofd, realpath}); + return realpath; + } +} + +string exec_cmd(const char* cmd) { + array<char, 128> buffer; + string result = ""; + unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose); + + if (!pipe) { + throw std::runtime_error("popen() failed!"); + } + while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + result += buffer.data(); + } + return result; +} + +bool isNumber(const std::string& str) { + return !str.empty() && str.find_first_not_of("0123456789") == std::string::npos; +} + +/* cpu_numa_map builder from lscpu */ +unordered_map<string, int> build_cpu_numa_map() { + unordered_map<string, int> cpuNumaMap; + string result = exec_cmd("lscpu"); + 
istringstream iss(result); + string line; + + while (getline(iss, line)) { + if (line.find("NUMA node") != string::npos) { + istringstream lineStream(line); + string key, value; + int numaNode; + getline(lineStream, value, ':'); + getline(lineStream, key); + /* what if numa is more than 10 */ + value = value.substr(value.find("NUMA node") + 9, 1); + if (!isNumber(value)) + continue; + numaNode = stoi(value); + key.erase(0, key.find_first_not_of(" \t")); + key.erase(key.find_last_not_of(" \t") + 1); + cpuNumaMap[key]=numaNode; + } + } + return cpuNumaMap; +} + +/* npu_numa_map builder, from npu-smi info -t topo */ +void build_npu_map_map() +{ + if (npu_smi_enable.load() == -1) { + if (exec_cmd("command -v npu-smi").empty()) + npu_smi_enable.store(0); + else + npu_smi_enable.store(1); + } + + if (npu_smi_enable.load() == 0) + return; + + unordered_map<string, int> cpuNumaMap = build_cpu_numa_map(); + string result = exec_cmd("npu-smi info -t topo"); + istringstream iss(result); + string line; + size_t target_token_size = 0; + + cout << "NPU\tCPU_AFFINITY\tNUMA_NODE" << endl; + while (getline(iss, line)) { + istringstream lineStream(line); + string npuStr, numaStr, token; + vector<string> tokens; + + while (lineStream >> token) { + tokens.push_back(token); + } + + if (tokens.size() >= 2 && tokens.back() == "Affinity") { + target_token_size = tokens.size() - 1 + 1; + continue; + } + + if (tokens.size() != target_token_size) + continue; + + int npuId = stoi(tokens[0].substr(3)); + + numaStr = tokens.back(); + cout << npuId << "\t" << numaStr << "\t\t" << cpuNumaMap[numaStr] << endl; + npu_numa_map[npuId] = cpuNumaMap[numaStr]; + } +} + +/* process_npu_map builder, from npu-smi info */ +void get_npu_info(string &npuinfo) +{ + istringstream iss(npuinfo); + string line; + + while (getline(iss, line)) { + istringstream lineStream(line); + string token; + vector<string> tokens; + while (getline(lineStream, token, ' ')) { + if (!token.empty()) { + tokens.push_back(token); + } 
+ } + if (tokens.size() >= 3) { + if (!isNumber(tokens[1]) || !isNumber(tokens[4])) + continue; + int npuId = stoi(tokens[1]); + int processId = stoi(tokens[4]); + process_npu_map[processId] = npuId; + } + } +} + +/* process_npu_map helper */ +int process_npu_find(int processid) { + lock_guard<mutex> lock(pnpu_mutex); + auto it = process_npu_map.find(processid); + + if (npu_smi_enable.load() == 0) + return 0; + + if (it != process_npu_map.end()) { + return it->second; + } else { + string result = exec_cmd("npu-smi info"); + get_npu_info(result); + auto it = process_npu_map.find(processid); + if (it != process_npu_map.end()) + return it->second; + else + return 0; + } +} + +int strategy_init(void) +{ + const char *mntpoint = getenv(MAIO_MNTPOINT); + char *tfilename = getenv(MAIO_TRACE_FILE); + if(!tfilename || !mntpoint) { + fprintf(stderr, "no MAIO_TRACE_FILE or MAIO_MNTPOINT env variable\n"); + return -1; + } + ifstream tfile(tfilename); + string line; + + cgroup_mem=get_cgroup_path(); + memory_limit = read_cgroup_memory(limit_in_bytes_path); + memory_limit_bound = memory_limit * 4 / 5; + cout << "memory_limit: " << memory_limit << " memory_limit_bound: " << memory_limit_bound << endl; + get_mount_option(mntpoint, CACHEDIR); + build_npu_map_map(); + + if(!tfile.is_open()) { + cerr << "Failed to open file:" << tfilename << endl; + return -1; + } + + while(getline(tfile, line)) { + istringstream iss(line); + + string name; + uint64_t off; + uint64_t len; + unsigned int npu; + if (iss >> name >> off >> len >> npu) { + iovec.emplace_back(name, off, len, npu); + io_to_index[ioKey(name, off, len, npu)] = iovec.size() - 1; + } else { + cerr << "Failed to parse line:" << line << endl; + } + } + + tfile.close(); + return 0; +} + +void strategy_exit(void) +{ +} + +int strategy_load(struct maio **io) +{ + int npuid = process_npu_find((*io)->pid); + struct maio_entry *entry; + string pathStr; + char *path; + uint64_t idx = iovec.size(); + int ret = 0; + struct maio *new_io; + 
+ path = fd_path_find((*io)->fd); + pathStr.assign(path); + + ioKey key(pathStr, (*io)->off, (*io)->len, npuid); + auto io_it = io_to_index.lower_bound(key); + if (io_it == io_to_index.end()) { + entry = (*io)->entries; + entry[0].toff = (*io)->off + (*io)->len; + entry[0].tlen = 0; + entry[0].fpath = NULL; + entry[0].tnuma = npu_numa_map[npuid]; + return 1; + } + if (key < io_it->first) { + if (io_it != io_to_index.begin()) { + --io_it; + if (io_it->first.name != pathStr) { + ++io_it; + } + } + } + + idx = io_it->second; + (*io)->flags |= MAIO_WITH_SEQ; + (*io)->curseq = idx; + entry = (*io)->entries; + int ios = 0; + uint64_t memory_usage = read_cgroup_memory(usage_in_bytes_path); + vector<int> io_extra; + + /* fill ios below max_io */ + while (idx < iovec.size()) { + if (iovec[idx].flag == IO_LOADED) { + idx++; + continue; + } + memory_usage += iovec[idx].len / 2; + if (memory_usage > memory_limit_bound) + break; + if (ios >= maio_strategy.max_io) { + io_extra.push_back(idx); + goto next_io; + } + entry[ios].seq = idx; + if (pathStr == iovec[idx].name) { + entry[ios].toff = iovec[idx].off; + entry[ios].tlen = iovec[idx].len; + entry[ios].tnuma = npu_numa_map[npuid]; + } else { + entry[ios].fpath = strdup(iovec[idx].name.c_str()); + entry[ios].toff = iovec[idx].off; + entry[ios].tlen = iovec[idx].len; + entry[ios].tnuma = npu_numa_map[iovec[idx].npu]; + } +next_io: + iovec[idx].flag = IO_LOADED; + idx++; + ios++; + } + + /* fill ios above max_io */ + if (ios > maio_strategy.max_io) { + new_io = (struct maio *)realloc(*io, sizeof(struct maio) + ios * sizeof(struct maio_entry)); + if (!new_io) { + printf("failed to alloc ios:%d maio\n", ios); + return -1; + } + *io = new_io; + entry = (*io)->entries; + for (int i = maio_strategy.max_io; i < ios; ++i) { + int io_idx = io_extra[i - maio_strategy.max_io]; + entry[i].seq = io_idx; + if (pathStr == iovec[io_idx].name) { + entry[i].toff = iovec[io_idx].off; + entry[i].tlen = iovec[io_idx].len; + entry[i].tnuma = 
npu_numa_map[npuid]; + } else { + entry[i].fpath = strdup(iovec[io_idx].name.c_str()); + entry[i].toff = iovec[io_idx].off; + entry[i].tlen = iovec[io_idx].len; + entry[i].tnuma = npu_numa_map[iovec[io_idx].npu]; + } + } + } + + // printf("file:%s off:%lu len:%lu generate_ios:%d from %d",path, (*io)->off, (*io)->len, ios, io_it->second); + ret = ios; + return ret; +} + +int strategy_evict(struct maio **io) +{ + string pathStr; + char *path; + path = fd_path_find((*io)->fd); + pathStr.assign(path); + ioKey key(pathStr, (*io)->off, (*io)->len, (*io)->pid); + auto io_it = io_to_index.lower_bound(key); + int dist = 0; + size_t i = 0, io_idx = io_it->second; + struct maio_entry *entry; + struct maio *new_io; + uint64_t evict_memory = 0; + uint64_t memory_usage = read_cgroup_memory(usage_in_bytes_path); + + if (memory_usage < (memory_limit_bound * 3 / 4)) + return 0; + dist = io_idx - evict_idx; + if (dist < 0) + return 0; + if (static_cast<size_t>(dist) < iovec.size() / 8 && io_it != io_to_index.end()) + return 0; + size_t evict_win = 0; + if (io_it != io_to_index.end()) { + evict_win = min(iovec.size() / 10, io_idx - 1); + } else { + evict_win = dist; + } + + new_io = (struct maio*)realloc(*io, sizeof(struct maio) + evict_win * sizeof(struct maio_entry)); + if (!new_io) + return -1; + *io = new_io; + entry = new_io->entries; + + for (i = 0; i < evict_win && evict_idx < iovec.size(); ++i) { + evict_memory += iovec[evict_idx].len; + if (evict_memory > memory_limit_bound / 8) + break; + entry[i].fpath = strdup(iovec[evict_idx].name.c_str()); + entry[i].toff = iovec[evict_idx].off; + entry[i].tlen = iovec[evict_idx].len; + evict_idx++; + iovec[evict_idx].flag = IO_EVICTED; + } + return i; +} + +const struct maio_operation maio_strategy = { + .max_io = 500, + .init = strategy_init, + .exit = strategy_exit, + .load = strategy_load, + .evict = strategy_evict, +}; + +struct maio_operation *register_strategy() +{ + return (struct maio_operation *)&maio_strategy; +} diff --git 
a/tools/mfs/maio-utils/test/strategy_template.h b/tools/mfs/maio-utils/test/strategy_template.h new file mode 100644 index 000000000000..c574d72e4a03 --- /dev/null +++ b/tools/mfs/maio-utils/test/strategy_template.h @@ -0,0 +1,21 @@ +#ifndef STRATEGY_TEMPLATE_H +#define STRATEGY_TEMPLATE_H + +#ifdef __cplusplus +extern "C" { +#endif + +int strategy_init(void); +void strategy_exit(void); +int strategy_load(struct maio **io); +int strategy_evict(struct maio *io); + +extern const struct maio_operation maio_strategy; + +struct maio_operation *register_strategy(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/mfs/maio-utils/test/strategy_toend.c b/tools/mfs/maio-utils/test/strategy_toend.c new file mode 100644 index 000000000000..56a068d450e5 --- /dev/null +++ b/tools/mfs/maio-utils/test/strategy_toend.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Huawei Technologies Co., Ltd + */ +#include "maio.h" +#include <stddef.h> + +int demo_init(void) +{ + return 0; +} + +void demo_exit(void) +{ +} + +int demo_load(struct maio **io) +{ + struct maio_entry *entry; + + entry = (*io)->entries; + entry[0].toff = (*io)->off + (*io)->len; + /* Read Ahead until the end */ + entry[0].tlen = 0; + entry[0].fpath = NULL; + entry[0].tnuma = -1; + return 1; +} + +int demo_evict(struct maio **io) +{ + ((void)io); + return 0; +} + +const struct maio_operation demo_strategy = { + .max_io = 1, + .init = demo_init, + .exit = demo_exit, + .load = demo_load, + .evict = demo_evict, +}; + +struct maio_operation *register_strategy(void) +{ + return (struct maio_operation *)&demo_strategy; +} diff --git a/tools/mfs/maio-utils/test/trace_analyse.py b/tools/mfs/maio-utils/test/trace_analyse.py new file mode 100644 index 000000000000..c25d1ce364df --- /dev/null +++ b/tools/mfs/maio-utils/test/trace_analyse.py @@ -0,0 +1,87 @@ +def merge_intervals(intervals): + """ + 合并区间 + :param intervals: 区间列表,每个区间是一个元组 (start, end) + :return: 合并后的区间列表 + """ + if not 
intervals: + return [] + + # 按起始位置排序 + intervals.sort(key=lambda x: x[0]) + + merged = [intervals[0]] + for current in intervals[1:]: + last_merged = merged[-1] + if current[0] <= last_merged[1]: # 有重叠 + merged[-1] = (last_merged[0], max(last_merged[1], current[1])) + else: + merged.append(current) + + return merged + +def process_file(input_file): + """ + 处理输入文件 + :param input_file: 输入文件路径 + :return: 每个文件的合并区间和总大小 + """ + from collections import defaultdict + + # 使用字典按文件名分组 + file_intervals = defaultdict(list) + + with open(input_file, 'r') as f: + for line in f: + parts = line.strip().split() + if len(parts) < 3: + print(f"跳过无效行: {line.strip()}") + continue + + # 解析文件名、起始位置、大小 + filename, start, size = parts[:3] + start = int(start) + size = int(size) + + # 计算区间 + end = start + size + file_intervals[filename].append((start, end)) + + # 合并每个文件的区间 + merged_results = {} + for filename, intervals in file_intervals.items(): + merged_intervals = merge_intervals(intervals) + # 计算合并后的区间总大小 + total_size = sum(end - start for start, end in merged_intervals) + merged_results[filename] = { + "merged_intervals": merged_intervals, + "total_size": total_size + } + + return merged_results + +def main(): + import sys + + if len(sys.argv) != 2: + print("用法: python script.py <输入文件>") + sys.exit(1) + + input_file = sys.argv[1] + + merged_results = process_file(input_file) + + whole_result=0 + + for filename, result in merged_results.items(): + print(f"文件: {filename}") + # print("合并后的区间:") + # for interval in result["merged_intervals"]: + # print(f"起始位置: {interval[0]}, 结束位置: {interval[1]}") + print(f"所有区间内数据的总大小: {result['total_size']} 字节") + print("-" * 40) + whole_result+=result['total_size'] + print(f"所有文件内数据的总大小: {whole_result} 字节") + +if __name__ == "__main__": + main() diff --git a/tools/mfs/maio-utils/test/trace_gen.py b/tools/mfs/maio-utils/test/trace_gen.py new file mode 100644 index 000000000000..d050fa48801c --- /dev/null +++ b/tools/mfs/maio-utils/test/trace_gen.py @@ 
-0,0 +1,116 @@ +import argparse +import os +import sys + +FILENAME_IDX = 0 +OFF_IDX = 1 +LEN_IDX = 2 +NUMA_IDX = 3 +OFF_BOUND = 1024 * 1024 * 4096 + +class ReplayHintGenerator(object): + def __init__(self, trace_file, output_file): + self.output_file = output_file + self.trace_file = trace_file + self.data_list = [] + + def generate_hint(self): + with open(self.output_file, 'w') as f: + for data in self.data_list: + data_off = data[OFF_IDX] + data_len = data[LEN_IDX] + print(data_off) + step = (512 * 1024 * 1024) + while data_len: + if data_len < step: + step = data_len + print(f"{data[FILENAME_IDX]} {data_off} {step} {data[NUMA_IDX]}", file=f) + data_len-=step + data_off+=step + + def parse_trace(self): + file_map_index={} + with open(self.trace_file, 'r') as f: + for line in f: + parts = line.strip().split(' ') + + filename, off, length, numaid = parts + off = int(off) + length = int(length) + numaid = int(numaid) + if self.data_list: + tail_data = self.data_list[-1] + ''' + 首先尝试跟末尾的数据合并: + 如若跟末尾的数据是同一个文件跟numaid, + 两者之间的距离小于设定值, + 就进行合并 + ''' + if (tail_data[FILENAME_IDX] == filename and tail_data[NUMA_IDX] == numaid): + tail_off = tail_data[OFF_IDX] + tail_end = tail_data[OFF_IDX] + tail_data[LEN_IDX] + end = off + length + abs_off = max(abs(off - tail_end), abs(tail_off - end)) + if abs_off < OFF_BOUND: + tail_data[OFF_IDX] = min(tail_off, off) + tail_data[LEN_IDX] = max(tail_end, end) - tail_data[OFF_IDX] + continue + else: + ''' + 否则跟之前同一文件和numaid的数据进行合并 + 如若两者之间距离小于设定值, + 就进行合并 + ''' + key = (filename, numaid) + if key in file_map_index: + index = file_map_index[key] + data = self.data_list[index] + data_off = data[OFF_IDX] + data_end = data[OFF_IDX] + data[LEN_IDX] + end = off + length + abs_off = max(abs(off - data_end), abs(data_off - end)) + if abs_off < OFF_BOUND: + data[OFF_IDX] = min(data_off, off) + data[LEN_IDX] = max(data_end, end) - data[OFF_IDX] + continue + self.data_list.append([filename, off, length, numaid]) + file_map_index[(filename, 
numaid)] = len(self.data_list) - 1 + + +def main(agrv): + """ + Parse the argument + """ + parser = argparse.ArgumentParser('model trace parser') + parser.add_argument('--trace_file', + required=True, + type=str, + help='trace source') + parser.add_argument('--output_file', + required=True, + type=str, + help='output of hint file') + try: + args = parser.parse_args() + trace_file = args.trace_file + output_file = args.output_file + if not os.path.exists(trace_file): + print("{trace_file} not exist") + return -1 + + hint_generator=ReplayHintGenerator(trace_file, output_file) + hint_generator.parse_trace() + hint_generator.generate_hint() + return 0 + except Exception as e: + print(f"Parse model trace exception:{str(e)}") + return -1 + +if __name__ == '__main__': + try: + ret = main(sys.argv[1:]) + except Exception as main_e: + print(str(main_e)) + ret = -1 + sys.exit(ret) + -- 2.34.1
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/21244 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/7NG... Feedback: The patch(es) that you sent to the kernel@openeuler.org mailing list have been successfully converted to a pull request! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/21244 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/7NG...
participants (2)
-
Hongbo Li -
patchwork bot