[PATCH OLK-5.10 0/2] fuse: support fastpath

Support fuse fastpath by using the newly added kernel_ipc module.

chenrenhui (2):
  ipc: add kernel_ipc module to enable fast process switching
  fuse: support fastpath via kernel_ipc

 Kconfig                                |   2 +
 arch/arm64/configs/openeuler_defconfig |   2 +
 arch/x86/configs/openeuler_defconfig   |   2 +
 fs/fuse/Kconfig                        |   6 +
 fs/fuse/dev.c                          | 744 +++++++++++++++++++++++++
 fs/fuse/dir.c                          |  87 ++-
 fs/fuse/file.c                         | 235 +++++++-
 fs/fuse/fuse_i.h                       |  55 ++
 fs/fuse/inode.c                        | 112 ++++
 fs/fuse/readdir.c                      |   8 +
 include/linux/kernel_ipc.h             |  48 ++
 include/uapi/linux/fuse.h              |  15 +-
 ipc/Kconfig                            |   5 +
 ipc/Makefile                           |   2 +-
 ipc/kernel_ipc.c                       | 301 ++++++++++
 15 files changed, 1596 insertions(+), 28 deletions(-)
 create mode 100644 include/linux/kernel_ipc.h
 create mode 100644 ipc/Kconfig
 create mode 100644 ipc/kernel_ipc.c

-- 
2.33.0
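For reviewers, this is roughly how a userspace daemon is expected to drive the new interface, pieced together from the ioctl handlers and the mmap hook in patch 2/2. Everything below is a sketch: the buffer size, worker structure and error handling are illustrative, and FUSE_DEV_IOC_* / struct fuse_ipc_io are only visible once the updated uapi <linux/fuse.h> is installed. The kernel keeps one fastpath slot per CPU (this_cpu_ptr), so one worker is bound per CPU and must be pinned to that CPU before binding:

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <linux/fuse.h>	/* FUSE_DEV_IOC_*, struct fuse_ipc_io (updated uapi) */

#define DATA_BUF_SIZE (1 << 20)	/* illustrative */

/* One worker per CPU; the caller is assumed to have pinned this thread to
 * its CPU (sched_setaffinity) already. devfd is an open /dev/fuse fd for a
 * mount done with -o use_fastpath. */
static void *fastpath_worker(int devfd)
{
	char *databuf = malloc(DATA_BUF_SIZE);
	struct iovec reply_iov[4];
	struct fuse_ipc_io io = {
		.buf = databuf, .buf_len = DATA_BUF_SIZE,
		.iov = reply_iov, .count = 0,
	};
	/* 4K page shared with the kernel: the request header and inline args
	 * arrive here, and the reply's fuse_out_header is written back here. */
	void *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
			  devfd, 0);

	if (!databuf || page == MAP_FAILED ||
	    ioctl(devfd, FUSE_DEV_IOC_IPC_BIND) < 0)
		return NULL;

	for (;;) {
		/* complete the previous call (if any) and block for the next */
		if (ioctl(devfd, FUSE_DEV_IOC_WAIT_RET_CALL, &io) < 0)
			break;
		/* parse struct fuse_in_header at 'page', service the request,
		 * write struct fuse_out_header back to 'page', and point
		 * io.iov/io.count at the reply payload */
	}
	ioctl(devfd, FUSE_DEV_IOC_IPC_UNBIND);
	return NULL;
}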

euleros inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC4GP7
CVE: NA

----------------------------------------

Add the kernel_ipc module to support fast switching and communication
between processes.

Signed-off-by: chenrenhui <chenrenhui1@huawei.com>
---
 Kconfig                                |   2 +
 arch/arm64/configs/openeuler_defconfig |   1 +
 arch/x86/configs/openeuler_defconfig   |   2 +
 include/linux/kernel_ipc.h             |  48 ++++
 ipc/Kconfig                            |   5 +
 ipc/Makefile                           |   2 +-
 ipc/kernel_ipc.c                       | 301 +++++++++++++++++++++++++
 7 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/kernel_ipc.h
 create mode 100644 ipc/Kconfig
 create mode 100644 ipc/kernel_ipc.c

diff --git a/Kconfig b/Kconfig
index 745bc773f567..38535ecc8aa6 100644
--- a/Kconfig
+++ b/Kconfig
@@ -21,6 +21,8 @@ source "drivers/Kconfig"

 source "fs/Kconfig"

+source "ipc/Kconfig"
+
 source "security/Kconfig"

 source "crypto/Kconfig"
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index fb9f92d11bde..4c7a26b1504a 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -6660,6 +6660,7 @@ CONFIG_FILE_MITIGATION_FALSE_SHARING=y
 # end of File systems

 CONFIG_RESCTRL=y
+CONFIG_KERNEL_IPC=m

 #
 # Security options
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index 47a79860bfb7..fee20534051d 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -7714,6 +7714,8 @@ CONFIG_DLM_DEBUG=y
 CONFIG_IO_WQ=y
 # end of File systems

+# CONFIG_KERNEL_IPC is not set
+
 #
 # Security options
 #
diff --git a/include/linux/kernel_ipc.h b/include/linux/kernel_ipc.h
new file mode 100644
index 000000000000..f4a5fd03d9f1
--- /dev/null
+++ b/include/linux/kernel_ipc.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
+ * Description: Kernel IPC header
+ * Author: yangyun
+ * Create: 2024-05-31
+ */
+#ifndef __KERNEL_IPC_H_
+#define __KERNEL_IPC_H_
+
+struct kernel_ipc_bind_info {
+	//unsigned int session_id;
+	unsigned int data_size;
+
+	struct task_struct *client_task;
+	struct task_struct *server_task;
+	// struct task_struct *server_task_get;
+
+	bool is_calling;
+	bool client_need_exit;
+	bool server_need_exit;
+
+	atomic_t nr_call;
+
+	//struct kref ref;
+	spinlock_t lock;
+	struct list_head node;
+};
+
+void kernel_ipc_wakeup_server_task(struct kernel_ipc_bind_info *bind_info);
+
+void *kernel_ipc_bind(struct task_struct *server_task);
+
+void kernel_ipc_unbind(struct kernel_ipc_bind_info *bind_info,
+		struct task_struct *server_task);
+
+ssize_t kernel_ipc_do_call(struct kernel_ipc_bind_info *bind_info,
+		struct task_struct *tsk);
+
+long kernel_ipc_ret_call(struct kernel_ipc_bind_info *bind_info,
+		struct task_struct *tsk);
+
+long kernel_ipc_wait_call(struct kernel_ipc_bind_info *bind_info,
+		struct task_struct *tsk);
+
+void kernel_ipc_release(struct kernel_ipc_bind_info *bind_info);
+
+#endif
diff --git a/ipc/Kconfig b/ipc/Kconfig
new file mode 100644
index 000000000000..3ba44d78d1ff
--- /dev/null
+++ b/ipc/Kconfig
@@ -0,0 +1,5 @@
+config KERNEL_IPC
+	tristate "Kernel IPC Call"
+	default n
+	help
+	  Inter-process call, used to switch threads quickly.
\ No newline at end of file
diff --git a/ipc/Makefile b/ipc/Makefile
index c2558c430f51..528a1233431a 100644
--- a/ipc/Makefile
+++ b/ipc/Makefile
@@ -9,4 +9,4 @@ obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
 obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o
 obj-$(CONFIG_IPC_NS) += namespace.o
 obj-$(CONFIG_POSIX_MQUEUE_SYSCTL) += mq_sysctl.o
-
+obj-$(CONFIG_KERNEL_IPC) += kernel_ipc.o
diff --git a/ipc/kernel_ipc.c b/ipc/kernel_ipc.c
new file mode 100644
index 000000000000..dc0b07e6f5f6
--- /dev/null
+++ b/ipc/kernel_ipc.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef pr_fmt
+# define pr_fmt(fmt) "kernel_ipc: " fmt
+#endif
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/printk.h>
+#include <linux/preempt.h>
+#include <linux/sched/signal.h>
+#include <linux/kernel_ipc.h>
+#include <linux/sched/debug.h>
+
+//#define IPC_DEBUG(fmt, ...) kernel_ipc_print(KERN_DEBUG fmt, ##__VA_ARGS__)
+#define IPC_DEBUG(fmt, ...)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("yangyun");
+MODULE_DESCRIPTION("kernel ipc");
+MODULE_VERSION("1.0");
+
+static inline void bind_info_lock(struct kernel_ipc_bind_info *bind_info)
+{
+	spin_lock(&bind_info->lock);
+}
+
+static inline void bind_info_unlock(struct kernel_ipc_bind_info *bind_info)
+{
+	spin_unlock(&bind_info->lock);
+}
+
+static inline int kernel_ipc_check_task_consistency(struct task_struct *client,
+		struct task_struct *server)
+{
+	if (client->pid == server->pid) {
+		pr_err("error: client(%s/%d) and server(%s/%d) are the same\n",
+			client->comm, client->pid, server->comm, server->pid);
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+static inline ssize_t
+kernel_ipc_call_check(struct kernel_ipc_bind_info *bind_info,
+		struct task_struct *tsk)
+{
+	ssize_t ret = 0;
+	struct task_struct *server_task;
+
+	if (!bind_info)
+		return -ENOENT;
+
+	if (bind_info->client_task) {
+		pr_err("error: already bound to client task: %s/%d, current is: %s/%d",
+			bind_info->client_task->comm, bind_info->client_task->pid,
+			tsk->comm, tsk->pid);
+		return -EEXIST;
+	}
+
+	server_task = bind_info->server_task;
+	if (!server_task) {
+		pr_err("error: server thread does not exist\n");
+		return -ESRCH;
+	}
+
+	return ret;
+}
+
+static inline void kernel_ipc_client_init(
+		struct kernel_ipc_bind_info *bind_info, struct task_struct *tsk)
+{
+	bind_info->client_task = tsk;
+}
+
+static inline void kernel_ipc_client_exit(
+		struct kernel_ipc_bind_info *bind_info, struct task_struct *tsk)
+{
+	bind_info->client_task = NULL;
+}
+
+static inline int kernel_ipc_get_client_exit_code(
+		const struct kernel_ipc_bind_info *bind_info)
+{
+	return bind_info->client_need_exit ? -ESRCH : 0;
+}
+
+static inline void kernel_ipc_wakeup_client_task(
+		struct kernel_ipc_bind_info *bind_info)
+{
+	struct task_struct *client_task;
+
+	client_task = bind_info->client_task;
+	bind_info->client_need_exit = true;
+	wake_up_process(client_task);
+}
+
+void kernel_ipc_wakeup_server_task(struct kernel_ipc_bind_info *bind_info)
+{
+	struct task_struct *server_task;
+
+	server_task = bind_info->server_task;
+	bind_info->server_need_exit = true;
+	wake_up_process(server_task);
+}
+EXPORT_SYMBOL_GPL(kernel_ipc_wakeup_server_task);
+
+void *kernel_ipc_bind(struct task_struct *server_task)
+{
+	struct kernel_ipc_bind_info *bind_info = NULL;
+
+	bind_info = kcalloc(1, sizeof(struct kernel_ipc_bind_info), GFP_KERNEL);
+	if (!bind_info)
+		return ERR_PTR(-ENOMEM);
+
+	bind_info->server_task = server_task;
+
+	return (void *) bind_info;
+}
+EXPORT_SYMBOL_GPL(kernel_ipc_bind);
+
+void kernel_ipc_release(struct kernel_ipc_bind_info *bind_info)
+{
+	if (bind_info) {
+		if (bind_info->client_task && bind_info->is_calling)
+			kernel_ipc_wakeup_client_task(bind_info);
+		kfree(bind_info);
+	}
+}
+EXPORT_SYMBOL_GPL(kernel_ipc_release);
+
+void kernel_ipc_unbind(struct kernel_ipc_bind_info *bind_info,
+		struct task_struct *server_task)
+{
+	if (bind_info) {
+		if (bind_info->server_task == server_task) {
+			bind_info->server_task = NULL;
+			if (bind_info->client_task && bind_info->is_calling)
+				kernel_ipc_wakeup_client_task(bind_info);
+			kfree(bind_info);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(kernel_ipc_unbind);
+
+ssize_t kernel_ipc_do_call(struct kernel_ipc_bind_info *bind_info,
+		struct task_struct *tsk)
+{
+	struct task_struct *server_task;
+	ssize_t ret;
+
+	ret = kernel_ipc_call_check(bind_info, tsk);
+	if (ret) {
+		pr_err("kernel ipc call check and init failed, errno: %ld\n", ret);
+		return ret;
+	}
+
+	kernel_ipc_client_init(bind_info, tsk);
+
+	server_task = bind_info->server_task;
+
+	bind_info->client_need_exit = false;
+	bind_info->is_calling = true;
+
+	preempt_disable(); /* optimize performance if preemption occurs */
+	smp_mb();
+	wake_up_process(server_task);
+	preempt_enable();
+	IPC_DEBUG("[cpu/%d][%s/%d] ipc do call server(%s/%d)\n",
+		smp_processor_id(), tsk->comm, tsk->pid, server_task->comm,
+		server_task->pid);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (bind_info->is_calling) {
+		IPC_DEBUG("[cpu/%d][%s/%d] client begin schedule\n", smp_processor_id(),
+			tsk->comm, tsk->pid);
+		schedule();
+		IPC_DEBUG("[cpu/%d][%s/%d] client schedule end\n", smp_processor_id(),
+			tsk->comm, tsk->pid);
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			pr_err("[cpu/%d][%s/%d] client has signal pending, break\n",
+				smp_processor_id(), tsk->comm, tsk->pid);
+			break;
+		}
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	set_current_state(TASK_RUNNING);
+
+	if (bind_info->is_calling) {
+		pr_err("[cpu/%d][%s/%d] server is still calling, but client is woken up\n",
+			smp_processor_id(), tsk->comm, tsk->pid);
+		pr_err("[cpu/%d][%s/%d] server task(%s/%d) is running on cpu %d\n",
+			smp_processor_id(), tsk->comm, tsk->pid, server_task->comm,
+			server_task->pid, task_cpu(server_task));
+		//show_stack(server_task, NULL, KERN_DEBUG);
+		pr_err("[cpu/%d][%s/%d] show_stack end in %s\n",
+			smp_processor_id(), tsk->comm, tsk->pid, __func__);
+	}
+
+	kernel_ipc_client_exit(bind_info, tsk);
+
+	if (ret == -EINTR)
+		return ret;
+	ret = kernel_ipc_get_client_exit_code(bind_info);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kernel_ipc_do_call);
+
+long kernel_ipc_ret_call(struct kernel_ipc_bind_info *bind_info,
+		struct task_struct *tsk)
+{
+	struct task_struct *client_task;
+
+	if (!bind_info->is_calling)
+		return 0;
+
+	bind_info_lock(bind_info);
+	client_task = bind_info->client_task;
+	if (!client_task) {
+		bind_info_unlock(bind_info);
+		return -ESRCH;
+	}
+	bind_info_unlock(bind_info);
+
+	bind_info->is_calling = false;
+	preempt_disable();
+	/* memory barrier for preempt */
+	smp_mb();
+	wake_up_process(client_task);
+	preempt_enable();
+	IPC_DEBUG("[CPU/%d][%s/%d] client task pid: %d, state: %d\n",
+		smp_processor_id(), tsk->comm, tsk->pid, client_task->pid,
+		client_task->state);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kernel_ipc_ret_call);
+
+long kernel_ipc_wait_call(struct kernel_ipc_bind_info *bind_info,
+		struct task_struct *tsk)
+{
+	long ret = 0;
+	sigset_t pending_signals;
+
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (bind_info->is_calling)
+			break;
+
+		if (bind_info->server_need_exit) {
+			ret = -ENODEV;
+			break;
+		}
+
+		schedule();
+
+		if (signal_pending_state(TASK_INTERRUPTIBLE, tsk)
+				&& !bind_info->is_calling) {
+			if (fatal_signal_pending(tsk)) {
+				pr_err("[CPU/%d][%s/%d] current task has SIGKILL\n",
+					smp_processor_id(), tsk->comm, tsk->pid);
+			}
+
+			pending_signals = current->pending.signal;
+			ret = -ERESTARTSYS;
+			break;
+		}
+	}
+
+	set_current_state(TASK_RUNNING);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kernel_ipc_wait_call);
+
+static int __init
+kernel_ipc_init(void)
+{
+	pr_info("kernel ipc init\n");
+	return 0;
+}
+
+static void __exit
+kernel_ipc_exit(void)
+{
+	pr_info("kernel ipc exit\n");
+}
+
+module_init(kernel_ipc_init);
+module_exit(kernel_ipc_exit);
-- 
2.33.0
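A note on the intended handoff, for anyone reviewing the exports above: a client thread parks in kernel_ipc_do_call() until the bound server thread, which sleeps in kernel_ipc_wait_call() between requests, services the call and wakes it via kernel_ipc_ret_call(). A minimal in-kernel sketch of that contract follows; the example_* names are invented for illustration (the FUSE code in patch 2/2 is the only real consumer), and the shared request state is assumed to live elsewhere:

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/kernel_ipc.h>

static struct kernel_ipc_bind_info *example_slot;

/* Server side: register the current thread, then service calls in a loop. */
static int example_server(void *unused)
{
	void *bind = kernel_ipc_bind(current);

	if (IS_ERR(bind))
		return PTR_ERR(bind);
	example_slot = bind;

	for (;;) {
		/* 0: a client is parked in kernel_ipc_do_call();
		 * -ENODEV: woken by kernel_ipc_wakeup_server_task();
		 * -ERESTARTSYS: interrupted by a signal */
		if (kernel_ipc_wait_call(example_slot, current))
			break;
		/* ... service the request placed in shared state ... */
		kernel_ipc_ret_call(example_slot, current); /* wake the client */
	}
	kernel_ipc_unbind(example_slot, current); /* also frees the bind_info */
	example_slot = NULL;
	return 0;
}

/* Client side: a synchronous call that blocks until ret_call (or a signal). */
static ssize_t example_client_call(void)
{
	return kernel_ipc_do_call(example_slot, current);
}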

euleros inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC4GP7
CVE: NA

----------------------------------------

Support fuse fastpath: when a connection is mounted with "use_fastpath",
synchronous requests bypass the /dev/fuse request queue and are handed to
a per-CPU server thread through the kernel_ipc module. The request header
and inline arguments are exchanged through a page shared via mmap() on the
device fd; two new mount options ("use_fastpath", "no_forget") control the
behaviour.

Signed-off-by: chenrenhui <chenrenhui1@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig |   1 +
 fs/fuse/Kconfig                        |   6 +
 fs/fuse/dev.c                          | 744 +++++++++++++++++++++++++
 fs/fuse/dir.c                          |  87 ++-
 fs/fuse/file.c                         | 235 +++++++-
 fs/fuse/fuse_i.h                       |  55 ++
 fs/fuse/inode.c                        | 112 ++++
 fs/fuse/readdir.c                      |   8 +
 include/uapi/linux/fuse.h              |  15 +-
 9 files changed, 1236 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 4c7a26b1504a..e6046993cfb8 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -6394,6 +6394,7 @@ CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_VIRTIO_FS=m
 CONFIG_FUSE_DAX=y
+CONFIG_FUSE_FASTPATH=y
 CONFIG_OVERLAY_FS=m
 # CONFIG_OVERLAY_FS_REDIRECT_DIR is not set
 CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 40ce9a1c12e5..fee4bdb9e330 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -52,3 +52,9 @@ config FUSE_DAX
 	  If you want to allow mounting a Virtio Filesystem with the
 	  "dax" option, answer Y.
+
+config FUSE_FASTPATH
+	bool "fuse fastpath support"
+	default n
+	depends on FUSE_FS
+	depends on KERNEL_IPC
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e6cbed7aedcb..485386d195d1 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -22,6 +22,12 @@
 #include <linux/splice.h>
 #include <linux/sched.h>

+#ifdef CONFIG_FUSE_FASTPATH
+#include <linux/preempt.h>
+#include <linux/sched/task.h>
+#include <linux/kernel_ipc.h>
+#endif
+
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");

@@ -103,6 +109,283 @@ static void fuse_drop_waiting(struct fuse_conn *fc)

 static void fuse_put_request(struct fuse_req *req);

+#ifdef CONFIG_FUSE_FASTPATH
+
+#define MEM_PREFL1_64B(ptr) __builtin_prefetch((ptr), 0, 0)
+#define MEM_PREFL2_64B(ptr) __builtin_prefetch((ptr), 0, 2)
+#define MEM_PREFL1_256B(l1ptr) do { \
+	MEM_PREFL1_64B((l1ptr) + 0 * 64); \
+	MEM_PREFL1_64B((l1ptr) + 1 * 64); \
+	MEM_PREFL1_64B((l1ptr) + 2 * 64); \
+	MEM_PREFL1_64B((l1ptr) + 3 * 64); \
+} while (0)
+#define MEM_PREFL2_256B(l2ptr) do { \
+	MEM_PREFL2_64B((l2ptr) + 0 * 64); \
+	MEM_PREFL2_64B((l2ptr) + 1 * 64); \
+	MEM_PREFL2_64B((l2ptr) + 2 * 64); \
+	MEM_PREFL2_64B((l2ptr) + 3 * 64); \
+} while (0)
+#define MEM_PREFL1_128B(l1ptr) do { \
+	MEM_PREFL1_64B((l1ptr) + 0 * 64); \
+	MEM_PREFL1_64B((l1ptr) + 1 * 64); \
+} while (0)
+#define MEM_PREFL2_128B(l2ptr) do { \
+	MEM_PREFL2_64B((l2ptr) + 0 * 64); \
+	MEM_PREFL2_64B((l2ptr) + 1 * 64); \
+} while (0)
+#define LOAD_64B(reg, src) do { \
+	(reg)[0] = *((src) + 0); \
+	(reg)[1] = *((src) + 1); \
+	(reg)[2] = *((src) + 2); \
+	(reg)[3] = *((src) + 3); \
+} while (0)
+#define STORE_64B(dst, reg) do { \
+	*((dst) + 0) = (reg)[0]; \
+	*((dst) + 1) = (reg)[1]; \
+	*((dst) + 2) = (reg)[2]; \
+	*((dst) + 3) = (reg)[3]; \
+} while (0)
+
+#define MEMCPY_256B(dst, reg, src) do { \
+	LOAD_64B((reg) + 0, (src) + 0); \
+	LOAD_64B((reg) + 4, (src) + 4); \
+	LOAD_64B((reg) + 8, (src) + 8); \
+	LOAD_64B((reg) + 12, (src) + 12); \
+	STORE_64B((dst) + 0, (reg) + 0); \
+	STORE_64B((dst) + 4, (reg) + 4); \
+	STORE_64B((dst) + 8, (reg) + 8); \
+	STORE_64B((dst) + 12, (reg) + 12); \
+} while (0)
+#define MEMCPY_128B(dst, reg, src) do { \
+	LOAD_64B((reg) + 0, (src) + 0); \
+	LOAD_64B((reg) + 4, (src) + 4); \
+	STORE_64B((dst) + 0, (reg) + 0); \
+	STORE_64B((dst) + 4, (reg) + 4); \
+} while (0)
+#define MAX_REG_NUM (16)
+#define MAX_L1_PREF_SIZE (512)
+#define MAX_L2_PREF_SIZE (1024)
+#define MID1_OP_SIZE (128)
+#define MID1_MEM_REG_NUM (128 / 16)
+#define MAX_OP_SIZE (256)
+#define MAX_MEM_REG_NUM (256 / 16)
+#define MID_OP_SIZE (64)
+#define MID_MEM_REG_NUM (64 / 16)
+#define MIN_OP_SIZE (16)
+
+static void *memcpy_acl(void *dest, const void *src, size_t n)
+{
+	__uint128_t *dstp = (__uint128_t *)dest;
+	__uint128_t *srcp = (__uint128_t *)src;
+	__uint128_t regs[MAX_REG_NUM];
+	size_t num = n;
+
+	while (num >= MAX_OP_SIZE) {
+		MEM_PREFL1_256B(((char *)srcp) + MAX_L1_PREF_SIZE);
+		MEM_PREFL2_256B(((char *)srcp) + MAX_L2_PREF_SIZE);
+		MEMCPY_256B(dstp, regs, srcp);
+		num -= MAX_OP_SIZE;
+		dstp += MAX_MEM_REG_NUM;
+		srcp += MAX_MEM_REG_NUM;
+	}
+	while (num >= MID1_OP_SIZE) {
+		MEM_PREFL1_128B(((char *)srcp) + MAX_L1_PREF_SIZE);
+		MEM_PREFL2_128B(((char *)srcp) + MAX_L2_PREF_SIZE);
+		MEMCPY_128B(dstp, regs, srcp);
+		num -= MID1_OP_SIZE;
+		dstp += MID1_MEM_REG_NUM;
+		srcp += MID1_MEM_REG_NUM;
+	}
+	while (num >= MID_OP_SIZE) {
+		LOAD_64B(regs, srcp);
+		STORE_64B(dstp, regs);
+		num -= MID_OP_SIZE;
+		dstp += MID_MEM_REG_NUM;
+		srcp += MID_MEM_REG_NUM;
+	}
+
+	while (num >= MIN_OP_SIZE) {
+		*dstp = *srcp;
+		num -= MIN_OP_SIZE;
+		dstp += 1;
+		srcp += 1;
+	}
+
+	if (num > 0) {
+		char *pdst = (char *)dstp;
+		char *psrc = (char *)srcp;
+
+		while (num > 0) {
+			*pdst = *psrc;
+			num--;
+			pdst++;
+			psrc++;
+		}
+	}
+
+	return dest;
+}
+
+static inline int fuse_req_cred_init(struct fuse_conn *fc, struct fuse_req *req)
+{
+	req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
+	req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
+	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
+
+	if (unlikely(req->in.h.uid == ((uid_t)-1) ||
+			req->in.h.gid == ((gid_t)-1))) {
+		fuse_drop_waiting(fc);
+		return -EOVERFLOW;
+	}
+
+	return 0;
+}
+
+static void fuse_force_creds(struct fuse_req *req);
+
+static struct fuse_req *fuse_get_req_sync(struct fuse_mount *fm,
+	struct fuse_ipc_info *ipc_info, struct fuse_args *args)
+{
+	struct fuse_conn *fc = fm->fc;
+	struct fuse_req *req;
+	int err;
+
+	atomic_inc(&fc->num_waiting);
+	if (!fc->initialized) {
+		err = -EINTR;
+		if (wait_event_killable_exclusive(fc->blocked_waitq, fc->initialized))
+			goto out;
+	}
+	/* Matches smp_wmb() in fuse_set_initialized() */
+	smp_rmb();
+
+	err = -ENOTCONN;
+	if (!fc->connected)
+		goto out;
+
+	err = -ECONNREFUSED;
+	if (fc->conn_error)
+		goto out;
+
+	req = &ipc_info->req;
+	req->fm = fm;
+
+	if (args->force) {
+		if (!args->nocreds)
+			fuse_force_creds(req);
+	} else {
+		err = fuse_req_cred_init(fc, req);
+		if (err)
+			goto out;
+	}
+
+	return req;
+
+out:
+	fuse_drop_waiting(fc);
+	return ERR_PTR(err);
+}
+
+u64 fuse_get_unique_from_fc(struct fuse_conn *fc)
+{
+	fc->reqctr += FUSE_REQ_ID_STEP;
+	return fc->reqctr;
+}
+
+static void __fuse_ipc_send(struct fuse_req *req, struct task_struct *tsk,
+	struct fuse_ipc_info *ipc_info)
+{
+	ssize_t ret;
+
+	FUSE_DEBUG("[cpu/%d][%s/%d] fuse ipc send begin: unique: %d, opcode: %d\n",
+		smp_processor_id(), current->comm, current->pid,
+		req->in.h.unique, req->in.h.opcode);
+
+	ret = kernel_ipc_do_call(ipc_info->bind_info, tsk);
+
+	FUSE_DEBUG("[cpu/%d][%s/%d] end\n", smp_processor_id(), current->comm,
+		current->pid);
+
+	if (ret) {
+		pr_warn("[cpu/%d][%s/%d] fuse_simple_request send failed: ",
+			smp_processor_id(), current->comm, current->pid);
+		pr_warn("unique: %lld, opcode: %d, return value: %ld\n",
+			req->in.h.unique, req->in.h.opcode, ret);
+		req->out.h.error = ret;
+	}
+}
+
+static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args);
+
+ssize_t fuse_simple_request_fast(struct fuse_mount *fm, struct fuse_args *args)
+{
+	struct fuse_conn *fc = fm->fc;
+	struct fuse_req *req;
+	ssize_t ret;
+	struct fuse_ipc_info *ipc_info;
+	cpumask_t old_mask;
+	cpumask_t new_mask;
+
+	ipc_info = this_cpu_ptr(fc->percpu_ipc_info);
+
+	old_mask = current->cpus_mask;
+	cpumask_clear(&new_mask);
+	cpumask_set_cpu(raw_smp_processor_id(), &new_mask);
+	set_cpus_allowed_ptr(current, &new_mask);
+
+	mutex_lock(&ipc_info->mutex_lock);
+
+	req = fuse_get_req_sync(fm, ipc_info, args);
+	if (IS_ERR(req)) {
+		mutex_unlock(&ipc_info->mutex_lock);
+		set_cpus_allowed_ptr(current, &old_mask);
+		return PTR_ERR(req);
+	}
+
+	/* Needs to be done after fuse_get_req() so that fc->minor is valid */
+	fuse_adjust_compat(fc, args);
+
+	req->in.h.opcode = args->opcode;
+	req->in.h.nodeid = args->nodeid;
+	req->args = args;
+	req->in.h.unique = fuse_get_unique_from_fc(req->fm->fc);
+	req->in.h.len = sizeof(struct fuse_in_header) +
+		fuse_len_args(req->args->in_numargs,
+			(struct fuse_arg *) req->args->in_args);
+
+	if (!args->noreply)
+		__set_bit(FR_ISREPLY, &req->flags);
+
+	__fuse_ipc_send(req, current, ipc_info);
+
+	set_cpus_allowed_ptr(current, &old_mask);
+
+	ret = req->out.h.error;
+	if (!ret && args->out_argvar) {
+		WARN_ON(args->out_numargs == 0);
+		ret = args->out_args[args->out_numargs - 1].size;
+	}
+	fuse_drop_waiting(fc);
+
+	mutex_unlock(&ipc_info->mutex_lock);
+
+	return ret;
+}
+
+static void fuse_wakeup_server(struct fuse_conn *fc)
+{
+	int cpu;
+
+	if (!fc->percpu_ipc_info)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct fuse_ipc_info *ipc_info;
+
+		ipc_info = per_cpu_ptr(fc->percpu_ipc_info, cpu);
+
+		if (ipc_info && ipc_info->bind_info)
+			kernel_ipc_wakeup_server_task(ipc_info->bind_info);
+	}
+}
+#endif
+
 static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
 {
 	struct fuse_conn *fc = fm->fc;
@@ -238,6 +521,11 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 {
 	struct fuse_iqueue *fiq = &fc->iq;

+#ifdef CONFIG_FUSE_FASTPATH
+	if (fc->no_forget)
+		return;
+#endif
+
 	forget->forget_one.nodeid = nodeid;
 	forget->forget_one.nlookup = nlookup;

@@ -490,6 +778,11 @@ ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
 	struct fuse_req *req;
 	ssize_t ret;

+#ifdef CONFIG_FUSE_FASTPATH
+	if (fc->use_fastpath)
+		return fuse_simple_request_fast(fm, args);
+#endif
+
 	if (args->force) {
 		atomic_inc(&fc->num_waiting);
 		req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);
@@ -558,6 +851,14 @@ int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
 {
 	struct fuse_req *req;

+#ifdef CONFIG_FUSE_FASTPATH
+	if (fm && fm->fc && fm->fc->use_fastpath && args->opcode != FUSE_INIT) {
+		pr_warn("unexpected %s: opcode: %d, nodeid: %lld\n",
+			__func__, args->opcode, args->nodeid);
+		return -EINVAL;
+	}
+#endif
+
 	if (args->force) {
 		WARN_ON(!args->nocreds);
 		req = fuse_request_alloc(fm, gfp_flags);
@@ -759,10 +1060,24 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
 		void *pgaddr = kmap_atomic(cs->pg);
 		void *buf = pgaddr + cs->offset;

+#ifdef CONFIG_FUSE_FASTPATH
+		if (likely(cs->req && cs->req->fm->fc->use_fastpath)) {
+			if (cs->write)
+				memcpy_acl(buf, *val, ncpy);
+			else
+				memcpy_acl(*val, buf, ncpy);
+		} else {
+			if (cs->write)
+				memcpy(buf, *val, ncpy);
+			else
+				memcpy(*val, buf, ncpy);
+		}
+#else
 		if (cs->write)
 			memcpy(buf, *val, ncpy);
 		else
 			memcpy(*val, buf, ncpy);
+#endif

 		kunmap_atomic(pgaddr);
 		*val += ncpy;
@@ -2144,6 +2459,10 @@ void fuse_abort_conn(struct fuse_conn *fc)
 		LIST_HEAD(to_end);
 		unsigned int i;

+#ifdef CONFIG_FUSE_FASTPATH
+		if (fc->use_fastpath)
+			fuse_wakeup_server(fc);
+#endif
 		/* Background queuing checks fc->connected under bg_lock */
 		spin_lock(&fc->bg_lock);
 		fc->connected = 0;
@@ -2262,6 +2581,424 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
 	return 0;
 }

+#ifdef CONFIG_FUSE_FASTPATH
+static long fuse_ipc_bind(struct fuse_conn *fc, struct task_struct *tsk)
+{
+	struct fuse_ipc_info *ipc_info = this_cpu_ptr(fc->percpu_ipc_info);
+	void *data = NULL;
+
+	mutex_lock(&ipc_info->mutex_lock);
+	if (ipc_info->bind_info) {
+		FUSE_DEBUG("server %s/%d bound already\n", tsk->comm, tsk->pid);
+		mutex_unlock(&ipc_info->mutex_lock);
+		return -EEXIST;
+	}
+
+	data = kernel_ipc_bind(tsk);
+	if (IS_ERR(data)) {
+		mutex_unlock(&ipc_info->mutex_lock);
+		return PTR_ERR(data);
+	}
+
+	ipc_info->bind_info = data;
+	mutex_unlock(&ipc_info->mutex_lock);
+
+	FUSE_DEBUG("%s/%d bind to fuse_conn success\n", tsk->comm, tsk->pid);
+	return 0;
+}
+
+static long fuse_ipc_unbind(struct fuse_conn *fc, struct task_struct *tsk)
+{
+	struct fuse_ipc_info *ipc_info = this_cpu_ptr(fc->percpu_ipc_info);
+
+	//mutex_lock(&ipc_info->mutex_lock);
+	kernel_ipc_unbind(ipc_info->bind_info, tsk);
+	ipc_info->bind_info = NULL;
+	//mutex_unlock(&ipc_info->mutex_lock);
+
+	FUSE_DEBUG("%s/%d unbind success\n", tsk->comm, tsk->pid);
+	return 0;
+}
+
+static int fuse_read_copy(struct fuse_ipc_info *ipc_info,
+	struct fuse_copy_state *cs, struct fuse_req *req, struct fuse_arg *args,
+	int reqsize, unsigned int argpages, int numargs)
+{
+	unsigned int i;
+	int offset = 0;
+	int err = 0;
+
+	struct fuse_arg *last_arg = &args[numargs - 1];
+	void *data_page = ipc_info->data_page;
+
+	if (reqsize > FUSE_DATA_PAGE_SIZE) {
+		if (argpages) {
+			int size_without_pages = reqsize - last_arg->size;
+
+			if (size_without_pages > FUSE_DATA_PAGE_SIZE) {
+				pr_err("arg size is greater than 4K, have pages\n");
+				return -1;
+			}
+		} else {
+			pr_warn("arg size is greater than 4K, no pages\n");
+			return -1;
+		}
+	}
+
+	memcpy(data_page, &req->in.h, sizeof(req->in.h));
+	offset += sizeof(req->in.h);
+	for (i = 0; i < numargs; i++) {
+		struct fuse_arg *arg = &args[i];
+
+		if (i == numargs - 1 && argpages) {
+			err = fuse_copy_pages(cs, arg->size, 0);
+			if (err)
+				return err;
+		} else {
+			memcpy(((char *)data_page + offset), arg->value, arg->size);
+			offset += arg->size;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t fuse_ipc_do_read(struct fuse_ipc_info *ipc_info,
+	struct fuse_copy_state *cs, size_t nbytes)
+{
+	struct fuse_req *req;
+	struct fuse_args *args;
+	unsigned int reqsize;
+	ssize_t err;
+
+	req = &ipc_info->req;
+	args = req->args;
+	reqsize = req->in.h.len;
+
+	if (nbytes < reqsize) {
+		req->out.h.error = -EIO;
+		if (args->opcode == FUSE_SETXATTR)
+			req->out.h.error = -E2BIG;
+		fuse_request_end(req);
+		return -EINVAL;
+	}
+
+	cs->req = req;
+	err = fuse_read_copy(ipc_info, cs, req, (struct fuse_arg *) args->in_args,
+		reqsize, args->in_pages, args->in_numargs);
+	fuse_copy_finish(cs);
+	clear_bit(FR_LOCKED, &req->flags);
+	if (err) {
+		req->out.h.error = -EIO;
+		goto out_end;
+	}
+	if (!test_bit(FR_ISREPLY, &req->flags)) {
+		err = reqsize;
+		goto out_end;
+	}
+	FUSE_DEBUG("[%s] opcode: %d, unique: %d, return reqsize is %d\n",
+		__func__, args->opcode, req->in.h.unique, reqsize);
+	return reqsize;
+
+out_end:
+	fuse_drop_waiting(req->fm->fc);
+	FUSE_DEBUG("[%s] error: %ld\n", __func__, err);
+	return err;
+}
+
+static ssize_t fuse_ipc_read(struct fuse_ipc_info *ipc_info,
+	struct fuse_ipc_io *ipc_in_data)
+{
+	struct iov_iter iter;
+	struct fuse_copy_state cs;
+	struct iovec iov = {.iov_base = ipc_in_data->buf,
+		.iov_len = ipc_in_data->buf_len};
+
+	iov_iter_init(&iter, READ, &iov, 1, iov.iov_len);
+
+	fuse_copy_init(&cs, 1, &iter);
+
+	return fuse_ipc_do_read(ipc_info, &cs, iov_iter_count(&iter));
+}
+
+static long fuse_ipc_wait_call(struct file *file, struct fuse_conn *fc,
+	struct task_struct *tsk, struct fuse_ipc_io *ipc_in_data)
+{
+	long ret = 0;
+	struct fuse_ipc_info *ipc_info = this_cpu_ptr(fc->percpu_ipc_info);
+
+	FUSE_DEBUG("[cpu/%d][%s/%d] wait call slow start\n", smp_processor_id(),
+		tsk->comm, tsk->pid);
+	ret = kernel_ipc_wait_call(ipc_info->bind_info, tsk);
+	if (ret < 0) {
+		pr_err("error[%d/%s]: kernel_ipc_wait_call error: %ld\n",
+			smp_processor_id(), tsk->comm, ret);
+		return ret;
+	}
+
+	ret = fuse_ipc_read(ipc_info, ipc_in_data);
+
+	FUSE_DEBUG("[cpu/%d][%s/%d] wait call slow end, ret = %ld\n",
+		smp_processor_id(), tsk->comm, tsk->pid, ret);
+	return ret;
+}
+
+static ssize_t fuse_ipc_write(struct fuse_ipc_info *ipc_info,
+	struct fuse_ipc_io *ipc_out_data)
+{
+	struct fuse_copy_state cs;
+	struct iovec iovstack[UIO_FASTIOV];
+	struct iovec *iov = iovstack;
+	struct iov_iter iter;
+	ssize_t ret;
+	size_t nbytes;
+	struct fuse_out_header *oh;
+	struct fuse_req *req;
+
+	ret = import_iovec(WRITE, ipc_out_data->iov, ipc_out_data->count,
+		ARRAY_SIZE(iovstack), &iov, &iter);
+	if (ret < 0) {
+		pr_warn("[cpu/%d] [%s/%d] %s: import_iovec failed: %ld\n",
+			smp_processor_id(), current->comm, current->pid, __func__, ret);
+		return ret;
+	}
+
+	fuse_copy_init(&cs, 0, &iter);
+
+	nbytes = iov_iter_count(&iter);
+
+	oh = (struct fuse_out_header *) ipc_info->data_page;
+
+	ret = -EINVAL;
+	if (oh->len != (nbytes + sizeof(struct fuse_out_header))) {
+		pr_warn("[cpu/%d][%s/%d] failed %s: oh.unique: %lld, oh.len: %d nbytes: %lu\n",
+			smp_processor_id(), current->comm, current->pid, __func__,
+			oh->unique, oh->len, nbytes);
+		goto copy_finish;
+	}
+
+	/*
+	 * Zero oh.unique indicates unsolicited notification message
+	 * and error contains notification code.
+	 */
+	if (!oh->unique) {
+		pr_warn("[cpu/%d] [%s/%d] %s: failed oh.unique is zero\n",
+			smp_processor_id(), current->comm, current->pid, __func__);
+		goto out;
+	}
+
+	ret = -EINVAL;
+	if (oh->error <= -512 || oh->error > 0) {
+		pr_err("[cpu/%d] [%s/%d] failed %s: oh.error: %d\n",
+			smp_processor_id(), current->comm,
+			current->pid, __func__, oh->error);
+		goto copy_finish;
+	}
+
+	ret = -ENOENT;
+	req = &ipc_info->req;
+	FUSE_DEBUG("[cpu/%d] [%s/%d] %s: req opcode: %d, unique: %d, nodeid: %llu\n",
+		smp_processor_id(), current->comm, current->pid, __func__,
+		req->in.h.opcode, req->in.h.unique, req->in.h.nodeid);
+	if (!req) {
+		FUSE_DEBUG("failed %s: req is null\n", __func__);
+		goto copy_finish;
+	}
+
+	if (oh->unique & FUSE_INT_REQ_BIT) {
+		FUSE_DEBUG("failed %s: interrupt\n", __func__);
+		goto copy_finish;
+	}
+
+	req->out.h = *oh;
+	cs.req = req;
+	if (!req->args->page_replace)
+		cs.move_pages = 0;
+
+	if (oh->error)
+		ret = (nbytes + sizeof(struct fuse_out_header)) != sizeof(*oh) ?
+			-EINVAL : 0;
+	else
+		ret = copy_out_args(&cs, req->args,
+			nbytes + sizeof(struct fuse_out_header));
+	fuse_copy_finish(&cs);
+
+out:
+	kfree(iov);
+	return ret ? ret : nbytes;
+
+copy_finish:
+	fuse_copy_finish(&cs);
+	goto out;
+}
+
+static long fuse_ipc_ret_call(struct file *file, struct fuse_conn *fc,
+	struct task_struct *tsk, struct fuse_ipc_io *ipc_out_data)
+{
+	long ret;
+	long num_written;
+	struct fuse_ipc_info *ipc_info = this_cpu_ptr(fc->percpu_ipc_info);
+
+	num_written = fuse_ipc_write(ipc_info, ipc_out_data);
+	FUSE_DEBUG("[cpu/%d] [%s/%d] fuse_ipc_write end\n", smp_processor_id(),
+		tsk->comm, tsk->pid);
+	if (num_written < 0) {
+		pr_err("[cpu/%d] [%s/%d] fuse_ipc_write failed %ld\n",
+			smp_processor_id(), tsk->comm, tsk->pid, num_written);
+		return num_written;
+	}
+
+	ret = kernel_ipc_ret_call(ipc_info->bind_info, tsk);
+	if (ret) {
+		pr_err("error: kernel_ipc_ret_call error: %ld\n", ret);
+		return ret;
+	}
+
+	FUSE_DEBUG("[cpu/%d] [%s/%d] ret call end\n", smp_processor_id(),
+		tsk->comm, tsk->pid);
+	return num_written;
+}
+
+static long fuse_ipc_wait_and_ret_call(struct file *file, struct fuse_conn *fc,
+	struct task_struct *tsk, unsigned long arg)
+{
+	struct kernel_ipc_bind_info *bind_info;
+	struct fuse_ipc_io ipc_io_data;
+	long ret = 0;
+	struct fuse_ipc_info *ipc_info = this_cpu_ptr(fc->percpu_ipc_info);
+
+	bind_info = ipc_info->bind_info;
+	if (!bind_info)
+		return -ENOENT;
+
+	if (copy_from_user(&ipc_io_data, (struct fuse_ipc_io __user *)arg,
+			sizeof(ipc_io_data)))
+		return -EFAULT;
+
+	if (bind_info->is_calling) {
+		ret = fuse_ipc_ret_call(file, fc, tsk, &ipc_io_data);
+		if (ret < 0) {
+			FUSE_DEBUG("[cpu/%d] [%s/%d] error: fuse_ipc_ret_call: %d\n",
+				smp_processor_id(),
+				current->comm, current->pid, ret);
+			return ret;
+		}
+	}
+
+	return fuse_ipc_wait_call(file, fc, tsk, &ipc_io_data);
+}
+
+static long fuse_ipc_ioctl(struct file *file, unsigned int cmd,
+	unsigned long arg)
+{
+	struct fuse_dev *fud = NULL;
+	struct fuse_conn *fc = NULL;
+	long err = -ENOTTY;
+	struct fuse_ipc_io ipc_io_data;
+
+	fud = fuse_get_dev(file);
+	if (fud == NULL)
+		return -EINVAL;
+
+	fc = fud->fc;
+	if (fc == NULL)
+		return -EINVAL;
+
+	switch (cmd) {
+	case FUSE_DEV_IOC_IPC_BIND:
+		err = fuse_ipc_bind(fc, current);
+		break;
+	case FUSE_DEV_IOC_WAIT_RET_CALL:
+		FUSE_DEBUG("[cpu/%d][%s/%d] fuse ipc wait and ret call begin\n",
+			smp_processor_id(), current->comm, current->pid);
+		err = fuse_ipc_wait_and_ret_call(file, fc, current, arg);
+		FUSE_DEBUG("[cpu/%d][%s/%d] fuse ipc wait and ret end: ret: %d\n",
+			smp_processor_id(), current->comm, current->pid, err);
+		break;
+	case FUSE_DEV_IOC_IPC_UNBIND:
+		err = fuse_ipc_unbind(fc, current);
+		break;
+	case FUSE_DEV_IOC_WAIT_CALL:
+		if (copy_from_user(&ipc_io_data, (struct fuse_ipc_io __user *)arg,
+				sizeof(ipc_io_data)))
+			return -EFAULT;
+		err = fuse_ipc_wait_call(file, fc, current, &ipc_io_data);
+		break;
+	case FUSE_DEV_IOC_RET_CALL:
+		if (copy_from_user(&ipc_io_data, (struct fuse_ipc_io __user *)arg,
+				sizeof(ipc_io_data)))
+			return -EFAULT;
+		err = fuse_ipc_ret_call(file, fc, current, &ipc_io_data);
+		break;
+	}
+
+	return err;
+}
+
+void fuse_ipc_free_data_page(struct fuse_ipc_info *ipc_info)
+{
+	void *data_page;
+
+	data_page = ipc_info->data_page;
+	if (data_page) {
+		ipc_info->data_page = NULL;
+		ClearPageReserved(virt_to_page(data_page));
+		free_page((uintptr_t)data_page);
+	}
+}
+
+static int fuse_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	void *data_page;
+	unsigned long pfn;
+	unsigned long vmsize;
+	struct fuse_dev *fud = NULL;
+	struct fuse_conn *fc = NULL;
+	struct fuse_ipc_info *ipc_info;
+
+	fud = fuse_get_dev(filp);
+	if (fud == NULL)
+		return -EINVAL;
+
+	fc = fud->fc;
+
+	if (fc->percpu_ipc_info == NULL)
+		return -EINVAL;
+
+	ipc_info = this_cpu_ptr(fc->percpu_ipc_info);
+
+	data_page = ipc_info->data_page;
+	if (data_page)
+		return -EEXIST;
+
+	data_page = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!data_page) {
+		pr_err("get zero page failed\n");
+		return -ENOMEM;
+	}
+	SetPageReserved(virt_to_page(data_page));
+	ipc_info->data_page = data_page;
+
+	pfn = virt_to_pfn(data_page);
+	vmsize = vma->vm_end - vma->vm_start;
+	/* allocated memory size should not be less than ipc data page size */
+	if (vmsize < FUSE_DATA_PAGE_SIZE) {
+		fuse_ipc_free_data_page(ipc_info);
+		pr_err("vma size %lu is smaller than the ipc data page\n", vmsize);
+		return -ENXIO;
+	}
+	if (remap_pfn_range(vma, vma->vm_start, pfn, vmsize, vma->vm_page_prot)) {
+		fuse_ipc_free_data_page(ipc_info);
+		pr_err("remap_pfn_range failed\n");
+		return -EAGAIN;
+	}
+
+	FUSE_DEBUG("fuse mmap success\n");
+	return 0;
+}
+#endif
+
 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
 			   unsigned long arg)
 {
@@ -2295,6 +3032,10 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
 			}
 		}
 	}
+#ifdef CONFIG_FUSE_FASTPATH
+	else
+		err = fuse_ipc_ioctl(file, cmd, arg);
+#endif

 	return err;
 }
@@ -2311,6 +3052,9 @@ const struct file_operations fuse_dev_operations = {
 	.fasync		= fuse_dev_fasync,
 	.unlocked_ioctl = fuse_dev_ioctl,
 	.compat_ioctl   = compat_ptr_ioctl,
+#ifdef CONFIG_FUSE_FASTPATH
+	.mmap		= fuse_mmap,
+#endif
 };
 EXPORT_SYMBOL_GPL(fuse_dev_operations);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index edaf47356dff..5c4f6a1a6e7a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -221,10 +221,15 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)

 		fm = get_fuse_mount(inode);

-		forget = fuse_alloc_forget();
-		ret = -ENOMEM;
-		if (!forget)
-			goto out;
+#ifdef CONFIG_FUSE_FASTPATH
+		if (!fm->fc->no_forget)
+#endif
+		{
+			forget = fuse_alloc_forget();
+			ret = -ENOMEM;
+			if (!forget)
+				goto out;
+		}

 		attr_version = fuse_get_attr_version(fm->fc);

@@ -244,11 +249,19 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 						  outarg.nodeid, 1);
 				goto invalid;
 			}
+#ifdef CONFIG_FUSE_FASTPATH
+			fuse_inc_nlookup(fm->fc, fi);
+#else
 			spin_lock(&fi->lock);
 			fi->nlookup++;
 			spin_unlock(&fi->lock);
+#endif
 		}
-		kfree(forget);
+#ifdef CONFIG_FUSE_FASTPATH
+		if (!fm->fc->no_forget)
+#endif
+			kfree(forget);
+
 		if (ret == -ENOMEM || ret == -EINTR)
 			goto out;
 		if (ret || fuse_invalid_attr(&outarg.attr) ||
@@ -432,11 +445,15 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name

 	if (name->len > FUSE_NAME_MAX)
 		goto out;
-
-	forget = fuse_alloc_forget();
-	err = -ENOMEM;
-	if (!forget)
-		goto out;
+#ifdef CONFIG_FUSE_FASTPATH
+	if (!fm->fc->no_forget)
+#endif
+	{
+		forget = fuse_alloc_forget();
+		err = -ENOMEM;
+		if (!forget)
+			goto out;
+	}

 	attr_version = fuse_get_attr_version(fm->fc);

@@ -467,7 +484,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
 	err = 0;

  out_put_forget:
-	kfree(forget);
+#ifdef CONFIG_FUSE_FASTPATH
+	if (!fm->fc->no_forget)
+#endif
+		kfree(forget);
 out:
 	return err;
 }
@@ -546,10 +566,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	/* Userspace expects S_IFREG in create mode */
 	BUG_ON((mode & S_IFMT) != S_IFREG);

-	forget = fuse_alloc_forget();
-	err = -ENOMEM;
-	if (!forget)
-		goto out_err;
+#ifdef CONFIG_FUSE_FASTPATH
+	if (!fm->fc->no_forget)
+#endif
+	{
+		forget = fuse_alloc_forget();
+		err = -ENOMEM;
+		if (!forget)
+			goto out_err;
+	}

 	err = -ENOMEM;
 	ff = fuse_file_alloc(fm);
@@ -604,7 +629,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 		err = -ENOMEM;
 		goto out_err;
 	}
-	kfree(forget);
+#ifdef CONFIG_FUSE_FASTPATH
+	if (!fm->fc->no_forget)
+#endif
+		kfree(forget);
 	d_instantiate(entry, inode);
 	fuse_change_entry_timeout(entry, &outentry);
 	fuse_dir_changed(dir);
@@ -621,11 +649,13 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 		invalidate_inode_pages2(inode->i_mapping);
 	}
 	return err;
-
 out_free_ff:
 	fuse_file_free(ff);
 out_put_forget_req:
-	kfree(forget);
+#ifdef CONFIG_FUSE_FASTPATH
+	if (!fm->fc->no_forget)
+#endif
+		kfree(forget);
 out_err:
 	return err;
 }
@@ -693,9 +723,14 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
 	if (fuse_is_bad(dir))
 		return -EIO;

-	forget = fuse_alloc_forget();
-	if (!forget)
-		return -ENOMEM;
+#ifdef CONFIG_FUSE_FASTPATH
+	if (!fm->fc->no_forget)
+#endif
+	{
+		forget = fuse_alloc_forget();
+		if (!forget)
+			return -ENOMEM;
+	}

 	memset(&outarg, 0, sizeof(outarg));
 	args->nodeid = get_node_id(dir);
@@ -719,7 +754,10 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
 		return -ENOMEM;
 	}
-	kfree(forget);
+#ifdef CONFIG_FUSE_FASTPATH
+	if (!fm->fc->no_forget)
+#endif
+		kfree(forget);

 	d_drop(entry);
 	d = d_splice_alias(inode, entry);
@@ -736,7 +774,10 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
 	return 0;

  out_put_forget_req:
-	kfree(forget);
+#ifdef CONFIG_FUSE_FASTPATH
+	if (!fm->fc->no_forget)
+#endif
+		kfree(forget);
 	return err;
 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e0aac6019cdb..abf818b35d30 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -331,7 +331,12 @@ void fuse_release_common(struct file *file, bool isdir)
 	 * synchronous RELEASE is allowed (and desirable) in this case
 	 * because the server can be trusted not to screw up.
 	 */
-	fuse_file_put(ff, ff->fm->fc->destroy, isdir);
+#ifdef CONFIG_FUSE_FASTPATH
+	if (ff->fm->fc->use_fastpath)
+		fuse_file_put(ff, true, isdir);
+	else
+#endif
+		fuse_file_put(ff, ff->fm->fc->destroy, isdir);
 }

 static int fuse_open(struct inode *inode, struct file *file)
@@ -431,6 +436,11 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	bool found;

+#ifdef CONFIG_FUSE_FASTPATH
+	if (RB_EMPTY_ROOT(&fi->writepages))
+		return false;
+#endif
+
 	spin_lock(&fi->lock);
 	found = fuse_find_writeback(fi, idx_from, idx_to);
 	spin_unlock(&fi->lock);
@@ -696,8 +706,13 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 	kref_put(&io->refcnt, fuse_io_release);
 }

+#ifdef CONFIG_FUSE_FASTPATH
+struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
+				   unsigned int npages)
+#else
 static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
 					  unsigned int npages)
+#endif
 {
 	struct fuse_io_args *ia;

@@ -714,7 +729,11 @@ static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
 	return ia;
 }

+#ifdef CONFIG_FUSE_FASTPATH
+void fuse_io_free(struct fuse_io_args *ia)
+#else
 static void fuse_io_free(struct fuse_io_args *ia)
+#endif
 {
 	kfree(ia->ap.pages);
 	kfree(ia);
@@ -1473,6 +1492,196 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 	return ret < 0 ? ret : 0;
 }

+#ifdef CONFIG_FUSE_FASTPATH
+static ssize_t fuse_send_write_sync(struct fuse_io_args *ia, loff_t pos,
+	size_t count, fl_owner_t owner, struct kiocb *iocb)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_mount *fm = ff->fm;
+	struct fuse_write_in *inarg = &ia->write.in;
+	ssize_t err;
+
+	fuse_write_args_fill(ia, ff, pos, count);
+	inarg->flags = fuse_write_flags(iocb);
+	if (owner != NULL) {
+		inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
+		inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner);
+	}
+
+	err = fuse_simple_request(fm, &ia->ap.args);
+	if (!err && ia->write.out.size > count)
+		err = -EIO;
+
+	return err ?: ia->write.out.size;
+}
+
+static ssize_t fuse_send_read_sync(struct fuse_io_args *ia, loff_t pos,
+	size_t count, fl_owner_t owner, struct kiocb *iocb)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_mount *fm = ff->fm;
+
+	fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
+	if (owner != NULL) {
+		ia->read.in.read_flags |= FUSE_READ_LOCKOWNER;
+		ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner);
+	}
+
+	return fuse_simple_request(fm, &ia->ap.args);
+}
+
+static void fuse_memset_ia(struct fuse_io_args *ia)
+{
+	struct page **pages;
+	struct fuse_page_desc *descs;
+
+	pages = ia->ap.pages;
+	descs = ia->ap.descs;
+	memset(ia, 0, sizeof(*ia));
+	ia->ap.pages = pages;
+	ia->ap.descs = descs;
+}
+
+ssize_t fuse_direct_io_fast(struct kiocb *iocb, struct iov_iter *iter,
+	loff_t *ppos, int flags)
+{
+	int write = flags & FUSE_DIO_WRITE;
+	int cuse = flags & FUSE_DIO_CUSE;
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fm->fc;
+	size_t nmax = write ? fc->max_write : fc->max_read;
+	loff_t pos = *ppos;
+	size_t count = iov_iter_count(iter);
+	pgoff_t idx_from = pos >> PAGE_SHIFT;
+	pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
+	ssize_t res = 0;
+	int err = 0;
+	struct fuse_io_args *ia;
+	unsigned int max_pages;
+	bool should_dirty;
+	struct fuse_ipc_info *ipc_info;
+
+	max_pages = iov_iter_npages(iter, fc->max_pages);
+	ipc_info = this_cpu_ptr(fc->percpu_ipc_info);
+	mutex_lock(&ipc_info->ia_mutex_lock);
+	ia = ipc_info->ia;
+	if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
+		if (!write)
+			inode_lock(inode);
+		fuse_sync_writes(inode);
+		if (!write)
+			inode_unlock(inode);
+	}
+
+	should_dirty = !write && iter_is_iovec(iter);
+	while (count) {
+		ssize_t nres;
+		fl_owner_t owner = current->files;
+		size_t nbytes = min(count, nmax);
+
+		err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
+					  max_pages, fc->use_pages_for_kvec_io);
+		if (err && !nbytes)
+			break;
+
+		if (write) {
+			if (!capable(CAP_FSETID))
+				ia->write.in.write_flags |= FUSE_WRITE_KILL_SUIDGID;
+			nres = fuse_send_write_sync(ia, pos, nbytes, owner, iocb);
+		} else {
+			nres = fuse_send_read_sync(ia, pos, nbytes, owner, iocb);
+		}
+
+		fuse_release_user_pages(&ia->ap, 0, should_dirty);
+
+		if (nres < 0) {
+			iov_iter_revert(iter, nbytes);
+			err = nres;
+			break;
+		}
+		WARN_ON(nres > nbytes);
+
+		count -= nres;
+		res += nres;
+		pos += nres;
+		if (nres != nbytes) {
+			iov_iter_revert(iter, nbytes - nres);
+			break;
+		}
+		if (count) {
+			fuse_memset_ia(ia);
+			max_pages = iov_iter_npages(iter, fc->max_pages);
+		}
+	}
+
+	fuse_memset_ia(ia);
+	mutex_unlock(&ipc_info->ia_mutex_lock);
+
+	if (res > 0)
+		*ppos = pos;
+
+	return res > 0 ? res : err;
+}
+
+static ssize_t __fuse_direct_read_fast(struct kiocb *iocb,
+	struct iov_iter *iter, loff_t *ppos)
+{
+	ssize_t res;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	res = fuse_direct_io_fast(iocb, iter, ppos, 0);
+
+	fuse_invalidate_atime(inode);
+
+	return res;
+}
+
+static void fuse_do_truncate(struct file *file);
+
+static ssize_t
+fuse_direct_IO_fast(struct kiocb *iocb, struct iov_iter *iter)
+{
+	ssize_t ret = 0;
+	struct file *file = iocb->ki_filp;
+	loff_t pos = 0;
+	struct inode *inode;
+	loff_t i_size;
+	size_t count = iov_iter_count(iter), shortened = 0;
+	loff_t offset = iocb->ki_pos;
+
+	pos = offset;
+	inode = file->f_mapping->host;
+	i_size = i_size_read(inode);
+
+	if ((iov_iter_rw(iter) == READ) && (offset >= i_size))
+		return 0;
+
+	if (iov_iter_rw(iter) == WRITE) {
+		ret = fuse_direct_io_fast(iocb, iter, &pos, FUSE_DIO_WRITE);
+		fuse_invalidate_attr(inode);
+	} else {
+		ret = __fuse_direct_read_fast(iocb, iter, &pos);
+	}
+	iov_iter_reexpand(iter, iov_iter_count(iter) + shortened);
+
+	if (iov_iter_rw(iter) == WRITE) {
+		if (ret > 0)
+			fuse_write_update_size(inode, pos);
+		else if (ret < 0 && offset + count > i_size)
+			fuse_do_truncate(file);
+	}
+
+	return ret;
+}
+#endif
+
 ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 		       loff_t *ppos, int flags)
 {
@@ -1584,6 +1793,13 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
 		res = fuse_direct_IO(iocb, to);
 	} else {
+#ifdef CONFIG_FUSE_FASTPATH
+		struct inode *inode = file_inode(iocb->ki_filp);
+		struct fuse_conn *fc = get_fuse_conn(inode);
+
+		if (fc->use_fastpath)
+			return __fuse_direct_read_fast(iocb, to, &iocb->ki_pos);
+#endif
 		struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);

 		res = __fuse_direct_read(&io, to, &iocb->ki_pos);
@@ -1605,8 +1821,16 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
 		res = fuse_direct_IO(iocb, from);
 	} else {
-		res = fuse_direct_io(&io, from, &iocb->ki_pos,
-				     FUSE_DIO_WRITE);
+#ifdef CONFIG_FUSE_FASTPATH
+		struct fuse_conn *fc = get_fuse_conn(inode);
+
+		if (fc->use_fastpath)
+			res = fuse_direct_io_fast(iocb, from, &iocb->ki_pos,
+						  FUSE_DIO_WRITE);
+		else
+#endif
+			res = fuse_direct_io(&io, from, &iocb->ki_pos,
+					     FUSE_DIO_WRITE);
 		}
 	}
 	fuse_invalidate_attr(inode);
@@ -3212,6 +3436,11 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	struct fuse_io_priv *io;

+#ifdef CONFIG_FUSE_FASTPATH
+	if (ff->fm->fc->use_fastpath)
+		return fuse_direct_IO_fast(iocb, iter);
+#endif
+
 	pos = offset;
 	inode = file->f_mapping->host;
 	i_size = i_size_read(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 749003487776..0fbbf88ad826 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -515,6 +515,10 @@ struct fuse_fs_context {
 	bool no_force_umount:1;
 	bool legacy_opts_show:1;
 	bool dax:1;
+#ifdef CONFIG_FUSE_FASTPATH
+	bool no_forget:1;
+	bool use_fastpath:1;
+#endif
 	unsigned int max_read;
 	unsigned int blksize;
 	const char *subtype;
@@ -526,6 +530,17 @@ struct fuse_fs_context {
 	void **fudptr;
 };

+#ifdef CONFIG_FUSE_FASTPATH
+struct fuse_ipc_info {
+	struct fuse_req req;
+	struct fuse_io_args *ia;
+	void *bind_info;
+	struct mutex mutex_lock;
+	struct mutex ia_mutex_lock;
+	void *data_page;
+};
+#endif
+
 /**
  * A Fuse connection.
  *
@@ -816,6 +831,13 @@ struct fuse_conn {
 	/** List of filesystems using this connection */
 	struct list_head mounts;

+#ifdef CONFIG_FUSE_FASTPATH
+	unsigned int no_forget:1;
+	unsigned int use_fastpath:1;
+	struct fuse_ipc_info __percpu *percpu_ipc_info;
+	u64 reqctr;
+#endif
+
 	KABI_RESERVE(1)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
@@ -1261,4 +1283,37 @@ void fuse_dax_inode_cleanup(struct inode *inode);
 bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment);
 void fuse_dax_cancel_work(struct fuse_conn *fc);

+#ifdef CONFIG_FUSE_FASTPATH
+
+#define FUSE_DEBUG(fmt, args...)
+//#define FUSE_DEBUG(fmt, args...) printk(fmt, ##args)
+
+#define FUSE_DATA_PAGE_SIZE 4096
+
+struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
+		unsigned int npages);
+void fuse_io_free(struct fuse_io_args *ia);
+void fuse_ipc_free_data_page(struct fuse_ipc_info *ipc_info);
+
+static inline void fuse_inc_nlookup(struct fuse_conn *fc, struct fuse_inode *fi)
+{
+	if (fc->no_forget)
+		return;
+
+	spin_lock(&fi->lock);
+	fi->nlookup++;
+	spin_unlock(&fi->lock);
+}
+
+static inline void fuse_dec_nlookup(struct fuse_conn *fc, struct fuse_inode *fi)
+{
+	if (fc->no_forget)
+		return;
+
+	spin_lock(&fi->lock);
+	fi->nlookup--;
+	spin_unlock(&fi->lock);
+}
+#endif /* CONFIG_FUSE_FASTPATH */
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 0a639aed9eba..3d21e551b092 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -23,6 +23,9 @@
 #include <linux/exportfs.h>
 #include <linux/posix_acl.h>
 #include <linux/pid_namespace.h>
+#ifdef CONFIG_FUSE_FASTPATH
+#include <linux/kernel_ipc.h>
+#endif

 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -370,9 +373,13 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
 	}
 done:
 	fi = get_fuse_inode(inode);
+#ifdef CONFIG_FUSE_FASTPATH
+	fuse_inc_nlookup(fc, fi);
+#else
 	spin_lock(&fi->lock);
 	fi->nlookup++;
 	spin_unlock(&fi->lock);
+#endif
 	fuse_change_attributes(inode, attr, attr_valid, attr_version);

 	return inode;
@@ -528,6 +535,10 @@ enum {
 	OPT_ALLOW_OTHER,
 	OPT_MAX_READ,
 	OPT_BLKSIZE,
+#ifdef CONFIG_FUSE_FASTPATH
+	OPT_NO_FORGET,
+	OPT_USE_FASTPATH,
+#endif
 	OPT_ERR
 };

@@ -542,6 +553,10 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
 	fsparam_u32	("max_read",		OPT_MAX_READ),
 	fsparam_u32	("blksize",		OPT_BLKSIZE),
 	fsparam_string	("subtype",		OPT_SUBTYPE),
+#ifdef CONFIG_FUSE_FASTPATH
+	fsparam_flag("no_forget", OPT_NO_FORGET),
+	fsparam_flag("use_fastpath", OPT_USE_FASTPATH),
+#endif
 	{}
 };

@@ -625,6 +640,16 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		ctx->blksize = result.uint_32;
 		break;

+#ifdef CONFIG_FUSE_FASTPATH
+	case OPT_NO_FORGET:
+		ctx->no_forget = true;
+		break;
+
+	case OPT_USE_FASTPATH:
+		ctx->use_fastpath = true;
+		break;
+#endif
+
 	default:
 		return -EINVAL;
 	}
@@ -732,11 +757,77 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);

+#ifdef CONFIG_FUSE_FASTPATH
+static int fuse_alloc_ipc_info(struct fuse_conn *fc)
+{
+	int cpu;
+
+	fc->percpu_ipc_info = alloc_percpu(struct fuse_ipc_info);
+	if (fc->percpu_ipc_info == NULL)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct fuse_ipc_info *ipc_info;
+
+		ipc_info = per_cpu_ptr(fc->percpu_ipc_info, cpu);
+		ipc_info->bind_info = NULL;
+		ipc_info->ia = NULL;
+		ipc_info->data_page = NULL;
+		mutex_init(&ipc_info->mutex_lock);
+		mutex_init(&ipc_info->ia_mutex_lock);
+	}
+
+	return 0;
+}
+
+static void fuse_free_ipc_info(struct fuse_conn *fc)
+{
+	int cpu;
+
+	if (!fc->percpu_ipc_info)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct fuse_ipc_info *ipc_info;
+
+		ipc_info = per_cpu_ptr(fc->percpu_ipc_info, cpu);
+
+		if (ipc_info && ipc_info->bind_info)
+			kernel_ipc_release(ipc_info->bind_info);
+
+		if (ipc_info && ipc_info->ia)
+			fuse_io_free(ipc_info->ia);
+
+		fuse_ipc_free_data_page(ipc_info);
+	}
+	free_percpu(fc->percpu_ipc_info);
+}
+
+static void ipc_info_init(struct fuse_conn *fc, bool *ok)
+{
+	int cpu;
+	struct fuse_ipc_info *ipc_info;
+
+	for_each_possible_cpu(cpu) {
+		ipc_info = per_cpu_ptr(fc->percpu_ipc_info, cpu);
+		ipc_info->ia = fuse_io_alloc(NULL, fc->max_pages);
+		if (ipc_info->ia == NULL) {
+			*ok = false;
+			break;
+		}
+	}
+}
+#endif
+
 void fuse_conn_put(struct fuse_conn *fc)
 {
 	if (refcount_dec_and_test(&fc->count)) {
 		struct fuse_iqueue *fiq = &fc->iq;

+#ifdef CONFIG_FUSE_FASTPATH
+		fuse_free_ipc_info(fc);
+#endif
 		if (IS_ENABLED(CONFIG_FUSE_DAX))
 			fuse_dax_conn_free(fc);
 		if (fiq->ops->release)
@@ -1097,6 +1188,11 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
 	}
 	kfree(ia);

+#ifdef CONFIG_FUSE_FASTPATH
+	if (fc->use_fastpath)
+		ipc_info_init(fc, &ok);
+#endif
+
 	if (!ok) {
 		fc->conn_init = 0;
 		fc->conn_error = 1;
@@ -1332,7 +1428,11 @@ int fuse_fill_super_submount(struct super_block *sb,
 	 * its nlookup should not be incremented.  fuse_iget() does
 	 * that, though, so undo it here.
 	 */
+#ifdef CONFIG_FUSE_FASTPATH
+	fuse_dec_nlookup(fm->fc, get_fuse_inode(root));
+#else
 	get_fuse_inode(root)->nlookup--;
+#endif
 	sb->s_d_op = &fuse_dentry_operations;
 	sb->s_root = d_make_root(root);
 	if (!sb->s_root)
@@ -1402,6 +1502,10 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 	fc->destroy = ctx->destroy;
 	fc->no_control = ctx->no_control;
 	fc->no_force_umount = ctx->no_force_umount;
+#ifdef CONFIG_FUSE_FASTPATH
+	fc->no_forget = ctx->no_forget;
+	fc->use_fastpath = ctx->use_fastpath;
+#endif

 	err = -ENOMEM;
 	root = fuse_get_root_inode(sb, ctx->rootmode);
@@ -1478,6 +1582,14 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
 	fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
 	fc->release = fuse_free_conn;

+#ifdef CONFIG_FUSE_FASTPATH
+	if (ctx->use_fastpath) {
+		err = fuse_alloc_ipc_info(fc);
+		if (err)
+			goto err_put_conn;
+	}
+#endif
+
 	sb->s_fs_info = fm;

 	err = fuse_fill_super_common(sb, ctx);
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 14e99ffa57af..f18632027d8b 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -218,9 +218,13 @@ static int fuse_direntplus_link(struct file *file,
 		}

 		fi = get_fuse_inode(inode);
+#ifdef CONFIG_FUSE_FASTPATH
+		fuse_inc_nlookup(fc, fi);
+#else
 		spin_lock(&fi->lock);
 		fi->nlookup++;
 		spin_unlock(&fi->lock);
+#endif
 		forget_all_cached_acls(inode);
 		fuse_change_attributes(inode, &o->attr,
@@ -247,9 +251,13 @@ static int fuse_direntplus_link(struct file *file,
 		if (!IS_ERR(inode)) {
 			struct fuse_inode *fi = get_fuse_inode(inode);

+#ifdef CONFIG_FUSE_FASTPATH
+			fuse_dec_nlookup(fc, fi);
+#else
 			spin_lock(&fi->lock);
 			fi->nlookup--;
 			spin_unlock(&fi->lock);
+#endif
 		}
 		return PTR_ERR(dentry);
 	}
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 98ca64d1beb6..5af0ebd3e567 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -902,8 +902,21 @@ struct fuse_notify_retrieve_in {
 	uint64_t	dummy4;
 };

+struct fuse_ipc_io {
+	char __user *buf;
+	uint64_t buf_len;
+	struct iovec *iov;
+	int count;
+};
+
 /* Device ioctls: */
-#define FUSE_DEV_IOC_CLONE	_IOR(229, 0, uint32_t)
+#define FUSE_DEV_IOC_MAGIC		229
+#define FUSE_DEV_IOC_CLONE		_IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
+#define FUSE_DEV_IOC_IPC_BIND		_IO(FUSE_DEV_IOC_MAGIC, 1)
+#define FUSE_DEV_IOC_WAIT_RET_CALL	_IOR(FUSE_DEV_IOC_MAGIC, 2, struct fuse_ipc_io)
+#define FUSE_DEV_IOC_IPC_UNBIND		_IO(FUSE_DEV_IOC_MAGIC, 4)
+#define FUSE_DEV_IOC_WAIT_CALL		_IOR(FUSE_DEV_IOC_MAGIC, 5, struct fuse_ipc_io)
+#define FUSE_DEV_IOC_RET_CALL		_IOR(FUSE_DEV_IOC_MAGIC, 6, struct fuse_ipc_io)

 struct fuse_lseek_in {
 	uint64_t	fh;
-- 
2.33.0
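For testing, the fastpath is opt-in per mount through the two new mount options parsed in fs/fuse/inode.c. A hedged sketch of enabling it from a daemon that has already opened /dev/fuse follows; the filesystem name and mountpoint are placeholders, and the remaining options are the standard fuse mount options:

#include <stdio.h>
#include <sys/mount.h>

/* devfd is the already-opened /dev/fuse file descriptor */
int mount_fastpath(int devfd)
{
	char opts[128];

	snprintf(opts, sizeof(opts),
		 "fd=%d,rootmode=40000,user_id=0,group_id=0,use_fastpath,no_forget",
		 devfd);
	return mount("examplefs", "/mnt/example", "fuse.examplefs", 0, opts);
}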

FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/16090
Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/XBA...