From: Miklos Szeredi mszeredi@redhat.com
mainline inclusion from mainline-v5.15-rc1 commit 0cad6246621b5887d5b33fea84219d2a71f2f99a category: perf bugzilla: https://gitee.com/openeuler/kernel/issues/I6ZCW0 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Add a rcu argument to the ->get_acl() callback to allow get_cached_acl_rcu() to call the ->get_acl() method in the next patch.
Signed-off-by: Miklos Szeredi mszeredi@redhat.com [chengzhihao: rename get_acl to get_acl2 to prevent KABI changes, and only backport(realize) overlayfs] Conflicts: fs/overlayfs/dir.c fs/overlayfs/inode.c fs/overlayfs/overlayfs.h fs/posix_acl.c include/linux/fs.h Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- fs/overlayfs/dir.c | 2 +- fs/overlayfs/inode.c | 9 ++++++--- fs/overlayfs/overlayfs.h | 2 +- fs/posix_acl.c | 7 +++++-- include/linux/fs.h | 1 + 5 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 8570e755a392..cdbb916aa636 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1292,6 +1292,6 @@ const struct inode_operations ovl_dir_inode_operations = { .permission = ovl_permission, .getattr = ovl_getattr, .listxattr = ovl_listxattr, - .get_acl = ovl_get_acl, + .get_acl2 = ovl_get_acl, .update_time = ovl_update_time, }; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 3eabe457a5d5..3aa33c18cb80 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -421,12 +421,15 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return res; }
-struct posix_acl *ovl_get_acl(struct inode *inode, int type) +struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) { struct inode *realinode = ovl_inode_real(inode); const struct cred *old_cred; struct posix_acl *acl;
+ if (rcu) + return ERR_PTR(-ECHILD); + if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL;
@@ -480,7 +483,7 @@ static const struct inode_operations ovl_file_inode_operations = { .permission = ovl_permission, .getattr = ovl_getattr, .listxattr = ovl_listxattr, - .get_acl = ovl_get_acl, + .get_acl2 = ovl_get_acl, .update_time = ovl_update_time, .fiemap = ovl_fiemap, }; @@ -498,7 +501,7 @@ static const struct inode_operations ovl_special_inode_operations = { .permission = ovl_permission, .getattr = ovl_getattr, .listxattr = ovl_listxattr, - .get_acl = ovl_get_acl, + .get_acl2 = ovl_get_acl, .update_time = ovl_update_time, };
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 76e096ad2ef9..38fada5fa552 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -358,7 +358,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); -struct posix_acl *ovl_get_acl(struct inode *inode, int type); +struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu); int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); bool ovl_is_private_xattr(const char *name);
diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 2fd0fde16fe1..8e3fe9cbb28c 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -133,11 +133,14 @@ struct posix_acl *get_acl(struct inode *inode, int type) * If the filesystem doesn't have a get_acl() function at all, we'll * just create the negative cache entry. */ - if (!inode->i_op->get_acl) { + if (!inode->i_op->get_acl && !inode->i_op->get_acl2) { set_cached_acl(inode, type, NULL); return NULL; } - acl = inode->i_op->get_acl(inode, type); + if (inode->i_op->get_acl) + acl = inode->i_op->get_acl(inode, type); + else + acl = inode->i_op->get_acl2(inode, type, false);
if (IS_ERR(acl)) { /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 6363c0a67af5..bd3e09a3c9a9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1838,6 +1838,7 @@ struct inode_operations { const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); + struct posix_acl * (*get_acl2)(struct inode *, int, bool);
int (*readlink) (struct dentry *, char __user *,int);
From: Baokun Li libaokun1@huawei.com
mainline inclusion from mainline-v6.3-rc8 commit 1ba1199ec5747f475538c0d25a32804e5ba1dfde category: bugfix bugzilla: 188601, https://gitee.com/openeuler/kernel/issues/I6TNTC CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
KASAN report null-ptr-deref: ================================================================== BUG: KASAN: null-ptr-deref in bdi_split_work_to_wbs+0x5c5/0x7b0 Write of size 8 at addr 0000000000000000 by task sync/943 CPU: 5 PID: 943 Comm: sync Tainted: 6.3.0-rc5-next-20230406-dirty #461 Call Trace: <TASK> dump_stack_lvl+0x7f/0xc0 print_report+0x2ba/0x340 kasan_report+0xc4/0x120 kasan_check_range+0x1b7/0x2e0 __kasan_check_write+0x24/0x40 bdi_split_work_to_wbs+0x5c5/0x7b0 sync_inodes_sb+0x195/0x630 sync_inodes_one_sb+0x3a/0x50 iterate_supers+0x106/0x1b0 ksys_sync+0x98/0x160 [...] ==================================================================
The race that causes the above issue is as follows:
cpu1 cpu2 -------------------------|------------------------- inode_switch_wbs INIT_WORK(&isw->work, inode_switch_wbs_work_fn) queue_rcu_work(isw_wq, &isw->work) // queue_work async inode_switch_wbs_work_fn wb_put_many(old_wb, nr_switched) percpu_ref_put_many ref->data->release(ref) cgwb_release queue_work(cgwb_release_wq, &wb->release_work) // queue_work async &wb->release_work cgwb_release_workfn ksys_sync iterate_supers sync_inodes_one_sb sync_inodes_sb bdi_split_work_to_wbs kmalloc(sizeof(*work), GFP_ATOMIC) // alloc memory failed percpu_ref_exit ref->data = NULL kfree(data) wb_get(wb) percpu_ref_get(&wb->refcnt) percpu_ref_get_many(ref, 1) atomic_long_add(nr, &ref->data->count) atomic64_add(i, v) // trigger null-ptr-deref
bdi_split_work_to_wbs() traverses &bdi->wb_list to split work into all wbs. If the allocation of new work fails, the on-stack fallback will be used and the reference count of the current wb is increased afterwards. If cgroup writeback membership switches occur before getting the reference count and the current wb is released as old_wd, then calling wb_get() or wb_put() will trigger the null pointer dereference above.
This issue was introduced in v4.3-rc7 (see fix tag1). Both sync_inodes_sb() and __writeback_inodes_sb_nr() calls to bdi_split_work_to_wbs() can trigger this issue. For scenarios called via sync_inodes_sb(), originally commit 7fc5854f8c6e ("writeback: synchronize sync(2) against cgroup writeback membership switches") reduced the possibility of the issue by adding wb_switch_rwsem, but in v5.14-rc1 (see fix tag2) removed the "inode_io_list_del_locked(inode, old_wb)" from inode_switch_wbs_work_fn() so that wb->state contains WB_has_dirty_io, thus old_wb is not skipped when traversing wbs in bdi_split_work_to_wbs(), and the issue becomes easily reproducible again.
To solve this problem, percpu_ref_exit() is called under RCU protection to avoid race between cgwb_release_workfn() and bdi_split_work_to_wbs(). Moreover, replace wb_get() with wb_tryget() in bdi_split_work_to_wbs(), and skip the current wb if wb_tryget() fails because the wb has already been shutdown.
Link: https://lkml.kernel.org/r/20230410130826.1492525-1-libaokun1@huawei.com Fixes: b817525a4a80 ("writeback: bdi_writeback iteration must not skip dying ones") Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Acked-by: Tejun Heo tj@kernel.org Cc: Alexander Viro viro@zeniv.linux.org.uk Cc: Andreas Dilger adilger.kernel@dilger.ca Cc: Christian Brauner brauner@kernel.org Cc: Dennis Zhou dennis@kernel.org Cc: Hou Tao houtao1@huawei.com Cc: yangerkun yangerkun@huawei.com Cc: Zhang Yi yi.zhang@huawei.com Cc: Jens Axboe axboe@kernel.dk Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: mm/backing-dev.c
Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- fs/fs-writeback.c | 17 ++++++++++------- mm/backing-dev.c | 12 ++++++++++-- 2 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 23a632f02839..dd53d00a2583 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -884,6 +884,16 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, continue; }
+ /* + * If wb_tryget fails, the wb has been shutdown, skip it. + * + * Pin @wb so that it stays on @bdi->wb_list. This allows + * continuing iteration from @wb after dropping and + * regrabbing rcu read lock. + */ + if (!wb_tryget(wb)) + continue; + /* alloc failed, execute synchronously using on-stack fallback */ work = &fallback_work; *work = *base_work; @@ -892,13 +902,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, work->done = &fallback_work_done;
wb_queue_work(wb, work); - - /* - * Pin @wb so that it stays on @bdi->wb_list. This allows - * continuing iteration from @wb after dropping and - * regrabbing rcu read lock. - */ - wb_get(wb); last_wb = wb;
rcu_read_unlock(); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f80d5c2ff420..c5caa7e91935 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -487,6 +487,15 @@ void wb_congested_put(struct bdi_writeback_congested *congested) kfree(congested); }
+static void cgwb_free_rcu(struct rcu_head *rcu_head) +{ + struct bdi_writeback *wb = container_of(rcu_head, + struct bdi_writeback, rcu); + + percpu_ref_exit(&wb->refcnt); + kfree(wb); +} + static void cgwb_release_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(work, struct bdi_writeback, @@ -504,9 +513,8 @@ static void cgwb_release_workfn(struct work_struct *work) blkcg_cgwb_put(blkcg);
fprop_local_destroy_percpu(&wb->memcg_completions); - percpu_ref_exit(&wb->refcnt); wb_exit(wb); - kfree_rcu(wb, rcu); + call_rcu(&wb->rcu, cgwb_free_rcu); }
static void cgwb_release(struct percpu_ref *refcnt)
From: Miklos Szeredi mszeredi@redhat.com
mainline inclusion from mainline-v5.15-rc1 commit 332f606b32b6291a944c8cf23b91f53a6e676525 category: perf bugzilla: https://gitee.com/openeuler/kernel/issues/I6ZCW0 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Overlayfs does not cache ACL's (to avoid double caching). Instead it just calls the underlying filesystem's i_op->get_acl(), which will return the cached value, if possible.
In rcu path walk, however, get_cached_acl_rcu() is employed to get the value from the cache, which will fail on overlayfs resulting in dropping out of rcu walk mode. This can result in a big performance hit in certain situations.
Fix by calling ->get_acl() with rcu=true in case of ACL_DONT_CACHE (which indicates pass-through)
Reported-by: garyhuang zjh.20052005@163.com Signed-off-by: Miklos Szeredi mszeredi@redhat.com Conflicts: fs/posix_acl.c Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- fs/overlayfs/inode.c | 7 ++++--- fs/posix_acl.c | 13 ++++++++++++- include/linux/fs.h | 5 +++++ include/linux/posix_acl.h | 3 ++- 4 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 3aa33c18cb80..5c82c95e57d2 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -13,6 +13,7 @@ #include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/ratelimit.h> +#include <linux/namei.h> #include "overlayfs.h"
@@ -427,12 +428,12 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) const struct cred *old_cred; struct posix_acl *acl;
- if (rcu) - return ERR_PTR(-ECHILD); - if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL;
+ if (rcu) + return get_cached_acl_rcu(realinode, type); + old_cred = ovl_override_creds(inode->i_sb); acl = get_acl(realinode, type); revert_creds(old_cred); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 8e3fe9cbb28c..f64ed0c7394b 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -21,6 +21,7 @@ #include <linux/xattr.h> #include <linux/export.h> #include <linux/user_namespace.h> +#include <linux/namei.h>
static struct posix_acl **acl_by_type(struct inode *inode, int type) { @@ -55,7 +56,17 @@ EXPORT_SYMBOL(get_cached_acl);
struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) { - return rcu_dereference(*acl_by_type(inode, type)); + struct posix_acl *acl = rcu_dereference(*acl_by_type(inode, type)); + + if (acl == ACL_DONT_CACHE && inode->i_op->get_acl2) { + struct posix_acl *ret; + + ret = inode->i_op->get_acl2(inode, type, LOOKUP_RCU); + if (!IS_ERR(ret)) + acl = ret; + } + + return acl; } EXPORT_SYMBOL(get_cached_acl_rcu);
diff --git a/include/linux/fs.h b/include/linux/fs.h index bd3e09a3c9a9..9729cfad4175 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -594,6 +594,11 @@ static inline void mapping_allow_writable(struct address_space *mapping)
struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +/* + * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to + * cache the ACL. This also means that ->get_acl2() can be called in RCU mode + * with the LOOKUP_RCU flag. + */ #define ACL_DONT_CACHE ((void *)(-3))
static inline struct posix_acl * diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 540595a321a7..0c842d07091a 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -71,6 +71,8 @@ extern int __posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct inode *, int, struct posix_acl *);
+struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); + #ifdef CONFIG_FS_POSIX_ACL extern int posix_acl_chmod(struct inode *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, @@ -81,7 +83,6 @@ extern int simple_set_acl(struct inode *, struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *);
struct posix_acl *get_cached_acl(struct inode *inode, int type); -struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); void forget_cached_acl(struct inode *inode, int type); void forget_all_cached_acls(struct inode *inode);
From: Zhihao Cheng chengzhihao1@huawei.com
hulk inclusion category: perf bugzilla: https://gitee.com/openeuler/kernel/issues/I6ZCW0 CVE: NA
--------------------------------
Fix kabi broken for importing new inode operation get_inode_acl.
Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jialin Zhang zhangjialin11@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- include/linux/fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h index 9729cfad4175..7dc4508d12c9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1843,7 +1843,6 @@ struct inode_operations { const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); - struct posix_acl * (*get_acl2)(struct inode *, int, bool);
int (*readlink) (struct dentry *, char __user *,int);
@@ -1868,7 +1867,11 @@ struct inode_operations { int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int);
+#ifndef __GENKSYMS__ + struct posix_acl * (*get_acl2)(struct inode *, int, bool); +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4)
From: Parav Pandit parav@mellanox.com
mainline inclusion from mainline-v5.6-rc5 commit e4103312d7b7afb8a3a7a842a33ef2b1856b2c0f category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6X49E CVE: CVE-2023-2176
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
This reverts commit 219d2e9dfda9431b808c28d5efc74b404b95b638.
The call chain below requires the cm_id_priv's destination address to be setup before performing rdma_bind_addr(). Otherwise source port allocation fails as cma_port_is_unique() no longer sees the correct tuple to allow duplicate users of the source port.
rdma_resolve_addr() cma_bind_addr() rdma_bind_addr() cma_get_port() cma_alloc_any_port() cma_port_is_unique() <- compared with zero daddr
This can result in false failures to connect, particularly if the source port range is restricted.
Fixes: 219d2e9dfda9 ("RDMA/cma: Simplify rdma_resolve_addr() error flow") Link: https://lore.kernel.org/r/20200212072635.682689-4-leon@kernel.org Signed-off-by: Parav Pandit parav@mellanox.com Signed-off-by: Leon Romanovsky leonro@mellanox.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com (cherry picked from commit e4103312d7b7afb8a3a7a842a33ef2b1856b2c0f) Signed-off-by: Liu Jian liujian56@huawei.com
Conflicts: drivers/infiniband/core/cma.c Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wang Weiyang wangweiyang2@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/infiniband/core/cma.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 319bfef00a4a..db5d4290de2b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2997,19 +2997,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret;
id_priv = container_of(id, struct rdma_id_private, id); + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) + if (ret) { + memset(cma_dst_addr(id_priv), 0, + rdma_addr_size(dst_addr)); return ret; + } }
- if (cma_family(id_priv) != dst_addr->sa_family) + if (cma_family(id_priv) != dst_addr->sa_family) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + }
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + }
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); atomic_inc(&id_priv->refcount); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv);
From: Patrisious Haddad phaddad@nvidia.com
mainline inclusion from mainline-v6.3-rc1 commit 8d037973d48c026224ab285e6a06985ccac6f7bf category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6X49E CVE: CVE-2023-2176
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Refactor rdma_bind_addr function so that it doesn't require that the cma destination address be changed before calling it.
So now it will update the destination address internally only when it is really needed and after passing all the required checks.
Which in turn results in a cleaner and more sensible call and error handling flows for the functions that call it directly or indirectly.
Signed-off-by: Patrisious Haddad phaddad@nvidia.com Reported-by: Wei Chen harperchen1110@gmail.com Reviewed-by: Mark Zhang markzhang@nvidia.com Link: https://lore.kernel.org/r/3d0e9a2fd62bc10ba02fed1c7c48a48638952320.167281927... Signed-off-by: Leon Romanovsky leon@kernel.org (cherry picked from commit 8d037973d48c026224ab285e6a06985ccac6f7bf) Signed-off-by: Liu Jian liujian56@huawei.com
Conflicts: drivers/infiniband/core/cma.c Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wang Weiyang wangweiyang2@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/infiniband/core/cma.c | 162 +++++++++++++++++----------------- 1 file changed, 83 insertions(+), 79 deletions(-)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index db5d4290de2b..20a5501b740a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2969,77 +2969,6 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) return ret; }
-static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - if (!src_addr || !src_addr->sa_family) { - src_addr = (struct sockaddr *) &id->route.addr.src_addr; - src_addr->sa_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && - dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; - struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *) src_addr)->sib_pkey = - ((struct sockaddr_ib *) dst_addr)->sib_pkey; - } - } - return rdma_bind_addr(id, src_addr); -} - -int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr, int timeout_ms) -{ - struct rdma_id_private *id_priv; - int ret; - - id_priv = container_of(id, struct rdma_id_private, id); - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - if (id_priv->state == RDMA_CM_IDLE) { - ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) { - memset(cma_dst_addr(id_priv), 0, - rdma_addr_size(dst_addr)); - return ret; - } - } - - if (cma_family(id_priv) != dst_addr->sa_family) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return -EINVAL; - } - - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return -EINVAL; - } - - atomic_inc(&id_priv->refcount); - if (cma_any_addr(dst_addr)) { - ret = cma_resolve_loopback(id_priv); - } else { - if (dst_addr->sa_family == AF_IB) { - ret = cma_resolve_ib_addr(id_priv); - } else { - ret = rdma_resolve_ip(cma_src_addr(id_priv), - dst_addr, &id->route.addr.dev_addr, - timeout_ms, addr_handler, id_priv); - } - } - if (ret) - goto err; - - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); - cma_deref_id(id_priv); - return ret; -} -EXPORT_SYMBOL(rdma_resolve_addr); - int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; @@ -3422,27 +3351,26 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) } EXPORT_SYMBOL(rdma_listen);
-int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +static int rdma_bind_addr_dst(struct rdma_id_private *id_priv, + struct sockaddr *addr, const struct sockaddr *daddr) { - struct rdma_id_private *id_priv; + struct sockaddr *id_daddr; int ret; - struct sockaddr *daddr;
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && addr->sa_family != AF_IB) return -EAFNOSUPPORT;
- id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL;
- ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); + ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr); if (ret) goto err1;
memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!cma_any_addr(addr)) { - ret = cma_translate_addr(addr, &id->route.addr.dev_addr); + ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr); if (ret) goto err1;
@@ -3462,8 +3390,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) } #endif } - daddr = cma_dst_addr(id_priv); - daddr->sa_family = addr->sa_family; + id_daddr = cma_dst_addr(id_priv); + if (daddr != id_daddr) + memcpy(id_daddr, daddr, rdma_addr_size(addr)); + id_daddr->sa_family = addr->sa_family;
ret = cma_get_port(id_priv); if (ret) @@ -3478,6 +3408,80 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + if (!src_addr || !src_addr->sa_family) { + src_addr = (struct sockaddr *) &id->route.addr.src_addr; + src_addr->sa_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && + dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; + struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *) src_addr)->sib_pkey = + ((struct sockaddr_ib *) dst_addr)->sib_pkey; + } + } + return rdma_bind_addr_dst(id_priv, src_addr, dst_addr); +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr, int timeout_ms) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == RDMA_CM_IDLE) { + ret = cma_bind_addr(id, src_addr, dst_addr); + if (ret) + return ret; + } + + if (cma_family(id_priv) != dst_addr->sa_family) + return -EINVAL; + + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) + return -EINVAL; + + atomic_inc(&id_priv->refcount); + if (cma_any_addr(dst_addr)) { + ret = cma_resolve_loopback(id_priv); + } else { + if (dst_addr->sa_family == AF_IB) { + ret = cma_resolve_ib_addr(id_priv); + } else { + ret = rdma_resolve_ip(cma_src_addr(id_priv), + dst_addr, &id->route.addr.dev_addr, + timeout_ms, addr_handler, id_priv); + } + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + cma_deref_id(id_priv); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_addr); + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv)); +} EXPORT_SYMBOL(rdma_bind_addr);
static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
From: Xin Long lucien.xin@gmail.com
mainline inclusion from mainline-v5.19-rc7 commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d category: bugfix bugzilla: 188712,https://gitee.com/src-openeuler/kernel/issues/I6X47E CVE: CVE-2023-2177
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
A NULL pointer dereference was reported by Wei Chen:
BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:__list_del_entry_valid+0x26/0x80 Call Trace: <TASK> sctp_sched_dequeue_common+0x1c/0x90 sctp_sched_prio_dequeue+0x67/0x80 __sctp_outq_teardown+0x299/0x380 sctp_outq_free+0x15/0x20 sctp_association_free+0xc3/0x440 sctp_do_sm+0x1ca7/0x2210 sctp_assoc_bh_rcv+0x1f6/0x340
This happens when calling sctp_sendmsg without connecting to server first. In this case, a data chunk already queues up in send queue of client side when processing the INIT_ACK from server in sctp_process_init() where it calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in all stream_out will be freed in sctp_stream_init's err path. Then in the asoc freeing it will crash when dequeuing this data chunk as stream_out is missing.
As we can't free stream out before dequeuing all data from send queue, and this patch is to fix it by moving the err path stream_out/in freeing in sctp_stream_init() to sctp_stream_free() which is eventually called when freeing the asoc in sctp_association_free(). This fix also makes the code in sctp_process_init() more clear.
Note that in sctp_association_init() when it fails in sctp_stream_init(), sctp_association_free() will not be called, and in that case it should go to 'stream_free' err path to free stream instead of 'fail_init'.
Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: Wei Chen harperchen1110@gmail.com Signed-off-by: Xin Long lucien.xin@gmail.com Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.165878706... Signed-off-by: Jakub Kicinski kuba@kernel.org Conflicts: net/sctp/stream.c Signed-off-by: Dong Chenchen dongchenchen2@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/sctp/associola.c | 5 ++--- net/sctp/stream.c | 18 +++--------------- 2 files changed, 5 insertions(+), 18 deletions(-)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c index d17708800652..38aba2c4bac4 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -246,9 +246,8 @@ static struct sctp_association *sctp_association_init( if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init;
- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, - 0, gfp)) - goto fail_init; + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) + goto stream_free;
/* Initialize default path MTU. */ asoc->pathmtu = sp->pathmtu; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 516bc48be5bc..435cbf4549e7 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -231,7 +231,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
ret = sctp_stream_alloc_out(stream, outcnt, gfp); if (ret) - goto out; + return ret;
stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) @@ -240,21 +240,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, in: sctp_stream_interleave_init(stream); if (!incnt) - goto out; + return 0;
- ret = sctp_stream_alloc_in(stream, incnt, gfp); - if (ret) { - sched->free(stream); - fa_free(stream->out); - stream->out = NULL; - stream->outcnt = 0; - goto out; - } - - stream->incnt = incnt; - -out: - return ret; + return sctp_stream_alloc_in(stream, incnt, gfp); }
int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)