From: Zhengchao Shao shaozhengchao@huawei.com
mainline inclusion from mainline-v5.19-rc6 commit 02884a4f12de11f54d4ca67a07dd1f111d96fdbd category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GEVR CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git/co...
--------------------------------
When the kunpeng920 encryption driver is used to encrypt and decrypt packets in softirq context, it is not allowed to use a mutex lock. The kernel will report the following error:
BUG: scheduling while atomic: swapper/57/0/0x00000300 Call trace: dump_backtrace+0x0/0x1e4 show_stack+0x20/0x2c dump_stack+0xd8/0x140 __schedule_bug+0x68/0x80 __schedule+0x728/0x840 schedule+0x50/0xe0 schedule_preempt_disabled+0x18/0x24 __mutex_lock.constprop.0+0x594/0x5dc __mutex_lock_slowpath+0x1c/0x30 mutex_lock+0x50/0x60 sec_request_init+0x8c/0x1a0 [hisi_sec2] sec_process+0x28/0x1ac [hisi_sec2] sec_skcipher_crypto+0xf4/0x1d4 [hisi_sec2] sec_skcipher_encrypt+0x1c/0x30 [hisi_sec2] crypto_skcipher_encrypt+0x2c/0x40 crypto_authenc_encrypt+0xc8/0xfc [authenc] crypto_aead_encrypt+0x2c/0x40 echainiv_encrypt+0x144/0x1a0 [echainiv] crypto_aead_encrypt+0x2c/0x40 esp_output_tail+0x348/0x5c0 [esp4] esp_output+0x120/0x19c [esp4] xfrm_output_one+0x25c/0x4d4 xfrm_output_resume+0x6c/0x1fc xfrm_output+0xac/0x3c0 xfrm4_output+0x64/0x130 ip_build_and_send_pkt+0x158/0x20c tcp_v4_send_synack+0xdc/0x1f0 tcp_conn_request+0x7d0/0x994 tcp_v4_conn_request+0x58/0x6c tcp_v6_conn_request+0xf0/0x100 tcp_rcv_state_process+0x1cc/0xd60 tcp_v4_do_rcv+0x10c/0x250 tcp_v4_rcv+0xfc4/0x10a4 ip_protocol_deliver_rcu+0xf4/0x200 ip_local_deliver_finish+0x58/0x70 ip_local_deliver+0x68/0x120 ip_sublist_rcv_finish+0x70/0x94 ip_list_rcv_finish.constprop.0+0x17c/0x1d0 ip_sublist_rcv+0x40/0xb0 ip_list_rcv+0x140/0x1dc __netif_receive_skb_list_core+0x154/0x28c __netif_receive_skb_list+0x120/0x1a0 netif_receive_skb_list_internal+0xe4/0x1f0 napi_complete_done+0x70/0x1f0 gro_cell_poll+0x9c/0xb0 napi_poll+0xcc/0x264 net_rx_action+0xd4/0x21c __do_softirq+0x130/0x358 irq_exit+0x11c/0x13c __handle_domain_irq+0x88/0xf0 gic_handle_irq+0x78/0x2c0 el1_irq+0xb8/0x140 arch_cpu_idle+0x18/0x40 default_idle_call+0x5c/0x1c0 cpuidle_idle_call+0x174/0x1b0 do_idle+0xc8/0x160 cpu_startup_entry+0x30/0x11c secondary_start_kernel+0x158/0x1e4 softirq: huh, entered softirq 3 NET_RX 0000000093774ee4 with preempt_count 00000100, exited with fffffe00?
Fixes: 416d82204df4 ("crypto: hisilicon - add HiSilicon SEC V2 driver") Signed-off-by: Zhengchao Shao shaozhengchao@huawei.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au
Signed-off-by: Zhengchao Shao shaozhengchao@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/crypto/hisilicon/sec2/sec.h | 2 +- drivers/crypto/hisilicon/sec2/sec_crypto.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index c2e9b01187a7..a44c8dba3cda 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -119,7 +119,7 @@ struct sec_qp_ctx { struct idr req_idr; struct sec_alg_res res[QM_Q_DEPTH]; struct sec_ctx *ctx; - struct mutex req_lock; + spinlock_t req_lock; struct list_head backlog; struct hisi_acc_sgl_pool *c_in_pool; struct hisi_acc_sgl_pool *c_out_pool; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 6eebe739893c..71dfa7db6394 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -127,11 +127,11 @@ static int sec_alloc_req_id(struct sec_req *req, struct sec_qp_ctx *qp_ctx) { int req_id;
- mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock);
req_id = idr_alloc_cyclic(&qp_ctx->req_idr, NULL, 0, QM_Q_DEPTH, GFP_ATOMIC); - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); if (unlikely(req_id < 0)) { dev_err(req->ctx->dev, "alloc req id fail!\n"); return req_id; @@ -156,9 +156,9 @@ static void sec_free_req_id(struct sec_req *req) qp_ctx->req_list[req_id] = NULL; req->qp_ctx = NULL;
- mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); idr_remove(&qp_ctx->req_idr, req_id); - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); }
static u8 pre_parse_finished_bd(struct bd_status *status, void *resp) @@ -273,7 +273,7 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG)) return -EBUSY;
- mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe);
if (ctx->fake_req_limit <= @@ -281,10 +281,10 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) list_add_tail(&req->backlog_head, &qp_ctx->backlog); atomic64_inc(&ctx->sec->debug.dfx.send_cnt); atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt); - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); return -EBUSY; } - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock);
if (unlikely(ret == -EBUSY)) return -ENOBUFS; @@ -487,7 +487,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
qp->req_cb = sec_req_cb;
- mutex_init(&qp_ctx->req_lock); + spin_lock_init(&qp_ctx->req_lock); idr_init(&qp_ctx->req_idr); INIT_LIST_HEAD(&qp_ctx->backlog);
@@ -1382,7 +1382,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx, { struct sec_req *backlog_req = NULL;
- mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); if (ctx->fake_req_limit >= atomic_read(&qp_ctx->qp->qp_status.used) && !list_empty(&qp_ctx->backlog)) { @@ -1390,7 +1390,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx, typeof(*backlog_req), backlog_head); list_del(&backlog_req->backlog_head); } - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock);
return backlog_req; }
From: GUO Zihua guozihua@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GRSV CVE: NA
--------------------------------
In function pgp_generate_fingerprint, the return value is not set correctly should kmalloc fail, leading to the caller triggering an out-of-bounds read while reading ctx.fingerprint or ctx.raw_fingerprint.
This patch fixes this issue by setting correct return value on the error path.
Fixes: 4006f47d4e21 ("KEYS: PGP data parser") Signed-off-by: GUO Zihua guozihua@huawei.com Reviewed-by: Roberto Sassu roberto.sassu@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- crypto/asymmetric_keys/pgp_public_key.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/crypto/asymmetric_keys/pgp_public_key.c b/crypto/asymmetric_keys/pgp_public_key.c index 27b9efeafc4f..33a089797e59 100644 --- a/crypto/asymmetric_keys/pgp_public_key.c +++ b/crypto/asymmetric_keys/pgp_public_key.c @@ -152,8 +152,10 @@ static int pgp_generate_fingerprint(struct pgp_key_data_parse_context *ctx, digest_size = crypto_shash_digestsize(tfm);
raw_fingerprint = kmalloc(digest_size, GFP_KERNEL); - if (!raw_fingerprint) + if (!raw_fingerprint) { + ret = -ENOMEM; goto cleanup_hash; + }
ret = crypto_shash_final(digest, raw_fingerprint); if (ret < 0) @@ -161,8 +163,10 @@ static int pgp_generate_fingerprint(struct pgp_key_data_parse_context *ctx,
ctx->fingerprint_len = digest_size * 2; fingerprint = kmalloc(digest_size * 2 + 1, GFP_KERNEL); - if (!fingerprint) + if (!fingerprint) { + ret = -ENOMEM; goto cleanup_raw_fingerprint; + }
offset = digest_size - 8; pr_debug("offset %u/%u\n", offset, digest_size);
From: GUO Zihua guozihua@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5H4FC CVE: NA
--------------------------------
Under normal conditions, when there is a user id packet, there will always be a public key packet in front of it, meaning ctx.fingerprint will never be NULL. However, if a malicious or faulty PGP key is provided with only a user id packet but no public key packet, an out-of-bounds read will be triggered during the generation of the key description. To make things worse, a NULL pointer dereference could be triggered in pgp_key_generate_id().
This patch adds a safe guard which prevents parsing the key further if no public key packet is provided.
Fixes: a98cb7a4b757 ("KEYS: Provide PGP key description autogeneration") Signed-off-by: GUO Zihua guozihua@huawei.com Reviewed-by: Roberto Sassu roberto.sassu@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- crypto/asymmetric_keys/pgp_public_key.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/crypto/asymmetric_keys/pgp_public_key.c b/crypto/asymmetric_keys/pgp_public_key.c index 33a089797e59..98b1707a0164 100644 --- a/crypto/asymmetric_keys/pgp_public_key.c +++ b/crypto/asymmetric_keys/pgp_public_key.c @@ -315,6 +315,11 @@ static int pgp_key_parse(struct key_preparsed_payload *prep) if (ret < 0) goto error;
+ if (!ctx.fingerprint) { + ret = -EINVAL; + goto error; + } + if (ctx.user_id && ctx.user_id_len > 0) { /* Propose a description for the key * (user ID without the comment)
From: GUO Zihua guozihua@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5H61R CVE: NA
--------------------------------
pgp_key_generate_id() is trying to get the size of a flexible-length structure; however, sizeof() is called on the pointer itself. Besides, since it is computing the size of a flexible-length structure, use struct_size() instead.
Fixes: 4006f47d4e21 ("KEYS: PGP data parser") Signed-off-by: GUO Zihua guozihua@huawei.com Reviewed-by: Roberto Sassu roberto.sassu@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- crypto/asymmetric_keys/pgp_public_key.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/crypto/asymmetric_keys/pgp_public_key.c b/crypto/asymmetric_keys/pgp_public_key.c index 98b1707a0164..928029a13435 100644 --- a/crypto/asymmetric_keys/pgp_public_key.c +++ b/crypto/asymmetric_keys/pgp_public_key.c @@ -283,7 +283,8 @@ static struct asymmetric_key_ids *pgp_key_generate_id( goto error;
kids->id[0] = kid; - kids->id[1] = kmemdup(kid, sizeof(kid) + fingerprint_len, GFP_KERNEL); + kids->id[1] = kmemdup(kid, struct_size(kid, data, fingerprint_len), + GFP_KERNEL); if (!kids->id[1]) goto error;
From: Yu Kuai yukuai3@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5HEZ8 CVE: NA
--------------------------------
commit 5c250d556e10 ("blk-mq: fix kabi broken in struct request") introduced 'struct request_wrapper' to fix the kabi breakage in 'struct request', which requires allocating more space for 'struct request'. However, flush_rq was missed in that adaptation, which leads to the following slab-out-of-bounds:
================================================================== BUG: KASAN: slab-out-of-bounds in sg_init_table+0x23/0x40 Write of size 4096 at addr ffff88812249a148 by task swapper/0/1
Call Trace: dump_stack+0xbe/0xf9 ? sg_init_table+0x23/0x40 print_address_description.constprop.0+0x1e/0x220 ? _raw_spin_lock_irqsave+0x80/0xe0 ? _raw_write_unlock_irqrestore+0x20/0x20 ? blk_alloc_flush_queue+0xd3/0x1a0 ? sg_init_table+0x23/0x40 ? sg_init_table+0x23/0x40 kasan_report.cold+0x67/0x7f ? sg_init_table+0x23/0x40 check_memory_region+0x17c/0x1e0 memset+0x20/0x40 sg_init_table+0x23/0x40 virtblk_init_request+0x3d/0x50 ? virtblk_map_queues+0x40/0x40 blk_mq_realloc_hw_ctxs+0x44d/0xb50 blk_mq_init_allocated_queue+0x20f/0x980 ? blk_set_default_limits+0x1ac/0x1c0 ? blk_alloc_queue+0x3f0/0x410 blk_mq_init_queue_data+0x58/0xa0 virtblk_probe+0x51b/0xee0 ? cache_type_store+0x1a0/0x1a0 ? __sanitizer_cov_trace_switch+0x50/0x90 ? ioread8+0x89/0xa0 virtio_dev_probe+0x449/0x5d0 ? virtio_features_ok.part.0+0xb0/0xb0 really_probe+0x26d/0x8a0 driver_probe_device+0xef/0x280 device_driver_attach+0xaf/0xc0 __driver_attach+0x158/0x280 ? device_driver_attach+0xc0/0xc0 bus_for_each_dev+0x111/0x180 ? subsys_dev_iter_exit+0x20/0x20 ? bus_add_driver+0xb6/0x3e0 ? klist_node_init+0x7c/0xb0 bus_add_driver+0x336/0x3e0 driver_register+0x105/0x1a0 ? nbd_init+0x273/0x273 init+0x69/0xad do_one_initcall+0xcb/0x370 ? initcall_blacklisted+0x1b0/0x1b0 ? parameq+0x110/0x110 ? __kasan_kmalloc.constprop.0+0xc2/0xd0 ? kasan_unpoison_shadow+0x33/0x40 do_initcalls+0x223/0x265 kernel_init_freeable+0x2bb/0x302 ? rest_init+0xea/0xea kernel_init+0x13/0x1f6 ? rest_init+0xea/0xea ret_from_fork+0x22/0x30
Allocated by task 1: kasan_save_stack+0x1b/0x40 __kasan_kmalloc.constprop.0+0xc2/0xd0 blk_alloc_flush_queue+0xd3/0x1a0 blk_mq_realloc_hw_ctxs+0x9fa/0xb50 blk_mq_init_allocated_queue+0x20f/0x980 blk_mq_init_queue_data+0x58/0xa0 virtblk_probe+0x51b/0xee0 virtio_dev_probe+0x449/0x5d0 really_probe+0x26d/0x8a0 driver_probe_device+0xef/0x280 device_driver_attach+0xaf/0xc0 __driver_attach+0x158/0x280 bus_for_each_dev+0x111/0x180 bus_add_driver+0x336/0x3e0 driver_register+0x105/0x1a0 init+0x69/0xad do_one_initcall+0xcb/0x370 do_initcalls+0x223/0x265 kernel_init_freeable+0x2bb/0x302 kernel_init+0x13/0x1f6 ret_from_fork+0x22/0x30
Fixes: 5c250d556e10 ("blk-mq: fix kabi broken in struct request") Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- block/blk-flush.c | 2 +- drivers/scsi/scsi_error.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/blk-flush.c b/block/blk-flush.c index 82919829bc4d..71faf07a626f 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -470,7 +470,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags) { struct blk_flush_queue *fq; - int rq_sz = sizeof(struct request); + int rq_sz = sizeof(struct request_wrapper);
fq = kzalloc_node(sizeof(*fq), flags, node); if (!fq) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index f11f51e2465f..bcbeadb2d0f0 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2359,7 +2359,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg) return -EIO;
error = -EIO; - rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) + + rq = kzalloc(sizeof(struct request_wrapper) + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size, GFP_KERNEL); if (!rq) goto out_put_autopm_host;
From: Zhang Zekun zhangzekun11@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5E461 CVE: NA
------------------------
Enable ACPI HMAT and memory hot remove feature on arm64 by default.
For ACPI_HMAT: ACPI HMAT describes the memory attributes, such as bandwidth and latency details, related to the System Physical Address (SPA) Memory Ranges. HMAT is especially useful when software wants to get information about the attributes of certain special memory, such as PMEM and HBM.
For MEMORY_HOTREMOVE: Add support for the memory hot remove feature. Some special memory, such as HBM, can be power consuming, and will only be used in some targeted scenarios. With the memory hot remove feature, the user can offline idle memory for energy saving purposes when this special memory is unused.
As PMEM and HBM are getting more popular, ACPI_HMAT and memory hot remove feature should be enabled as default.
The following configs should be opened with CONFIG_ACPI_HMAT by default: 1.CONFIG_EFI_SOFT_RESERVE=y 2.CONFIG_HMEM_REPORTING=y 3.CONFIG_DEV_DAX_HMEM=m 4.CONFIG_DEV_DAX_HMEM_DEVICES=y
Signed-off-by: Zhang Zekun zhangzekun11@huawei.com Reviewed-by: Chao Liu liuchao173@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Reviewed-by: Kai Liu kai.liu@suse.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/configs/openeuler_defconfig | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 78a63cbc3db6..d246fd508ef6 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -709,7 +709,12 @@ CONFIG_ACPI_REDUCED_HARDWARE_ONLY=y CONFIG_ACPI_NFIT=m # CONFIG_NFIT_SECURITY_DEBUG is not set CONFIG_ACPI_NUMA=y -# CONFIG_ACPI_HMAT is not set +CONFIG_ACPI_HMAT=y +CONFIG_EFI_SOFT_RESERVE=y +# CONFIG_ZONE_DEVICE is not set +CONFIG_HMEM_REPORTING=y +CONFIG_DEV_DAX_HMEM=m +CONFIG_DEV_DAX_HMEM_DEVICES=y CONFIG_HAVE_ACPI_APEI=y CONFIG_ACPI_APEI=y CONFIG_ACPI_APEI_GHES=y @@ -1046,7 +1051,7 @@ CONFIG_COHERENT_DEVICE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTPLUG_SPARSE=y CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y -# CONFIG_MEMORY_HOTREMOVE is not set +CONFIG_MEMORY_HOTREMOVE=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MEMORY_BALLOON=y CONFIG_BALLOON_COMPACTION=y
From: Chen Zhongjin chenzhongjin@huawei.com
mainline inclusion from mainline-v5.18-rc1 commit b6f21d14f1ac1261579b691673a0c823275cbaf8 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5H8ET
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
For EABI stack unwinding, when loading a .ko module the EXIDX sections will be added to an unwind_table list.
However not all EXIDX sections are added because EXIDX sections are searched by hardcoded section names.
For functions in other sections such as .ref.text or .kprobes.text, gcc generates separate EXIDX sections (such as .ARM.exidx.ref.text or .ARM.exidx.kprobes.text).
These extra EXIDX sections are not loaded, so unwinding functions in these sections will fail with:
unwind: Index not found xxx
To fix that, I refactor the code for searching and adding EXIDX sections:
- Check section type to search EXIDX tables (0x70000001) instead of strcmp() the hardcoded names. Then find the corresponding text sections by their section names.
- Add an unwind_table list in module->arch to hold the module's own unwind tables instead of the fixed-length array.
- Save .ARM.exidx.init.text section ptr, because it should be cleaned after module init.
Now all EXIDX sections of .ko can be added correctly.
Signed-off-by: Chen Zhongjin chenzhongjin@huawei.com Reviewed-by: Kuohai Xu xukuohai@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm/include/asm/module.h | 17 ++------ arch/arm/include/asm/unwind.h | 1 + arch/arm/kernel/module.c | 78 ++++++++++++++++++----------------- 3 files changed, 45 insertions(+), 51 deletions(-)
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 734b8fe36896..c7c5dc6f3777 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -3,20 +3,10 @@ #define _ASM_ARM_MODULE_H
#include <asm-generic/module.h> - -struct unwind_table; +#include <asm/unwind.h>
#ifdef CONFIG_ARM_UNWIND -enum { - ARM_SEC_INIT, - ARM_SEC_DEVINIT, - ARM_SEC_CORE, - ARM_SEC_EXIT, - ARM_SEC_DEVEXIT, - ARM_SEC_HOT, - ARM_SEC_UNLIKELY, - ARM_SEC_MAX, -}; +#define ELF_SECTION_UNWIND 0x70000001 #endif
#define PLT_ENT_STRIDE L1_CACHE_BYTES @@ -37,7 +27,8 @@ struct mod_plt_sec {
struct mod_arch_specific { #ifdef CONFIG_ARM_UNWIND - struct unwind_table *unwind[ARM_SEC_MAX]; + struct list_head unwind_list; + struct unwind_table *init_table; #endif #ifdef CONFIG_ARM_MODULE_PLTS struct mod_plt_sec core; diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h index 0f8a3439902d..b51f85417f58 100644 --- a/arch/arm/include/asm/unwind.h +++ b/arch/arm/include/asm/unwind.h @@ -24,6 +24,7 @@ struct unwind_idx {
struct unwind_table { struct list_head list; + struct list_head mod_list; const struct unwind_idx *start; const struct unwind_idx *origin; const struct unwind_idx *stop; diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 1cd09cf38c69..bfe2bc380d38 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -369,46 +369,40 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, #ifdef CONFIG_ARM_UNWIND const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum; - struct mod_unwind_map maps[ARM_SEC_MAX]; - int i; + struct list_head *unwind_list = &mod->arch.unwind_list;
- memset(maps, 0, sizeof(maps)); + INIT_LIST_HEAD(unwind_list); + mod->arch.init_table = NULL;
for (s = sechdrs; s < sechdrs_end; s++) { const char *secname = secstrs + s->sh_name; + const char *txtname; + const Elf_Shdr *txt_sec;
- if (!(s->sh_flags & SHF_ALLOC)) + if (!(s->sh_flags & SHF_ALLOC) || + s->sh_type != ELF_SECTION_UNWIND) continue;
- if (strcmp(".ARM.exidx.init.text", secname) == 0) - maps[ARM_SEC_INIT].unw_sec = s; - else if (strcmp(".ARM.exidx", secname) == 0) - maps[ARM_SEC_CORE].unw_sec = s; - else if (strcmp(".ARM.exidx.exit.text", secname) == 0) - maps[ARM_SEC_EXIT].unw_sec = s; - else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0) - maps[ARM_SEC_UNLIKELY].unw_sec = s; - else if (strcmp(".ARM.exidx.text.hot", secname) == 0) - maps[ARM_SEC_HOT].unw_sec = s; - else if (strcmp(".init.text", secname) == 0) - maps[ARM_SEC_INIT].txt_sec = s; - else if (strcmp(".text", secname) == 0) - maps[ARM_SEC_CORE].txt_sec = s; - else if (strcmp(".exit.text", secname) == 0) - maps[ARM_SEC_EXIT].txt_sec = s; - else if (strcmp(".text.unlikely", secname) == 0) - maps[ARM_SEC_UNLIKELY].txt_sec = s; - else if (strcmp(".text.hot", secname) == 0) - maps[ARM_SEC_HOT].txt_sec = s; - } + if (!strcmp(".ARM.exidx", secname)) + txtname = ".text"; + else + txtname = secname + strlen(".ARM.exidx"); + txt_sec = find_mod_section(hdr, sechdrs, txtname); + + if (txt_sec) { + struct unwind_table *table = + unwind_table_add(s->sh_addr, + s->sh_size, + txt_sec->sh_addr, + txt_sec->sh_size);
- for (i = 0; i < ARM_SEC_MAX; i++) - if (maps[i].unw_sec && maps[i].txt_sec) - mod->arch.unwind[i] = - unwind_table_add(maps[i].unw_sec->sh_addr, - maps[i].unw_sec->sh_size, - maps[i].txt_sec->sh_addr, - maps[i].txt_sec->sh_size); + list_add(&table->mod_list, unwind_list); + + /* save init table for module_arch_freeing_init */ + if (strcmp(".ARM.exidx.init.text", secname) == 0) + mod->arch.init_table = table; + } + } #endif #ifdef CONFIG_ARM_PATCH_PHYS_VIRT s = find_mod_section(hdr, sechdrs, ".pv_table"); @@ -429,19 +423,27 @@ void module_arch_cleanup(struct module *mod) { #ifdef CONFIG_ARM_UNWIND - int i; + struct unwind_table *tmp; + struct unwind_table *n;
- for (i = 0; i < ARM_SEC_MAX; i++) { - unwind_table_del(mod->arch.unwind[i]); - mod->arch.unwind[i] = NULL; + list_for_each_entry_safe(tmp, n, + &mod->arch.unwind_list, mod_list) { + list_del(&tmp->mod_list); + unwind_table_del(tmp); } + mod->arch.init_table = NULL; #endif }
void __weak module_arch_freeing_init(struct module *mod) { #ifdef CONFIG_ARM_UNWIND - unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]); - mod->arch.unwind[ARM_SEC_INIT] = NULL; + struct unwind_table *init = mod->arch.init_table; + + if (init) { + mod->arch.init_table = NULL; + list_del(&init->mod_list); + unwind_table_del(init); + } #endif }
From: Al Viro viro@zeniv.linux.org.uk
mainline inclusion from mainline-v5.14-rc1 commit 185ab886d3fb283e837283c343bf539c371e26cf category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Duplicated logic in all bind variants (autobind, bind-to-path, bind-to-abstract) is moved into a common helper.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b7edca89e0ba..8c5389e4cb2c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -262,6 +262,14 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) sk_add_node(sk, list); }
+static void __unix_set_addr(struct sock *sk, struct unix_address *addr, + unsigned hash) +{ + __unix_remove_socket(sk); + smp_store_release(&unix_sk(sk)->addr, addr); + __unix_insert_socket(&unix_socket_table[hash], sk); +} + static inline void unix_remove_socket(struct sock *sk) { spin_lock(&unix_table_lock); @@ -935,9 +943,7 @@ static int unix_autobind(struct socket *sock) } addr->hash ^= sk->sk_type;
- __unix_remove_socket(sk); - smp_store_release(&u->addr, addr); - __unix_insert_socket(&unix_socket_table[addr->hash], sk); + __unix_set_addr(sk, addr, addr->hash); spin_unlock(&unix_table_lock); err = 0;
@@ -1039,7 +1045,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) int err; unsigned int hash; struct unix_address *addr; - struct hlist_head *list; struct path path = { };
err = -EINVAL; @@ -1091,25 +1096,20 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); u->path = path; - list = &unix_socket_table[hash]; } else { spin_lock(&unix_table_lock); err = -EADDRINUSE; if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { + spin_unlock(&unix_table_lock); unix_release_addr(addr); - goto out_unlock; + goto out_up; } - - list = &unix_socket_table[addr->hash]; + hash = addr->hash; }
err = 0; - __unix_remove_socket(sk); - smp_store_release(&u->addr, addr); - __unix_insert_socket(list, sk); - -out_unlock: + __unix_set_addr(sk, addr, hash); spin_unlock(&unix_table_lock); out_up: mutex_unlock(&u->bindlock);
From: Al Viro viro@zeniv.linux.org.uk
mainline inclusion from mainline-v5.14-rc1 commit c34d4582518ff83a4848c2d33a46be82e2499a5b category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
makes it easier to massage; we do pay for that by extra work (kmalloc+memcpy+kfree) in some error cases, but those are not on the hot paths anyway.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8c5389e4cb2c..8dcd342bee10 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1061,6 +1061,15 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (err < 0) goto out; addr_len = err; + err = -ENOMEM; + addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); + if (!addr) + goto out; + + memcpy(addr->name, sunaddr, addr_len); + addr->len = addr_len; + addr->hash = hash ^ sk->sk_type; + refcount_set(&addr->refcnt, 1);
if (sun_path[0]) { umode_t mode = S_IFSOCK | @@ -1069,7 +1078,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (err) { if (err == -EEXIST) err = -EADDRINUSE; - goto out; + goto out_addr; } }
@@ -1081,16 +1090,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (u->addr) goto out_up;
- err = -ENOMEM; - addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); - if (!addr) - goto out_up; - - memcpy(addr->name, sunaddr, addr_len); - addr->len = addr_len; - addr->hash = hash ^ sk->sk_type; - refcount_set(&addr->refcnt, 1); - if (sun_path[0]) { addr->hash = UNIX_HASH_SIZE; hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); @@ -1102,20 +1101,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { spin_unlock(&unix_table_lock); - unix_release_addr(addr); goto out_up; } hash = addr->hash; }
- err = 0; __unix_set_addr(sk, addr, hash); spin_unlock(&unix_table_lock); + addr = NULL; + err = 0; out_up: mutex_unlock(&u->bindlock); out_put: if (err) path_put(&path); +out_addr: + if (addr) + unix_release_addr(addr); out: return err; }
From: Al Viro viro@zeniv.linux.org.uk
mainline inclusion from mainline-v5.14-rc1 commit aee515170576609a0aa3413dc06a7f36f05a5fe2 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
We do get some duplication that way, but it's minor compared to parts that are different. What we get is an ability to change locking in BSD case without making failure exits very hard to follow.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 55 ++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 21 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8dcd342bee10..685c190e4366 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1045,7 +1045,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) int err; unsigned int hash; struct unix_address *addr; - struct path path = { };
err = -EINVAL; if (addr_len < offsetofend(struct sockaddr_un, sun_family) || @@ -1072,6 +1071,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) refcount_set(&addr->refcnt, 1);
if (sun_path[0]) { + struct path path = { }; umode_t mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); err = unix_mknod(sun_path, mode, &path); @@ -1080,41 +1080,54 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EADDRINUSE; goto out_addr; } - }
- err = mutex_lock_interruptible(&u->bindlock); - if (err) - goto out_put; + err = mutex_lock_interruptible(&u->bindlock); + if (err) { + path_put(&path); + goto out_addr; + }
- err = -EINVAL; - if (u->addr) - goto out_up; + err = -EINVAL; + if (u->addr) { + mutex_unlock(&u->bindlock); + path_put(&path); + goto out_addr; + }
- if (sun_path[0]) { addr->hash = UNIX_HASH_SIZE; hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); u->path = path; + __unix_set_addr(sk, addr, hash); + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + addr = NULL; + err = 0; } else { + err = mutex_lock_interruptible(&u->bindlock); + if (err) + goto out_addr; + + err = -EINVAL; + if (u->addr) { + mutex_unlock(&u->bindlock); + goto out_addr; + } + spin_lock(&unix_table_lock); err = -EADDRINUSE; if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { spin_unlock(&unix_table_lock); - goto out_up; + mutex_unlock(&u->bindlock); + goto out_addr; } - hash = addr->hash; + __unix_set_addr(sk, addr, addr->hash); + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + addr = NULL; + err = 0; } - - __unix_set_addr(sk, addr, hash); - spin_unlock(&unix_table_lock); - addr = NULL; - err = 0; -out_up: - mutex_unlock(&u->bindlock); -out_put: - if (err) - path_put(&path); out_addr: if (addr) unix_release_addr(addr);
From: Al Viro viro@zeniv.linux.org.uk
mainline inclusion from mainline-v5.14-rc1 commit fa42d910a38ee310d5c6826563dd58a08735d5b0 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
unix_bind_bsd() and unix_bind_abstract() respectively.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 147 +++++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 73 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 685c190e4366..9172058866b4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1035,104 +1035,105 @@ static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) return err; }
+static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) +{ + struct unix_sock *u = unix_sk(sk); + struct path path = { }; + umode_t mode = S_IFSOCK | + (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); + unsigned int hash; + int err; + + err = unix_mknod(addr->name->sun_path, mode, &path); + if (err) + return err; + + err = mutex_lock_interruptible(&u->bindlock); + if (err) { + path_put(&path); + return err; + } + + if (u->addr) { + mutex_unlock(&u->bindlock); + path_put(&path); + return -EINVAL; + } + + addr->hash = UNIX_HASH_SIZE; + hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); + spin_lock(&unix_table_lock); + u->path = path; + __unix_set_addr(sk, addr, hash); + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + return 0; +} + +static int unix_bind_abstract(struct sock *sk, unsigned hash, + struct unix_address *addr) +{ + struct unix_sock *u = unix_sk(sk); + int err; + + err = mutex_lock_interruptible(&u->bindlock); + if (err) + return err; + + if (u->addr) { + mutex_unlock(&u->bindlock); + return -EINVAL; + } + + spin_lock(&unix_table_lock); + if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, + sk->sk_type, hash)) { + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + return -EADDRINUSE; + } + __unix_set_addr(sk, addr, addr->hash); + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + return 0; +} + static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; int err; unsigned int hash; struct unix_address *addr;
- err = -EINVAL; if (addr_len < offsetofend(struct sockaddr_un, sun_family) || sunaddr->sun_family != AF_UNIX) - goto out; + return -EINVAL;
- if (addr_len == sizeof(short)) { - err = unix_autobind(sock); - goto out; - } + if (addr_len == sizeof(short)) + return unix_autobind(sock);
err = unix_mkname(sunaddr, addr_len, &hash); if (err < 0) - goto out; + return err; addr_len = err; - err = -ENOMEM; addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); if (!addr) - goto out; + return -ENOMEM;
memcpy(addr->name, sunaddr, addr_len); addr->len = addr_len; addr->hash = hash ^ sk->sk_type; refcount_set(&addr->refcnt, 1);
- if (sun_path[0]) { - struct path path = { }; - umode_t mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(sun_path, mode, &path); - if (err) { - if (err == -EEXIST) - err = -EADDRINUSE; - goto out_addr; - } - - err = mutex_lock_interruptible(&u->bindlock); - if (err) { - path_put(&path); - goto out_addr; - } - - err = -EINVAL; - if (u->addr) { - mutex_unlock(&u->bindlock); - path_put(&path); - goto out_addr; - } - - addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); - spin_lock(&unix_table_lock); - u->path = path; - __unix_set_addr(sk, addr, hash); - spin_unlock(&unix_table_lock); - mutex_unlock(&u->bindlock); - addr = NULL; - err = 0; - } else { - err = mutex_lock_interruptible(&u->bindlock); - if (err) - goto out_addr; - - err = -EINVAL; - if (u->addr) { - mutex_unlock(&u->bindlock); - goto out_addr; - } - - spin_lock(&unix_table_lock); - err = -EADDRINUSE; - if (__unix_find_socket_byname(net, sunaddr, addr_len, - sk->sk_type, hash)) { - spin_unlock(&unix_table_lock); - mutex_unlock(&u->bindlock); - goto out_addr; - } - __unix_set_addr(sk, addr, addr->hash); - spin_unlock(&unix_table_lock); - mutex_unlock(&u->bindlock); - addr = NULL; - err = 0; - } -out_addr: - if (addr) + if (sun_path[0]) + err = unix_bind_bsd(sk, addr); + else + err = unix_bind_abstract(sk, hash, addr); + if (err) unix_release_addr(addr); -out: - return err; + return err == -EEXIST ? -EADDRINUSE : err; }
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit 755662ce78d14c1a9118df921c528b1f992ded2e category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
The length of the AF_UNIX socket address contains an offset to the member sun_path of struct sockaddr_un.
Currently, the preceding member is just sun_family, whose type is sa_family_t, which resolves to short. Therefore, the offset is represented by sizeof(short). However, this is unclear and fragile with respect to changes in struct sockaddr_storage or sockaddr_un.
This commit makes it clear and robust by rewriting sizeof() with offsetof().
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 19 ++++++++++++------- net/unix/diag.c | 3 ++- 2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9172058866b4..cdd2a46e2a98 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -230,7 +230,8 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp { *hashp = 0;
- if (len <= sizeof(short) || len > sizeof(*sunaddr)) + if (len <= offsetof(struct sockaddr_un, sun_path) || + len > sizeof(*sunaddr)) return -EINVAL; if (!sunaddr || sunaddr->sun_family != AF_UNIX) return -EINVAL; @@ -243,7 +244,8 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp * kernel address buffer. */ ((char *)sunaddr)[len] = 0; - len = strlen(sunaddr->sun_path)+1+sizeof(short); + len = strlen(sunaddr->sun_path) + + offsetof(struct sockaddr_un, sun_path) + 1; return len; }
@@ -911,7 +913,8 @@ static int unix_autobind(struct socket *sock) goto out;
err = -ENOMEM; - addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); + addr = kzalloc(sizeof(*addr) + + offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); if (!addr) goto out;
@@ -919,7 +922,8 @@ static int unix_autobind(struct socket *sock) refcount_set(&addr->refcnt, 1);
retry: - addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); + addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + + offsetof(struct sockaddr_un, sun_path) + 1; addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
spin_lock(&unix_table_lock); @@ -1111,7 +1115,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sunaddr->sun_family != AF_UNIX) return -EINVAL;
- if (addr_len == sizeof(short)) + if (addr_len == offsetof(struct sockaddr_un, sun_path)) return unix_autobind(sock);
err = unix_mkname(sunaddr, addr_len, &hash); @@ -1549,7 +1553,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) if (!addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; - err = sizeof(short); + err = offsetof(struct sockaddr_un, sun_path); } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); @@ -2927,7 +2931,8 @@ static int unix_seq_show(struct seq_file *seq, void *v) seq_putc(seq, ' ');
i = 0; - len = u->addr->len - sizeof(short); + len = u->addr->len - + offsetof(struct sockaddr_un, sun_path); if (!UNIX_ABSTRACT(s)) len--; else { diff --git a/net/unix/diag.c b/net/unix/diag.c index 9ff64f9df1f3..dd77e81b41a0 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -19,7 +19,8 @@ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) if (!addr) return 0;
- return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short), + return nla_put(nlskb, UNIX_DIAG_NAME, + addr->len - offsetof(struct sockaddr_un, sun_path), addr->name->sun_path); }
From: Al Viro viro@zeniv.linux.org.uk
mainline inclusion from mainline-v5.14-rc1 commit be752283a2a2b4bfc2df512b5d9b03a34aece252 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
__unix_find_socket_byname() only cares about the exclusive or of hash and type, so pass that directly instead of passing the two separately. This makes life simpler for callers as well.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index cdd2a46e2a98..e59f5714e02f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -288,11 +288,11 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
static struct sock *__unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, - int len, int type, unsigned int hash) + int len, unsigned int hash) { struct sock *s;
- sk_for_each(s, &unix_socket_table[hash ^ type]) { + sk_for_each(s, &unix_socket_table[hash]) { struct unix_sock *u = unix_sk(s);
if (!net_eq(sock_net(s), net)) @@ -307,13 +307,12 @@ static struct sock *__unix_find_socket_byname(struct net *net,
static inline struct sock *unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, - int len, int type, - unsigned int hash) + int len, unsigned int hash) { struct sock *s;
spin_lock(&unix_table_lock); - s = __unix_find_socket_byname(net, sunname, len, type, hash); + s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); spin_unlock(&unix_table_lock); @@ -925,12 +924,12 @@ static int unix_autobind(struct socket *sock) addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + offsetof(struct sockaddr_un, sun_path) + 1; addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); + addr->hash ^= sk->sk_type;
spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF;
- if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, - addr->hash)) { + if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) { spin_unlock(&unix_table_lock); /* * __unix_find_socket_byname() may take long time if many names @@ -945,7 +944,6 @@ static int unix_autobind(struct socket *sock) } goto retry; } - addr->hash ^= sk->sk_type;
__unix_set_addr(sk, addr, addr->hash); spin_unlock(&unix_table_lock); @@ -992,7 +990,7 @@ static struct sock *unix_find_other(struct net *net, } } else { err = -ECONNREFUSED; - u = unix_find_socket_byname(net, sunname, len, type, hash); + u = unix_find_socket_byname(net, sunname, len, type ^ hash); if (u) { struct dentry *dentry; dentry = unix_sk(u)->path.dentry; @@ -1074,8 +1072,7 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) return 0; }
-static int unix_bind_abstract(struct sock *sk, unsigned hash, - struct unix_address *addr) +static int unix_bind_abstract(struct sock *sk, struct unix_address *addr) { struct unix_sock *u = unix_sk(sk); int err; @@ -1091,7 +1088,7 @@ static int unix_bind_abstract(struct sock *sk, unsigned hash,
spin_lock(&unix_table_lock); if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - sk->sk_type, hash)) { + addr->hash)) { spin_unlock(&unix_table_lock); mutex_unlock(&u->bindlock); return -EADDRINUSE; @@ -1134,7 +1131,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (sun_path[0]) err = unix_bind_bsd(sk, addr); else - err = unix_bind_abstract(sk, hash, addr); + err = unix_bind_abstract(sk, addr); if (err) unix_release_addr(addr); return err == -EEXIST ? -EADDRINUSE : err;
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit f7ed31f4615f4e1d97c0e4325c5b8a240e10073c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
We do not use struct socket in unix_autobind() and pass struct sock to unix_bind_bsd() and unix_bind_abstract(). Let's pass it to unix_autobind() as well.
Also, this patch fixes the following errors reported by checkpatch.pl.
ERROR: do not use assignment in if condition #1795: FILE: net/unix/af_unix.c:1795: + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
CHECK: Logical continuations should be on the previous line #1796: FILE: net/unix/af_unix.c:1796: + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr + && (err = unix_autobind(sock)) != 0)
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e59f5714e02f..006e35065e51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -894,15 +894,13 @@ static int unix_release(struct socket *sock) return 0; }
-static int unix_autobind(struct socket *sock) +static int unix_autobind(struct sock *sk) { - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); - static u32 ordernum = 1; struct unix_address *addr; - int err; unsigned int retries = 0; + static u32 ordernum = 1; + int err;
err = mutex_lock_interruptible(&u->bindlock); if (err) @@ -929,7 +927,8 @@ static int unix_autobind(struct socket *sock) spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF;
- if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) { + if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, + addr->hash)) { spin_unlock(&unix_table_lock); /* * __unix_find_socket_byname() may take long time if many names @@ -1113,7 +1112,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL;
if (addr_len == offsetof(struct sockaddr_un, sun_path)) - return unix_autobind(sock); + return unix_autobind(sk);
err = unix_mkname(sunaddr, addr_len, &hash); if (err < 0) @@ -1183,8 +1182,11 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, alen = err;
if (test_bit(SOCK_PASSCRED, &sock->flags) && - !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) - goto out; + !unix_sk(sk)->addr) { + err = unix_autobind(sk); + if (err) + goto out; + }
restart: other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err); @@ -1284,9 +1286,11 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; addr_len = err;
- if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && - (err = unix_autobind(sock)) != 0) - goto out; + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { + err = unix_autobind(sk); + if (err) + goto out; + }
timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
@@ -1737,9 +1741,11 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; }
- if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr - && (err = unix_autobind(sock)) != 0) - goto out; + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { + err = unix_autobind(sk); + if (err) + goto out; + }
err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32)
From: Al Viro viro@zeniv.linux.org.uk
mainline inclusion from mainline-v5.14-rc1 commit 71e6be6f7d2bada7099d79205779c4452d4fd35b category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: David S. Miller davem@davemloft.net
Conflicts: net/unix/af_unix.c
Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 006e35065e51..3eba0e23c512 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1007,45 +1007,36 @@ static struct sock *unix_find_other(struct net *net, return NULL; }
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) +static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) { + struct unix_sock *u = unix_sk(sk); + umode_t mode = S_IFSOCK | + (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); + struct path parent, path; struct dentry *dentry; - struct path path; - int err = 0; + unsigned int hash; + int err; + /* * Get the parent directory, calculate the hash for last * component. */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - err = PTR_ERR(dentry); + dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); if (IS_ERR(dentry)) - return err; + return PTR_ERR(dentry);
/* * All right, let's create it. */ - err = security_path_mknod(&path, dentry, mode, 0); + err = security_path_mknod(&parent, dentry, mode, 0); if (!err) { - err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); + err = vfs_mknod(d_inode(parent.dentry), dentry, mode, 0); if (!err) { - res->mnt = mntget(path.mnt); - res->dentry = dget(dentry); + path.mnt = mntget(parent.mnt); + path.dentry = dget(dentry); } } - done_path_create(&path, dentry); - return err; -} - -static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) -{ - struct unix_sock *u = unix_sk(sk); - struct path path = { }; - umode_t mode = S_IFSOCK | - (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); - unsigned int hash; - int err; - - err = unix_mknod(addr->name->sun_path, mode, &path); + done_path_create(&parent, dentry); if (err) return err;
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit fa39ef0e472961baef49ddb0e6f7b8ebb555bd8f category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
As done in the commit fa42d910a38e ("unix_bind(): take BSD and abstract address cases into new helpers"), this patch moves BSD and abstract address cases from unix_find_other() into unix_find_bsd() and unix_find_abstract().
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org
Conflicts: net/unix/af_unix.c
Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/fs.h | 4 ++ net/unix/af_unix.c | 136 +++++++++++++++++++++++++++------------------ 2 files changed, 85 insertions(+), 55 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h index 18259e38dcd7..a7bc1eaa27ee 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2844,6 +2844,10 @@ static inline int bmap(struct inode *inode, sector_t *block) extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); +static inline int path_permission(const struct path *path, int mask) +{ + return inode_permission(d_inode(path->dentry), mask); +} extern int __check_sticky(struct inode *dir, struct inode *inode);
static inline bool execute_ok(struct inode *inode) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3eba0e23c512..eafd1d62bf55 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -894,6 +894,87 @@ static int unix_release(struct socket *sock) return 0; }
+static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, + int type, int *error) +{ + struct inode *inode; + struct path path; + struct sock *sk; + int err; + + err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); + if (err) + goto fail; + + err = path_permission(&path, MAY_WRITE); + if (err) + goto path_put; + + err = -ECONNREFUSED; + inode = d_backing_inode(path.dentry); + if (!S_ISSOCK(inode->i_mode)) + goto path_put; + + sk = unix_find_socket_byinode(inode); + if (!sk) + goto path_put; + + err = -EPROTOTYPE; + if (sk->sk_type == type) + touch_atime(&path); + else + goto sock_put; + + path_put(&path); + + return sk; + +sock_put: + sock_put(sk); +path_put: + path_put(&path); +fail: + *error = err; + return NULL; +} + +static struct sock *unix_find_abstract(struct net *net, + struct sockaddr_un *sunaddr, + int addr_len, int type, + unsigned int hash, int *error) +{ + struct dentry *dentry; + struct sock *sk; + + sk = unix_find_socket_byname(net, sunaddr, addr_len, type ^ hash); + if (!sk) { + *error = -ECONNREFUSED; + return NULL; + } + + dentry = unix_sk(sk)->path.dentry; + if (dentry) + touch_atime(&unix_sk(sk)->path); + + return sk; +} + +static struct sock *unix_find_other(struct net *net, + struct sockaddr_un *sunaddr, + int addr_len, int type, + unsigned int hash, int *error) +{ + struct sock *sk; + + if (sunaddr->sun_path[0]) + sk = unix_find_bsd(net, sunaddr, type, error); + else + sk = unix_find_abstract(net, sunaddr, addr_len, type, hash, + error); + + return sk; +} + static int unix_autobind(struct sock *sk) { struct unix_sock *u = unix_sk(sk); @@ -952,61 +1033,6 @@ out: mutex_unlock(&u->bindlock); return err; }
-static struct sock *unix_find_other(struct net *net, - struct sockaddr_un *sunname, int len, - int type, unsigned int hash, int *error) -{ - struct sock *u; - struct path path; - int err = 0; - - if (sunname->sun_path[0]) { - struct inode *inode; - err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); - if (err) - goto fail; - inode = d_backing_inode(path.dentry); - err = inode_permission(inode, MAY_WRITE); - if (err) - goto put_fail; - - err = -ECONNREFUSED; - if (!S_ISSOCK(inode->i_mode)) - goto put_fail; - u = unix_find_socket_byinode(inode); - if (!u) - goto put_fail; - - if (u->sk_type == type) - touch_atime(&path); - - path_put(&path); - - err = -EPROTOTYPE; - if (u->sk_type != type) { - sock_put(u); - goto fail; - } - } else { - err = -ECONNREFUSED; - u = unix_find_socket_byname(net, sunname, len, type ^ hash); - if (u) { - struct dentry *dentry; - dentry = unix_sk(u)->path.dentry; - if (dentry) - touch_atime(&unix_sk(u)->path); - } else - goto fail; - } - return u; - -put_fail: - path_put(&path); -fail: - *error = err; - return NULL; -} - static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) { struct unix_sock *u = unix_sk(sk);
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit aed26f557bbc94f0c778f63d7dfe86af99208f68 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
We can return an error as a pointer and need not pass an additional argument to unix_find_other().
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index eafd1d62bf55..e78477a4fa4b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -895,7 +895,7 @@ static int unix_release(struct socket *sock) }
static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, - int type, int *error) + int type) { struct inode *inode; struct path path; @@ -934,23 +934,20 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, path_put: path_put(&path); fail: - *error = err; - return NULL; + return ERR_PTR(err); }
static struct sock *unix_find_abstract(struct net *net, struct sockaddr_un *sunaddr, int addr_len, int type, - unsigned int hash, int *error) + unsigned int hash) { struct dentry *dentry; struct sock *sk;
sk = unix_find_socket_byname(net, sunaddr, addr_len, type ^ hash); - if (!sk) { - *error = -ECONNREFUSED; - return NULL; - } + if (!sk) + return ERR_PTR(-ECONNREFUSED);
dentry = unix_sk(sk)->path.dentry; if (dentry) @@ -962,15 +959,14 @@ static struct sock *unix_find_abstract(struct net *net, static struct sock *unix_find_other(struct net *net, struct sockaddr_un *sunaddr, int addr_len, int type, - unsigned int hash, int *error) + unsigned int hash) { struct sock *sk;
if (sunaddr->sun_path[0]) - sk = unix_find_bsd(net, sunaddr, type, error); + sk = unix_find_bsd(net, sunaddr, type); else - sk = unix_find_abstract(net, sunaddr, addr_len, type, hash, - error); + sk = unix_find_abstract(net, sunaddr, addr_len, type, hash);
return sk; } @@ -1206,9 +1202,11 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, }
restart: - other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err); - if (!other) + other = unix_find_other(net, sunaddr, alen, sock->type, hash); + if (IS_ERR(other)) { + err = PTR_ERR(other); goto out; + }
unix_state_double_lock(sk, other);
@@ -1330,9 +1328,12 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
restart: /* Find listening sock. */ - other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); - if (!other) + other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash); + if (IS_ERR(other)) { + err = PTR_ERR(other); + other = NULL; goto out; + }
/* Latch state of peer */ unix_state_lock(other); @@ -1803,9 +1804,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out_free;
other = unix_find_other(net, sunaddr, namelen, sk->sk_type, - hash, &err); - if (other == NULL) + hash); + if (IS_ERR(other)) { + err = PTR_ERR(other); + other = NULL; goto out_free; + } }
if (sk_filter(other, skb) < 0) {
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit b8a58aa6fccc5b2940f0da18c7f02e8a1deb693a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
unix_mkname() tests the socket address length and family and does some processing based on the address type. It is called at an early stage, so some of its work is redundant or ends up being wasted.
The address length/family tests are done twice in unix_bind(). Also, the address type is rechecked later in unix_bind() and unix_find_other(), where we can do the same processing. Moreover, in the BSD address case, the hash is set to 0 but never used, which is confusing.
This patch moves the address tests out of unix_mkname(), and the following patches move the other part into appropriate places and remove unix_mkname() finally.
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e78477a4fa4b..ef8b4fcd52fb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -226,15 +226,22 @@ static inline void unix_release_addr(struct unix_address *addr) * - if started by zero, it is abstract name. */
+static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) +{ + if (addr_len <= offsetof(struct sockaddr_un, sun_path) || + addr_len > sizeof(*sunaddr)) + return -EINVAL; + + if (sunaddr->sun_family != AF_UNIX) + return -EINVAL; + + return 0; +} + static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { *hashp = 0;
- if (len <= offsetof(struct sockaddr_un, sun_path) || - len > sizeof(*sunaddr)) - return -EINVAL; - if (!sunaddr || sunaddr->sun_family != AF_UNIX) - return -EINVAL; if (sunaddr->sun_path[0]) { /* * This may look like an off by one error but it is a bit more @@ -1120,13 +1127,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) unsigned int hash; struct unix_address *addr;
- if (addr_len < offsetofend(struct sockaddr_un, sun_family) || - sunaddr->sun_family != AF_UNIX) - return -EINVAL; - - if (addr_len == offsetof(struct sockaddr_un, sun_path)) + if (addr_len == offsetof(struct sockaddr_un, sun_path) && + sunaddr->sun_family == AF_UNIX) return unix_autobind(sk);
+ err = unix_validate_addr(sunaddr, addr_len); + if (err) + return err; + err = unix_mkname(sunaddr, addr_len, &hash); if (err < 0) return err; @@ -1189,6 +1197,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, goto out;
if (addr->sa_family != AF_UNSPEC) { + err = unix_validate_addr(sunaddr, alen); + if (err) + goto out; + err = unix_mkname(sunaddr, alen, &hash); if (err < 0) goto out; @@ -1296,6 +1308,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, int err; long timeo;
+ err = unix_validate_addr(sunaddr, addr_len); + if (err) + goto out; + err = unix_mkname(sunaddr, addr_len, &hash); if (err < 0) goto out; @@ -1747,6 +1763,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out;
if (msg->msg_namelen) { + err = unix_validate_addr(sunaddr, msg->msg_namelen); + if (err) + goto out; + err = unix_mkname(sunaddr, msg->msg_namelen, &hash); if (err < 0) goto out;
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit d2d8c9fddb1c11ccfa73bf0ad2b1e6b4ea7afdaf category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
We should not call unix_mkname() before unix_find_other() and instead do the same thing where necessary based on the address type:
- terminating the address with '\0' in unix_find_bsd() - calculating the hash in unix_find_abstract().
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 63 ++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 38 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ef8b4fcd52fb..333b76fd830c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -238,19 +238,25 @@ static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) return 0; }
+static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) +{ + /* This may look like an off by one error but it is a bit more + * subtle. 108 is the longest valid AF_UNIX path for a binding. + * sun_path[108] doesn't as such exist. However in kernel space + * we are guaranteed that it is a valid memory location in our + * kernel address buffer because syscall functions always pass + * a pointer of struct sockaddr_storage which has a bigger buffer + * than 108. + */ + ((char *)sunaddr)[addr_len] = 0; +} + static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { *hashp = 0;
if (sunaddr->sun_path[0]) { - /* - * This may look like an off by one error but it is a bit more - * subtle. 108 is the longest valid AF_UNIX path for a binding. - * sun_path[108] doesn't as such exist. However in kernel space - * we are guaranteed that it is a valid memory location in our - * kernel address buffer. - */ - ((char *)sunaddr)[len] = 0; + unix_mkname_bsd(sunaddr, len); len = strlen(sunaddr->sun_path) + offsetof(struct sockaddr_un, sun_path) + 1; return len; @@ -902,13 +908,14 @@ static int unix_release(struct socket *sock) }
static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, - int type) + int addr_len, int type) { struct inode *inode; struct path path; struct sock *sk; int err;
+ unix_mkname_bsd(sunaddr, addr_len); err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); if (err) goto fail; @@ -946,9 +953,9 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
static struct sock *unix_find_abstract(struct net *net, struct sockaddr_un *sunaddr, - int addr_len, int type, - unsigned int hash) + int addr_len, int type) { + unsigned int hash = unix_hash_fold(csum_partial(sunaddr, addr_len, 0)); struct dentry *dentry; struct sock *sk;
@@ -965,15 +972,14 @@ static struct sock *unix_find_abstract(struct net *net,
static struct sock *unix_find_other(struct net *net, struct sockaddr_un *sunaddr, - int addr_len, int type, - unsigned int hash) + int addr_len, int type) { struct sock *sk;
if (sunaddr->sun_path[0]) - sk = unix_find_bsd(net, sunaddr, type); + sk = unix_find_bsd(net, sunaddr, addr_len, type); else - sk = unix_find_abstract(net, sunaddr, addr_len, type, hash); + sk = unix_find_abstract(net, sunaddr, addr_len, type);
return sk; } @@ -1189,7 +1195,6 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, struct net *net = sock_net(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; struct sock *other; - unsigned int hash; int err;
err = -EINVAL; @@ -1201,11 +1206,6 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out;
- err = unix_mkname(sunaddr, alen, &hash); - if (err < 0) - goto out; - alen = err; - if (test_bit(SOCK_PASSCRED, &sock->flags) && !unix_sk(sk)->addr) { err = unix_autobind(sk); @@ -1214,7 +1214,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, }
restart: - other = unix_find_other(net, sunaddr, alen, sock->type, hash); + other = unix_find_other(net, sunaddr, alen, sock->type); if (IS_ERR(other)) { err = PTR_ERR(other); goto out; @@ -1303,7 +1303,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *newsk = NULL; struct sock *other = NULL; struct sk_buff *skb = NULL; - unsigned int hash; int st; int err; long timeo; @@ -1312,11 +1311,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out;
- err = unix_mkname(sunaddr, addr_len, &hash); - if (err < 0) - goto out; - addr_len = err; - if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { err = unix_autobind(sk); if (err) @@ -1344,7 +1338,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
restart: /* Find listening sock. */ - other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash); + other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); other = NULL; @@ -1744,9 +1738,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); struct sock *other = NULL; - int namelen = 0; /* fake GCC */ int err; - unsigned int hash; struct sk_buff *skb; long timeo; struct scm_cookie scm; @@ -1766,11 +1758,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_validate_addr(sunaddr, msg->msg_namelen); if (err) goto out; - - err = unix_mkname(sunaddr, msg->msg_namelen, &hash); - if (err < 0) - goto out; - namelen = err; } else { sunaddr = NULL; err = -ENOTCONN; @@ -1823,8 +1810,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, if (sunaddr == NULL) goto out_free;
- other = unix_find_other(net, sunaddr, namelen, sk->sk_type, - hash); + other = unix_find_other(net, sunaddr, msg->msg_namelen, + sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); other = NULL;
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit 5c32a3ed64b4c87ed6d9978074db5f0a54c4cd20 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
This patch removes unix_mkname() and postpones calculating the hash to unix_bind_abstract(). Some BSD-specific handling still remains in unix_bind(), though; the next patch moves it into unix_bind_bsd().
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 333b76fd830c..9f1273fab014 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -251,21 +251,6 @@ static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) ((char *)sunaddr)[addr_len] = 0; }
-static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) -{ - *hashp = 0; - - if (sunaddr->sun_path[0]) { - unix_mkname_bsd(sunaddr, len); - len = strlen(sunaddr->sun_path) + - offsetof(struct sockaddr_un, sun_path) + 1; - return len; - } - - *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); - return len; -} - static void __unix_remove_socket(struct sock *sk) { sk_del_node_init(sk); @@ -1111,6 +1096,9 @@ static int unix_bind_abstract(struct sock *sk, struct unix_address *addr) return -EINVAL; }
+ addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); + addr->hash ^= sk->sk_type; + spin_lock(&unix_table_lock); if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, addr->hash)) { @@ -1126,12 +1114,11 @@ static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - struct sock *sk = sock->sk; struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; - int err; - unsigned int hash; + struct sock *sk = sock->sk; struct unix_address *addr; + int err;
if (addr_len == offsetof(struct sockaddr_un, sun_path) && sunaddr->sun_family == AF_UNIX) @@ -1141,17 +1128,18 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (err) return err;
- err = unix_mkname(sunaddr, addr_len, &hash); - if (err < 0) - return err; - addr_len = err; + if (sun_path[0]) { + unix_mkname_bsd(sunaddr, addr_len); + addr_len = strlen(sunaddr->sun_path) + + offsetof(struct sockaddr_un, sun_path) + 1; + } + addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); if (!addr) return -ENOMEM;
memcpy(addr->name, sunaddr, addr_len); addr->len = addr_len; - addr->hash = hash ^ sk->sk_type; refcount_set(&addr->refcnt, 1);
if (sun_path[0])
From: Al Viro viro@zeniv.linux.org.uk
mainline inclusion from mainline-v5.14-rc1 commit 56c1731b280dc71febf5df80fcac1923ea973ab8 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Final preparations for doing unlink on failure past the successful mknod. We can't hold ->bindlock over ->mknod() or ->unlink(), since either might do sb_start_write() (e.g. on overlayfs). However, we can do it while holding filesystem and VFS locks - doing
	kern_path_create()
	vfs_mknod()
	grab ->bindlock
	if u->addr had been set
		drop ->bindlock
		done_path_create
		return -EINVAL
	else
		assign the address to socket
		drop ->bindlock
		done_path_create
		return 0
would be deadlock-free. Here we massage unix_bind_bsd() to that form. We are still doing equivalent transformations.
Next commit will *not* be an equivalent transformation - it will add a call of vfs_unlink() before done_path_create() in the "already bound" case.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: David S. Miller davem@davemloft.net
Conflicts: net/unix/af_unix.c
Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9f1273fab014..fbb7dcaf7d85 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1032,7 +1032,7 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) struct unix_sock *u = unix_sk(sk); umode_t mode = S_IFSOCK | (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); - struct path parent, path; + struct path parent; struct dentry *dentry; unsigned int hash; int err; @@ -1049,36 +1049,32 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) * All right, let's create it. */ err = security_path_mknod(&parent, dentry, mode, 0); - if (!err) { + if (!err) err = vfs_mknod(d_inode(parent.dentry), dentry, mode, 0); - if (!err) { - path.mnt = mntget(parent.mnt); - path.dentry = dget(dentry); - } - } - done_path_create(&parent, dentry); - if (err) + if (err) { + done_path_create(&parent, dentry); return err; - + } err = mutex_lock_interruptible(&u->bindlock); if (err) { - path_put(&path); + done_path_create(&parent, dentry); return err; } - if (u->addr) { mutex_unlock(&u->bindlock); - path_put(&path); + done_path_create(&parent, dentry); return -EINVAL; }
addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); + hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); - u->path = path; + u->path.mnt = mntget(parent.mnt); + u->path.dentry = dget(dentry); __unix_set_addr(sk, addr, hash); spin_unlock(&unix_table_lock); mutex_unlock(&u->bindlock); + done_path_create(&parent, dentry); return 0; }
From: Al Viro viro@zeniv.linux.org.uk
mainline inclusion from mainline-v5.14-rc1 commit c0c3b8d380a8f54c75786d41f6f9efbe761dac6c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
We can do that more or less safely, since the parent is held locked all along. Yes, somebody might observe the object via dcache, only to have it disappear afterwards, but there's really no good way to prevent that. It won't race with other bind(2) or attempts to move the sucker elsewhere, or put something else in its place - locked parent prevents that.
Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: David S. Miller davem@davemloft.net
Conflicts: net/unix/af_unix.c
Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fbb7dcaf7d85..a443b8862c4c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1051,20 +1051,13 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) err = security_path_mknod(&parent, dentry, mode, 0); if (!err) err = vfs_mknod(d_inode(parent.dentry), dentry, mode, 0); - if (err) { - done_path_create(&parent, dentry); - return err; - } + if (err) + goto out; err = mutex_lock_interruptible(&u->bindlock); - if (err) { - done_path_create(&parent, dentry); - return err; - } - if (u->addr) { - mutex_unlock(&u->bindlock); - done_path_create(&parent, dentry); - return -EINVAL; - } + if (err) + goto out_unlink; + if (u->addr) + goto out_unlock;
addr->hash = UNIX_HASH_SIZE; hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); @@ -1076,6 +1069,16 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) mutex_unlock(&u->bindlock); done_path_create(&parent, dentry); return 0; + +out_unlock: + mutex_unlock(&u->bindlock); + err = -EINVAL; +out_unlink: + /* failed after successful mknod? unlink what we'd created... */ + vfs_unlink(d_inode(parent.dentry), dentry, NULL); +out: + done_path_create(&parent, dentry); + return err; }
static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit 12f21c49ad83eba93d0485b8c9edcc28201bee93 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
To terminate address with '\0' in unix_bind_bsd(), we add unix_create_addr() and call it in unix_bind_bsd() and unix_bind_abstract().
Also, unix_bind_abstract() does not return -EEXIST. Only kern_path_create() and vfs_mknod() in unix_bind_bsd() can return it, so we move the last error check in unix_bind() to unix_bind_bsd().
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org
Conflicts: net/unix/af_unix.c
Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 105 ++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 39 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a443b8862c4c..4a25742ed4be 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -213,6 +213,22 @@ struct sock *unix_peer_get(struct sock *s) } EXPORT_SYMBOL_GPL(unix_peer_get);
+static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, + int addr_len) +{ + struct unix_address *addr; + + addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); + if (!addr) + return NULL; + + refcount_set(&addr->refcnt, 1); + addr->len = addr_len; + memcpy(addr->name, sunaddr, addr_len); + + return addr; +} + static inline void unix_release_addr(struct unix_address *addr) { if (refcount_dec_and_test(&addr->refcnt)) @@ -1027,23 +1043,35 @@ out: mutex_unlock(&u->bindlock); return err; }
-static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) +static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, + int addr_len) { - struct unix_sock *u = unix_sk(sk); umode_t mode = S_IFSOCK | (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); - struct path parent; + struct unix_sock *u = unix_sk(sk); + struct unix_address *addr; struct dentry *dentry; + struct path parent; unsigned int hash; int err;
+ unix_mkname_bsd(sunaddr, addr_len); + addr_len = strlen(sunaddr->sun_path) + + offsetof(struct sockaddr_un, sun_path) + 1; + + addr = unix_create_addr(sunaddr, addr_len); + if (!addr) + return -ENOMEM; + /* * Get the parent directory, calculate the hash for last * component. */ dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out; + }
/* * All right, let's create it. @@ -1052,7 +1080,7 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) if (!err) err = vfs_mknod(d_inode(parent.dentry), dentry, mode, 0); if (err) - goto out; + goto out_path; err = mutex_lock_interruptible(&u->bindlock); if (err) goto out_unlink; @@ -1076,47 +1104,61 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) out_unlink: /* failed after successful mknod? unlink what we'd created... */ vfs_unlink(d_inode(parent.dentry), dentry, NULL); -out: +out_path: done_path_create(&parent, dentry); - return err; +out: + unix_release_addr(addr); + return err == -EEXIST ? -EADDRINUSE : err; }
-static int unix_bind_abstract(struct sock *sk, struct unix_address *addr) +static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, + int addr_len) { struct unix_sock *u = unix_sk(sk); + struct unix_address *addr; int err;
+ addr = unix_create_addr(sunaddr, addr_len); + if (!addr) + return -ENOMEM; + err = mutex_lock_interruptible(&u->bindlock); if (err) - return err; + goto out;
if (u->addr) { - mutex_unlock(&u->bindlock); - return -EINVAL; + err = -EINVAL; + goto out_mutex; }
addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); addr->hash ^= sk->sk_type;
spin_lock(&unix_table_lock); + if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - addr->hash)) { - spin_unlock(&unix_table_lock); - mutex_unlock(&u->bindlock); - return -EADDRINUSE; - } + addr->hash)) + goto out_spin; + __unix_set_addr(sk, addr, addr->hash); spin_unlock(&unix_table_lock); mutex_unlock(&u->bindlock); return 0; + +out_spin: + spin_unlock(&unix_table_lock); + err = -EADDRINUSE; +out_mutex: + mutex_unlock(&u->bindlock); +out: + unix_release_addr(addr); + return err; }
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; - char *sun_path = sunaddr->sun_path; struct sock *sk = sock->sk; - struct unix_address *addr; int err;
if (addr_len == offsetof(struct sockaddr_un, sun_path) && @@ -1127,27 +1169,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (err) return err;
- if (sun_path[0]) { - unix_mkname_bsd(sunaddr, addr_len); - addr_len = strlen(sunaddr->sun_path) + - offsetof(struct sockaddr_un, sun_path) + 1; - } - - addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); - if (!addr) - return -ENOMEM; - - memcpy(addr->name, sunaddr, addr_len); - addr->len = addr_len; - refcount_set(&addr->refcnt, 1); - - if (sun_path[0]) - err = unix_bind_bsd(sk, addr); + if (sunaddr->sun_path[0]) + err = unix_bind_bsd(sk, sunaddr, addr_len); else - err = unix_bind_abstract(sk, addr); - if (err) - unix_release_addr(addr); - return err == -EEXIST ? -EADDRINUSE : err; + err = unix_bind_abstract(sk, sunaddr, addr_len); + + return err; }
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit 5ce7ab4961a9320ca0836e06849210d088723a56 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
In BSD and abstract address cases, we store sockets in the hash table with keys between 0 and UNIX_HASH_SIZE - 1. However, the hash saved in a socket varies depending on its address type; sockets with BSD addresses always have UNIX_HASH_SIZE in their unix_sk(sk)->addr->hash.
This is just for the UNIX_ABSTRACT() macro used to check the address type. The difference of the saved hashes comes from the first byte of the address in the first place. So, we can test it directly.
Then we can keep a real hash in each socket and replace unix_table_lock with per-hash locks in the later patch.
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org
Conflicts: tools/testing/selftests/bpf/progs/bpf_iter_unix.c tools/testing/selftests/bpf/progs/bpf_tracing_net.h tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4a25742ed4be..e66551d6a43d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -133,8 +133,6 @@ static struct hlist_head *unix_sockets_unbound(void *addr) return &unix_socket_table[UNIX_HASH_SIZE + hash]; }
-#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) - #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { @@ -2978,9 +2976,9 @@ static int unix_seq_show(struct seq_file *seq, void *v) i = 0; len = u->addr->len - offsetof(struct sockaddr_un, sun_path); - if (!UNIX_ABSTRACT(s)) + if (u->addr->name->sun_path[0]) { len--; - else { + } else { seq_putc(seq, '@'); i++; }
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.15-rc4 commit f4bd73b5a950866f6c6fc98a7b684d307c5d586a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
unix_create1() returns NULL on error, and the callers assume that it never fails for reasons other than out of memory. So, the callers always return -ENOMEM when unix_create1() fails.
However, it also returns NULL when the number of af_unix sockets exceeds twice the limit controlled by sysctl: fs.file-max. In this case, the callers should return -ENFILE like alloc_empty_file().
This patch changes unix_create1() to return the correct error value instead of NULL on error.
Out of curiosity, the assumption has been wrong since 1999 due to this change introduced in 2.2.4 [0].
diff -u --recursive --new-file v2.2.3/linux/net/unix/af_unix.c linux/net/unix/af_unix.c --- v2.2.3/linux/net/unix/af_unix.c Tue Jan 19 11:32:53 1999 +++ linux/net/unix/af_unix.c Sun Mar 21 07:22:00 1999 @@ -388,6 +413,9 @@ { struct sock *sk;
+ if (atomic_read(&unix_nr_socks) >= 2*max_files) + return NULL; + MOD_INC_USE_COUNT; sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1); if (!sk) {
[0]: https://cdn.kernel.org/pub/linux/kernel/v2.2/patch-2.2.4.gz
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: David S. Miller davem@davemloft.net
Conflicts: net/unix/af_unix.c
Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 50 ++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 17 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e66551d6a43d..9589e54bd3c9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -821,16 +821,22 @@ static struct proto unix_proto = {
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) { - struct sock *sk = NULL; struct unix_sock *u; + struct sock *sk; + int err;
atomic_long_inc(&unix_nr_socks); - if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) - goto out; + if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { + err = -ENFILE; + goto err; + }
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); - if (!sk) - goto out; + + if (!sk) { + err = -ENOMEM; + goto err; + }
sock_init_data(sock, sk);
@@ -850,20 +856,23 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); memset(&u->scm_stat, 0, sizeof(struct scm_stat)); unix_insert_socket(unix_sockets_unbound(sk), sk); -out: - if (sk == NULL) - atomic_long_dec(&unix_nr_socks); - else { - local_bh_disable(); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - local_bh_enable(); - } + + local_bh_disable(); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + local_bh_enable(); + return sk; + +err: + atomic_long_dec(&unix_nr_socks); + return ERR_PTR(err); }
static int unix_create(struct net *net, struct socket *sock, int protocol, int kern) { + struct sock *sk; + if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT;
@@ -890,7 +899,11 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; }
- return unix_create1(net, sock, kern) ? 0 : -ENOMEM; + sk = unix_create1(net, sock, kern); + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return 0; }
static int unix_release(struct socket *sock) @@ -1336,12 +1349,15 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, we will have to recheck all again in any case. */
- err = -ENOMEM; - /* create new sock for complete connection */ newsk = unix_create1(sock_net(sk), NULL, 0); - if (newsk == NULL) + if (IS_ERR(newsk)) { + err = PTR_ERR(newsk); + newsk = NULL; goto out; + } + + err = -ENOMEM;
/* Allocate skb for sending to listening sock */ skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit f452be496a5c8f58b1a67cde79e89b9f1cfde31c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
This patch adds three helper functions that calculate hashes for unbound sockets and bound sockets with BSD/abstract addresses.
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 64 +++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 29 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9589e54bd3c9..114b09d22b68 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -122,15 +122,38 @@ DEFINE_SPINLOCK(unix_table_lock); EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks;
+/* SMP locking strategy: + * hash table is protected with spinlock unix_table_lock + * each socket state is protected by separate spin lock. + */
-static struct hlist_head *unix_sockets_unbound(void *addr) +static unsigned int unix_unbound_hash(struct sock *sk) { - unsigned long hash = (unsigned long)addr; + unsigned long hash = (unsigned long)sk;
hash ^= hash >> 16; hash ^= hash >> 8; - hash %= UNIX_HASH_SIZE; - return &unix_socket_table[UNIX_HASH_SIZE + hash]; + hash ^= sk->sk_type; + + return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1)); +} + +static unsigned int unix_bsd_hash(struct inode *i) +{ + return i->i_ino & (UNIX_HASH_SIZE - 1); +} + +static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, + int addr_len, int type) +{ + __wsum csum = csum_partial(sunaddr, addr_len, 0); + unsigned int hash; + + hash = (__force unsigned int)csum_fold(csum); + hash ^= hash >> 8; + hash ^= type; + + return hash & (UNIX_HASH_SIZE - 1); }
#ifdef CONFIG_SECURITY_NETWORK @@ -161,20 +184,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) } #endif /* CONFIG_SECURITY_NETWORK */
-/* - * SMP locking strategy: - * hash table is protected with spinlock unix_table_lock - * each socket state is protected by separate spin lock. - */ - -static inline unsigned int unix_hash_fold(__wsum n) -{ - unsigned int hash = (__force unsigned int)csum_fold(n); - - hash ^= hash>>8; - return hash&(UNIX_HASH_SIZE-1); -} - #define unix_peer(sk) (unix_sk(sk)->peer)
static inline int unix_our_peer(struct sock *sk, struct sock *osk) @@ -333,11 +342,11 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
static struct sock *unix_find_socket_byinode(struct inode *i) { + unsigned int hash = unix_bsd_hash(i); struct sock *s;
spin_lock(&unix_table_lock); - sk_for_each(s, - &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { + sk_for_each(s, &unix_socket_table[hash]) { struct dentry *dentry = unix_sk(s)->path.dentry;
if (dentry && d_backing_inode(dentry) == i) { @@ -855,7 +864,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); memset(&u->scm_stat, 0, sizeof(struct scm_stat)); - unix_insert_socket(unix_sockets_unbound(sk), sk); + unix_insert_socket(&unix_socket_table[unix_unbound_hash(sk)], sk);
local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -967,11 +976,11 @@ static struct sock *unix_find_abstract(struct net *net, struct sockaddr_un *sunaddr, int addr_len, int type) { - unsigned int hash = unix_hash_fold(csum_partial(sunaddr, addr_len, 0)); + unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); struct dentry *dentry; struct sock *sk;
- sk = unix_find_socket_byname(net, sunaddr, addr_len, type ^ hash); + sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); if (!sk) return ERR_PTR(-ECONNREFUSED);
@@ -1023,8 +1032,7 @@ static int unix_autobind(struct sock *sk) retry: addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + offsetof(struct sockaddr_un, sun_path) + 1; - addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); - addr->hash ^= sk->sk_type; + addr->hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; @@ -1099,7 +1107,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, goto out_unlock;
addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + hash = unix_bsd_hash(d_backing_inode(dentry)); spin_lock(&unix_table_lock); u->path.mnt = mntget(parent.mnt); u->path.dentry = dget(dentry); @@ -1142,9 +1150,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, goto out_mutex; }
- addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); - addr->hash ^= sk->sk_type; - + addr->hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); spin_lock(&unix_table_lock);
if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit e6b4b873896f0e9298f70d25726f4bb1e1b265ba category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
To replace unix_table_lock with per-hash locks in the next patch, we need to save a hash in each socket because /proc/net/unix or BPF prog iterate sockets while holding a hash table lock and release it later in a different function.
Currently, we store a real/pseudo hash in struct unix_address. However, we do not allocate a struct unix_address for unbound sockets, nor should we allocate one just for that purpose. Instead, we can use sk_hash. Then, we no longer use the hash field in struct unix_address and can remove it.
Also, this patch:
  - renames unix_insert_socket() to unix_insert_unbound_socket()
  - removes the redundant list argument from __unix_insert_socket() and unix_insert_unbound_socket()
  - uses 'unsigned int' instead of 'unsigned' in __unix_set_addr_hash()
  - removes 'inline' from unix_remove_socket() and unix_insert_unbound_socket().
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 42 +++++++++++++++++++++++------------------- 2 files changed, 24 insertions(+), 20 deletions(-)
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index f42fdddecd41..beb6ef56bf85 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -26,7 +26,7 @@ extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { refcount_t refcnt; int len; - unsigned int hash; + unsigned int hash; /* deprecated */ struct sockaddr_un name[]; };
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 114b09d22b68..552f95c7d9e1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -279,31 +279,33 @@ static void __unix_remove_socket(struct sock *sk) sk_del_node_init(sk); }
-static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) +static void __unix_insert_socket(struct sock *sk) { WARN_ON(!sk_unhashed(sk)); - sk_add_node(sk, list); + sk_add_node(sk, &unix_socket_table[sk->sk_hash]); }
-static void __unix_set_addr(struct sock *sk, struct unix_address *addr, - unsigned hash) +static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr, + unsigned int hash) { __unix_remove_socket(sk); smp_store_release(&unix_sk(sk)->addr, addr); - __unix_insert_socket(&unix_socket_table[hash], sk); + + sk->sk_hash = hash; + __unix_insert_socket(sk); }
-static inline void unix_remove_socket(struct sock *sk) +static void unix_remove_socket(struct sock *sk) { spin_lock(&unix_table_lock); __unix_remove_socket(sk); spin_unlock(&unix_table_lock); }
-static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) +static void unix_insert_unbound_socket(struct sock *sk) { spin_lock(&unix_table_lock); - __unix_insert_socket(list, sk); + __unix_insert_socket(sk); spin_unlock(&unix_table_lock); }
@@ -849,6 +851,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
sock_init_data(sock, sk);
+ sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; @@ -864,7 +867,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); memset(&u->scm_stat, 0, sizeof(struct scm_stat)); - unix_insert_socket(&unix_socket_table[unix_unbound_hash(sk)], sk); + unix_insert_unbound_socket(sk);
local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -1011,6 +1014,7 @@ static int unix_autobind(struct sock *sk) struct unix_address *addr; unsigned int retries = 0; static u32 ordernum = 1; + unsigned int new_hash; int err;
err = mutex_lock_interruptible(&u->bindlock); @@ -1032,13 +1036,13 @@ static int unix_autobind(struct sock *sk) retry: addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + offsetof(struct sockaddr_un, sun_path) + 1; - addr->hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
+ new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF;
if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - addr->hash)) { + new_hash)) { spin_unlock(&unix_table_lock); /* * __unix_find_socket_byname() may take long time if many names @@ -1054,7 +1058,7 @@ static int unix_autobind(struct sock *sk) goto retry; }
- __unix_set_addr(sk, addr, addr->hash); + __unix_set_addr_hash(sk, addr, new_hash); spin_unlock(&unix_table_lock); err = 0;
@@ -1069,9 +1073,9 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); struct unix_sock *u = unix_sk(sk); struct unix_address *addr; + unsigned int new_hash; struct dentry *dentry; struct path parent; - unsigned int hash; int err;
unix_mkname_bsd(sunaddr, addr_len); @@ -1106,12 +1110,11 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, if (u->addr) goto out_unlock;
- addr->hash = UNIX_HASH_SIZE; - hash = unix_bsd_hash(d_backing_inode(dentry)); + new_hash = unix_bsd_hash(d_backing_inode(dentry)); spin_lock(&unix_table_lock); u->path.mnt = mntget(parent.mnt); u->path.dentry = dget(dentry); - __unix_set_addr(sk, addr, hash); + __unix_set_addr_hash(sk, addr, new_hash); spin_unlock(&unix_table_lock); mutex_unlock(&u->bindlock); done_path_create(&parent, dentry); @@ -1135,6 +1138,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, { struct unix_sock *u = unix_sk(sk); struct unix_address *addr; + unsigned int new_hash; int err;
addr = unix_create_addr(sunaddr, addr_len); @@ -1150,14 +1154,14 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, goto out_mutex; }
- addr->hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); + new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); spin_lock(&unix_table_lock);
if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, - addr->hash)) + new_hash)) goto out_spin;
- __unix_set_addr(sk, addr, addr->hash); + __unix_set_addr_hash(sk, addr, new_hash); spin_unlock(&unix_table_lock); mutex_unlock(&u->bindlock); return 0;
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit afd20b9290e184c203fe22f2d6b80dc7127ba724 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=afd20b9290e184c203fe22f2d6b80dc7127ba724
--------------------------------
The hash table of AF_UNIX sockets is protected by a single lock. This patch replaces it with per-hash locks.
The effect is noticeable when we handle multiple sockets simultaneously. Here is a test result on an EC2 c5.24xlarge instance. It shows the latency (only samples under 10us) in unix_insert_unbound_socket() while 64 CPUs each create 1024 sockets in parallel.
Without this patch:
   nsec : count  distribution
      0 : 179    |                                        |
    500 : 3021   |*********                               |
   1000 : 6271   |*******************                     |
   1500 : 6318   |*******************                     |
   2000 : 5828   |*****************                       |
   2500 : 5124   |***************                         |
   3000 : 4426   |*************                           |
   3500 : 3672   |***********                             |
   4000 : 3138   |*********                               |
   4500 : 2811   |********                                |
   5000 : 2384   |*******                                 |
   5500 : 2023   |******                                  |
   6000 : 1954   |*****                                   |
   6500 : 1737   |*****                                   |
   7000 : 1749   |*****                                   |
   7500 : 1520   |****                                    |
   8000 : 1469   |****                                    |
   8500 : 1394   |****                                    |
   9000 : 1232   |***                                     |
   9500 : 1138   |***                                     |
  10000 : 994    |***                                     |
With this patch:
   nsec : count  distribution
      0 : 1634   |****                                    |
    500 : 13170  |****************************************|
   1000 : 13156  |*************************************** |
   1500 : 9010   |***************************             |
   2000 : 6363   |*******************                     |
   2500 : 4443   |*************                           |
   3000 : 3240   |*********                               |
   3500 : 2549   |*******                                 |
   4000 : 1872   |*****                                   |
   4500 : 1504   |****                                    |
   5000 : 1247   |***                                     |
   5500 : 1035   |***                                     |
   6000 : 889    |**                                      |
   6500 : 744    |**                                      |
   7000 : 634    |*                                       |
   7500 : 498    |*                                       |
   8000 : 433    |*                                       |
   8500 : 355    |*                                       |
   9000 : 336    |*                                       |
   9500 : 284    |                                        |
  10000 : 243    |                                        |
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org
Conflicts: net/unix/af_unix.c
Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 98 ++++++++++++++++++++++++++----------------- net/unix/diag.c | 20 ++++----- 3 files changed, 71 insertions(+), 49 deletions(-)
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index beb6ef56bf85..ae41d8ee970a 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -20,7 +20,7 @@ struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_BITS 8
extern unsigned int unix_tot_inflight; -extern spinlock_t unix_table_lock; +extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
struct unix_address { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 552f95c7d9e1..b497dee9b2a4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,14 +116,14 @@
#include "scm.h"
+spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; +EXPORT_SYMBOL_GPL(unix_table_locks); struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); -DEFINE_SPINLOCK(unix_table_lock); -EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks;
/* SMP locking strategy: - * hash table is protected with spinlock unix_table_lock + * hash table is protected with spinlock unix_table_locks * each socket state is protected by separate spin lock. */
@@ -156,6 +156,25 @@ static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, return hash & (UNIX_HASH_SIZE - 1); }
+static void unix_table_double_lock(unsigned int hash1, unsigned int hash2) +{ + /* hash1 and hash2 is never the same because + * one is between 0 and UNIX_HASH_SIZE - 1, and + * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2. + */ + if (hash1 > hash2) + swap(hash1, hash2); + + spin_lock(&unix_table_locks[hash1]); + spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING); +} + +static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2) +{ + spin_unlock(&unix_table_locks[hash1]); + spin_unlock(&unix_table_locks[hash2]); +} + #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { @@ -297,16 +316,16 @@ static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr,
static void unix_remove_socket(struct sock *sk) { - spin_lock(&unix_table_lock); + spin_lock(&unix_table_locks[sk->sk_hash]); __unix_remove_socket(sk); - spin_unlock(&unix_table_lock); + spin_unlock(&unix_table_locks[sk->sk_hash]); }
static void unix_insert_unbound_socket(struct sock *sk) { - spin_lock(&unix_table_lock); + spin_lock(&unix_table_locks[sk->sk_hash]); __unix_insert_socket(sk); - spin_unlock(&unix_table_lock); + spin_unlock(&unix_table_locks[sk->sk_hash]); }
static struct sock *__unix_find_socket_byname(struct net *net, @@ -334,11 +353,11 @@ static inline struct sock *unix_find_socket_byname(struct net *net, { struct sock *s;
- spin_lock(&unix_table_lock); + spin_lock(&unix_table_locks[hash]); s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); - spin_unlock(&unix_table_lock); + spin_unlock(&unix_table_locks[hash]); return s; }
@@ -347,19 +366,18 @@ static struct sock *unix_find_socket_byinode(struct inode *i) unsigned int hash = unix_bsd_hash(i); struct sock *s;
- spin_lock(&unix_table_lock); + spin_lock(&unix_table_locks[hash]); sk_for_each(s, &unix_socket_table[hash]) { struct dentry *dentry = unix_sk(s)->path.dentry;
if (dentry && d_backing_inode(dentry) == i) { sock_hold(s); - goto found; + spin_unlock(&unix_table_locks[hash]); + return s; } } - s = NULL; -found: - spin_unlock(&unix_table_lock); - return s; + spin_unlock(&unix_table_locks[hash]); + return NULL; }
/* Support code for asymmetrically connected dgram sockets @@ -1010,11 +1028,11 @@ static struct sock *unix_find_other(struct net *net,
static int unix_autobind(struct sock *sk) { + unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); struct unix_address *addr; unsigned int retries = 0; static u32 ordernum = 1; - unsigned int new_hash; int err;
err = mutex_lock_interruptible(&u->bindlock); @@ -1038,12 +1056,13 @@ static int unix_autobind(struct sock *sk) offsetof(struct sockaddr_un, sun_path) + 1;
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); - spin_lock(&unix_table_lock); + unix_table_double_lock(old_hash, new_hash); ordernum = (ordernum+1)&0xFFFFF;
if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, new_hash)) { - spin_unlock(&unix_table_lock); + unix_table_double_unlock(old_hash, new_hash); + /* * __unix_find_socket_byname() may take long time if many names * are already in use. @@ -1059,7 +1078,7 @@ static int unix_autobind(struct sock *sk) }
__unix_set_addr_hash(sk, addr, new_hash); - spin_unlock(&unix_table_lock); + unix_table_double_unlock(old_hash, new_hash); err = 0;
out: mutex_unlock(&u->bindlock); @@ -1071,9 +1090,9 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, { umode_t mode = S_IFSOCK | (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); + unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); struct unix_address *addr; - unsigned int new_hash; struct dentry *dentry; struct path parent; int err; @@ -1111,11 +1130,11 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, goto out_unlock;
new_hash = unix_bsd_hash(d_backing_inode(dentry)); - spin_lock(&unix_table_lock); + unix_table_double_lock(old_hash, new_hash); u->path.mnt = mntget(parent.mnt); u->path.dentry = dget(dentry); __unix_set_addr_hash(sk, addr, new_hash); - spin_unlock(&unix_table_lock); + unix_table_double_unlock(old_hash, new_hash); mutex_unlock(&u->bindlock); done_path_create(&parent, dentry); return 0; @@ -1136,9 +1155,9 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, int addr_len) { + unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); struct unix_address *addr; - unsigned int new_hash; int err;
addr = unix_create_addr(sunaddr, addr_len); @@ -1155,19 +1174,19 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, }
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); - spin_lock(&unix_table_lock); + unix_table_double_lock(old_hash, new_hash);
if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, new_hash)) goto out_spin;
__unix_set_addr_hash(sk, addr, new_hash); - spin_unlock(&unix_table_lock); + unix_table_double_unlock(old_hash, new_hash); mutex_unlock(&u->bindlock); return 0;
out_spin: - spin_unlock(&unix_table_lock); + unix_table_double_unlock(old_hash, new_hash); err = -EADDRINUSE; out_mutex: mutex_unlock(&u->bindlock); @@ -1469,9 +1488,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, * * The contents of *(otheru->addr) and otheru->path * are seen fully set up here, since we have found - * otheru in hash under unix_table_lock. Insertion + * otheru in hash under unix_table_locks. Insertion * into the hash chain we'd found it in had been done - * in an earlier critical area protected by unix_table_lock, + * in an earlier critical area protected by unix_table_locks, * the same one where we'd set *(otheru->addr) contents, * as well as otheru->path and otheru->addr itself. * @@ -2900,7 +2919,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
#define get_bucket(x) ((x) >> BUCKET_SPACE) -#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) +#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) @@ -2924,7 +2943,7 @@ static struct sock *unix_next_socket(struct seq_file *seq, struct sock *sk, loff_t *pos) { - unsigned long bucket; + unsigned long bucket = get_bucket(*pos);
while (sk > (struct sock *)SEQ_START_TOKEN) { sk = sk_next(sk); @@ -2935,12 +2954,13 @@ static struct sock *unix_next_socket(struct seq_file *seq, }
do { + spin_lock(&unix_table_locks[bucket]); sk = unix_from_bucket(seq, pos); if (sk) return sk;
next_bucket: - bucket = get_bucket(*pos) + 1; + spin_unlock(&unix_table_locks[bucket++]); *pos = set_bucket_offset(bucket, 1); } while (bucket < ARRAY_SIZE(unix_socket_table));
@@ -2948,10 +2968,7 @@ static struct sock *unix_next_socket(struct seq_file *seq, }
static void *unix_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(unix_table_lock) { - spin_lock(&unix_table_lock); - if (!*pos) return SEQ_START_TOKEN;
@@ -2968,9 +2985,11 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) }
static void unix_seq_stop(struct seq_file *seq, void *v) - __releases(unix_table_lock) { - spin_unlock(&unix_table_lock); + struct sock *sk = v; + + if (sk) + spin_unlock(&unix_table_locks[sk->sk_hash]); }
static int unix_seq_show(struct seq_file *seq, void *v) @@ -2995,7 +3014,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s));
- if (u->addr) { // under unix_table_lock here + if (u->addr) { // under unix_table_locks here int i, len; seq_putc(seq, ' ');
@@ -3067,10 +3086,13 @@ static struct pernet_operations unix_net_ops = {
static int __init af_unix_init(void) { - int rc = -1; + int i, rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
+ for (i = 0; i < 2 * UNIX_HASH_SIZE; i++) + spin_lock_init(&unix_table_locks[i]); + rc = proto_register(&unix_proto, 1); if (rc != 0) { pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); diff --git a/net/unix/diag.c b/net/unix/diag.c index dd77e81b41a0..c522ab94d0c8 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -13,7 +13,7 @@
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { - /* might or might not have unix_table_lock */ + /* might or might not have unix_table_locks */ struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr) @@ -204,13 +204,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) s_slot = cb->args[0]; num = s_num = cb->args[1];
- spin_lock(&unix_table_lock); for (slot = s_slot; slot < ARRAY_SIZE(unix_socket_table); s_num = 0, slot++) { struct sock *sk;
num = 0; + spin_lock(&unix_table_locks[slot]); sk_for_each(sk, &unix_socket_table[slot]) { if (!net_eq(sock_net(sk), net)) continue; @@ -221,14 +221,16 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (sk_diag_dump(sk, skb, req, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI) < 0) + NLM_F_MULTI) < 0) { + spin_unlock(&unix_table_locks[slot]); goto done; + } next: num++; } + spin_unlock(&unix_table_locks[slot]); } done: - spin_unlock(&unix_table_lock); cb->args[0] = slot; cb->args[1] = num;
@@ -237,21 +239,19 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
static struct sock *unix_lookup_by_ino(unsigned int ino) { - int i; struct sock *sk; + int i;
- spin_lock(&unix_table_lock); for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { + spin_lock(&unix_table_locks[i]); sk_for_each(sk, &unix_socket_table[i]) if (ino == sock_i_ino(sk)) { sock_hold(sk); - spin_unlock(&unix_table_lock); - + spin_unlock(&unix_table_locks[i]); return sk; } + spin_unlock(&unix_table_locks[i]); } - - spin_unlock(&unix_table_lock); return NULL; }
From: Kuniyuki Iwashima kuniyu@amazon.co.jp
mainline inclusion from mainline-v5.17-rc1 commit 9acbc584c3a4e9706703039708ec947ffc152c66 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4OM1C CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9acbc584c3a4e9706703039708ec947ffc152c66
--------------------------------
When we bind an AF_UNIX socket without a name specified, the kernel selects an available one from 0x00000 to 0xFFFFF. unix_autobind() starts searching from a number in the 'static' variable and increments it after acquiring two locks.
If multiple processes try to autobind, they contend for the same lock and check whether a socket in the hash list already has the same name. If not, one process uses the name, and all the others end up retrying with the _next_ number (or not exactly the next one, since other processes may have incremented it in the meantime). The more sockets we autobind in parallel, the longer the latency gets. We can avoid such a race by starting the search for a name from a random number.
The results below show the latency in unix_autobind() while 64 CPUs simultaneously autobind 1024 sockets each.
Without this patch:
   usec : count  distribution
      0 : 1176   |***                                     |
      2 : 3655   |***********                             |
      4 : 4094   |*************                           |
      6 : 3831   |************                            |
      8 : 3829   |************                            |
     10 : 3844   |************                            |
     12 : 3638   |***********                             |
     14 : 2992   |*********                               |
     16 : 2485   |*******                                 |
     18 : 2230   |*******                                 |
     20 : 2095   |******                                  |
     22 : 1853   |*****                                   |
     24 : 1827   |*****                                   |
     26 : 1677   |*****                                   |
     28 : 1473   |****                                    |
     30 : 1573   |*****                                   |
     32 : 1417   |****                                    |
     34 : 1385   |****                                    |
     36 : 1345   |****                                    |
     38 : 1344   |****                                    |
     40 : 1200   |***                                     |
With this patch:
   usec : count  distribution
      0 : 1855   |******                                  |
      2 : 6464   |*********************                   |
      4 : 9936   |********************************        |
      6 : 12107  |****************************************|
      8 : 10441  |**********************************      |
     10 : 7264   |***********************                 |
     12 : 4254   |**************                          |
     14 : 2538   |********                                |
     16 : 1596   |*****                                   |
     18 : 1088   |***                                     |
     20 : 800    |**                                      |
     22 : 670    |**                                      |
     24 : 601    |*                                       |
     26 : 562    |*                                       |
     28 : 525    |*                                       |
     30 : 446    |*                                       |
     32 : 378    |*                                       |
     34 : 337    |*                                       |
     36 : 317    |*                                       |
     38 : 314    |*                                       |
     40 : 298    |                                        |
Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- net/unix/af_unix.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b497dee9b2a4..de3a1ffac26d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1031,8 +1031,7 @@ static int unix_autobind(struct sock *sk) unsigned int new_hash, old_hash = sk->sk_hash; struct unix_sock *u = unix_sk(sk); struct unix_address *addr; - unsigned int retries = 0; - static u32 ordernum = 1; + u32 lastnum, ordernum; int err;
err = mutex_lock_interruptible(&u->bindlock); @@ -1048,32 +1047,35 @@ static int unix_autobind(struct sock *sk) if (!addr) goto out;
+ addr->len = offsetof(struct sockaddr_un, sun_path) + 6; addr->name->sun_family = AF_UNIX; refcount_set(&addr->refcnt, 1);
+ ordernum = prandom_u32(); + lastnum = ordernum & 0xFFFFF; retry: - addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + - offsetof(struct sockaddr_un, sun_path) + 1; + ordernum = (ordernum + 1) & 0xFFFFF; + sprintf(addr->name->sun_path + 1, "%05x", ordernum);
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); unix_table_double_lock(old_hash, new_hash); - ordernum = (ordernum+1)&0xFFFFF;
if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, new_hash)) { unix_table_double_unlock(old_hash, new_hash);
- /* - * __unix_find_socket_byname() may take long time if many names + /* __unix_find_socket_byname() may take long time if many names * are already in use. */ cond_resched(); - /* Give up if all names seems to be in use. */ - if (retries++ == 0xFFFFF) { + + if (ordernum == lastnum) { + /* Give up if all names seems to be in use. */ err = -ENOSPC; - kfree(addr); + unix_release_addr(addr); goto out; } + goto retry; }