From: Steve French <stfrench@microsoft.com>
stable inclusion from linux-4.19.209 commit 43d2e0fbc67f8bcfb069130f4028a04887ae76b6
--------------------------------
commit 9ed38fd4a15417cac83967360cf20b853bfab9b6 upstream.
Although it is very unlikely that the tlink pointer would be null in this case, the get_next_mid function can in theory return null (but not an error), so we need to check for null (not for IS_ERR, which cannot be returned here).
Address warning:
fs/smbfs_client/connect.c:2392 cifs_match_super() warn: 'tlink' isn't an ERR_PTR
Pointed out by Dan Carpenter via smatch code analysis tool
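For readers unfamiliar with the convention the warning relies on: ERR_PTR() encodes a negative errno in the top 4095 values of the pointer range, and IS_ERR() only recognizes that range, so a NULL return value passes an IS_ERR() check untouched. Below is a minimal userspace sketch of the pattern; the simplified macros mirror the kernel's err.h, and the lookup() helper is hypothetical:

  #include <stdio.h>

  #define MAX_ERRNO 4095
  /* simplified forms of the kernel's ERR_PTR()/IS_ERR() from err.h */
  #define ERR_PTR(err) ((void *)(long)(err))
  #define IS_ERR(ptr)  ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

  static void *lookup(int mode) /* hypothetical helper with two failure modes */
  {
      if (mode == 1)
          return ERR_PTR(-12); /* hard error, e.g. -ENOMEM */
      if (mode == 2)
          return NULL;         /* "nothing found" is not an errno */
      return "object";
  }

  int main(void)
  {
      printf("IS_ERR(ERR_PTR(-12)) = %d\n", IS_ERR(lookup(1))); /* 1 */
      printf("IS_ERR(NULL)         = %d\n", IS_ERR(lookup(2))); /* 0: a NULL check is still needed */
      return 0;
  }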
CC: stable@vger.kernel.org
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 fs/cifs/connect.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 907be252c5d47..36104dd8eb4dd 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3373,9 +3373,10 @@ cifs_match_super(struct super_block *sb, void *data)
     spin_lock(&cifs_tcp_ses_lock);
     cifs_sb = CIFS_SB(sb);
     tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
-    if (IS_ERR(tlink)) {
+    if (tlink == NULL) {
+        /* can not match superblock if tlink were ever null */
         spin_unlock(&cifs_tcp_ses_lock);
-        return rc;
+        return 0;
     }
     tcon = tlink_tcon(tlink);
     ses = tcon->ses;
From: Pali Rohár <pali@kernel.org>
stable inclusion from linux-4.19.209 commit 4a319dd66f497bb5e8441950af48c845c0670849
--------------------------------
commit 74e1eb3b4a1ef2e564b4bdeb6e92afe844e900de upstream.
The driver's tx_empty callback should signal when the transmit shift register is empty, i.e. when the last character has been sent.

The STAT_TX_FIFO_EMP bit signals only that the HW transmit FIFO is empty, which happens when the last byte is loaded into the transmit shift register.

The STAT_TX_EMP bit signals when both the HW transmit FIFO and the transmit shift register are empty.

So replace the STAT_TX_FIFO_EMP check with STAT_TX_EMP in the mvebu_uart_tx_empty() callback function.
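To make the one-bit difference concrete, here is a tiny sketch of the fixed predicate; the bit positions below are illustrative stand-ins, not the real Armada-3700 register layout:

  #include <stdio.h>

  #define STAT_TX_FIFO_EMP (1u << 13) /* FIFO drained; last byte may still be shifting out */
  #define STAT_TX_EMP      (1u << 6)  /* FIFO and shift register both empty */

  static int tx_empty(unsigned int st)
  {
      return (st & STAT_TX_EMP) ? 1 : 0; /* the fixed check */
  }

  int main(void)
  {
      printf("fifo empty only: %d\n", tx_empty(STAT_TX_FIFO_EMP));               /* 0: not done yet */
      printf("all empty:       %d\n", tx_empty(STAT_TX_FIFO_EMP | STAT_TX_EMP)); /* 1: really done */
      return 0;
  }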
Fixes: 30530791a7a0 ("serial: mvebu-uart: initial support for Armada-3700 serial port")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Pali Rohár <pali@kernel.org>
Link: https://lore.kernel.org/r/20210911132017.25505-1-pali@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/tty/serial/mvebu-uart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index 170e446a2f625..494e476ac8db9 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -162,7 +162,7 @@ static unsigned int mvebu_uart_tx_empty(struct uart_port *port)
     st = readl(port->membase + UART_STAT);
     spin_unlock_irqrestore(&port->lock, flags);
 
-    return (st & STAT_TX_FIFO_EMP) ? TIOCSER_TEMT : 0;
+    return (st & STAT_TX_EMP) ? TIOCSER_TEMT : 0;
 }
 
 static unsigned int mvebu_uart_get_mctrl(struct uart_port *port)
From: Baokun Li <libaokun1@huawei.com>
stable inclusion from linux-4.19.209 commit 0e82e793caa01b744641d0f9f23b7e1cac12a056
--------------------------------
[ Upstream commit 4e28550829258f7dab97383acaa477bd724c0ff4 ]
ISCSI_NET_PARAM_IFACE_ENABLE belongs to enum iscsi_net_param, not to iscsi_iface_param, so move it to the ISCSI_NET_PARAM handling. Otherwise, when we call into the driver, we might not match the parameter and would report that we don't want the attr visible in sysfs. Found in code review.
Link: https://lore.kernel.org/r/20210901085336.2264295-1-libaokun1@huawei.com
Fixes: e746f3451ec7 ("scsi: iscsi: Fix iface sysfs attr detection")
Reviewed-by: Lee Duncan <lduncan@suse.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/scsi/scsi_transport_iscsi.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index d3632547799c9..3d489193c8907 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -429,9 +429,7 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
     struct iscsi_transport *t = iface->transport;
     int param = -1;
 
-    if (attr == &dev_attr_iface_enabled.attr)
-        param = ISCSI_NET_PARAM_IFACE_ENABLE;
-    else if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr)
+    if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr)
         param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO;
     else if (attr == &dev_attr_iface_header_digest.attr)
         param = ISCSI_IFACE_PARAM_HDRDGST_EN;
@@ -471,7 +469,9 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
     if (param != -1)
         return t->attr_is_visible(ISCSI_IFACE_PARAM, param);
 
-    if (attr == &dev_attr_iface_vlan_id.attr)
+    if (attr == &dev_attr_iface_enabled.attr)
+        param = ISCSI_NET_PARAM_IFACE_ENABLE;
+    else if (attr == &dev_attr_iface_vlan_id.attr)
         param = ISCSI_NET_PARAM_VLAN_ID;
     else if (attr == &dev_attr_iface_vlan_priority.attr)
         param = ISCSI_NET_PARAM_VLAN_PRIORITY;
From: Kaige Fu <kaige.fu@linux.alibaba.com>
stable inclusion from linux-4.19.209 commit 5701e8bff314c155e7afdc467b1e0389d86853d0
--------------------------------
[ Upstream commit 280bef512933b2dda01d681d8cbe499b98fc5bdd ]
In its_vpe_irq_domain_alloc, when its_vpe_init() returns an error, there is an off-by-one in the number of VPEs to be freed.
Fix it by simply passing the number of VPEs allocated, which is the index of the loop iterating over the VPEs.
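To see why i (not i - 1) is the right count: when the loop body fails at index i, indices 0 through i - 1 were set up, i.e. exactly i entries exist. A standalone sketch of the pattern, with alloc_one() and free_first_n() as hypothetical stand-ins:

  #include <stdbool.h>
  #include <stdio.h>

  static bool alloc_one(int idx) { return idx < 3; } /* pretend idx 3 fails */
  static void free_first_n(int n) { printf("freeing %d entries\n", n); }

  int main(void)
  {
      int i, nr = 8;

      for (i = 0; i < nr; i++) {
          if (!alloc_one(i))
              break; /* entries 0..i-1 succeeded */
      }
      if (i < nr && i > 0)
          free_first_n(i); /* i entries were allocated, free all of them */
      return 0;
  }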
Fixes: 7d75bbb4bc1a ("irqchip/gic-v3-its: Add VPE irq domain allocation/teardown")
Signed-off-by: Kaige Fu <kaige.fu@linux.alibaba.com>
[maz: fixed commit message]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/d9e36dee512e63670287ed9eff884a5d8d6d27f2.163167231...
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/irqchip/irq-gic-v3-its.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 79648a2139412..b191dc7af7e6e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3388,7 +3388,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
 
     if (err) {
         if (i > 0)
-            its_vpe_irq_domain_free(domain, virq, i - 1);
+            its_vpe_irq_domain_free(domain, virq, i);
 
         its_lpi_free(bitmap, base, nr_ids);
         its_free_prop_table(vprop_page);
From: Christoph Hellwig <hch@lst.de>
stable inclusion from linux-4.19.209 commit 906b4dcc180808f0ba74ec1e07a5cf708463bc48
--------------------------------
[ Upstream commit 7df835a32a8bedf7ce88efcfa7c9b245b52ff139 ]
Commit b0140891a8cea3 ("md: Fix race when creating a new md device.") not only moved assigning mddev->gendisk before calling add_disk, which fixes the races described in the commit log, but also added an mddev->open_mutex critical section over add_disk and creation of the md kobj. Adding a kobject after add_disk is racy vs deleting the gendisk right after adding it, but md already protects against that by holding an mddev->active reference.

On the other hand, taking this lock added a lock order reversal with what is now disk->open_mutex (it used to be bdev->bd_mutex when the commit was added) for partition devices, which need that lock for the internal open during the partition scan; a recent commit also takes it for non-partitioned devices, leading to further lockdep splatter.
Fixes: b0140891a8ce ("md: Fix race when creating a new md device.")
Fixes: d62633873590 ("block: support delayed holder registration")
Reported-by: syzbot+fadc0aaf497e6a493b9f@syzkaller.appspotmail.com
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: syzbot+fadc0aaf497e6a493b9f@syzkaller.appspotmail.com
Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/md/md.c | 5 -----
 1 file changed, 5 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c8f26788c3da5..8a2656cf7127d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5467,10 +5467,6 @@ static int md_alloc(dev_t dev, char *name)
      */
     disk->flags |= GENHD_FL_EXT_DEVT;
     mddev->gendisk = disk;
-    /* As soon as we call add_disk(), another thread could get
-     * through to md_open, so make sure it doesn't get too far
-     */
-    mutex_lock(&mddev->open_mutex);
     add_disk(disk);
 
     error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
@@ -5485,7 +5481,6 @@ static int md_alloc(dev_t dev, char *name)
     if (mddev->kobj.sd &&
         sysfs_create_group(&mddev->kobj, &md_bitmap_group))
         pr_debug("pointless warning\n");
-    mutex_unlock(&mddev->open_mutex);
 abort:
     mutex_unlock(&disks_mutex);
     if (!error && mddev->kobj.sd) {
From: Dan Li <ashimida@linux.alibaba.com>
stable inclusion from linux-4.19.209 commit 93cb263d81bc9b67abd2e250a3caa21fb983a5e1
--------------------------------
[ Upstream commit 9fcb2e93f41c07a400885325e7dbdfceba6efaec ]
__stack_chk_guard is set up once during the init stage and never changed after that.

Although modifying this variable at runtime will usually cause the kernel to crash (as would an attacker's modification), it should be marked as __ro_after_init, and it should not affect performance if it is placed in the ro_after_init section.
Signed-off-by: Dan Li <ashimida@linux.alibaba.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/1631612642-102881-1-git-send-email-ashimida@linux....
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/arm64/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 406744566cbed..e5be78915632c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -62,7 +62,7 @@
 
 #ifdef CONFIG_STACKPROTECTOR
 #include <linux/stackprotector.h>
-unsigned long __stack_chk_guard __read_mostly;
+unsigned long __stack_chk_guard __ro_after_init;
 EXPORT_SYMBOL(__stack_chk_guard);
 #endif
From: Eric Dumazet <edumazet@google.com>
stable inclusion from linux-4.19.209 commit eb6eeb056c992d20a4d3b4fb7dd05471ba32fea3
--------------------------------
commit 9efdda4e3abed13f0903b7b6e4d4c2102019440a upstream.
When a qdisc setup including pacing FQ is dismantled and recreated, some TCP packets are sent earlier than instructed by TCP stack.
TCP can be fooled when ACK comes back, because the following operation can return a negative value.
tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
Some paths in the TCP stack were not dealing properly with this; this patch addresses four of them.
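The failure mode is plain u32 wraparound: if the echoed timestamp is ahead of the local clock (because the packet left earlier than the stack intended), the subtraction yields a value close to 2^32, and scaling it to microseconds overflows. A minimal userspace demo of the guard the patch applies, assuming TCP_TS_HZ is 1000 as in current kernels:

  #include <limits.h>
  #include <stdint.h>
  #include <stdio.h>

  #define USEC_PER_SEC 1000000
  #define TCP_TS_HZ    1000

  int main(void)
  {
      uint32_t now = 100, tsecr = 103; /* echoed timestamp "from the future" */
      uint32_t delta = now - tsecr;    /* wraps to 4294967293 */

      printf("delta = %u\n", delta);
      if (delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))
          printf("delta_us = %u\n", delta * (USEC_PER_SEC / TCP_TS_HZ));
      else
          printf("implausible delta, skip this RTT sample\n");
      return 0;
  }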
Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Conflicts:
    net/ipv4/tcp_timer.c
[yyl: keep implementation of tcp_clamp_rto_to_user_timeout() and retransmits_timed_out() as mainline]
Reviewed-by: Yue Haibing <yuehaibing@huawei.com>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 net/ipv4/tcp_input.c | 16 ++++++++++------
 net/ipv4/tcp_timer.c | 10 ++++++----
 2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ac6c1c41fbffa..36415c66cd9a9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -581,10 +581,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
         u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
         u32 delta_us;
 
-        if (!delta)
-            delta = 1;
-        delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
-        tcp_rcv_rtt_update(tp, delta_us, 0);
+        if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+            if (!delta)
+                delta = 1;
+            delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+            tcp_rcv_rtt_update(tp, delta_us, 0);
+        }
     }
 }
 
@@ -2934,9 +2936,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
     if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
         flag & FLAG_ACKED) {
         u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
-        u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
 
-        seq_rtt_us = ca_rtt_us = delta_us;
+        if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+            seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+            ca_rtt_us = seq_rtt_us;
+        }
     }
     rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
     if (seq_rtt_us < 0)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index f4e2d01c058d4..d071ed6b8b9a0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -26,15 +26,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 {
     struct inet_connection_sock *icsk = inet_csk(sk);
     u32 elapsed, start_ts;
+    s32 remaining;
 
     start_ts = tcp_sk(sk)->retrans_stamp;
     if (!icsk->icsk_user_timeout)
         return icsk->icsk_rto;
     elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
-    if (elapsed >= icsk->icsk_user_timeout)
+    remaining = icsk->icsk_user_timeout - elapsed;
+    if (remaining <= 0)
         return 1; /* user timeout has passed; fire ASAP */
-    else
-        return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+    return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
 }
 
 /**
@@ -203,7 +205,7 @@ static bool retransmits_timed_out(struct sock *sk,
         timeout = tcp_model_timeout(sk, boundary, rto_base);
     }
 
-    return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+    return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
 }
 
 /* A write timeout has occurred. Process the after effects. */
From: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
stable inclusion from linux-4.19.209 commit 8a6a240f52e14356386030d8958ae8b1761d2325
--------------------------------
[ Upstream commit 3b0c406124719b625b1aba431659f5cdc24a982c ]
This issue happens when a userspace program does an FBIOPUT_VSCREENINFO ioctl, passing a fb_var_screeninfo struct in which only the fields xres, yres, and bits_per_pixel are filled in.

If this struct is the same as in the previous ioctl, vc_resize() detects it and doesn't call resize_screen(), leaving the fb_var_screeninfo incomplete. This leads updatescrollmode() to calculate a wrong value for fbcon_display->vrows, which makes real_y() return a wrong value of y, and that value eventually causes imageblit to access an out-of-bounds address.

To solve this issue, resize_screen() is now called even if the screen does not need any resizing, so it will "fix and fill" the fb_var_screeninfo regardless.
Cc: stable <stable@vger.kernel.org> # after 5.15-rc2 is out, give it time to bake
Reported-and-tested-by: syzbot+858dc7a2f7ef07c2c219@syzkaller.appspotmail.com
Signed-off-by: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Link: https://lore.kernel.org/r/20210628134509.15895-1-igormtorrente@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/tty/vt/vt.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index b2b5f19fb2fb9..72e3989dffa60 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1218,8 +1218,25 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
     new_row_size = new_cols << 1;
     new_screen_size = new_row_size * new_rows;
 
-    if (new_cols == vc->vc_cols && new_rows == vc->vc_rows)
-        return 0;
+    if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) {
+        /*
+         * This function is being called here to cover the case
+         * where the userspace calls the FBIOPUT_VSCREENINFO twice,
+         * passing the same fb_var_screeninfo containing the fields
+         * yres/xres equal to a number non-multiple of vc_font.height
+         * and yres_virtual/xres_virtual equal to number lesser than the
+         * vc_font.height and yres/xres.
+         * In the second call, the struct fb_var_screeninfo isn't
+         * being modified by the underlying driver because of the
+         * if above, and this causes the fbcon_display->vrows to become
+         * negative and it eventually leads to out-of-bound
+         * access by the imageblit function.
+         * To give the correct values to the struct and to not have
+         * to deal with possible errors from the code below, we call
+         * the resize_screen here as well.
+         */
+        return resize_screen(vc, new_cols, new_rows, user);
+    }
 
     if (new_screen_size > KMALLOC_MAX_SIZE || !new_screen_size)
         return -EINVAL;
From: Kevin Hao <haokexin@gmail.com>
stable inclusion from linux-4.19.209 commit 30d57cf2c4116ca6d34ecd1cac94ad84f8bc446c
--------------------------------
[ Upstream commit e5c6b312ce3cc97e90ea159446e6bfa06645364d ]
The struct sugov_tunables is protected by the kobject, so we can't free it directly. Otherwise we would get a call trace like this:

ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x30
WARNING: CPU: 3 PID: 720 at lib/debugobjects.c:505 debug_print_object+0xb8/0x100
Modules linked in:
CPU: 3 PID: 720 Comm: a.sh Tainted: G W 5.14.0-rc1-next-20210715-yocto-standard+ #507
Hardware name: Marvell OcteonTX CN96XX board (DT)
pstate: 40400009 (nZcv daif +PAN -UAO -TCO BTYPE=--)
pc : debug_print_object+0xb8/0x100
lr : debug_print_object+0xb8/0x100
sp : ffff80001ecaf910
x29: ffff80001ecaf910 x28: ffff00011b10b8d0 x27: ffff800011043d80
x26: ffff00011a8f0000 x25: ffff800013cb3ff0 x24: 0000000000000000
x23: ffff80001142aa68 x22: ffff800011043d80 x21: ffff00010de46f20
x20: ffff800013c0c520 x19: ffff800011d8f5b0 x18: 0000000000000010
x17: 6e6968207473696c x16: 5f72656d6974203a x15: 6570797420746365
x14: 6a626f2029302065 x13: 303378302f307830 x12: 2b6e665f72656d69
x11: ffff8000124b1560 x10: ffff800012331520 x9 : ffff8000100ca6b0
x8 : 000000000017ffe8 x7 : c0000000fffeffff x6 : 0000000000000001
x5 : ffff800011d8c000 x4 : ffff800011d8c740 x3 : 0000000000000000
x2 : ffff0001108301c0 x1 : ab3c90eedf9c0f00 x0 : 0000000000000000
Call trace:
 debug_print_object+0xb8/0x100
 __debug_check_no_obj_freed+0x1c0/0x230
 debug_check_no_obj_freed+0x20/0x88
 slab_free_freelist_hook+0x154/0x1c8
 kfree+0x114/0x5d0
 sugov_exit+0xbc/0xc0
 cpufreq_exit_governor+0x44/0x90
 cpufreq_set_policy+0x268/0x4a8
 store_scaling_governor+0xe0/0x128
 store+0xc0/0xf0
 sysfs_kf_write+0x54/0x80
 kernfs_fop_write_iter+0x128/0x1c0
 new_sync_write+0xf0/0x190
 vfs_write+0x2d4/0x478
 ksys_write+0x74/0x100
 __arm64_sys_write+0x24/0x30
 invoke_syscall.constprop.0+0x54/0xe0
 do_el0_svc+0x64/0x158
 el0_svc+0x2c/0xb0
 el0t_64_sync_handler+0xb0/0xb8
 el0t_64_sync+0x198/0x19c
irq event stamp: 5518
hardirqs last enabled at (5517): [<ffff8000100cbd7c>] console_unlock+0x554/0x6c8
hardirqs last disabled at (5518): [<ffff800010fc0638>] el1_dbg+0x28/0xa0
softirqs last enabled at (5504): [<ffff8000100106e0>] __do_softirq+0x4d0/0x6c0
softirqs last disabled at (5483): [<ffff800010049548>] irq_exit+0x1b0/0x1b8
So split the original sugov_tunables_free() into two functions: sugov_clear_global_tunables() just clears the global_tunables pointer, and the new sugov_tunables_free() is used as the kobj_type's release callback to release the sugov_tunables safely.
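The rule being enforced here: an object that embeds a kobject must only be freed from its ktype's release callback, recovered via container_of(), because sysfs files may still hold references after the creator is done with it. A userspace mock of that ownership pattern (toy refcount and hypothetical names, not the kernel API):

  #include <stddef.h>
  #include <stdio.h>
  #include <stdlib.h>

  #define container_of(ptr, type, member) \
      ((type *)((char *)(ptr) - offsetof(type, member)))

  struct kobj {                /* toy stand-in for struct kobject */
      int refcount;
      void (*release)(struct kobj *);
  };

  struct tunables {            /* the embedding object */
      int rate_limit_us;
      struct kobj kobj;
  };

  static void tunables_release(struct kobj *k)
  {
      printf("freed from ->release()\n");
      free(container_of(k, struct tunables, kobj)); /* the only legal free */
  }

  static void kobj_put(struct kobj *k)
  {
      if (--k->refcount == 0)
          k->release(k);
  }

  int main(void)
  {
      struct tunables *t = malloc(sizeof(*t));

      t->kobj.refcount = 2; /* e.g. the creator plus an open sysfs file */
      t->kobj.release = tunables_release;

      kobj_put(&t->kobj);   /* creator drops its reference: not freed yet */
      kobj_put(&t->kobj);   /* last reference gone: release runs now */
      return 0;
  }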
Fixes: 9bdcb44e391d ("cpufreq: schedutil: New governor based on scheduler utilization data")
Cc: 4.7+ <stable@vger.kernel.org> # 4.7+
Signed-off-by: Kevin Hao <haokexin@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/sched/cpufreq_schedutil.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 1b7ec822dc757..60f0e0e048f0e 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -591,9 +591,17 @@ static struct attribute *sugov_attributes[] = {
     NULL
 };
 
+static void sugov_tunables_free(struct kobject *kobj)
+{
+    struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
+
+    kfree(to_sugov_tunables(attr_set));
+}
+
 static struct kobj_type sugov_tunables_ktype = {
     .default_attrs = sugov_attributes,
     .sysfs_ops = &governor_sysfs_ops,
+    .release = &sugov_tunables_free,
 };
 
 /********************** cpufreq governor interface *********************/
@@ -693,12 +701,10 @@ static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
     return tunables;
 }
 
-static void sugov_tunables_free(struct sugov_tunables *tunables)
+static void sugov_clear_global_tunables(void)
 {
     if (!have_governor_per_policy())
         global_tunables = NULL;
-
-    kfree(tunables);
 }
 
 static int sugov_init(struct cpufreq_policy *policy)
@@ -761,7 +767,7 @@ static int sugov_init(struct cpufreq_policy *policy)
 fail:
     kobject_put(&tunables->attr_set.kobj);
     policy->governor_data = NULL;
-    sugov_tunables_free(tunables);
+    sugov_clear_global_tunables();
 
 stop_kthread:
     sugov_kthread_stop(sg_policy);
@@ -788,7 +794,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
     count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
     policy->governor_data = NULL;
     if (!count)
-        sugov_tunables_free(tunables);
+        sugov_clear_global_tunables();
 
     mutex_unlock(&global_tunables_lock);
From: Eric Dumazet <edumazet@google.com>
stable inclusion from linux-4.19.209 commit 0512a9aede6e4417c4fa6e0042a7ca8bc7e06b86
--------------------------------
[ Upstream commit 35306eb23814444bd4021f8a1c3047d3cb0c8b2b ]
Jann Horn reported that SO_PEERCRED and SO_PEERGROUPS implementations are racy, as af_unix can concurrently change sk_peer_pid and sk_peer_cred.
In order to fix this issue, this patch adds a new spinlock that needs to be used whenever these fields are read or written.
Jann also pointed out that l2cap_sock_get_peer_pid_cb() is currently reading sk->sk_peer_pid, which makes no sense, as this field is only possibly set by AF_UNIX sockets. We will have to clean this up in a separate patch. This could be done by reverting b48596d1dc25 ("Bluetooth: L2CAP: Add get_peer_pid callback") or by implementing what was truly expected.
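The detail worth noting in the patch below is how copy_peercred() takes two peer locks at once: always the lower-addressed lock first, so any two tasks locking the same pair agree on the order and the classic ABBA deadlock cannot occur (spin_lock_nested() is merely a lockdep annotation on top of that). A standalone pthread sketch of the ordering rule:

  #include <pthread.h>
  #include <stdio.h>

  struct obj {
      pthread_mutex_t lock;
      int value;
  };

  /* Lock both objects in a globally consistent order: lowest address
   * first, as the kernel patch does with the two sockets. */
  static void lock_pair(struct obj *a, struct obj *b)
  {
      if (a < b) {
          pthread_mutex_lock(&a->lock);
          pthread_mutex_lock(&b->lock);
      } else {
          pthread_mutex_lock(&b->lock);
          pthread_mutex_lock(&a->lock);
      }
  }

  static void unlock_pair(struct obj *a, struct obj *b)
  {
      pthread_mutex_unlock(&a->lock);
      pthread_mutex_unlock(&b->lock);
  }

  int main(void)
  {
      struct obj x = { PTHREAD_MUTEX_INITIALIZER, 1 };
      struct obj y = { PTHREAD_MUTEX_INITIALIZER, 2 };

      lock_pair(&x, &y); /* identical order to lock_pair(&y, &x) */
      x.value = y.value;
      unlock_pair(&x, &y);
      printf("copied %d\n", x.value);
      return 0;
  }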
Fixes: 109f6e39fa07 ("af_unix: Allow SO_PEERCRED to work across namespaces.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jann Horn <jannh@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/net/sock.h |  2 ++
 net/core/sock.c    | 32 ++++++++++++++++++++++++++------
 net/unix/af_unix.c | 34 ++++++++++++++++++++++++++++------
 3 files changed, 56 insertions(+), 12 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 2c4e0800e2dc6..b90b92882b3b8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -472,8 +472,10 @@ struct sock {
     u32                 sk_ack_backlog;
     u32                 sk_max_ack_backlog;
     kuid_t              sk_uid;
+    spinlock_t          sk_peer_lock;
     struct pid          *sk_peer_pid;
     const struct cred   *sk_peer_cred;
+
     long                sk_rcvtimeo;
     ktime_t             sk_stamp;
 #if BITS_PER_LONG==32
diff --git a/net/core/sock.c b/net/core/sock.c
index a0c1d8531e86e..66a21528c7390 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1079,6 +1079,16 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 }
 EXPORT_SYMBOL(sock_setsockopt);
 
+static const struct cred *sk_get_peer_cred(struct sock *sk)
+{
+    const struct cred *cred;
+
+    spin_lock(&sk->sk_peer_lock);
+    cred = get_cred(sk->sk_peer_cred);
+    spin_unlock(&sk->sk_peer_lock);
+
+    return cred;
+}
 
 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
                           struct ucred *ucred)
@@ -1253,7 +1263,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
         struct ucred peercred;
         if (len > sizeof(peercred))
             len = sizeof(peercred);
+
+        spin_lock(&sk->sk_peer_lock);
         cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+        spin_unlock(&sk->sk_peer_lock);
+
         if (copy_to_user(optval, &peercred, len))
             return -EFAULT;
         goto lenout;
@@ -1261,20 +1275,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 
     case SO_PEERGROUPS:
     {
+        const struct cred *cred;
         int ret, n;
 
-        if (!sk->sk_peer_cred)
+        cred = sk_get_peer_cred(sk);
+        if (!cred)
             return -ENODATA;
 
-        n = sk->sk_peer_cred->group_info->ngroups;
+        n = cred->group_info->ngroups;
         if (len < n * sizeof(gid_t)) {
             len = n * sizeof(gid_t);
+            put_cred(cred);
             return put_user(len, optlen) ? -EFAULT : -ERANGE;
         }
         len = n * sizeof(gid_t);
 
-        ret = groups_to_user((gid_t __user *)optval,
-                             sk->sk_peer_cred->group_info);
+        ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+        put_cred(cred);
         if (ret)
             return ret;
         goto lenout;
@@ -1598,9 +1615,10 @@ static void __sk_destruct(struct rcu_head *head)
         sk->sk_frag.page = NULL;
     }
 
-    if (sk->sk_peer_cred)
-        put_cred(sk->sk_peer_cred);
+    /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+    put_cred(sk->sk_peer_cred);
     put_pid(sk->sk_peer_pid);
+
     if (likely(sk->sk_net_refcnt))
         put_net(sock_net(sk));
     sk_prot_free(sk->sk_prot_creator, sk);
@@ -2848,6 +2866,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
     sk->sk_peer_pid  = NULL;
     sk->sk_peer_cred = NULL;
+    spin_lock_init(&sk->sk_peer_lock);
+
     sk->sk_write_pending = 0;
     sk->sk_rcvlowat = 1;
     sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c293a558b0d4f..82279dbd2f627 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -600,20 +600,42 @@ static void unix_release_sock(struct sock *sk, int embrion)
 
 static void init_peercred(struct sock *sk)
 {
-    put_pid(sk->sk_peer_pid);
-    if (sk->sk_peer_cred)
-        put_cred(sk->sk_peer_cred);
+    const struct cred *old_cred;
+    struct pid *old_pid;
+
+    spin_lock(&sk->sk_peer_lock);
+    old_pid = sk->sk_peer_pid;
+    old_cred = sk->sk_peer_cred;
     sk->sk_peer_pid  = get_pid(task_tgid(current));
     sk->sk_peer_cred = get_current_cred();
+    spin_unlock(&sk->sk_peer_lock);
+
+    put_pid(old_pid);
+    put_cred(old_cred);
 }
 
 static void copy_peercred(struct sock *sk, struct sock *peersk)
 {
-    put_pid(sk->sk_peer_pid);
-    if (sk->sk_peer_cred)
-        put_cred(sk->sk_peer_cred);
+    const struct cred *old_cred;
+    struct pid *old_pid;
+
+    if (sk < peersk) {
+        spin_lock(&sk->sk_peer_lock);
+        spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+    } else {
+        spin_lock(&peersk->sk_peer_lock);
+        spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+    }
+    old_pid = sk->sk_peer_pid;
+    old_cred = sk->sk_peer_cred;
     sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
     sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+    spin_unlock(&sk->sk_peer_lock);
+    spin_unlock(&peersk->sk_peer_lock);
+
+    put_pid(old_pid);
+    put_cred(old_cred);
 }
 
 static int unix_listen(struct socket *sock, int backlog)
From: Chen Jingwen <chenjingwen6@huawei.com>
stable inclusion from linux-4.19.209 commit 90b69bd32a137c751a64fdbff4c8521d6c6a172d
--------------------------------
commit 9b2f72cc0aa4bb444541bb87581c35b7508b37d3 upstream.
In commit b212921b13bd ("elf: don't use MAP_FIXED_NOREPLACE for elf executable mappings") we still leave MAP_FIXED_NOREPLACE in place for load_elf_interp.
Unfortunately, this will cause the kernel to fail to start with:

1 (init): Uhuuh, elf segment at 00003ffff7ffd000 requested but the memory is mapped already
Failed to execute /init (error -17)
The reason is that the elf interpreter (ld.so) has overlapping segments.
readelf -l ld-2.31.so
Program Headers:
  Type   Offset             VirtAddr           PhysAddr
         FileSiz            MemSiz              Flags  Align
  LOAD   0x0000000000000000 0x0000000000000000 0x0000000000000000
         0x000000000002c94c 0x000000000002c94c  R E    0x10000
  LOAD   0x000000000002dae0 0x000000000003dae0 0x000000000003dae0
         0x00000000000021e8 0x0000000000002320  RW     0x10000
  LOAD   0x000000000002fe00 0x000000000003fe00 0x000000000003fe00
         0x00000000000011ac 0x0000000000001328  RW     0x10000
The reason for this problem is the same as described in commit ad55eac74f20 ("elf: enforce MAP_FIXED on overlaying elf segments").
Not only executable binaries but also elf interpreters (e.g. ld.so) can have overlapping elf segments, so we had better drop MAP_FIXED_NOREPLACE and go back to MAP_FIXED in load_elf_interp.
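The semantic difference is easy to reproduce from userspace: MAP_FIXED silently unmaps whatever overlaps the requested range, while MAP_FIXED_NOREPLACE fails with EEXIST, which is exactly what the interpreter's overlapping segments trip over. A small Linux-only demo (it assumes 4 KiB pages and glibc headers new enough to define MAP_FIXED_NOREPLACE):

  #define _GNU_SOURCE
  #include <errno.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/mman.h>

  int main(void)
  {
      char *base = mmap(NULL, 2 * 4096, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      void *p;

      /* Ask for a fixed mapping over an already-mapped page. */
      p = mmap(base + 4096, 4096, PROT_READ,
               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
      if (p == MAP_FAILED)
          printf("NOREPLACE refused: %s\n", strerror(errno)); /* EEXIST */

      /* MAP_FIXED succeeds by silently replacing the old mapping. */
      p = mmap(base + 4096, 4096, PROT_READ,
               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
      printf("MAP_FIXED mapped at %p\n", p);
      return 0;
  }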
Fixes: 4ed28639519c ("fs, elf: drop MAP_FIXED usage from elf_map")
Cc: stable@vger.kernel.org # v4.19
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Chen Jingwen <chenjingwen6@huawei.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 fs/binfmt_elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 059da83112ff9..0fb4234331905 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -601,7 +601,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
             elf_prot |= PROT_EXEC;
         vaddr = eppnt->p_vaddr;
         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
-            elf_type |= MAP_FIXED_NOREPLACE;
+            elf_type |= MAP_FIXED;
         else if (no_base && interp_elf_ex->e_type == ET_DYN)
             load_addr = -vaddr;
From: Eric Dumazet <edumazet@google.com>
stable inclusion from linux-4.19.209 commit f6a6cced386920fe3d24b79ea129827766ae4106
--------------------------------
commit a9f5970767d11eadc805d5283f202612c7ba1f59 upstream.
The up->corkflag field can be read or written without any lock. Annotate accesses to avoid possible syzbot/KCSAN reports.
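The annotations add no ordering; they force single, untorn loads and stores so the compiler cannot re-read, cache, or split the flag, and they mark the race as intentional for KCSAN. A userspace approximation built on the same volatile-cast trick the kernel macros use (simplified; the real macros also handle tearing for large types):

  #include <stdio.h>

  /* simplified forms of the kernel's READ_ONCE()/WRITE_ONCE() */
  #define READ_ONCE(x)     (*(volatile __typeof__(x) *)&(x))
  #define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

  static int corkflag; /* written by one thread, read locklessly by others */

  int main(void)
  {
      WRITE_ONCE(corkflag, 1);
      printf("corked = %d\n", READ_ONCE(corkflag));
      return 0;
  }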
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 net/ipv4/udp.c | 10 +++++-----
 net/ipv6/udp.c |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8200a2f542b06..6ef22a9966246 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -932,7 +932,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
     __be16 dport;
     u8 tos;
     int err, is_udplite = IS_UDPLITE(sk);
-    int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+    int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
     int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
     struct sk_buff *skb;
     struct ip_options_data opt_copy;
@@ -1240,7 +1240,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
     }
 
     up->len += size;
-    if (!(up->corkflag || (flags&MSG_MORE)))
+    if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
         ret = udp_push_pending_frames(sk);
     if (!ret)
         ret = size;
@@ -2465,9 +2465,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
     switch (optname) {
     case UDP_CORK:
         if (val != 0) {
-            up->corkflag = 1;
+            WRITE_ONCE(up->corkflag, 1);
         } else {
-            up->corkflag = 0;
+            WRITE_ONCE(up->corkflag, 0);
             lock_sock(sk);
             push_pending_frames(sk);
             release_sock(sk);
@@ -2580,7 +2580,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 
     switch (optname) {
     case UDP_CORK:
-        val = up->corkflag;
+        val = READ_ONCE(up->corkflag);
         break;
 
     case UDP_ENCAP:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b0553c0371e5a..581763d6df12d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1169,7 +1169,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
     int addr_len = msg->msg_namelen;
     bool connected = false;
     int ulen = len;
-    int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+    int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
     int err;
     int is_udplite = IS_UDPLITE(sk);
     int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
From: NeilBrown <neilb@suse.com>
stable inclusion from linux-4.19.209 commit 7581fdf6c01991f03b1b51d524f1b7b64c920ce3
--------------------------------
commit f06bc03339ad4c1baa964a5f0606247ac1c3c50b upstream.
It is common practice for helpers like this to silently accept a NULL pointer. get_rpccred() and put_rpccred() used by NFS act this way, and using the same interface will ease the conversion for NFS and simplify the resulting code.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/cred.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 1dc351d8548bf..4b081e4911c84 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -240,7 +240,7 @@ static inline struct cred *get_new_cred(struct cred *cred)
  * @cred: The credentials to reference
  *
  * Get a reference on the specified set of credentials.  The caller must
- * release the reference.
+ * release the reference.  If %NULL is passed, it is returned with no action.
  *
  * This is used to deal with a committed set of credentials.  Although the
  * pointer is const, this will temporarily discard the const and increment the
@@ -251,6 +251,8 @@ static inline struct cred *get_new_cred(struct cred *cred)
 static inline const struct cred *get_cred(const struct cred *cred)
 {
     struct cred *nonconst_cred = (struct cred *) cred;
+    if (!cred)
+        return cred;
     validate_creds(cred);
     nonconst_cred->non_rcu = 0;
     return get_new_cred(nonconst_cred);
@@ -261,7 +263,7 @@ static inline const struct cred *get_cred(const struct cred *cred)
  * @cred: The credentials to release
  *
  * Release a reference to a set of credentials, deleting them when the last ref
- * is released.
+ * is released.  If %NULL is passed, nothing is done.
  *
  * This takes a const pointer to a set of credentials because the credentials
  * on task_struct are attached by const pointers to prevent accidental
@@ -271,9 +273,11 @@ static inline void put_cred(const struct cred *_cred)
 {
     struct cred *cred = (struct cred *) _cred;
 
-    validate_creds(cred);
-    if (atomic_dec_and_test(&(cred)->usage))
-        __put_cred(cred);
+    if (cred) {
+        validate_creds(cred);
+        if (atomic_dec_and_test(&(cred)->usage))
+            __put_cred(cred);
+    }
 }
 
 /**
From: Ming Lei <ming.lei@redhat.com>
stable inclusion from linux-4.19.210 commit eadb60bcc2005247d97dcb3becee57aba4024ff4
--------------------------------
[ Upstream commit 265dfe8ebbabae7959060bd1c3f75c2473b697ed ]
After a device is initialized via device_initialize() it should be freed via put_device(). sd_probe() currently gets this wrong; fix it up.
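The rule sd_probe() violated: device_initialize() sets the embedded refcount to one, and from that point on the object may only die through put_device(), which invokes the release callback; jumping to a plain kfree()-style error path skips that. A toy sketch of the pattern with simplified stand-ins for the driver-core calls:

  #include <stdio.h>
  #include <stdlib.h>

  struct device { int ref; }; /* toy model: refcount only */

  static void device_initialize(struct device *d) { d->ref = 1; }

  static void put_device(struct device *d)
  {
      if (--d->ref == 0) {
          printf("release callback frees the object\n");
          free(d);
      }
  }

  static int device_add(struct device *d) { (void)d; return -22; } /* pretend failure */

  int main(void)
  {
      struct device *d = malloc(sizeof(*d));

      device_initialize(d);
      if (device_add(d)) {
          /* after device_initialize(), never free() directly:
           * drop the initial reference instead */
          put_device(d);
          return 1;
      }
      return 0;
  }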
Link: https://lore.kernel.org/r/20210906090112.531442-1-ming.lei@redhat.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/scsi/sd.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 4acce1f56b9d3..e674565ae4885 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3450,15 +3450,16 @@ static int sd_probe(struct device *dev)
     }
 
     device_initialize(&sdkp->dev);
-    sdkp->dev.parent = dev;
+    sdkp->dev.parent = get_device(dev);
     sdkp->dev.class = &sd_disk_class;
     dev_set_name(&sdkp->dev, "%s", dev_name(dev));
 
     error = device_add(&sdkp->dev);
-    if (error)
-        goto out_free_index;
+    if (error) {
+        put_device(&sdkp->dev);
+        goto out;
+    }
 
-    get_device(dev);
     dev_set_drvdata(dev, sdkp);
 
     get_device(&sdkp->dev); /* prevent release before async_schedule */
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
stable inclusion from linux-4.19.211 commit 9b6b7054dd8e8f9750d5d7d88d35421dff3b16f4
--------------------------------
[ Upstream commit 79e3445b38e0cab94264a3894c0c3d57c930b97e ]
On ARM CPUs that lack div/mod instructions, ALU32 BPF_DIV and BPF_MOD are implemented using a call to a helper function. Before, the emitted code for those function calls failed to preserve caller-saved ARM registers. Since some of those registers happen to be mapped to BPF registers, it resulted in eBPF register values being overwritten.
This patch emits code to push and pop the remaining caller-saved ARM registers r2-r3 into the stack during the div/mod function call. ARM registers r0-r1 are used as arguments and return value, and those were already saved and restored correctly.
Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/arm/net/bpf_jit_32.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 5f7766ebdeb5a..8fcdb65a4c68b 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -39,6 +39,10 @@
  * +-----+
  * |RSVD | JIT scratchpad
  * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
+ * | ... | caller-saved registers
+ * +-----+
+ * | ... | arguments passed on stack
+ * ARM_SP during call => +-----|
  * |     |
  * | ... | Function call stack
  * |     |
@@ -66,6 +70,12 @@
  *
  * When popping registers off the stack at the end of a BPF function, we
  * reference them via the current ARM_FP register.
+ *
+ * Some eBPF operations are implemented via a call to a helper function.
+ * Such calls are "invisible" in the eBPF code, so it is up to the calling
+ * program to preserve any caller-saved ARM registers during the call. The
+ * JIT emits code to push and pop those registers onto the stack, immediately
+ * above the callee stack frame.
  */
 #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
                      1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \
@@ -73,6 +83,8 @@
 #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
 #define CALLEE_POP_MASK  (CALLEE_MASK | 1 << ARM_PC)
 
+#define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3)
+
 enum {
     /* Stack layout - these are offsets from (top of stack - 4) */
     BPF_R2_HI,
@@ -467,6 +479,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
 
 static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 {
+    const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
     const s8 *tmp = bpf2a32[TMP_REG_1];
 
 #if __LINUX_ARM_ARCH__ == 7
@@ -498,11 +511,17 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
         emit(ARM_MOV_R(ARM_R0, rm), ctx);
     }
 
+    /* Push caller-saved registers on stack */
+    emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);
+
     /* Call appropriate function */
     emit_mov_i(ARM_IP, op == BPF_DIV ?
                (u32)jit_udiv32 : (u32)jit_mod32, ctx);
     emit_blx_r(ARM_IP, ctx);
 
+    /* Restore caller-saved registers from stack */
+    emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx);
+
     /* Save return value */
     if (rd != ARM_R0)
         emit(ARM_MOV_R(rd, ARM_R0), ctx);
From: Pavel Skripkin <paskripkin@gmail.com>
stable inclusion from linux-4.19.211 commit 2397b9e118721292429fea8807a698e71b94795f
--------------------------------
[ Upstream commit ca6e11c337daf7925ff8a2aac8e84490a8691905 ]
Syzbot reported a memory leak in the MDIO bus interface; the problem was wrong state logic.

MDIOBUS_ALLOCATED indicates two states:
 1. The bus is only allocated.
 2. The bus is allocated and __mdiobus_register() fails, but device_register() was called.

In case device_register() has been called, we should call put_device() to correctly free the memory allocated for this device, but mdiobus_free() just calls kfree(dev) in the MDIOBUS_ALLOCATED state.
To avoid this behaviour we need to set bus->state to MDIOBUS_UNREGISTERED _before_ calling device_register(), because put_device() should be called even in case of device_register() failure.
Link: https://lore.kernel.org/netdev/YVMRWNDZDUOvQjHL@shell.armlinux.org.uk/
Fixes: 46abc02175b3 ("phylib: give mdio buses a device tree presence")
Reported-and-tested-by: syzbot+398e7dc692ddbbb4cfec@syzkaller.appspotmail.com
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/eceae1429fbf8fa5c73dd2a0d39d525aa905074d.163302406...
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/net/phy/mdio_bus.c | 7 +++++++
 1 file changed, 7 insertions(+)
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 5f25da904eba8..82cc1983c09aa 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -377,6 +377,13 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
     bus->dev.groups = NULL;
     dev_set_name(&bus->dev, "%s", bus->id);
 
+    /* We need to set state to MDIOBUS_UNREGISTERED to correctly release
+     * the device in mdiobus_free()
+     *
+     * State will be updated later in this function in case of success
+     */
+    bus->state = MDIOBUS_UNREGISTERED;
+
     err = device_register(&bus->dev);
     if (err) {
         pr_err("mii_bus %s failed to register\n", bus->id);
From: Eric Dumazet <edumazet@google.com>
stable inclusion from linux-4.19.211 commit d07098f45be868a9cdce6c616563c36c64dbbd87
--------------------------------
[ Upstream commit 560ee196fe9e5037e5015e2cdb14b3aecb1cd7dc ]
syzbot reported another NULL deref in fifo_set_limit() [1]
I could repro the issue with:

unshare -n
tc qd add dev lo root handle 1:0 tbf limit 200000 burst 70000 rate 100Mbit
tc qd replace dev lo parent 1:0 pfifo_fast
tc qd change dev lo root handle 1:0 tbf limit 300000 burst 70000 rate 100Mbit
pfifo_fast does not have a change() operation. Make fifo_set_limit() more robust about this.
[1]
BUG: kernel NULL pointer dereference, address: 0000000000000000
PGD 1cf99067 P4D 1cf99067 PUD 7ca49067 PMD 0
Oops: 0010 [#1] PREEMPT SMP KASAN
CPU: 1 PID: 14443 Comm: syz-executor959 Not tainted 5.15.0-rc3-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:0x0
Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6.
RSP: 0018:ffffc9000e2f7310 EFLAGS: 00010246
RAX: dffffc0000000000 RBX: ffffffff8d6ecc00 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffff888024c27910 RDI: ffff888071e34000
RBP: ffff888071e34000 R08: 0000000000000001 R09: ffffffff8fcfb947
R10: 0000000000000001 R11: 0000000000000000 R12: ffff888024c27910
R13: ffff888071e34018 R14: 0000000000000000 R15: ffff88801ef74800
FS:  00007f321d897700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffffffffd6 CR3: 00000000722c3000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 fifo_set_limit net/sched/sch_fifo.c:242 [inline]
 fifo_set_limit+0x198/0x210 net/sched/sch_fifo.c:227
 tbf_change+0x6ec/0x16d0 net/sched/sch_tbf.c:418
 qdisc_change net/sched/sch_api.c:1332 [inline]
 tc_modify_qdisc+0xd9a/0x1a60 net/sched/sch_api.c:1634
 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5572
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504
 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340
 netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929
 sock_sendmsg_nosec net/socket.c:704 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:724
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2463
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae
Fixes: fb0305ce1b03 ("net-sched: consolidate default fifo qdisc setup")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20210930212239.3430364-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 net/sched/sch_fifo.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 24893d3b5d229..bcd3ca97caea1 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -152,6 +152,9 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
     if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
         return 0;
 
+    if (!q->ops->change)
+        return 0;
+
     nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
     if (nla) {
         nla->nla_type = RTM_NEWQDISC;
From: Eric Dumazet <edumazet@google.com>
stable inclusion from linux-4.19.211 commit 198d4e60e932f59bc8555afb015b90e8c898ac1b
--------------------------------
[ Upstream commit dbe0b88064494b7bb6a9b2aa7e085b14a3112d44 ]
bridge_fill_linkxstats() is using nla_reserve_64bit().
We must use nla_total_size_64bit() instead of nla_total_size() for the corresponding data structure.
Fixes: 1080ab95e3c7 ("net: bridge: add support for IGMP/MLD stats and export them via netlink")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Nikolay Aleksandrov <nikolay@nvidia.com>
Cc: Vivien Didelot <vivien.didelot@gmail.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 net/bridge/br_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ec2b58a09f763..c00cb376263a0 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1511,7 +1511,7 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
     }
 
     return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) +
-           nla_total_size(sizeof(struct br_mcast_stats)) +
+           nla_total_size_64bit(sizeof(struct br_mcast_stats)) +
            nla_total_size(0);
 }
From: Eric Dumazet <edumazet@google.com>
stable inclusion from linux-4.19.211 commit d4e931feea8225698311a5f9e423b17ef0979c3b
--------------------------------
[ Upstream commit 7707a4d01a648e4c655101a469c956cb11273655 ]
While the existing code is correct, KCSAN is reporting a data-race in netlink_insert / netlink_sendmsg [1].
It is correct to read nlk->bound without a lock, as netlink_autobind() will acquire all needed locks.
[1]
BUG: KCSAN: data-race in netlink_insert / netlink_sendmsg

write to 0xffff8881031c8b30 of 1 bytes by task 18752 on cpu 0:
 netlink_insert+0x5cc/0x7f0 net/netlink/af_netlink.c:597
 netlink_autobind+0xa9/0x150 net/netlink/af_netlink.c:842
 netlink_sendmsg+0x479/0x7c0 net/netlink/af_netlink.c:1892
 sock_sendmsg_nosec net/socket.c:703 [inline]
 sock_sendmsg net/socket.c:723 [inline]
 ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392
 ___sys_sendmsg net/socket.c:2446 [inline]
 __sys_sendmsg+0x1ed/0x270 net/socket.c:2475
 __do_sys_sendmsg net/socket.c:2484 [inline]
 __se_sys_sendmsg net/socket.c:2482 [inline]
 __x64_sys_sendmsg+0x42/0x50 net/socket.c:2482
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff8881031c8b30 of 1 bytes by task 18751 on cpu 1:
 netlink_sendmsg+0x270/0x7c0 net/netlink/af_netlink.c:1891
 sock_sendmsg_nosec net/socket.c:703 [inline]
 sock_sendmsg net/socket.c:723 [inline]
 __sys_sendto+0x2a8/0x370 net/socket.c:2019
 __do_sys_sendto net/socket.c:2031 [inline]
 __se_sys_sendto net/socket.c:2027 [inline]
 __x64_sys_sendto+0x74/0x90 net/socket.c:2027
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae
value changed: 0x00 -> 0x01
Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 18751 Comm: syz-executor.0 Not tainted 5.14.0-rc1-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Fixes: da314c9923fe ("netlink: Replace rhash_portid with bound")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Conflicts:
    net/netlink/af_netlink.c
[yyl: adjust context]
Reviewed-by: Yue Haibing <yuehaibing@huawei.com>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 net/netlink/af_netlink.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7c64ae48c0194..305248cfb5d2f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -599,7 +599,10 @@ static int netlink_insert(struct sock *sk, u32 portid)
 
     /* We need to ensure that the socket is hashed and visible. */
     smp_wmb();
-    nlk_sk(sk)->bound = portid;
+    /* Paired with lockless reads from netlink_bind(),
+     * netlink_connect() and netlink_sendmsg().
+     */
+    WRITE_ONCE(nlk_sk(sk)->bound, portid);
 
 err:
     release_sock(sk);
@@ -1017,7 +1020,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
     if (nlk->ngroups < BITS_PER_LONG)
         groups &= (1UL << nlk->ngroups) - 1;
 
-    bound = nlk->bound;
+    /* Paired with WRITE_ONCE() in netlink_insert() */
+    bound = READ_ONCE(nlk->bound);
     if (bound) {
         /* Ensure nlk->portid is up-to-date. */
         smp_rmb();
@@ -1103,8 +1107,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 
     /* No need for barriers here as we return to user-space without
      * using any of the bound attributes.
+     * Paired with WRITE_ONCE() in netlink_insert().
      */
-    if (!nlk->bound)
+    if (!READ_ONCE(nlk->bound))
         err = netlink_autobind(sock);
 
     if (err == 0) {
@@ -1869,7 +1874,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
         dst_group = nlk->dst_group;
     }
 
-    if (!nlk->bound) {
+    /* Paired with WRITE_ONCE() in netlink_insert() */
+    if (!READ_ONCE(nlk->bound)) {
         err = netlink_autobind(sock);
         if (err)
             goto out;
From: Eric Dumazet <edumazet@google.com>
stable inclusion from linux-4.19.211 commit 8cf8286ff286716ad61cef5d2a647faeb218d081
--------------------------------
[ Upstream commit d34367991933d28bd7331f67a759be9a8c474014 ]
rtnl_fill_statsinfo() is filling skb with one mandatory if_stats_msg structure.
nlmsg_put(skb, pid, seq, type, sizeof(struct if_stats_msg), flags);
But if_nlmsg_stats_size() never considered the needed storage.
This bug did not show up because alloc_skb(X) allocates the skb with extra tailroom because of added alignments. This could very well be changed in the future to have deterministic behavior.
Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump link stats")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Roopa Prabhu <roopa@nvidia.com>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 055fd09ac1114..83de32e34bb55 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4512,7 +4512,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 static size_t if_nlmsg_stats_size(const struct net_device *dev,
                                   u32 filter_mask)
 {
-    size_t size = 0;
+    size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
 
     if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
         size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
From: Jeremy Sowden <jeremy@azazel.net>
stable inclusion from linux-4.19.212 commit d169887d824bff0101c7c7ec2292d250e8cfe95d
--------------------------------
[ Upstream commit 310e2d43c3ad429c1fba4b175806cf1f55ed73a6 ]
ip6tables only sets the `IP6T_F_PROTO` flag on a rule if a protocol is specified (`-p tcp`, for example). However, if the flag is not set, `ip6_packet_match` doesn't call `ipv6_find_hdr` for the skb, in which case the fragment offset is left uninitialized and a garbage value is passed to each matcher.
Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 net/ipv6/netfilter/ip6_tables.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index dd0c1073dc8ee..d93490ac82759 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -276,6 +276,7 @@ ip6t_do_table(struct sk_buff *skb,
      * things we don't know, ie. tcp syn flag or ports).  If the
      * rule is also a fragment-specific rule, non-fragments won't
      * match it. */
+    acpar.fragoff = 0;
     acpar.hotdrop = false;
     acpar.state   = state;
From: 王贇 <yun.wang@linux.alibaba.com>
stable inclusion from linux-4.19.212 commit 65492b05f4a4a18300863f00c51481cd8b32c689
--------------------------------
[ Upstream commit b193e15ac69d56f35e1d8e2b5d16cbd47764d053 ]
We observed the below report when playing with a netlink socket:

UBSAN: shift-out-of-bounds in net/sched/sch_api.c:580:10
shift exponent 249 is too large for 32-bit type
CPU: 0 PID: 685 Comm: a.out Not tainted
Call Trace:
 dump_stack_lvl+0x8d/0xcf
 ubsan_epilogue+0xa/0x4e
 __ubsan_handle_shift_out_of_bounds+0x161/0x182
 __qdisc_calculate_pkt_len+0xf0/0x190
 __dev_queue_xmit+0x2ed/0x15b0

It seems the kernel does not check the stab log values passed in from userspace and will later use these insane values to calculate pkt_len.

This patch just adds a check on size_log/cell_log to avoid the insane calculation.
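The bound works because a shift count must be smaller than the bit width of the promoted operand: shifting a 32-bit value by 249, as in the report, is undefined behaviour. Clamping the user-supplied logs to 30 keeps every subsequent shift well defined. A small userspace demo of the check, mirroring the patch's constant:

  #include <stdint.h>
  #include <stdio.h>

  #define STAB_SIZE_LOG_MAX 30 /* same bound the patch picks */

  static int cells_for(uint32_t size, uint32_t cell_log)
  {
      if (cell_log > STAB_SIZE_LOG_MAX) {
          fprintf(stderr, "rejecting cell_log %u: shift would be UB\n", cell_log);
          return -1;
      }
      printf("cells = %u\n", size >> cell_log); /* safe: count provably < 31 */
      return 0;
  }

  int main(void)
  {
      cells_for(150000, 10);  /* fine */
      cells_for(150000, 249); /* the reported value: rejected */
      return 0;
  }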
Reported-by: Abaci <abaci@linux.alibaba.com>
Signed-off-by: Michael Wang <yun.wang@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Acked-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/net/pkt_sched.h | 1 +
 net/sched/sch_api.c     | 6 ++++++
 2 files changed, 7 insertions(+)
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 5e99771a5dcc2..edca90ef3bdc4 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -11,6 +11,7 @@
 #include <uapi/linux/pkt_sched.h>
 
 #define DEFAULT_TX_QUEUE_LEN 1000
+#define STAB_SIZE_LOG_MAX    30
 
 struct qdisc_walker {
     int stop;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 576f70e078bc6..2c9aa550763dd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -498,6 +498,12 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
         return stab;
     }
 
+    if (s->size_log > STAB_SIZE_LOG_MAX ||
+        s->cell_log > STAB_SIZE_LOG_MAX) {
+        NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
+        return ERR_PTR(-EINVAL);
+    }
+
     stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
     if (!stab)
         return ERR_PTR(-ENOMEM);
From: Anand K Mistry <amistry@google.com>
stable inclusion from linux-4.19.212 commit 2e5484e212af8fe551eda63a42336bce37d9e1cd
--------------------------------
[ Upstream commit 02d029a41dc986e2d5a77ecca45803857b346829 ]
perf_init_event tries multiple init callbacks and does not reset the event state between tries. When x86_pmu_event_init runs, it unconditionally sets the destroy callback to hw_perf_event_destroy. On the next init attempt after x86_pmu_event_init, in perf_try_init_event, if the pmu's capabilities include PERF_PMU_CAP_NO_EXCLUDE, the destroy callback will be run. However, if the next init didn't set the destroy callback, hw_perf_event_destroy will be run (since the callback wasn't reset).
Looking at other pmu init functions, the common pattern is to only set the destroy callback on a successful init. Resetting the callback on failure tries to replicate that pattern.
This was discovered after commit f11dd0d80555 ("perf/x86/amd/ibs: Extend PERF_PMU_CAP_NO_EXCLUDE to IBS Op") when the second (and only second) run of the perf tool after a reboot results in 0 samples being generated. The extra run of hw_perf_event_destroy results in active_events having an extra decrement on each perf run. The second run has active_events == 0 and every subsequent run has active_events < 0. When active_events == 0, the NMI handler will early-out and not record any samples.
Signed-off-by: Anand K Mistry amistry@google.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/20210929170405.1.I078b98ee7727f9ae9d6df8262bad7e32... Signed-off-by: Sasha Levin sashal@kernel.org Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/events/core.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 7239f6109cc0b..a8e97f7176699 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2165,6 +2165,7 @@ static int x86_pmu_event_init(struct perf_event *event)
 	if (err) {
 		if (event->destroy)
 			event->destroy(event);
+		event->destroy = NULL;
 	}

 	if (READ_ONCE(x86_pmu.attr_rdpmc) &&
From: Peter Zijlstra peterz@infradead.org
stable inclusion from linux-4.19.212 commit dcc0e1a90a4a1bcc6837ed1c0496f164525eaac9
--------------------------------
[ Upstream commit 83d40a61046f73103b4e5d8f1310261487ff63b0 ]
vmlinux.o: warning: objtool: check_preemption_disabled()+0x81: call to is_percpu_thread() leaves .noinstr.text section

A plain `inline` is only a hint, so the compiler may emit is_percpu_thread() out of line; the resulting call from noinstr code then escapes the .noinstr.text section. Marking the function __always_inline forces it to be inlined at every call site, keeping the noinstr caller self-contained.
Reported-by: Stephen Rothwell sfr@canb.auug.org.au Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/20210928084218.063371959@infradead.org Signed-off-by: Sasha Levin sashal@kernel.org Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2d864748d696f..945a57ecd9a51 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1475,7 +1475,7 @@ extern struct pid *cad_pid;
 #define tsk_used_math(p)	((p)->flags & PF_USED_MATH)
 #define used_math()		tsk_used_math(current)

-static inline bool is_percpu_thread(void)
+static __always_inline bool is_percpu_thread(void)
 {
 #ifdef CONFIG_SMP
 	return (current->flags & PF_NO_SETAFFINITY) &&
From: James Morse james.morse@arm.com
stable inclusion from linux-4.19.213 commit f961a224e73138a93f4df8503ecd8c6928291c53
--------------------------------
commit 64e87d4bd3201bf8a4685083ee4daf5c0d001452 upstream.
domain_add_cpu() is called whenever a CPU is brought online. The earlier call to domain_setup_ctrlval() allocates the control value arrays.
If domain_setup_mon_state() fails, the control value arrays are not freed.
Add the missing kfree() calls.
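As a stand-alone sketch of the corrected error path (plain C rather than kernel code; domain_add() and the field names are borrowed for illustration, and free(NULL) is a no-op just like kfree(NULL)):

  #include <stdlib.h>

  struct domain {
          int *ctrl_val;
          int *mbps_val;
  };

  static struct domain *domain_add(void)
  {
          struct domain *d = calloc(1, sizeof(*d));

          if (!d)
                  return NULL;
          d->ctrl_val = calloc(8, sizeof(*d->ctrl_val));
          d->mbps_val = calloc(8, sizeof(*d->mbps_val));
          if (!d->ctrl_val || !d->mbps_val)
                  goto err;
          /* ... later setup that may fail would also jump to err ... */
          return d;
  err:
          free(d->ctrl_val);  /* free everything the earlier steps allocated */
          free(d->mbps_val);
          free(d);
          return NULL;
  }

  int main(void)
  {
          struct domain *d = domain_add();

          if (d) {
                  free(d->ctrl_val);
                  free(d->mbps_val);
                  free(d);
          }
          return 0;
  }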
Fixes: 1bd2a63b4f0de ("x86/intel_rdt/mba_sc: Add initialization support") Fixes: edf6fa1c4a951 ("x86/intel_rdt/cqm: Add RMID (Resource monitoring ID) management") Signed-off-by: James Morse james.morse@arm.com Signed-off-by: Borislav Petkov bp@suse.de Acked-by: Reinette Chatre reinette.chatre@intel.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210917165958.28313-1-james.morse@arm.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/kernel/cpu/intel_rdt.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index b99a04da70f61..f955847a9527f 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -561,6 +561,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 	}

 	if (r->mon_capable && domain_setup_mon_state(r, d)) {
+		kfree(d->ctrl_val);
+		kfree(d->mbps_val);
 		kfree(d);
 		return;
 	}
From: Jackie Liu liuyun01@kylinos.cn
stable inclusion from linux-4.19.213 commit 420824f45900b25739f01fa01c57569f8f59f55c
--------------------------------
commit 596143e3aec35c93508d6b7a05ddc999ee209b61 upstream.
Fix modpost Section mismatch error in next_platform_timer().
[...]
WARNING: modpost: vmlinux.o(.text.unlikely+0x26e60): Section mismatch in reference from the function next_platform_timer() to the variable .init.data:acpi_gtdt_desc
The function next_platform_timer() references the variable __initdata acpi_gtdt_desc.
This is often because next_platform_timer lacks a __initdata annotation or the annotation of acpi_gtdt_desc is wrong.

WARNING: modpost: vmlinux.o(.text.unlikely+0x26e64): Section mismatch in reference from the function next_platform_timer() to the variable .init.data:acpi_gtdt_desc
The function next_platform_timer() references the variable __initdata acpi_gtdt_desc.
This is often because next_platform_timer lacks a __initdata annotation or the annotation of acpi_gtdt_desc is wrong.

ERROR: modpost: Section mismatches detected.
Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them.
make[1]: *** [scripts/Makefile.modpost:59: vmlinux.symvers] Error 1
make[1]: *** Deleting file 'vmlinux.symvers'
make: *** [Makefile:1176: vmlinux] Error 2
[...]
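For background, a small user-space sketch of why the annotation matters. The two macros below only mimic, in spirit, the kernel's __init/__initdata (the real ones carry more attributes, and modpost only runs on kernel builds); the point is that a function referencing init data must itself live in an init section so both are discarded together:

  /* hypothetical stand-ins for the kernel macros */
  #define __init     __attribute__((__section__(".my.init.text")))
  #define __initdata __attribute__((__section__(".my.init.data")))

  static int boot_value __initdata = 42;

  /* without __init this function would land in .text while referencing
   * init data; modpost flags exactly that cross-section reference */
  static int __init read_boot_value(void)
  {
          return boot_value;
  }

  int main(void)
  {
          return read_boot_value() == 42 ? 0 : 1;
  }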
Fixes: a712c3ed9b8a ("acpi/arm64: Add memory-mapped timer support in GTDT driver") Signed-off-by: Jackie Liu liuyun01@kylinos.cn Acked-by: Hanjun Guo guohanjun@huawei.com Link: https://lore.kernel.org/r/20210823092526.2407526-1-liu.yun@linux.dev Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/acpi/arm64/gtdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c
index c39b36c558d6f..7a181a8a9bf04 100644
--- a/drivers/acpi/arm64/gtdt.c
+++ b/drivers/acpi/arm64/gtdt.c
@@ -39,7 +39,7 @@ struct acpi_gtdt_descriptor {

 static struct acpi_gtdt_descriptor acpi_gtdt_desc __initdata;

-static inline void *next_platform_timer(void *platform_timer)
+static inline __init void *next_platform_timer(void *platform_timer)
 {
 	struct acpi_gtdt_header *gh = platform_timer;
From: Gerald Schaefer gerald.schaefer@linux.ibm.com
stable inclusion from linux-4.19.214 commit db9aadf3bb8d7c1def4813f142cba9cf94be2999
--------------------------------
[ Upstream commit 293d92cbbd2418ca2ba43fed07f1b92e884d1c77 ]
The following warning occurred sporadically on s390:

DMA-API: nvme 0006:00:00.0: device driver maps memory from kernel text or rodata [addr=0000000048cc5e2f] [len=131072]
WARNING: CPU: 4 PID: 825 at kernel/dma/debug.c:1083 check_for_illegal_area+0xa8/0x138
It is a false-positive warning, due to broken logic in debug_dma_map_sg(). check_for_illegal_area() checks for overlay of sg elements with kernel text or rodata. It is called with sg_dma_len(s) instead of s->length as parameter. After the call to ->map_sg(), sg_dma_len() will contain the length of possibly combined sg elements in the DMA address space, and not the individual sg element length, which would be s->length.
The check will then use the physical start address of an sg element, and add the DMA length for the overlap check, which could result in the false warning, because the DMA length can be larger than the actual single sg element length.
In addition, the call to check_for_illegal_area() happens in the iteration over mapped_ents, which will not include all individual sg elements if any of them were combined in ->map_sg().
Fix this by using s->length instead of sg_dma_len(s). Also put the call to check_for_illegal_area() in a separate loop, iterating over all the individual sg elements ("nents" instead of "mapped_ents").
While at it, as suggested by Robin Murphy, also move check_for_stack() inside the new loop, as it is similarly concerned with validating the individual sg elements.
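A stand-alone model (plain C) of the corrected loop split; sg_entry and its fields are simplified stand-ins for struct scatterlist, s->length and sg_dma_len(s):

  #include <stdio.h>

  struct sg_entry {
          unsigned long virt;   /* CPU address of this element */
          unsigned int length;  /* s->length: individual element size */
          unsigned int dma_len; /* sg_dma_len(s): possibly merged size */
  };

  static void check_area(unsigned long virt, unsigned int len)
  {
          printf("check [%#lx, +%u]\n", virt, len);
  }

  static void debug_map_sg(struct sg_entry *sg, int nents, int mapped_ents)
  {
          int i;

          /* validate every individual element with its own length ... */
          for (i = 0; i < nents; i++)
                  check_area(sg[i].virt, sg[i].length);

          /* ... but do entry bookkeeping only for the mapped entries,
           * whose dma_len may cover several merged elements */
          for (i = 0; i < mapped_ents; i++)
                  printf("entry %d: dma_len=%u\n", i, sg[i].dma_len);
  }

  int main(void)
  {
          /* two 4 KiB elements merged into one mapping: checking
           * virt + dma_len would falsely extend past element 0 */
          struct sg_entry sg[2] = {
                  { 0x1000, 4096, 8192 },
                  { 0x2000, 4096, 0 },
          };

          debug_map_sg(sg, 2, 1);  /* nents = 2, mapped_ents = 1 */
          return 0;
  }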
Link: https://lore.kernel.org/lkml/20210705185252.4074653-1-gerald.schaefer@linux.... Fixes: 884d05970bfb ("dma-debug: use sg_dma_len accessor") Signed-off-by: Gerald Schaefer gerald.schaefer@linux.ibm.com Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/dma/debug.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 3a2397444076e..1c82b0d25498f 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -1422,6 +1422,12 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	if (unlikely(dma_debug_disabled()))
 		return;

+	for_each_sg(sg, s, nents, i) {
+		check_for_stack(dev, sg_page(s), s->offset);
+		if (!PageHighMem(sg_page(s)))
+			check_for_illegal_area(dev, sg_virt(s), s->length);
+	}
+
 	for_each_sg(sg, s, mapped_ents, i) {
 		entry = dma_entry_alloc();
 		if (!entry)
@@ -1437,12 +1443,6 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		entry->sg_call_ents   = nents;
 		entry->sg_mapped_ents = mapped_ents;

-		check_for_stack(dev, sg_page(s), s->offset);
-
-		if (!PageHighMem(sg_page(s))) {
-			check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
-		}
-
 		check_sg_segment(dev, s);

 		add_dma_entry(entry);
From: "Matthew Wilcox (Oracle)" willy@infradead.org
stable inclusion from linux-4.19.214 commit c1ba20965b59c2eeb54a845ca5cab4fc7bcf9735
--------------------------------
commit 032146cda85566abcd1c4884d9d23e4e30a07e9a upstream.
If we open a file without read access and then pass the fd to a syscall whose implementation calls kernel_read_file_from_fd(), we get a warning from __kernel_read():
if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
This currently affects both finit_module() and kexec_file_load(), but it could affect other syscalls in the future.
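A user-space reproducer sketch (the module path is a placeholder; run as root, since unprivileged callers fail with EPERM before the fd is even read). After the fix the kernel returns EBADF instead of tripping the WARN in __kernel_read():

  #define _GNU_SOURCE
  #include <errno.h>
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  int main(void)
  {
          /* write-only fd: FMODE_READ is not set */
          int fd = open("/lib/modules/example.ko", O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          if (syscall(SYS_finit_module, fd, "", 0) < 0)
                  printf("finit_module: %s\n", strerror(errno)); /* expect EBADF */
          close(fd);
          return 0;
  }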
Link: https://lkml.kernel.org/r/20211007220110.600005-1-willy@infradead.org Fixes: b844f0ecbc56 ("vfs: define kernel_copy_file_from_fd()") Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org Reported-by: Hao Sun sunhao.th@gmail.com Reviewed-by: Kees Cook keescook@chromium.org Acked-by: Christian Brauner christian.brauner@ubuntu.com Cc: Al Viro viro@zeniv.linux.org.uk Cc: Mimi Zohar zohar@linux.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/exec.c b/fs/exec.c
index 07b536eb6c446..426842c38ecb6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -984,7 +984,7 @@ int kernel_read_file_from_fd(int fd, void **buf, loff_t *size, loff_t max_size,
 	struct fd f = fdget(fd);
 	int ret = -EBADF;

-	if (!f.file)
+	if (!f.file || !(f.file->f_mode & FMODE_READ))
 		goto out;

 	ret = kernel_read_file(f.file, buf, size, max_size, id);
From: Miaohe Lin linmiaohe@huawei.com
stable inclusion from linux-4.19.214 commit 21e23e4c02fc9aa1f4b037f30e47d215d1a27f21
--------------------------------
commit 899447f669da76cc3605665e1a95ee877bc464cc upstream.
If an object's reuse is delayed, it will be excluded from the reconstructed freelist. But we forgot to adjust the cnt accordingly, so there will be a mismatch between the reconstructed freelist depth and cnt. This will lead to free_debug_processing() complaining about the freelist count or an incorrect slub inuse count.
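A plain-C model of the bookkeeping rule (reuse_delayed() and rebuild_freelist() are illustrative stand-ins for the KASAN hook and the freelist reconstruction): whenever an object is skipped, the count handed to the actual free path must shrink with it.

  #include <stdbool.h>
  #include <stdio.h>

  static bool reuse_delayed(int obj)
  {
          return obj % 2 == 0;  /* pretend even objects are quarantined */
  }

  static int rebuild_freelist(const int *objs, int n, int *out, int *cnt)
  {
          int kept = 0;

          for (int i = 0; i < n; i++) {
                  if (!reuse_delayed(objs[i]))
                          out[kept++] = objs[i];
                  else
                          --(*cnt);  /* the fix: keep cnt in sync */
          }
          return kept;
  }

  int main(void)
  {
          int objs[] = { 1, 2, 3, 4, 5 }, out[5], cnt = 5;
          int kept = rebuild_freelist(objs, 5, out, &cnt);

          printf("kept=%d cnt=%d\n", kept, cnt);  /* both 3: consistent */
          return kept != cnt;
  }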
Link: https://lkml.kernel.org/r/20210916123920.48704-3-linmiaohe@huawei.com Fixes: c3895391df38 ("kasan, slub: fix handling of kasan_slab_free hook") Signed-off-by: Miaohe Lin linmiaohe@huawei.com Reviewed-by: Vlastimil Babka vbabka@suse.cz Cc: Andrey Konovalov andreyknvl@gmail.com Cc: Andrey Ryabinin ryabinin.a.a@gmail.com Cc: Bharata B Rao bharata@linux.ibm.com Cc: Christoph Lameter cl@linux.com Cc: David Rientjes rientjes@google.com Cc: Faiyaz Mohammed faiyazm@codeaurora.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Kees Cook keescook@chromium.org Cc: Pekka Enberg penberg@kernel.org Cc: Roman Gushchin guro@fb.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/slub.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 04aa35f650877..7b5630ca92746 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1398,7 +1398,8 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
 }

 static inline bool slab_free_freelist_hook(struct kmem_cache *s,
-					   void **head, void **tail)
+					   void **head, void **tail,
+					   int *cnt)
 {
 	/*
 	 * Compiler cannot detect this function can be removed if slab_free_hook()
@@ -1427,6 +1428,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
 			*head = object;
 			if (!*tail)
 				*tail = object;
+		} else {
+			/*
+			 * Adjust the reconstructed freelist depth
+			 * accordingly if object's reuse is delayed.
+			 */
+			--(*cnt);
 		}
 	} while (object != old_tail);

@@ -2994,7 +3001,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
 	 * With KASAN enabled slab_free_freelist_hook modifies the freelist
 	 * to remove objects, whose reuse must be delayed.
 	 */
-	if (slab_free_freelist_hook(s, &head, &tail))
+	if (slab_free_freelist_hook(s, &head, &tail, &cnt))
 		do_slab_free(s, page, head, tail, cnt, addr);
 }
From: Vegard Nossum vegard.nossum@gmail.com
stable inclusion from linux-4.19.214 commit c2eec4bd849f754e6e0e89652256b2034943319a
--------------------------------
commit 77076934afdcd46516caf18ed88b2f88025c9ddb upstream.
This option, NF_CONNTRACK_SECMARK, is a bool, so it can never be 'm'.
Fixes: 33b8e77605620 ("[NETFILTER]: Add CONFIG_NETFILTER_ADVANCED option") Signed-off-by: Vegard Nossum vegard.nossum@oracle.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e0fb56d67d42f..56cddadb65d0c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -93,7 +93,7 @@ config NF_CONNTRACK_MARK
 config NF_CONNTRACK_SECMARK
 	bool 'Connection tracking security mark support'
 	depends on NETWORK_SECMARK
-	default m if NETFILTER_ADVANCED=n
+	default y if NETFILTER_ADVANCED=n
 	help
 	  This option enables security markings to be applied to
 	  connections.  Typically they are copied to connections from
From: "Steven Rostedt (VMware)" rostedt@goodmis.org
stable inclusion from linux-4.19.214 commit 3de1ed125fc4c35bf7abb08260646100a6dcb04e
--------------------------------
commit ed65df63a39a3f6ed04f7258de8b6789e5021c18 upstream.
While writing an email explaining the "bit = 0" logic for a discussion on making ftrace_test_recursion_trylock() disable preemption, I discovered a path that makes the "not do the logic if bit is zero" shortcut unsafe.

The recursion logic runs in hot paths like the function tracer, where any extra code executed causes noticeable overhead, so tricks are used to limit the amount of code that runs. This included the recursion-testing logic.
Having recursion testing is important, as there are many paths that can end up in an infinite recursion cycle when tracing every function in the kernel. Thus protection is needed to prevent that from happening.
Because it is OK to recurse due to different running context levels (e.g. an interrupt preempts a trace, and then a trace occurs in the interrupt handler), a set of bits are used to know which context one is in (normal, softirq, irq and NMI). If a recursion occurs in the same level, it is prevented*.
Then there are infrastructure levels of recursion as well. When more than one callback is attached to the same function to trace, it calls a loop function to iterate over all the callbacks. Both the callbacks and the loop function have recursion protection. The callbacks use the "ftrace_test_recursion_trylock()" which has a "function" set of context bits to test, and the loop function calls the internal trace_test_and_set_recursion() directly, with an "internal" set of bits.
If an architecture does not implement all the features supported by ftrace then the callbacks are never called directly, and the loop function is called instead, which will implement the features of ftrace.
Since both the loop function and the callbacks do recursion protection, it seemed unnecessary to do it in both locations. Thus, a trick was made to place the internal set of recursion bits at a more significant bit location than the function bits. Then, if any of the higher bits were set, the logic for the function bits could be skipped, as any new recursion would first have to go through the loop function.
This is true for architectures that do not support all the ftrace features, because all functions being traced must first go through the loop function before reaching the callbacks. But it is not true for architectures that support all the ftrace features: the loop function may be invoked because two callbacks are attached to the same function, yet a function traced from inside one of those callbacks may have only a single callback attached, in which case its callback is called directly.
i.e.
traced_function_1: [ more than one callback tracing it ]
  call loop_func

loop_func: trace_recursion set internal bit
  call callback

callback: trace_recursion [ skipped because internal bit is set, return 0 ]
  call traced_function_2

traced_function_2: [ only traced by above callback ]
  call callback

callback: trace_recursion [ skipped because internal bit is set, return 0 ]
  call traced_function_2

[ wash, rinse, repeat, BOOM! out of shampoo! ]
Thus, the "bit == 0 skip" trick is not safe, unless the loop function is call for all functions.
Since we want to encourage architectures to implement all ftrace features, having them slow down due to this extra logic may encourage the maintainers to update to the latest ftrace features. And because this logic is only safe for them, remove it completely.
[*] There is one layer of recursion that is allowed, and that is to allow for the transition between interrupt contexts (normal -> softirq -> irq -> NMI), because a trace may occur before the context update is visible to the trace recursion logic.
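A stand-alone, single-threaded model (plain C) of the per-context recursion bits described above; trace_recursion stands in for the per-task word, and CTX_* mirrors the TRACE_CTX_* enum the patch introduces:

  #include <stdio.h>

  enum { CTX_NMI, CTX_IRQ, CTX_SOFTIRQ, CTX_NORMAL, CTX_TRANSITION };

  static unsigned int trace_recursion;  /* per-task in the kernel */

  static int test_and_set_recursion(int start, int ctx)
  {
          int bit = start + ctx;

          if (trace_recursion & (1U << bit)) {
                  /* same context seen again: allow one transition slot */
                  bit = start + CTX_TRANSITION;
                  if (trace_recursion & (1U << bit))
                          return -1;  /* genuine recursion: reject */
          }
          trace_recursion |= 1U << bit;
          return bit;
  }

  static void clear_recursion(int bit)
  {
          trace_recursion &= ~(1U << bit);
  }

  int main(void)
  {
          int b1 = test_and_set_recursion(0, CTX_NORMAL);  /* ok: 3 */
          int b2 = test_and_set_recursion(0, CTX_NORMAL);  /* transition: 4 */
          int b3 = test_and_set_recursion(0, CTX_NORMAL);  /* rejected: -1 */

          printf("%d %d %d\n", b1, b2, b3);
          clear_recursion(b2);
          clear_recursion(b1);
          return 0;
  }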
Link: https://lore.kernel.org/all/609b565a-ed6e-a1da-f025-166691b5d994@linux.aliba... Link: https://lkml.kernel.org/r/20211018154412.09fcad3c@gandalf.local.home
Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Petr Mladek pmladek@suse.com Cc: Ingo Molnar mingo@redhat.com Cc: "James E.J. Bottomley" James.Bottomley@hansenpartnership.com Cc: Helge Deller deller@gmx.de Cc: Michael Ellerman mpe@ellerman.id.au Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Paul Mackerras paulus@samba.org Cc: Paul Walmsley paul.walmsley@sifive.com Cc: Palmer Dabbelt palmer@dabbelt.com Cc: Albert Ou aou@eecs.berkeley.edu Cc: Thomas Gleixner tglx@linutronix.de Cc: Borislav Petkov bp@alien8.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Josh Poimboeuf jpoimboe@redhat.com Cc: Jiri Kosina jikos@kernel.org Cc: Miroslav Benes mbenes@suse.cz Cc: Joe Lawrence joe.lawrence@redhat.com Cc: Colin Ian King colin.king@canonical.com Cc: Masami Hiramatsu mhiramat@kernel.org Cc: "Peter Zijlstra (Intel)" peterz@infradead.org Cc: Nicholas Piggin npiggin@gmail.com Cc: Jisheng Zhang jszhang@kernel.org Cc: =?utf-8?b?546L6LSH?= yun.wang@linux.alibaba.com Cc: Guo Ren guoren@kernel.org Cc: stable@vger.kernel.org Fixes: edc15cafcbfa3 ("tracing: Avoid unnecessary multiple recursion checks") Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Acked-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/trace/ftrace.c | 4 +-- kernel/trace/trace.h | 64 +++++++++++----------------------- kernel/trace/trace_functions.c | 2 +- 3 files changed, 23 insertions(+), 47 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0da3460fac514..e9d112972c7fd 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6320,7 +6320,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 	struct ftrace_ops *op;
 	int bit;

-	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+	bit = trace_test_and_set_recursion(TRACE_LIST_START);
 	if (bit < 0)
 		return;

@@ -6392,7 +6392,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
 {
 	int bit;

-	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+	bit = trace_test_and_set_recursion(TRACE_LIST_START);
 	if (bit < 0)
 		return;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6b83264e1bead..0d27bbab52c95 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -467,23 +467,8 @@ struct tracer {
  * When function tracing occurs, the following steps are made:
  *  If arch does not support a ftrace feature:
  *   call internal function (uses INTERNAL bits) which calls...
- *  If callback is registered to the "global" list, the list
- *   function is called and recursion checks the GLOBAL bits.
- *   then this function calls...
  *  The function callback, which can use the FTRACE bits to
  *   check for recursion.
- *
- * Now if the arch does not suppport a feature, and it calls
- * the global list function which calls the ftrace callback
- * all three of these steps will do a recursion protection.
- * There's no reason to do one if the previous caller already
- * did. The recursion that we are protecting against will
- * go through the same steps again.
- *
- * To prevent the multiple recursion checks, if a recursion
- * bit is set that is higher than the MAX bit of the current
- * check, then we know that the check was made by the previous
- * caller, and we can skip the current check.
  */
 enum {
 	TRACE_BUFFER_BIT,
@@ -496,12 +481,14 @@ enum {
 	TRACE_FTRACE_NMI_BIT,
 	TRACE_FTRACE_IRQ_BIT,
 	TRACE_FTRACE_SIRQ_BIT,
+	TRACE_FTRACE_TRANSITION_BIT,

-	/* INTERNAL_BITs must be greater than FTRACE_BITs */
+	/* Internal use recursion bits */
 	TRACE_INTERNAL_BIT,
 	TRACE_INTERNAL_NMI_BIT,
 	TRACE_INTERNAL_IRQ_BIT,
 	TRACE_INTERNAL_SIRQ_BIT,
+	TRACE_INTERNAL_TRANSITION_BIT,

 	TRACE_BRANCH_BIT,
/*
@@ -534,12 +521,6 @@ enum {

 	TRACE_GRAPH_DEPTH_START_BIT,
 	TRACE_GRAPH_DEPTH_END_BIT,
-
-	/*
-	 * When transitioning between context, the preempt_count() may
-	 * not be correct. Allow for a single recursion to cover this case.
-	 */
-	TRACE_TRANSITION_BIT,
 };

 #define trace_recursion_set(bit)	do { (current)->trace_recursion |= (1<<(bit)); } while (0)
@@ -559,12 +540,18 @@ enum {
 #define TRACE_CONTEXT_BITS	4

 #define TRACE_FTRACE_START	TRACE_FTRACE_BIT
-#define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)

 #define TRACE_LIST_START	TRACE_INTERNAL_BIT
-#define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)

-#define TRACE_CONTEXT_MASK	TRACE_LIST_MAX
+#define TRACE_CONTEXT_MASK	((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
+
+enum {
+	TRACE_CTX_NMI,
+	TRACE_CTX_IRQ,
+	TRACE_CTX_SOFTIRQ,
+	TRACE_CTX_NORMAL,
+	TRACE_CTX_TRANSITION,
+};
 static __always_inline int trace_get_context_bit(void)
 {
@@ -572,59 +559,48 @@ static __always_inline int trace_get_context_bit(void)

 	if (in_interrupt()) {
 		if (in_nmi())
-			bit = 0;
+			bit = TRACE_CTX_NMI;
 		else if (in_irq())
-			bit = 1;
+			bit = TRACE_CTX_IRQ;
 		else
-			bit = 2;
+			bit = TRACE_CTX_SOFTIRQ;
 	} else
-		bit = 3;
+		bit = TRACE_CTX_NORMAL;

 	return bit;
 }

-static __always_inline int trace_test_and_set_recursion(int start, int max)
+static __always_inline int trace_test_and_set_recursion(int start)
 {
 	unsigned int val = current->trace_recursion;
 	int bit;

-	/* A previous recursion check was made */
-	if ((val & TRACE_CONTEXT_MASK) > max)
-		return 0;
-
 	bit = trace_get_context_bit() + start;
 	if (unlikely(val & (1 << bit))) {
 		/*
 		 * It could be that preempt_count has not been updated during
 		 * a switch between contexts. Allow for a single recursion.
 		 */
-		bit = TRACE_TRANSITION_BIT;
+		bit = start + TRACE_CTX_TRANSITION;
 		if (trace_recursion_test(bit))
 			return -1;
 		trace_recursion_set(bit);
 		barrier();
-		return bit + 1;
+		return bit;
 	}

-	/* Normal check passed, clear the transition to allow it again */
-	trace_recursion_clear(TRACE_TRANSITION_BIT);
-
 	val |= 1 << bit;
 	current->trace_recursion = val;
 	barrier();

-	return bit + 1;
+	return bit;
 }

 static __always_inline void trace_clear_recursion(int bit)
 {
 	unsigned int val = current->trace_recursion;

-	if (!bit)
-		return;
-
-	bit--;
 	bit = 1 << bit;
 	val &= ~bit;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b611cd36e22db..4e8acfe3437fd 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -138,7 +138,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 	pc = preempt_count();
 	preempt_disable_notrace();

-	bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+	bit = trace_test_and_set_recursion(TRACE_FTRACE_START);
 	if (bit < 0)
 		goto out;