[PATCH OLK-6.6 0/5] CVE-2025-39948
Jacob Keller (1): ice: fix Rx page leak on multi-buffer frames Larysa Zaremba (1): ice: Introduce ice_xdp_buff Maciej Fijalkowski (2): ice: gather page_count()'s of each frag right before XDP prog call ice: stop storing XDP verdict within ice_rx_buf Michal Kubiak (1): ice: fix incorrect counter for buffer allocation failures drivers/net/ethernet/intel/ice/ice_txrx.c | 122 ++++++++++++++++---------- drivers/net/ethernet/intel/ice/ice_txrx.h | 20 +++-- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 53 +++-------- 3 files changed, 99 insertions(+), 96 deletions(-) -- 2.9.5
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/20702 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/E3M... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/20702 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/E3M...
From: Larysa Zaremba <larysa.zaremba@intel.com> stable inclusion from stable-v6.6.104 commit 40a9f217cde125996fbd14e9384d46aa3610230d category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/8195 CVE: CVE-2025-39948 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... -------------------------------- [ Upstream commit d951c14ad237b087f0d1377c44932fcc0b322c40 ] In order to use XDP hints via kfuncs we need to put RX descriptor and miscellaneous data next to xdp_buff. Same as in hints implementations in other drivers, we achieve this through putting xdp_buff into a child structure. Currently, xdp_buff is stored in the ring structure, so replace it with union that includes child structure. This way enough memory is available while existing XDP code remains isolated from hints. Minimum size of the new child structure (ice_xdp_buff) is exactly 64 bytes (single cache line). To place it at the start of a cache line, move 'next' field from CL1 to CL4, as it isn't used often. This still leaves 192 bits available in CL3 for packet context extensions. Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com> Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Link: https://lore.kernel.org/r/20231205210847.28460-5-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov <ast@kernel.org> Stable-dep-of: b1a0c977c6f1 ("ice: fix incorrect counter for buffer allocation failures") Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com> --- drivers/net/ethernet/intel/ice/ice_txrx.c | 7 +++++-- drivers/net/ethernet/intel/ice/ice_txrx.h | 18 +++++++++++++++--- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 10 ++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index f5023ac..46f10f3 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -528,13 +528,14 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action * @rx_buf: Rx buffer to store the XDP action + * @eop_desc: Last descriptor in packet to read metadata from * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static void ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, - struct ice_rx_buf *rx_buf) + struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) { unsigned int ret = ICE_XDP_PASS; u32 act; @@ -542,6 +543,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, if (!xdp_prog) goto exit; + ice_xdp_meta_set_desc(xdp, eop_desc); + act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: @@ -1240,7 +1243,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf); + ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); if (rx_buf->act == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 407d4c3..ad2d286 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -257,6 +257,14 @@ enum ice_rx_dtype { ICE_RX_DTYPE_SPLIT_ALWAYS = 2, }; +struct ice_xdp_buff { + struct xdp_buff xdp_buff; + const union ice_32b_rx_flex_desc *eop_desc; +}; + +/* Required for compatibility with xdp_buffs from xsk_pool */ +static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0); + /* indices into GLINT_ITR registers */ #define ICE_RX_ITR ICE_IDX_ITR0 #define ICE_TX_ITR ICE_IDX_ITR1 @@ -298,7 +306,6 @@ enum ice_dynamic_itr { /* descriptor ring, associated with a VSI */ struct ice_rx_ring { /* CL1 - 1st cacheline starts here */ - struct ice_rx_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ struct net_device *netdev; /* netdev ring maps to */ @@ -310,12 +317,16 @@ struct ice_rx_ring { u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 next_to_alloc; - /* CL2 - 2nd cacheline starts here */ + union { struct ice_rx_buf *rx_buf; struct xdp_buff **xdp_buf; }; - struct xdp_buff xdp; + /* CL2 - 2nd cacheline starts here */ + union { + struct ice_xdp_buff xdp_ext; + struct xdp_buff xdp; + }; /* CL3 - 3rd cacheline starts here */ struct bpf_prog *xdp_prog; u16 rx_offset; @@ -332,6 +343,7 @@ struct ice_rx_ring { /* CL4 - 4th cacheline starts here */ struct ice_channel *ch; struct ice_tx_ring *xdp_ring; + struct ice_rx_ring *next; /* pointer to next ring in q_vector */ struct xsk_buff_pool *xsk_pool; u32 nr_frags; dma_addr_t dma; /* physical address of ring */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index b0e5667..00971f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -164,4 +164,14 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 ptype); void ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); + +static inline void +ice_xdp_meta_set_desc(struct xdp_buff *xdp, + union ice_32b_rx_flex_desc *eop_desc) +{ + struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff, + xdp_buff); + + xdp_ext->eop_desc = eop_desc; +} #endif /* !_ICE_TXRX_LIB_H_ */ -- 2.9.5
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com> stable inclusion from stable-v6.6.104 commit 29bcd31ace16910f2765a22125e4baf4fc7c2c95 category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/8195 CVE: CVE-2025-39948 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... -------------------------------- [ Upstream commit 11c4aa074d547d825b19cd8d9f288254d89d805c ] If we store the pgcnt on few fragments while being in the middle of gathering the whole frame and we stumbled upon DD bit not being set, we terminate the NAPI Rx processing loop and come back later on. Then on next NAPI execution we work on previously stored pgcnt. Imagine that second half of page was used actively by networking stack and by the time we came back, stack is not busy with this page anymore and decremented the refcnt. The page reuse algorithm in this case should be good to reuse the page but given the old refcnt it will not do so and attempt to release the page via page_frag_cache_drain() with pagecnt_bias used as an arg. This in turn will result in negative refcnt on struct page, which was initially observed by Xu Du. Therefore, move the page count storage from ice_get_rx_buf() to a place where we are sure that whole frame has been collected, but before calling XDP program as it internally can also change the page count of fragments belonging to xdp_buff. Fixes: ac0753391195 ("ice: Store page count inside ice_rx_buf") Reported-and-tested-by: Xu Du <xudu@redhat.com> Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> Reviewed-by: Simon Horman <horms@kernel.org> Co-developed-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Stable-dep-of: b1a0c977c6f1 ("ice: fix incorrect counter for buffer allocation failures") Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com> --- drivers/net/ethernet/intel/ice/ice_txrx.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 46f10f3..beb5c86 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -924,7 +924,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -941,6 +940,31 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, } /** + * ice_get_pgcnts - grab page_count() for gathered fragments + * @rx_ring: Rx descriptor ring to store the page counts on + * + * This function is intended to be called right before running XDP + * program so that the page recycling mechanism will be able to take + * a correct decision regarding underlying pages; this is done in such + * way as XDP program can change the refcount of page + */ +static void ice_get_pgcnts(struct ice_rx_ring *rx_ring) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + struct ice_rx_buf *rx_buf; + u32 cnt = rx_ring->count; + + for (int i = 0; i < nr_frags; i++) { + rx_buf = &rx_ring->rx_buf[idx]; + rx_buf->pgcnt = page_count(rx_buf->page); + + if (++idx == cnt) + idx = 0; + } +} + +/** * ice_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data @@ -1243,6 +1267,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; + ice_get_pgcnts(rx_ring); ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); if (rx_buf->act == ICE_XDP_PASS) goto construct_skb; -- 2.9.5
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com> stable inclusion from stable-v6.6.104 commit 05fc7307e352015b97386e2878d8aff22d48382d category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/8195 CVE: CVE-2025-39948 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... -------------------------------- [ Upstream commit 468a1952df78f65c5991b7ac885c8b5b7dd87bab ] Idea behind having ice_rx_buf::act was to simplify and speed up the Rx data path by walking through buffers that were representing cleaned HW Rx descriptors. Since it caused us a major headache recently and we rolled back to old approach that 'puts' Rx buffers right after running XDP prog/creating skb, this is useless now and should be removed. Get rid of ice_rx_buf::act and related logic. We still need to take care of a corner case where XDP program releases a particular fragment. Make ice_run_xdp() to return its result and use it within ice_put_rx_mbuf(). Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> Reviewed-by: Simon Horman <horms@kernel.org> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Stable-dep-of: b1a0c977c6f1 ("ice: fix incorrect counter for buffer allocation failures") Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com> --- drivers/net/ethernet/intel/ice/ice_txrx.c | 62 ++++++++++++++++----------- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 - drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 43 ------------------- 3 files changed, 36 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index beb5c86..e7084c6a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -527,15 +527,14 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action - * @rx_buf: Rx buffer to store the XDP action * @eop_desc: Last descriptor in packet to read metadata from * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ -static void +static u32 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, - struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) + union ice_32b_rx_flex_desc *eop_desc) { unsigned int ret = ICE_XDP_PASS; u32 act; @@ -574,7 +573,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, ret = ICE_XDP_CONSUMED; } exit: - ice_set_rx_bufs_act(xdp, rx_ring, ret); + return ret; } /** @@ -860,10 +859,8 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, xdp_buff_set_frags_flag(xdp); } - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); + if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) return -ENOMEM; - } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); @@ -1076,12 +1073,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) rx_buf->page_offset + headlen, size, xdp->frame_sz); } else { - /* buffer is unused, change the act that should be taken later - * on; data was copied onto skb's linear part so there's no + /* buffer is unused, restore biased page count in Rx buffer; + * data was copied onto skb's linear part so there's no * need for adjusting page offset and we can reuse this buffer * as-is */ - rx_buf->act = ICE_SKB_CONSUMED; + rx_buf->pagecnt_bias++; } if (unlikely(xdp_buff_has_frags(xdp))) { @@ -1134,29 +1131,34 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) * @xdp: XDP buffer carrying linear + frags part * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage * @ntc: a current next_to_clean value to be stored at rx_ring + * @verdict: return code from XDP program execution * * Walk through gathered fragments and satisfy internal page * recycle mechanism; we take here an action related to verdict * returned by XDP program; */ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - u32 *xdp_xmit, u32 ntc) + u32 *xdp_xmit, u32 ntc, u32 verdict) { u32 nr_frags = rx_ring->nr_frags + 1; u32 idx = rx_ring->first_desc; u32 cnt = rx_ring->count; + u32 post_xdp_frags = 1; struct ice_rx_buf *buf; int i; - for (i = 0; i < nr_frags; i++) { + if (unlikely(xdp_buff_has_frags(xdp))) + post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags; + + for (i = 0; i < post_xdp_frags; i++) { buf = &rx_ring->rx_buf[idx]; - if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { + if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) { ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - *xdp_xmit |= buf->act; - } else if (buf->act & ICE_XDP_CONSUMED) { + *xdp_xmit |= verdict; + } else if (verdict & ICE_XDP_CONSUMED) { buf->pagecnt_bias++; - } else if (buf->act == ICE_XDP_PASS) { + } else if (verdict == ICE_XDP_PASS) { ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); } @@ -1165,6 +1167,17 @@ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, if (++idx == cnt) idx = 0; } + /* handle buffers that represented frags released by XDP prog; + * for these we keep pagecnt_bias as-is; refcount from struct page + * has been decremented within XDP prog and we do not have to increase + * the biased refcnt + */ + for (; i < nr_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + ice_put_rx_buf(rx_ring, buf); + if (++idx == cnt) + idx = 0; + } xdp->data = NULL; rx_ring->first_desc = ntc; @@ -1191,9 +1204,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; + u32 cached_ntu, xdp_verdict; u32 cnt = rx_ring->count; u32 xdp_xmit = 0; - u32 cached_ntu; bool failure; xdp_prog = READ_ONCE(rx_ring->xdp_prog); @@ -1257,7 +1270,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { - ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc); + ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED); break; } if (++ntc == cnt) @@ -1268,13 +1281,13 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) continue; ice_get_pgcnts(rx_ring); - ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); - if (rx_buf->act == ICE_XDP_PASS) + xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); + if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc); + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); continue; construct_skb: @@ -1285,12 +1298,9 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->ring_stats->rx_stats.alloc_page_failed++; - rx_buf->act = ICE_XDP_CONSUMED; - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, - ICE_XDP_CONSUMED); + xdp_verdict = ICE_XDP_CONSUMED; } - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc); + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); if (!skb) break; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index ad2d286..53a155d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -201,7 +201,6 @@ struct ice_rx_buf { struct page *page; unsigned int page_offset; unsigned int pgcnt; - unsigned int act; unsigned int pagecnt_bias; }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 00971f8..41efafc 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -6,49 +6,6 @@ #include "ice.h" /** - * ice_set_rx_bufs_act - propagate Rx buffer action to frags - * @xdp: XDP buffer representing frame (linear and frags part) - * @rx_ring: Rx ring struct - * act: action to store onto Rx buffers related to XDP buffer parts - * - * Set action that should be taken before putting Rx buffer from first frag - * to the last. - */ -static inline void -ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, - const unsigned int act) -{ - u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - u32 nr_frags = rx_ring->nr_frags + 1; - u32 idx = rx_ring->first_desc; - u32 cnt = rx_ring->count; - struct ice_rx_buf *buf; - - for (int i = 0; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[idx]; - buf->act = act; - - if (++idx == cnt) - idx = 0; - } - - /* adjust pagecnt_bias on frags freed by XDP prog */ - if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) { - u32 delta = rx_ring->nr_frags - sinfo_frags; - - while (delta) { - if (idx == 0) - idx = cnt - 1; - else - idx--; - buf = &rx_ring->rx_buf[idx]; - buf->pagecnt_bias--; - delta--; - } - } -} - -/** * ice_test_staterr - tests bits in Rx descriptor status and error fields * @status_err_n: Rx descriptor status_error0 or status_error1 bits * @stat_err_bits: value to mask -- 2.9.5
From: Michal Kubiak <michal.kubiak@intel.com> stable inclusion from stable-v6.6.104 commit dc70ea942fcd221e507497fff9a5be900c05d5e6 category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/8195 CVE: CVE-2025-39948 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... -------------------------------- [ Upstream commit b1a0c977c6f1130f7dd125ee3db8c2435d7e3d41 ] Currently, the driver increments `alloc_page_failed` when buffer allocation fails in `ice_clean_rx_irq()`. However, this counter is intended for page allocation failures, not buffer allocation issues. This patch corrects the counter by incrementing `alloc_buf_failed` instead, ensuring accurate statistics reporting for buffer allocation failures. Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Reported-by: Jacob Keller <jacob.e.keller@intel.com> Suggested-by: Paul Menzel <pmenzel@molgen.mpg.de> Signed-off-by: Michal Kubiak <michal.kubiak@intel.com> Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de> Reviewed-by: Jason Xing <kerneljasonxing@gmail.com> Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com> Tested-by: Priya Singh <priyax.singh@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com> --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index e7084c6a..eae4376 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1297,7 +1297,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) skb = ice_construct_skb(rx_ring, xdp); /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->ring_stats->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; xdp_verdict = ICE_XDP_CONSUMED; } ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); -- 2.9.5
From: Jacob Keller <jacob.e.keller@intel.com> mainline inclusion from mainline-v6.17-rc7 commit 84bf1ac85af84d354c7a2fdbdc0d4efc8aaec34b category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/8195 CVE: CVE-2025-39948 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- The ice_put_rx_mbuf() function handles calling ice_put_rx_buf() for each buffer in the current frame. This function was introduced as part of handling multi-buffer XDP support in the ice driver. It works by iterating over the buffers from first_desc up to 1 plus the total number of fragments in the frame, cached from before the XDP program was executed. If the hardware posts a descriptor with a size of 0, the logic used in ice_put_rx_mbuf() breaks. Such descriptors get skipped and don't get added as fragments in ice_add_xdp_frag. Since the buffer isn't counted as a fragment, we do not iterate over it in ice_put_rx_mbuf(), and thus we don't call ice_put_rx_buf(). Because we don't call ice_put_rx_buf(), we don't attempt to re-use the page or free it. This leaves a stale page in the ring, as we don't increment next_to_alloc. The ice_reuse_rx_page() assumes that the next_to_alloc has been incremented properly, and that it always points to a buffer with a NULL page. Since this function doesn't check, it will happily recycle a page over the top of the next_to_alloc buffer, losing track of the old page. Note that this leak only occurs for multi-buffer frames. The ice_put_rx_mbuf() function always handles at least one buffer, so a single-buffer frame will always get handled correctly. It is not clear precisely why the hardware hands us descriptors with a size of 0 sometimes, but it happens somewhat regularly with "jumbo frames" used by 9K MTU. To fix ice_put_rx_mbuf(), we need to make sure to call ice_put_rx_buf() on all buffers between first_desc and next_to_clean. Borrow the logic of a similar function in i40e used for this same purpose. Use the same logic also in ice_get_pgcnts(). Instead of iterating over just the number of fragments, use a loop which iterates until the current index reaches to the next_to_clean element just past the current frame. Unlike i40e, the ice_put_rx_mbuf() function does call ice_put_rx_buf() on the last buffer of the frame indicating the end of packet. For non-linear (multi-buffer) frames, we need to take care when adjusting the pagecnt_bias. An XDP program might release fragments from the tail of the frame, in which case that fragment page is already released. Only update the pagecnt_bias for the first descriptor and fragments still remaining post-XDP program. Take care to only access the shared info for fragmented buffers, as this avoids a significant cache miss. The xdp_xmit value only needs to be updated if an XDP program is run, and only once per packet. Drop the xdp_xmit pointer argument from ice_put_rx_mbuf(). Instead, set xdp_xmit in the ice_clean_rx_irq() function directly. This avoids needing to pass the argument and avoids an extra bit-wise OR for each buffer in the frame. Move the increment of the ntc local variable to ensure its updated *before* all calls to ice_get_pgcnts() or ice_put_rx_mbuf(), as the loop logic requires the index of the element just after the current frame. Now that we use an index pointer in the ring to identify the packet, we no longer need to track or cache the number of fragments in the rx_ring. Cc: Christoph Petrausch <christoph.petrausch@deepl.com> Cc: Jesper Dangaard Brouer <hawk@kernel.org> Reported-by: Jaroslav Pulchart <jaroslav.pulchart@gooddata.com> Closes: https://lore.kernel.org/netdev/CAK8fFZ4hY6GUJNENz3wY9jaYLZXGfpr7dnZxzGMYoE44... Fixes: 743bbd93cf29 ("ice: put Rx buffers after being done with current frame") Tested-by: Michal Kubiak <michal.kubiak@intel.com> Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Acked-by: Jesper Dangaard Brouer <hawk@kernel.org> Tested-by: Priya Singh <priyax.singh@intel.com> Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel) Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com> Conflicts: drivers/net/ethernet/intel/ice/ice_txrx.h drivers/net/ethernet/intel/ice/ice_txrx.c [conflicts due to not merge 7e61c89c6065 ("ice: store max_frame and rx_buf_len only in ice_rx_ring"), conflicts due to not merge dc5e7a3513ef ("ice: add a separate Rx handler for flow director commands")] Signed-off-by: Wang Liang <wangliang74@huawei.com> Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com> --- drivers/net/ethernet/intel/ice/ice_txrx.c | 80 +++++++++++++------------------ drivers/net/ethernet/intel/ice/ice_txrx.h | 1 - 2 files changed, 34 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index eae4376..cc43da2 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -865,10 +865,6 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); sinfo->xdp_frags_size += size; - /* remember frag count before XDP prog execution; bpf_xdp_adjust_tail() - * can pop off frags but driver has to handle it on its own - */ - rx_ring->nr_frags = sinfo->nr_frags; if (page_is_pfmemalloc(rx_buf->page)) xdp_buff_set_frag_pfmemalloc(xdp); @@ -939,20 +935,20 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, /** * ice_get_pgcnts - grab page_count() for gathered fragments * @rx_ring: Rx descriptor ring to store the page counts on + * @ntc: the next to clean element (not included in this frame!) * * This function is intended to be called right before running XDP * program so that the page recycling mechanism will be able to take * a correct decision regarding underlying pages; this is done in such * way as XDP program can change the refcount of page */ -static void ice_get_pgcnts(struct ice_rx_ring *rx_ring) +static void ice_get_pgcnts(struct ice_rx_ring *rx_ring, unsigned int ntc) { - u32 nr_frags = rx_ring->nr_frags + 1; u32 idx = rx_ring->first_desc; struct ice_rx_buf *rx_buf; u32 cnt = rx_ring->count; - for (int i = 0; i < nr_frags; i++) { + while (idx != ntc) { rx_buf = &rx_ring->rx_buf[idx]; rx_buf->pgcnt = page_count(rx_buf->page); @@ -1126,62 +1122,51 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) } /** - * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags + * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all buffers in frame * @rx_ring: Rx ring with all the auxiliary data * @xdp: XDP buffer carrying linear + frags part - * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage - * @ntc: a current next_to_clean value to be stored at rx_ring + * @ntc: the next to clean element (not included in this frame!) * @verdict: return code from XDP program execution * - * Walk through gathered fragments and satisfy internal page - * recycle mechanism; we take here an action related to verdict - * returned by XDP program; + * Called after XDP program is completed, or on error with verdict set to + * ICE_XDP_CONSUMED. + * + * Walk through buffers from first_desc to the end of the frame, releasing + * buffers and satisfying internal page recycle mechanism. The action depends + * on verdict from XDP program. */ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - u32 *xdp_xmit, u32 ntc, u32 verdict) + u32 ntc, u32 verdict) { - u32 nr_frags = rx_ring->nr_frags + 1; u32 idx = rx_ring->first_desc; u32 cnt = rx_ring->count; - u32 post_xdp_frags = 1; struct ice_rx_buf *buf; - int i; + u32 xdp_frags = 0; + int i = 0; if (unlikely(xdp_buff_has_frags(xdp))) - post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags; + xdp_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - for (i = 0; i < post_xdp_frags; i++) { + while (idx != ntc) { buf = &rx_ring->rx_buf[idx]; + if (++idx == cnt) + idx = 0; - if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) { + /* An XDP program could release fragments from the end of the + * buffer. For these, we need to keep the pagecnt_bias as-is. + * To do this, only adjust pagecnt_bias for fragments up to + * the total remaining after the XDP program has run. + */ + if (verdict != ICE_XDP_CONSUMED) ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - *xdp_xmit |= verdict; - } else if (verdict & ICE_XDP_CONSUMED) { + else if (i++ <= xdp_frags) buf->pagecnt_bias++; - } else if (verdict == ICE_XDP_PASS) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - } ice_put_rx_buf(rx_ring, buf); - - if (++idx == cnt) - idx = 0; - } - /* handle buffers that represented frags released by XDP prog; - * for these we keep pagecnt_bias as-is; refcount from struct page - * has been decremented within XDP prog and we do not have to increase - * the biased refcnt - */ - for (; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[idx]; - ice_put_rx_buf(rx_ring, buf); - if (++idx == cnt) - idx = 0; } xdp->data = NULL; rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; } /** @@ -1262,6 +1247,10 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* retrieve a buffer from the ring */ rx_buf = ice_get_rx_buf(rx_ring, size, ntc); + /* Increment ntc before calls to ice_put_rx_mbuf() */ + if (++ntc == cnt) + ntc = 0; + if (!xdp->data) { void *hard_start; @@ -1270,24 +1259,23 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { - ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED); + ice_put_rx_mbuf(rx_ring, xdp, ntc, ICE_XDP_CONSUMED); break; } - if (++ntc == cnt) - ntc = 0; /* skip if it is NOP desc */ if (ice_is_non_eop(rx_ring, rx_desc)) continue; - ice_get_pgcnts(rx_ring); + ice_get_pgcnts(rx_ring, ntc); xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); + xdp_xmit |= xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR); continue; construct_skb: @@ -1300,7 +1288,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) rx_ring->ring_stats->rx_stats.alloc_buf_failed++; xdp_verdict = ICE_XDP_CONSUMED; } - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); if (!skb) break; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 53a155d..f2f86b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -344,7 +344,6 @@ struct ice_rx_ring { struct ice_tx_ring *xdp_ring; struct ice_rx_ring *next; /* pointer to next ring in q_vector */ struct xsk_buff_pool *xsk_pool; - u32 nr_frags; dma_addr_t dma; /* physical address of ring */ u64 cached_phctime; u16 rx_buf_len; -- 2.9.5
participants (2)
-
patchwork bot -
Zhang Changzhong