skb->pp_recycle and page->pp_magic may not be enough to track whether a frag page is from a page pool after __skb_frag_ref() has been called, mostly because of a data race; see commit 2cc3aeb5eccc ("skbuff: Fix a potential race while recycling page_pool packets").
In the case of TCP, fragmenting, coalescing or retransmitting may lose track of whether a frag page is from a page pool or not.
So increment the frag count when __skb_frag_ref() is called, and use bit 0 of frag->bv_page to indicate whether a page is from a page pool; that bit is automatically passed down to another frag->bv_page when doing a '*new_frag = *frag' or memcpying the shinfo.
It seems we could use the same trick for rx too, if that makes sense.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> --- include/linux/skbuff.h | 43 ++++++++++++++++++++++++++++++++++++++++--- include/net/page_pool.h | 5 +++++ 2 files changed, 45 insertions(+), 3 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6bdb0db..2878d26 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -331,6 +331,11 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag) return frag->bv_len; }
+static inline bool skb_frag_is_pp(const skb_frag_t *frag) +{ + return (unsigned long)frag->bv_page & 1UL; +} + /** * skb_frag_size_set() - Sets the size of a skb fragment * @frag: skb fragment @@ -2190,6 +2195,21 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, skb->pfmemalloc = true; }
+static inline void __skb_fill_pp_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, + int size) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + frag->bv_page = (struct page *)((unsigned long)page | 0x1UL); + frag->bv_offset = off; + skb_frag_size_set(frag, size); + + page = compound_head(page); + if (page_is_pfmemalloc(page)) + skb->pfmemalloc = true; +} + /** * skb_fill_page_desc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised @@ -2211,6 +2231,14 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, skb_shinfo(skb)->nr_frags = i + 1; }
+static inline void skb_fill_pp_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, + int size) +{ + __skb_fill_pp_page_desc(skb, i, page, off, size); + skb_shinfo(skb)->nr_frags = i + 1; +} + void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize);
@@ -3062,7 +3090,10 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->bv_page; + unsigned long page = (unsigned long)frag->bv_page; + + page &= ~1UL; + return (struct page *)page; }
/** @@ -3073,7 +3104,12 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag) */ static inline void __skb_frag_ref(skb_frag_t *frag) { - get_page(skb_frag_page(frag)); + struct page *page = skb_frag_page(frag); + + if (skb_frag_is_pp(frag)) + page_pool_atomic_inc_frag_count(page); + else + get_page(page); }
/** @@ -3101,7 +3137,8 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) struct page *page = skb_frag_page(frag);
#ifdef CONFIG_PAGE_POOL - if (recycle && page_pool_return_skb_page(page)) + if ((recycle || skb_frag_is_pp(frag)) && + page_pool_return_skb_page(page)) return; #endif put_page(page); diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 8d4ae4b..86babb2 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -270,6 +270,11 @@ static inline long page_pool_atomic_sub_frag_count_return(struct page *page, return ret; }
+static void page_pool_atomic_inc_frag_count(struct page *page) +{ + atomic_long_inc(&page->pp_frag_count); +} + static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL