tcp: avoid atomic operations on sk->sk_rmem_alloc

TCP uses generic skb_set_owner_r() and sock_rfree()
for received packets, with socket lock being owned.

Switch to private versions, avoiding two atomic operations
per packet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250320121604.3342831-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Eric Dumazet 2025-03-20 12:16:04 +00:00 committed by Jakub Kicinski
parent c1dacb4577
commit 0de2a5c4b8
4 changed files with 35 additions and 6 deletions

View file

@ -779,6 +779,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
/* tcp.c */ /* tcp.c */
void tcp_get_info(struct sock *, struct tcp_info *); void tcp_get_info(struct sock *, struct tcp_info *);
void tcp_sock_rfree(struct sk_buff *skb);
/* Read 'sendfile()'-style from a TCP socket */ /* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
@ -2898,4 +2899,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
const void *saddr, const void *daddr, const void *saddr, const void *daddr,
int family, int dif, int sdif); int family, int dif, int sdif);
/*
 * TCP-private variant of skb_set_owner_r().
 *
 * Attaches @skb to @sk with tcp_sock_rfree() as destructor and charges
 * skb->truesize to sk->sk_rmem_alloc using a plain atomic_read() followed
 * by atomic_set(), which avoids one atomic_add(). The same amount is
 * taken out of the socket's forward allocation.
 */
static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
	unsigned int charge;
	int rmem;

	skb_orphan(skb);
	skb->destructor = tcp_sock_rfree;
	skb->sk = sk;

	sock_owned_by_me(sk);

	/* Read-modify-write without an atomic RMW instruction. */
	charge = skb->truesize;
	rmem = atomic_read(&sk->sk_rmem_alloc);
	atomic_set(&sk->sk_rmem_alloc, rmem + charge);

	sk_forward_alloc_add(sk, -charge);
}
#endif /* _TCP_H */ #endif /* _TCP_H */

View file

@ -1525,11 +1525,25 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
__tcp_cleanup_rbuf(sk, copied); __tcp_cleanup_rbuf(sk, copied);
} }
/*
 * TCP-private variant of sock_rfree().
 *
 * Uncharges skb->truesize from the owning socket's sk_rmem_alloc using
 * atomic_read() + atomic_set(), which avoids one atomic_sub(), gives the
 * same amount back to the forward allocation, then calls sk_mem_reclaim().
 */
void tcp_sock_rfree(struct sk_buff *skb)
{
	struct sock *owner = skb->sk;
	unsigned int freed = skb->truesize;
	int rmem;

	sock_owned_by_me(owner);

	/* Read-modify-write without an atomic RMW instruction. */
	rmem = atomic_read(&owner->sk_rmem_alloc);
	atomic_set(&owner->sk_rmem_alloc, rmem - freed);

	sk_forward_alloc_add(owner, freed);
	sk_mem_reclaim(owner);
}
static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
{ {
__skb_unlink(skb, &sk->sk_receive_queue); __skb_unlink(skb, &sk->sk_receive_queue);
if (likely(skb->destructor == sock_rfree)) { if (likely(skb->destructor == tcp_sock_rfree)) {
sock_rfree(skb); tcp_sock_rfree(skb);
skb->destructor = NULL; skb->destructor = NULL;
skb->sk = NULL; skb->sk = NULL;
return skb_attempt_defer_free(skb); return skb_attempt_defer_free(skb);

View file

@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
tcp_segs_in(tp, skb); tcp_segs_in(tp, skb);
__skb_pull(skb, tcp_hdrlen(skb)); __skb_pull(skb, tcp_hdrlen(skb));
sk_forced_mem_schedule(sk, skb->truesize); sk_forced_mem_schedule(sk, skb->truesize);
skb_set_owner_r(skb, sk); tcp_skb_set_owner_r(skb, sk);
TCP_SKB_CB(skb)->seq++; TCP_SKB_CB(skb)->seq++;
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;

View file

@ -5171,7 +5171,7 @@ end:
if (tcp_is_sack(tp)) if (tcp_is_sack(tp))
tcp_grow_window(sk, skb, false); tcp_grow_window(sk, skb, false);
skb_condense(skb); skb_condense(skb);
skb_set_owner_r(skb, sk); tcp_skb_set_owner_r(skb, sk);
} }
} }
@ -5187,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) { if (!eaten) {
tcp_add_receive_queue(sk, skb); tcp_add_receive_queue(sk, skb);
skb_set_owner_r(skb, sk); tcp_skb_set_owner_r(skb, sk);
} }
return eaten; return eaten;
} }
@ -5504,7 +5504,7 @@ skip_this:
__skb_queue_before(list, skb, nskb); __skb_queue_before(list, skb, nskb);
else else
__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
skb_set_owner_r(nskb, sk); tcp_skb_set_owner_r(nskb, sk);
mptcp_skb_ext_move(nskb, skb); mptcp_skb_ext_move(nskb, skb);
/* Copy data, releasing collapsed skbs. */ /* Copy data, releasing collapsed skbs. */